Jaja-09 committed on
Commit
e544167
·
1 Parent(s): 9b295f0

fix(build): set writable TRANSFORMERS_CACHE and pre-download distilbert tokenizer/model

Browse files
Files changed (1) hide show
  1. Dockerfile +5 -3
Dockerfile CHANGED
@@ -15,10 +15,12 @@ ARG HF_MODEL_REPO=Jaja-09/authorcheck-model
15
  # Download model snapshot from HF model repo
16
  RUN python -c "from huggingface_hub import snapshot_download; snapshot_download(repo_id='${HF_MODEL_REPO}', local_dir='/app/model')"
17
 
18
- # Pre-download NLTK data to a writable path and set env
19
  ENV NLTK_DATA=/app/nltk_data
20
- RUN mkdir -p /app/nltk_data && \
21
- python -c "import nltk; nltk.download('punkt', download_dir='/app/nltk_data', quiet=True)"
 
 
22
 
23
  EXPOSE 7860
24
  CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
 
15
  # Download model snapshot from HF model repo
16
  RUN python -c "from huggingface_hub import snapshot_download; snapshot_download(repo_id='${HF_MODEL_REPO}', local_dir='/app/model')"
17
 
18
+ # Use writable caches inside /app
19
  ENV NLTK_DATA=/app/nltk_data
20
+ ENV TRANSFORMERS_CACHE=/app/hf_cache
21
+ RUN mkdir -p /app/nltk_data /app/hf_cache && \
22
+ python -c "import nltk; nltk.download('punkt', download_dir='/app/nltk_data', quiet=True)" && \
23
+ python -c "from transformers import AutoTokenizer, AutoModelForSequenceClassification; m='distilbert-base-uncased-finetuned-sst-2-english'; AutoTokenizer.from_pretrained(m, cache_dir='/app/hf_cache'); AutoModelForSequenceClassification.from_pretrained(m, cache_dir='/app/hf_cache')"
24
 
25
  EXPOSE 7860
26
  CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]