Jaja-09 committed on
Commit
e544167
·
1 Parent(s): 9b295f0

fix(build): set writable TRANSFORMERS_CACHE and pre-download distilbert tokenizer/model

Browse files
Files changed (1) hide show
  1. Dockerfile +5 -3
Dockerfile CHANGED
@@ -15,10 +15,12 @@ ARG HF_MODEL_REPO=Jaja-09/authorcheck-model
15
  # Download model snapshot from HF model repo
16
  RUN python -c "from huggingface_hub import snapshot_download; snapshot_download(repo_id='${HF_MODEL_REPO}', local_dir='/app/model')"
17
 
18
- # Pre-download NLTK data to a writable path and set env
19
  ENV NLTK_DATA=/app/nltk_data
20
- RUN mkdir -p /app/nltk_data && \
21
- python -c "import nltk; nltk.download('punkt', download_dir='/app/nltk_data', quiet=True)"
 
 
22
 
23
  EXPOSE 7860
24
  CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
 
15
  # Download model snapshot from HF model repo
16
  RUN python -c "from huggingface_hub import snapshot_download; snapshot_download(repo_id='${HF_MODEL_REPO}', local_dir='/app/model')"
17
 
18
+ # Use writable caches inside /app
19
  ENV NLTK_DATA=/app/nltk_data
20
+ ENV TRANSFORMERS_CACHE=/app/hf_cache
21
+ RUN mkdir -p /app/nltk_data /app/hf_cache && \
22
+ python -c "import nltk; nltk.download('punkt', download_dir='/app/nltk_data', quiet=True)" && \
23
+ python -c "from transformers import AutoTokenizer, AutoModelForSequenceClassification; m='distilbert-base-uncased-finetuned-sst-2-english'; AutoTokenizer.from_pretrained(m, cache_dir='/app/hf_cache'); AutoModelForSequenceClassification.from_pretrained(m, cache_dir='/app/hf_cache')"
24
 
25
  EXPOSE 7860
26
  CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]