# Patch summary (text before the first "diff --git" header is skipped by
# `git apply` and `patch`):
#   1. Containerfile: bake the ibm-granite/granite-embedding-30m-english model
#      into the image under /app-root/.hf-models and hand it to UID/GID 1001.
#   2. docker-compose-library.yaml: pass RH_SERVER_OKP through from the host
#      shell and make HF_HUB_OFFLINE overridable (still defaults to 1).
#
# NOTE(review): this patch was recovered from a copy in which every line break
# had been collapsed. Line boundaries follow the hunk headers (6 -> 12 and
# 8 -> 10 lines), but leading whitespace on the YAML context lines and on the
# RUN continuation lines is reconstructed -- confirm against the target files
# (or apply with --ignore-whitespace).
# NOTE(review): HF_HOME is set only for the build-time RUN below; no runtime
# ENV for it is visible in this hunk. Confirm the runtime configuration points
# at /app-root/.hf-models, otherwise the baked model may not be found when
# HF_HUB_OFFLINE=1.
diff --git a/deploy/lightspeed-stack/Containerfile b/deploy/lightspeed-stack/Containerfile
index 567b5793e..8466b302a 100644
--- a/deploy/lightspeed-stack/Containerfile
+++ b/deploy/lightspeed-stack/Containerfile
@@ -130,6 +130,12 @@ ENV PATH="/app-root/.venv/bin:$PATH"
 # We place them at /app-root/providers.d. YAMLs there reference lightspeed_stack_providers.*, so that package must be on PYTHONPATH.
 ENV PYTHONPATH="/app-root"
 
+# Pre-download embedding model for OKP/Solr vector search (~61MB, baked into image).
+# Uses a dedicated path so the docker-compose HF cache volume mount does not shadow it.
+RUN mkdir -p /app-root/.hf-models && \
+    HF_HOME=/app-root/.hf-models python3.12 -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('ibm-granite/granite-embedding-30m-english')" && \
+    chown -R 1001:1001 /app-root/.hf-models
+
 # Run the application
 EXPOSE 8080
 ENTRYPOINT ["python3.12", "src/lightspeed_stack.py"]
diff --git a/docker-compose-library.yaml b/docker-compose-library.yaml
index f0e075848..b42132fa4 100755
--- a/docker-compose-library.yaml
+++ b/docker-compose-library.yaml
@@ -58,8 +58,10 @@ services:
       - LLAMA_STACK_LOGGING=${LLAMA_STACK_LOGGING:-}
       # FAISS test and inline RAG config
       - FAISS_VECTOR_STORE_ID=${FAISS_VECTOR_STORE_ID:-}
+      # Pass env var from shell into container to access OKP
+      - RH_SERVER_OKP=${RH_SERVER_OKP:-}
       # Prevent HuggingFace Hub update checks (HTTP 429 rate-limiting in CI from parallel jobs).
-      - HF_HUB_OFFLINE=1
+      - HF_HUB_OFFLINE=${HF_HUB_OFFLINE:-1}
     healthcheck:
       test: ["CMD", "curl", "-f", "http://localhost:8080/liveness"]
       interval: 10s # how often to run the check