diff --git a/deploy/lightspeed-stack/Containerfile b/deploy/lightspeed-stack/Containerfile
index 567b5793e..8466b302a 100644
--- a/deploy/lightspeed-stack/Containerfile
+++ b/deploy/lightspeed-stack/Containerfile
@@ -130,6 +130,12 @@ ENV PATH="/app-root/.venv/bin:$PATH"
 # We place them at /app-root/providers.d. YAMLs there reference lightspeed_stack_providers.*, so that package must be on PYTHONPATH.
 ENV PYTHONPATH="/app-root"
 
+# Pre-download the ibm-granite/granite-embedding-30m-english embedding model for OKP/Solr vector search (~61MB, baked into the image), then chown the cache to 1001:1001.
+# HF_HOME is set only for this build command, pointing at a dedicated path so the docker-compose HF cache volume mount does not shadow it. NOTE(review): the runtime must also resolve /app-root/.hf-models (e.g. via HF_HOME) to find the model - confirm.
+RUN mkdir -p /app-root/.hf-models && \
+    HF_HOME=/app-root/.hf-models python3.12 -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('ibm-granite/granite-embedding-30m-english')" && \
+    chown -R 1001:1001 /app-root/.hf-models
+
 # Run the application
 EXPOSE 8080
 ENTRYPOINT ["python3.12", "src/lightspeed_stack.py"]
diff --git a/docker-compose-library.yaml b/docker-compose-library.yaml
index f0e075848..b42132fa4 100755
--- a/docker-compose-library.yaml
+++ b/docker-compose-library.yaml
@@ -58,8 +58,10 @@ services:
       - LLAMA_STACK_LOGGING=${LLAMA_STACK_LOGGING:-}
       # FAISS test and inline RAG config
       - FAISS_VECTOR_STORE_ID=${FAISS_VECTOR_STORE_ID:-}
+      # Forward RH_SERVER_OKP from the host shell into the container for OKP access (empty string when unset on the host).
+      - RH_SERVER_OKP=${RH_SERVER_OKP:-}
       # Prevent HuggingFace Hub update checks (HTTP 429 rate-limiting in CI from parallel jobs).
-      - HF_HUB_OFFLINE=1
+      - HF_HUB_OFFLINE=${HF_HUB_OFFLINE:-1}  # defaults to 1 when unset or empty on the host; export HF_HUB_OFFLINE to override
     healthcheck:
       test: ["CMD", "curl", "-f", "http://localhost:8080/liveness"]
       interval: 10s   # how often to run the check