diff --git a/.env.example b/.env.example index 1e10ade..87c261b 100644 --- a/.env.example +++ b/.env.example @@ -21,23 +21,37 @@ BASE_URL=http://localhost:8000 ALLOW_HTTP_SESSIONS=true # Slack — one pair per agent (Bot User OAuth Token + App-Level Token) +# Add as many agents as needed using this pattern; no code changes required. +# SLACK_BOT_TOKEN_<AGENT>=xoxb-... (required) +# SLACK_APP_TOKEN_<AGENT>=xapp-... (optional) SLACK_BOT_TOKEN_SU=xoxb-placeholder -SLACK_APP_TOKEN_SU=xapp-placeholder SLACK_BOT_TOKEN_WISEMAN=xoxb-placeholder -SLACK_APP_TOKEN_WISEMAN=xapp-placeholder -SLACK_BOT_TOKEN_LOTZ=xoxb-placeholder -SLACK_APP_TOKEN_LOTZ=xapp-placeholder -SLACK_BOT_TOKEN_CRAVATT=xoxb-placeholder -SLACK_APP_TOKEN_CRAVATT=xapp-placeholder -SLACK_BOT_TOKEN_GROTJAHN=xoxb-placeholder -SLACK_APP_TOKEN_GROTJAHN=xapp-placeholder -SLACK_BOT_TOKEN_PETRASCHECK=xoxb-placeholder -SLACK_APP_TOKEN_PETRASCHECK=xapp-placeholder -SLACK_BOT_TOKEN_KEN=xoxb-placeholder -SLACK_APP_TOKEN_KEN=xapp-placeholder -SLACK_BOT_TOKEN_RACKI=xoxb-placeholder -SLACK_APP_TOKEN_RACKI=xapp-placeholder -SLACK_BOT_TOKEN_SAEZ=xoxb-placeholder -SLACK_APP_TOKEN_SAEZ=xapp-placeholder -SLACK_BOT_TOKEN_WU=xoxb-placeholder -SLACK_APP_TOKEN_WU=xapp-placeholder +SLACK_BOT_TOKEN_GRANTBOT=xoxb-placeholder + +# Podcast TTS backend: "mistral" (default), "openai", or "local" (vLLM-Omni server) +PODCAST_TTS_BACKEND="mistral" + +# Mistral AI TTS (used when PODCAST_TTS_BACKEND=mistral) +MISTRAL_API_KEY=your-mistral-api-key +MISTRAL_TTS_MODEL=voxtral-mini-tts-latest +MISTRAL_TTS_DEFAULT_VOICE=your-voice-uuid + +# OpenAI TTS (used when PODCAST_TTS_BACKEND=openai) +# Voices: alloy echo fable onyx nova shimmer +# Models: tts-1 tts-1-hd gpt-4o-mini-tts +OPENAI_API_KEY=your-openai-api-key +OPENAI_TTS_MODEL=tts-1 +OPENAI_TTS_DEFAULT_VOICE=alloy + +# Local vLLM-Omni TTS server (used when PODCAST_TTS_BACKEND=local) +# Start with: vllm serve --port 8008 (must match LOCAL_TTS_PORT below) +LOCAL_TTS_HOST=127.0.0.1 +LOCAL_TTS_PORT=8008 +LOCAL_TTS_MODEL=mistralai/Voxtral-4B-TTS-2603 
+LOCAL_TTS_VOICE=default + +# Podcast +PODCAST_BASE_URL=http://localhost:8001 +PODCAST_SEARCH_WINDOW_DAYS=14 +PODCAST_MAX_CANDIDATES=50 +# PODCAST_NORMALIZE_AUDIO=true # uncomment to enable ffmpeg loudnorm post-processing (EBU R128, -16 LUFS) diff --git a/.gitignore b/.gitignore index aad82ec..342842f 100644 --- a/.gitignore +++ b/.gitignore @@ -53,3 +53,9 @@ certbot/ .pytest_cache/ .coverage htmlcov/ + +# Runtime data (state files, generated audio — ephemeral) +data/ + +# Test output artifacts +.labbot-tests/ diff --git a/AGENT.md b/AGENT.md index a94b338..39628fc 100644 --- a/AGENT.md +++ b/AGENT.md @@ -32,6 +32,7 @@ All specs are in `/specs/`: - `profile-ingestion.md` — 9-step pipeline, ORCID → PubMed → PMC → LLM - `admin-dashboard.md` — read-only, server-rendered, impersonation - `agent-system.md` — Slack Bolt, Socket Mode, two-phase LLM calls, simulation engine +- `labbot-podcast.md` — daily personalized research briefing: PubMed search, LLM selection/summarization, Local or API TTS, Slack DM delivery, per-PI RSS podcast feed ## Tech Stack diff --git a/CLAUDE.md b/CLAUDE.md index 66a844b..41d34bd 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -42,3 +42,50 @@ docker compose --profile agent run -d --name agent-run agent python -m src.agent ``` **Note:** The agent-run container uses mounted source code but the Python process only loads modules at startup. Code changes require a container restart to take effect. **After any code change that affects the running agent process, flag this to the user so they can decide whether to restart.** + +## Podcast Pipeline + +The LabBot Podcast pipeline (specs/labbot-podcast.md) runs daily at 9am UTC for each active agent: + +1. Build PubMed queries from lab's public profile +2. Fetch candidates from PubMed + bioRxiv + medRxiv + arXiv (last 14 days, up to 50+10 candidates) +3. Claude Sonnet selects most relevant paper (applying PI's podcast preferences from their private ProfileRevision) +4. 
Claude Opus writes a ~250-word structured brief +5. TTS audio generated (Mistral or local vLLM-Omni); ffmpeg loudnorm applied if PODCAST_NORMALIZE_AUDIO=true +6. Slack DM sent to PI with text summary + RSS link +7. RSS feed available at `/podcast/{agent_id}/feed.xml` +8. Audio served at `/podcast/{agent_id}/audio/{date}.mp3` + +Preprint IDs use prefixed format: `biorxiv:...`, `medrxiv:...`, `arxiv:...`. The `paper_url` in summaries links to the correct server (not always PubMed). + +```bash +# Run podcast pipeline once for all active agents +docker compose --profile podcast run --rm podcast python -m src.podcast.main + +# Test pipeline for 'su' agent only +docker compose exec app python scripts/test_podcast_su.py +``` + +## Database Migration Caveat + +If the DB was initialized from the `main` branch schema and then this branch is checked out, `alembic upgrade head` will stamp the version without re-running migrations that share a revision ID with ones already applied on `main`. Any columns added by branch-specific migrations may be silently missing. + +**Symptom:** `UndefinedColumnError` at runtime despite `alembic current` showing `head`. + +**Fix:** Check for missing columns and apply them manually: +```bash +docker compose exec app python -c " +import asyncio +from src.database import get_engine +from sqlalchemy import text + +async def check(): + eng = get_engine() + async with eng.connect() as conn: + result = await conn.execute(text(\"SELECT column_name FROM information_schema.columns WHERE table_name='researcher_profiles' ORDER BY ordinal_position\")) + print([r[0] for r in result]) + +asyncio.run(check()) +" +``` +Then add any missing columns with `ALTER TABLE ... ADD COLUMN IF NOT EXISTS ...`. 
diff --git a/Dockerfile b/Dockerfile index c032e95..63a7b94 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,6 +6,7 @@ WORKDIR /app RUN apt-get update && apt-get install -y --no-install-recommends \ gcc \ libpq-dev \ + ffmpeg \ && rm -rf /var/lib/apt/lists/* # Install Python dependencies diff --git a/alembic/versions/0010_add_podcast_episodes.py b/alembic/versions/0010_add_podcast_episodes.py new file mode 100644 index 0000000..adad7d2 --- /dev/null +++ b/alembic/versions/0010_add_podcast_episodes.py @@ -0,0 +1,56 @@ +"""Add podcast_episodes table + +Revision ID: 0010 +Revises: 0009 +Create Date: 2026-04-09 00:00:00.000000 + +""" + +from typing import Sequence, Union + +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +from alembic import op + +revision: str = "0010" +down_revision: Union[str, None] = "0009" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.create_table( + "podcast_episodes", + sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True), + sa.Column("agent_id", sa.String(50), nullable=False), + sa.Column("episode_date", sa.Date, nullable=False), + sa.Column("pmid", sa.String(100), nullable=False), + sa.Column("paper_title", sa.String(500), nullable=False), + sa.Column("paper_authors", sa.String(500), nullable=False), + sa.Column("paper_journal", sa.String(255), nullable=False), + sa.Column("paper_year", sa.Integer, nullable=False), + sa.Column("text_summary", sa.Text, nullable=False), + sa.Column("audio_file_path", sa.String(500), nullable=True), + sa.Column("audio_duration_seconds", sa.Integer, nullable=True), + sa.Column("slack_delivered", sa.Boolean, nullable=False, server_default="false"), + sa.Column("selection_justification", sa.Text, nullable=False), + sa.Column( + "created_at", + sa.DateTime(timezone=True), + server_default=sa.func.now(), + nullable=False, + ), + ) + op.create_index("ix_podcast_episodes_agent_id", 
"podcast_episodes", ["agent_id"]) + op.create_index("ix_podcast_episodes_episode_date", "podcast_episodes", ["episode_date"]) + op.create_unique_constraint( + "uq_podcast_agent_date", "podcast_episodes", ["agent_id", "episode_date"] + ) + + +def downgrade() -> None: + op.drop_constraint("uq_podcast_agent_date", "podcast_episodes") + op.drop_index("ix_podcast_episodes_episode_date") + op.drop_index("ix_podcast_episodes_agent_id") + op.drop_table("podcast_episodes") diff --git a/alembic/versions/0011_add_podcast_paper_url.py b/alembic/versions/0011_add_podcast_paper_url.py new file mode 100644 index 0000000..f5624dc --- /dev/null +++ b/alembic/versions/0011_add_podcast_paper_url.py @@ -0,0 +1,29 @@ +"""Add paper_url column to podcast_episodes + +Revision ID: 0011 +Revises: 0010 +Create Date: 2026-04-10 00:00:00.000000 + +""" + +from typing import Sequence, Union + +import sqlalchemy as sa + +from alembic import op + +revision: str = "0011" +down_revision: Union[str, None] = "0010" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.add_column( + "podcast_episodes", + sa.Column("paper_url", sa.String(1000), nullable=True), + ) + + +def downgrade() -> None: + op.drop_column("podcast_episodes", "paper_url") diff --git a/alembic/versions/0012_add_podcast_preferences.py b/alembic/versions/0012_add_podcast_preferences.py new file mode 100644 index 0000000..bba69c7 --- /dev/null +++ b/alembic/versions/0012_add_podcast_preferences.py @@ -0,0 +1,64 @@ +"""Add podcast_preferences table + +Revision ID: 0012 +Revises: 0011 +Create Date: 2026-04-14 00:00:00.000000 + +""" + +from typing import Sequence, Union + +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql +from sqlalchemy.dialects.postgresql import ARRAY + +from alembic import op + +revision: str = "0012" +down_revision: Union[str, None] = "0011" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: 
Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + op.create_table( + "podcast_preferences", + sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True), + sa.Column("agent_id", sa.String(50), nullable=False), + sa.Column("voice_id", sa.String(100), nullable=True), + sa.Column( + "extra_keywords", + ARRAY(sa.String), + nullable=False, + server_default="{}", + ), + sa.Column( + "preferred_journals", + ARRAY(sa.String), + nullable=False, + server_default="{}", + ), + sa.Column( + "deprioritized_journals", + ARRAY(sa.String), + nullable=False, + server_default="{}", + ), + sa.Column( + "updated_at", + sa.DateTime(timezone=True), + nullable=False, + server_default=sa.func.now(), + ), + ) + op.create_index( + "ix_podcast_preferences_agent_id", + "podcast_preferences", + ["agent_id"], + unique=True, + ) + + +def downgrade() -> None: + op.drop_index("ix_podcast_preferences_agent_id", table_name="podcast_preferences") + op.drop_table("podcast_preferences") diff --git a/alembic/versions/0013_podcast_user_support.py b/alembic/versions/0013_podcast_user_support.py new file mode 100644 index 0000000..89d77cd --- /dev/null +++ b/alembic/versions/0013_podcast_user_support.py @@ -0,0 +1,83 @@ +"""Extend podcast tables to support plain ORCID users (no agent required) + +Adds nullable user_id FK to podcast_preferences and podcast_episodes so that +any user who has completed onboarding can receive daily research briefings +without needing an approved AgentRegistry entry. 
+ +Changes: + - podcast_preferences.agent_id: NOT NULL → nullable + - podcast_preferences.user_id: new nullable FK → users.id, unique index + - podcast_episodes.agent_id: NOT NULL → nullable + - podcast_episodes.user_id: new nullable FK → users.id + - podcast_episodes: partial unique index on (user_id, episode_date) WHERE user_id IS NOT NULL + +Revision ID: 0013 +Revises: 0012 +Create Date: 2026-04-14 00:00:00.000000 +""" + +from typing import Sequence, Union + +import sqlalchemy as sa +from sqlalchemy.dialects.postgresql import UUID + +from alembic import op + +revision: str = "0013" +down_revision: Union[str, None] = "0012" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # --- podcast_preferences --- + # Make agent_id nullable (existing agent rows keep their values) + op.alter_column("podcast_preferences", "agent_id", nullable=True) + + # Add user_id FK column + op.add_column( + "podcast_preferences", + sa.Column( + "user_id", + UUID(as_uuid=True), + sa.ForeignKey("users.id", ondelete="CASCADE"), + nullable=True, + ), + ) + op.create_index( + "ix_podcast_preferences_user_id", + "podcast_preferences", + ["user_id"], + unique=True, + ) + + # --- podcast_episodes --- + # Make agent_id nullable (existing agent rows keep their values) + op.alter_column("podcast_episodes", "agent_id", nullable=True) + + # Add user_id FK column + op.add_column( + "podcast_episodes", + sa.Column( + "user_id", + UUID(as_uuid=True), + sa.ForeignKey("users.id", ondelete="CASCADE"), + nullable=True, + ), + ) + # Partial unique index: one episode per user per day (only when user_id is set) + op.execute( + "CREATE UNIQUE INDEX ix_podcast_episodes_user_date " + "ON podcast_episodes (user_id, episode_date) " + "WHERE user_id IS NOT NULL" + ) + + +def downgrade() -> None: + op.execute("DROP INDEX IF EXISTS ix_podcast_episodes_user_date") + op.drop_column("podcast_episodes", "user_id") + 
op.alter_column("podcast_episodes", "agent_id", nullable=False) + + op.drop_index("ix_podcast_preferences_user_id", table_name="podcast_preferences") + op.drop_column("podcast_preferences", "user_id") + op.alter_column("podcast_preferences", "agent_id", nullable=False) diff --git a/code_review.md b/code_review.md new file mode 100644 index 0000000..fbf0a1d --- /dev/null +++ b/code_review.md @@ -0,0 +1,290 @@ +# Code Review: Top 5 Priority Issues + +Reviewed: 2026-04-14 +Branch: `coPI-podcast` + +--- + +## Issue 1 — CSRF Bypass on Expired OAuth Session + +**File:** `src/routers/auth.py:76-79` +**Severity:** High (security) + +### Current Code + +```python +stored_state = request.session.pop("oauth_state", None) +if stored_state and state != stored_state: + logger.warning("OAuth state mismatch") + return RedirectResponse(url="/login?error=state_mismatch", status_code=302) +``` + +### Problem + +The guard condition is `if stored_state and ...`, meaning it only enforces the check when `stored_state` is truthy. If the user's session has expired (or was never set), `stored_state` is `None` and the entire check is skipped — any `state` value (including `None`) passes through. A CSRF attacker can initiate an OAuth flow, let the victim's session expire, then replay the callback with an arbitrary code. + +### Best Practice + +Per [RFC 6749 §10.12](https://datatracker.ietf.org/doc/html/rfc6749#section-10.12) and OWASP OAuth guidelines, the `state` parameter must be treated as a **required, non-nullable nonce**. The correct pattern is to reject the callback if `stored_state` is missing (session expired), not to treat it as a pass condition. 
+ +### How to Fix + +Change the condition from a two-branch `if stored_state and ...` guard to an explicit three-case rejection: + +```python +stored_state = request.session.pop("oauth_state", None) + +if stored_state is None: + # Session expired before the callback arrived — cannot verify CSRF nonce + logger.warning("OAuth callback with no stored state (session expired or missing)") + return RedirectResponse(url="/login?error=session_expired", status_code=302) + +if state != stored_state: + logger.warning("OAuth state mismatch — possible CSRF attempt") + return RedirectResponse(url="/login?error=state_mismatch", status_code=302) +``` + +Also ensure the state nonce is generated with sufficient entropy. In `src/routers/auth.py` (in the `/login` route that initiates the flow), use `secrets.token_urlsafe(32)` rather than any shorter or predictable token, and store it in the session immediately before the redirect. + +--- + +## Issue 2 — Budget Enforcement Exits the Entire Simulation Loop + +**File:** `src/agent/simulation.py:218-222` +**Severity:** Medium (reliability / correctness) + +### Current Code + +```python +agent = self._select_agent() +if not agent or not self._agent_within_budget(agent): + # All agents over budget + logger.info("All agents over budget or no agent selected. Stopping.") + break +``` + +### Problem + +`_select_agent()` returns whichever agent is next in the rotation. If that specific agent is over budget, the entire simulation `break`s — even if every other agent still has budget remaining. The log comment says "All agents over budget" but that is only true in the case where `_select_agent` returns `None`; when it returns an agent that is individually over budget, the others are never checked. + +### Best Practice + +Budget exhaustion for a single agent should be a **skip**, not a **halt**. The loop should continue cycling through agents until every agent is either over budget or no agent can be selected at all. 
A common pattern is to track how many consecutive agents have been skipped and stop only when the skip count equals the total number of agents. + +### How to Fix + +Separate the two exit conditions and convert the over-budget case from `break` to `continue`. Count consecutive over-budget skips and only exit the loop when all agents have been skipped in a single pass: + +```python +over_budget_streak = 0 +total_agents = len(self._agents) + +while True: + agent = self._select_agent() + if not agent: + logger.info("No agent selected — simulation complete.") + break + + if not self._agent_within_budget(agent): + over_budget_streak += 1 + agent.state.last_selected = time.time() + if over_budget_streak >= total_agents: + logger.info("All agents over budget. Stopping.") + break + logger.debug("[%s] Over budget, skipping.", agent.agent_id) + continue + + over_budget_streak = 0 # reset when a valid agent is found + # ... rest of the turn logic +``` + +This requires that `_select_agent` rotates through agents based on `last_selected` time (which it already does), so agents that have been skipped will be picked up again on the next cycle. + +--- + +## Issue 3 — RSS Feed Served with Missing Audio File + +**File:** `src/podcast/main.py:89-103`, `src/podcast/pipeline.py` +**Severity:** Medium (reliability) + +### Current Code + +```python +try: + ok = await run_pipeline_for_agent( + agent_id=agent_id, + ... + ) + if ok: + produced.append(agent_id) +except Exception as exc: + logger.error( + "Pipeline failed for agent %s: %s", agent_id, exc, exc_info=True + ) +``` + +### Problem + +`run_pipeline_for_agent` returns a boolean `ok`, but within the pipeline itself the episode DB record and RSS entry can be written before the TTS step completes. If TTS fails, the audio file does not exist, but the feed already contains an `<enclosure>` pointing to a non-existent MP3. Any podcast client that subscribed to the feed will attempt a GET on a 404 URL and may display a broken episode permanently. 
+ +### Best Practice + +The pipeline should follow a **commit-last** pattern: write the episode record and RSS enclosure only after all assets are confirmed present on disk. This is the same pattern used in video/audio platforms (e.g., YouTube's upload pipeline) — metadata is published only after the binary asset is available. + +### How to Fix + +Inside `src/podcast/pipeline.py`, restructure the steps in this order: + +1. Fetch and select the paper (read-only, safe to do first). +2. Generate the text brief (Claude Opus call). +3. Call TTS and write the audio file to disk. **Capture the returned path.** +4. Verify the audio file exists and has a non-zero size (`path.stat().st_size > 0`) before proceeding. +5. Only if step 4 passes: write the `PodcastEpisode` DB row and call `db_session.flush()`. +6. Only after the DB row is committed: build and write the RSS `<enclosure>`. + +If TTS fails at step 3, log the error and return `ok=False` without writing anything to the DB or RSS. The caller in `main.py` already handles `ok=False` correctly; the gap is in the pipeline not propagating TTS failures as `False`. + +As a secondary safeguard, the RSS endpoint (`/podcast/{agent_id}/feed.xml`) should check whether `data/podcast_audio/{agent_id}/{date}.mp3` exists before including the `<enclosure>` element in its output. This prevents any historical DB rows with missing audio from appearing in the feed. + +--- + +## Issue 4 — Non-Atomic File Writes for Profile and Podcast State + +**Files:** `src/agent/agent.py:423-444`, `src/podcast/state.py:22-24` +**Severity:** Medium (data integrity) + +### Current Code + +```python +# agent.py +memory_path.write_text(new_memory + "\n", encoding="utf-8") + +# state.py +def _save(data: dict) -> None: + STATE_FILE.parent.mkdir(parents=True, exist_ok=True) + STATE_FILE.write_text(json.dumps(data, indent=2), encoding="utf-8") +``` + +### Problem + +`Path.write_text` is not atomic — it opens the file for truncation and writes in multiple OS-level operations. 
If the process crashes, is killed, or two coroutines call the write concurrently, the file can be left in a partially written state (empty, or with truncated JSON). For `podcast_state.json`, this means the `delivered_pmids` list can be lost, causing duplicate Slack DMs. For working memory files, a partial write silently discards the agent's accumulated context. + +There is also a logical race: `_save` in `state.py` does a read-modify-write cycle (`_load()` → modify → `_save()`). Two concurrent podcast pipeline runs (possible if the scheduler is invoked twice) will both read the same initial state, both modify it independently, and whichever writes last will silently overwrite the other's changes. + +### Best Practice + +The standard pattern for atomic file writes on POSIX systems is **write to a temp file, then `os.rename`**. Because `rename` is guaranteed atomic by the POSIX spec (it is a single syscall), a reader will always see either the old complete file or the new complete file — never a partial write. Python's `tempfile.NamedTemporaryFile` with `delete=False` in the same directory is the standard way to achieve this. + +For the read-modify-write race in `state.py`, use a `threading.Lock` (or `asyncio.Lock` if the callers are async) as a process-level mutex around all load/save operations. 
+ +### How to Fix + +**Atomic write helper** (can live in `src/utils.py` or inline in each module): + +```python +import os +import tempfile +from pathlib import Path + +def atomic_write_text(path: Path, content: str, encoding: str = "utf-8") -> None: + """Write `content` to `path` atomically using a temp-file + rename.""" + path.parent.mkdir(parents=True, exist_ok=True) + fd, tmp = tempfile.mkstemp(dir=path.parent, suffix=".tmp") + try: + with os.fdopen(fd, "w", encoding=encoding) as f: + f.write(content) + os.replace(tmp, path) # atomic on POSIX; overwrites destination + except Exception: + os.unlink(tmp) # clean up temp file on any error + raise +``` + +Replace all four `path.write_text(...)` calls in `agent.py` (lines 428 and 441) and `state.py` (line 24) with `atomic_write_text(path, content)`. + +**Lock for state.py read-modify-write:** + +```python +import threading +_STATE_LOCK = threading.Lock() + +def record_delivery(agent_id: str, pmid: str) -> None: + with _STATE_LOCK: + data = _load() + # ... modify ... + _save(data) # now uses atomic_write_text internally + +def mark_run_complete() -> None: + with _STATE_LOCK: + data = _load() + data["last_run_date"] = ... + _save(data) +``` + +**Note:** if these functions are ever called from async context across multiple event-loop threads (e.g., concurrent `run_pipeline_for_agent` calls), a `threading.Lock` is sufficient because `asyncio.run` uses a single thread per call. If concurrency is ever introduced via `asyncio.gather`, switch to `asyncio.Lock`. 
+ +--- + +## Issue 5 — Per-Task Failures Silently Discarded in `asyncio.gather` + +**File:** `src/agent/simulation.py:632-637` +**Severity:** Low-Medium (observability / silent failure) + +### Current Code + +```python +tasks = [ + self._reply_to_thread(agent, thread) + for thread in threads_to_reply +] +await asyncio.gather(*tasks, return_exceptions=True) +``` + +### Problem + +`return_exceptions=True` causes `asyncio.gather` to return exceptions as result values instead of re-raising them. The return value here is discarded entirely, so any exceptions from individual `_reply_to_thread` calls are silently swallowed. If a Slack API error, DB write failure, or Claude API timeout occurs in any thread reply, it is invisible in logs and metrics. Operators have no signal that Phase 4 is partially or fully failing. + +### Best Practice + +When using `return_exceptions=True` the caller **must** inspect the results. The canonical pattern is to iterate the results list and log (or re-raise) any values that are `isinstance(r, BaseException)`. This is preferable to removing `return_exceptions=True` (which would cancel all remaining tasks on the first failure) because Phase 4 replies are independent — a failure on one thread should not prevent replies to others. + +### How to Fix + +Capture the return value of `asyncio.gather` and inspect each result: + +```python +results = await asyncio.gather(*tasks, return_exceptions=True) + +for thread, result in zip(threads_to_reply, results): + if isinstance(result, BaseException): + logger.error( + "[%s] Phase 4: Failed to reply to thread %s: %s", + agent.agent_id, + thread.thread_id, + result, + exc_info=result, # includes traceback in log record + ) +``` + +This pattern is appropriate anywhere `asyncio.gather(..., return_exceptions=True)` is used without inspecting results. There is a similar call site in `src/agent/simulation.py` for channel scanning — apply the same pattern there. 
Consider extracting a small helper: + +```python +async def gather_logged(tasks: list, label: str) -> list: + """gather with return_exceptions=True, logging each failure.""" + results = await asyncio.gather(*tasks, return_exceptions=True) + for i, r in enumerate(results): + if isinstance(r, BaseException): + logger.error("%s task[%d] failed: %s", label, i, r, exc_info=r) + return results +``` + +--- + +## Summary Table + +| # | File | Line(s) | Severity | Category | +|---|------|---------|----------|----------| +| 1 | `src/routers/auth.py` | 76-79 | High | Security — CSRF bypass | +| 2 | `src/agent/simulation.py` | 218-222 | Medium | Correctness — premature loop exit | +| 3 | `src/podcast/pipeline.py` + `main.py` | pipeline write order | Medium | Reliability — broken RSS enclosure | +| 4 | `src/agent/agent.py` + `src/podcast/state.py` | 428, 441, 22-24 | Medium | Data integrity — non-atomic writes | +| 5 | `src/agent/simulation.py` | 637 | Low-Medium | Observability — silent task failures | diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index 44dc726..3c0c371 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -35,6 +35,7 @@ services: volumes: - ./profiles:/app/profiles - ./prompts:/app/prompts + - podcast_data:/app/data depends_on: postgres: condition: service_healthy @@ -83,7 +84,6 @@ services: volumes: - ./profiles:/app/profiles - ./prompts:/app/prompts - - ./data:/app/data depends_on: postgres: condition: service_healthy @@ -108,7 +108,7 @@ services: volumes: - ./profiles:/app/profiles - ./prompts:/app/prompts - - ./data:/app/data + - grantbot_data:/app/data depends_on: postgres: condition: service_healthy @@ -120,6 +120,29 @@ services: awslogs-create-group: "true" awslogs-region: ${AWS_REGION:-us-east-2} + podcast: + build: + context: . 
+ restart: unless-stopped + command: ["python", "-m", "src.podcast.main", "scheduler", "--run-hour", "9"] + env_file: .env + environment: + DATABASE_URL: postgresql+asyncpg://${POSTGRES_USER:-copi}:${POSTGRES_PASSWORD}@postgres:5432/${POSTGRES_DB:-copi} + volumes: + - ./profiles:/app/profiles + - ./prompts:/app/prompts + - podcast_data:/app/data + depends_on: + postgres: + condition: service_healthy + logging: + driver: awslogs + options: + awslogs-group: /copi/podcast + tag: podcast + awslogs-create-group: "true" + awslogs-region: ${AWS_REGION:-us-east-2} + nginx: image: nginx:1.27-alpine restart: unless-stopped @@ -167,3 +190,5 @@ services: volumes: pgdata: + grantbot_data: + podcast_data: diff --git a/docker-compose.yml b/docker-compose.yml index d686043..71d3fd9 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -25,6 +25,7 @@ services: - .:/app - ./profiles:/app/profiles - ./prompts:/app/prompts + - ./data:/app/data depends_on: postgres: condition: service_healthy @@ -69,5 +70,25 @@ services: postgres: condition: service_healthy + podcast: + build: . + command: python -m src.podcast.main scheduler --run-hour 9 + env_file: .env + environment: + # Override LOCAL_TTS_HOST so the container can reach a vLLM-Omni server + # running on the host machine (127.0.0.1 does not reach the host from inside Docker). + LOCAL_TTS_HOST: host.docker.internal + extra_hosts: + # Ensures host.docker.internal resolves on Linux (Docker Desktop sets it automatically on Mac/Windows). + - "host.docker.internal:host-gateway" + volumes: + - .:/app + - ./profiles:/app/profiles + - ./prompts:/app/prompts + - ./data:/app/data + depends_on: + postgres: + condition: service_healthy + volumes: pgdata: diff --git a/prompts/podcast-select.md b/prompts/podcast-select.md new file mode 100644 index 0000000..121af03 --- /dev/null +++ b/prompts/podcast-select.md @@ -0,0 +1,46 @@ +You are a literature triage assistant for a scientific researcher. 
Your job is to identify the single most relevant and impactful recent paper from a list of candidates, based on the researcher's profile. + +## Researcher Profile + +{profile} + +## PI Podcast Preferences + +{preferences} + +## Task + +Below is a numbered list of recent publications (title + abstract). Select the ONE paper whose findings or outputs could most plausibly accelerate or inform a specific aspect of this researcher's ongoing work. + +Return your answer as JSON: +```json +{"index": <int — number of the chosen paper>, "justification": "<one-sentence reason>"} +``` + +If no paper clears the relevance bar, return: +```json +{"index": null, "justification": "No paper is sufficiently relevant to this researcher's current work."} +``` + +## Selection Criteria + +**INCLUDE** a paper if: +- Its findings or methods could directly accelerate a specific ongoing project, technique, or open question in the researcher's profile +- It releases a new tool, dataset, method, or reagent relevant to the researcher's techniques or targets +- It addresses a disease area, model system, or molecular target the researcher actively works on + +**EXCLUDE** a paper if: +- The connection to the researcher's work is only superficial or generic +- It is a review article, editorial, or commentary (no new primary data) +- It is purely clinical or epidemiological with no basic science relevance +- Recency alone makes it interesting — the connection must be specific and actionable + +**NOTE:** Some candidates are preprints (from bioRxiv, medRxiv, or arXiv) and are marked as such in the journal field. Preprints are valid candidates — treat them the same as peer-reviewed papers for selection purposes. + +**PREFER** papers that release a concrete output alongside findings (code, dataset, protocol, reagent, model). These tend to be immediately useful. + +**FOLLOW PI PREFERENCES:** If the PI Podcast Preferences section above contains specific instructions (e.g., topic focus, exclusions, prioritizations), apply them when selecting. 
PI preferences override the general criteria above. + +## Candidate Papers + +{candidates} diff --git a/prompts/podcast-summarize.md b/prompts/podcast-summarize.md new file mode 100644 index 0000000..1a96589 --- /dev/null +++ b/prompts/podcast-summarize.md @@ -0,0 +1,46 @@ +You are a science communicator writing a personalized research brief for a specific PI. Your goal is to help the PI quickly grasp whether and how a new paper is useful to their lab. + +## Researcher Profile + +{profile} + +## PI Podcast Preferences + +{preferences} + +## Paper + +{paper} + +## Task + +Write a structured research brief following the exact format below. Be specific, direct, and concise — like a knowledgeable postdoc briefing their PI. No filler phrases, no generic connections. + +--- + +*Today's Research Brief — {date}* + +*{paper_title}* +{authors} · {journal} · {year} + +*What they found:* +[2–3 sentences on core findings. Include specific results, effect sizes, or key observations. Be concrete — name specific proteins, pathways, organisms, or quantitative outcomes where relevant.] + +*Key output:* +[1–2 sentences on the tool, method, dataset, code, protocol, or reagent released with the paper. ONLY include this section if the paper releases a concrete artifact. If there is no distinct output, omit this section entirely — do not write "N/A" or a placeholder.] + +*Why this matters for your lab:* +[2–3 sentences connecting the paper specifically to this PI's work. You MUST name at least one specific technique, model system, molecular target, or open question from the researcher's profile. Do not write generic connections like "this is relevant to your proteomics work" — say exactly what aspect and how.] 
+ +*Link:* {paper_url} + +--- + +## Rules + +- Total length: approximately 200–280 words +- Tone: collegial and precise, not promotional +- The "Why this matters" section is the most important — make it specific to this researcher, not a general statement about the field +- If the PI Podcast Preferences section contains specific instructions on tone, focus, or framing, follow them +- If the abstract is all you have, base the brief on the abstract. Do not speculate about full-text content you weren't given. +- Do not add any text before or after the brief itself diff --git a/pyproject.toml b/pyproject.toml index d09fa83..6b780d2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,7 @@ dependencies = [ "boto3>=1.34.0", "typer>=0.12.0", "rich>=13.7.0", + "mutagen>=1.47.0", ] [project.optional-dependencies] diff --git a/scripts/test_podcast_su.py b/scripts/test_podcast_su.py new file mode 100644 index 0000000..600c6e6 --- /dev/null +++ b/scripts/test_podcast_su.py @@ -0,0 +1,140 @@ +"""One-shot test: run the podcast pipeline for agent 'su' only. 
+ +Outputs: + .labbot-tests/su-summary-.txt — generated text summary + .labbot-tests/su-audio-.mp3 — TTS audio (if MISTRAL_API_KEY is set) + +Usage: + DATABASE_URL=postgresql+asyncpg://copi:copi@localhost:5432/copi \ + python scripts/test_podcast_su.py +""" + +import asyncio +import logging +import os +import shutil +from datetime import date +from pathlib import Path + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s %(levelname)s %(name)s: %(message)s", +) +logger = logging.getLogger(__name__) + +OUTPUT_DIR = Path(".labbot-tests") +AUDIO_DIR = Path("data/podcast_audio") + + +async def run(): + from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine + from sqlalchemy.orm import sessionmaker + + from src.config import get_settings + from src.podcast.pipeline import ( + _generate_summary, + _load_podcast_preferences, + _load_public_profile, + _parse_profile_markdown, + _select_article, + _try_fetch_full_text, + ) + from src.podcast.tts_utils import get_audio_duration_seconds + from src.podcast.pubmed_search import build_queries, fetch_candidates + from src.podcast.state import get_delivered_pmids, record_delivery + + settings = get_settings() + agent_id = "su" + today = date.today() + OUTPUT_DIR.mkdir(exist_ok=True) + + logger.info("=== LabBot Podcast test run for agent: %s ===", agent_id) + + # 1. Load profiles + profile_text = _load_public_profile(agent_id) + if not profile_text: + logger.error("No public profile found for agent: %s", agent_id) + return + logger.info("Loaded profile (%d chars)", len(profile_text)) + + preferences_text = await _load_podcast_preferences(agent_id) + if preferences_text: + logger.info("Loaded podcast preferences (%d chars)", len(preferences_text)) + else: + logger.info("No podcast preferences found for agent: %s", agent_id) + + # 2. 
Build queries and fetch candidates + profile_dict = _parse_profile_markdown(profile_text) + queries = build_queries(profile_dict) + logger.info("Search queries: %s", queries) + + already_delivered = get_delivered_pmids(agent_id) + logger.info("Already delivered PMIDs: %s", already_delivered) + + candidates = await fetch_candidates( + queries, + already_delivered=already_delivered, + days=settings.podcast_search_window_days, + max_total=settings.podcast_max_candidates, + ) + logger.info("Fetched %d candidates", len(candidates)) + if not candidates: + logger.error("No candidate articles found — aborting") + return + + # 3. LLM article selection + selected, justification = await _select_article(profile_text, candidates, agent_id, preferences_text) + if selected is None: + logger.error("No article selected — aborting") + return + pmid = selected.get("pmid", "") + logger.info("Selected PMID: %s", pmid) + logger.info("Justification: %s", justification) + + # 4. Fetch full text + full_text = await _try_fetch_full_text(pmid) + logger.info("Full text fetched: %s", bool(full_text)) + + # 5. Generate text summary + summary = await _generate_summary(profile_text, selected, full_text, agent_id, preferences_text) + if not summary: + logger.error("Summary generation failed — aborting") + return + + summary_path = OUTPUT_DIR / f"su-summary-{today.isoformat()}.txt" + summary_path.write_text(summary, encoding="utf-8") + logger.info("Summary written to %s", summary_path) + print("\n" + "=" * 60) + print("TEXT SUMMARY") + print("=" * 60) + print(summary) + print("=" * 60 + "\n") + + # 6. 
Generate audio — dispatch to backend configured by PODCAST_TTS_BACKEND + if settings.podcast_tts_backend == "local": + from src.podcast.local_tts import generate_audio + logger.info("TTS backend: local vLLM-Omni (%s:%s)", settings.local_tts_host, settings.local_tts_port) + else: + from src.podcast.mistral_tts import generate_audio + logger.info("TTS backend: Mistral AI (%s)", settings.mistral_tts_model) + + audio_src = AUDIO_DIR / agent_id / f"{today.isoformat()}.mp3" + audio_ok = await generate_audio(summary, agent_id, audio_src) + + if audio_ok: + audio_dest = OUTPUT_DIR / f"su-audio-{today.isoformat()}.mp3" + shutil.copy2(audio_src, audio_dest) + duration = get_audio_duration_seconds(audio_src) + logger.info("Audio saved to %s (duration: %ss)", audio_dest, duration) + else: + logger.warning("Audio generation failed (backend: %s)", settings.podcast_tts_backend) + + logger.info("=== Test run complete ===") + logger.info(" PMID: %s", pmid) + logger.info(" Summary: %s", summary_path) + if audio_ok: + logger.info(" Audio: %s", audio_dest) + + +if __name__ == "__main__": + asyncio.run(run()) diff --git a/specs/labbot-podcast.md b/specs/labbot-podcast.md new file mode 100644 index 0000000..6ad3bc1 --- /dev/null +++ b/specs/labbot-podcast.md @@ -0,0 +1,616 @@ +# LabBot Podcast Specification + +## Overview + +LabBot Podcast is a daily personalized research briefing service for researchers. It surfaces the single most relevant and impactful recent publication from the scientific literature based on the researcher's profile, generates a structured text summary highlighting findings and tools useful to their ongoing work, and produces a short audio episode via Mistral AI TTS. Researchers can subscribe to a personal RSS podcast feed to listen to the audio. + +The system runs once per day and requires no researcher interaction to be useful — but researchers can tune it through a web UI. 
There are two delivery paths: + +- **Agent path** — pilot-lab PIs with an approved `AgentRegistry` entry additionally receive the text summary as a Slack DM from their lab bot. +- **User path** — any researcher who has completed ORCID onboarding and has a `ResearcherProfile` with a research summary receives the podcast automatically. No Slack bot, agent approval, or admin action required. + +--- + +## Architecture + +### Service Placement + +LabBot Podcast runs as a separate Docker container (`podcast` service), mirroring the GrantBot pattern: +- Long-running scheduler process +- Executes once per calendar day at 9am UTC (1 hour after GrantBot) +- If the container was down at the scheduled time, runs immediately on startup (catch-up) +- State persisted in `data/podcast_state.json` (tracks which articles have been delivered per agent) + +### Delivery Paths + +| Path | Who | Profile source | Delivery | Audio/RSS key | +|---|---|---|---|---| +| **Agent** | Pilot-lab PIs with active `AgentRegistry` | `profiles/public/{agent_id}.md` (disk) | Slack DM + RSS | `agent_id` string | +| **User** | Any ORCID user with completed `ResearcherProfile` | `ResearcherProfile` DB row (structured fields) | RSS only | `user_id` UUID | + +Both paths run in the same daily scheduler pass. A user who has both a `ResearcherProfile` and an active agent is handled only by the agent path (no duplicate episode). 
+ +### Dependencies on Existing Systems + +| Existing component | How Podcast uses it | +|---|---| +| `ResearcherProfile` DB model | Source of research areas, keywords, techniques, disease areas for the user path | +| `profiles/public/{lab}.md` | Profile text for the agent path (LLM article selection and summary) | +| `src/services/pubmed.py` | Literature search (keyword + MeSH queries) | +| `src/services/llm.py` | Article selection ranking and summary generation (all calls logged to `LlmCallLog`) | +| `AgentRegistry` | Maps agent → PI → Slack bot token for DM delivery (agent path only) | +| `User.id` (UUID) | Stable, opaque RSS feed token for the user path | +| Slack bot DM | Text summary delivery (agent path only) | + +### New External Dependency + +**Mistral AI API** — text-to-speech generation. +- Configured via `MISTRAL_API_KEY` environment variable +- Voice selection per agent configured in `data/podcast_voices.json` (agent_id → voice_id); falls back to a default voice if not set +- Audio files stored at `data/podcast_audio/{agent_id}/{YYYY-MM-DD}.mp3` + +--- + +## Daily Pipeline + +Each day the scheduler runs two loops in sequence: + +1. **Agent loop** — iterates over all active `AgentRegistry` entries and calls `run_pipeline_for_agent()` for each. +2. **User loop** — iterates over all `User` rows where `onboarding_complete=True` and `profile.research_summary IS NOT NULL`, skipping any whose `user_id` appeared in the agent loop, and calls `run_podcast_for_user()` for each. + +For each recipient, the pipeline executes the following steps sequentially: + +### Step 1: Load Profile + +- **Agent path**: read `profiles/public/{agent_id}.md` from disk. If absent, skip. +- **User path**: construct profile text from structured `ResearcherProfile` DB fields (`research_summary`, `disease_areas`, `techniques`, `experimental_models`, `keywords`). If `research_summary` is empty, skip. 
+ +### Step 2: Build Search Queries + +Construct PubMed search terms from the profile: +- Extract top research area keywords +- Extract technique and experimental model terms +- Combine into 2–3 PubMed query strings (e.g., `(proteostasis OR unfolded protein response) AND (neurodegeneration OR proteomics)`) +- Inject any `extra_keywords` from `PodcastPreferences` as additional quoted terms +- Limit to publications from the last 14 days (rolling window ensures coverage across weekend/holiday gaps) +- Cap at 50 candidate abstracts + +### Step 3: Fetch Candidate Abstracts + +Use `src/services/pubmed.py` to execute each query and retrieve PMIDs + abstracts. Deduplicate across queries. Skip any PMID already in `podcast_state.json` for this recipient (agent or user) to prevent re-delivering the same article. + +### Step 4: LLM Article Selection (Sonnet) + +Single LLM call (Sonnet) with: +- The researcher's full profile text (disk for agent path; constructed from DB for user path) +- The list of candidate abstracts (title + abstract text, numbered) +- Any journal preferences from `PodcastPreferences` +- Prompt: `prompts/podcast-select.md` + +The LLM returns the index of the single best article, along with a one-sentence justification of why it is relevant to this researcher's ongoing work. If no article meets a minimum relevance threshold, it returns `null` and the pipeline skips delivery today. + +### Step 5: Generate Text Summary (Opus) + +One LLM call (Opus) with: +- The researcher's full profile text +- The selected article's title, abstract, and full text (fetched via `retrieve_full_text` if available in PMC, otherwise abstract only) +- Prompt: `prompts/podcast-summarize.md` + +Output is a structured text summary (see format below). This is used as the TTS input and stored in `PodcastEpisode.text_summary`. 
+ +### Step 6: Generate Audio (Mistral AI) + +Pass the text summary to the Mistral AI TTS API: +- Voice: from `PodcastPreferences.voice_id`, or `MISTRAL_TTS_DEFAULT_VOICE` +- Model: configurable via `MISTRAL_TTS_MODEL` +- Output: MP3 file saved to: + - Agent path: `data/podcast_audio/{agent_id}/{YYYY-MM-DD}.mp3` + - User path: `data/podcast_audio/users/{user_id}/{YYYY-MM-DD}.mp3` +- If TTS fails, the episode DB row is **not** written (see commit-last ordering); the run returns `False`. + +### Step 7: Deliver via Slack DM _(agent path only)_ + +Send the text summary as a DM from the agent's Slack bot to its PI, appending the RSS feed URL. User-path episodes are delivered via RSS only — no Slack bot is required. + +### Step 8: Persist Episode and Update State + +1. Write the `PodcastEpisode` row to the DB: + - Agent path: `agent_id` set, `user_id` NULL + - User path: `user_id` set, `agent_id` NULL +2. Append the delivered PMID to `data/podcast_state.json` (keyed by `agent_id` or `user_id`) to prevent re-delivery. + +--- + +## Text Summary Format + +The Opus-generated summary follows a consistent structure. The prompt enforces this layout: + +``` +*Today's Research Brief — {Date}* + +*{Paper Title}* +{Authors} · {Journal} · {Year} + +*What they found:* +2–3 sentences on the core findings — specific results, effect sizes, or observations. + +*Key output:* +1–2 sentences on any tool, method, dataset, or reagent released with the paper (if applicable). Omit this section if the paper has no distinct output. + +*Why this matters for your lab:* +2–3 sentences connecting the paper's findings and outputs specifically to the PI's ongoing research areas, techniques, or open questions. Ground this in the PI's profile — name specific techniques, model systems, or questions from their work. 
+ +*PubMed:* https://pubmed.ncbi.nlm.nih.gov/{PMID}/ +``` + +The Slack DM appends a line at the bottom: +> _Listen to the audio version: {rss_feed_url}_ + +--- + +## RSS Podcast Feed + +### Endpoints + +| Path | Auth | Key | +|---|---|---| +| `GET /podcast/{agent_id}/feed.xml` | None | Pilot-lab agent | +| `GET /podcast/{agent_id}/audio/{date}.mp3` | None | Pilot-lab agent | +| `GET /podcast/users/{user_id}/feed.xml` | None | Plain ORCID user | +| `GET /podcast/users/{user_id}/audio/{date}.mp3` | None | Plain ORCID user | + +All four endpoints are public and unauthenticated. The `user_id` UUID is opaque and acts as a stable, subscribable feed token — equivalent to a private podcast URL. Users retrieve their feed URL from the `/podcast/settings` page. + +### Feed Structure + +Standard RSS 2.0 with iTunes podcast extensions (identical structure for both paths): + +```xml + + + {Name} — LabBot Research Briefings + Daily personalized research summaries for {Name}. + {feed_url} + {Name} + + + {Paper Title} — {Date} + {text summary} + + {RFC 822 date} + {agent_id|user-{user_id}}-{YYYY-MM-DD} + {duration} + + ... + + +``` + +### Audio File Storage + +| Path | Audio directory | +|---|---| +| Agent path | `data/podcast_audio/{agent_id}/{YYYY-MM-DD}.mp3` | +| User path | `data/podcast_audio/users/{user_id}/{YYYY-MM-DD}.mp3` | + +Files are streamed with `Content-Type: audio/mpeg`. + +--- + +## LLM Prompt Files + +Two new prompt files in `prompts/`: + +### `prompts/podcast-select.md` + +Instructs the LLM to act as a literature triage assistant for a specific PI. 
It receives: +- The PI's public profile (research areas, techniques, open questions, unique capabilities) +- Numbered list of candidate abstracts (title + abstract) + +It must return: +- The number of the most relevant article, or `null` if none clears the relevance bar +- A one-sentence justification referencing a specific aspect of the PI's profile + +Key instructions in the prompt: +- Relevance is defined as: the paper's findings or outputs could plausibly accelerate or inform a specific aspect of the PI's ongoing work +- Recency alone is not sufficient — the connection must be specific +- Prefer papers that release a tool, method, dataset, or reagent alongside findings +- Do not pick review articles or editorials + +### `prompts/podcast-summarize.md` + +Instructs the LLM to act as a science communicator writing for a specific PI. It receives: +- The PI's public profile +- Full paper text (or abstract if full text unavailable) + +It must produce the structured summary described above. Key instructions: +- The "Why this matters for your lab" section must name specific techniques, model systems, or open questions from the PI's profile — no generic connections +- Tone is like a knowledgeable postdoc briefing their PI: specific, direct, no filler +- The "Key output" section is only included if the paper releases a concrete artifact (tool, code, dataset, method, reagent); skip it otherwise +- Target length: ~250 words total + +--- + +## Data Model + +### `PodcastEpisode` + +Rows are keyed by either `agent_id` (string) or `user_id` (UUID FK to `users.id`). Exactly one should be set per row. 
+ +```python +class PodcastEpisode(Base): + __tablename__ = "podcast_episodes" + + id: Mapped[uuid.UUID] = mapped_column(primary_key=True, default=uuid.uuid4) + agent_id: Mapped[str | None] = mapped_column(String(50), nullable=True, index=True) + user_id: Mapped[uuid.UUID | None] = mapped_column(UUID, ForeignKey("users.id"), nullable=True, index=True) + episode_date: Mapped[date] = mapped_column(Date, nullable=False) + pmid: Mapped[str] = mapped_column(String(100), nullable=False) + paper_title: Mapped[str] = mapped_column(String(500), nullable=False) + paper_authors: Mapped[str] = mapped_column(String(500), nullable=False) + paper_journal: Mapped[str] = mapped_column(String(255), nullable=False) + paper_year: Mapped[int] = mapped_column(Integer, nullable=False) + paper_url: Mapped[str | None] = mapped_column(String(1000), nullable=True) + text_summary: Mapped[str] = mapped_column(Text, nullable=False) + audio_file_path: Mapped[str | None] = mapped_column(String(500), nullable=True) + audio_duration_seconds: Mapped[int | None] = mapped_column(Integer, nullable=True) + slack_delivered: Mapped[bool] = mapped_column(Boolean, default=False) + selection_justification: Mapped[str] = mapped_column(Text, nullable=False) + created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now()) + + __table_args__ = ( + # Agent-path: one episode per agent per day + UniqueConstraint("agent_id", "episode_date", name="uq_podcast_agent_date"), + # User-path: enforced by partial unique index (migration 0013): + # CREATE UNIQUE INDEX ix_podcast_episodes_user_date + # ON podcast_episodes (user_id, episode_date) WHERE user_id IS NOT NULL + ) +``` + +### `PodcastPreferences` + +Rows are keyed by either `agent_id` or `user_id`. Both columns are nullable and uniquely indexed. 
+ +```python +class PodcastPreferences(Base): + __tablename__ = "podcast_preferences" + + id: Mapped[uuid.UUID] = mapped_column(primary_key=True, default=uuid.uuid4) + agent_id: Mapped[str | None] = mapped_column(String(50), nullable=True, unique=True, index=True) + user_id: Mapped[uuid.UUID | None] = mapped_column(UUID, ForeignKey("users.id"), nullable=True, unique=True, index=True) + voice_id: Mapped[str | None] = mapped_column(String(100), nullable=True) + extra_keywords: Mapped[list[str]] = mapped_column(ARRAY(String), server_default="{}") + preferred_journals: Mapped[list[str]] = mapped_column(ARRAY(String), server_default="{}") + deprioritized_journals: Mapped[list[str]] = mapped_column(ARRAY(String), server_default="{}") + updated_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), server_default=func.now(), onupdate=func.now()) +``` + +### State File (`data/podcast_state.json`) + +Keyed separately for agents and users: + +```json +{ + "agents": { + "": { "delivered_pmids": ["12345", "67890"] } + }, + "users": { + "": { "delivered_pmids": ["11111"] } + }, + "last_run_date": "2026-04-14" +} +``` + +The state file is a lightweight deduplication cache. The DB is the authoritative record for RSS generation and admin visibility. 
### Alembic Migrations + +| Migration | Creates / alters | +|---|---| +| `0010_add_podcast_episodes.py` | `podcast_episodes` table (agent path) | +| `0011_add_podcast_paper_url.py` | `paper_url` column | +| `0012_add_podcast_preferences.py` | `podcast_preferences` table (agent path) | +| `0013_podcast_user_support.py` | `user_id` FK on both tables; make `agent_id` nullable; partial unique index for user-path episodes | + +--- + +## Configuration + +New environment variables: + +| Variable | Required | Description | +|---|---|---| +| `MISTRAL_API_KEY` | Yes (for audio) | Mistral AI API key | +| `MISTRAL_TTS_MODEL` | No | TTS model ID (default: `voxtral-mini-tts-latest`) | +| `MISTRAL_TTS_DEFAULT_VOICE` | No | Default voice when no per-agent override exists | +| `PODCAST_BASE_URL` | Yes | Public base URL for RSS enclosure links (e.g., `https://copi.science`) | +| `PODCAST_SEARCH_WINDOW_DAYS` | No | Rolling search window in days (default: `14`) | +| `PODCAST_MAX_CANDIDATES` | No | Max PubMed abstracts per agent per day (default: `50`) | + +Per-agent voice overrides (Phase 2/3): `data/podcast_voices.json` +```json +{ + "su": "alex", + "wiseman": "stella" +} +``` +**Deprecated in Phase 4** — voice preferences move to the `podcast_preferences` DB table. The JSON file is still read as a fallback while the migration is in progress. + +--- + +## Docker Service + +Add `podcast` service to `docker-compose.yml` and `docker-compose.prod.yml`: + +```yaml +podcast: + build: . 
+ command: python -m src.podcast.main + env_file: .env + volumes: + - ./data:/app/data + depends_on: + - postgres + profiles: + - podcast +``` + +Run with: `docker compose --profile podcast up -d podcast` + +--- + +## Module Structure + +``` +src/podcast/ +├── main.py # Scheduler entry point (APScheduler, same pattern as grantbot.py) +├── pipeline.py # Per-agent pipeline (steps 1–8 above) +├── pubmed_search.py # Query builder from ResearcherProfile +├── mistral_tts.py # Mistral AI TTS client wrapper +├── rss.py # RSS feed builder (reads from DB) +└── state.py # podcast_state.json read/write helpers + +src/routers/podcast.py # FastAPI routes: /podcast/{agent_id}/feed.xml, /podcast/{agent_id}/audio/{date}.mp3 +``` + +The scheduler in `src/podcast/main.py` follows the same catch-up-on-startup pattern as `src/agent/grantbot.py`: +1. On startup, check `data/podcast_state.json` for last run timestamp +2. If last run was before today's 9am UTC, run immediately +3. Schedule next run at 9am UTC + +--- + +## Admin Dashboard Integration + +Add a **Podcast** tab to the existing admin dashboard (`src/routers/admin.py` + `templates/admin.html`) showing: +- Table of recent episodes: agent, date, paper title, PMID, Slack delivered (yes/no), audio generated (yes/no) +- Link to each agent's RSS feed +- LLM call counts and token usage for the podcast pipeline (pulled from `LlmCallLog` filtered by `source = "podcast"`) + +The LLM calls from the podcast pipeline should set a `source` tag in `LlmCallLog` (add a `source` column via migration if not already present, or use the existing `extra_metadata` JSONB field). + +--- + +## PI Customization + +### Via Standing Instructions (Current) + +PIs can adjust podcast behavior through standing instructions to their lab bot (same DM mechanism as the agent system — see `pi-interaction.md`). The podcast pipeline reads the private profile when building the selection prompt. 
+ +Examples of effective standing instructions: +- "For my daily podcast, focus only on papers that release a new tool or dataset — I don't need summaries of pure wet-lab findings" +- "Prioritize papers from computational biology journals for the podcast" +- "Skip anything about C. elegans — we're not pursuing that direction anymore" + +The bot's private profile rewrite (via `prompts/pi-profile-rewrite.md`) should include a `## Podcast Preferences` section that the podcast pipeline reads when constructing the selection and summarization prompts. + +### Via Preferences UI (Phase 4) + +A structured preferences page at `/agent/{agent_id}/podcast-settings` replaces the `data/podcast_voices.json` file and augments the standing-instructions mechanism with three explicit controls: + +1. **Voice** — select the TTS voice used for audio generation +2. **Extra search keywords** — additional terms appended to PubMed/preprint queries beyond the auto-extracted profile keywords +3. **Source preferences** — journals or preprint servers to prioritize (boosted in the selection prompt) or deprioritize + +See the **Podcast Preferences UI** section below for the full design. 
+ +--- + +## Podcast Preferences UI + +### Route and Access Control + +| Route | Method | Handler | Access | Notes | +|---|---|---|---|---| +| `/agent/{agent_id}/podcast-settings` | `GET` | Render agent preferences form | Agent owner or admin | Agent path | +| `/agent/{agent_id}/podcast-settings` | `POST` | Save agent preferences | Agent owner or admin | Agent path | +| `/podcast/settings` | `GET` | Render user preferences form | Any authenticated user with completed profile | User path | +| `/podcast/settings` | `POST` | Save user preferences | Any authenticated user with completed profile | User path | +| `/podcast/user/generate` | `POST` | Trigger on-demand episode | Any authenticated user with completed profile | User path | + +The agent-path routes remain in `src/routers/agent_page.py` with the same `get_agent_with_access()` ownership check. The user-path routes live in `src/routers/podcast.py` and use `get_current_user()` + a profile-completeness check (`onboarding_complete=True` and `profile.research_summary IS NOT NULL`). + +### User Feed URL + +After saving preferences or visiting `/podcast/settings`, the user sees their personal feed URL: + +``` +{PODCAST_BASE_URL}/podcast/users/{user.id}/feed.xml +``` + +This URL: +- Requires no authentication to read (subscribe in any podcast app) +- Is stable for the lifetime of the user account +- Acts as an opaque token — not guessable, not secret, but not publicly listed +- Is displayed with a one-click copy button on the settings page + +### Form Fields + +#### 1. Voice Selection + +A ` +

+ {% if prefs and prefs.extra_keywords %}{{ prefs.extra_keywords | length }} keyword(s) saved.{% else %}No extra keywords set.{% endif %} +

+ + + +
+

Source Preferences

+

+ Guide the article selection by telling the AI which journals or preprint servers + to prioritize or avoid. One source per line (or comma-separated). +

+ +
+ + +
+ +
+ + +
+
+ +
+ + + Cancel + +
+

+ Changes take effect on the next scheduled podcast run (daily at 9am UTC). +

+ + +{% endblock %} diff --git a/templates/base.html b/templates/base.html index 74db818..70af5f4 100644 --- a/templates/base.html +++ b/templates/base.html @@ -86,6 +86,7 @@ Activity Discussions Agents + Podcast Access Waitlist diff --git a/templates/podcast_settings.html b/templates/podcast_settings.html new file mode 100644 index 0000000..59f8a1b --- /dev/null +++ b/templates/podcast_settings.html @@ -0,0 +1,155 @@ +{% extends "base.html" %} +{% block title %}Podcast Settings — CoPI{% endblock %} + +{% block content %} +
+
+
+ ← My Profile +

Podcast Settings

+

Customize your daily LabBot research briefing

+
+
+ + {% if saved %} +
+ Preferences saved successfully. +
+ {% endif %} + + +
+

Your Podcast Feed URL

+

+ Subscribe to this URL in any podcast app (Apple Podcasts, Overcast, Pocket Casts, etc.) + to receive audio episodes automatically. The URL is stable and does not require login. +

+
+ + +
+

+ New episodes are generated daily at 9am UTC. You can also + . +

+ +
+ +
+ + +
+

Voice

+

+ Select the text-to-speech voice used for your audio episodes. + Voices are from Mistral AI's voxtral-mini-tts-latest model. +

+ +
+ + +
+

Extra Search Keywords

+

+ Additional terms to include in the daily literature search, beyond what is + auto-extracted from your profile. One keyword or phrase per line (max 20). + These are added as quoted PubMed search terms. +

+ +

+ {% if prefs and prefs.extra_keywords %}{{ prefs.extra_keywords | length }} keyword(s) saved.{% else %}No extra keywords set.{% endif %} +

+
+ + +
+

Source Preferences

+

+ Guide the article selection by telling the AI which journals or preprint servers + to prioritize or avoid. One source per line (or comma-separated). +

+ +
+ + +
+ +
+ + +
+
+ +
+ + + Cancel + +
+

+ Changes take effect on the next scheduled podcast run (daily at 9am UTC). +

+
+
+{% endblock %} + +{% block scripts %} + +{% endblock %} diff --git a/tests/test_podcast.py b/tests/test_podcast.py new file mode 100644 index 0000000..76e6138 --- /dev/null +++ b/tests/test_podcast.py @@ -0,0 +1,343 @@ +"""Unit tests for podcast pipeline pure-logic functions and RSS builder.""" + +import json +import os +import tempfile +from datetime import date +from pathlib import Path +from types import SimpleNamespace + +import pytest + +from src.podcast.pubmed_search import build_queries +from src.podcast.pipeline import ( + _format_candidates_for_prompt, + _extract_section_text, + _build_profile_text_from_db, +) +from src.podcast.rss import build_feed +from src.podcast.state import ( + get_delivered_pmids, + record_delivery, + get_delivered_pmids_for_user, + record_delivery_for_user, +) + + +# --------------------------------------------------------------------------- +# build_queries +# --------------------------------------------------------------------------- + +class TestBuildQueries: + def test_disease_areas_produce_query(self): + profile = {"disease_areas": ["neurodegeneration", "Alzheimer's disease"], "techniques": [], "experimental_models": [], "keywords": []} + queries = build_queries(profile) + assert len(queries) >= 1 + assert "neurodegeneration" in queries[0] + + def test_techniques_produce_second_query(self): + profile = { + "disease_areas": ["cancer"], + "techniques": ["CRISPR", "flow cytometry"], + "experimental_models": [], + "keywords": [], + } + queries = build_queries(profile) + assert len(queries) >= 2 + assert any("CRISPR" in q for q in queries) + + def test_keywords_produce_third_query(self): + profile = { + "disease_areas": ["diabetes"], + "techniques": ["proteomics"], + "experimental_models": [], + "keywords": ["insulin signaling", "beta cell"], + } + queries = build_queries(profile) + assert len(queries) >= 3 + assert any("insulin signaling" in q or "beta cell" in q for q in queries) + + def test_empty_profile_returns_empty(self): + 
queries = build_queries({}) + assert queries == [] + + def test_fallback_to_research_summary(self): + profile = {"research_summary": "Studying ribosome biogenesis mechanisms"} + queries = build_queries(profile) + assert len(queries) == 1 + + def test_queries_are_quoted_terms(self): + profile = {"disease_areas": ["proteostasis"], "techniques": [], "experimental_models": [], "keywords": []} + queries = build_queries(profile) + assert '"proteostasis"' in queries[0] + + +# --------------------------------------------------------------------------- +# _format_candidates_for_prompt +# --------------------------------------------------------------------------- + +class TestFormatCandidates: + def test_numbers_candidates_from_one(self): + records = [ + {"title": "Paper A", "abstract": "Abstract A", "journal": "Nature", "year": 2024}, + {"title": "Paper B", "abstract": "Abstract B", "journal": "Science", "year": 2024}, + ] + text = _format_candidates_for_prompt(records) + assert text.startswith("1.") + assert "2." in text + + def test_includes_title_and_abstract(self): + records = [{"title": "CRISPR therapy", "abstract": "We developed a new approach.", "journal": "Cell", "year": 2025}] + text = _format_candidates_for_prompt(records) + assert "CRISPR therapy" in text + assert "We developed a new approach." 
in text + + def test_truncates_long_abstract(self): + long_abstract = "x" * 1000 + records = [{"title": "T", "abstract": long_abstract, "journal": "J", "year": 2024}] + text = _format_candidates_for_prompt(records) + assert len(text) < 1000 # abstract truncated to 600 chars + + def test_handles_missing_fields(self): + records = [{"title": "Minimal record"}] + text = _format_candidates_for_prompt(records) + assert "Minimal record" in text + assert "No abstract" in text + + +# --------------------------------------------------------------------------- +# _extract_section_text +# --------------------------------------------------------------------------- + +class TestExtractSectionText: + SAMPLE_MD = """## Research Summary +We study protein folding in neurons. + +## Key Methods and Technologies +- Cryo-EM +- Mass spectrometry + +## Podcast Preferences +Focus on computational tools only. +""" + + def test_extracts_research_summary(self): + text = _extract_section_text(self.SAMPLE_MD, "Research Summary") + assert "protein folding" in text + + def test_extracts_podcast_preferences(self): + text = _extract_section_text(self.SAMPLE_MD, "Podcast Preferences") + assert "computational tools" in text + + def test_stops_at_next_section(self): + text = _extract_section_text(self.SAMPLE_MD, "Research Summary") + assert "Cryo-EM" not in text + + def test_missing_section_returns_empty(self): + text = _extract_section_text(self.SAMPLE_MD, "Nonexistent Section") + assert text == "" + + +# --------------------------------------------------------------------------- +# RSS feed builder +# --------------------------------------------------------------------------- + +def _make_episode(**kwargs): + """Create a minimal PodcastEpisode-like object for RSS tests.""" + defaults = dict( + episode_date=date(2026, 4, 10), + paper_title="A Great Paper", + paper_authors="Smith J et al.", + paper_journal="Nature", + paper_year=2026, + pmid="12345678", + paper_url=None, + text_summary="This paper 
found something important.", + audio_file_path=None, + audio_duration_seconds=None, + slack_delivered=True, + selection_justification="Highly relevant to the PI's work.", + ) + defaults.update(kwargs) + return SimpleNamespace(**defaults) + + +class TestBuildFeed: + # --- agent path --- + + def test_returns_valid_xml_root(self): + xml = build_feed("Jane Smith", [], "https://example.com", agent_id="testagent") + assert xml.startswith("1:30" in xml + + def test_no_enclosure_when_no_audio(self): + ep = _make_episode(audio_file_path=None) + xml = build_feed("Jane Smith", [ep], "https://example.com", agent_id="testagent") + assert "") + xml = build_feed("Jane Smith", [ep], "https://example.com", agent_id="testagent") + assert "Proteins & <Stuff>" in xml + + def test_empty_episodes_list(self): + xml = build_feed("Jane Smith", [], "https://example.com", agent_id="testagent") + assert "" not in xml + + def test_agent_guid_format(self): + ep = _make_episode() + xml = build_feed("Jane Smith", [ep], "https://example.com", agent_id="testagent") + assert "testagent-2026-04-10" in xml + + # --- user path --- + + def test_user_feed_url_uses_user_id(self): + uid = "11111111-2222-3333-4444-555555555555" + xml = build_feed("Alice Brown", [], "https://example.com", user_id=uid) + assert f"/podcast/users/{uid}/feed.xml" in xml + + def test_user_feed_has_correct_pi_name(self): + uid = "11111111-2222-3333-4444-555555555555" + xml = build_feed("Alice Brown", [], "https://example.com", user_id=uid) + assert "Alice Brown" in xml + + def test_user_audio_url_uses_user_path(self, tmp_path): + uid = "11111111-2222-3333-4444-555555555555" + audio_file = tmp_path / "2026-04-10.mp3" + audio_file.write_bytes(b"\x00" * 500) + ep = _make_episode(audio_file_path=str(audio_file)) + xml = build_feed("Alice Brown", [ep], "https://example.com", user_id=uid) + assert f"/podcast/users/{uid}/audio/2026-04-10.mp3" in xml + + def test_user_guid_format(self): + uid = "11111111-2222-3333-4444-555555555555" + ep 
= _make_episode() + xml = build_feed("Alice Brown", [ep], "https://example.com", user_id=uid) + assert f"user-{uid}-2026-04-10" in xml + + +# --------------------------------------------------------------------------- +# State helpers — user path +# --------------------------------------------------------------------------- + +class TestUserState: + def test_new_user_has_empty_delivered_set(self, tmp_path, monkeypatch): + monkeypatch.setattr("src.podcast.state.STATE_FILE", tmp_path / "state.json") + result = get_delivered_pmids_for_user("user-uuid-abc") + assert result == set() + + def test_record_and_retrieve_user_delivery(self, tmp_path, monkeypatch): + monkeypatch.setattr("src.podcast.state.STATE_FILE", tmp_path / "state.json") + record_delivery_for_user("user-uuid-abc", "12345") + record_delivery_for_user("user-uuid-abc", "67890") + result = get_delivered_pmids_for_user("user-uuid-abc") + assert result == {"12345", "67890"} + + def test_user_and_agent_state_are_independent(self, tmp_path, monkeypatch): + monkeypatch.setattr("src.podcast.state.STATE_FILE", tmp_path / "state.json") + record_delivery("myagent", "11111") + record_delivery_for_user("user-uuid-abc", "22222") + assert get_delivered_pmids("myagent") == {"11111"} + assert get_delivered_pmids_for_user("user-uuid-abc") == {"22222"} + # no cross-contamination + assert "22222" not in get_delivered_pmids("myagent") + assert "11111" not in get_delivered_pmids_for_user("user-uuid-abc") + + def test_duplicate_pmid_not_added_twice(self, tmp_path, monkeypatch): + monkeypatch.setattr("src.podcast.state.STATE_FILE", tmp_path / "state.json") + record_delivery_for_user("user-uuid-abc", "99999") + record_delivery_for_user("user-uuid-abc", "99999") + raw = json.loads((tmp_path / "state.json").read_text()) + assert raw["users"]["user-uuid-abc"]["delivered_pmids"].count("99999") == 1 + + def test_atomic_write_leaves_valid_json(self, tmp_path, monkeypatch): + monkeypatch.setattr("src.podcast.state.STATE_FILE", tmp_path / 
"state.json") + record_delivery_for_user("u1", "aaa") + content = (tmp_path / "state.json").read_text() + parsed = json.loads(content) # must be valid JSON + assert "users" in parsed + + +# --------------------------------------------------------------------------- +# _build_profile_text_from_db +# --------------------------------------------------------------------------- + +class TestBuildProfileTextFromDb: + def _make_user(self, **kwargs): + defaults = dict(name="Dr. Alice", institution="MIT", department="Biology") + defaults.update(kwargs) + return SimpleNamespace(**defaults) + + def _make_profile(self, **kwargs): + defaults = dict( + research_summary="We study protein aggregation.", + disease_areas=["Alzheimer's", "Parkinson's"], + techniques=["cryo-EM", "mass spectrometry"], + experimental_models=["mouse", "iPSC"], + keywords=["proteostasis", "neurodegeneration"], + ) + defaults.update(kwargs) + return SimpleNamespace(**defaults) + + def test_includes_user_name(self): + text = _build_profile_text_from_db(self._make_user(), self._make_profile()) + assert "Dr. 
Alice" in text + + def test_includes_research_summary(self): + text = _build_profile_text_from_db(self._make_user(), self._make_profile()) + assert "protein aggregation" in text + + def test_includes_disease_areas(self): + text = _build_profile_text_from_db(self._make_user(), self._make_profile()) + assert "Alzheimer" in text + + def test_includes_techniques(self): + text = _build_profile_text_from_db(self._make_user(), self._make_profile()) + assert "cryo-EM" in text + + def test_handles_none_fields_gracefully(self): + profile = self._make_profile(disease_areas=None, techniques=None, keywords=None) + text = _build_profile_text_from_db(self._make_user(), profile) + assert "protein aggregation" in text # summary still present + + def test_handles_missing_institution(self): + user = self._make_user(institution=None, department=None) + text = _build_profile_text_from_db(user, self._make_profile()) + assert "Dr. Alice" in text