Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
ca46ce5
Implement labbot-podcast: daily personalized research briefings for e…
Mar 30, 2026
d321773
Local dev setup: expose postgres port, ignore data/, add podcast test…
Mar 31, 2026
5176fc5
Switch podcast TTS from ElevenLabs to Mistral AI (voxtral-mini-tts-la…
Apr 2, 2026
76d54b3
add option for local TTS server
Apr 6, 2026
7b8b871
Add podcast service to prod compose with shared volume and host netwo…
Apr 6, 2026
a8a2de0
Add ffmpeg loudnorm post-processing to normalize TTS audio volume (EB…
Apr 6, 2026
4c647e6
Add preprint server support to labbot-podcast (bioRxiv, medRxiv, arXiv).
Apr 8, 2026
fba6bfa
Expand CLAUDE.md with full project context from AGENT.md.
Apr 9, 2026
dd48fb6
Add podcast preferences, voices config, and optional audio normalization
Apr 9, 2026
c5c85c0
Ignore .labbot-tests/ in git
Apr 9, 2026
d8d70ce
Redesign podcast for clean merge with main
Apr 9, 2026
2055ae3
Merge main into coPI-podcast
Apr 9, 2026
989b2fc
Renumber podcast migration to 0010 to resolve conflict with main's 0005
Apr 9, 2026
6c041ff
Start podcast scheduler by default alongside app (remove podcast prof…
Apr 9, 2026
4d39f05
Add on-demand podcast generation endpoint (POST /podcast/{agent_id}/g…
Apr 9, 2026
2267d65
Fix pmid column width (VARCHAR 20→100) to support preprint IDs
Apr 9, 2026
2b68e57
Add paper_url to podcast episodes and fix preprint links
Apr 13, 2026
ab984a9
Add podcast preferences UI (voice, keywords, journal sources)
Apr 15, 2026
7e2cdbd
Merge origin/main into coPI-podcast
Apr 15, 2026
79bbd67
Add code review with top 5 priority issues and fix guidance
Apr 15, 2026
2d2f188
Merge origin/main into coPI-podcast
Apr 15, 2026
10807ca
Refactor Slack tokens to dynamic env discovery; add OpenAI TTS backend
Apr 15, 2026
1d926c3
Add podcast user support, preferences UI, and expanded RSS/state hand…
Apr 15, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 32 additions & 18 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -21,23 +21,37 @@ BASE_URL=http://localhost:8000
ALLOW_HTTP_SESSIONS=true

# Slack — one pair per agent (Bot User OAuth Token + App-Level Token)
# Add as many agents as needed using this pattern; no code changes required.
# SLACK_BOT_TOKEN_<AGENT_ID>=xoxb-... (required)
# SLACK_APP_TOKEN_<AGENT_ID>=xapp-... (optional)
SLACK_BOT_TOKEN_SU=xoxb-placeholder
SLACK_APP_TOKEN_SU=xapp-placeholder
SLACK_BOT_TOKEN_WISEMAN=xoxb-placeholder
SLACK_APP_TOKEN_WISEMAN=xapp-placeholder
SLACK_BOT_TOKEN_LOTZ=xoxb-placeholder
SLACK_APP_TOKEN_LOTZ=xapp-placeholder
SLACK_BOT_TOKEN_CRAVATT=xoxb-placeholder
SLACK_APP_TOKEN_CRAVATT=xapp-placeholder
SLACK_BOT_TOKEN_GROTJAHN=xoxb-placeholder
SLACK_APP_TOKEN_GROTJAHN=xapp-placeholder
SLACK_BOT_TOKEN_PETRASCHECK=xoxb-placeholder
SLACK_APP_TOKEN_PETRASCHECK=xapp-placeholder
SLACK_BOT_TOKEN_KEN=xoxb-placeholder
SLACK_APP_TOKEN_KEN=xapp-placeholder
SLACK_BOT_TOKEN_RACKI=xoxb-placeholder
SLACK_APP_TOKEN_RACKI=xapp-placeholder
SLACK_BOT_TOKEN_SAEZ=xoxb-placeholder
SLACK_APP_TOKEN_SAEZ=xapp-placeholder
SLACK_BOT_TOKEN_WU=xoxb-placeholder
SLACK_APP_TOKEN_WU=xapp-placeholder
SLACK_BOT_TOKEN_GRANTBOT=xoxb-placeholder

# Podcast TTS backend: "mistral" (default), "openai", or "local" (vLLM-Omni server)
PODCAST_TTS_BACKEND="mistral"

# Mistral AI TTS (used when PODCAST_TTS_BACKEND=mistral)
MISTRAL_API_KEY=your-mistral-api-key
MISTRAL_TTS_MODEL=voxtral-mini-tts-latest
MISTRAL_TTS_DEFAULT_VOICE=your-voice-uuid

# OpenAI TTS (used when PODCAST_TTS_BACKEND=openai)
# Voices: alloy echo fable onyx nova shimmer
# Models: tts-1 tts-1-hd gpt-4o-mini-tts
OPENAI_API_KEY=your-openai-api-key
OPENAI_TTS_MODEL=tts-1
OPENAI_TTS_DEFAULT_VOICE=alloy

# Local vLLM-Omni TTS server (used when PODCAST_TTS_BACKEND=local)
# Start with: vllm serve <model> --port 8008  (port must match LOCAL_TTS_PORT below)
LOCAL_TTS_HOST=127.0.0.1
LOCAL_TTS_PORT=8008
LOCAL_TTS_MODEL=mistralai/Voxtral-4B-TTS-2603
LOCAL_TTS_VOICE=default

# Podcast
PODCAST_BASE_URL=http://localhost:8001
PODCAST_SEARCH_WINDOW_DAYS=14
PODCAST_MAX_CANDIDATES=50
# PODCAST_NORMALIZE_AUDIO=true # uncomment to enable ffmpeg loudnorm post-processing (EBU R128, -16 LUFS)
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,9 @@ certbot/
.pytest_cache/
.coverage
htmlcov/

# Runtime data (state files, generated audio — ephemeral)
data/

# Test output artifacts
.labbot-tests/
1 change: 1 addition & 0 deletions AGENT.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ All specs are in `/specs/`:
- `profile-ingestion.md` — 9-step pipeline, ORCID → PubMed → PMC → LLM
- `admin-dashboard.md` — read-only, server-rendered, impersonation
- `agent-system.md` — Slack Bolt, Socket Mode, two-phase LLM calls, simulation engine
- `labbot-podcast.md` — daily personalized research briefing: PubMed search, LLM selection/summarization, Local or API TTS, Slack DM delivery, per-PI RSS podcast feed

## Tech Stack

Expand Down
47 changes: 47 additions & 0 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,50 @@ docker compose --profile agent run -d --name agent-run agent python -m src.agent
```

**Note:** The agent-run container uses mounted source code but the Python process only loads modules at startup. Code changes require a container restart to take effect. **After any code change that affects the running agent process, flag this to the user so they can decide whether to restart.**

## Podcast Pipeline

The LabBot Podcast pipeline (specs/labbot-podcast.md) runs daily at 9am UTC for each active agent:

1. Build PubMed queries from lab's public profile
2. Fetch candidates from PubMed + bioRxiv + medRxiv + arXiv (last 14 days, up to 50+10 candidates)
3. Claude Sonnet selects most relevant paper (applying PI's podcast preferences from their private ProfileRevision)
4. Claude Opus writes a ~250-word structured brief
5. TTS audio generated (Mistral or local vLLM-Omni); ffmpeg loudnorm applied if PODCAST_NORMALIZE_AUDIO=true
6. Slack DM sent to PI with text summary + RSS link
7. RSS feed available at `/podcast/{agent_id}/feed.xml`
8. Audio served at `/podcast/{agent_id}/audio/{date}.mp3`

Preprint IDs use prefixed format: `biorxiv:...`, `medrxiv:...`, `arxiv:...`. The `paper_url` in summaries links to the correct server (not always PubMed).

```bash
# Run podcast pipeline once for all active agents
docker compose --profile podcast run --rm podcast python -m src.podcast.main

# Test pipeline for 'su' agent only
docker compose exec app python scripts/test_podcast_su.py
```

## Database Migration Caveat

If the DB was initialized from the `main` branch schema and then this branch is checked out, `alembic upgrade head` will stamp the version without re-running migrations that share a revision ID with ones already applied on `main`. Any columns added by branch-specific migrations may be silently missing.

**Symptom:** `UndefinedColumnError` at runtime despite `alembic current` showing `head`.

**Fix:** Check for missing columns and apply them manually:
```bash
docker compose exec app python -c "
import asyncio
from src.database import get_engine
from sqlalchemy import text

async def check():
eng = get_engine()
async with eng.connect() as conn:
result = await conn.execute(text(\"SELECT column_name FROM information_schema.columns WHERE table_name='researcher_profiles' ORDER BY ordinal_position\"))
print([r[0] for r in result])

asyncio.run(check())
"
```
Then add any missing columns with `ALTER TABLE ... ADD COLUMN IF NOT EXISTS ...`.
1 change: 1 addition & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ WORKDIR /app
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc \
libpq-dev \
ffmpeg \
&& rm -rf /var/lib/apt/lists/*

# Install Python dependencies
Expand Down
56 changes: 56 additions & 0 deletions alembic/versions/0010_add_podcast_episodes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
"""Add podcast_episodes table

Revision ID: 0010
Revises: 0009
Create Date: 2026-04-09 00:00:00.000000

"""

from typing import Sequence, Union

import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

from alembic import op

revision: str = "0010"
down_revision: Union[str, None] = "0009"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Create podcast_episodes: one daily research-briefing row per agent."""
    op.create_table(
        "podcast_episodes",
        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
        sa.Column("agent_id", sa.String(50), nullable=False),
        sa.Column("episode_date", sa.Date, nullable=False),
        # 100 chars so prefixed preprint IDs (e.g. "biorxiv:...") fit,
        # not just bare numeric PMIDs.
        sa.Column("pmid", sa.String(100), nullable=False),
        sa.Column("paper_title", sa.String(500), nullable=False),
        sa.Column("paper_authors", sa.String(500), nullable=False),
        sa.Column("paper_journal", sa.String(255), nullable=False),
        sa.Column("paper_year", sa.Integer, nullable=False),
        sa.Column("text_summary", sa.Text, nullable=False),
        # Audio fields are nullable: the episode row may exist before
        # (or without) TTS generation succeeding.
        sa.Column("audio_file_path", sa.String(500), nullable=True),
        sa.Column("audio_duration_seconds", sa.Integer, nullable=True),
        sa.Column("slack_delivered", sa.Boolean, nullable=False, server_default="false"),
        sa.Column("selection_justification", sa.Text, nullable=False),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.func.now(),
            nullable=False,
        ),
    )
    op.create_index("ix_podcast_episodes_agent_id", "podcast_episodes", ["agent_id"])
    op.create_index("ix_podcast_episodes_episode_date", "podcast_episodes", ["episode_date"])
    # At most one episode per agent per day.
    op.create_unique_constraint(
        "uq_podcast_agent_date", "podcast_episodes", ["agent_id", "episode_date"]
    )


def downgrade() -> None:
    """Drop podcast_episodes together with its indexes and constraint.

    Fix: ``op.drop_constraint`` requires the table name as its second
    positional argument, and ``op.drop_index`` needs ``table_name`` on
    several backends — the original calls omitted both and would fail
    when the downgrade actually ran.
    """
    op.drop_constraint("uq_podcast_agent_date", "podcast_episodes", type_="unique")
    op.drop_index("ix_podcast_episodes_episode_date", table_name="podcast_episodes")
    op.drop_index("ix_podcast_episodes_agent_id", table_name="podcast_episodes")
    op.drop_table("podcast_episodes")
29 changes: 29 additions & 0 deletions alembic/versions/0011_add_podcast_paper_url.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
"""Add paper_url column to podcast_episodes

Revision ID: 0011
Revises: 0010
Create Date: 2026-04-10 00:00:00.000000

"""

from typing import Sequence, Union

import sqlalchemy as sa

from alembic import op

revision: str = "0011"
down_revision: Union[str, None] = "0010"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None

# Single table/column touched by this revision.
_TABLE = "podcast_episodes"
_COLUMN = "paper_url"


def upgrade() -> None:
    """Attach a nullable paper_url (up to 1000 chars) to podcast_episodes."""
    op.add_column(_TABLE, sa.Column(_COLUMN, sa.String(1000), nullable=True))


def downgrade() -> None:
    """Remove the paper_url column again."""
    op.drop_column(_TABLE, _COLUMN)
64 changes: 64 additions & 0 deletions alembic/versions/0012_add_podcast_preferences.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
"""Add podcast_preferences table

Revision ID: 0012
Revises: 0011
Create Date: 2026-04-14 00:00:00.000000

"""

from typing import Sequence, Union

import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
from sqlalchemy.dialects.postgresql import ARRAY

from alembic import op

revision: str = "0012"
down_revision: Union[str, None] = "0011"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def _string_array_column(name: str) -> sa.Column:
    """Non-null text[] column defaulting to an empty Postgres array."""
    return sa.Column(name, ARRAY(sa.String), nullable=False, server_default="{}")


def upgrade() -> None:
    """Create podcast_preferences: per-agent voice and journal/keyword tuning."""
    op.create_table(
        "podcast_preferences",
        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
        sa.Column("agent_id", sa.String(50), nullable=False),
        sa.Column("voice_id", sa.String(100), nullable=True),
        _string_array_column("extra_keywords"),
        _string_array_column("preferred_journals"),
        _string_array_column("deprioritized_journals"),
        sa.Column(
            "updated_at",
            sa.DateTime(timezone=True),
            nullable=False,
            server_default=sa.func.now(),
        ),
    )
    # Unique: at most one preferences row per agent.
    op.create_index(
        "ix_podcast_preferences_agent_id",
        "podcast_preferences",
        ["agent_id"],
        unique=True,
    )


def downgrade() -> None:
    """Drop the table and its unique index."""
    op.drop_index("ix_podcast_preferences_agent_id", table_name="podcast_preferences")
    op.drop_table("podcast_preferences")
83 changes: 83 additions & 0 deletions alembic/versions/0013_podcast_user_support.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
"""Extend podcast tables to support plain ORCID users (no agent required)

Adds nullable user_id FK to podcast_preferences and podcast_episodes so that
any user who has completed onboarding can receive daily research briefings
without needing an approved AgentRegistry entry.

Changes:
- podcast_preferences.agent_id: NOT NULL → nullable
- podcast_preferences.user_id: new nullable FK → users.id, unique index
- podcast_episodes.agent_id: NOT NULL → nullable
- podcast_episodes.user_id: new nullable FK → users.id
- podcast_episodes: partial unique index on (user_id, episode_date) WHERE user_id IS NOT NULL

Revision ID: 0013
Revises: 0012
Create Date: 2026-04-14 00:00:00.000000
"""

from typing import Sequence, Union

import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import UUID

from alembic import op

revision: str = "0013"
down_revision: Union[str, None] = "0012"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def _user_id_column() -> sa.Column:
    """Fresh nullable user_id column with a cascading FK to users.id."""
    return sa.Column(
        "user_id",
        UUID(as_uuid=True),
        sa.ForeignKey("users.id", ondelete="CASCADE"),
        nullable=True,
    )


def upgrade() -> None:
    """Relax agent_id to nullable and add user_id on both podcast tables."""
    # podcast_preferences — existing agent rows keep their agent_id;
    # plain-user rows will be keyed by user_id (unique per user).
    op.alter_column("podcast_preferences", "agent_id", nullable=True)
    op.add_column("podcast_preferences", _user_id_column())
    op.create_index(
        "ix_podcast_preferences_user_id",
        "podcast_preferences",
        ["user_id"],
        unique=True,
    )

    # podcast_episodes — same relaxation, plus a partial unique index so
    # each user gets at most one episode per day (agent rows unaffected).
    op.alter_column("podcast_episodes", "agent_id", nullable=True)
    op.add_column("podcast_episodes", _user_id_column())
    op.execute(
        "CREATE UNIQUE INDEX ix_podcast_episodes_user_date "
        "ON podcast_episodes (user_id, episode_date) "
        "WHERE user_id IS NOT NULL"
    )


def downgrade() -> None:
    """Undo upgrade() in reverse order."""
    op.execute("DROP INDEX IF EXISTS ix_podcast_episodes_user_date")
    op.drop_column("podcast_episodes", "user_id")
    op.alter_column("podcast_episodes", "agent_id", nullable=False)

    op.drop_index("ix_podcast_preferences_user_id", table_name="podcast_preferences")
    op.drop_column("podcast_preferences", "user_id")
    op.alter_column("podcast_preferences", "agent_id", nullable=False)
Loading