Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
ca46ce5
Implement labbot-podcast: daily personalized research briefings for e…
Mar 30, 2026
d321773
Local dev setup: expose postgres port, ignore data/, add podcast test…
Mar 31, 2026
5176fc5
Switch podcast TTS from ElevenLabs to Mistral AI (voxtral-mini-tts-la…
Apr 2, 2026
76d54b3
add option for local TTS server
Apr 6, 2026
7b8b871
Add podcast service to prod compose with shared volume and host netwo…
Apr 6, 2026
a8a2de0
Add ffmpeg loudnorm post-processing to normalize TTS audio volume (EB…
Apr 6, 2026
4c647e6
Add preprint server support to labbot-podcast (bioRxiv, medRxiv, arXiv).
Apr 8, 2026
fba6bfa
Expand CLAUDE.md with full project context from AGENT.md.
Apr 9, 2026
dd48fb6
Add podcast preferences, voices config, and optional audio normalization
Apr 9, 2026
c5c85c0
Ignore .labbot-tests/ in git
Apr 9, 2026
d8d70ce
Redesign podcast for clean merge with main
Apr 9, 2026
2055ae3
Merge main into coPI-podcast
Apr 9, 2026
989b2fc
Renumber podcast migration to 0010 to resolve conflict with main's 0005
Apr 9, 2026
6c041ff
Start podcast scheduler by default alongside app (remove podcast prof…
Apr 9, 2026
4d39f05
Add on-demand podcast generation endpoint (POST /podcast/{agent_id}/g…
Apr 9, 2026
2267d65
Fix pmid column width (VARCHAR 20→100) to support preprint IDs
Apr 9, 2026
2b68e57
Add paper_url to podcast episodes and fix preprint links
Apr 13, 2026
ab984a9
Add podcast preferences UI (voice, keywords, journal sources)
Apr 15, 2026
7e2cdbd
Merge origin/main into coPI-podcast
Apr 15, 2026
79bbd67
Add code review with top 5 priority issues and fix guidance
Apr 15, 2026
2d2f188
Merge origin/main into coPI-podcast
Apr 15, 2026
10807ca
Refactor Slack tokens to dynamic env discovery; add OpenAI TTS backend
Apr 15, 2026
1d926c3
Add podcast user support, preferences UI, and expanded RSS/state hand…
Apr 15, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 32 additions & 18 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -21,23 +21,37 @@ BASE_URL=http://localhost:8000
ALLOW_HTTP_SESSIONS=true

# Slack — one pair per agent (Bot User OAuth Token + App-Level Token)
# Add as many agents as needed using this pattern; no code changes required.
# SLACK_BOT_TOKEN_<AGENT_ID>=xoxb-... (required)
# SLACK_APP_TOKEN_<AGENT_ID>=xapp-... (optional)
SLACK_BOT_TOKEN_SU=xoxb-placeholder
SLACK_APP_TOKEN_SU=xapp-placeholder
SLACK_BOT_TOKEN_WISEMAN=xoxb-placeholder
SLACK_APP_TOKEN_WISEMAN=xapp-placeholder
SLACK_BOT_TOKEN_LOTZ=xoxb-placeholder
SLACK_APP_TOKEN_LOTZ=xapp-placeholder
SLACK_BOT_TOKEN_CRAVATT=xoxb-placeholder
SLACK_APP_TOKEN_CRAVATT=xapp-placeholder
SLACK_BOT_TOKEN_GROTJAHN=xoxb-placeholder
SLACK_APP_TOKEN_GROTJAHN=xapp-placeholder
SLACK_BOT_TOKEN_PETRASCHECK=xoxb-placeholder
SLACK_APP_TOKEN_PETRASCHECK=xapp-placeholder
SLACK_BOT_TOKEN_KEN=xoxb-placeholder
SLACK_APP_TOKEN_KEN=xapp-placeholder
SLACK_BOT_TOKEN_RACKI=xoxb-placeholder
SLACK_APP_TOKEN_RACKI=xapp-placeholder
SLACK_BOT_TOKEN_SAEZ=xoxb-placeholder
SLACK_APP_TOKEN_SAEZ=xapp-placeholder
SLACK_BOT_TOKEN_WU=xoxb-placeholder
SLACK_APP_TOKEN_WU=xapp-placeholder
SLACK_BOT_TOKEN_GRANTBOT=xoxb-placeholder

# Podcast TTS backend: "mistral" (default), "openai", or "local" (vLLM-Omni server)
PODCAST_TTS_BACKEND="mistral"

# Mistral AI TTS (used when PODCAST_TTS_BACKEND=mistral)
MISTRAL_API_KEY=your-mistral-api-key
MISTRAL_TTS_MODEL=voxtral-mini-tts-latest
MISTRAL_TTS_DEFAULT_VOICE=your-voice-uuid

# OpenAI TTS (used when PODCAST_TTS_BACKEND=openai)
# Voices: alloy echo fable onyx nova shimmer
# Models: tts-1 tts-1-hd gpt-4o-mini-tts
OPENAI_API_KEY=your-openai-api-key
OPENAI_TTS_MODEL=tts-1
OPENAI_TTS_DEFAULT_VOICE=alloy

# Local vLLM-Omni TTS server (used when PODCAST_TTS_BACKEND=local)
# Start with: vllm serve <model> --port 8008  (port must match LOCAL_TTS_PORT below)
LOCAL_TTS_HOST=127.0.0.1
LOCAL_TTS_PORT=8008
LOCAL_TTS_MODEL=mistralai/Voxtral-4B-TTS-2603
LOCAL_TTS_VOICE=default

# Podcast
PODCAST_BASE_URL=http://localhost:8001
PODCAST_SEARCH_WINDOW_DAYS=14
PODCAST_MAX_CANDIDATES=50
# PODCAST_NORMALIZE_AUDIO=true # uncomment to enable ffmpeg loudnorm post-processing (EBU R128, -16 LUFS)
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,9 @@ certbot/
.pytest_cache/
.coverage
htmlcov/

# Runtime data (state files, generated audio — ephemeral)
data/

# Test output artifacts
.labbot-tests/
1 change: 1 addition & 0 deletions AGENT.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ All specs are in `/specs/`:
- `profile-ingestion.md` — 9-step pipeline, ORCID → PubMed → PMC → LLM
- `admin-dashboard.md` — read-only, server-rendered, impersonation
- `agent-system.md` — Slack Bolt, Socket Mode, two-phase LLM calls, simulation engine
- `labbot-podcast.md` — daily personalized research briefing: PubMed search, LLM selection/summarization, Local or API TTS, Slack DM delivery, per-PI RSS podcast feed

## Tech Stack

Expand Down
47 changes: 47 additions & 0 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,50 @@ docker compose --profile agent run -d --name agent-run agent python -m src.agent
```

**Note:** The agent-run container uses mounted source code but the Python process only loads modules at startup. Code changes require a container restart to take effect. **After any code change that affects the running agent process, flag this to the user so they can decide whether to restart.**

## Podcast Pipeline

The LabBot Podcast pipeline (specs/labbot-podcast.md) runs daily at 9am UTC for each active agent:

1. Build PubMed queries from lab's public profile
2. Fetch candidates from PubMed + bioRxiv + medRxiv + arXiv (last 14 days, up to 50+10 candidates)
3. Claude Sonnet selects most relevant paper (applying PI's podcast preferences from their private ProfileRevision)
4. Claude Opus writes a ~250-word structured brief
5. TTS audio generated (Mistral or local vLLM-Omni); ffmpeg loudnorm applied if PODCAST_NORMALIZE_AUDIO=true
6. Slack DM sent to PI with text summary + RSS link
7. RSS feed available at `/podcast/{agent_id}/feed.xml`
8. Audio served at `/podcast/{agent_id}/audio/{date}.mp3`

Preprint IDs use prefixed format: `biorxiv:...`, `medrxiv:...`, `arxiv:...`. The `paper_url` in summaries links to the correct server (not always PubMed).

```bash
# Run podcast pipeline once for all active agents
docker compose --profile podcast run --rm podcast python -m src.podcast.main

# Test pipeline for 'su' agent only
docker compose exec app python scripts/test_podcast_su.py
```

## Database Migration Caveat

If the DB was initialized from the `main` branch schema and then this branch is checked out, `alembic upgrade head` will stamp the version without re-running migrations that share a revision ID with ones already applied on `main`. Any columns added by branch-specific migrations may be silently missing.

**Symptom:** `UndefinedColumnError` at runtime despite `alembic current` showing `head`.

**Fix:** Check for missing columns and apply them manually:
```bash
docker compose exec app python -c "
import asyncio
from src.database import get_engine
from sqlalchemy import text

async def check():
eng = get_engine()
async with eng.connect() as conn:
result = await conn.execute(text(\"SELECT column_name FROM information_schema.columns WHERE table_name='researcher_profiles' ORDER BY ordinal_position\"))
print([r[0] for r in result])

asyncio.run(check())
"
```
Then add any missing columns with `ALTER TABLE ... ADD COLUMN IF NOT EXISTS ...`.
1 change: 1 addition & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ WORKDIR /app
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc \
libpq-dev \
ffmpeg \
&& rm -rf /var/lib/apt/lists/*

# Install Python dependencies
Expand Down
56 changes: 56 additions & 0 deletions alembic/versions/0010_add_podcast_episodes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
"""Add podcast_episodes table

Revision ID: 0010
Revises: 0009
Create Date: 2026-04-09 00:00:00.000000

"""

from typing import Sequence, Union

import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

from alembic import op

revision: str = "0010"
down_revision: Union[str, None] = "0009"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Create podcast_episodes: one daily research-briefing row per agent."""
    op.create_table(
        "podcast_episodes",
        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
        sa.Column("agent_id", sa.String(50), nullable=False),
        sa.Column("episode_date", sa.Date, nullable=False),
        # 100 chars so prefixed preprint IDs (e.g. "biorxiv:...") fit,
        # not just bare numeric PMIDs.
        sa.Column("pmid", sa.String(100), nullable=False),
        sa.Column("paper_title", sa.String(500), nullable=False),
        sa.Column("paper_authors", sa.String(500), nullable=False),
        sa.Column("paper_journal", sa.String(255), nullable=False),
        sa.Column("paper_year", sa.Integer, nullable=False),
        sa.Column("text_summary", sa.Text, nullable=False),
        # Audio fields are nullable: the episode row may exist before
        # (or without) TTS generation succeeding.
        sa.Column("audio_file_path", sa.String(500), nullable=True),
        sa.Column("audio_duration_seconds", sa.Integer, nullable=True),
        sa.Column("slack_delivered", sa.Boolean, nullable=False, server_default="false"),
        sa.Column("selection_justification", sa.Text, nullable=False),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            server_default=sa.func.now(),
            nullable=False,
        ),
    )
    op.create_index("ix_podcast_episodes_agent_id", "podcast_episodes", ["agent_id"])
    op.create_index("ix_podcast_episodes_episode_date", "podcast_episodes", ["episode_date"])
    # At most one episode per agent per day.
    op.create_unique_constraint(
        "uq_podcast_agent_date", "podcast_episodes", ["agent_id", "episode_date"]
    )


def downgrade() -> None:
    """Drop podcast_episodes together with its indexes and constraint.

    Fix: ``op.drop_constraint`` requires the table name as its second
    positional argument, and ``op.drop_index`` needs ``table_name`` on
    several backends — the original calls omitted both and would fail
    when the downgrade actually ran.
    """
    op.drop_constraint("uq_podcast_agent_date", "podcast_episodes", type_="unique")
    op.drop_index("ix_podcast_episodes_episode_date", table_name="podcast_episodes")
    op.drop_index("ix_podcast_episodes_agent_id", table_name="podcast_episodes")
    op.drop_table("podcast_episodes")
29 changes: 29 additions & 0 deletions alembic/versions/0011_add_podcast_paper_url.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
"""Add paper_url column to podcast_episodes

Revision ID: 0011
Revises: 0010
Create Date: 2026-04-10 00:00:00.000000

"""

from typing import Sequence, Union

import sqlalchemy as sa

from alembic import op

revision: str = "0011"
down_revision: Union[str, None] = "0010"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None

# Single table/column touched by this revision.
_TABLE = "podcast_episodes"
_COLUMN = "paper_url"


def upgrade() -> None:
    """Attach a nullable paper_url (up to 1000 chars) to podcast_episodes."""
    op.add_column(_TABLE, sa.Column(_COLUMN, sa.String(1000), nullable=True))


def downgrade() -> None:
    """Remove the paper_url column again."""
    op.drop_column(_TABLE, _COLUMN)
64 changes: 64 additions & 0 deletions alembic/versions/0012_add_podcast_preferences.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
"""Add podcast_preferences table

Revision ID: 0012
Revises: 0011
Create Date: 2026-04-14 00:00:00.000000

"""

from typing import Sequence, Union

import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
from sqlalchemy.dialects.postgresql import ARRAY

from alembic import op

revision: str = "0012"
down_revision: Union[str, None] = "0011"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def _string_array_column(name: str) -> sa.Column:
    """Non-null text[] column defaulting to an empty Postgres array."""
    return sa.Column(name, ARRAY(sa.String), nullable=False, server_default="{}")


def upgrade() -> None:
    """Create podcast_preferences: per-agent voice and journal/keyword tuning."""
    op.create_table(
        "podcast_preferences",
        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
        sa.Column("agent_id", sa.String(50), nullable=False),
        sa.Column("voice_id", sa.String(100), nullable=True),
        _string_array_column("extra_keywords"),
        _string_array_column("preferred_journals"),
        _string_array_column("deprioritized_journals"),
        sa.Column(
            "updated_at",
            sa.DateTime(timezone=True),
            nullable=False,
            server_default=sa.func.now(),
        ),
    )
    # Unique: at most one preferences row per agent.
    op.create_index(
        "ix_podcast_preferences_agent_id",
        "podcast_preferences",
        ["agent_id"],
        unique=True,
    )


def downgrade() -> None:
    """Drop the table and its unique index."""
    op.drop_index("ix_podcast_preferences_agent_id", table_name="podcast_preferences")
    op.drop_table("podcast_preferences")
83 changes: 83 additions & 0 deletions alembic/versions/0013_podcast_user_support.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
"""Extend podcast tables to support plain ORCID users (no agent required)

Adds nullable user_id FK to podcast_preferences and podcast_episodes so that
any user who has completed onboarding can receive daily research briefings
without needing an approved AgentRegistry entry.

Changes:
- podcast_preferences.agent_id: NOT NULL → nullable
- podcast_preferences.user_id: new nullable FK → users.id, unique index
- podcast_episodes.agent_id: NOT NULL → nullable
- podcast_episodes.user_id: new nullable FK → users.id
- podcast_episodes: partial unique index on (user_id, episode_date) WHERE user_id IS NOT NULL

Revision ID: 0013
Revises: 0012
Create Date: 2026-04-14 00:00:00.000000
"""

from typing import Sequence, Union

import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import UUID

from alembic import op

revision: str = "0013"
down_revision: Union[str, None] = "0012"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def _user_id_column() -> sa.Column:
    """Fresh nullable user_id column with a cascading FK to users.id."""
    return sa.Column(
        "user_id",
        UUID(as_uuid=True),
        sa.ForeignKey("users.id", ondelete="CASCADE"),
        nullable=True,
    )


def upgrade() -> None:
    """Relax agent_id to nullable and add user_id on both podcast tables."""
    # podcast_preferences — existing agent rows keep their agent_id;
    # plain-user rows will be keyed by user_id (unique per user).
    op.alter_column("podcast_preferences", "agent_id", nullable=True)
    op.add_column("podcast_preferences", _user_id_column())
    op.create_index(
        "ix_podcast_preferences_user_id",
        "podcast_preferences",
        ["user_id"],
        unique=True,
    )

    # podcast_episodes — same relaxation, plus a partial unique index so
    # each user gets at most one episode per day (agent rows unaffected).
    op.alter_column("podcast_episodes", "agent_id", nullable=True)
    op.add_column("podcast_episodes", _user_id_column())
    op.execute(
        "CREATE UNIQUE INDEX ix_podcast_episodes_user_date "
        "ON podcast_episodes (user_id, episode_date) "
        "WHERE user_id IS NOT NULL"
    )


def downgrade() -> None:
    """Undo upgrade() in reverse order."""
    op.execute("DROP INDEX IF EXISTS ix_podcast_episodes_user_date")
    op.drop_column("podcast_episodes", "user_id")
    op.alter_column("podcast_episodes", "agent_id", nullable=False)

    op.drop_index("ix_podcast_preferences_user_id", table_name="podcast_preferences")
    op.drop_column("podcast_preferences", "user_id")
    op.alter_column("podcast_preferences", "agent_id", nullable=False)
Loading