Skip to content

Commit 3ce3a40

Browse files
committed
Improve AI metadata generation for videos
1 parent e16356d commit 3ce3a40

2 files changed

Lines changed: 272 additions & 70 deletions

File tree

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,3 +58,6 @@ tauri.windows.conf.json
5858
scripts/backfill-releases.sh
5959
scripts/update-github-releases.sh
6060
scripts/releases-backfill-data.txt
61+
62+
# SEO agent state (machine-local, contains sensitive ranking data)
63+
seo/

apps/web/actions/videos/generate-ai-metadata.ts

Lines changed: 269 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,137 @@ import { Effect, Option } from "effect";
1111
import { GROQ_MODEL, getGroqClient } from "@/lib/groq-client";
1212
import { runPromise } from "@/lib/server";
1313

14+
/** Maximum number of transcript characters grouped into a single chunk. */
const MAX_CHARS_PER_CHUNK = 24000;

/** One line of cue text paired with the start time of the cue it appeared under. */
interface VttSegment {
	/** Cue start time in whole seconds (the millisecond part is discarded). */
	start: number;
	/** Trimmed cue text line. */
	text: string;
}

/**
 * Extracts the text lines of a WebVTT transcript together with the start time
 * (in whole seconds) of the cue each line belongs to.
 *
 * Blank lines, the `WEBVTT` header, purely numeric cue identifiers and cue
 * timing lines are not emitted as text. Both the long (`HH:MM:SS.mmm`) and the
 * short (`MM:SS.mmm`) timestamp forms are understood, with either `.` or `,`
 * as the fraction separator.
 *
 * @param vttContent Raw transcript content.
 * @returns One segment per text line, in document order. Text that appears
 *   before any timing line gets a start time of 0.
 */
function parseVttWithTimestamps(vttContent: string): VttSegment[] {
	// Hours are optional and may have more than two digits; the leading
	// greedy group makes sure "100:00:00.000" is not read as "00:00:00.000".
	const timestampPattern = /(?:(\d+):)?(\d{2}):(\d{2})[.,](\d{3})/;
	const segments: VttSegment[] = [];
	let currentStart = 0;
	let isFirstContentLine = true;

	for (const rawLine of vttContent.split("\n")) {
		// trim() also strips a trailing "\r" from CRLF files.
		const line = rawLine.trim();
		if (!line) continue;

		const couldBeHeader = isFirstContentLine;
		isFirstContentLine = false;

		if (line.includes("-->")) {
			// Only the first timestamp on the line (the cue start) is used.
			const timeMatch = line.match(timestampPattern);
			if (timeMatch) {
				currentStart =
					parseInt(timeMatch[1] ?? "0", 10) * 3600 +
					parseInt(timeMatch[2] ?? "0", 10) * 60 +
					parseInt(timeMatch[3] ?? "0", 10);
			}
			continue;
		}

		// The header may carry trailing text ("WEBVTT - title"), but only on the
		// first non-empty line; a bare "WEBVTT" is skipped wherever it appears.
		const isHeader =
			line === "WEBVTT" || (couldBeHeader && /^WEBVTT[ \t]/.test(line));
		const isNumericCueId = /^\d+$/.test(line);
		if (isHeader || isNumericCueId) continue;

		segments.push({ start: currentStart, text: line });
	}

	return segments;
}
48+
49+
/**
 * Groups consecutive transcript segments into chunks whose joined text stays
 * within a character budget.
 *
 * Segments are never split: a single segment longer than the budget becomes a
 * chunk of its own. Each segment is counted as its text length plus one for
 * the joining space.
 *
 * @param segments Transcript segments in playback order.
 * @param maxChars Character budget per chunk; defaults to `MAX_CHARS_PER_CHUNK`.
 * @returns Chunks in order. `startTime` is the start of the chunk's first
 *   segment and `endTime` is the start of its last segment (segments carry no
 *   end time of their own).
 */
function chunkTranscriptWithTimestamps(
	segments: VttSegment[],
	maxChars: number = MAX_CHARS_PER_CHUNK,
): { text: string; startTime: number; endTime: number }[] {
	const chunks: { text: string; startTime: number; endTime: number }[] = [];
	let pending: VttSegment[] = [];
	let pendingLength = 0;

	// Emits the accumulated segments as one chunk; a no-op when nothing is pending.
	const flush = (): void => {
		const first = pending[0];
		const last = pending[pending.length - 1];
		if (!first || !last) return;
		chunks.push({
			text: pending.map((s) => s.text).join(" "),
			startTime: first.start,
			endTime: last.start,
		});
		pending = [];
		pendingLength = 0;
	};

	for (const segment of segments) {
		// Close the current chunk before adding a segment that would overflow it.
		if (pendingLength + segment.text.length > maxChars) {
			flush();
		}
		pending.push(segment);
		pendingLength += segment.text.length + 1;
	}

	flush();
	return chunks;
}
83+
84+
/**
 * Sends a single user message to the OpenAI chat completions endpoint
 * (model `gpt-4o-mini`) and returns the text of the first choice.
 *
 * @param prompt Prompt sent as the only user message.
 * @param apiKey OpenAI API key used as the bearer token.
 * @returns The reply text, or `"{}"` when the response has no (or empty) content.
 * @throws Error when the HTTP response status is not OK.
 */
async function requestOpenAiCompletion(
	prompt: string,
	apiKey: string,
): Promise<string> {
	const aiRes = await fetch("https://api.openai.com/v1/chat/completions", {
		method: "POST",
		headers: {
			"Content-Type": "application/json",
			Authorization: `Bearer ${apiKey}`,
		},
		body: JSON.stringify({
			model: "gpt-4o-mini",
			messages: [{ role: "user", content: prompt }],
		}),
	});
	if (!aiRes.ok) {
		const errorText = await aiRes.text();
		throw new Error(`OpenAI API error: ${aiRes.status} ${errorText}`);
	}
	const aiJson = await aiRes.json();
	// `||` rather than `??` on purpose: an empty reply is treated as missing.
	return aiJson.choices?.[0]?.message?.content || "{}";
}

/**
 * Runs a prompt against the available AI provider and returns the raw reply text.
 *
 * Groq is used when a client is supplied. If the Groq call throws, the error is
 * logged and the request is retried against OpenAI when `OPENAI_API_KEY` is
 * set; otherwise the Groq error is rethrown. Without a Groq client, OpenAI is
 * used directly when its key is set.
 *
 * @param prompt Prompt sent as the only user message.
 * @param groqClient Groq client, or a falsy value when Groq is unavailable.
 * @returns The reply text, or `"{}"` when the reply is empty or no provider is
 *   available.
 * @throws The Groq error when Groq fails and no OpenAI key is set, or an
 *   `Error` when the OpenAI request returns a non-OK status.
 */
async function callAiApi(
	prompt: string,
	groqClient: ReturnType<typeof getGroqClient>,
): Promise<string> {
	if (groqClient) {
		try {
			const completion = await groqClient.chat.completions.create({
				messages: [{ role: "user", content: prompt }],
				model: GROQ_MODEL,
			});
			return completion.choices?.[0]?.message?.content || "{}";
		} catch (groqError) {
			console.error(
				`[generateAiMetadata] Groq API error: ${groqError}, falling back to OpenAI`,
			);
			// The environment is only read once a fallback is actually needed.
			const fallbackKey = serverEnv().OPENAI_API_KEY;
			if (!fallbackKey) throw groqError;
			return await requestOpenAiCompletion(prompt, fallbackKey);
		}
	}

	const openAiKey = serverEnv().OPENAI_API_KEY;
	if (openAiKey) {
		return await requestOpenAiCompletion(prompt, openAiKey);
	}
	return "{}";
}
144+
14145
export async function generateAiMetadata(
15146
videoId: Video.VideoId,
16147
userId: string,
@@ -160,15 +291,9 @@ export async function generateAiMetadata(
160291
return;
161292
}
162293

163-
const transcriptText = vtt.value
164-
.split("\n")
165-
.filter(
166-
(l) =>
167-
l.trim() &&
168-
l !== "WEBVTT" &&
169-
!/^\d+$/.test(l.trim()) &&
170-
!l.includes("-->"),
171-
)
294+
const segments = parseVttWithTimestamps(vtt.value);
295+
const transcriptText = segments
296+
.map((s) => s.text)
172297
.join(" ")
173298
.trim();
174299

@@ -189,80 +314,154 @@ export async function generateAiMetadata(
189314
return;
190315
}
191316

192-
const prompt = `You are Cap AI. Summarize the transcript and provide JSON in the following format:
317+
const chunks = chunkTranscriptWithTimestamps(segments);
318+
console.log(
319+
`[generateAiMetadata] Processing ${videoId}: ${transcriptText.length} chars, ${chunks.length} chunk(s)`,
320+
);
321+
322+
let content = "{}";
323+
324+
if (chunks.length === 1) {
325+
const prompt = `You are Cap AI, an expert at analyzing video content and creating comprehensive summaries.
326+
327+
Analyze this transcript thoroughly and provide a detailed JSON response:
193328
{
194-
"title": "string",
195-
"summary": "string (write from 1st person perspective if appropriate, e.g. 'In this video, I demonstrate...' to make it feel personable)",
196-
"chapters": [{"title": "string", "start": number}]
329+
"title": "string (concise but descriptive title that captures the main topic)",
330+
"summary": "string (detailed summary that covers ALL key points discussed. For meetings: include decisions made, action items, and key discussion points. For tutorials: cover all steps and concepts explained. For presentations: summarize all main arguments and supporting points. Write from 1st person perspective if the speaker is teaching/presenting, e.g. 'In this video, I walk through...'. Make it comprehensive enough that someone could understand the full content without watching.)",
331+
"chapters": [{"title": "string (descriptive chapter title)", "start": number (seconds from start)}]
197332
}
333+
334+
Guidelines:
335+
- The summary should be detailed and comprehensive, not a brief overview
336+
- Capture ALL important topics, not just the main theme
337+
- For longer content, organize the summary by topic or chronologically
338+
- Include specific details, names, numbers, and conclusions mentioned
339+
- Chapters should mark distinct topic changes or sections
340+
198341
Return ONLY valid JSON without any markdown formatting or code blocks.
199342
Transcript:
200343
${transcriptText}`;
344+
content = await callAiApi(prompt, groqClient);
345+
} else {
346+
const chunkSummaries: {
347+
summary: string;
348+
keyPoints: string[];
349+
chapters: { title: string; start: number }[];
350+
startTime: number;
351+
endTime: number;
352+
}[] = [];
201353

202-
let content = "{}";
203-
204-
if (groqClient) {
205-
try {
206-
const completion = await groqClient.chat.completions.create({
207-
messages: [{ role: "user", content: prompt }],
208-
model: GROQ_MODEL,
209-
});
210-
content = completion.choices?.[0]?.message?.content || "{}";
211-
} catch (groqError) {
212-
console.error(
213-
`[generateAiMetadata] Groq API error: ${groqError}, falling back to OpenAI`,
354+
for (let i = 0; i < chunks.length; i++) {
355+
const chunk = chunks[i];
356+
if (!chunk) continue;
357+
console.log(
358+
`[generateAiMetadata] Processing chunk ${i + 1}/${chunks.length} for ${videoId} (${chunk.startTime}s - ${chunk.endTime}s)`,
214359
);
215-
// Fallback to OpenAI if Groq fails and OpenAI key exists
216-
if (serverEnv().OPENAI_API_KEY) {
217-
const aiRes = await fetch(
218-
"https://api.openai.com/v1/chat/completions",
219-
{
220-
method: "POST",
221-
headers: {
222-
"Content-Type": "application/json",
223-
Authorization: `Bearer ${serverEnv().OPENAI_API_KEY}`,
224-
},
225-
body: JSON.stringify({
226-
model: "gpt-4o-mini",
227-
messages: [{ role: "user", content: prompt }],
228-
}),
229-
},
230-
);
231-
if (!aiRes.ok) {
232-
const errorText = await aiRes.text();
233-
console.error(
234-
`[generateAiMetadata] OpenAI API error: ${aiRes.status} ${errorText}`,
235-
);
236-
throw new Error(`OpenAI API error: ${aiRes.status} ${errorText}`);
360+
361+
const chunkPrompt = `You are Cap AI, an expert at analyzing video content. This is section ${i + 1} of ${chunks.length} from a longer video (timestamp ${Math.floor(chunk.startTime / 60)}:${String(chunk.startTime % 60).padStart(2, "0")} to ${Math.floor(chunk.endTime / 60)}:${String(chunk.endTime % 60).padStart(2, "0")}).
362+
363+
Analyze this section thoroughly and provide JSON:
364+
{
365+
"summary": "string (detailed summary of this section - capture ALL key points, topics discussed, decisions made, or concepts explained. Include specific details like names, numbers, action items, and conclusions. This should be 3-6 sentences minimum.)",
366+
"keyPoints": ["string (specific key point or takeaway)", ...],
367+
"chapters": [{"title": "string (descriptive title for this topic/section)", "start": number (seconds from video start)}]
368+
}
369+
370+
Be thorough - this summary will be combined with other sections to create a comprehensive overview.
371+
Return ONLY valid JSON without any markdown formatting or code blocks.
372+
Transcript section:
373+
${chunk.text}`;
374+
375+
const chunkContent = await callAiApi(chunkPrompt, groqClient);
376+
try {
377+
let cleanContent = chunkContent;
378+
if (chunkContent.includes("```json")) {
379+
cleanContent = chunkContent
380+
.replace(/```json\s*/g, "")
381+
.replace(/```\s*/g, "");
382+
} else if (chunkContent.includes("```")) {
383+
cleanContent = chunkContent.replace(/```\s*/g, "");
237384
}
238-
const aiJson = await aiRes.json();
239-
content = aiJson.choices?.[0]?.message?.content || "{}";
240-
} else {
241-
throw groqError;
385+
const parsed = JSON.parse(cleanContent.trim());
386+
chunkSummaries.push({
387+
summary: parsed.summary || "",
388+
keyPoints: parsed.keyPoints || [],
389+
chapters: parsed.chapters || [],
390+
startTime: chunk.startTime,
391+
endTime: chunk.endTime,
392+
});
393+
} catch {
394+
console.error(
395+
`[generateAiMetadata] Failed to parse chunk ${i + 1} response for ${videoId}`,
396+
);
242397
}
243398
}
244-
} else if (serverEnv().OPENAI_API_KEY) {
245-
// Use OpenAI if Groq client is not available
246-
const aiRes = await fetch("https://api.openai.com/v1/chat/completions", {
247-
method: "POST",
248-
headers: {
249-
"Content-Type": "application/json",
250-
Authorization: `Bearer ${serverEnv().OPENAI_API_KEY}`,
251-
},
252-
body: JSON.stringify({
253-
model: "gpt-4o-mini",
254-
messages: [{ role: "user", content: prompt }],
255-
}),
256-
});
257-
if (!aiRes.ok) {
258-
const errorText = await aiRes.text();
399+
400+
const allChapters = chunkSummaries.flatMap((c) => c.chapters);
401+
const allKeyPoints = chunkSummaries.flatMap((c) => c.keyPoints);
402+
403+
const sectionDetails = chunkSummaries
404+
.map((c, i) => {
405+
const timeRange = `${Math.floor(c.startTime / 60)}:${String(c.startTime % 60).padStart(2, "0")} - ${Math.floor(c.endTime / 60)}:${String(c.endTime % 60).padStart(2, "0")}`;
406+
const keyPointsList =
407+
c.keyPoints.length > 0
408+
? `\nKey points: ${c.keyPoints.join("; ")}`
409+
: "";
410+
return `Section ${i + 1} (${timeRange}):\n${c.summary}${keyPointsList}`;
411+
})
412+
.join("\n\n");
413+
414+
const finalPrompt = `You are Cap AI, an expert at synthesizing information into comprehensive, well-organized summaries.
415+
416+
Based on these detailed section analyses of a video, create a thorough final summary that captures EVERYTHING important.
417+
418+
Section analyses:
419+
${sectionDetails}
420+
421+
${allKeyPoints.length > 0 ? `All key points identified:\n${allKeyPoints.map((p, i) => `${i + 1}. ${p}`).join("\n")}\n` : ""}
422+
423+
Provide JSON in the following format:
424+
{
425+
"title": "string (concise but descriptive title that captures the main topic/purpose)",
426+
"summary": "string (COMPREHENSIVE summary that covers the entire video thoroughly. This should be detailed enough that someone could understand all the important content without watching. Include: main topics covered, key decisions or conclusions, important details mentioned, action items if any. Organize it logically - for meetings use topics/agenda items, for tutorials use steps/concepts, for presentations use main arguments. Write from 1st person perspective if appropriate. This should be several paragraphs for longer content.)"
427+
}
428+
429+
The summary must be detailed and comprehensive - not a brief overview. Capture all the important information from every section.
430+
Return ONLY valid JSON without any markdown formatting or code blocks.`;
431+
432+
const finalContent = await callAiApi(finalPrompt, groqClient);
433+
try {
434+
let cleanContent = finalContent;
435+
if (finalContent.includes("```json")) {
436+
cleanContent = finalContent
437+
.replace(/```json\s*/g, "")
438+
.replace(/```\s*/g, "");
439+
} else if (finalContent.includes("```")) {
440+
cleanContent = finalContent.replace(/```\s*/g, "");
441+
}
442+
const parsed = JSON.parse(cleanContent.trim());
443+
content = JSON.stringify({
444+
title: parsed.title,
445+
summary: parsed.summary,
446+
chapters: allChapters,
447+
});
448+
} catch {
259449
console.error(
260-
`[generateAiMetadata] OpenAI API error: ${aiRes.status} ${errorText}`,
450+
`[generateAiMetadata] Failed to parse final summary for ${videoId}`,
261451
);
262-
throw new Error(`OpenAI API error: ${aiRes.status} ${errorText}`);
452+
const fallbackSummary = chunkSummaries
453+
.map((c, i) => `**Part ${i + 1}:** ${c.summary}`)
454+
.join("\n\n");
455+
const keyPointsSummary =
456+
allKeyPoints.length > 0
457+
? `\n\n**Key Points:**\n${allKeyPoints.map((p) => `- ${p}`).join("\n")}`
458+
: "";
459+
content = JSON.stringify({
460+
title: "Video Summary",
461+
summary: fallbackSummary + keyPointsSummary,
462+
chapters: allChapters,
463+
});
263464
}
264-
const aiJson = await aiRes.json();
265-
content = aiJson.choices?.[0]?.message?.content || "{}";
266465
}
267466

268467
let data: {

0 commit comments

Comments
 (0)