From 15c6ff0c15dec6cf62dd93466d423303ba09a464 Mon Sep 17 00:00:00 2001 From: oritwoen <18102267+oritwoen@users.noreply.github.com> Date: Tue, 12 May 2026 12:20:54 +0200 Subject: [PATCH 1/2] refactor: reuse content resolver in install --- src/commands/install.ts | 112 +++++----------------------------------- 1 file changed, 14 insertions(+), 98 deletions(-) diff --git a/src/commands/install.ts b/src/commands/install.ts index 1379cc15..5e932f78 100644 --- a/src/commands/install.ts +++ b/src/commands/install.ts @@ -44,16 +44,9 @@ import { sanitizeMarkdown } from '../core/sanitize.ts' import { indexResources } from '../retriv/index-pipeline.ts' import { createIndex, SearchDepsUnavailableError } from '../retriv/index.ts' import { shutdownWorker } from '../retriv/pool.ts' +import { resolveContentDocs } from '../sources/content-resolver.ts' import { fetchGitSkills } from '../sources/git-skills.ts' import { - downloadLlmsDocs, - fetchGitDocs, - fetchGitHubRaw, - fetchLlmsTxt, - fetchReadmeContent, - filterFrameworkDocs, - isShallowGitDocs, - normalizeLlmsLinks, parseGitHubUrl, resolveEntryFiles, resolvePackageDocs, @@ -226,98 +219,21 @@ export async function installCommand(opts: InstallOptions): Promise { continue } - const cachedDocs: Array<{ path: string, content: string }> = [] - const docsToIndex: Array<{ id: string, content: string, metadata: Record }> = [] - const isFrameworkDoc = (path: string) => filterFrameworkDocs([path], pkgName).length > 0 - - // Try git docs first - if (resolved.gitDocsUrl && resolved.repoUrl) { - const gh = parseGitHubUrl(resolved.repoUrl) - if (gh) { - const gitDocs = await fetchGitDocs(gh.owner, gh.repo, version, pkgName) - if (gitDocs?.files.length) { - const BATCH_SIZE = 20 - for (let i = 0; i < gitDocs.files.length; i += BATCH_SIZE) { - const batch = gitDocs.files.slice(i, i + BATCH_SIZE) - const results = await Promise.all( - batch.map(async (file) => { - const url = `${gitDocs.baseUrl}/${file}` - const content = await fetchGitHubRaw(url) - if (!content) - return null - return { file, content } - }), - ) - for (const r of results) { - if (r) { - const stripped = gitDocs.docsPrefix ? r.file.replace(gitDocs.docsPrefix, '') : r.file - const cachePath = stripped.startsWith('docs/') ? stripped : `docs/${stripped}` - cachedDocs.push({ path: cachePath, content: r.content }) - docsToIndex.push({ id: cachePath, content: r.content, metadata: { package: pkgName, source: cachePath, type: 'doc' } }) - } - } - } - - // Shallow git-docs: if < threshold and llms.txt exists, discard and fall through - if (isShallowGitDocs(cachedDocs.length) && resolved.llmsUrl) { - cachedDocs.length = 0 - docsToIndex.length = 0 - } - else if (cachedDocs.length > 0 && resolved.llmsUrl) { - // Always cache llms.txt alongside good git-docs as supplementary reference - const llmsContent = await fetchLlmsTxt(resolved.llmsUrl) - if (llmsContent) { - const baseUrl = resolved.docsUrl || new URL(resolved.llmsUrl).origin - cachedDocs.push({ path: 'llms.txt', content: normalizeLlmsLinks(llmsContent.raw) }) - if (llmsContent.links.length > 0) { - const docs = await downloadLlmsDocs(llmsContent, baseUrl) - for (const doc of docs) { - if (!isFrameworkDoc(doc.url)) - continue - const localPath = doc.url.startsWith('/') ? doc.url.slice(1) : doc.url - cachedDocs.push({ path: join('llms-docs', ...localPath.split('/')), content: doc.content }) - } - } - } - } - } - } - } - - // Try llms.txt - if (resolved.llmsUrl && cachedDocs.length === 0) { - const llmsContent = await fetchLlmsTxt(resolved.llmsUrl) - if (llmsContent) { - cachedDocs.push({ path: 'llms.txt', content: normalizeLlmsLinks(llmsContent.raw) }) - if (llmsContent.links.length > 0) { - const baseUrl = resolved.docsUrl || new URL(resolved.llmsUrl).origin - const docs = await downloadLlmsDocs(llmsContent, baseUrl) - for (const doc of docs) { - if (!isFrameworkDoc(doc.url)) - continue - const localPath = doc.url.startsWith('/') ? doc.url.slice(1) : doc.url - const cachePath = join('docs', ...localPath.split('/')) - cachedDocs.push({ path: cachePath, content: doc.content }) - docsToIndex.push({ id: doc.url, content: doc.content, metadata: { package: pkgName, source: cachePath, type: 'doc' } }) - } - } - } - } + const content = await resolveContentDocs({ + packageName: pkgName, + resolved, + version, + onProgress: msg => spin.message(msg), + }) - // Fallback to README - if (resolved.readmeUrl && cachedDocs.length === 0) { - const content = await fetchReadmeContent(resolved.readmeUrl) - if (content) { - cachedDocs.push({ path: 'docs/README.md', content }) - docsToIndex.push({ id: 'README.md', content, metadata: { package: pkgName, source: 'docs/README.md', type: 'doc' } }) - } - } + for (const warning of content.warnings) + p.log.warn(`${name}: ${warning}`) - if (cachedDocs.length > 0) { - cache.write(cachedDocs) + if (content.docs.length > 0) { + cache.write(content.docs) const repoGh = info.repo ? parseGitHubUrl(`https://github.com/${info.repo}`) : null - const docsType = inferDocsTypeFromCache(cache.dir, info.source) + const docsType = content.docsType cache.linkInto(skillDir, cwd, docsType, { extraPackages: parsePackages(info.packages), features, @@ -326,8 +242,8 @@ export async function installCommand(opts: InstallOptions): Promise { if (features.search) { try { - if (docsToIndex.length > 0) { - await createIndex(docsToIndex, { dbPath: getPackageDbPath(pkgName, version) }) + if (content.docsToIndex.length > 0) { + await createIndex(content.docsToIndex, { dbPath: getPackageDbPath(pkgName, version) }) } // Index package entry files (.d.ts / .js) From 612f53d03c5fd047211fc30ff12448160edee04c Mon Sep 17 00:00:00 2001 From: oritwoen <18102267+oritwoen@users.noreply.github.com> Date: Tue, 12 May 2026 12:35:16 +0200 Subject: [PATCH 2/2] test: cover install content resolver restore --- test/unit/install-command.test.ts | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 test/unit/install-command.test.ts diff --git a/test/unit/install-command.test.ts b/test/unit/install-command.test.ts new file mode 100644 index 00000000..54936431 --- /dev/null +++ b/test/unit/install-command.test.ts @@ -0,0 +1,11 @@ +import { readFileSync } from 'node:fs' +import { describe, expect, it } from 'vitest' + +describe('install docs restore', () => { + it('uses the shared content resolver instead of duplicating the docs cascade', () => { + const source = readFileSync('src/commands/install.ts', 'utf8') + + expect(source).toContain('resolveContentDocs') + expect(source).not.toMatch(/\b(fetchGitDocs|fetchGitHubRaw|fetchLlmsTxt|downloadLlmsDocs|fetchReadmeContent|normalizeLlmsLinks|filterFrameworkDocs|isShallowGitDocs)\b/) + }) +})