Skip to content

Commit a10ea99

Browse files
feat(api): manual updates
1 parent 05d8660 commit a10ea99

9 files changed

Lines changed: 219 additions & 2 deletions

File tree

.stats.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
configured_endpoints: 20
1+
configured_endpoints: 21
22
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/context-dev%2Fcontext.dev-97cdb78dc0d72e9df643a89660f2b0c9687f12c6e4d93f7767f6cfc1b4f2e4c7.yml
33
openapi_spec_hash: 92fc94fd8865fabe78c2667490ca3884
4-
config_hash: 51eb368cba05800d9497df4fa318828e
4+
config_hash: 682b89b02a20f5d1c13e2c91ecbcf5ce

api.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
Types:
44

55
- <code><a href="./src/resources/web.ts">WebScreenshotResponse</a></code>
6+
- <code><a href="./src/resources/web.ts">WebWebCrawlMdResponse</a></code>
67
- <code><a href="./src/resources/web.ts">WebWebScrapeHTMLResponse</a></code>
78
- <code><a href="./src/resources/web.ts">WebWebScrapeImagesResponse</a></code>
89
- <code><a href="./src/resources/web.ts">WebWebScrapeMdResponse</a></code>
@@ -11,6 +12,7 @@ Types:
1112
Methods:
1213

1314
- <code title="get /brand/screenshot">client.web.<a href="./src/resources/web.ts">screenshot</a>({ ...params }) -> WebScreenshotResponse</code>
15+
- <code title="post /web/crawl">client.web.<a href="./src/resources/web.ts">webCrawlMd</a>({ ...params }) -> WebWebCrawlMdResponse</code>
1416
- <code title="get /web/scrape/html">client.web.<a href="./src/resources/web.ts">webScrapeHTML</a>({ ...params }) -> WebWebScrapeHTMLResponse</code>
1517
- <code title="get /web/scrape/images">client.web.<a href="./src/resources/web.ts">webScrapeImages</a>({ ...params }) -> WebWebScrapeImagesResponse</code>
1618
- <code title="get /web/scrape/markdown">client.web.<a href="./src/resources/web.ts">webScrapeMd</a>({ ...params }) -> WebWebScrapeMdResponse</code>

packages/mcp-server/src/code-tool-worker.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ function getTSDiagnostics(code: string): string[] {
109109
const fuse = new Fuse(
110110
[
111111
'client.web.screenshot',
112+
'client.web.webCrawlMd',
112113
'client.web.webScrapeHTML',
113114
'client.web.webScrapeImages',
114115
'client.web.webScrapeMd',

packages/mcp-server/src/local-docs-search.ts

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,52 @@ const EMBEDDED_METHODS: MethodEntry[] = [
237237
},
238238
},
239239
},
240+
{
241+
name: 'web_crawl_md',
242+
endpoint: '/web/crawl',
243+
httpMethod: 'post',
244+
summary: 'Crawl website and extract Markdown',
245+
description:
246+
'Performs a crawl starting from a given URL, extracts page content as Markdown, and returns results for all crawled pages. Only follows links within the same domain as the starting URL. Costs 1 credit per successful page crawled.',
247+
stainlessPath: '(resource) web > (method) web_crawl_md',
248+
qualified: 'client.web.webCrawlMd',
249+
params: [
250+
'url: string;',
251+
'followSubdomains?: boolean;',
252+
'includeImages?: boolean;',
253+
'includeLinks?: boolean;',
254+
'maxDepth?: number;',
255+
'maxPages?: number;',
256+
'shortenBase64Images?: boolean;',
257+
'urlRegex?: string;',
258+
'useMainContentOnly?: boolean;',
259+
],
260+
response:
261+
'{ metadata: { maxCrawlDepth: number; numFailed: number; numSucceeded: number; numUrls: number; }; results: { markdown: string; metadata: { crawlDepth: number; statusCode: number; success: boolean; title: string; url: string; }; }[]; }',
262+
markdown:
263+
"## web_crawl_md\n\n`client.web.webCrawlMd(url: string, followSubdomains?: boolean, includeImages?: boolean, includeLinks?: boolean, maxDepth?: number, maxPages?: number, shortenBase64Images?: boolean, urlRegex?: string, useMainContentOnly?: boolean): { metadata: object; results: object[]; }`\n\n**post** `/web/crawl`\n\nPerforms a crawl starting from a given URL, extracts page content as Markdown, and returns results for all crawled pages. Only follows links within the same domain as the starting URL. Costs 1 credit per successful page crawled.\n\n### Parameters\n\n- `url: string`\n The starting URL for the crawl (must include http:// or https:// protocol)\n\n- `followSubdomains?: boolean`\n When true, follow links on subdomains of the starting URL's domain (e.g. docs.example.com when starting from example.com). www and apex are always treated as equivalent.\n\n- `includeImages?: boolean`\n Include image references in the Markdown output\n\n- `includeLinks?: boolean`\n Preserve hyperlinks in the Markdown output\n\n- `maxDepth?: number`\n Maximum link depth from the starting URL (0 = only the starting page)\n\n- `maxPages?: number`\n Maximum number of pages to crawl. Hard cap: 500.\n\n- `shortenBase64Images?: boolean`\n Truncate base64-encoded image data in the Markdown output\n\n- `urlRegex?: string`\n Regex pattern. Only URLs matching this pattern will be followed and scraped.\n\n- `useMainContentOnly?: boolean`\n Extract only the main content, stripping headers, footers, sidebars, and navigation\n\n### Returns\n\n- `{ metadata: { maxCrawlDepth: number; numFailed: number; numSucceeded: number; numUrls: number; }; results: { markdown: string; metadata: { crawlDepth: number; statusCode: number; success: boolean; title: string; url: string; }; }[]; }`\n\n - `metadata: { maxCrawlDepth: number; numFailed: number; numSucceeded: number; numUrls: number; }`\n - `results: { markdown: string; metadata: { crawlDepth: number; statusCode: number; success: boolean; title: string; url: string; }; }[]`\n\n### Example\n\n```typescript\nimport ContextDev from 'context.dev';\n\nconst client = new ContextDev();\n\nconst response = await client.web.webCrawlMd({ url: 'https://example.com' });\n\nconsole.log(response);\n```",
264+
perLanguage: {
265+
http: {
266+
example:
267+
'curl https://api.context.dev/v1/web/crawl \\\n -H \'Content-Type: application/json\' \\\n -H "Authorization: Bearer $CONTEXT_DEV_API_KEY" \\\n -d \'{\n "url": "https://example.com"\n }\'',
268+
},
269+
python: {
270+
method: 'web.web_crawl_md',
271+
example:
272+
'import os\nfrom context.dev import ContextDev\n\nclient = ContextDev(\n api_key=os.environ.get("CONTEXT_DEV_API_KEY"), # This is the default and can be omitted\n)\nresponse = client.web.web_crawl_md(\n url="https://example.com",\n)\nprint(response.metadata)',
273+
},
274+
ruby: {
275+
method: 'web.web_crawl_md',
276+
example:
277+
'require "context_dev"\n\ncontext_dev = ContextDev::Client.new(api_key: "My API Key")\n\nresponse = context_dev.web.web_crawl_md(url: "https://example.com")\n\nputs(response)',
278+
},
279+
typescript: {
280+
method: 'client.web.webCrawlMd',
281+
example:
282+
"import ContextDev from 'context.dev';\n\nconst client = new ContextDev({\n apiKey: process.env['CONTEXT_DEV_API_KEY'], // This is the default and can be omitted\n});\n\nconst response = await client.web.webCrawlMd({ url: 'https://example.com' });\n\nconsole.log(response.metadata);",
283+
},
284+
},
285+
},
240286
{
241287
name: 'extract_products',
242288
endpoint: '/brand/ai/products',

packages/mcp-server/src/methods.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,12 @@ export const sdkMethods: SdkMethod[] = [
1616
httpMethod: 'get',
1717
httpPath: '/brand/screenshot',
1818
},
19+
{
20+
clientCallName: 'client.web.webCrawlMd',
21+
fullyQualifiedName: 'web.webCrawlMd',
22+
httpMethod: 'post',
23+
httpPath: '/web/crawl',
24+
},
1925
{
2026
clientCallName: 'client.web.webScrapeHTML',
2127
fullyQualifiedName: 'web.webScrapeHTML',

src/client.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ import {
6262
Web,
6363
WebScreenshotParams,
6464
WebScreenshotResponse,
65+
WebWebCrawlMdParams,
66+
WebWebCrawlMdResponse,
6567
WebWebScrapeHTMLParams,
6668
WebWebScrapeHTMLResponse,
6769
WebWebScrapeImagesParams,
@@ -792,11 +794,13 @@ export declare namespace ContextDev {
792794
export {
793795
Web as Web,
794796
type WebScreenshotResponse as WebScreenshotResponse,
797+
type WebWebCrawlMdResponse as WebWebCrawlMdResponse,
795798
type WebWebScrapeHTMLResponse as WebWebScrapeHTMLResponse,
796799
type WebWebScrapeImagesResponse as WebWebScrapeImagesResponse,
797800
type WebWebScrapeMdResponse as WebWebScrapeMdResponse,
798801
type WebWebScrapeSitemapResponse as WebWebScrapeSitemapResponse,
799802
type WebScreenshotParams as WebScreenshotParams,
803+
type WebWebCrawlMdParams as WebWebCrawlMdParams,
800804
type WebWebScrapeHTMLParams as WebWebScrapeHTMLParams,
801805
type WebWebScrapeImagesParams as WebWebScrapeImagesParams,
802806
type WebWebScrapeMdParams as WebWebScrapeMdParams,

src/resources/index.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,11 +44,13 @@ export {
4444
export {
4545
Web,
4646
type WebScreenshotResponse,
47+
type WebWebCrawlMdResponse,
4748
type WebWebScrapeHTMLResponse,
4849
type WebWebScrapeImagesResponse,
4950
type WebWebScrapeMdResponse,
5051
type WebWebScrapeSitemapResponse,
5152
type WebScreenshotParams,
53+
type WebWebCrawlMdParams,
5254
type WebWebScrapeHTMLParams,
5355
type WebWebScrapeImagesParams,
5456
type WebWebScrapeMdParams,

src/resources/web.ts

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,15 @@ export class Web extends APIResource {
1515
return this._client.get('/brand/screenshot', { query, ...options });
1616
}
1717

18+
/**
19+
* Performs a crawl starting from a given URL, extracts page content as Markdown,
20+
* and returns results for all crawled pages. Only follows links within the same
21+
* domain as the starting URL. Costs 1 credit per successful page crawled.
22+
*/
23+
webCrawlMd(body: WebWebCrawlMdParams, options?: RequestOptions): APIPromise<WebWebCrawlMdResponse> {
24+
return this._client.post('/web/crawl', { body, ...options });
25+
}
26+
1827
/**
1928
* Scrapes the given URL and returns the raw HTML content of the page.
2029
*/
@@ -85,6 +94,74 @@ export interface WebScreenshotResponse {
8594
status?: string;
8695
}
8796

97+
export interface WebWebCrawlMdResponse {
98+
metadata: WebWebCrawlMdResponse.Metadata;
99+
100+
results: Array<WebWebCrawlMdResponse.Result>;
101+
}
102+
103+
export namespace WebWebCrawlMdResponse {
104+
export interface Metadata {
105+
/**
106+
* Maximum crawl depth reached during the crawl
107+
*/
108+
maxCrawlDepth: number;
109+
110+
/**
111+
* Number of pages that failed to crawl
112+
*/
113+
numFailed: number;
114+
115+
/**
116+
* Number of pages successfully crawled
117+
*/
118+
numSucceeded: number;
119+
120+
/**
121+
* Total number of URLs crawled
122+
*/
123+
numUrls: number;
124+
}
125+
126+
export interface Result {
127+
/**
128+
* Extracted page content as Markdown (empty string on failure)
129+
*/
130+
markdown: string;
131+
132+
metadata: Result.Metadata;
133+
}
134+
135+
export namespace Result {
136+
export interface Metadata {
137+
/**
138+
* Depth relative to the start URL. 0 = start URL, 1 = one link away.
139+
*/
140+
crawlDepth: number;
141+
142+
/**
143+
* HTTP status code of the response
144+
*/
145+
statusCode: number;
146+
147+
/**
148+
* true if the page was fetched and parsed successfully
149+
*/
150+
success: boolean;
151+
152+
/**
153+
* The page's <title> content (empty string if unavailable)
154+
*/
155+
title: string;
156+
157+
/**
158+
* The URL that was fetched
159+
*/
160+
url: string;
161+
}
162+
}
163+
}
164+
88165
export interface WebWebScrapeHTMLResponse {
89166
/**
90167
* Raw HTML content of the page
@@ -239,6 +316,56 @@ export interface WebScreenshotParams {
239316
prioritize?: 'speed' | 'quality';
240317
}
241318

319+
export interface WebWebCrawlMdParams {
320+
/**
321+
* The starting URL for the crawl (must include http:// or https:// protocol)
322+
*/
323+
url: string;
324+
325+
/**
326+
* When true, follow links on subdomains of the starting URL's domain (e.g.
327+
* docs.example.com when starting from example.com). www and apex are always
328+
* treated as equivalent.
329+
*/
330+
followSubdomains?: boolean;
331+
332+
/**
333+
* Include image references in the Markdown output
334+
*/
335+
includeImages?: boolean;
336+
337+
/**
338+
* Preserve hyperlinks in the Markdown output
339+
*/
340+
includeLinks?: boolean;
341+
342+
/**
343+
* Maximum link depth from the starting URL (0 = only the starting page)
344+
*/
345+
maxDepth?: number;
346+
347+
/**
348+
* Maximum number of pages to crawl. Hard cap: 500.
349+
*/
350+
maxPages?: number;
351+
352+
/**
353+
* Truncate base64-encoded image data in the Markdown output
354+
*/
355+
shortenBase64Images?: boolean;
356+
357+
/**
358+
* Regex pattern. Only URLs matching this pattern will be followed and scraped.
359+
*/
360+
urlRegex?: string;
361+
362+
/**
363+
* Extract only the main content, stripping headers, footers, sidebars, and
364+
* navigation
365+
*/
366+
useMainContentOnly?: boolean;
367+
}
368+
242369
export interface WebWebScrapeHTMLParams {
243370
/**
244371
* Full URL to scrape (must include http:// or https:// protocol)
@@ -299,11 +426,13 @@ export interface WebWebScrapeSitemapParams {
299426
export declare namespace Web {
300427
export {
301428
type WebScreenshotResponse as WebScreenshotResponse,
429+
type WebWebCrawlMdResponse as WebWebCrawlMdResponse,
302430
type WebWebScrapeHTMLResponse as WebWebScrapeHTMLResponse,
303431
type WebWebScrapeImagesResponse as WebWebScrapeImagesResponse,
304432
type WebWebScrapeMdResponse as WebWebScrapeMdResponse,
305433
type WebWebScrapeSitemapResponse as WebWebScrapeSitemapResponse,
306434
type WebScreenshotParams as WebScreenshotParams,
435+
type WebWebCrawlMdParams as WebWebCrawlMdParams,
307436
type WebWebScrapeHTMLParams as WebWebScrapeHTMLParams,
308437
type WebWebScrapeImagesParams as WebWebScrapeImagesParams,
309438
type WebWebScrapeMdParams as WebWebScrapeMdParams,

tests/api-resources/web.test.ts

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,33 @@ describe('resource web', () => {
3030
});
3131
});
3232

33+
// Mock server tests are disabled
34+
test.skip('webCrawlMd: only required params', async () => {
35+
const responsePromise = client.web.webCrawlMd({ url: 'https://example.com' });
36+
const rawResponse = await responsePromise.asResponse();
37+
expect(rawResponse).toBeInstanceOf(Response);
38+
const response = await responsePromise;
39+
expect(response).not.toBeInstanceOf(Response);
40+
const dataAndResponse = await responsePromise.withResponse();
41+
expect(dataAndResponse.data).toBe(response);
42+
expect(dataAndResponse.response).toBe(rawResponse);
43+
});
44+
45+
// Mock server tests are disabled
46+
test.skip('webCrawlMd: required and optional params', async () => {
47+
const response = await client.web.webCrawlMd({
48+
url: 'https://example.com',
49+
followSubdomains: true,
50+
includeImages: true,
51+
includeLinks: true,
52+
maxDepth: 0,
53+
maxPages: 1,
54+
shortenBase64Images: true,
55+
urlRegex: 'urlRegex',
56+
useMainContentOnly: true,
57+
});
58+
});
59+
3360
// Mock server tests are disabled
3461
test.skip('webScrapeHTML: only required params', async () => {
3562
const responsePromise = client.web.webScrapeHTML({ url: 'https://example.com' });

0 commit comments

Comments
 (0)