Commit 55197c5: init (initial commit, 0 parents)

28 files changed: 4059 additions, 0 deletions
.gitignore

Lines changed: 17 additions & 0 deletions
```
# Mintlify
.mintlify/

# Dependencies
node_modules/

# OS
.DS_Store

# IDE
.idea/
.vscode/
*.swp
*.swo

# Claude
.claude/
```

README.md

Lines changed: 21 additions & 0 deletions
# reader-docs

Documentation for [Reader](https://github.com/vakra-dev/reader) - open-source web scraping for LLMs.

Built with [Mintlify](https://mintlify.com).

## Development

```bash
npx mintlify dev
```

Open [http://localhost:3000](http://localhost:3000).

## Deployment

Push to GitHub and connect the repository to the Mintlify Dashboard for automatic deployments.

## License

Apache 2.0

api-reference/crawl-options.mdx

Lines changed: 161 additions & 0 deletions
---
title: CrawlOptions
description: Options for the crawl() function
---

## Type Definition

```typescript
interface CrawlOptions {
  // Required
  url: string;

  // Crawl limits
  depth?: number;
  maxPages?: number;

  // Scraping
  scrape?: boolean;
  scrapeConcurrency?: number;
  formats?: Array<"markdown" | "html">;

  // Rate limiting
  delayMs?: number;
  timeoutMs?: number;

  // URL filtering
  includePatterns?: string[];
  excludePatterns?: string[];

  // Request configuration
  userAgent?: string;
  proxy?: ProxyConfig;

  // Debugging
  verbose?: boolean;
  showChrome?: boolean;
}
```

## Options Reference

### Required Options

| Option | Type     | Description                     |
| ------ | -------- | ------------------------------- |
| `url`  | `string` | Seed URL to start crawling from |

### Crawl Limit Options

| Option     | Type     | Default | Description               |
| ---------- | -------- | ------- | ------------------------- |
| `depth`    | `number` | `1`     | Maximum crawl depth       |
| `maxPages` | `number` | `20`    | Maximum pages to discover |

### Scraping Options

| Option              | Type                          | Default                | Description                              |
| ------------------- | ----------------------------- | ---------------------- | ---------------------------------------- |
| `scrape`            | `boolean`                     | `false`                | Also scrape content of discovered pages  |
| `scrapeConcurrency` | `number`                      | `2`                    | Concurrent scraping threads              |
| `formats`           | `Array<"markdown" \| "html">` | `["markdown", "html"]` | Output formats when scraping             |

### Rate Limiting Options

| Option      | Type     | Default     | Description                  |
| ----------- | -------- | ----------- | ---------------------------- |
| `delayMs`   | `number` | `1000`      | Delay between requests (ms)  |
| `timeoutMs` | `number` | `undefined` | Total timeout for crawl (ms) |

### URL Filtering Options

| Option            | Type       | Default     | Description                     |
| ----------------- | ---------- | ----------- | ------------------------------- |
| `includePatterns` | `string[]` | `undefined` | URL patterns to include (regex) |
| `excludePatterns` | `string[]` | `undefined` | URL patterns to exclude (regex) |

### Request Configuration Options

| Option      | Type          | Default     | Description              |
| ----------- | ------------- | ----------- | ------------------------ |
| `userAgent` | `string`      | `undefined` | Custom user agent string |
| `proxy`     | `ProxyConfig` | `undefined` | Proxy configuration      |

### Debugging Options

| Option       | Type      | Default | Description            |
| ------------ | --------- | ------- | ---------------------- |
| `verbose`    | `boolean` | `false` | Enable verbose logging |
| `showChrome` | `boolean` | `false` | Show browser window    |
91+
## Examples
92+
93+
### Basic Crawl
94+
95+
```typescript
96+
await reader.crawl({
97+
url: "https://example.com",
98+
depth: 2,
99+
maxPages: 50,
100+
});
101+
```
102+
103+
### Crawl with Scraping
104+
105+
```typescript
106+
await reader.crawl({
107+
url: "https://example.com",
108+
depth: 2,
109+
maxPages: 50,
110+
scrape: true,
111+
scrapeConcurrency: 5,
112+
formats: ["markdown"],
113+
});
114+
```
115+
116+
### With URL Filtering
117+
118+
```typescript
119+
await reader.crawl({
120+
url: "https://example.com",
121+
depth: 3,
122+
maxPages: 100,
123+
includePatterns: ["^/docs/", "^/guides/"],
124+
excludePatterns: ["^/admin/", "^/api/"],
125+
});
126+
```
127+
128+
### With Rate Limiting
129+
130+
```typescript
131+
await reader.crawl({
132+
url: "https://example.com",
133+
depth: 2,
134+
delayMs: 2000, // 2 seconds between requests
135+
timeoutMs: 300000, // 5 minute total timeout
136+
});
137+
```
138+
139+
### Full Options
140+
141+
```typescript
142+
await reader.crawl({
143+
url: "https://example.com",
144+
depth: 3,
145+
maxPages: 100,
146+
scrape: true,
147+
scrapeConcurrency: 5,
148+
formats: ["markdown"],
149+
delayMs: 1000,
150+
timeoutMs: 600000,
151+
includePatterns: ["^/docs/"],
152+
excludePatterns: ["^/docs/legacy/"],
153+
proxy: {
154+
host: "proxy.example.com",
155+
port: 8080,
156+
username: "user",
157+
password: "pass",
158+
},
159+
verbose: true,
160+
});
161+
```

api-reference/crawl-result.mdx

Lines changed: 141 additions & 0 deletions
---
title: CrawlResult
description: Result structure from the crawl() function
---

## Type Definition

```typescript
interface CrawlResult {
  urls: CrawlUrl[];
  scraped?: ScrapeResult;
  metadata: CrawlMetadata;
}
```

## CrawlUrl

Information about each discovered URL:

```typescript
interface CrawlUrl {
  url: string;
  title: string;
  description: string | null;
}
```
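
Because `description` is `string | null`, guard against the `null` case when printing. A small sketch, assuming `result` is a `CrawlResult` returned by `reader.crawl()`:

```typescript
// Fall back to a placeholder when a page has no description.
for (const page of result.urls) {
  console.log(`${page.title}: ${page.description ?? "(no description)"}`);
}
```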

## CrawlMetadata

Metadata about the crawl operation:

```typescript
interface CrawlMetadata {
  totalUrls: number;
  maxDepth: number;
  totalDuration: number; // Milliseconds
  seedUrl: string;
}
```

## ScrapeResult

When `scrape: true`, the `scraped` property contains the scraped content:

```typescript
interface ScrapeResult {
  data: WebsiteScrapeResult[];
  batchMetadata: BatchMetadata;
}
```

See [ScrapeResult](/api-reference/scrape-result) for the full structure.

## Examples

### Access Discovered URLs

```typescript
const result = await reader.crawl({
  url: "https://example.com",
  depth: 2,
  maxPages: 50,
});

console.log(`Found ${result.urls.length} pages`);

result.urls.forEach((page) => {
  console.log(`- ${page.title}`);
  console.log(`  URL: ${page.url}`);
  console.log(`  Description: ${page.description}`);
});
```

### Access Crawl Metadata

```typescript
const result = await reader.crawl({
  url: "https://example.com",
  depth: 2,
});

const { metadata } = result;

console.log("Seed URL:", metadata.seedUrl);
console.log("Total URLs:", metadata.totalUrls);
console.log("Max Depth:", metadata.maxDepth);
console.log("Duration:", metadata.totalDuration, "ms");
```

### Access Scraped Content

```typescript
const result = await reader.crawl({
  url: "https://example.com",
  depth: 2,
  scrape: true,
});

if (result.scraped) {
  console.log(`Scraped ${result.scraped.batchMetadata.successfulUrls} pages`);

  result.scraped.data.forEach((page) => {
    console.log(`Title: ${page.metadata.website.title}`);
    console.log(`Content: ${page.markdown?.substring(0, 200)}...`);
  });
}
```

### Full Example

```typescript
const result = await reader.crawl({
  url: "https://docs.example.com",
  depth: 3,
  maxPages: 100,
  scrape: true,
  formats: ["markdown"],
});

console.log("=== Crawl Summary ===");
console.log(`Seed URL: ${result.metadata.seedUrl}`);
console.log(`Pages discovered: ${result.metadata.totalUrls}`);
console.log(`Duration: ${(result.metadata.totalDuration / 1000).toFixed(1)}s`);

console.log("\n=== Discovered URLs ===");
result.urls.forEach((page, i) => {
  console.log(`${i + 1}. ${page.title}`);
  console.log(`   ${page.url}`);
});

if (result.scraped) {
  console.log("\n=== Scraped Content ===");
  console.log(`Success: ${result.scraped.batchMetadata.successfulUrls}`);
  console.log(`Failed: ${result.scraped.batchMetadata.failedUrls}`);

  result.scraped.data.forEach((page) => {
    console.log(`\n--- ${page.metadata.website.title} ---`);
    console.log(page.markdown?.substring(0, 500));
  });
}
```
