Skip to content

Commit 1237877

Browse files
committed
Update firecrawl tool with added features
1 parent da14a29 commit 1237877

2 files changed

Lines changed: 112 additions & 2 deletions

File tree

agentstack/_tools/firecrawl/__init__.py

Lines changed: 102 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import os
22
from firecrawl import FirecrawlApp
3-
3+
from typing import List, Dict, Any, Optional
44
# Shared Firecrawl client used by the tool functions in this module. The API
# key is read from the FIRECRAWL_API_KEY environment variable at import time
# (os.getenv yields None when the variable is unset).
app = FirecrawlApp(api_key=os.getenv('FIRECRAWL_API_KEY'))
55

66

@@ -38,3 +38,104 @@ def retrieve_web_crawl(crawl_id: str):
3838
will tell you if the crawl is finished. If it is not, wait some more time then try again.
3939
"""
4040
return app.check_crawl_status(crawl_id)
41+
42+
43+
def batch_scrape(urls: List[str], formats: Optional[List[str]] = None):
    """
    Batch scrape multiple URLs simultaneously.

    Args:
        urls: List of URLs to scrape
        formats: List of desired output formats (e.g., ['markdown', 'html']).
            When omitted or None, ['markdown', 'html'] is requested.

    Returns:
        Whatever `app.batch_scrape_urls` returns (described upstream as a
        dictionary containing the batch scrape results).
    """
    # Build the default list per call instead of in the signature: a list
    # default is a single shared object that would be handed to the client
    # on every call, so any mutation would leak into later calls.
    if formats is None:
        formats = ['markdown', 'html']
    return app.batch_scrape_urls(urls, {'formats': formats})
56+
57+
58+
def async_batch_scrape(urls: List[str], formats: Optional[List[str]] = None):
    """
    Asynchronously batch scrape multiple URLs.

    Args:
        urls: List of URLs to scrape
        formats: List of desired output formats (e.g., ['markdown', 'html']).
            When omitted or None, ['markdown', 'html'] is requested.

    Returns:
        Whatever `app.async_batch_scrape_urls` returns (described upstream as
        a dictionary containing the job ID and status URL).
    """
    # Build the default list per call instead of in the signature: a list
    # default is a single shared object that would be handed to the client
    # on every call, so any mutation would leak into later calls.
    if formats is None:
        formats = ['markdown', 'html']
    return app.async_batch_scrape_urls(urls, {'formats': formats})
71+
72+
73+
def check_batch_status(job_id: str):
    """
    Look up an asynchronous batch scrape job by its ID.

    Args:
        job_id: The ID of the batch scrape job

    Returns:
        The value produced by `app.check_batch_scrape_status` for this job
        (upstream describes it as a dictionary with the current status and,
        once the job has completed, its results).
    """
    job_status = app.check_batch_scrape_status(job_id)
    return job_status
84+
85+
86+
def extract_data(
    urls: List[str],
    schema: Optional[Dict[str, Any]] = None,
    prompt: Optional[str] = None,
):
    """
    Extract structured data from URLs using LLMs.

    At least one of `schema` or `prompt` must be supplied; when both are
    given, both are forwarded to the extraction request.

    Args:
        urls: List of URLs to extract data from
        schema: Optional JSON schema defining the structure of data to extract
        prompt: Optional natural language prompt describing the data to extract

    Returns:
        Whatever `app.extract` returns (described upstream as a dictionary
        containing the extracted structured data).

    Raises:
        ValueError: If neither a prompt nor a schema is provided.
    """
    # Fail fast instead of sending a request that carries {'schema': None}.
    if not prompt and schema is None:
        raise ValueError("extract_data requires a prompt, a schema, or both")

    # Forward every option the caller supplied; previously a schema was
    # silently dropped whenever a prompt was also given.
    params: Dict[str, Any] = {}
    if prompt:
        params['prompt'] = prompt
    if schema is not None:
        params['schema'] = schema

    return app.extract(urls, params)
106+
107+
108+
def map_website(url: str, search: Optional[str] = None):
    """
    Discover the URLs of a website, optionally narrowed by a search term.

    Args:
        url: The base URL to map
        search: Optional search term to filter URLs; an empty or missing
            value sends no search option at all

    Returns:
        The value produced by `app.map_url` (upstream describes it as a
        dictionary containing the list of discovered URLs).
    """
    if search:
        options = {'search': search}
    else:
        options = {}
    return app.map_url(url, options)
122+
123+
124+
def batch_extract(urls: List[str], extract_params: Dict[str, Any]):
    """
    Run a batch scrape that requests the 'extract' format for every URL.

    Args:
        urls: List of URLs to extract data from
        extract_params: Dictionary of extraction parameters (prompt or
            schema), forwarded unchanged under the 'extract' key

    Returns:
        The value produced by `app.batch_scrape_urls` (upstream describes it
        as a dictionary containing the extracted data from all URLs).
    """
    scrape_options = dict(formats=['extract'], extract=extract_params)
    return app.batch_scrape_urls(urls, scrape_options)

agentstack/_tools/firecrawl/config.json

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,15 @@
88
"dependencies": [
99
"firecrawl-py>=1.6.4"
1010
],
11-
"tools": ["web_scrape", "web_crawl", "retrieve_web_crawl"],
11+
"tools": [
12+
"web_scrape",
13+
"web_crawl",
14+
"retrieve_web_crawl",
15+
"batch_scrape",
16+
"check_batch_status",
17+
"extract_data",
18+
"map_website",
19+
"batch_extract"
20+
],
1221
"cta": "Create an API key at https://www.firecrawl.dev/"
1322
}

0 commit comments

Comments
 (0)