import os
from typing import Any, Dict, List, Optional

from firecrawl import FirecrawlApp

# Single module-level client; reads the API key from the environment at import time.
app = FirecrawlApp(api_key=os.getenv('FIRECRAWL_API_KEY'))
55
66
@@ -38,3 +38,104 @@ def retrieve_web_crawl(crawl_id: str):
3838 will tell you if the crawl is finished. If it is not, wait some more time then try again.
3939 """
4040 return app .check_crawl_status (crawl_id )
41+
42+
def batch_scrape(urls: List[str], formats: Optional[List[str]] = None):
    """
    Batch scrape multiple URLs simultaneously.

    Args:
        urls: List of URLs to scrape
        formats: List of desired output formats; defaults to ['markdown', 'html']

    Returns:
        Dictionary containing the batch scrape results
    """
    # Use a None sentinel instead of a mutable default list, which would be
    # shared (and mutable) across every call to this function.
    if formats is None:
        formats = ['markdown', 'html']
    return app.batch_scrape_urls(urls, {'formats': formats})
56+
57+
def async_batch_scrape(urls: List[str], formats: Optional[List[str]] = None):
    """
    Asynchronously batch scrape multiple URLs.

    Args:
        urls: List of URLs to scrape
        formats: List of desired output formats; defaults to ['markdown', 'html']

    Returns:
        Dictionary containing the job ID and status URL
    """
    # Use a None sentinel instead of a mutable default list, which would be
    # shared (and mutable) across every call to this function.
    if formats is None:
        formats = ['markdown', 'html']
    return app.async_batch_scrape_urls(urls, {'formats': formats})
71+
72+
def check_batch_status(job_id: str):
    """
    Look up the current state of an asynchronous batch scrape job.

    Args:
        job_id: The ID of the batch scrape job

    Returns:
        Dictionary containing the current status and results if completed
    """
    status = app.check_batch_scrape_status(job_id)
    return status
84+
85+
def extract_data(urls: List[str], schema: Optional[Dict[str, Any]] = None,
                 prompt: Optional[str] = None):
    """
    Extract structured data from URLs using LLMs.

    Args:
        urls: List of URLs to extract data from
        schema: Optional JSON schema defining the structure of data to extract
        prompt: Optional natural language prompt describing the data to extract

    Returns:
        Dictionary containing the extracted structured data

    Raises:
        ValueError: If neither schema nor prompt is provided.
    """
    if schema is None and prompt is None:
        # Previously this silently sent {'schema': None} to the API.
        raise ValueError("extract_data requires a 'schema' or a 'prompt'")

    # Forward every hint that was supplied; previously a supplied schema was
    # silently ignored whenever a prompt was also given.
    params: Dict[str, Any] = {}
    if prompt is not None:
        params['prompt'] = prompt
    if schema is not None:
        params['schema'] = schema

    return app.extract(urls, params)
106+
107+
def map_website(url: str, search: Optional[str] = None):
    """
    Map a website to get all URLs, with optional search functionality.

    Args:
        url: The base URL to map
        search: Optional search term to filter URLs

    Returns:
        Dictionary containing the list of discovered URLs
    """
    options = {}
    if search:
        options['search'] = search
    return app.map_url(url, options)
122+
123+
def batch_extract(urls: List[str], extract_params: Dict[str, Any]):
    """
    Batch extract structured data from multiple URLs.

    Args:
        urls: List of URLs to extract data from
        extract_params: Dictionary containing extraction parameters including prompt or schema

    Returns:
        Dictionary containing the extracted data from all URLs
    """
    # Request the 'extract' format and attach the caller's extraction spec.
    scrape_options = {
        'formats': ['extract'],
        'extract': extract_params,
    }
    return app.batch_scrape_urls(urls, scrape_options)