Skip to content

Commit a99694f

Browse files
committed
Add vision and firecrawl tools
1 parent 1237877 commit a99694f

3 files changed

Lines changed: 107 additions & 47 deletions

File tree

agentstack/_tools/agentql/__init__.py

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -17,32 +17,32 @@ def query_data(url: str, query: Optional[str], prompt: Optional[str]) -> dict:
1717
1818
AgentQL query to scrape the url.
1919
20-
Here is a guide on AgentQL query syntax:
20+
Here is a guide on AgentQL query syntax:
2121
22-
Enclose all AgentQL query terms within curly braces `{}`. The following query structure isn't valid because the term "social\_media\_links" is wrongly enclosed within parenthesis `()`.
22+
Enclose all AgentQL query terms within curly braces `{}`. The following query structure isn't valid because the term "social_media_links" is wrongly enclosed within parentheses `()`.
2323
24-
```
25-
( # Should be {
26-
social_media_links(The icons that lead to Facebook, Snapchat, etc.)[]
27-
) # Should be }
28-
```
24+
```
25+
( # Should be {
26+
social_media_links(The icons that lead to Facebook, Snapchat, etc.)[]
27+
) # Should be }
28+
```
2929
30-
The following query is also invalid since its missing the curly braces `{}`
30+
The following query is also invalid since it's missing the curly braces `{}`
3131
32-
```
33-
# should include {
34-
social_media_links(The icons that lead to Facebook, Snapchat, etc.)[]
35-
# should include }
36-
```
32+
```
33+
# should include {
34+
social_media_links(The icons that lead to Facebook, Snapchat, etc.)[]
35+
# should include }
36+
```
3737
38-
You can't include new lines in your semantic context. The following query structure isn't valid because the semantic context isn't contained within one line.
38+
You can't include new lines in your semantic context. The following query structure isn't valid because the semantic context isn't contained within one line.
3939
40-
```
41-
{
42-
social_media_links(The icons that lead
43-
to Facebook, Snapchat, etc.)[]
44-
}
45-
```
40+
```
41+
{
42+
social_media_links(The icons that lead
43+
to Facebook, Snapchat, etc.)[]
44+
}
45+
```
4646
"""
4747
payload = {
4848
"url": url,
Lines changed: 81 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,51 +1,68 @@
1-
"""Vision tool for analyzing images using OpenAI's Vision API."""
1+
"""Vision tool for analyzing images using OpenAI's Vision API and Claude."""
22

33
import base64
4-
from typing import Optional
4+
from typing import Optional, Literal
55
import requests
66
from openai import OpenAI
7+
import anthropic
78

89
__all__ = ["analyze_image"]
910

1011

11-
def analyze_image(image_path_url: str, model: Literal["openai", "claude"] = "openai") -> str:
    """
    Analyze an image using either OpenAI's Vision API or Claude.

    Args:
        image_path_url: Local path or URL to the image
        model: Which provider to use ("openai" or "claude"). Defaults to "openai"

    Returns:
        str: Description of the image contents

    Raises:
        ValueError: If `model` is neither "openai" nor "claude".
    """
    if not image_path_url:
        return "Image Path or URL is required."

    if model == "openai":
        client = OpenAI()
        if "http" in image_path_url:
            return _analyze_web_image_openai(client, image_path_url)
        return _analyze_local_image_openai(client, image_path_url)
    elif model == "claude":
        # Bug fix: the module is imported as `import anthropic`, so the bare
        # name `Anthropic` was never in scope and `Anthropic()` raised
        # NameError; use the qualified constructor instead.
        client = anthropic.Anthropic()
        if "http" in image_path_url:
            return _analyze_web_image_claude(client, image_path_url)
        return _analyze_local_image_claude(client, image_path_url)
    else:
        raise ValueError("Model must be either 'openai' or 'claude'")
2938

3039

31-
def _analyze_web_image_openai(client: OpenAI, image_url: str) -> str:
    """Analyze a web-hosted image using OpenAI's Vision API."""
    # Build the single user message up front so the API call reads cleanly.
    user_message = {
        "role": "user",
        "content": [
            {"type": "text", "text": "What's in this image?"},
            {"type": "image_url", "image_url": {"url": image_url}},
        ],
    }
    try:
        response = client.chat.completions.create(
            model="gpt-4-vision-preview",
            messages=[user_message],
            max_tokens=300,
        )
    except Exception as e:
        # Best-effort tool: surface the failure as text instead of raising.
        return f"Error analyzing image: {str(e)}"
    # The API may return a None message body; fall back to a stock reply.
    return response.choices[0].message.content or "No description available"
4663

4764

48-
def _analyze_local_image(client: OpenAI, image_path: str) -> str:
65+
def _analyze_local_image_openai(client: OpenAI, image_path: str) -> str:
4966
base64_image = _encode_image(image_path)
5067
headers = {"Content-Type": "application/json", "Authorization": f"Bearer {client.api_key}"}
5168
payload = {
@@ -65,6 +82,46 @@ def _analyze_local_image(client: OpenAI, image_path: str) -> str:
6582
return response.json()["choices"][0]["message"]["content"]
6683

6784

85+
def _analyze_web_image_claude(client: Anthropic, image_path_url: str) -> str:
86+
response = client.messages.create(
87+
model="claude-3-opus-20240229",
88+
max_tokens=300,
89+
messages=[{
90+
"role": "user",
91+
"content": [
92+
{"type": "text", "text": "What's in this image?"},
93+
{"type": "image", "source": {"type": "url", "url": image_path_url}}
94+
]
95+
}]
96+
)
97+
return response.content[0].text
98+
99+
100+
def _analyze_local_image_claude(client: Anthropic, image_path: str) -> str:
101+
with open(image_path, "rb") as image_file:
102+
media_data = image_file.read()
103+
104+
response = client.messages.create(
105+
model="claude-3-opus-20240229",
106+
max_tokens=300,
107+
messages=[{
108+
"role": "user",
109+
"content": [
110+
{"type": "text", "text": "What's in this image?"},
111+
{
112+
"type": "image",
113+
"source": {
114+
"type": "base64",
115+
"media_type": "image/jpeg",
116+
"data": base64.b64encode(media_data).decode()
117+
}
118+
}
119+
]
120+
}]
121+
)
122+
return response.content[0].text
123+
124+
68125
def _encode_image(image_path: str) -> str:
69126
with open(image_path, "rb") as image_file:
70-
return base64.b64encode(image_file.read()).decode("utf-8")
127+
return base64.b64encode(image_file.read()).decode("utf-8")

agentstack/_tools/vision/config.json

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,10 @@
66
},
77
"dependencies": [
88
"openai>=1.0.0",
9-
"requests>=2.31.0"
9+
"requests>=2.31.0",
10+
"anthropic"
1011
],
11-
"tools": ["analyze_image"]
12-
}
12+
"tools": [
13+
"analyze_image"
14+
]
15+
}

0 commit comments

Comments
 (0)