diff --git a/README.md b/README.md index 4937b64..4ff3642 100644 --- a/README.md +++ b/README.md @@ -46,6 +46,8 @@ OpenGradient enables developers to build AI applications with verifiable executi - **Verifiable LLM Inference**: Drop-in replacement for OpenAI and Anthropic APIs with cryptographic attestation - **Multi-Provider Support**: Access models from OpenAI, Anthropic, Google, and xAI through a unified interface +- **Native Web Search**: Opt-in `web_search` flag enables each provider's built-in web search, billed per search +- **Image Generation**: Native image-output models ("nano banana") return generated images directly on the response - **TEE Execution**: Trusted Execution Environment inference with cryptographic verification - **Model Hub Integration**: Registry for model discovery, versioning, and deployment - **Consensus-Based Verification**: End-to-end verified AI execution through the OpenGradient network @@ -168,6 +170,37 @@ async for chunk in stream: print(chunk.choices[0].delta.content, end="") ``` +### Native Web Search + +Set `web_search=True` to let the model search the web while answering. Each search is billed per search on top of token usage, at the provider's list price. Supported by OpenAI, Anthropic, Google, and xAI models; other providers ignore the flag. +```python +completion = await llm.chat( + model=og.TEE_LLM.CLAUDE_SONNET_4_6, + messages=[{"role": "user", "content": "What are today's top tech headlines?"}], + max_tokens=500, + web_search=True, +) +print(completion.chat_output["content"]) +``` + +### Image Generation + +Native image-output models ("nano banana") return generated images on the response. The generated images are available in `result.images` as `data:` URIs, while any text caption is in `chat_output["content"]`. Images travel out-of-band and are not part of the signed output hash. 
+```python +import base64 + +result = await llm.chat( + model=og.TEE_LLM.GEMINI_3_1_FLASH_IMAGE, + messages=[{"role": "user", "content": "A friendly robot reading under a tree"}], + max_tokens=1024, +) + +for i, image in enumerate(result.images or []): + payload = image.split(",", 1)[1] # strip the "data:image/png;base64," prefix + with open(f"image_{i}.png", "wb") as f: + f.write(base64.b64decode(payload)) +``` + ### Verifiable LangChain Integration Use OpenGradient as a drop-in LLM provider for LangChain agents with network-verified execution: @@ -219,6 +252,9 @@ The SDK provides access to models from multiple providers via the `og.TEE_LLM` e - Gemini 2.5 Flash Lite - Gemini 3 Pro - Gemini 3 Flash +- Gemini 3.5 Flash +- Gemini 2.5 Flash Image (native image generation, "nano banana") +- Gemini 3.1 Flash Image (native image generation, "nano banana 2") #### xAI - Grok 4 diff --git a/examples/README.md b/examples/README.md index 4f8c76f..c941cf6 100644 --- a/examples/README.md +++ b/examples/README.md @@ -84,6 +84,30 @@ python examples/llm_tool_calling.py - The model decides when to invoke tools based on the user's query - Uses x402 protocol for payment processing +#### `llm_web_search.py` +Runs an LLM chat completion with native web search enabled. + +```bash +python examples/llm_web_search.py +``` + +**What it does:** +- Sets `web_search=True` to enable the provider's built-in web search +- The model searches the web to answer the query, citing live sources +- Each search is billed per search on top of token usage (supported by OpenAI, Anthropic, Google, and xAI models) + +#### `llm_image_generation.py` +Generates an image using a native image-output model ("nano banana"). 
+ +```bash +python examples/llm_image_generation.py +``` + +**What it does:** +- Calls an image-output model (`og.TEE_LLM.GEMINI_3_1_FLASH_IMAGE`) with a text prompt +- Reads the generated images from `result.images` (data URIs) and writes them to disk +- Image output is billed as completion tokens; images travel out-of-band and are not part of the signed output hash + ## Alpha Testnet Examples Examples for features only available on the **Alpha Testnet** are located in the [`alpha/`](./alpha/) folder. These include: diff --git a/examples/llm_image_generation.py b/examples/llm_image_generation.py new file mode 100644 index 0000000..547ed84 --- /dev/null +++ b/examples/llm_image_generation.py @@ -0,0 +1,52 @@ +import asyncio +import base64 +import logging +import os +import re + +import opengradient as og + +logging.basicConfig() +logging.getLogger("opengradient").setLevel(logging.DEBUG) + +_DATA_URI_RE = re.compile(r"^data:(?P<mime>[^;,]+)?(?:;base64)?,(?P<data>.*)$", re.DOTALL) + + +def save_data_uri(data_uri: str, path: str) -> None: + """Decode a ``data:image/...;base64,...`` URI and write it to ``path``.""" + match = _DATA_URI_RE.match(data_uri) + payload = match.group("data") if match else data_uri + with open(path, "wb") as f: + f.write(base64.b64decode(payload)) + + +async def main(): + llm = og.LLM(private_key=os.environ.get("OG_PRIVATE_KEY")) + llm.ensure_opg_approval(min_allowance=0.1) + + messages = [ + {"role": "user", "content": "Generate an image of a friendly robot reading a book under a tree."}, + ] + + # Image-output models ("nano banana") return generated images on the response. + # The text caption (if any) is in chat_output["content"]; the generated images + # are in result.images as data: URIs. Images travel out-of-band and are not part + # of the signed output hash. 
+ result = await llm.chat( + model=og.TEE_LLM.GEMINI_3_1_FLASH_IMAGE, + messages=messages, + max_tokens=1024, + ) + + if result.chat_output and result.chat_output.get("content"): + print(result.chat_output["content"]) + + images = result.images or [] + print(f"Generated {len(images)} image(s)") + for index, image in enumerate(images): + path = f"generated_image_{index + 1}.png" + save_data_uri(image, path) + print(f"Saved {path}") + + +asyncio.run(main()) diff --git a/examples/llm_web_search.py b/examples/llm_web_search.py new file mode 100644 index 0000000..a6940b2 --- /dev/null +++ b/examples/llm_web_search.py @@ -0,0 +1,31 @@ +import asyncio +import logging +import os + +import opengradient as og + +logging.basicConfig() +logging.getLogger("opengradient").setLevel(logging.DEBUG) + + +async def main(): + llm = og.LLM(private_key=os.environ.get("OG_PRIVATE_KEY")) + llm.ensure_opg_approval(min_allowance=0.1) + + messages = [ + {"role": "user", "content": "What are the top technology headlines today? Cite your sources."}, + ] + + # Enable the provider's native web search with web_search=True. Each search is + # billed per search on top of token usage. Web search is supported by OpenAI, + # Anthropic, Google, and xAI models; other providers ignore the flag. 
+ result = await llm.chat( + model=og.TEE_LLM.CLAUDE_SONNET_4_6, + messages=messages, + max_tokens=500, + web_search=True, + ) + print(result.chat_output["content"]) + + +asyncio.run(main()) diff --git a/src/opengradient/agents/og_langchain.py b/src/opengradient/agents/og_langchain.py index cc3ce16..1ce83ff 100644 --- a/src/opengradient/agents/og_langchain.py +++ b/src/opengradient/agents/og_langchain.py @@ -135,6 +135,7 @@ class OpenGradientChatModel(BaseChatModel): model_cid: Union[TEE_LLM, str] max_tokens: int = 300 temperature: float = 0.0 + web_search: bool = False x402_settlement_mode: x402SettlementMode = x402SettlementMode.BATCH_HASHED _llm: LLM = PrivateAttr() @@ -149,6 +150,7 @@ def __init__( model: Optional[Union[TEE_LLM, str]] = None, max_tokens: int = 300, temperature: float = 0.0, + web_search: bool = False, x402_settlement_mode: x402SettlementMode = x402SettlementMode.BATCH_HASHED, client: Optional[LLM] = None, rpc_url: Optional[str] = None, @@ -165,6 +167,7 @@ def __init__( model_cid=resolved_model_cid, max_tokens=max_tokens, temperature=temperature, + web_search=web_search, x402_settlement_mode=x402_settlement_mode, **kwargs, ) @@ -307,6 +310,7 @@ def _build_chat_kwargs( "temperature": kwargs.get("temperature", self.temperature), "tools": kwargs.get("tools", self._tools), "tool_choice": kwargs.get("tool_choice", self._tool_choice), + "web_search": kwargs.get("web_search", self.web_search), "x402_settlement_mode": x402_settlement_mode, "stream": stream, } diff --git a/src/opengradient/cli.py b/src/opengradient/cli.py index c48ce7b..7c3b016 100644 --- a/src/opengradient/cli.py +++ b/src/opengradient/cli.py @@ -2,9 +2,12 @@ import ast import asyncio +import base64 import json import logging +import re import sys +import time import webbrowser from pathlib import Path from typing import Dict, List, Optional @@ -370,6 +373,12 @@ def infer(ctx, model_cid: str, inference_mode: str, input_data, input_file: Path @click.option("--max-tokens", type=int, 
default=100, help="Maximum number of tokens for LLM completion output") @click.option("--stop-sequence", multiple=True, help="Stop sequences for LLM") @click.option("--temperature", type=float, default=0.0, help="Temperature for LLM inference (0.0 to 1.0)") +@click.option( + "--web-search", + is_flag=True, + default=False, + help="Enable the provider's native web search (billed per search). Supported by OpenAI, Anthropic, Google, and xAI models.", +) @click.option( "--x402-settlement-mode", "x402_settlement_mode", @@ -386,6 +395,7 @@ def completion( max_tokens: int, stop_sequence: List[str], temperature: float, + web_search: bool, ): """ Run completion inference on an LLM model via TEE. @@ -410,6 +420,7 @@ def completion( max_tokens=max_tokens, stop_sequence=list(stop_sequence), temperature=temperature, + web_search=web_search, x402_settlement_mode=x402SettlementModes[x402_settlement_mode], ) ) @@ -491,6 +502,18 @@ def print_llm_completion_result(model_cid, tx_hash, llm_output, is_vanilla=True, "--tools-file", type=click.Path(exists=True, path_type=Path), required=False, help="Path to JSON file containing tool configurations" ) @click.option("--tool-choice", type=str, default="", help="Specific tool choice for the LLM") +@click.option( + "--web-search", + is_flag=True, + default=False, + help="Enable the provider's native web search (billed per search). Supported by OpenAI, Anthropic, Google, and xAI models.", +) +@click.option( + "--image-output-dir", + type=click.Path(file_okay=False, path_type=Path), + default=None, + help="Directory to write images generated by image-output models (e.g. google/gemini-3.1-flash-image). 
Defaults to the current directory.", +) @click.option( "--x402-settlement-mode", type=click.Choice(x402SettlementModes.keys()), @@ -510,6 +533,8 @@ def chat( tools: Optional[str], tools_file: Optional[Path], tool_choice: Optional[str], + web_search: bool, + image_output_dir: Optional[Path], x402_settlement_mode: Optional[str], stream: bool, ): @@ -597,6 +622,7 @@ def chat( temperature=temperature, tools=parsed_tools, tool_choice=tool_choice, + web_search=web_search, x402_settlement_mode=x402SettlementModes[x402_settlement_mode], stream=stream, ) @@ -604,7 +630,7 @@ def chat( # Handle response based on streaming flag if stream: - print_streaming_chat_result(model_cid, result, is_tee=True) + print_streaming_chat_result(model_cid, result, is_tee=True, image_output_dir=image_output_dir) else: print_llm_chat_result( model_cid, @@ -613,13 +639,72 @@ def chat( result.chat_output, is_vanilla=False, result=result, + image_output_dir=image_output_dir, ) except Exception as e: click.echo(f"Error running LLM chat inference: {str(e)}") -def print_llm_chat_result(model_cid, tx_hash, finish_reason, chat_output, is_vanilla=True, result=None): +_DATA_URI_RE = re.compile(r"^data:(?P<mime>[^;,]+)?(?:;base64)?,(?P<data>.*)$", re.DOTALL) + +_MIME_EXTENSIONS = { + "image/png": "png", + "image/jpeg": "jpg", + "image/jpg": "jpg", + "image/webp": "webp", + "image/gif": "gif", +} + + +def _save_generated_images(images, output_dir=None): + """Decode generated image ``data:`` URIs and write them to disk. + + Returns the list of file paths written. Images that cannot be decoded are skipped. 
+ """ + if not images: + return [] + + target_dir = Path(output_dir) if output_dir else Path.cwd() + target_dir.mkdir(parents=True, exist_ok=True) + + prefix = f"og_image_{int(time.time())}" + saved_paths = [] + for index, image in enumerate(images): + match = _DATA_URI_RE.match(image) + if match: + mime = (match.group("mime") or "image/png").strip() + payload = match.group("data") + else: + # Not a data URI; assume raw base64 PNG. + mime = "image/png" + payload = image + try: + raw = base64.b64decode(payload) + except Exception: + click.secho(f" Could not decode generated image #{index + 1}", fg="red") + continue + ext = _MIME_EXTENSIONS.get(mime, "png") + suffix = f"_{index + 1}" if len(images) > 1 else "" + path = target_dir / f"{prefix}{suffix}.{ext}" + path.write_bytes(raw) + saved_paths.append(path) + return saved_paths + + +def _print_generated_images(images, output_dir=None): + """Save any generated images to disk and report their paths.""" + if not images: + return + click.secho(f"Generated {len(images)} image(s):", fg="yellow", bold=True) + saved_paths = _save_generated_images(images, output_dir) + for path in saved_paths: + click.echo(" Saved: ", nl=False) + click.secho(str(path), fg="green") + click.echo() + + +def print_llm_chat_result(model_cid, tx_hash, finish_reason, chat_output, is_vanilla=True, result=None, image_output_dir=None): click.secho("✅ LLM Chat Successful", fg="green", bold=True) click.echo("──────────────────────────────────────") click.echo("Model: ", nl=False) @@ -648,6 +733,9 @@ def print_llm_chat_result(model_cid, tx_hash, finish_reason, chat_output, is_van for key, value in chat_output.items(): if value is None or value in ("", "[]", []): continue + if key == "images": + # Rendered separately below to avoid dumping base64 data URIs. 
+ continue if key == "tool_calls": # Format tool calls the same way as the streaming path click.secho("Tool Calls:", fg="yellow", bold=True) @@ -666,13 +754,16 @@ def print_llm_chat_result(model_cid, tx_hash, finish_reason, chat_output, is_van click.echo(f"{key}: {value}") click.echo() + images = result.images if result is not None else chat_output.get("images") + _print_generated_images(images, image_output_dir) + -def print_streaming_chat_result(model_cid, stream, is_tee=True): +def print_streaming_chat_result(model_cid, stream, is_tee=True, image_output_dir=None): """Handle streaming chat response with typed chunks - prints in real-time""" - asyncio.run(_print_streaming_chat_result_async(model_cid, stream, is_tee)) + asyncio.run(_print_streaming_chat_result_async(model_cid, stream, is_tee, image_output_dir)) -async def _print_streaming_chat_result_async(model_cid, stream, is_tee=True): +async def _print_streaming_chat_result_async(model_cid, stream, is_tee=True, image_output_dir=None): click.secho("🌊 Streaming LLM Chat", fg="green", bold=True) click.echo("──────────────────────────────────────") click.echo("Model: ", nl=False) @@ -726,6 +817,9 @@ async def _print_streaming_chat_result_async(model_cid, stream, is_tee=True): _print_tee_info(chunk.tee_id, chunk.tee_endpoint, chunk.tee_payment_address) + if chunk.images: + _print_generated_images(chunk.images, image_output_dir) + click.echo("──────────────────────────────────────") click.echo(f"Chunks received: {chunk_count}") click.echo(f"Content length: {len(''.join(content_parts))} characters") diff --git a/src/opengradient/client/llm.py b/src/opengradient/client/llm.py index c4308ee..ecb422e 100644 --- a/src/opengradient/client/llm.py +++ b/src/opengradient/client/llm.py @@ -56,6 +56,7 @@ class _ChatParams: tool_choice: Optional[str] response_format: Optional[ResponseFormat] x402_settlement_mode: x402SettlementMode + web_search: bool = False class LLM: @@ -191,6 +192,8 @@ def _chat_payload(self, params: 
_ChatParams, messages: List[Dict], stream: bool payload["tool_choice"] = params.tool_choice or "auto" if params.response_format: payload["response_format"] = params.response_format.to_dict() + if params.web_search: + payload["web_search"] = True return payload async def _call_with_tee_retry( @@ -265,6 +268,7 @@ async def completion( max_tokens: int = 100, stop_sequence: Optional[List[str]] = None, temperature: float = 0.0, + web_search: bool = False, x402_settlement_mode: x402SettlementMode = x402SettlementMode.BATCH_HASHED, ) -> TextGenerationOutput: """ @@ -276,6 +280,10 @@ async def completion( max_tokens (int): Maximum number of tokens for LLM output. Default is 100. stop_sequence (List[str], optional): List of stop sequences for LLM. Default is None. temperature (float): Temperature for LLM inference, between 0 and 1. Default is 0.0. + web_search (bool, optional): Enable the provider's native web search. When True, + the model can search the web to answer the prompt; each search is billed per + search on top of token usage at the provider's list price. Supported by OpenAI, + Anthropic, Google, and xAI models; other providers ignore the flag. Default is False. x402_settlement_mode (x402SettlementMode, optional): Settlement mode for x402 payments. - PRIVATE: Payment only, no input/output data on-chain (most privacy-preserving). - BATCH_HASHED: Aggregates inferences into a Merkle tree with input/output hashes and signatures (default, most cost-efficient). 
@@ -300,6 +308,8 @@ async def completion( } if stop_sequence: payload["stop"] = stop_sequence + if web_search: + payload["web_search"] = True async def _request() -> TextGenerationOutput: tee = self._tee.get() @@ -338,6 +348,7 @@ async def chat( tools: Optional[List[Dict]] = None, tool_choice: Optional[str] = None, response_format: Optional[ResponseFormat] = None, + web_search: bool = False, x402_settlement_mode: x402SettlementMode = x402SettlementMode.BATCH_HASHED, stream: bool = False, ) -> Union[TextGenerationOutput, AsyncGenerator[StreamChunk, None]]: @@ -357,6 +368,10 @@ async def chat( by Anthropic models). Use ``ResponseFormat(type="json_schema", json_schema={...})`` to enforce a strict schema (supported by all providers including Anthropic). Defaults to None (plain text). + web_search (bool, optional): Enable the provider's native web search. When True, + the model can search the web while answering; each search is billed per search + on top of token usage at the provider's list price. Supported by OpenAI, + Anthropic, Google, and xAI models; other providers ignore the flag. Default is False. x402_settlement_mode (x402SettlementMode, optional): Settlement mode for x402 payments. - PRIVATE: Payment only, no input/output data on-chain (most privacy-preserving). - BATCH_HASHED: Aggregates inferences into a Merkle tree with input/output hashes and signatures (default, most cost-efficient). @@ -366,8 +381,9 @@ async def chat( Returns: Union[TextGenerationOutput, AsyncGenerator[StreamChunk, None]]: - - If stream=False: TextGenerationOutput with chat_output, data settlement metadata, finish_reason, and payment_hash - - If stream=True: Async generator yielding StreamChunk objects + - If stream=False: TextGenerationOutput with chat_output, data settlement metadata, finish_reason, and payment_hash. + Image-output models (e.g. ``TEE_LLM.GEMINI_3_1_FLASH_IMAGE``) populate ``images`` with the generated images as ``data:`` URIs. 
+ - If stream=True: Async generator yielding StreamChunk objects. The final chunk carries any generated ``images``. Raises: ValueError: If ``response_format="json_object"`` is used with an Anthropic model. @@ -390,6 +406,7 @@ async def chat( tool_choice=tool_choice, response_format=response_format, x402_settlement_mode=x402_settlement_mode, + web_search=web_search, ) if not stream: @@ -441,6 +458,7 @@ async def _request() -> TextGenerationOutput: data_settlement_blob_id=self._data_settlement_blob_id(response), finish_reason=choices[0].get("finish_reason"), chat_output=message, + images=message.get("images"), usage=result.get("usage"), tee_signature=result.get("tee_signature"), tee_timestamp=result.get("tee_timestamp"), @@ -479,6 +497,7 @@ async def _chat_tools_as_stream(self, params: _ChatParams, messages: List[Dict]) tee_payment_address=result.tee_payment_address, data_settlement_transaction_hash=result.data_settlement_transaction_hash, data_settlement_blob_id=result.data_settlement_blob_id, + images=result.images, ) async def _chat_stream(self, params: _ChatParams, messages: List[Dict]) -> AsyncGenerator[StreamChunk, None]: @@ -571,12 +590,9 @@ async def _parse_sse_response(self, response, tee) -> AsyncGenerator[StreamChunk chunk = StreamChunk.from_sse_data(data) if chunk.is_final: chunk.data_settlement_transaction_hash = ( - chunk.data_settlement_transaction_hash - or self._data_settlement_transaction_hash(response) - ) - chunk.data_settlement_blob_id = ( - chunk.data_settlement_blob_id or self._data_settlement_blob_id(response) + chunk.data_settlement_transaction_hash or self._data_settlement_transaction_hash(response) ) + chunk.data_settlement_blob_id = chunk.data_settlement_blob_id or self._data_settlement_blob_id(response) chunk.tee_id = tee.tee_id chunk.tee_endpoint = tee.endpoint chunk.tee_payment_address = tee.payment_address diff --git a/src/opengradient/types.py b/src/opengradient/types.py index 38e54e1..4eb0ea5 100644 --- a/src/opengradient/types.py 
+++ b/src/opengradient/types.py @@ -241,6 +241,8 @@ class StreamChunk: transaction, present on the final chunk when available. data_settlement_blob_id: Walrus blob ID for individual data settlement, present on the final chunk when available. + images: Generated images returned by image-output models, present on the + final chunk when available. Each entry is a ``data:`` URI. """ choices: List[StreamChoice] @@ -254,6 +256,7 @@ class StreamChunk: tee_payment_address: Optional[str] = None data_settlement_transaction_hash: Optional[str] = None data_settlement_blob_id: Optional[str] = None + images: Optional[List[str]] = None @classmethod def from_sse_data(cls, data: Dict) -> "StreamChunk": @@ -295,6 +298,7 @@ def from_sse_data(cls, data: Dict) -> "StreamChunk": tee_timestamp=data.get("tee_timestamp"), data_settlement_transaction_hash=data.get("data_settlement_transaction_hash"), data_settlement_blob_id=data.get("data_settlement_blob_id"), + images=data.get("images"), ) @@ -443,6 +447,11 @@ class TextGenerationOutput: completion_output: Optional[str] = None """Raw text returned by a completion request.""" + images: Optional[List[str]] = None + """Generated images returned by image-output models (e.g. ``TEE_LLM.GEMINI_3_1_FLASH_IMAGE``). + Each entry is a ``data:`` URI (``data:image/png;base64,...``). ``None`` when the request did + not generate any images. Images travel out-of-band and are not part of the signed output hash.""" + usage: Optional[Dict] = None """Token usage for the request. Contains ``prompt_tokens``, ``completion_tokens``, and ``total_tokens`` when reported by the server.""" @@ -549,6 +558,13 @@ class TEE_LLM(str, Enum): GEMINI_3_FLASH = "google/gemini-3-flash-preview" GEMINI_3_1_PRO_PREVIEW = "google/gemini-3.1-pro-preview" GEMINI_3_1_FLASH_LITE_PREVIEW = "google/gemini-3.1-flash-lite-preview" + GEMINI_3_5_FLASH = "google/gemini-3.5-flash" + + # Google native image-generation models ("nano banana") via TEE. 
+ # These return generated images on the response (see ``TextGenerationOutput.images`` + # and ``StreamChunk.images``) and bill image output as completion tokens. + GEMINI_2_5_FLASH_IMAGE = "google/gemini-2.5-flash-image" + GEMINI_3_1_FLASH_IMAGE = "google/gemini-3.1-flash-image" # xAI Grok models via TEE GROK_4 = "x-ai/grok-4" diff --git a/tests/langchain_adapter_test.py b/tests/langchain_adapter_test.py index b08e205..e48c772 100644 --- a/tests/langchain_adapter_test.py +++ b/tests/langchain_adapter_test.py @@ -239,6 +239,7 @@ def test_passes_correct_params_to_client(self, model, mock_llm_client): temperature=0.0, tools=[], tool_choice=None, + web_search=False, x402_settlement_mode=x402SettlementMode.BATCH_HASHED, stream=False, ) diff --git a/tests/llm_test.py b/tests/llm_test.py index 2918b5a..fdc93bf 100644 --- a/tests/llm_test.py +++ b/tests/llm_test.py @@ -215,6 +215,24 @@ async def test_stop_sequence_omitted_when_none(self, fake_http): payload = fake_http.post_calls[0]["json"] assert "stop" not in payload + async def test_web_search_included_in_payload(self, fake_http): + fake_http.set_response(200, {"completion": "ok"}) + llm = _make_llm() + + await llm.completion(model=TEE_LLM.GPT_5, prompt="Hi", web_search=True) + + payload = fake_http.post_calls[0]["json"] + assert payload["web_search"] is True + + async def test_web_search_omitted_by_default(self, fake_http): + fake_http.set_response(200, {"completion": "ok"}) + llm = _make_llm() + + await llm.completion(model=TEE_LLM.GPT_5, prompt="Hi") + + payload = fake_http.post_calls[0]["json"] + assert "web_search" not in payload + async def test_settlement_mode_header(self, fake_http): fake_http.set_response(200, {"completion": "ok"}) llm = _make_llm() @@ -366,6 +384,70 @@ async def test_tool_choice_defaults_to_auto(self, fake_http): payload = fake_http.post_calls[0]["json"] assert payload["tool_choice"] == "auto" + async def test_web_search_included_in_payload(self, fake_http): + fake_http.set_response( + 200, + 
{ + "choices": [{"message": {"role": "assistant", "content": "ok"}, "finish_reason": "stop"}], + }, + ) + llm = _make_llm() + + await llm.chat(model=TEE_LLM.GPT_5, messages=[{"role": "user", "content": "Hi"}], web_search=True) + + payload = fake_http.post_calls[0]["json"] + assert payload["web_search"] is True + + async def test_web_search_omitted_by_default(self, fake_http): + fake_http.set_response( + 200, + { + "choices": [{"message": {"role": "assistant", "content": "ok"}, "finish_reason": "stop"}], + }, + ) + llm = _make_llm() + + await llm.chat(model=TEE_LLM.GPT_5, messages=[{"role": "user", "content": "Hi"}]) + + payload = fake_http.post_calls[0]["json"] + assert "web_search" not in payload + + async def test_generated_images_surfaced(self, fake_http): + fake_http.set_response( + 200, + { + "choices": [ + { + "message": { + "role": "assistant", + "content": "Here is your image", + "images": ["data:image/png;base64,aGVsbG8="], + }, + "finish_reason": "stop", + } + ], + }, + ) + llm = _make_llm() + + result = await llm.chat(model=TEE_LLM.GEMINI_3_1_FLASH_IMAGE, messages=[{"role": "user", "content": "Draw a cat"}]) + + assert result.images == ["data:image/png;base64,aGVsbG8="] + assert result.chat_output["content"] == "Here is your image" + + async def test_images_none_when_absent(self, fake_http): + fake_http.set_response( + 200, + { + "choices": [{"message": {"role": "assistant", "content": "ok"}, "finish_reason": "stop"}], + }, + ) + llm = _make_llm() + + result = await llm.chat(model=TEE_LLM.GPT_5, messages=[{"role": "user", "content": "Hi"}]) + + assert result.images is None + async def test_empty_choices_raises(self, fake_http): fake_http.set_response(200, {"choices": []}) llm = _make_llm() @@ -505,6 +587,31 @@ async def test_stream_falls_back_to_settlement_headers_when_event_omits_them(sel assert final.data_settlement_transaction_hash == "0xheader" assert final.data_settlement_blob_id == "blob-header" + async def 
test_stream_surfaces_images_on_final_chunk(self, fake_http): + fake_http.set_stream_response( + 200, + [ + ( + b'data: {"model":"gemini-3.1-flash-image","choices":[{"index":0,"delta":{"content":"caption"},' + b'"finish_reason":"stop"}],"images":["data:image/png;base64,aGVsbG8="]}\n\n' + ), + b"data: [DONE]\n\n", + ], + ) + llm = _make_llm() + + gen = await llm.chat( + model=TEE_LLM.GEMINI_3_1_FLASH_IMAGE, + messages=[{"role": "user", "content": "Draw a cat"}], + stream=True, + ) + + chunks = [chunk async for chunk in gen] + + final = chunks[-1] + assert final.is_final + assert final.images == ["data:image/png;base64,aGVsbG8="] + async def test_stream_error_raises(self, fake_http): fake_http.set_stream_response(500, [b"Internal Server Error"]) llm = _make_llm()