diff --git a/scripts/populate_tox/config.py b/scripts/populate_tox/config.py index 00257164f5..457b52dbb2 100644 --- a/scripts/populate_tox/config.py +++ b/scripts/populate_tox/config.py @@ -238,7 +238,7 @@ "litellm": { "package": "litellm", "deps": { - "*": ["anthropic", "google-genai"], + "*": ["anthropic", "google-genai", "pytest-asyncio"], }, }, "litestar": { diff --git a/sentry_sdk/integrations/litellm.py b/sentry_sdk/integrations/litellm.py index a007fbf7d0..bf07db9239 100644 --- a/sentry_sdk/integrations/litellm.py +++ b/sentry_sdk/integrations/litellm.py @@ -82,7 +82,7 @@ def _input_callback(kwargs: "Dict[str, Any]") -> None: provider = "unknown" call_type = kwargs.get("call_type", None) - if call_type == "embedding": + if call_type == "embedding" or call_type == "aembedding": operation = "embeddings" else: operation = "chat" @@ -170,6 +170,10 @@ def _input_callback(kwargs: "Dict[str, Any]") -> None: set_data_normalized(span, f"gen_ai.litellm.{key}", value) +async def _async_input_callback(kwargs: "Dict[str, Any]") -> None: + return _input_callback(kwargs) + + def _success_callback( kwargs: "Dict[str, Any]", completion_response: "Any", @@ -234,12 +238,30 @@ def _success_callback( is_streaming = kwargs.get("stream") # Callback is fired multiple times when streaming a response. # Streaming flag checked at https://github.com/BerriAI/litellm/blob/33c3f13443eaf990ac8c6e3da78bddbc2b7d0e7a/litellm/litellm_core_utils/litellm_logging.py#L1603 - if is_streaming is not True or "complete_streaming_response" in kwargs: + if ( + is_streaming is not True + or "complete_streaming_response" in kwargs + or "async_complete_streaming_response" in kwargs + ): span = metadata.pop("_sentry_span", None) if span is not None: span.__exit__(None, None, None) +async def _async_success_callback( + kwargs: "Dict[str, Any]", + completion_response: "Any", + start_time: "datetime", + end_time: "datetime", +) -> None: + return _success_callback( + kwargs, + completion_response, + start_time, + end_time, + ) + + def _failure_callback( kwargs: "Dict[str, Any]", exception: Exception, @@ -321,10 +343,14 @@ def setup_once() -> None: litellm.input_callback = input_callback or [] if _input_callback not in litellm.input_callback: litellm.input_callback.append(_input_callback) + if _async_input_callback not in litellm.input_callback: + litellm.input_callback.append(_async_input_callback) litellm.success_callback = success_callback or [] if _success_callback not in litellm.success_callback: litellm.success_callback.append(_success_callback) + if _async_success_callback not in litellm.success_callback: + litellm.success_callback.append(_async_success_callback) litellm.failure_callback = failure_callback or [] if _failure_callback not in litellm.failure_callback: diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py index 107e0b29ad..b31016ea02 100644 --- a/tests/integrations/litellm/test_litellm.py +++ b/tests/integrations/litellm/test_litellm.py @@ -2,6 +2,7 @@ import json import pytest import time +import asyncio from unittest import mock from datetime import datetime @@ -31,7 +32,7 @@ async def __call__(self, *args, **kwargs): ) from sentry_sdk.utils import package_version -from openai import OpenAI +from openai import OpenAI, AsyncOpenAI from concurrent.futures import ThreadPoolExecutor @@ -39,7 +40,8 @@ async def __call__(self, *args, **kwargs): from litellm.litellm_core_utils import streaming_handler from litellm.litellm_core_utils import thread_pool_executor from litellm.litellm_core_utils import litellm_logging -from litellm.llms.custom_httpx.http_handler import HTTPHandler +from litellm.litellm_core_utils.logging_worker import GLOBAL_LOGGING_WORKER +from litellm.llms.custom_httpx.http_handler import HTTPHandler, AsyncHTTPHandler LITELLM_VERSION = package_version("litellm") @@ -210,6 +212,89 @@ def test_nonstreaming_chat_completion( assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 +@pytest.mark.asyncio(loop_scope="session") +@pytest.mark.parametrize( + "send_default_pii, include_prompts", + [ + (True, True), + (True, False), + (False, True), + (False, False), + ], +) +async def test_async_nonstreaming_chat_completion( + sentry_init, + capture_events, + send_default_pii, + include_prompts, + get_model_response, + nonstreaming_chat_completions_model_response, +): + sentry_init( + integrations=[LiteLLMIntegration(include_prompts=include_prompts)], + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + ) + events = capture_events() + + messages = [{"role": "user", "content": "Hello!"}] + + client = AsyncOpenAI(api_key="z") + + model_response = get_model_response( + nonstreaming_chat_completions_model_response, + serialize_pydantic=True, + request_headers={"X-Stainless-Raw-Response": "true"}, + ) + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ): + with start_transaction(name="litellm test"): + await litellm.acompletion( + model="gpt-3.5-turbo", + messages=messages, + client=client, + ) + + await GLOBAL_LOGGING_WORKER.flush() + await asyncio.sleep(0.5) + + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + assert event["transaction"] == "litellm test" + + chat_spans = list( + x + for x in event["spans"] + if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + ) + assert len(chat_spans) == 1 + span = chat_spans[0] + + assert span["op"] == OP.GEN_AI_CHAT + assert span["description"] == "chat gpt-3.5-turbo" + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gpt-3.5-turbo" + assert span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "gpt-3.5-turbo" + assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat" + + if send_default_pii and include_prompts: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT in span["data"] + else: + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10 + assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20 + assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30 + + @pytest.mark.parametrize( "send_default_pii, include_prompts", [ @@ -281,6 +366,81 @@ def test_streaming_chat_completion( assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True +@pytest.mark.asyncio(loop_scope="session") +@pytest.mark.parametrize( + "send_default_pii, include_prompts", + [ + (True, True), + (True, False), + (False, True), + (False, False), + ], +) +async def test_async_streaming_chat_completion( + sentry_init, + capture_events, + send_default_pii, + include_prompts, + get_model_response, + async_iterator, + server_side_event_chunks, + streaming_chat_completions_model_response, +): + sentry_init( + integrations=[LiteLLMIntegration(include_prompts=include_prompts)], + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + ) + events = capture_events() + + messages = [{"role": "user", "content": "Hello!"}] + + client = AsyncOpenAI(api_key="z") + + model_response = get_model_response( + async_iterator( + server_side_event_chunks( + streaming_chat_completions_model_response, + include_event_type=False, + ), + ), + request_headers={"X-Stainless-Raw-Response": "true"}, + ) + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ): + with start_transaction(name="litellm test"): + response = await litellm.acompletion( + model="gpt-3.5-turbo", + messages=messages, + client=client, + stream=True, + ) + async for _ in response: + pass + + await GLOBAL_LOGGING_WORKER.flush() + await asyncio.sleep(0.5) + + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + chat_spans = list( + x + for x in event["spans"] + if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + ) + assert len(chat_spans) == 1 + span = chat_spans[0] + + assert span["op"] == OP.GEN_AI_CHAT + assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True + + def test_embeddings_create( sentry_init, capture_events, @@ -347,6 +507,74 @@ def test_embeddings_create( assert json.loads(embeddings_input) == ["Hello, world!"] +@pytest.mark.asyncio(loop_scope="session") +async def test_async_embeddings_create( + sentry_init, + capture_events, + get_model_response, + openai_embedding_model_response, + clear_litellm_cache, +): + """ + Test that litellm.embedding() calls are properly instrumented. + + This test calls the actual litellm.embedding() function (not just callbacks) + to ensure proper integration testing. + """ + sentry_init( + integrations=[LiteLLMIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + client = AsyncOpenAI(api_key="z") + + model_response = get_model_response( + openai_embedding_model_response, + serialize_pydantic=True, + request_headers={"X-Stainless-Raw-Response": "true"}, + ) + + with mock.patch.object( + client.embeddings._client._client, + "send", + return_value=model_response, + ): + with start_transaction(name="litellm test"): + response = await litellm.aembedding( + model="text-embedding-ada-002", + input="Hello, world!", + client=client, + ) + + await GLOBAL_LOGGING_WORKER.flush() + await asyncio.sleep(0.5) + + # Response is processed by litellm, so just check it exists + assert response is not None + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + spans = list( + x + for x in event["spans"] + if x["op"] == OP.GEN_AI_EMBEDDINGS and x["origin"] == "auto.ai.litellm" + ) + assert len(spans) == 1 + span = spans[0] + + assert span["op"] == OP.GEN_AI_EMBEDDINGS + assert span["description"] == "embeddings text-embedding-ada-002" + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" + assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5 + assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-ada-002" + # Check that embeddings input is captured (it's JSON serialized) + embeddings_input = span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + assert json.loads(embeddings_input) == ["Hello, world!"] + + def test_embeddings_create_with_list_input( sentry_init, capture_events, @@ -409,6 +637,70 @@ def test_embeddings_create_with_list_input( ] +@pytest.mark.asyncio(loop_scope="session") +async def test_async_embeddings_create_with_list_input( + sentry_init, + capture_events, + get_model_response, + openai_embedding_model_response, + clear_litellm_cache, +): + """Test embedding with list input.""" + sentry_init( + integrations=[LiteLLMIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + client = AsyncOpenAI(api_key="z") + + model_response = get_model_response( + openai_embedding_model_response, + serialize_pydantic=True, + request_headers={"X-Stainless-Raw-Response": "true"}, + ) + + with mock.patch.object( + client.embeddings._client._client, + "send", + return_value=model_response, + ): + with start_transaction(name="litellm test"): + response = await litellm.aembedding( + model="text-embedding-ada-002", + input=["First text", "Second text", "Third text"], + client=client, + ) + + await GLOBAL_LOGGING_WORKER.flush() + await asyncio.sleep(0.5) + + # Response is processed by litellm, so just check it exists + assert response is not None + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + spans = list( + x + for x in event["spans"] + if x["op"] == OP.GEN_AI_EMBEDDINGS and x["origin"] == "auto.ai.litellm" + ) + assert len(spans) == 1 + span = spans[0] + + assert span["op"] == OP.GEN_AI_EMBEDDINGS + assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings" + # Check that list of embeddings input is captured (it's JSON serialized) + embeddings_input = span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT] + assert json.loads(embeddings_input) == [ + "First text", + "Second text", + "Third text", + ] + + def test_embeddings_no_pii( sentry_init, capture_events, @@ -465,6 +757,64 @@ def test_embeddings_no_pii( assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["data"] +@pytest.mark.asyncio(loop_scope="session") +async def test_async_embeddings_no_pii( + sentry_init, + capture_events, + get_model_response, + openai_embedding_model_response, + clear_litellm_cache, +): + """Test that PII is not captured when disabled.""" + sentry_init( + integrations=[LiteLLMIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=False, # PII disabled + ) + events = capture_events() + + client = AsyncOpenAI(api_key="z") + + model_response = get_model_response( + openai_embedding_model_response, + serialize_pydantic=True, + request_headers={"X-Stainless-Raw-Response": "true"}, + ) + + with mock.patch.object( + client.embeddings._client._client, + "send", + return_value=model_response, + ): + with start_transaction(name="litellm test"): + response = await litellm.aembedding( + model="text-embedding-ada-002", + input="Hello, world!", + client=client, + ) + + await GLOBAL_LOGGING_WORKER.flush() + await asyncio.sleep(0.5) + + # Response is processed by litellm, so just check it exists + assert response is not None + assert len(events) == 1 + (event,) = events + + assert event["type"] == "transaction" + spans = list( + x + for x in event["spans"] + if x["op"] == OP.GEN_AI_EMBEDDINGS and x["origin"] == "auto.ai.litellm" + ) + assert len(spans) == 1 + span = spans[0] + + assert span["op"] == OP.GEN_AI_EMBEDDINGS + # Check that embeddings input is NOT captured when PII is disabled + assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["data"] + + def test_exception_handling( reset_litellm_executor, sentry_init, capture_events, get_rate_limit_model_response ): @@ -500,6 +850,42 @@ def test_exception_handling( assert len(error_events) == 1 +@pytest.mark.asyncio(loop_scope="session") +async def test_async_exception_handling( + sentry_init, capture_events, get_rate_limit_model_response +): + sentry_init( + integrations=[LiteLLMIntegration()], + traces_sample_rate=1.0, + ) + events = capture_events() + + messages = [{"role": "user", "content": "Hello!"}] + + client = AsyncOpenAI(api_key="z") + + model_response = get_rate_limit_model_response() + + with mock.patch.object( + client.embeddings._client._client, + "send", + return_value=model_response, + ): + with start_transaction(name="litellm test"): + with pytest.raises(litellm.RateLimitError): + await litellm.acompletion( + model="gpt-3.5-turbo", + messages=messages, + client=client, + ) + + # Should have error event and transaction + assert len(events) >= 1 + # Find the error event + error_events = [e for e in events if e.get("level") == "error"] + assert len(error_events) == 1 + + def test_span_origin( reset_litellm_executor, sentry_init, @@ -637,8 +1023,163 @@ def test_multiple_providers( assert SPANDATA.GEN_AI_SYSTEM in span["data"] -def test_additional_parameters( - reset_litellm_executor, +@pytest.mark.asyncio(loop_scope="session") +async def test_async_multiple_providers( + sentry_init, + capture_events, + get_model_response, + nonstreaming_chat_completions_model_response, + nonstreaming_anthropic_model_response, + nonstreaming_google_genai_model_response, +): + """Test that the integration correctly identifies different providers.""" + sentry_init( + integrations=[LiteLLMIntegration()], + traces_sample_rate=1.0, + ) + events = capture_events() + + messages = [{"role": "user", "content": "Hello!"}] + + openai_client = AsyncOpenAI(api_key="z") + openai_model_response = get_model_response( + nonstreaming_chat_completions_model_response, + serialize_pydantic=True, + request_headers={"X-Stainless-Raw-Response": "true"}, + ) + + with mock.patch.object( + openai_client.completions._client._client, + "send", + return_value=openai_model_response, + ): + with start_transaction(name="test gpt-3.5-turbo"): + await litellm.acompletion( + model="gpt-3.5-turbo", + messages=messages, + client=openai_client, + ) + + await GLOBAL_LOGGING_WORKER.flush() + await asyncio.sleep(0.5) + + _reset_litellm_executor() + + anthropic_client = AsyncHTTPHandler() + anthropic_model_response = get_model_response( + nonstreaming_anthropic_model_response, + serialize_pydantic=True, + request_headers={"X-Stainless-Raw-Response": "True"}, + ) + + with mock.patch.object( + anthropic_client, + "post", + return_value=anthropic_model_response, + ): + with start_transaction(name="test claude-3-opus-20240229"): + await litellm.acompletion( + model="claude-3-opus-20240229", + messages=messages, + client=anthropic_client, + api_key="z", + ) + + await GLOBAL_LOGGING_WORKER.flush() + await asyncio.sleep(0.5) + + _reset_litellm_executor() + + gemini_client = AsyncHTTPHandler() + gemini_model_response = get_model_response( + nonstreaming_google_genai_model_response, + serialize_pydantic=True, + ) + + with mock.patch.object( + gemini_client, + "post", + return_value=gemini_model_response, + ): + with start_transaction(name="test gemini/gemini-pro"): + await litellm.acompletion( + model="gemini/gemini-pro", + messages=messages, + client=gemini_client, + api_key="z", + ) + + await GLOBAL_LOGGING_WORKER.flush() + await asyncio.sleep(0.5) + + assert len(events) == 3 + + for i in range(3): + span = events[i]["spans"][0] + # The provider should be detected by litellm.get_llm_provider + assert SPANDATA.GEN_AI_SYSTEM in span["data"] + + +def test_additional_parameters( + reset_litellm_executor, + sentry_init, + capture_events, + get_model_response, + nonstreaming_chat_completions_model_response, +): + """Test that additional parameters are captured.""" + sentry_init( + integrations=[LiteLLMIntegration()], + traces_sample_rate=1.0, + ) + events = capture_events() + + messages = [{"role": "user", "content": "Hello!"}] + client = OpenAI(api_key="z") + + model_response = get_model_response( + nonstreaming_chat_completions_model_response, + serialize_pydantic=True, + request_headers={"X-Stainless-Raw-Response": "true"}, + ) + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ): + with start_transaction(name="litellm test"): + litellm.completion( + model="gpt-3.5-turbo", + messages=messages, + client=client, + temperature=0.7, + max_tokens=100, + top_p=0.9, + frequency_penalty=0.5, + presence_penalty=0.5, + ) + + litellm_utils.executor.shutdown(wait=True) + + (event,) = events + chat_spans = list( + x + for x in event["spans"] + if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + ) + assert len(chat_spans) == 1 + span = chat_spans[0] + + assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7 + assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100 + assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9 + assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.5 + assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.5 + + +@pytest.mark.asyncio(loop_scope="session") +async def test_async_additional_parameters( sentry_init, capture_events, get_model_response, @@ -652,7 +1193,7 @@ def test_additional_parameters( events = capture_events() messages = [{"role": "user", "content": "Hello!"}] - client = OpenAI(api_key="z") + client = AsyncOpenAI(api_key="z") model_response = get_model_response( nonstreaming_chat_completions_model_response, @@ -666,7 +1207,7 @@ def test_additional_parameters( return_value=model_response, ): with start_transaction(name="litellm test"): - litellm.completion( + await litellm.acompletion( model="gpt-3.5-turbo", messages=messages, client=client, @@ -677,7 +1218,8 @@ def test_additional_parameters( presence_penalty=0.5, ) - litellm_utils.executor.shutdown(wait=True) + await GLOBAL_LOGGING_WORKER.flush() + await asyncio.sleep(0.5) (event,) = events chat_spans = list( @@ -778,6 +1320,54 @@ def test_no_integration( assert len(chat_spans) == 0 +@pytest.mark.asyncio(loop_scope="session") +async def test_async_no_integration( + sentry_init, + capture_events, + get_model_response, + nonstreaming_chat_completions_model_response, +): + """Test that when integration is not enabled, callbacks don't break.""" + sentry_init( + traces_sample_rate=1.0, + ) + events = capture_events() + + messages = [{"role": "user", "content": "Hello!"}] + client = AsyncOpenAI(api_key="z") + + model_response = get_model_response( + nonstreaming_chat_completions_model_response, + serialize_pydantic=True, + request_headers={"X-Stainless-Raw-Response": "true"}, + ) + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ): + with start_transaction(name="litellm test"): + await litellm.acompletion( + model="gpt-3.5-turbo", + messages=messages, + client=client, + ) + + await GLOBAL_LOGGING_WORKER.flush() + await asyncio.sleep(0.5) + + (event,) = events + # Should still have the transaction, but no child spans since integration is off + assert event["type"] == "transaction" + chat_spans = list( + x + for x in event["spans"] + if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + ) + assert len(chat_spans) == 0 + + def test_response_without_usage(sentry_init, capture_events): """Test handling of responses without usage information.""" sentry_init( @@ -973,6 +1563,85 @@ def test_binary_content_encoding_image_url( ) +@pytest.mark.asyncio(loop_scope="session") +async def test_async_binary_content_encoding_image_url( + sentry_init, + capture_events, + get_model_response, + nonstreaming_chat_completions_model_response, +): + sentry_init( + integrations=[LiteLLMIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + messages = [ + { + "role": "user", + "content": [ + {"type": "text", "text": "Look at this image:"}, + { + "type": "image_url", + "image_url": {"url": IMAGE_DATA_URI, "detail": "high"}, + }, + ], + } + ] + client = AsyncOpenAI(api_key="z") + + model_response = get_model_response( + nonstreaming_chat_completions_model_response, + serialize_pydantic=True, + request_headers={"X-Stainless-Raw-Response": "true"}, + ) + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ): + with start_transaction(name="litellm test"): + await litellm.acompletion( + model="gpt-4-vision-preview", + messages=messages, + client=client, + custom_llm_provider="openai", + ) + + await GLOBAL_LOGGING_WORKER.flush() + await asyncio.sleep(0.5) + + (event,) = events + chat_spans = list( + x + for x in event["spans"] + if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + ) + assert len(chat_spans) == 1 + span = chat_spans[0] + messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + + blob_item = next( + ( + item + for msg in messages_data + if "content" in msg + for item in msg["content"] + if item.get("type") == "blob" + ), + None, + ) + assert blob_item is not None + assert blob_item["modality"] == "image" + assert blob_item["mime_type"] == "image/png" + assert ( + IMAGE_B64 in blob_item["content"] + or blob_item["content"] == BLOB_DATA_SUBSTITUTE + ) + + def test_binary_content_encoding_mixed_content( reset_litellm_executor, sentry_init, @@ -1040,6 +1709,74 @@ def test_binary_content_encoding_mixed_content( assert any(item.get("type") == "blob" for item in content_items) +@pytest.mark.asyncio(loop_scope="session") +async def test_async_binary_content_encoding_mixed_content( + sentry_init, + capture_events, + get_model_response, + nonstreaming_chat_completions_model_response, +): + sentry_init( + integrations=[LiteLLMIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + messages = [ + { + "role": "user", + "content": [ + {"type": "text", "text": "Here is an image:"}, + { + "type": "image_url", + "image_url": {"url": IMAGE_DATA_URI}, + }, + {"type": "text", "text": "What do you see?"}, + ], + } + ] + client = AsyncOpenAI(api_key="z") + + model_response = get_model_response( + nonstreaming_chat_completions_model_response, + serialize_pydantic=True, + request_headers={"X-Stainless-Raw-Response": "true"}, + ) + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ): + with start_transaction(name="litellm test"): + await litellm.acompletion( + model="gpt-4-vision-preview", + messages=messages, + client=client, + custom_llm_provider="openai", + ) + + await GLOBAL_LOGGING_WORKER.flush() + await asyncio.sleep(0.5) + + (event,) = events + chat_spans = list( + x + for x in event["spans"] + if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + ) + assert len(chat_spans) == 1 + span = chat_spans[0] + messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + + content_items = [ + item for msg in messages_data if "content" in msg for item in msg["content"] + ] + assert any(item.get("type") == "text" for item in content_items) + assert any(item.get("type") == "blob" for item in content_items) + + def test_binary_content_encoding_uri_type( reset_litellm_executor, sentry_init, @@ -1112,6 +1849,79 @@ def test_binary_content_encoding_uri_type( assert uri_item["uri"] == "https://example.com/image.jpg" +@pytest.mark.asyncio(loop_scope="session") +async def test_async_binary_content_encoding_uri_type( + sentry_init, + capture_events, + get_model_response, + nonstreaming_chat_completions_model_response, +): + sentry_init( + integrations=[LiteLLMIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + messages = [ + { + "role": "user", + "content": [ + { + "type": "image_url", + "image_url": {"url": "https://example.com/image.jpg"}, + } + ], + } + ] + client = AsyncOpenAI(api_key="z") + + model_response = get_model_response( + nonstreaming_chat_completions_model_response, + serialize_pydantic=True, + request_headers={"X-Stainless-Raw-Response": "true"}, + ) + + with mock.patch.object( + client.completions._client._client, + "send", + return_value=model_response, + ): + with start_transaction(name="litellm test"): + await litellm.acompletion( + model="gpt-4-vision-preview", + messages=messages, + client=client, + custom_llm_provider="openai", + ) + + await GLOBAL_LOGGING_WORKER.flush() + await asyncio.sleep(0.5) + + (event,) = events + chat_spans = list( + x + for x in event["spans"] + if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm" + ) + assert len(chat_spans) == 1 + span = chat_spans[0] + messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + + uri_item = next( + ( + item + for msg in messages_data + if "content" in msg + for item in msg["content"] + if item.get("type") == "uri" + ), + None, + ) + assert uri_item is not None + assert uri_item["uri"] == "https://example.com/image.jpg" + + def test_convert_message_parts_direct(): messages = [ { diff --git a/tox.ini b/tox.ini index cd5d939f6d..9a974973a2 100644 --- a/tox.ini +++ b/tox.ini @@ -552,6 +552,7 @@ deps = litellm-latest: litellm==1.83.4 litellm: anthropic litellm: google-genai + litellm: pytest-asyncio openai-base-v1.0.1: openai==1.0.1 openai-base-v1.109.1: openai==1.109.1