diff --git a/scripts/populate_tox/config.py b/scripts/populate_tox/config.py
index 00257164f5..457b52dbb2 100644
--- a/scripts/populate_tox/config.py
+++ b/scripts/populate_tox/config.py
@@ -238,7 +238,7 @@
     "litellm": {
         "package": "litellm",
         "deps": {
-            "*": ["anthropic", "google-genai"],
+            "*": ["anthropic", "google-genai", "pytest-asyncio"],
         },
     },
     "litestar": {
diff --git a/sentry_sdk/integrations/litellm.py b/sentry_sdk/integrations/litellm.py
index a007fbf7d0..bf07db9239 100644
--- a/sentry_sdk/integrations/litellm.py
+++ b/sentry_sdk/integrations/litellm.py
@@ -82,7 +82,7 @@ def _input_callback(kwargs: "Dict[str, Any]") -> None:
         provider = "unknown"
 
     call_type = kwargs.get("call_type", None)
-    if call_type == "embedding":
+    if call_type == "embedding" or call_type == "aembedding":
         operation = "embeddings"
     else:
         operation = "chat"
@@ -170,6 +170,10 @@ def _input_callback(kwargs: "Dict[str, Any]") -> None:
             set_data_normalized(span, f"gen_ai.litellm.{key}", value)
 
 
+async def _async_input_callback(kwargs: "Dict[str, Any]") -> None:
+    return _input_callback(kwargs)  # coroutine twin: delegates to the sync callback
+
+
 def _success_callback(
     kwargs: "Dict[str, Any]",
     completion_response: "Any",
@@ -234,12 +238,30 @@ def _success_callback(
         is_streaming = kwargs.get("stream")
         # Callback is fired multiple times when streaming a response.
         # Streaming flag checked at https://github.com/BerriAI/litellm/blob/33c3f13443eaf990ac8c6e3da78bddbc2b7d0e7a/litellm/litellm_core_utils/litellm_logging.py#L1603
-        if is_streaming is not True or "complete_streaming_response" in kwargs:
+        if (
+            is_streaming is not True
+            or "complete_streaming_response" in kwargs
+            or "async_complete_streaming_response" in kwargs
+        ):
             span = metadata.pop("_sentry_span", None)
             if span is not None:
                 span.__exit__(None, None, None)
 
 
+async def _async_success_callback(
+    kwargs: "Dict[str, Any]",
+    completion_response: "Any",
+    start_time: "datetime",
+    end_time: "datetime",
+) -> None:
+    """Coroutine counterpart of ``_success_callback``.
+
+    Forwards the four callback arguments unchanged to the synchronous
+    implementation and returns whatever it returns.
+    """
+    return _success_callback(kwargs, completion_response, start_time, end_time)
+
+
 def _failure_callback(
     kwargs: "Dict[str, Any]",
     exception: Exception,
@@ -321,10 +343,14 @@ def setup_once() -> None:
         litellm.input_callback = input_callback or []
         if _input_callback not in litellm.input_callback:
             litellm.input_callback.append(_input_callback)
+        if _async_input_callback not in litellm.input_callback:
+            litellm.input_callback.append(_async_input_callback)
 
         litellm.success_callback = success_callback or []
         if _success_callback not in litellm.success_callback:
             litellm.success_callback.append(_success_callback)
+        if _async_success_callback not in litellm.success_callback:
+            litellm.success_callback.append(_async_success_callback)
 
         litellm.failure_callback = failure_callback or []
         if _failure_callback not in litellm.failure_callback:
diff --git a/tests/integrations/litellm/test_litellm.py b/tests/integrations/litellm/test_litellm.py
index 107e0b29ad..b31016ea02 100644
--- a/tests/integrations/litellm/test_litellm.py
+++ b/tests/integrations/litellm/test_litellm.py
@@ -2,6 +2,7 @@
 import json
 import pytest
 import time
+import asyncio
 from unittest import mock
 from datetime import datetime
 
@@ -31,7 +32,7 @@ async def __call__(self, *args, **kwargs):
 )
 from sentry_sdk.utils import package_version
 
-from openai import OpenAI
+from openai import OpenAI, AsyncOpenAI
 
 from concurrent.futures import ThreadPoolExecutor
 
@@ -39,7 +40,8 @@ async def __call__(self, *args, **kwargs):
 from litellm.litellm_core_utils import streaming_handler
 from litellm.litellm_core_utils import thread_pool_executor
 from litellm.litellm_core_utils import litellm_logging
-from litellm.llms.custom_httpx.http_handler import HTTPHandler
+from litellm.litellm_core_utils.logging_worker import GLOBAL_LOGGING_WORKER
+from litellm.llms.custom_httpx.http_handler import HTTPHandler, AsyncHTTPHandler
 
 
 LITELLM_VERSION = package_version("litellm")
@@ -210,6 +212,89 @@ def test_nonstreaming_chat_completion(
     assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30
 
 
+@pytest.mark.asyncio(loop_scope="session")
+@pytest.mark.parametrize(
+    "send_default_pii, include_prompts",
+    [
+        (True, True),
+        (True, False),
+        (False, True),
+        (False, False),
+    ],
+)
+async def test_async_nonstreaming_chat_completion(
+    sentry_init,
+    capture_events,
+    send_default_pii,
+    include_prompts,
+    get_model_response,
+    nonstreaming_chat_completions_model_response,
+):
+    sentry_init(
+        integrations=[LiteLLMIntegration(include_prompts=include_prompts)],
+        traces_sample_rate=1.0,
+        send_default_pii=send_default_pii,
+    )
+    events = capture_events()
+
+    messages = [{"role": "user", "content": "Hello!"}]
+
+    client = AsyncOpenAI(api_key="z")
+
+    model_response = get_model_response(
+        nonstreaming_chat_completions_model_response,
+        serialize_pydantic=True,
+        request_headers={"X-Stainless-Raw-Response": "true"},
+    )
+
+    with mock.patch.object(
+        client.completions._client._client,
+        "send",
+        return_value=model_response,
+    ):
+        with start_transaction(name="litellm test"):
+            await litellm.acompletion(
+                model="gpt-3.5-turbo",
+                messages=messages,
+                client=client,
+            )
+
+            await GLOBAL_LOGGING_WORKER.flush()
+            await asyncio.sleep(0.5)
+
+    assert len(events) == 1
+    (event,) = events
+
+    assert event["type"] == "transaction"
+    assert event["transaction"] == "litellm test"
+
+    chat_spans = list(
+        x
+        for x in event["spans"]
+        if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm"
+    )
+    assert len(chat_spans) == 1
+    span = chat_spans[0]
+
+    assert span["op"] == OP.GEN_AI_CHAT
+    assert span["description"] == "chat gpt-3.5-turbo"
+    assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "gpt-3.5-turbo"
+    assert span["data"][SPANDATA.GEN_AI_RESPONSE_MODEL] == "gpt-3.5-turbo"
+    assert span["data"][SPANDATA.GEN_AI_SYSTEM] == "openai"
+    assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "chat"
+
+    if send_default_pii and include_prompts:
+        assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"]
+        assert SPANDATA.GEN_AI_RESPONSE_TEXT in span["data"]
+    else:
+        assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"]
+        assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"]
+
+    assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 10
+    assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20
+    assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30
+
+
 @pytest.mark.parametrize(
     "send_default_pii, include_prompts",
     [
@@ -281,6 +366,81 @@ def test_streaming_chat_completion(
     assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True
 
 
+@pytest.mark.asyncio(loop_scope="session")
+@pytest.mark.parametrize(
+    "send_default_pii, include_prompts",
+    [
+        (True, True),
+        (True, False),
+        (False, True),
+        (False, False),
+    ],
+)
+async def test_async_streaming_chat_completion(
+    sentry_init,
+    capture_events,
+    send_default_pii,
+    include_prompts,
+    get_model_response,
+    async_iterator,
+    server_side_event_chunks,
+    streaming_chat_completions_model_response,
+):
+    sentry_init(
+        integrations=[LiteLLMIntegration(include_prompts=include_prompts)],
+        traces_sample_rate=1.0,
+        send_default_pii=send_default_pii,
+    )
+    events = capture_events()
+
+    messages = [{"role": "user", "content": "Hello!"}]
+
+    client = AsyncOpenAI(api_key="z")
+
+    model_response = get_model_response(
+        async_iterator(
+            server_side_event_chunks(
+                streaming_chat_completions_model_response,
+                include_event_type=False,
+            ),
+        ),
+        request_headers={"X-Stainless-Raw-Response": "true"},
+    )
+
+    with mock.patch.object(
+        client.completions._client._client,
+        "send",
+        return_value=model_response,
+    ):
+        with start_transaction(name="litellm test"):
+            response = await litellm.acompletion(
+                model="gpt-3.5-turbo",
+                messages=messages,
+                client=client,
+                stream=True,
+            )
+            async for _ in response:
+                pass
+
+            await GLOBAL_LOGGING_WORKER.flush()
+            await asyncio.sleep(0.5)
+
+    assert len(events) == 1
+    (event,) = events
+
+    assert event["type"] == "transaction"
+    chat_spans = list(
+        x
+        for x in event["spans"]
+        if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm"
+    )
+    assert len(chat_spans) == 1
+    span = chat_spans[0]
+
+    assert span["op"] == OP.GEN_AI_CHAT
+    assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True
+
+
 def test_embeddings_create(
     sentry_init,
     capture_events,
@@ -347,6 +507,74 @@ def test_embeddings_create(
         assert json.loads(embeddings_input) == ["Hello, world!"]
 
 
+@pytest.mark.asyncio(loop_scope="session")
+async def test_async_embeddings_create(
+    sentry_init,
+    capture_events,
+    get_model_response,
+    openai_embedding_model_response,
+    clear_litellm_cache,
+):
+    """
+    Test that litellm.aembedding() calls are properly instrumented.
+
+    This test awaits the actual litellm.aembedding() function (not just callbacks)
+    to ensure proper integration testing.
+    """
+    sentry_init(
+        integrations=[LiteLLMIntegration(include_prompts=True)],
+        traces_sample_rate=1.0,
+        send_default_pii=True,
+    )
+    events = capture_events()
+
+    client = AsyncOpenAI(api_key="z")
+
+    model_response = get_model_response(
+        openai_embedding_model_response,
+        serialize_pydantic=True,
+        request_headers={"X-Stainless-Raw-Response": "true"},
+    )
+
+    with mock.patch.object(
+        client.embeddings._client._client,
+        "send",
+        return_value=model_response,
+    ):
+        with start_transaction(name="litellm test"):
+            response = await litellm.aembedding(
+                model="text-embedding-ada-002",
+                input="Hello, world!",
+                client=client,
+            )
+
+            await GLOBAL_LOGGING_WORKER.flush()
+            await asyncio.sleep(0.5)
+
+        # Response is processed by litellm, so just check it exists
+        assert response is not None
+        assert len(events) == 1
+        (event,) = events
+
+        assert event["type"] == "transaction"
+        spans = list(
+            x
+            for x in event["spans"]
+            if x["op"] == OP.GEN_AI_EMBEDDINGS and x["origin"] == "auto.ai.litellm"
+        )
+        assert len(spans) == 1
+        span = spans[0]
+
+        assert span["op"] == OP.GEN_AI_EMBEDDINGS
+        assert span["description"] == "embeddings text-embedding-ada-002"
+        assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings"
+        assert span["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS] == 5
+        assert span["data"][SPANDATA.GEN_AI_REQUEST_MODEL] == "text-embedding-ada-002"
+        # Check that embeddings input is captured (it's JSON serialized)
+        embeddings_input = span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]
+        assert json.loads(embeddings_input) == ["Hello, world!"]
+
+
 def test_embeddings_create_with_list_input(
     sentry_init,
     capture_events,
@@ -409,6 +637,70 @@ def test_embeddings_create_with_list_input(
         ]
 
 
+@pytest.mark.asyncio(loop_scope="session")
+async def test_async_embeddings_create_with_list_input(
+    sentry_init,
+    capture_events,
+    get_model_response,
+    openai_embedding_model_response,
+    clear_litellm_cache,
+):
+    """Test that litellm.aembedding() captures a list input on the span."""
+    sentry_init(
+        integrations=[LiteLLMIntegration(include_prompts=True)],
+        traces_sample_rate=1.0,
+        send_default_pii=True,
+    )
+    events = capture_events()
+
+    client = AsyncOpenAI(api_key="z")
+
+    model_response = get_model_response(
+        openai_embedding_model_response,
+        serialize_pydantic=True,
+        request_headers={"X-Stainless-Raw-Response": "true"},
+    )
+
+    with mock.patch.object(
+        client.embeddings._client._client,
+        "send",
+        return_value=model_response,
+    ):
+        with start_transaction(name="litellm test"):
+            response = await litellm.aembedding(
+                model="text-embedding-ada-002",
+                input=["First text", "Second text", "Third text"],
+                client=client,
+            )
+
+            await GLOBAL_LOGGING_WORKER.flush()
+            await asyncio.sleep(0.5)
+
+        # Response is processed by litellm, so just check it exists
+        assert response is not None
+        assert len(events) == 1
+        (event,) = events
+
+        assert event["type"] == "transaction"
+        spans = list(
+            x
+            for x in event["spans"]
+            if x["op"] == OP.GEN_AI_EMBEDDINGS and x["origin"] == "auto.ai.litellm"
+        )
+        assert len(spans) == 1
+        span = spans[0]
+
+        assert span["op"] == OP.GEN_AI_EMBEDDINGS
+        assert span["data"][SPANDATA.GEN_AI_OPERATION_NAME] == "embeddings"
+        # Check that list of embeddings input is captured (it's JSON serialized)
+        embeddings_input = span["data"][SPANDATA.GEN_AI_EMBEDDINGS_INPUT]
+        assert json.loads(embeddings_input) == [
+            "First text",
+            "Second text",
+            "Third text",
+        ]
+
+
 def test_embeddings_no_pii(
     sentry_init,
     capture_events,
@@ -465,6 +757,64 @@ def test_embeddings_no_pii(
         assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["data"]
 
 
+@pytest.mark.asyncio(loop_scope="session")
+async def test_async_embeddings_no_pii(
+    sentry_init,
+    capture_events,
+    get_model_response,
+    openai_embedding_model_response,
+    clear_litellm_cache,
+):
+    """Test that PII is not captured when disabled."""
+    sentry_init(
+        integrations=[LiteLLMIntegration(include_prompts=True)],
+        traces_sample_rate=1.0,
+        send_default_pii=False,  # PII disabled
+    )
+    events = capture_events()
+
+    client = AsyncOpenAI(api_key="z")
+
+    model_response = get_model_response(
+        openai_embedding_model_response,
+        serialize_pydantic=True,
+        request_headers={"X-Stainless-Raw-Response": "true"},
+    )
+
+    with mock.patch.object(
+        client.embeddings._client._client,
+        "send",
+        return_value=model_response,
+    ):
+        with start_transaction(name="litellm test"):
+            response = await litellm.aembedding(
+                model="text-embedding-ada-002",
+                input="Hello, world!",
+                client=client,
+            )
+
+            await GLOBAL_LOGGING_WORKER.flush()
+            await asyncio.sleep(0.5)
+
+        # Response is processed by litellm, so just check it exists
+        assert response is not None
+        assert len(events) == 1
+        (event,) = events
+
+        assert event["type"] == "transaction"
+        spans = list(
+            x
+            for x in event["spans"]
+            if x["op"] == OP.GEN_AI_EMBEDDINGS and x["origin"] == "auto.ai.litellm"
+        )
+        assert len(spans) == 1
+        span = spans[0]
+
+        assert span["op"] == OP.GEN_AI_EMBEDDINGS
+        # Check that embeddings input is NOT captured when PII is disabled
+        assert SPANDATA.GEN_AI_EMBEDDINGS_INPUT not in span["data"]
+
+
 def test_exception_handling(
     reset_litellm_executor, sentry_init, capture_events, get_rate_limit_model_response
 ):
@@ -500,6 +850,42 @@ def test_exception_handling(
     assert len(error_events) == 1
 
 
+@pytest.mark.asyncio(loop_scope="session")
+async def test_async_exception_handling(
+    sentry_init, capture_events, get_rate_limit_model_response
+):
+    sentry_init(
+        integrations=[LiteLLMIntegration()],
+        traces_sample_rate=1.0,
+    )
+    events = capture_events()
+
+    messages = [{"role": "user", "content": "Hello!"}]
+
+    client = AsyncOpenAI(api_key="z")
+
+    model_response = get_rate_limit_model_response()
+
+    with mock.patch.object(
+        client.completions._client._client,
+        "send",
+        return_value=model_response,
+    ):
+        with start_transaction(name="litellm test"):
+            with pytest.raises(litellm.RateLimitError):
+                await litellm.acompletion(
+                    model="gpt-3.5-turbo",
+                    messages=messages,
+                    client=client,
+                )
+
+    # Should have error event and transaction
+    assert len(events) >= 1
+    # Find the error event
+    error_events = [e for e in events if e.get("level") == "error"]
+    assert len(error_events) == 1
+
+
 def test_span_origin(
     reset_litellm_executor,
     sentry_init,
@@ -637,8 +1023,163 @@ def test_multiple_providers(
         assert SPANDATA.GEN_AI_SYSTEM in span["data"]
 
 
-def test_additional_parameters(
-    reset_litellm_executor,
+@pytest.mark.asyncio(loop_scope="session")
+async def test_async_multiple_providers(
+    sentry_init,
+    capture_events,
+    get_model_response,
+    nonstreaming_chat_completions_model_response,
+    nonstreaming_anthropic_model_response,
+    nonstreaming_google_genai_model_response,
+):
+    """Test that the integration correctly identifies different providers."""
+    sentry_init(
+        integrations=[LiteLLMIntegration()],
+        traces_sample_rate=1.0,
+    )
+    events = capture_events()
+
+    messages = [{"role": "user", "content": "Hello!"}]
+
+    openai_client = AsyncOpenAI(api_key="z")
+    openai_model_response = get_model_response(
+        nonstreaming_chat_completions_model_response,
+        serialize_pydantic=True,
+        request_headers={"X-Stainless-Raw-Response": "true"},
+    )
+
+    with mock.patch.object(
+        openai_client.completions._client._client,
+        "send",
+        return_value=openai_model_response,
+    ):
+        with start_transaction(name="test gpt-3.5-turbo"):
+            await litellm.acompletion(
+                model="gpt-3.5-turbo",
+                messages=messages,
+                client=openai_client,
+            )
+
+            await GLOBAL_LOGGING_WORKER.flush()
+            await asyncio.sleep(0.5)
+
+    _reset_litellm_executor()
+
+    anthropic_client = AsyncHTTPHandler()
+    anthropic_model_response = get_model_response(
+        nonstreaming_anthropic_model_response,
+        serialize_pydantic=True,
+        request_headers={"X-Stainless-Raw-Response": "True"},
+    )
+
+    with mock.patch.object(
+        anthropic_client,
+        "post",
+        return_value=anthropic_model_response,
+    ):
+        with start_transaction(name="test claude-3-opus-20240229"):
+            await litellm.acompletion(
+                model="claude-3-opus-20240229",
+                messages=messages,
+                client=anthropic_client,
+                api_key="z",
+            )
+
+            await GLOBAL_LOGGING_WORKER.flush()
+            await asyncio.sleep(0.5)
+
+    _reset_litellm_executor()
+
+    gemini_client = AsyncHTTPHandler()
+    gemini_model_response = get_model_response(
+        nonstreaming_google_genai_model_response,
+        serialize_pydantic=True,
+    )
+
+    with mock.patch.object(
+        gemini_client,
+        "post",
+        return_value=gemini_model_response,
+    ):
+        with start_transaction(name="test gemini/gemini-pro"):
+            await litellm.acompletion(
+                model="gemini/gemini-pro",
+                messages=messages,
+                client=gemini_client,
+                api_key="z",
+            )
+
+            await GLOBAL_LOGGING_WORKER.flush()
+            await asyncio.sleep(0.5)
+
+    assert len(events) == 3
+
+    for i in range(3):
+        span = events[i]["spans"][0]
+        # The provider should be detected by litellm.get_llm_provider
+        assert SPANDATA.GEN_AI_SYSTEM in span["data"]
+
+
+def test_additional_parameters(
+    reset_litellm_executor,
+    sentry_init,
+    capture_events,
+    get_model_response,
+    nonstreaming_chat_completions_model_response,
+):
+    """Test that additional parameters are captured."""
+    sentry_init(
+        integrations=[LiteLLMIntegration()],
+        traces_sample_rate=1.0,
+    )
+    events = capture_events()
+
+    messages = [{"role": "user", "content": "Hello!"}]
+    client = OpenAI(api_key="z")
+
+    model_response = get_model_response(
+        nonstreaming_chat_completions_model_response,
+        serialize_pydantic=True,
+        request_headers={"X-Stainless-Raw-Response": "true"},
+    )
+
+    with mock.patch.object(
+        client.completions._client._client,
+        "send",
+        return_value=model_response,
+    ):
+        with start_transaction(name="litellm test"):
+            litellm.completion(
+                model="gpt-3.5-turbo",
+                messages=messages,
+                client=client,
+                temperature=0.7,
+                max_tokens=100,
+                top_p=0.9,
+                frequency_penalty=0.5,
+                presence_penalty=0.5,
+            )
+
+            litellm_utils.executor.shutdown(wait=True)
+
+    (event,) = events
+    chat_spans = list(
+        x
+        for x in event["spans"]
+        if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm"
+    )
+    assert len(chat_spans) == 1
+    span = chat_spans[0]
+
+    assert span["data"][SPANDATA.GEN_AI_REQUEST_TEMPERATURE] == 0.7
+    assert span["data"][SPANDATA.GEN_AI_REQUEST_MAX_TOKENS] == 100
+    assert span["data"][SPANDATA.GEN_AI_REQUEST_TOP_P] == 0.9
+    assert span["data"][SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY] == 0.5
+    assert span["data"][SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY] == 0.5
+
+
+@pytest.mark.asyncio(loop_scope="session")
+async def test_async_additional_parameters(
     sentry_init,
     capture_events,
     get_model_response,
@@ -652,7 +1193,7 @@ def test_additional_parameters(
     events = capture_events()
 
     messages = [{"role": "user", "content": "Hello!"}]
-    client = OpenAI(api_key="z")
+    client = AsyncOpenAI(api_key="z")
 
     model_response = get_model_response(
         nonstreaming_chat_completions_model_response,
@@ -666,7 +1207,7 @@ def test_additional_parameters(
         return_value=model_response,
     ):
         with start_transaction(name="litellm test"):
-            litellm.completion(
+            await litellm.acompletion(
                 model="gpt-3.5-turbo",
                 messages=messages,
                 client=client,
@@ -677,7 +1218,8 @@ def test_additional_parameters(
                 presence_penalty=0.5,
             )
 
-            litellm_utils.executor.shutdown(wait=True)
+            await GLOBAL_LOGGING_WORKER.flush()
+            await asyncio.sleep(0.5)
 
     (event,) = events
     chat_spans = list(
@@ -778,6 +1320,54 @@ def test_no_integration(
     assert len(chat_spans) == 0
 
 
+@pytest.mark.asyncio(loop_scope="session")
+async def test_async_no_integration(
+    sentry_init,
+    capture_events,
+    get_model_response,
+    nonstreaming_chat_completions_model_response,
+):
+    """Test that when integration is not enabled, callbacks don't break."""
+    sentry_init(
+        traces_sample_rate=1.0,
+    )
+    events = capture_events()
+
+    messages = [{"role": "user", "content": "Hello!"}]
+    client = AsyncOpenAI(api_key="z")
+
+    model_response = get_model_response(
+        nonstreaming_chat_completions_model_response,
+        serialize_pydantic=True,
+        request_headers={"X-Stainless-Raw-Response": "true"},
+    )
+
+    with mock.patch.object(
+        client.completions._client._client,
+        "send",
+        return_value=model_response,
+    ):
+        with start_transaction(name="litellm test"):
+            await litellm.acompletion(
+                model="gpt-3.5-turbo",
+                messages=messages,
+                client=client,
+            )
+
+            await GLOBAL_LOGGING_WORKER.flush()
+            await asyncio.sleep(0.5)
+
+    (event,) = events
+    # Should still have the transaction, but no child spans since integration is off
+    assert event["type"] == "transaction"
+    chat_spans = list(
+        x
+        for x in event["spans"]
+        if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm"
+    )
+    assert len(chat_spans) == 0
+
+
 def test_response_without_usage(sentry_init, capture_events):
     """Test handling of responses without usage information."""
     sentry_init(
@@ -973,6 +1563,85 @@ def test_binary_content_encoding_image_url(
     )
 
 
+@pytest.mark.asyncio(loop_scope="session")
+async def test_async_binary_content_encoding_image_url(
+    sentry_init,
+    capture_events,
+    get_model_response,
+    nonstreaming_chat_completions_model_response,
+):
+    sentry_init(
+        integrations=[LiteLLMIntegration(include_prompts=True)],
+        traces_sample_rate=1.0,
+        send_default_pii=True,
+    )
+    events = capture_events()
+
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {"type": "text", "text": "Look at this image:"},
+                {
+                    "type": "image_url",
+                    "image_url": {"url": IMAGE_DATA_URI, "detail": "high"},
+                },
+            ],
+        }
+    ]
+    client = AsyncOpenAI(api_key="z")
+
+    model_response = get_model_response(
+        nonstreaming_chat_completions_model_response,
+        serialize_pydantic=True,
+        request_headers={"X-Stainless-Raw-Response": "true"},
+    )
+
+    with mock.patch.object(
+        client.completions._client._client,
+        "send",
+        return_value=model_response,
+    ):
+        with start_transaction(name="litellm test"):
+            await litellm.acompletion(
+                model="gpt-4-vision-preview",
+                messages=messages,
+                client=client,
+                custom_llm_provider="openai",
+            )
+
+            await GLOBAL_LOGGING_WORKER.flush()
+            await asyncio.sleep(0.5)
+
+    (event,) = events
+    chat_spans = list(
+        x
+        for x in event["spans"]
+        if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm"
+    )
+    assert len(chat_spans) == 1
+    span = chat_spans[0]
+    messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES])
+
+    blob_item = next(
+        (
+            item
+            for msg in messages_data
+            if "content" in msg
+            for item in msg["content"]
+            if item.get("type") == "blob"
+        ),
+        None,
+    )
+    assert blob_item is not None
+    assert blob_item["modality"] == "image"
+    assert blob_item["mime_type"] == "image/png"
+    assert (
+        IMAGE_B64 in blob_item["content"]
+        or blob_item["content"] == BLOB_DATA_SUBSTITUTE
+    )
+
+
 def test_binary_content_encoding_mixed_content(
     reset_litellm_executor,
     sentry_init,
@@ -1040,6 +1709,74 @@ def test_binary_content_encoding_mixed_content(
     assert any(item.get("type") == "blob" for item in content_items)
 
 
+@pytest.mark.asyncio(loop_scope="session")
+async def test_async_binary_content_encoding_mixed_content(
+    sentry_init,
+    capture_events,
+    get_model_response,
+    nonstreaming_chat_completions_model_response,
+):
+    sentry_init(
+        integrations=[LiteLLMIntegration(include_prompts=True)],
+        traces_sample_rate=1.0,
+        send_default_pii=True,
+    )
+    events = capture_events()
+
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {"type": "text", "text": "Here is an image:"},
+                {
+                    "type": "image_url",
+                    "image_url": {"url": IMAGE_DATA_URI},
+                },
+                {"type": "text", "text": "What do you see?"},
+            ],
+        }
+    ]
+    client = AsyncOpenAI(api_key="z")
+
+    model_response = get_model_response(
+        nonstreaming_chat_completions_model_response,
+        serialize_pydantic=True,
+        request_headers={"X-Stainless-Raw-Response": "true"},
+    )
+
+    with mock.patch.object(
+        client.completions._client._client,
+        "send",
+        return_value=model_response,
+    ):
+        with start_transaction(name="litellm test"):
+            await litellm.acompletion(
+                model="gpt-4-vision-preview",
+                messages=messages,
+                client=client,
+                custom_llm_provider="openai",
+            )
+
+            await GLOBAL_LOGGING_WORKER.flush()
+            await asyncio.sleep(0.5)
+
+    (event,) = events
+    chat_spans = list(
+        x
+        for x in event["spans"]
+        if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm"
+    )
+    assert len(chat_spans) == 1
+    span = chat_spans[0]
+    messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES])
+
+    content_items = [
+        item for msg in messages_data if "content" in msg for item in msg["content"]
+    ]
+    assert any(item.get("type") == "text" for item in content_items)
+    assert any(item.get("type") == "blob" for item in content_items)
+
+
 def test_binary_content_encoding_uri_type(
     reset_litellm_executor,
     sentry_init,
@@ -1112,6 +1849,79 @@ def test_binary_content_encoding_uri_type(
     assert uri_item["uri"] == "https://example.com/image.jpg"
 
 
+@pytest.mark.asyncio(loop_scope="session")
+async def test_async_binary_content_encoding_uri_type(
+    sentry_init,
+    capture_events,
+    get_model_response,
+    nonstreaming_chat_completions_model_response,
+):
+    sentry_init(
+        integrations=[LiteLLMIntegration(include_prompts=True)],
+        traces_sample_rate=1.0,
+        send_default_pii=True,
+    )
+    events = capture_events()
+
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "image_url",
+                    "image_url": {"url": "https://example.com/image.jpg"},
+                }
+            ],
+        }
+    ]
+    client = AsyncOpenAI(api_key="z")
+
+    model_response = get_model_response(
+        nonstreaming_chat_completions_model_response,
+        serialize_pydantic=True,
+        request_headers={"X-Stainless-Raw-Response": "true"},
+    )
+
+    with mock.patch.object(
+        client.completions._client._client,
+        "send",
+        return_value=model_response,
+    ):
+        with start_transaction(name="litellm test"):
+            await litellm.acompletion(
+                model="gpt-4-vision-preview",
+                messages=messages,
+                client=client,
+                custom_llm_provider="openai",
+            )
+
+            await GLOBAL_LOGGING_WORKER.flush()
+            await asyncio.sleep(0.5)
+
+    (event,) = events
+    chat_spans = list(
+        x
+        for x in event["spans"]
+        if x["op"] == OP.GEN_AI_CHAT and x["origin"] == "auto.ai.litellm"
+    )
+    assert len(chat_spans) == 1
+    span = chat_spans[0]
+    messages_data = json.loads(span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES])
+
+    uri_item = next(
+        (
+            item
+            for msg in messages_data
+            if "content" in msg
+            for item in msg["content"]
+            if item.get("type") == "uri"
+        ),
+        None,
+    )
+    assert uri_item is not None
+    assert uri_item["uri"] == "https://example.com/image.jpg"
+
+
 def test_convert_message_parts_direct():
     messages = [
         {
diff --git a/tox.ini b/tox.ini
index cd5d939f6d..9a974973a2 100644
--- a/tox.ini
+++ b/tox.ini
@@ -552,6 +552,7 @@ deps =
     litellm-latest: litellm==1.83.4
     litellm: anthropic
     litellm: google-genai
+    litellm: pytest-asyncio
 
     openai-base-v1.0.1: openai==1.0.1
     openai-base-v1.109.1: openai==1.109.1