Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 31 additions & 9 deletions python/packages/kagent-adk/src/kagent/adk/models/_openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@
if TYPE_CHECKING:
from google.adk.models.llm_request import LlmRequest

# Emitted when a guardrail or content filter blocks a response, leaving no content to surface.
_CONTENT_BLOCKED_PLACEHOLDER = "Response blocked by content policy."


def _convert_role_to_openai(role: Optional[str]) -> str:
"""Convert google.genai role to OpenAI role."""
Expand Down Expand Up @@ -316,6 +319,24 @@ def _convert_tools_to_openai(tools: list[types.Tool]) -> list[ChatCompletionTool

def _convert_openai_response_to_llm_response(response: ChatCompletion) -> LlmResponse:
"""Convert OpenAI response to LlmResponse."""
# Handle usage metadata
usage_metadata = None
if hasattr(response, "usage") and response.usage:
usage_metadata = types.GenerateContentResponseUsageMetadata(
prompt_token_count=response.usage.prompt_tokens,
candidates_token_count=response.usage.completion_tokens,
total_token_count=response.usage.total_tokens,
)

if not response.choices:
return LlmResponse(
content=types.Content(
role="model",
parts=[types.Part.from_text(text=_CONTENT_BLOCKED_PLACEHOLDER)],
),
usage_metadata=usage_metadata,
finish_reason=types.FinishReason.SAFETY,
)
Comment on lines +331 to +339

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in f49c71d — usage extraction is now hoisted above the empty-choices guard and included in the early return.

choice = response.choices[0]
message = choice.message

Expand Down Expand Up @@ -346,15 +367,6 @@ def _convert_openai_response_to_llm_response(response: ChatCompletion) -> LlmRes

content = types.Content(role="model", parts=parts)

# Handle usage metadata
usage_metadata = None
if hasattr(response, "usage") and response.usage:
usage_metadata = types.GenerateContentResponseUsageMetadata(
prompt_token_count=response.usage.prompt_tokens,
candidates_token_count=response.usage.completion_tokens,
total_token_count=response.usage.total_tokens,
)

# Handle finish reason
finish_reason = types.FinishReason.STOP
if choice.finish_reason == "length":
Expand Down Expand Up @@ -582,6 +594,16 @@ async def generate_content_async(
elif finish_reason == "tool_calls":
final_reason = types.FinishReason.STOP # Tool calls is a normal completion

# Guardrail or content filter can produce zero content/tool chunks.
# An empty parts list causes downstream IndexError; emit a placeholder.
if not final_parts:
if final_reason == types.FinishReason.MAX_TOKENS:
# Truncated by length before any content; not a safety block.
final_parts.append(types.Part.from_text(text=""))
else:
final_parts.append(types.Part.from_text(text=_CONTENT_BLOCKED_PLACEHOLDER))
final_reason = types.FinishReason.SAFETY
Comment on lines +597 to +605

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Keeping the broad fallback intentionally: the motivating case is AWS Bedrock guardrails (via agentgateway), which block responses without setting finish_reason == "content_filter" — narrowing to an explicit filter signal would reintroduce the original crash for exactly the case this PR fixes. A stop finish with literally zero content and zero tool calls does not occur in normal operation, so treating it as a filtered response is the safer default. MAX_TOKENS is preserved since that one has a legitimate empty-content interpretation.


# Always yield final response to signal completion and valid metadata
final_content = types.Content(role="model", parts=final_parts)
yield LlmResponse(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -526,6 +526,65 @@ async def gen():
assert final_response.usage_metadata.total_token_count == 15


@pytest.mark.asyncio
async def test_streaming_with_no_content_chunks_returns_safety_finish_reason(openai_llm, llm_request):
"""A stream where every chunk is filtered (e.g. by a guardrail) should not raise IndexError."""

class EmptyContentDelta:
role = "assistant"
tool_calls = None
content = None

class EmptyContentChunkChoice:
def __init__(self, finish_reason=None):
self.delta = EmptyContentDelta()
self.finish_reason = finish_reason
self.index = 0

class EmptyContentChunk:
id = "chatcmpl-test"
created = 1234567890
model = "gpt-3.5-turbo"
object = "chat.completion.chunk"
usage = None

def __init__(self, finish_reason=None):
self.choices = [EmptyContentChunkChoice(finish_reason)]

with mock.patch.object(openai_llm, "_client") as mock_client:

async def mock_stream_gen_func(*args, **kwargs):
async def gen():
yield EmptyContentChunk(finish_reason="content_filter")

return gen()

mock_client.chat.completions.create.side_effect = mock_stream_gen_func

stream_results = [resp async for resp in openai_llm.generate_content_async(llm_request, stream=True)]

final_response = stream_results[-1]
assert final_response.finish_reason == types.FinishReason.SAFETY
assert final_response.content.parts[0].text == "Response blocked by content policy."

# An empty stream truncated by length keeps MAX_TOKENS instead of being marked SAFETY.
with mock.patch.object(openai_llm, "_client") as mock_client:

async def mock_length_stream_gen_func(*args, **kwargs):
async def gen():
yield EmptyContentChunk(finish_reason="length")

return gen()

mock_client.chat.completions.create.side_effect = mock_length_stream_gen_func

stream_results = [resp async for resp in openai_llm.generate_content_async(llm_request, stream=True)]

final_response = stream_results[-1]
assert final_response.finish_reason == types.FinishReason.MAX_TOKENS
assert final_response.content.parts[0].text == ""


# ============================================================================
# SSL/TLS Configuration Tests
# ============================================================================
Expand Down Expand Up @@ -933,6 +992,22 @@ def __init__(self, message):
self.choices = [TestConvertOpenAIResponseToLlmResponse._MockChoice(message)]
self.usage = TestConvertOpenAIResponseToLlmResponse._MockUsage()

class _MockEmptyChoicesResponse:
def __init__(self):
self.choices = []
self.usage = TestConvertOpenAIResponseToLlmResponse._MockUsage()

def test_empty_choices_returns_safety_finish_reason(self):
"""A response with no choices (e.g. blocked by a guardrail) should not raise IndexError."""
response = self._MockEmptyChoicesResponse()

llm_response = _convert_openai_response_to_llm_response(response)

assert llm_response.finish_reason == types.FinishReason.SAFETY
assert llm_response.content.parts[0].text == "Response blocked by content policy."
assert llm_response.usage_metadata is not None
assert llm_response.usage_metadata.total_token_count == 8

def test_preserves_thought_signature_from_openai_tool_call_response(self):
response = self._MockResponse(
self._MockMessage(
Expand Down