Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -374,7 +374,7 @@ conversion-report vocabulary. It can run contract-only with no OpenTelemetry
dependency, or emit real spans when the optional OTel extras are installed:

```bash
pip install -e "python[otel]"
pip install -e "python[otel,validation]"
PYTHONPATH=python python3 -m unittest discover -s python/tests
```

Expand Down Expand Up @@ -406,6 +406,11 @@ assert report.contract_version == "eval2otel.v1"
Provider hooks are optional. If provider packages and compatible
OpenLLMetry/OpenInference instrumentors are installed, Eval2Otel invokes them;
otherwise it returns structured handles explaining what was available.
The Python package also registers an `opentelemetry_instrumentor` entry point
named `eval2otel`, so `opentelemetry-instrument python main.py` can discover the
same `instrument_all()` path when the `otel` extra (which includes
`opentelemetry-instrumentation`) is installed.
Install the `validation` extra for optional Pydantic models in
`eval2otel.models`.

See [python/README.md](./python/README.md).

Expand Down
44 changes: 43 additions & 1 deletion python/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ those extras, it still validates Eval2Otel payloads and returns conversion
reports.

```bash
pip install -e ".[otel]"
pip install -e ".[otel,validation]"
```

```python
Expand Down Expand Up @@ -40,6 +40,30 @@ assert report.contract_version == "eval2otel.v1"
client.shutdown()
```

## Zero-Code Instrumentation

The package registers an `opentelemetry_instrumentor` entry point named
`eval2otel`. In an environment with `opentelemetry-instrumentation` installed,
`opentelemetry-instrument` can discover Eval2Otel and call the same
`instrument_all()` path used above:

```bash
OTEL_SERVICE_NAME=my-ai-service \
OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318 \
OTEL_EXPORTER_OTLP_PROTOCOL=http/protobuf \
EVAL2OTEL_PROVIDERS=openai,anthropic \
opentelemetry-instrument python main.py
```

Programmatic use is also available:

```python
from eval2otel import Eval2OtelInstrumentor, get_instrumented_client

Eval2OtelInstrumentor().instrument()
client = get_instrumented_client()
```

## Environment

`instrument_all()` reads:
Expand Down Expand Up @@ -77,6 +101,24 @@ Supported provider names:

Set `EVAL2OTEL_PROVIDERS=openai,anthropic` to limit discovery.

## Typed Validation

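Install the `validation` extra to use optional Pydantic models. The example
below assumes `client` is an existing `Eval2Otel` client, such as the one
returned by `instrument_all()`: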

```python
from eval2otel.models import EvalResultModel

payload = EvalResultModel.model_validate({
"id": "case-1",
"model": "gpt-4o-mini",
"operation": "chat",
"request": {"model": "gpt-4o-mini"},
"performance": {"duration": 0.25},
})

client.process_evaluation(payload.to_eval_result())
```

## Development

From the repository root:
Expand Down
3 changes: 3 additions & 0 deletions python/eval2otel/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from .auto import instrument_all
from .autoinstrument import Eval2OtelInstrumentor, get_instrumented_client
from .contract import (
EVAL2OTEL_CONTRACT_VERSION,
UNKNOWN_SEMCONV_VERSION,
Expand Down Expand Up @@ -30,12 +31,14 @@
"ConversionReport",
"ConversionWarning",
"Eval2Otel",
"Eval2OtelInstrumentor",
"Eval2OtelEvidence",
"Eval2OtelProvenance",
"EvalResult",
"build_eval2otel_attributes",
"build_span_attributes",
"instrument_all",
"get_instrumented_client",
"instrument_all_providers",
"instrument_anthropic",
"instrument_bedrock",
Expand Down
49 changes: 49 additions & 0 deletions python/eval2otel/autoinstrument.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
from __future__ import annotations

from typing import Any, Iterable

from .auto import instrument_all
from .contract import Eval2Otel

try:
    from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
except ImportError:
    # The OpenTelemetry instrumentation package is not importable, so provide a
    # small stand-in exposing the same public instrument()/uninstrument() pair.
    class BaseInstrumentor:  # type: ignore[no-redef]
        """Stand-in base class that forwards to the subclass hooks."""

        def instrument(self, **options: Any) -> None:
            """Forward every keyword option to the ``_instrument`` hook."""
            self._instrument(**options)

        def uninstrument(self, **options: Any) -> None:
            """Forward every keyword option to the ``_uninstrument`` hook."""
            self._uninstrument(**options)


# Module-level slot holding the client produced by
# Eval2OtelInstrumentor._instrument(); _uninstrument() resets it to None and
# get_instrumented_client() returns its current value.
_INSTRUMENTED_CLIENT: Eval2Otel | None = None


class Eval2OtelInstrumentor(BaseInstrumentor):
    """OpenTelemetry auto-instrumentation entry point for eval2otel.

    Instrumenting calls ``instrument_all()`` and publishes the resulting
    client in the module-level ``_INSTRUMENTED_CLIENT`` slot; uninstrumenting
    shuts that client down and clears the slot.
    """

    def instrumentation_dependencies(self) -> list[str]:
        """Return the packages this instrumentor depends on (none)."""
        return []

    def _instrument(
        self,
        *,
        providers: Iterable[str] | None = None,
        patch_providers: bool = True,
        **_: Any,
    ) -> None:
        """Create the shared client through ``instrument_all()``.

        Args:
            providers: Provider names forwarded to ``instrument_all()``;
                ``None`` is forwarded unchanged.
            patch_providers: Forwarded unchanged to ``instrument_all()``.
            **_: Any other keyword arguments are accepted and ignored.
        """
        global _INSTRUMENTED_CLIENT
        # The fallback BaseInstrumentor has no "already instrumented" guard,
        # so a second instrument() call can reach this point while a client is
        # still published. Shut the old one down instead of dropping it.
        previous, _INSTRUMENTED_CLIENT = _INSTRUMENTED_CLIENT, None
        if previous is not None:
            previous.shutdown()
        _INSTRUMENTED_CLIENT = instrument_all(
            providers=providers,
            patch_providers=patch_providers,
        )

    def _uninstrument(self, **_: Any) -> None:
        """Shut down the shared client, if any, and clear the module slot."""
        global _INSTRUMENTED_CLIENT
        # Clear the slot before calling shutdown() so that a failing shutdown
        # cannot leave a half-closed client visible to callers.
        client, _INSTRUMENTED_CLIENT = _INSTRUMENTED_CLIENT, None
        if client is not None:
            client.shutdown()


def get_instrumented_client() -> Eval2Otel | None:
    """Return the current value of the module-level instrumented client.

    The value is ``None`` when no client is currently stored.
    """
    current = _INSTRUMENTED_CLIENT
    return current
78 changes: 78 additions & 0 deletions python/eval2otel/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
from __future__ import annotations

from typing import Any, Dict, List, Literal, Optional, Union

try:
from pydantic import BaseModel, ConfigDict, Field, field_validator
except ImportError as exc:
raise ImportError(
"eval2otel.models requires pydantic. Install eval2otel-python[validation]."
) from exc

from .contract import EvalResult


# Pydantic schema for a single conversion warning: a code, a message and a
# severity level.
class ConversionWarningModel(BaseModel):
    code: str
    message: str
    # Limited to the three listed levels; "warning" is used when omitted.
    severity: Literal["info", "warning", "error"] = "warning"


# Pydantic schema for provenance metadata. Every field is optional and
# defaults to None; all but `adapter` declare a camelCase alias.
class Eval2OtelProvenanceModel(BaseModel):
    # populate_by_name lets input use either the snake_case field name or the
    # declared camelCase alias (see the Pydantic ConfigDict documentation).
    model_config = ConfigDict(populate_by_name=True)

    source_framework: Optional[str] = Field(default=None, alias="sourceFramework")
    run_id: Optional[str] = Field(default=None, alias="runId")
    case_id: Optional[str] = Field(default=None, alias="caseId")
    dataset_id: Optional[str] = Field(default=None, alias="datasetId")
    dataset_version: Optional[str] = Field(default=None, alias="datasetVersion")
    adapter: Optional[str] = None
    adapter_version: Optional[str] = Field(default=None, alias="adapterVersion")
    contract_version: Optional[str] = Field(default=None, alias="contractVersion")
    semconv_version: Optional[str] = Field(default=None, alias="semconvVersion")


# Pydantic schema for evidence metadata: optional SHA-256 strings, counters
# and a list of conversion warnings.
class Eval2OtelEvidenceModel(BaseModel):
    # populate_by_name lets input use either the snake_case field name or the
    # declared camelCase alias (see the Pydantic ConfigDict documentation).
    model_config = ConfigDict(populate_by_name=True)

    raw_payload_sha256: Optional[str] = Field(default=None, alias="rawPayloadSha256")
    prompt_sha256: Optional[str] = Field(default=None, alias="promptSha256")
    response_sha256: Optional[str] = Field(default=None, alias="responseSha256")
    # Counters are constrained to be >= 0 (ge=0) and default to 0.
    redacted_content_count: int = Field(default=0, alias="redactedContentCount", ge=0)
    truncated_content_count: int = Field(default=0, alias="truncatedContentCount", ge=0)
    dropped_event_count: int = Field(default=0, alias="droppedEventCount", ge=0)
    # Unlike the counters above, warning_count defaults to None rather than 0.
    warning_count: Optional[int] = Field(default=None, alias="warningCount", ge=0)
    warnings: List[ConversionWarningModel] = Field(default_factory=list)


# Pydantic schema for an evaluation result payload. `id`, `model`,
# `operation`, `request` and `performance` are required; the rest have
# defaults. Convert to the contract type with to_eval_result().
class EvalResultModel(BaseModel):
    # extra="allow" accepts keys that are not declared below instead of
    # rejecting them (see the Pydantic ConfigDict documentation).
    model_config = ConfigDict(populate_by_name=True, extra="allow")

    id: str
    timestamp: Optional[float] = None
    model: str
    system: Optional[str] = None
    operation: Literal["chat", "text_completion", "embeddings", "execute_tool", "agent_execution", "workflow_step"]
    request: Dict[str, Any]
    response: Dict[str, Any] = Field(default_factory=dict)
    usage: Dict[str, Any] = Field(default_factory=dict)
    performance: Dict[str, Any]
    conversation: Optional[Dict[str, Any]] = None
    # provenance/evidence accept either the typed sub-model or a plain dict.
    provenance: Optional[Union[Eval2OtelProvenanceModel, Dict[str, Any]]] = None
    evidence: Optional[Union[Eval2OtelEvidenceModel, Dict[str, Any]]] = None

    @field_validator("performance")
    @classmethod
    def require_duration(cls, value: Dict[str, Any]) -> Dict[str, Any]:
        """Reject a ``performance`` mapping that has no ``"duration"`` key.

        Raises:
            ValueError: If ``"duration"`` is missing from ``value``.
        """
        if "duration" not in value:
            raise ValueError("performance.duration is required")
        return value

    def to_eval_result(self) -> EvalResult:
        """Dump this model and build an ``EvalResult`` from the mapping.

        The dump uses field aliases and omits fields whose value is ``None``
        before it is passed to ``EvalResult.from_mapping``.
        """
        return EvalResult.from_mapping(self.model_dump(by_alias=True, exclude_none=True))


def validate_eval_result(value: Union[EvalResult, Dict[str, Any]]) -> EvalResult:
    """Return ``value`` as an ``EvalResult``, validating it when needed.

    An existing ``EvalResult`` is returned unchanged. Any other value is
    validated through ``EvalResultModel`` and then converted.
    """
    if not isinstance(value, EvalResult):
        validated = EvalResultModel.model_validate(value)
        return validated.to_eval_result()
    return value
9 changes: 8 additions & 1 deletion python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ name = "eval2otel-python"
version = "0.1.0"
description = "Python SDK preview for eval2otel telemetry payloads"
readme = "README.md"
requires-python = ">=3.10"
requires-python = ">=3.9"
license = { text = "MIT" }
authors = [
{ name = "EvalOps" }
Expand All @@ -19,12 +19,19 @@ otel = [
"opentelemetry-api>=1.29",
"opentelemetry-sdk>=1.29",
"opentelemetry-exporter-otlp>=1.29",
"opentelemetry-instrumentation>=0.50b0",
]
validation = [
"pydantic>=2",
]
providers = [
"openai>=1",
"anthropic>=0.34",
"cohere>=5",
]

[project.entry-points.opentelemetry_instrumentor]
eval2otel = "eval2otel.autoinstrument:Eval2OtelInstrumentor"

[tool.hatch.build.targets.wheel]
packages = ["eval2otel"]
35 changes: 35 additions & 0 deletions python/tests/test_contract.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@

from eval2otel import (
EVAL2OTEL_CONTRACT_VERSION,
Eval2OtelInstrumentor,
Eval2Otel,
EvalResult,
build_eval2otel_attributes,
build_span_attributes,
get_instrumented_client,
instrument_all,
instrument_all_providers,
instrument_openai,
Expand Down Expand Up @@ -157,6 +159,39 @@ def fake_find_spec(name: str):
self.assertEqual(handle.instrumentation, "opentelemetry.instrumentation.openai.OpenAIInstrumentor")
self.assertEqual(FakeOpenAIInstrumentor.calls, [{}])

def test_auto_instrumentor_entrypoint_uses_env_configuration(self) -> None:
    """Instrumenting with only env configuration publishes a usable client."""
    instrumentor = Eval2OtelInstrumentor()
    # Register cleanup before instrumenting: if any assertion below fails,
    # the module-level client must still be released so it cannot leak into
    # later tests. A second uninstrument() after the explicit one is harmless.
    self.addCleanup(instrumentor.uninstrument)
    with mock.patch.dict("os.environ", {"EVAL2OTEL_PROVIDERS": "unknown"}, clear=False):
        instrumentor.instrument()

    client = get_instrumented_client()
    self.assertIsInstance(client, Eval2Otel)
    self.assertEqual(client.service_name, "eval2otel-python")
    self.assertEqual(len(client.instrumentation_handles), 1)
    self.assertEqual(client.instrumentation_handles[0].provider, "unknown")

    instrumentor.uninstrument()
    self.assertIsNone(get_instrumented_client())

def test_pydantic_model_validates_and_converts_eval_results(self) -> None:
    """A validated payload converts to EvalResult and survives a dump round trip."""
    try:
        from eval2otel.models import EvalResultModel, validate_eval_result
    except ImportError as exc:
        # Skip when the models module cannot be imported.
        self.skipTest(str(exc))

    payload = {
        "id": "py-pydantic",
        "model": "gpt-4o-mini",
        "operation": "chat",
        "request": {"model": "gpt-4o-mini"},
        "performance": {"duration": 0.2},
        "provenance": {"sourceFramework": "pytest", "caseId": "case-1"},
    }
    validated = EvalResultModel.model_validate(payload)
    converted = validated.to_eval_result()
    self.assertIsInstance(converted, EvalResult)
    self.assertEqual(converted.provenance.source_framework, "pytest")

    round_tripped = validate_eval_result(validated.model_dump(by_alias=True))
    self.assertEqual(round_tripped.id, "py-pydantic")

def test_required_contract_fields_are_validated(self) -> None:
with self.assertRaisesRegex(ValueError, "performance.duration"):
EvalResult.from_mapping({
Expand Down
Loading