From fe8c1fdf20c399ff10e81e0bfccd19ad83bf07bb Mon Sep 17 00:00:00 2001 From: Jonathan Haas Date: Mon, 18 May 2026 15:15:30 -0700 Subject: [PATCH] Add Python auto-instrument entry point --- README.md | 7 ++- python/README.md | 44 ++++++++++++++++- python/eval2otel/__init__.py | 3 ++ python/eval2otel/autoinstrument.py | 49 +++++++++++++++++++ python/eval2otel/models.py | 78 ++++++++++++++++++++++++++++++ python/pyproject.toml | 9 +++- python/tests/test_contract.py | 35 ++++++++++++++ 7 files changed, 222 insertions(+), 3 deletions(-) create mode 100644 python/eval2otel/autoinstrument.py create mode 100644 python/eval2otel/models.py diff --git a/README.md b/README.md index b667a92..06d2efc 100644 --- a/README.md +++ b/README.md @@ -374,7 +374,7 @@ conversion-report vocabulary. It can run contract-only with no OpenTelemetry dependency, or emit real spans when the optional OTel extras are installed: ```bash -pip install -e "python[otel]" +pip install -e "python[otel,validation]" PYTHONPATH=python python3 -m unittest discover -s python/tests ``` @@ -406,6 +406,11 @@ assert report.contract_version == "eval2otel.v1" Provider hooks are optional. If provider packages and compatible OpenLLMetry/OpenInference instrumentors are installed, Eval2Otel invokes them; otherwise it returns structured handles explaining what was available. +The Python package also registers an `opentelemetry_instrumentor` entry point +named `eval2otel`, so `opentelemetry-instrument python main.py` can discover the +same `instrument_all()` path when the OTel instrumentation extra is installed. +Install the `validation` extra for optional Pydantic models in +`eval2otel.models`. See [python/README.md](./python/README.md). diff --git a/python/README.md b/python/README.md index e4cabec..ff168e1 100644 --- a/python/README.md +++ b/python/README.md @@ -6,7 +6,7 @@ those extras, it still validates Eval2Otel payloads and returns conversion reports. ```bash -pip install -e ".[otel]" +pip install -e ".[otel,validation]" ``` ```python @@ -40,6 +40,30 @@ assert report.contract_version == "eval2otel.v1" client.shutdown() ``` +## Zero-Code Instrumentation + +The package registers an `opentelemetry_instrumentor` entry point named +`eval2otel`. In an environment with `opentelemetry-instrumentation` installed, +`opentelemetry-instrument` can discover Eval2Otel and call the same +`instrument_all()` path used above: + +```bash +OTEL_SERVICE_NAME=my-ai-service \ +OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318 \ +OTEL_EXPORTER_OTLP_PROTOCOL=http/protobuf \ +EVAL2OTEL_PROVIDERS=openai,anthropic \ +opentelemetry-instrument python main.py +``` + +Programmatic use is also available: + +```python +from eval2otel import Eval2OtelInstrumentor, get_instrumented_client + +Eval2OtelInstrumentor().instrument() +client = get_instrumented_client() +``` + ## Environment `instrument_all()` reads: @@ -77,6 +101,24 @@ Supported provider names: Set `EVAL2OTEL_PROVIDERS=openai,anthropic` to limit discovery. +## Typed Validation + +Install the `validation` extra to use optional Pydantic models: + +```python +from eval2otel.models import EvalResultModel + +payload = EvalResultModel.model_validate({ + "id": "case-1", + "model": "gpt-4o-mini", + "operation": "chat", + "request": {"model": "gpt-4o-mini"}, + "performance": {"duration": 0.25}, +}) + +client.process_evaluation(payload.to_eval_result()) +``` + ## Development From the repository root: diff --git a/python/eval2otel/__init__.py b/python/eval2otel/__init__.py index af8e337..f98e447 100644 --- a/python/eval2otel/__init__.py +++ b/python/eval2otel/__init__.py @@ -1,4 +1,5 @@ from .auto import instrument_all +from .autoinstrument import Eval2OtelInstrumentor, get_instrumented_client from .contract import ( EVAL2OTEL_CONTRACT_VERSION, UNKNOWN_SEMCONV_VERSION, @@ -30,12 +31,14 @@ "ConversionReport", "ConversionWarning", "Eval2Otel", + "Eval2OtelInstrumentor", "Eval2OtelEvidence", "Eval2OtelProvenance", "EvalResult", "build_eval2otel_attributes", "build_span_attributes", "instrument_all", + "get_instrumented_client", "instrument_all_providers", "instrument_anthropic", "instrument_bedrock", diff --git a/python/eval2otel/autoinstrument.py b/python/eval2otel/autoinstrument.py new file mode 100644 index 0000000..3baab8b --- /dev/null +++ b/python/eval2otel/autoinstrument.py @@ -0,0 +1,49 @@ +from __future__ import annotations + +from typing import Any, Iterable + +from .auto import instrument_all +from .contract import Eval2Otel + +try: + from opentelemetry.instrumentation.instrumentor import BaseInstrumentor +except ImportError: + class BaseInstrumentor: # type: ignore[no-redef] + def instrument(self, **kwargs: Any) -> None: + self._instrument(**kwargs) + + def uninstrument(self, **kwargs: Any) -> None: + self._uninstrument(**kwargs) + + +_INSTRUMENTED_CLIENT: Eval2Otel | None = None + + +class Eval2OtelInstrumentor(BaseInstrumentor): + """OpenTelemetry auto-instrumentation entry point for eval2otel.""" + + def instrumentation_dependencies(self) -> list[str]: + return [] + + def _instrument( + self, + *, + providers: Iterable[str] | None = None, + patch_providers: bool = True, + **_: Any, + ) -> None: + global _INSTRUMENTED_CLIENT + _INSTRUMENTED_CLIENT = instrument_all( + providers=providers, + patch_providers=patch_providers, + ) + + def _uninstrument(self, **_: Any) -> None: + global _INSTRUMENTED_CLIENT + if _INSTRUMENTED_CLIENT is not None: + _INSTRUMENTED_CLIENT.shutdown() + _INSTRUMENTED_CLIENT = None + + +def get_instrumented_client() -> Eval2Otel | None: + return _INSTRUMENTED_CLIENT diff --git a/python/eval2otel/models.py b/python/eval2otel/models.py new file mode 100644 index 0000000..7540c09 --- /dev/null +++ b/python/eval2otel/models.py @@ -0,0 +1,78 @@ +from __future__ import annotations + +from typing import Any, Dict, List, Literal, Optional, Union + +try: + from pydantic import BaseModel, ConfigDict, Field, field_validator +except ImportError as exc: + raise ImportError( + "eval2otel.models requires pydantic. Install eval2otel-python[validation]." + ) from exc + +from .contract import EvalResult + + +class ConversionWarningModel(BaseModel): + code: str + message: str + severity: Literal["info", "warning", "error"] = "warning" + + +class Eval2OtelProvenanceModel(BaseModel): + model_config = ConfigDict(populate_by_name=True) + + source_framework: Optional[str] = Field(default=None, alias="sourceFramework") + run_id: Optional[str] = Field(default=None, alias="runId") + case_id: Optional[str] = Field(default=None, alias="caseId") + dataset_id: Optional[str] = Field(default=None, alias="datasetId") + dataset_version: Optional[str] = Field(default=None, alias="datasetVersion") + adapter: Optional[str] = None + adapter_version: Optional[str] = Field(default=None, alias="adapterVersion") + contract_version: Optional[str] = Field(default=None, alias="contractVersion") + semconv_version: Optional[str] = Field(default=None, alias="semconvVersion") + + +class Eval2OtelEvidenceModel(BaseModel): + model_config = ConfigDict(populate_by_name=True) + + raw_payload_sha256: Optional[str] = Field(default=None, alias="rawPayloadSha256") + prompt_sha256: Optional[str] = Field(default=None, alias="promptSha256") + response_sha256: Optional[str] = Field(default=None, alias="responseSha256") + redacted_content_count: int = Field(default=0, alias="redactedContentCount", ge=0) + truncated_content_count: int = Field(default=0, alias="truncatedContentCount", ge=0) + dropped_event_count: int = Field(default=0, alias="droppedEventCount", ge=0) + warning_count: Optional[int] = Field(default=None, alias="warningCount", ge=0) + warnings: List[ConversionWarningModel] = Field(default_factory=list) + + +class EvalResultModel(BaseModel): + model_config = ConfigDict(populate_by_name=True, extra="allow") + + id: str + timestamp: Optional[float] = None + model: str + system: Optional[str] = None + operation: Literal["chat", "text_completion", "embeddings", "execute_tool", "agent_execution", "workflow_step"] + request: Dict[str, Any] + response: Dict[str, Any] = Field(default_factory=dict) + usage: Dict[str, Any] = Field(default_factory=dict) + performance: Dict[str, Any] + conversation: Optional[Dict[str, Any]] = None + provenance: Optional[Union[Eval2OtelProvenanceModel, Dict[str, Any]]] = None + evidence: Optional[Union[Eval2OtelEvidenceModel, Dict[str, Any]]] = None + + @field_validator("performance") + @classmethod + def require_duration(cls, value: Dict[str, Any]) -> Dict[str, Any]: + if "duration" not in value: + raise ValueError("performance.duration is required") + return value + + def to_eval_result(self) -> EvalResult: + return EvalResult.from_mapping(self.model_dump(by_alias=True, exclude_none=True)) + + +def validate_eval_result(value: Union[EvalResult, Dict[str, Any]]) -> EvalResult: + if isinstance(value, EvalResult): + return value + return EvalResultModel.model_validate(value).to_eval_result() diff --git a/python/pyproject.toml b/python/pyproject.toml index b2a6484..4949312 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -7,7 +7,7 @@ name = "eval2otel-python" version = "0.1.0" description = "Python SDK preview for eval2otel telemetry payloads" readme = "README.md" -requires-python = ">=3.10" +requires-python = ">=3.9" license = { text = "MIT" } authors = [ { name = "EvalOps" } @@ -19,6 +19,10 @@ otel = [ "opentelemetry-api>=1.29", "opentelemetry-sdk>=1.29", "opentelemetry-exporter-otlp>=1.29", + "opentelemetry-instrumentation>=0.50b0", +] +validation = [ + "pydantic>=2", ] providers = [ "openai>=1", @@ -26,5 +30,8 @@ providers = [ "cohere>=5", ] +[project.entry-points.opentelemetry_instrumentor] +eval2otel = "eval2otel.autoinstrument:Eval2OtelInstrumentor" + [tool.hatch.build.targets.wheel] packages = ["eval2otel"] diff --git a/python/tests/test_contract.py b/python/tests/test_contract.py index 408173e..61859ed 100644 --- a/python/tests/test_contract.py +++ b/python/tests/test_contract.py @@ -3,10 +3,12 @@ from eval2otel import ( EVAL2OTEL_CONTRACT_VERSION, + Eval2OtelInstrumentor, Eval2Otel, EvalResult, build_eval2otel_attributes, build_span_attributes, + get_instrumented_client, instrument_all, instrument_all_providers, instrument_openai, @@ -157,6 +159,39 @@ def fake_find_spec(name: str): self.assertEqual(handle.instrumentation, "opentelemetry.instrumentation.openai.OpenAIInstrumentor") self.assertEqual(FakeOpenAIInstrumentor.calls, [{}]) + def test_auto_instrumentor_entrypoint_uses_env_configuration(self) -> None: + instrumentor = Eval2OtelInstrumentor() + with mock.patch.dict("os.environ", {"EVAL2OTEL_PROVIDERS": "unknown"}, clear=False): + instrumentor.instrument() + + client = get_instrumented_client() + self.assertIsInstance(client, Eval2Otel) + self.assertEqual(client.service_name, "eval2otel-python") + self.assertEqual(len(client.instrumentation_handles), 1) + self.assertEqual(client.instrumentation_handles[0].provider, "unknown") + + instrumentor.uninstrument() + self.assertIsNone(get_instrumented_client()) + + def test_pydantic_model_validates_and_converts_eval_results(self) -> None: + try: + from eval2otel.models import EvalResultModel, validate_eval_result + except ImportError as exc: + self.skipTest(str(exc)) + + model = EvalResultModel.model_validate({ + "id": "py-pydantic", + "model": "gpt-4o-mini", + "operation": "chat", + "request": {"model": "gpt-4o-mini"}, + "performance": {"duration": 0.2}, + "provenance": {"sourceFramework": "pytest", "caseId": "case-1"}, + }) + result = model.to_eval_result() + self.assertIsInstance(result, EvalResult) + self.assertEqual(result.provenance.source_framework, "pytest") + self.assertEqual(validate_eval_result(model.model_dump(by_alias=True)).id, "py-pydantic") + def test_required_contract_fields_are_validated(self) -> None: with self.assertRaisesRegex(ValueError, "performance.duration"): EvalResult.from_mapping({