diff --git a/dashboards/datadog-dashboard.json b/dashboards/datadog-dashboard.json index 468b065..775a5d2 100644 --- a/dashboards/datadog-dashboard.json +++ b/dashboards/datadog-dashboard.json @@ -85,13 +85,13 @@ { "id": 4, "definition": { - "title": "Error Rate by System", + "title": "Error Rate by Provider", "title_size": "16", "title_align": "left", "type": "query_value", "requests": [ { - "q": "sum:gen_ai.client.operation.error{*} by {gen_ai.system}.as_rate() / sum:gen_ai.client.operation.duration{*} by {gen_ai.system}.as_count()", + "q": "sum:gen_ai.client.operation.error{*} by {gen_ai.provider.name}.as_rate() / sum:gen_ai.client.operation.duration{*} by {gen_ai.provider.name}.as_count()", "aggregator": "last" } ], @@ -232,9 +232,9 @@ "prefix": "gen_ai.request.model" }, { - "name": "system", + "name": "provider", "default": "*", - "prefix": "gen_ai.system" + "prefix": "gen_ai.provider.name" } ], "layout_type": "ordered", diff --git a/docs/contract/eval2otel-v1.md b/docs/contract/eval2otel-v1.md index c617544..f7fc311 100644 --- a/docs/contract/eval2otel-v1.md +++ b/docs/contract/eval2otel-v1.md @@ -19,7 +19,7 @@ Every converted evaluation creates one client span named by operation: Every span must include: - `gen_ai.operation.name` -- `gen_ai.system` +- `gen_ai.provider.name` - `evalops.contract.version` - `evalops.semconv.version` - `evalops.eval.id` @@ -28,8 +28,9 @@ Every span must include: - `evalops.redacted_content_count` - `evalops.truncated_content_count` -Provider-aware conversions should include `gen_ai.provider.name` using the -normalized provider names exported by `normalizeProviderName`. +`gen_ai.provider.name` uses the normalized provider names exported by +`normalizeProviderName`. `gen_ai.system` is intentionally not emitted because it +is no longer present in the upstream OpenTelemetry GenAI registry. 
## Attribute Registry diff --git a/package-lock.json b/package-lock.json index 2fdf143..26bd74f 100644 --- a/package-lock.json +++ b/package-lock.json @@ -27,7 +27,8 @@ "jest": "^29.5.0", "prettier": "^3.0.0", "ts-jest": "^29.1.0", - "typescript": "^5.0.0" + "typescript": "^5.0.0", + "yaml": "^2.9.0" } }, "node_modules/@ampproject/remapping": { diff --git a/package.json b/package.json index 799120b..193022c 100644 --- a/package.json +++ b/package.json @@ -50,7 +50,8 @@ "jest": "^29.5.0", "prettier": "^3.0.0", "ts-jest": "^29.1.0", - "typescript": "^5.0.0" + "typescript": "^5.0.0", + "yaml": "^2.9.0" }, "dependencies": { "@opentelemetry/api": "^1.9.1", diff --git a/python/eval2otel/contract.py b/python/eval2otel/contract.py index 66e5756..eedeaa1 100644 --- a/python/eval2otel/contract.py +++ b/python/eval2otel/contract.py @@ -12,6 +12,23 @@ UNKNOWN_SEMCONV_VERSION = "unspecified" +def normalize_provider_name(system: str | None) -> str | None: + if not system: + return None + value = system.lower() + if "azure" in value: + return "azure.openai" + if "bedrock" in value or "aws" in value: + return "aws.bedrock" + if "vertex" in value or "gemini" in value or "google" in value: + return "google.vertex" + if "anthropic" in value or "claude" in value: + return "anthropic" + if "openai" in value: + return "openai" + return value + + def sha256_payload(value: Any) -> str: encoded = json.dumps(value, sort_keys=True, separators=(",", ":"), default=str).encode("utf-8") return hashlib.sha256(encoded).hexdigest() @@ -302,7 +319,7 @@ def build_span_attributes( ) -> MutableMapping[str, str | int | float]: attrs: MutableMapping[str, str | int | float] = build_eval2otel_attributes(eval_result, semconv_version) attrs["gen_ai.operation.name"] = eval_result.operation - attrs["gen_ai.system"] = eval_result.system or "unknown" + attrs["gen_ai.provider.name"] = normalize_provider_name(eval_result.system) or "unknown" if eval_result.request.get("model") is not None: 
attrs["gen_ai.request.model"] = str(eval_result.request["model"]) if eval_result.response.get("model") is not None: diff --git a/python/tests/test_contract.py b/python/tests/test_contract.py index 61859ed..b17cc0f 100644 --- a/python/tests/test_contract.py +++ b/python/tests/test_contract.py @@ -100,7 +100,8 @@ def test_process_evaluation_emits_span_and_content_events_with_injected_tracer(s self.assertEqual(report.event_count, 2) self.assertEqual(tracer.spans[0].name, "gen_ai.chat") - self.assertEqual(tracer.spans[0].attributes["gen_ai.system"], "openai") + self.assertEqual(tracer.spans[0].attributes["gen_ai.provider.name"], "openai") + self.assertNotIn("gen_ai.system", tracer.spans[0].attributes) self.assertEqual(tracer.spans[0].attributes["gen_ai.usage.input_tokens"], 3) self.assertEqual(tracer.spans[0].events[0][0], "gen_ai.user.message") self.assertIn("evalops.content_sha256", tracer.spans[0].events[0][1]) diff --git a/src/converter.ts b/src/converter.ts index f0a93e8..3f43745 100644 --- a/src/converter.ts +++ b/src/converter.ts @@ -246,18 +246,13 @@ export class Eval2OtelConverter { evalResult: EvalResult, additionalAttributes?: Record ): GenAIAttributes { + const provider = normalizeProviderName(evalResult.system) ?? 'unknown'; const attributes: GenAIAttributes = { 'gen_ai.operation.name': evalResult.operation, - 'gen_ai.system': evalResult.system ?? 
'unknown', + [ATTR.PROVIDER_NAME]: provider, ...buildEval2OtelAttributes(evalResult, this.config), }; - // Provider discriminator aligned with latest GenAI semconv - const provider = normalizeProviderName(evalResult.system); - if (provider) { - (attributes as any)[ATTR.PROVIDER_NAME] = provider; - } - // Add service attributes if (this.config.environment) { attributes['deployment.environment'] = this.config.environment; @@ -454,7 +449,6 @@ export class Eval2OtelConverter { evalResult.conversation.messages.forEach((message, index) => { const eventName = `gen_ai.${message.role}.message`; const attributes: Record = { - 'gen_ai.system': evalResult.system ?? 'unknown', [ATTR.PROVIDER_NAME]: normalizeProviderName(evalResult.system) ?? 'unknown', [ATTR.MESSAGE_ROLE]: message.role, [ATTR.MESSAGE_INDEX]: index, @@ -524,7 +518,6 @@ export class Eval2OtelConverter { evalResult.response.choices.forEach((choice) => { const attributes: Record = { - 'gen_ai.system': evalResult.system ?? 'unknown', [ATTR.PROVIDER_NAME]: normalizeProviderName(evalResult.system) ?? 'unknown', [ATTR.RESPONSE_CHOICE_INDEX]: choice.index, [ATTR.RESPONSE_FINISH_REASON]: choice.finishReason, @@ -583,7 +576,7 @@ export class Eval2OtelConverter { : JSON.stringify(toolCall.function.arguments); if (this.canAddEvent(span)) { span.addEvent('gen_ai.tool.message', { - 'gen_ai.system': evalResult.system ?? 'unknown', + [ATTR.PROVIDER_NAME]: normalizeProviderName(evalResult.system) ?? 
'unknown', [ATTR.TOOL_NAME]: toolCall.function.name, [ATTR.TOOL_CALL_ID]: toolCall.id, [ATTR.RESPONSE_CHOICE_INDEX]: choice.index, diff --git a/src/metrics.ts b/src/metrics.ts index 1dd2aaa..5d3a58e 100644 --- a/src/metrics.ts +++ b/src/metrics.ts @@ -1,4 +1,5 @@ import { metrics, Counter, Histogram, Meter, context as otContext } from '@opentelemetry/api'; +import { ATTR } from './attributes'; import { normalizeProviderName } from './contract'; import { getRagMetricValue } from './rag'; import { ConversionReport, EvalResult, OtelConfig, ProcessOptions } from './types'; @@ -208,17 +209,11 @@ export class Eval2OtelMetrics { recordMetrics(evalResult: EvalResult, options?: ProcessOptions): void { const baseAttributes = { 'gen_ai.operation.name': evalResult.operation, - 'gen_ai.system': evalResult.system ?? 'unknown', + [ATTR.PROVIDER_NAME]: normalizeProviderName(evalResult.system) ?? 'unknown', 'gen_ai.request.model': evalResult.request.model, 'gen_ai.response.model': evalResult.response.model ?? evalResult.request.model, }; - // Provider discriminator aligned with latest GenAI semconv - const provider = normalizeProviderName(evalResult.system); - if (provider) { - (baseAttributes as any)['gen_ai.provider.name'] = provider; - } - // Add error type if present const attributes: Record = { ...baseAttributes }; if (evalResult.error) { @@ -388,9 +383,8 @@ export class Eval2OtelMetrics { attrs['gen_ai.operation.name'] = evalResult.operation; } if (evalResult?.system) { - attrs['gen_ai.system'] = evalResult.system; const provider = normalizeProviderName(evalResult.system); - if (provider) attrs['gen_ai.provider.name'] = provider; + if (provider) attrs[ATTR.PROVIDER_NAME] = provider; } if (evalResult?.request?.model) { attrs['gen_ai.request.model'] = evalResult.request.model; @@ -506,7 +500,7 @@ export class Eval2OtelMetrics { }): void { const baseAttributes = { 'gen_ai.operation.name': evalResult.operation, - 'gen_ai.system': evalResult.system ?? 
'unknown', + [ATTR.PROVIDER_NAME]: normalizeProviderName(evalResult.system) ?? 'unknown', 'gen_ai.request.model': evalResult.request.model, }; diff --git a/src/semconv.ts b/src/semconv.ts index 83670fe..9ef72a8 100644 --- a/src/semconv.ts +++ b/src/semconv.ts @@ -12,7 +12,6 @@ export interface AttributeSpec { export const ATTRIBUTE_REGISTRY: AttributeSpec[] = [ { key: 'gen_ai.operation.name', source: 'otel-genai', signal: 'all', stability: 'stable', description: 'GenAI operation name.' }, - { key: 'gen_ai.system', source: 'otel-genai', signal: 'all', stability: 'stable', description: 'AI system or provider family.' }, { key: 'error.type', source: 'otel-genai', signal: 'all', stability: 'stable', description: 'Error type for failed operations.' }, { key: 'deployment.environment', source: 'otel-genai', signal: 'all', stability: 'stable', description: 'Deployment environment resource/context attribute.' }, { key: ATTR.PROVIDER_NAME, source: 'otel-genai', signal: 'all', stability: 'stable', description: 'Normalized provider name.' }, diff --git a/src/types.ts b/src/types.ts index 9375021..028eb75 100644 --- a/src/types.ts +++ b/src/types.ts @@ -338,7 +338,7 @@ export interface ProcessOptions { export interface GenAIAttributes { // Required 'gen_ai.operation.name': string; - 'gen_ai.system': string; + 'gen_ai.provider.name': string; // Conditionally required 'error.type'?: string; diff --git a/test-harness/README.md b/test-harness/README.md index 5e97467..1b9af16 100644 --- a/test-harness/README.md +++ b/test-harness/README.md @@ -74,7 +74,7 @@ curl "http://localhost:16686/api/traces?service=eval2otel-e2e-test" ### Traces in Jaeger - Service: `eval2otel-e2e-test` - Span names: `gen_ai.chat`, `gen_ai.embeddings`, `gen_ai.execute_tool` -- Attributes: `gen_ai.system=openai`, `gen_ai.request.model=gpt-4`, etc. +- Attributes: `gen_ai.provider.name=openai`, `gen_ai.request.model=gpt-4`, etc. 
- Events: Message events (when `captureContent=true`) ### Metrics in Prometheus diff --git a/test/fixtures/otel/gen-ai-registry.yaml b/test/fixtures/otel/gen-ai-registry.yaml new file mode 100644 index 0000000..7a477bb --- /dev/null +++ b/test/fixtures/otel/gen-ai-registry.yaml @@ -0,0 +1,780 @@ +file_format: definition/2 +attributes: + - key: gen_ai.provider.name + type: + members: + - id: openai + stability: development + value: "openai" + brief: '[OpenAI](https://openai.com/)' + - id: gcp.gen_ai + stability: development + value: "gcp.gen_ai" + brief: "Any Google generative AI endpoint" + note: > + May be used when specific backend is unknown. + - id: gcp.vertex_ai + stability: development + value: "gcp.vertex_ai" + brief: "[Vertex AI](https://cloud.google.com/vertex-ai)" + note: > + Used when accessing the 'aiplatform.googleapis.com' endpoint. + - id: gcp.gemini + stability: development + value: "gcp.gemini" + brief: '[Gemini](https://cloud.google.com/products/gemini)' + note: > + Used when accessing the 'generativelanguage.googleapis.com' endpoint. + Also known as the AI Studio API. 
+ - id: anthropic + stability: development + value: "anthropic" + brief: '[Anthropic](https://www.anthropic.com/)' + - id: cohere + stability: development + value: "cohere" + brief: '[Cohere](https://cohere.com/)' + - id: azure.ai.inference + stability: development + value: "azure.ai.inference" + brief: 'Azure AI Inference' + - id: azure.ai.openai + stability: development + value: "azure.ai.openai" + brief: '[Azure OpenAI](https://learn.microsoft.com/en-us/azure/ai-services/openai/overview)' + - id: ibm.watsonx.ai + stability: development + value: "ibm.watsonx.ai" + brief: '[IBM Watsonx AI](https://www.ibm.com/products/watsonx-ai)' + - id: aws.bedrock + stability: development + value: "aws.bedrock" + brief: '[AWS Bedrock](https://aws.amazon.com/bedrock)' + - id: perplexity + stability: development + value: "perplexity" + brief: '[Perplexity](https://www.perplexity.ai/)' + - id: x_ai + stability: development + value: "x_ai" + brief: '[xAI](https://x.ai/)' + - id: deepseek + stability: development + value: "deepseek" + brief: '[DeepSeek](https://www.deepseek.com/)' + - id: groq + stability: development + value: "groq" + brief: '[Groq](https://groq.com/)' + - id: mistral_ai + stability: development + value: "mistral_ai" + brief: '[Mistral AI](https://mistral.ai/)' + + brief: The Generative AI provider as identified by the client or server instrumentation. + note: | + The attribute SHOULD be set based on the instrumentation's best + knowledge and may differ from the actual model provider. + + Multiple providers, including Azure OpenAI, Gemini, and AI hosting platforms + are accessible using the OpenAI REST API and corresponding client libraries, + but may proxy or host models from different providers. + + The `gen_ai.request.model`, `gen_ai.response.model`, and `server.address` + attributes may help identify the actual system in use. 
+ + The `gen_ai.provider.name` attribute acts as a discriminator that + identifies the GenAI telemetry format flavor specific to that provider + within GenAI semantic conventions. + It SHOULD be set consistently with provider-specific attributes and signals. + For example, GenAI spans, metrics, and events related to AWS Bedrock + should have the `gen_ai.provider.name` set to `aws.bedrock` and include + applicable `aws.bedrock.*` attributes and are not expected to include + `openai.*` attributes. + stability: development + - key: gen_ai.request.model + type: string + brief: The name of the GenAI model a request is being made to. + examples: 'gpt-4' + stability: development + - key: gen_ai.request.max_tokens + type: int + brief: The maximum number of tokens the model generates for a request. + examples: [100] + stability: development + - key: gen_ai.request.choice.count + type: int + brief: The target number of candidate completions to return. + examples: [3] + stability: development + - key: gen_ai.request.temperature + type: double + brief: The temperature setting for the GenAI request. + examples: [0.0] + stability: development + - key: gen_ai.request.top_p + type: double + brief: The top_p sampling setting for the GenAI request. + examples: [1.0] + stability: development + - key: gen_ai.request.top_k + type: double + brief: The top_k sampling setting for the GenAI request. + examples: [1.0] + stability: development + - key: gen_ai.request.stop_sequences + type: string[] + brief: List of sequences that the model will use to stop generating further tokens. + examples: + - [forest, lived] + stability: development + - key: gen_ai.request.frequency_penalty + type: double + brief: The frequency penalty setting for the GenAI request. + examples: [0.1] + stability: development + - key: gen_ai.request.presence_penalty + type: double + brief: The presence penalty setting for the GenAI request. 
+ examples: [0.1] + stability: development + - key: gen_ai.request.encoding_formats + type: string[] + brief: The encoding formats requested in an embeddings operation, if specified. + note: > + In some GenAI systems the encoding formats are called embedding types. + Also, some GenAI systems only accept a single format per request. + examples: + - ['base64'] + - ['float', 'binary'] + stability: development + - key: gen_ai.request.seed + type: int + brief: Requests with same seed value more likely to return same result. + examples: [100] + stability: development + - key: gen_ai.request.stream + type: boolean + brief: Indicates whether the GenAI request was made in streaming mode. + stability: development + - key: gen_ai.response.id + type: string + brief: The unique identifier for the completion. + examples: ['chatcmpl-123'] + stability: development + - key: gen_ai.response.model + type: string + brief: The name of the model that generated the response. + examples: ['gpt-4-0613'] + stability: development + - key: gen_ai.response.finish_reasons + type: string[] + brief: Array of reasons the model stopped generating tokens, corresponding to each generation received. + examples: + - [stop] + - [stop, length] + stability: development + - key: gen_ai.response.time_to_first_chunk + type: double + brief: > + Time to first chunk in a streaming response, measured from request issuance, in seconds. + The value is measured from when the client issues the generation request to when the first + chunk is received in the response stream. + examples: [0.5, 1.2] + stability: development + - key: gen_ai.usage.input_tokens + type: int + brief: The number of tokens used in the GenAI input (prompt). + note: | + This value SHOULD include all types of input tokens, including cached tokens. 
+ Instrumentations SHOULD make a best effort to populate this value, using a total + provided by the provider when available or, depending on the provider API, + by summing different token types parsed from the provider output. + examples: [100] + stability: development + - key: gen_ai.usage.cache_read.input_tokens + type: int + brief: The number of input tokens served from a provider-managed cache. + note: > + The value SHOULD be included in `gen_ai.usage.input_tokens`. + examples: [50] + stability: development + - key: gen_ai.usage.cache_creation.input_tokens + type: int + brief: The number of input tokens written to a provider-managed cache. + note: > + The value SHOULD be included in `gen_ai.usage.input_tokens`. + examples: [25] + stability: development + - key: gen_ai.usage.output_tokens + type: int + brief: The number of tokens used in the GenAI response (completion). + examples: [180] + stability: development + - key: gen_ai.usage.reasoning.output_tokens + type: int + brief: The number of output tokens used for reasoning (e.g. chain-of-thought, extended thinking). + note: > + The value SHOULD be included in `gen_ai.usage.output_tokens`. + examples: [50] + stability: development + - key: gen_ai.token.type + type: + members: + - id: input + stability: development + value: "input" + brief: 'Input tokens (prompt, input, etc.)' + - id: output + stability: development + value: "output" + brief: 'Output tokens (completion, response, etc.)' + brief: The type of token being counted. + examples: ['input', 'output'] + stability: development + - key: gen_ai.conversation.id + type: string + brief: The unique identifier for a conversation (session, thread), used to store and correlate messages within this conversation. + examples: ["conv_5j66UpCpwteGg4YSxUnt7lPY"] + stability: development + - key: gen_ai.agent.id + type: string + brief: The unique identifier of the GenAI agent. 
+ examples: ['asst_5j66UpCpwteGg4YSxUnt7lPY'] + stability: development + - key: gen_ai.agent.name + type: string + brief: Human-readable name of the GenAI agent provided by the application. + examples: ["Math Tutor", "Fiction Writer"] + stability: development + - key: gen_ai.agent.description + type: string + brief: Free-form description of the GenAI agent provided by the application. + examples: ["Helps with math problems", "Generates fiction stories"] + stability: development + - key: gen_ai.agent.version + type: string + brief: The version of the GenAI agent. + examples: ["1.0.0", "2025-05-01"] + stability: development + - key: gen_ai.tool.name + type: string + brief: Name of the tool utilized by the agent. + examples: ["Flights"] + stability: development + - key: gen_ai.tool.call.id + type: string + brief: The tool call identifier. + examples: ['call_mszuSIzqtI65i1wAUOE8w5H4'] + stability: development + - key: gen_ai.tool.description + type: string + brief: The tool description. + examples: ["Multiply two numbers"] + stability: development + - key: gen_ai.tool.type + type: string + brief: Type of the tool utilized by the agent + note: > + Extension: A tool executed on the agent-side to directly call external APIs, bridging the gap between the agent and real-world systems. + Agent-side operations involve actions that are performed by the agent on the server or within the agent's controlled environment. + Function: A tool executed on the client-side, where the agent generates parameters for a predefined function, and the client executes the logic. + Client-side operations are actions taken on the user's end or within the client application. + Datastore: A tool used by the agent to access and query structured or unstructured external data for retrieval-augmented tasks or knowledge updates. + examples: ['function', 'extension', 'datastore'] + stability: development + - key: gen_ai.tool.call.arguments + type: any + brief: Parameters passed to the tool call. 
+ note: | + > [!WARNING] + > This attribute may contain sensitive information. + + It's expected to be an object - in case a serialized string is available + to the instrumentation, the instrumentation SHOULD do the best effort to + deserialize it to an object. When recorded on spans, it MAY be recorded as a JSON string if structured format is not supported and SHOULD be recorded in structured form otherwise. + examples: + - | + { + "location": "San Francisco?", + "date": "2025-10-01" + } + + stability: development + - key: gen_ai.tool.call.result + type: any + brief: The result returned by the tool call (if any and if execution was successful). + note: | + > [!WARNING] + > This attribute may contain sensitive information. + + It's expected to be an object - in case a serialized string is available + to the instrumentation, the instrumentation SHOULD do the best effort to + deserialize it to an object. When recorded on spans, it MAY be recorded as a JSON string if structured format is not supported and SHOULD be recorded in structured form otherwise. + examples: + - | + { + "temperature_range": { + "high": 75, + "low": 60 + }, + "conditions": "sunny" + } + + stability: development + - key: gen_ai.tool.definitions + type: any + brief: The list of tool definitions available to the GenAI agent or model. + note: | + Instrumentations MUST follow [Tool Definitions JSON Schema](/docs/gen-ai/gen-ai-tool-definitions.json). + + When the attribute is recorded on events, it MUST be recorded in structured + form. When recorded on spans, it MAY be recorded as a JSON string if structured + format is not supported and SHOULD be recorded in structured form otherwise. + + Since this attribute could be large, it's NOT RECOMMENDED to populate + non-required properties by default. Instrumentations MAY provide a way + to enable populating optional properties. 
+ examples: + - | + [ + { + "type": "function", + "name": "get_current_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. San Francisco, CA" + }, + "unit": { + "type": "string", + "enum": [ + "celsius", + "fahrenheit" + ] + } + }, + "required": [ + "location", + "unit" + ] + } + } + ] + + stability: development + - key: gen_ai.data_source.id + type: string + brief: The data source identifier. + note: > + Data sources are used by AI agents and RAG applications to store grounding data. + A data source may be an external database, object store, document collection, website, or any other storage system used by the GenAI agent or application. + The `gen_ai.data_source.id` SHOULD match the identifier used by the GenAI system rather than a name specific to the external storage, such as a database or + object store. Semantic conventions referencing `gen_ai.data_source.id` MAY also leverage additional attributes, such as `db.*`, to further identify and describe the data source. 
+ examples: ['H7STPQYOND'] + stability: development + - key: gen_ai.operation.name + type: + members: + - id: chat + value: "chat" + brief: 'Chat completion operation such as [OpenAI Chat API](https://platform.openai.com/docs/api-reference/chat)' + stability: development + - id: generate_content + value: "generate_content" + brief: 'Multimodal content generation operation such as [Gemini Generate Content](https://ai.google.dev/api/generate-content)' + stability: development + - id: text_completion + value: "text_completion" + brief: 'Text completions operation such as [OpenAI Completions API (Legacy)](https://platform.openai.com/docs/api-reference/completions)' + stability: development + - id: embeddings + value: "embeddings" + brief: 'Embeddings operation such as [OpenAI Create embeddings API](https://platform.openai.com/docs/api-reference/embeddings/create)' + stability: development + - id: retrieval + value: "retrieval" + brief: 'Retrieval operation such as [OpenAI Search Vector Store API](https://platform.openai.com/docs/api-reference/vector-stores/search)' + stability: development + - id: create_agent + value: "create_agent" + brief: 'Create GenAI agent' + stability: development + - id: invoke_agent + value: "invoke_agent" + brief: 'Invoke GenAI agent' + stability: development + - id: execute_tool + value: "execute_tool" + brief: 'Execute a tool' + stability: development + - id: invoke_workflow + value: "invoke_workflow" + brief: 'Invoke GenAI workflow' + stability: development + - id: plan + value: "plan" + brief: 'Agent planning or task decomposition phase' + stability: development + - id: search_memory + value: "search_memory" + brief: 'Search/query memories from a memory store' + stability: development + - id: create_memory + value: "create_memory" + brief: 'Create new memory records' + stability: development + - id: update_memory + value: "update_memory" + brief: 'Update existing memory records' + stability: development + - id: upsert_memory + value: 
"upsert_memory" + brief: 'Create or update memory records without the caller choosing which' + stability: development + - id: delete_memory + value: "delete_memory" + brief: 'Delete memory records' + stability: development + - id: create_memory_store + value: "create_memory_store" + brief: 'Create or initialize a memory store' + stability: development + - id: delete_memory_store + value: "delete_memory_store" + brief: 'Delete or deprovision a memory store' + stability: development + brief: The name of the operation being performed. + note: > + If one of the predefined values applies, but specific system uses a different name it's RECOMMENDED to document it in the semantic + conventions for specific GenAI system and use system-specific name in the instrumentation. + If a different name is not documented, instrumentation libraries SHOULD use applicable predefined value. + stability: development + - key: gen_ai.output.type + type: + members: + - id: text + value: "text" + brief: 'Plain text' + stability: development + - id: json + value: "json" + brief: 'JSON object with known or unknown schema' + stability: development + - id: image + value: "image" + brief: 'Image' + stability: development + - id: speech + value: "speech" + brief: 'Speech' + stability: development + # we might need to record requested and actual output types on the same span/event + # at some point. In this case, we might need to add a new attribute. + # we may also need to record an array of types if multiple are requested/returned. + brief: Represents the content type requested by the client. + note: > + This attribute SHOULD be used when the client requests output of a + specific type. The model may return zero or more outputs of this type. + + This attribute specifies the output modality and not the actual output format. + For example, if an image is requested, the actual output could be a + URL pointing to an image file. 
+ + Additional output format details may be recorded in the future in the + `gen_ai.output.{type}.*` attributes. + stability: development + - key: gen_ai.embeddings.dimension.count + type: int + brief: The number of dimensions the resulting output embeddings should have. + examples: [512, 1024] + stability: development + - key: gen_ai.retrieval.documents + type: any + brief: The documents retrieved. + note: | + Instrumentations MUST follow [Retrieval documents JSON schema](/docs/gen-ai/gen-ai-retrieval-documents.json). + When the attribute is recorded on events, it MUST be recorded in structured + form. When recorded on spans, it MAY be recorded as a JSON string if structured + format is not supported and SHOULD be recorded in structured form otherwise. + + Each document object SHOULD contain at least the following properties: + `id` (string): A unique identifier for the document, `score` (double): The relevance score of the document + examples: + - | + [ + { + "id": "doc_123", + "score": 0.95 + }, + { + "id": "doc_456", + "score": 0.87 + }, + { + "id": "doc_789", + "score": 0.82 + } + ] + stability: development + - key: gen_ai.retrieval.query.text + type: string + brief: The query text used for retrieval. + note: | + > [!Warning] + > This attribute may contain sensitive information. + examples: ["What is the capital of France?", "weather in Paris"] + stability: development + - key: gen_ai.memory.store.id + type: string + brief: The unique identifier of the memory store. + note: > + Semantic conventions for individual components SHOULD document what `gen_ai.memory.store.id` maps + to within the implementation. + examples: ["ms_abc123", "user-preferences-store"] + stability: development + - key: gen_ai.memory.record.id + type: string + brief: The unique identifier of the memory record. + examples: ["mem_5j66UpCpwteGg4YSxUnt7lPY"] + stability: development + - key: gen_ai.memory.record.count + type: int + brief: The number of memory records relevant to the operation. 
+ note: > + For `search_memory` operations, this is the number of memory records returned by the operation. + For `create_memory` operations, this is the number of memory records the operation attempted to create. + For `update_memory` operations, this is the number of memory records the operation attempted to + modify. For `upsert_memory` operations, this is the number of memory records the operation + attempted to create or update. For `delete_memory` operations, this is the number of memory + records the operation attempted to delete. + examples: [3] + stability: development + - key: gen_ai.memory.query.text + type: string + brief: The search query used to retrieve memories. + note: | + Instrumentations SHOULD NOT capture this attribute by default. Capture SHOULD be gated + by an explicit user opt-in, for example `OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT`. + + > [!Warning] + > This attribute may contain sensitive information. + examples: ["user dietary preferences", "past flight bookings"] + stability: development + - key: gen_ai.memory.records + type: any + brief: The memory records stored or retrieved in a memory operation. + note: | + Instrumentations MUST follow [Memory records JSON schema](/docs/gen-ai/gen-ai-memory-records.json). + When the attribute is recorded on events, it MUST be recorded in structured + form. When recorded on spans, it MAY be recorded as a JSON string if structured + format is not supported and SHOULD be recorded in structured form otherwise. + + Instrumentations SHOULD NOT capture this attribute by default. Capture SHOULD be gated + by an explicit user opt-in, for example `OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT`. + + > [!Warning] + > This attribute may contain sensitive information including user/PII data. 
+ examples: + - | + [ + { + "content": "User prefers dark mode", + "id": "mem_123", + "score": 0.95 + }, + { + "content": { + "preference": "vegetarian meals", + "confidence": 0.9 + }, + "metadata": { + "source": "profile" + } + } + ] + stability: development + - key: gen_ai.system_instructions + type: any + brief: The system message or instructions provided to the GenAI model separately from the chat history. + note: | + This attribute SHOULD be used when the corresponding provider or API + allows to provide system instructions or messages separately from the + chat history. + + Instructions that are part of the chat history SHOULD be recorded in + `gen_ai.input.messages` attribute instead. + + Instrumentations MUST follow [System instructions JSON schema](/docs/gen-ai/gen-ai-system-instructions.json). + + When recorded on spans, it MAY be recorded as a JSON string if structured + format is not supported and SHOULD be recorded in structured form otherwise. + + Instrumentations MAY provide a way for users to filter or truncate + system instructions. + + > [!Warning] + > This attribute may contain sensitive information. + + See [Recording content on attributes](/docs/gen-ai/gen-ai-spans.md#recording-content-on-attributes) + section for more details. + examples: + - | + [ + { + "type": "text", + "content": "You are an Agent that greet users, always use greetings tool to respond" + } + ] + - | + [ + { + "type": "text", + "content": "You are a language translator." + }, + { + "type": "text", + "content": "Your mission is to translate text in English to French." + } + ] + stability: development + - key: gen_ai.input.messages + type: any + brief: > + The chat history provided to the model as an input. + note: | + Instrumentations MUST follow [Input messages JSON schema](/docs/gen-ai/gen-ai-input-messages.json). + When the attribute is recorded on events, it MUST be recorded in structured + form. 
When recorded on spans, it MAY be recorded as a JSON string if structured + format is not supported and SHOULD be recorded in structured form otherwise. + + Messages MUST be provided in the order they were sent to the model. + Instrumentations MAY provide a way for users to filter or truncate + input messages. + + > [!Warning] + > This attribute is likely to contain sensitive information including user/PII data. + + See [Recording content on attributes](/docs/gen-ai/gen-ai-spans.md#recording-content-on-attributes) + section for more details. + examples: + - | + [ + { + "role": "user", + "parts": [ + { + "type": "text", + "content": "Weather in Paris?" + } + ] + }, + { + "role": "assistant", + "parts": [ + { + "type": "tool_call", + "id": "call_VSPygqKTWdrhaFErNvMV18Yl", + "name": "get_weather", + "arguments": { + "location": "Paris" + } + } + ] + }, + { + "role": "tool", + "parts": [ + { + "type": "tool_call_response", + "id": "call_VSPygqKTWdrhaFErNvMV18Yl", + "result": "rainy, 57°F" + } + ] + } + ] + stability: development + - key: gen_ai.output.messages + type: any + brief: Messages returned by the model where each message represents a specific model response (choice, candidate). + note: | + Instrumentations MUST follow [Output messages JSON schema](/docs/gen-ai/gen-ai-output-messages.json). + + Each message represents a single output choice/candidate generated by + the model. Each message corresponds to exactly one generation + (choice/candidate) and vice versa - one choice cannot be split across + multiple messages and one message cannot contain parts from multiple choices. + + When the attribute is recorded on events, it MUST be recorded in structured + form. When recorded on spans, it MAY be recorded as a JSON string if structured + format is not supported and SHOULD be recorded in structured form otherwise. + + Instrumentations MAY provide a way for users to filter or truncate + output messages.
+ + > [!Warning] + > This attribute is likely to contain sensitive information including user/PII data. + + See [Recording content on attributes](/docs/gen-ai/gen-ai-spans.md#recording-content-on-attributes) + section for more details. + examples: + - | + [ + { + "role": "assistant", + "parts": [ + { + "type": "text", + "content": "The weather in Paris is currently rainy with a temperature of 57°F." + } + ], + "finish_reason": "stop" + } + ] + stability: development + - key: gen_ai.evaluation.name + type: string + brief: The name of the evaluation metric used for the GenAI response. + examples: ["Relevance", "IntentResolution"] + stability: development + - key: gen_ai.evaluation.score.value + type: double + brief: The evaluation score returned by the evaluator. + examples: [4.0] + stability: development + - key: gen_ai.evaluation.score.label + type: string + brief: Human readable label for evaluation. + note: > + This attribute provides a human-readable interpretation of the evaluation score produced by an evaluator. + For example, a score value of 1 could mean "relevant" in one evaluation system and "not relevant" in another, depending on the scoring range and evaluator. + The label SHOULD have low cardinality. + Possible values depend on the evaluation metric and evaluator used; implementations SHOULD document the possible values. + examples: ["relevant", "not_relevant", "correct", "incorrect", "pass", "fail"] + stability: development + - key: gen_ai.evaluation.explanation + type: string + brief: A free-form explanation for the assigned score provided by the evaluator. + examples: ["The response is factually accurate but lacks sufficient detail to fully address the question."] + stability: development + - key: gen_ai.prompt.name + type: string + brief: The name of the prompt that uniquely identifies it. 
+ examples: ["analyze-code"] + stability: development + - key: gen_ai.workflow.name + type: string + brief: Human-readable name of the GenAI workflow provided by the application. + note: | + This attribute can be populated in different frameworks; for example, as the name of the first chain in LangChain or the name of the crew in CrewAI. + The workflow name is usually provided by the application in a way that is specific to the generative AI framework or library that orchestrates the workflow. + It is usually a static name that is expected to be unique within an application. + + `gen_ai.workflow.name` MUST have low cardinality. + Semantic conventions for individual Generative AI frameworks SHOULD document what `gen_ai.workflow.name` means in the context of that framework. + If there is no low-cardinality workflow name available for a given framework, this attribute MUST NOT be captured by default. + examples: ["multi_agent_rag", "customer_support_pipeline"] + stability: development diff --git a/test/metrics-guards.test.ts b/test/metrics-guards.test.ts index b155f3e..2aa4686 100644 --- a/test/metrics-guards.test.ts +++ b/test/metrics-guards.test.ts @@ -16,7 +16,7 @@ describe('Metrics guardrails: allowlist and cap', () => { const m = new Eval2OtelMetrics({ serviceName: 'svc', enableExemplars: true, - metricAttributeAllowlist: ['gen_ai.operation.name', 'gen_ai.system', 'gen_ai.request.model', 'gen_ai.token.type'], + metricAttributeAllowlist: ['gen_ai.operation.name', 'gen_ai.provider.name', 'gen_ai.request.model', 'gen_ai.token.type'], maxMetricAttributes: 3, } as any); @@ -33,8 +33,7 @@ describe('Metrics guardrails: allowlist and cap', () => { // Only allowlisted keys remain and capped to 3 keys expect(Object.keys(attrs).sort()).toHaveLength(3); Object.keys(attrs).forEach(k => { - expect(['gen_ai.operation.name', 'gen_ai.system', 'gen_ai.request.model', 'gen_ai.token.type']).toContain(k); + expect(['gen_ai.operation.name', 'gen_ai.provider.name', 
'gen_ai.request.model', 'gen_ai.token.type']).toContain(k); }); }); }); - diff --git a/test/semconv.test.ts b/test/semconv.test.ts index 11d2b08..0cceb7b 100644 --- a/test/semconv.test.ts +++ b/test/semconv.test.ts @@ -1,5 +1,6 @@ import * as fs from 'fs'; import * as path from 'path'; +import { parse } from 'yaml'; import { ATTRIBUTE_REGISTRY, assertRegisteredAttributes, @@ -48,4 +49,27 @@ describe('semantic convention registry', () => { } } }); + /* Parses the YAML fixture under fixtures/otel and compares the attribute keys it declares with the keys in ATTRIBUTE_REGISTRY. */ + it('keeps stable GenAI attributes aligned with the upstream registry fixture', () => { + const fixturePath = path.join(__dirname, 'fixtures', 'otel', 'gen-ai-registry.yaml'); + const registry = parse(fs.readFileSync(fixturePath, 'utf8')) as { attributes?: Array<{ key?: string }> }; + const upstreamKeys = new Set((registry.attributes ?? []) + .map(attribute => attribute.key) + .filter((key): key is string => typeof key === 'string')); + /* The fixture must declare ATTR.PROVIDER_NAME and must not declare gen_ai.system; the local registry must not register gen_ai.system either. NOTE(review): upstreamKeys is empty when the parsed document has no top-level attributes list - confirm the fixture layout. */ + expect(upstreamKeys).toContain(ATTR.PROVIDER_NAME); + expect(upstreamKeys).not.toContain('gen_ai.system'); + expect(isRegisteredAttribute('gen_ai.system')).toBe(false); + /* Collect, in sorted order, the local registry keys whose source is otel-genai, whose stability is stable, whose signal is not metric, and which start with the gen_ai. prefix. */ + const stableSpanKeys = ATTRIBUTE_REGISTRY + .filter(spec => spec.source === 'otel-genai') + .filter(spec => spec.stability === 'stable') + .filter(spec => spec.signal !== 'metric') + .map(spec => spec.key) + .filter(key => key.startsWith('gen_ai.')) + .sort(); + const missingFromUpstream = stableSpanKeys.filter(key => !upstreamKeys.has(key)); + /* Passes only when every collected key is also declared in the fixture. */ + expect(missingFromUpstream).toEqual([]); + }); });