11"""Common attribute processing utilities shared across all instrumentors.
22
3+ This utility ensures consistent attribute extraction and transformation across different
4+ instrumentation use cases.
5+
36This module provides core utilities for extracting and formatting
47OpenTelemetry-compatible attributes from span data. These functions
58are provider-agnostic and used by all instrumentors in the AgentOps
1922These utilities ensure consistent attribute handling across different
2023LLM service instrumentors while maintaining separation of concerns.
2124"""
22- from typing import Dict , Any , Optional , List
25+ from typing import runtime_checkable , Protocol , Any , Optional , Dict , TypedDict
2326from agentops .logging import logger
2427from agentops .helpers import safe_serialize , get_agentops_version
2528from agentops .semconv import (
2831 WorkflowAttributes ,
2932)
3033
31- # target_attribute_key: source_attribute
32- AttributeMap = Dict [str , Any ]
34+
35+ # `AttributeMap` is a dictionary that maps target attribute keys to source attribute keys.
36+ # It is used to extract and transform attributes from a span or trace data object
37+ # into a standardized format following OpenTelemetry semantic conventions.
38+ #
39+ # Key-Value Format:
40+ # - Key (str): The target attribute key in the standardized output format
41+ # - Value (str): The source attribute key in the input data object
42+ #
43+ # Example Usage:
44+ # --------------
45+ #
46+ # Create your mapping:
47+ # attribute_mapping: AttributeMap = {
48+ # CoreAttributes.TRACE_ID: "trace_id",
49+ # CoreAttributes.SPAN_ID: "span_id"
50+ # }
51+ #
52+ # Extract the attributes:
53+ # span_data = {
54+ # "trace_id": "12345",
55+ # "span_id": "67890",
56+ # }
57+ #
58+ # attributes = _extract_attributes_from_mapping(span_data, attribute_mapping)
59+ # # >> {"trace.id": "12345", "span.id": "67890"}
60+ AttributeMap = Dict [str , str ] # target_attribute_key: source_attribute
61+
62+
63+ # `IndexedAttributeMap` differs from `AttributeMap` in that it allows for dynamic formatting of
64+ # target attribute keys using indices `i` and optionally `j`. This is particularly useful
65+ # when dealing with collections of similar attributes that should be uniquely identified
66+ # in the output.
67+ #
68+ # Key-Value Format:
69+ # - Key (IndexedAttribute): An object implementing the IndexedAttribute protocol with a format method
70+ # - Value (str): The source attribute key in the input data object
71+ #
72+ # Example Usage:
73+ # --------------
74+ #
75+ # Create your mapping:
76+ # attribute_mapping: IndexedAttributeMap = {
77+ # MessageAttributes.TOOL_CALL_ID: "id",
78+ # MessageAttributes.TOOL_CALL_TYPE: "type"
79+ # }
80+ #
81+ # Process tool calls:
82+ # span_data = {
83+ # "id": "tool_1",
84+ # "type": "search",
85+ # }
86+ #
87+ # attributes = _extract_attributes_from_mapping_with_index(
88+ # span_data, attribute_mapping, i=0)
89+ # # >> {"gen_ai.request.tools.0.id": "tool_1", "gen_ai.request.tools.0.type": "search"}
90+
@runtime_checkable
class IndexedAttribute(Protocol):
    """Structural type for attribute-key templates that are rendered by index.

    An object satisfies this protocol when it exposes a ``format`` method that
    accepts the keyword-only indices ``i`` and, optionally, ``j`` and returns
    the finished attribute key. Because the protocol is runtime-checkable,
    ``isinstance`` can be used against it; such checks only look for the
    presence of ``format``, not its signature.
    """

    def format(self, *, i: int, j: Optional[int] = None) -> str:
        """Return the attribute key rendered for ``i`` (and ``j`` when given)."""
        ...
101+
102+ IndexedAttributeMap = Dict [IndexedAttribute , str ] # target_attribute_key: source_attribute
103+
104+
class _IndexedAttributeRequired(TypedDict):
    """Keys that every ``IndexedAttributeData`` dictionary must contain."""

    i: int


class IndexedAttributeData(_IndexedAttributeRequired, total=False):
    """
    Represents a dictionary structure for indexed attribute data.

    The required key lives on the total base class so that only ``j`` is
    optional; declaring everything under ``total=False`` would make ``i``
    optional as well. TypedDict fields cannot carry default values, so an
    absent ``j`` is expressed by omitting the key rather than by a default.

    Attributes:
        i (int): The primary index value. This field is required.
        j (Optional[int]): An optional secondary index value; may be omitted.
    """

    j: Optional[int]
33115
34116
35117def _extract_attributes_from_mapping (span_data : Any , attribute_mapping : AttributeMap ) -> AttributeMap :
36118 """Helper function to extract attributes based on a mapping.
37-
119+
38120 Args:
39121 span_data: The span data object or dict to extract attributes from
40122 attribute_mapping: Dictionary mapping target attributes to source attributes
41-
123+
42124 Returns:
43125 Dictionary of extracted attributes
44126 """
@@ -56,19 +138,48 @@ def _extract_attributes_from_mapping(span_data: Any, attribute_mapping: Attribut
56138 # Skip if value is None or empty
57139 if value is None or (isinstance (value , (list , dict , str )) and not value ):
58140 continue
59-
141+
60142 # Serialize complex objects
61143 elif isinstance (value , (dict , list , object )) and not isinstance (value , (str , int , float , bool )):
62144 value = safe_serialize (value )
63-
145+
64146 attributes [target_attr ] = value
65-
147+
66148 return attributes
67149
68150
def _extract_attributes_from_mapping_with_index(
    span_data: Any,
    attribute_mapping: IndexedAttributeMap,
    i: int,
    j: Optional[int] = None,
) -> AttributeMap:
    """Extract attributes using a mapping whose target keys are index templates.

    Every key of ``attribute_mapping`` is rendered through its ``format``
    method with ``i`` (and ``j`` when it is not None). The resulting plain
    mapping is then passed to ``_extract_attributes_from_mapping`` together
    with ``span_data``.

    Args:
        span_data: The span data object or dict to extract attributes from
        attribute_mapping: Dictionary mapping templated target attribute keys
            to source attribute names
        i: The primary index to use in formatting the attribute keys
        j: An optional secondary index (default is None)

    Returns:
        Dictionary of extracted attributes with formatted indexed keys.
    """
    # Only forward `j` when the caller supplied it, so templates that take
    # `i` alone are not handed an unexpected None.
    index_kwargs: IndexedAttributeData = {"i": i}
    if j is not None:
        index_kwargs["j"] = j

    # Resolve each templated target key into its concrete, indexed form.
    resolved_mapping: AttributeMap = {
        target_key.format(**index_kwargs): source_key
        for target_key, source_key in attribute_mapping.items()
    }

    return _extract_attributes_from_mapping(span_data, resolved_mapping)
178+
179+
69180def get_common_attributes () -> AttributeMap :
70181 """Get common instrumentation attributes used across traces and spans.
71-
182+
72183 Returns:
73184 Dictionary of common instrumentation attributes
74185 """
@@ -80,58 +191,58 @@ def get_common_attributes() -> AttributeMap:
80191
def get_base_trace_attributes(trace: Any) -> AttributeMap:
    """Build the base attribute dictionary for an OpenTelemetry trace.

    Args:
        trace: The trace object to read ``name`` and ``trace_id`` from

    Returns:
        Dictionary with the workflow name, trace id, step type, the common
        instrumentation attributes and the configured default tags. An empty
        dictionary is returned (after logging a warning) when ``trace`` has no
        ``trace_id`` attribute.
    """
    # Guard clause: nothing useful can be produced without a trace id.
    if not hasattr(trace, "trace_id"):
        logger.warning("Cannot create trace attributes: missing trace_id")
        return {}

    base_attributes = {
        WorkflowAttributes.WORKFLOW_NAME: trace.name,
        CoreAttributes.TRACE_ID: trace.trace_id,
        WorkflowAttributes.WORKFLOW_STEP_TYPE: "trace",
    }
    # Applied last so the common attributes take precedence on key collisions.
    base_attributes.update(get_common_attributes())

    # Tags from the client config belong on the trace only.
    # NOTE(review): imported locally, presumably to avoid a circular import — confirm.
    from agentops import get_client

    # `default_tags` can be a `set` or a `list`; normalise to a list, using an
    # empty list when it is unset or empty.
    default_tags = get_client().config.default_tags
    base_attributes[CoreAttributes.TAGS] = list(default_tags) if default_tags else []

    return base_attributes
114225
def get_base_span_attributes(span: Any) -> AttributeMap:
    """Build the base attribute dictionary for an OpenTelemetry span.

    Args:
        span: The span object to read ``span_id``, ``trace_id`` and
            ``parent_id`` from

    Returns:
        Dictionary with the trace id and span id (each falling back to
        ``"unknown"`` when the attribute is missing), the common
        instrumentation attributes, and the parent id when it is truthy.
    """
    own_id = getattr(span, "span_id", "unknown")
    owning_trace_id = getattr(span, "trace_id", "unknown")
    parent = getattr(span, "parent_id", None)

    base_attributes = {
        CoreAttributes.TRACE_ID: owning_trace_id,
        CoreAttributes.SPAN_ID: own_id,
    }
    # Applied last so the common attributes take precedence on key collisions.
    base_attributes.update(get_common_attributes())

    # The parent id is only recorded when present and truthy.
    if parent:
        base_attributes[CoreAttributes.PARENT_ID] = parent

    return base_attributes
0 commit comments