# /opt/hc_python/lib/python3.12/site-packages/sentry_sdk/integrations/litellm.py
from typing import TYPE_CHECKING

import sentry_sdk
from sentry_sdk import consts
from sentry_sdk.ai.monitoring import record_token_usage
from sentry_sdk.ai.utils import (
    get_start_span_function,
    set_data_normalized,
    truncate_and_annotate_messages,
)
from sentry_sdk.consts import SPANDATA
from sentry_sdk.integrations import DidNotEnable, Integration
from sentry_sdk.scope import should_send_default_pii
from sentry_sdk.utils import event_from_exception

if TYPE_CHECKING:
    from typing import Any, Dict
    from datetime import datetime

try:
    import litellm  # type: ignore[import-not-found]
except ImportError:
    raise DidNotEnable("LiteLLM not installed")


def _get_metadata_dict(kwargs):
    # type: (Dict[str, Any]) -> Dict[str, Any]
    """Get the metadata dictionary from the kwargs."""
    litellm_params = kwargs.setdefault("litellm_params", {})
    # we need this weird little dance, as metadata might be set but may be None initially
    metadata = litellm_params.get("metadata")
    if metadata is None:
        metadata = {}
        litellm_params["metadata"] = metadata
    return metadata


def _input_callback(kwargs):
    # type: (Dict[str, Any]) -> None
    """Handle the start of a request."""
    integration = sentry_sdk.get_client().get_integration(LiteLLMIntegration)
    if integration is None:
        return

    # Get key parameters
    full_model = kwargs.get("model", "")
    try:
        model, provider, _, _ = litellm.get_llm_provider(full_model)
    except Exception:
        model = full_model
        provider = "unknown"
    call_type = kwargs.get("call_type", None)
    if call_type == "embedding":
        operation = "embeddings"
    else:
        operation = "chat"

    # Start a new span/transaction
    span = get_start_span_function()(
        op=(
            consts.OP.GEN_AI_CHAT
            if operation == "chat"
            else consts.OP.GEN_AI_EMBEDDINGS
        ),
        name=f"{operation} {model}",
        origin=LiteLLMIntegration.origin,
    )
    span.__enter__()

    # Store span for later
    _get_metadata_dict(kwargs)["_sentry_span"] = span

    # Set basic data
    set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, provider)
    set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, operation)

    # Record input/messages if allowed
    if should_send_default_pii() and integration.include_prompts:
        if operation == "embeddings":
            # For embeddings, look for the 'input' parameter
            embedding_input = kwargs.get("input")
            if embedding_input:
                scope = sentry_sdk.get_current_scope()
                # Normalize to list format
                input_list = (
                    embedding_input
                    if isinstance(embedding_input, list)
                    else [embedding_input]
                )
                messages_data = truncate_and_annotate_messages(input_list, span, scope)
                if messages_data is not None:
                    set_data_normalized(
                        span,
                        SPANDATA.GEN_AI_EMBEDDINGS_INPUT,
                        messages_data,
                        unpack=False,
                    )
        else:
            # For chat, look for the 'messages' parameter
            messages = kwargs.get("messages", [])
            if messages:
                scope = sentry_sdk.get_current_scope()
                messages_data = truncate_and_annotate_messages(messages, span, scope)
                if messages_data is not None:
                    set_data_normalized(
                        span,
                        SPANDATA.GEN_AI_REQUEST_MESSAGES,
                        messages_data,
                        unpack=False,
                    )

    # Record other parameters
    params = {
        "model": SPANDATA.GEN_AI_REQUEST_MODEL,
        "stream": SPANDATA.GEN_AI_RESPONSE_STREAMING,
        "max_tokens": SPANDATA.GEN_AI_REQUEST_MAX_TOKENS,
        "presence_penalty": SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY,
        "frequency_penalty": SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY,
        "temperature": SPANDATA.GEN_AI_REQUEST_TEMPERATURE,
        "top_p": SPANDATA.GEN_AI_REQUEST_TOP_P,
    }
    for key, attribute in params.items():
        value = kwargs.get(key)
        if value is not None:
            set_data_normalized(span, attribute, value)

    # Record LiteLLM-specific parameters
    litellm_params = {
        "api_base": kwargs.get("api_base"),
"api_version": kwargs.get("api_version"), "custom_llm_provider": kwargs.get("custom_llm_provider"), } for key, value in litellm_params.items(): if value is not None: set_data_normalized(span, f"gen_ai.litellm.{key}", value) def _success_callback(kwargs, completion_response, start_time, end_time): # type: (Dict[str, Any], Any, datetime, datetime) -> None """Handle successful completion.""" span = _get_metadata_dict(kwargs).get("_sentry_span") if span is None: return integration = sentry_sdk.get_client().get_integration(LiteLLMIntegration) if integration is None: return try: # Record model information if hasattr(completion_response, "model"): set_data_normalized( span, SPANDATA.GEN_AI_RESPONSE_MODEL, completion_response.model ) # Record response content if allowed if should_send_default_pii() and integration.include_prompts: if hasattr(completion_response, "choices"): response_messages = [] for choice in completion_response.choices: if hasattr(choice, "message"): if hasattr(choice.message, "model_dump"): response_messages.append(choice.message.model_dump()) elif hasattr(choice.message, "dict"): response_messages.append(choice.message.dict()) else: # Fallback for basic message objects msg = {} if hasattr(choice.message, "role"): msg["role"] = choice.message.role if hasattr(choice.message, "content"): msg["content"] = choice.message.content if hasattr(choice.message, "tool_calls"): msg["tool_calls"] = choice.message.tool_calls response_messages.append(msg) if response_messages: set_data_normalized( span, SPANDATA.GEN_AI_RESPONSE_TEXT, response_messages ) # Record token usage if hasattr(completion_response, "usage"): usage = completion_response.usage record_token_usage( span, input_tokens=getattr(usage, "prompt_tokens", None), output_tokens=getattr(usage, "completion_tokens", None), total_tokens=getattr(usage, "total_tokens", None), ) finally: # Always finish the span and clean up span.__exit__(None, None, None) def _failure_callback(kwargs, exception, start_time, end_time): # type: (Dict[str, Any], Exception, datetime, datetime) -> None """Handle request failure.""" span = _get_metadata_dict(kwargs).get("_sentry_span") if span is None: return try: # Capture the exception event, hint = event_from_exception( exception, client_options=sentry_sdk.get_client().options, mechanism={"type": "litellm", "handled": False}, ) sentry_sdk.capture_event(event, hint=hint) finally: # Always finish the span and clean up span.__exit__(type(exception), exception, None) class LiteLLMIntegration(Integration): """ LiteLLM integration for Sentry. This integration automatically captures LiteLLM API calls and sends them to Sentry for monitoring and error tracking. It supports all 100+ LLM providers that LiteLLM supports, including OpenAI, Anthropic, Google, Cohere, and many others. 
    Features:
    - Automatic exception capture for all LiteLLM calls
    - Token usage tracking across all providers
    - Provider detection and attribution
    - Input/output message capture (configurable)
    - Streaming response support
    - Cost tracking integration

    Usage:
    ```python
    import litellm
    import sentry_sdk

    # Initialize Sentry with the LiteLLM integration
    sentry_sdk.init(
        dsn="your-dsn",
        send_default_pii=True,
        integrations=[
            sentry_sdk.integrations.LiteLLMIntegration(
                include_prompts=True  # Set to False to exclude message content
            )
        ]
    )

    # All LiteLLM calls will now be monitored
    response = litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Hello!"}]
    )
    ```

    Configuration:
    - include_prompts (bool): Whether to include prompts and responses in
      spans. Defaults to True. Set to False to exclude potentially sensitive
      data.
    """

    identifier = "litellm"
    origin = f"auto.ai.{identifier}"

    def __init__(self, include_prompts=True):
        # type: (LiteLLMIntegration, bool) -> None
        self.include_prompts = include_prompts

    @staticmethod
    def setup_once():
        # type: () -> None
        """Set up LiteLLM callbacks for monitoring."""
        litellm.input_callback = litellm.input_callback or []
        if _input_callback not in litellm.input_callback:
            litellm.input_callback.append(_input_callback)

        litellm.success_callback = litellm.success_callback or []
        if _success_callback not in litellm.success_callback:
            litellm.success_callback.append(_success_callback)

        litellm.failure_callback = litellm.failure_callback or []
        if _failure_callback not in litellm.failure_callback:
            litellm.failure_callback.append(_failure_callback)
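

# Usage sketch (illustrative only, not executed on import). It mirrors the
# docstring example above and adds an embeddings call so both span types
# handled by the callbacks are exercised. The DSN and model names below are
# placeholders, and the calls assume LiteLLM provider credentials are
# configured in the environment.
if __name__ == "__main__":
    sentry_sdk.init(
        dsn="https://examplePublicKey@o0.ingest.sentry.io/0",  # placeholder DSN
        send_default_pii=True,
        integrations=[LiteLLMIntegration(include_prompts=True)],
    )

    # Traced as a chat span: started by _input_callback and finished by
    # _success_callback (or _failure_callback if the request raises).
    litellm.completion(
        model="gpt-4o-mini",  # placeholder model
        messages=[{"role": "user", "content": "Hello!"}],
    )

    # Traced as an embeddings span; the raw input is recorded only when
    # send_default_pii and include_prompts are both enabled.
    litellm.embedding(
        model="text-embedding-3-small",  # placeholder model
        input=["Hello!"],
    )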