From 9ce1ae5d1197432aca1d880832f0cf72b41bc798 Mon Sep 17 00:00:00 2001 From: Alexander Alderman Webb Date: Tue, 21 Apr 2026 08:46:26 +0200 Subject: [PATCH] ref(langchain): Revert input truncation --- sentry_sdk/integrations/langchain.py | 61 +++++---------- .../integrations/langchain/test_langchain.py | 78 ------------------- 2 files changed, 20 insertions(+), 119 deletions(-) diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py index 52a7fe6695..438137becf 100644 --- a/sentry_sdk/integrations/langchain.py +++ b/sentry_sdk/integrations/langchain.py @@ -14,7 +14,6 @@ get_start_span_function, normalize_message_roles, set_data_normalized, - truncate_and_annotate_messages, ) from sentry_sdk.consts import OP, SPANDATA from sentry_sdk.integrations import DidNotEnable, Integration @@ -377,17 +376,12 @@ def on_llm_start( } for prompt in prompts ] - scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages( - normalized_messages, span, scope + set_data_normalized( + span, + SPANDATA.GEN_AI_REQUEST_MESSAGES, + normalized_messages, + unpack=False, ) - if messages_data is not None: - set_data_normalized( - span, - SPANDATA.GEN_AI_REQUEST_MESSAGES, - messages_data, - unpack=False, - ) def on_chat_model_start( self: "SentryLangchainCallback", @@ -457,17 +451,12 @@ def on_chat_model_start( self._normalize_langchain_message(message) ) normalized_messages = normalize_message_roles(normalized_messages) - scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages( - normalized_messages, span, scope + set_data_normalized( + span, + SPANDATA.GEN_AI_REQUEST_MESSAGES, + normalized_messages, + unpack=False, ) - if messages_data is not None: - set_data_normalized( - span, - SPANDATA.GEN_AI_REQUEST_MESSAGES, - messages_data, - unpack=False, - ) def on_chat_model_end( self: "SentryLangchainCallback", @@ -979,17 +968,12 @@ def new_invoke(self: "Any", *args: "Any", **kwargs: "Any") -> "Any": and integration.include_prompts ): normalized_messages = normalize_message_roles([input]) - scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages( - normalized_messages, span, scope + set_data_normalized( + span, + SPANDATA.GEN_AI_REQUEST_MESSAGES, + normalized_messages, + unpack=False, ) - if messages_data is not None: - set_data_normalized( - span, - SPANDATA.GEN_AI_REQUEST_MESSAGES, - messages_data, - unpack=False, - ) output = result.get("output") if ( @@ -1041,17 +1025,12 @@ def new_stream(self: "Any", *args: "Any", **kwargs: "Any") -> "Any": and integration.include_prompts ): normalized_messages = normalize_message_roles([input]) - scope = sentry_sdk.get_current_scope() - messages_data = truncate_and_annotate_messages( - normalized_messages, span, scope + set_data_normalized( + span, + SPANDATA.GEN_AI_REQUEST_MESSAGES, + normalized_messages, + unpack=False, ) - if messages_data is not None: - set_data_normalized( - span, - SPANDATA.GEN_AI_REQUEST_MESSAGES, - messages_data, - unpack=False, - ) # Run the agent result = f(self, *args, **kwargs) diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py index 319b96a06a..2fd7b953e8 100644 --- a/tests/integrations/langchain/test_langchain.py +++ b/tests/integrations/langchain/test_langchain.py @@ -1290,84 +1290,6 @@ def test_langchain_message_role_normalization_units(): assert normalized[5] == "string message" # String message unchanged -def test_langchain_message_truncation(sentry_init, capture_items): - """Test that large messages are truncated properly in Langchain integration.""" - from langchain_core.outputs import LLMResult, Generation - - sentry_init( - integrations=[LangchainIntegration(include_prompts=True)], - traces_sample_rate=1.0, - send_default_pii=True, - ) - items = capture_items("transaction", "span") - - callback = SentryLangchainCallback(max_span_map_size=100, include_prompts=True) - - run_id = "12345678-1234-1234-1234-123456789012" - serialized = {"_type": "openai-chat", "model_name": "gpt-3.5-turbo"} - - large_content = ( - "This is a very long message that will exceed our size limits. " * 1000 - ) - prompts = [ - "small message 1", - large_content, - large_content, - "small message 4", - "small message 5", - ] - - with start_transaction(): - callback.on_llm_start( - serialized=serialized, - prompts=prompts, - run_id=run_id, - name="my_pipeline", - invocation_params={ - "temperature": 0.7, - "max_tokens": 100, - "model": "gpt-3.5-turbo", - }, - ) - - response = LLMResult( - generations=[[Generation(text="The response")]], - llm_output={ - "token_usage": { - "total_tokens": 25, - "prompt_tokens": 10, - "completion_tokens": 15, - } - }, - ) - callback.on_llm_end(response=response, run_id=run_id) - - tx = next(item.payload for item in items if item.type == "transaction") - assert tx["type"] == "transaction" - - spans = [item.payload for item in items if item.type == "span"] - llm_spans = [ - span - for span in spans - if span["attributes"].get("sentry.op") == "gen_ai.text_completion" - ] - assert len(llm_spans) > 0 - - llm_span = llm_spans[0] - assert llm_span["attributes"]["gen_ai.operation.name"] == "text_completion" - assert llm_span["attributes"][SPANDATA.GEN_AI_PIPELINE_NAME] == "my_pipeline" - - assert SPANDATA.GEN_AI_REQUEST_MESSAGES in llm_span["attributes"] - messages_data = llm_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert isinstance(messages_data, str) - - parsed_messages = json.loads(messages_data) - assert isinstance(parsed_messages, list) - assert len(parsed_messages) == 1 - assert "small message 5" in str(parsed_messages[0]) - assert tx["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 5 - - @pytest.mark.parametrize( "send_default_pii, include_prompts", [