From 0d5d63feb73b2f6fb7e7185549e1a70d4828d706 Mon Sep 17 00:00:00 2001 From: MemOS AutoDev Date: Mon, 29 Jun 2026 23:04:08 +0800 Subject: [PATCH] fix: split chat-extraction prompt into system+user messages (#1269) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The mem_reader chat path was sending the entire prompt (instructions + examples + ``Conversation:\n${conversation}\n\nYour Output:``) as a single ``user`` role message. Weak instruction-following LLMs (qwen2.5:1.5b, phi4-mini, etc.) then continued the trailing ``user: ...`` lines as if they were the live conversation, replying to the user's last message instead of emitting the structured JSON. The resulting ``summary`` field (surfaced as ``TreeNodeTextualMemoryMetadata.background`` in the Memos UI memory summary) became a chat reply, not a summary. Introduce ``build_chat_extraction_messages`` in ``src/memos/mem_reader/utils.py`` that splits the rendered prompt at the ``Conversation:`` / ``对话:`` marker, placing instructions / examples / format spec in a ``system`` message and the conversation block (plus a strict ``Return ONLY the single JSON object... do not reply to or continue the conversation`` trailer) in the ``user`` message. Doc / general_string templates without the marker fall back to a single ``user`` message so non-chat callers are unaffected. Rewire the three chat call sites (``SimpleStructMemReader``, ``StrategyStructMemReader``, ``MultiModalMemReader``) to use the new helper and add regression tests covering both the system+user shape and the fallback case. 
Refs: https://github.com/MemTensor/MemOS/issues/1269 --- src/memos/mem_reader/multi_modal_struct.py | 9 +++- src/memos/mem_reader/simple_struct.py | 8 ++- src/memos/mem_reader/strategy_struct.py | 5 +- src/memos/mem_reader/utils.py | 56 +++++++++++++++++++ tests/mem_reader/test_simple_structure.py | 63 +++++++++++++++++++++- 5 files changed, 136 insertions(+), 5 deletions(-) diff --git a/src/memos/mem_reader/multi_modal_struct.py b/src/memos/mem_reader/multi_modal_struct.py index da35a4656..477ce11f1 100644 --- a/src/memos/mem_reader/multi_modal_struct.py +++ b/src/memos/mem_reader/multi_modal_struct.py @@ -14,7 +14,7 @@ from memos.mem_reader.read_pref_memory.process_preference_memory import process_preference_fine from memos.mem_reader.read_skill_memory.process_skill_memory import process_skill_memory_fine from memos.mem_reader.simple_struct import PROMPT_DICT, SimpleStructMemReader -from memos.mem_reader.utils import parse_json_result +from memos.mem_reader.utils import build_chat_extraction_messages, parse_json_result from memos.memories.textual.item import TextualMemoryItem, TreeNodeTextualMemoryMetadata from memos.plugins.hook_defs import H from memos.plugins.hooks import trigger_hook, trigger_single_hook @@ -490,7 +490,12 @@ def _get_llm_response( logger.info(f"[MultiModalParser] Process String Fine Prompt: {prompt}") - messages = [{"role": "user", "content": prompt}] + # Split into system + user messages for chat-mode prompts so weak + # LLMs don't continue the embedded ``user: ...`` conversation + # (issue #1269). Doc / general_string templates have no + # ``Conversation:`` marker; the helper passes them through as a + # single ``user`` message — call shape preserved. 
+ messages = build_chat_extraction_messages(prompt) try: response_text = self.llm.generate(messages) response_json = parse_json_result(response_text) diff --git a/src/memos/mem_reader/simple_struct.py b/src/memos/mem_reader/simple_struct.py index 3c82350df..9c61a6ba5 100644 --- a/src/memos/mem_reader/simple_struct.py +++ b/src/memos/mem_reader/simple_struct.py @@ -25,6 +25,7 @@ from memos.types.general_types import UserContext from memos.mem_reader.read_multi_modal import coerce_scene_data, detect_lang from memos.mem_reader.utils import ( + build_chat_extraction_messages, count_tokens_text, derive_key, parse_json_result, @@ -280,7 +281,12 @@ def _get_llm_response(self, mem_str: str, custom_tags: list[str] | None) -> dict if self.config.remove_prompt_example: prompt = prompt.replace(examples, "") - messages = [{"role": "user", "content": prompt}] + # Split into system + user messages so weak instruction-following + # LLMs do not interpret the embedded ``user: ...`` lines at the + # tail of the prompt as a chat they should continue, which would + # cause ``summary`` to be a chat reply instead of a real summary + # (issue #1269). 
+ messages = build_chat_extraction_messages(prompt) response_text = self._safe_generate(messages) response_json = self._safe_parse(response_text) diff --git a/src/memos/mem_reader/strategy_struct.py b/src/memos/mem_reader/strategy_struct.py index d550d89e9..9d216d67a 100644 --- a/src/memos/mem_reader/strategy_struct.py +++ b/src/memos/mem_reader/strategy_struct.py @@ -7,6 +7,7 @@ from memos.configs.parser import ParserConfigFactory from memos.mem_reader.read_multi_modal import detect_lang from memos.mem_reader.simple_struct import SimpleStructMemReader +from memos.mem_reader.utils import build_chat_extraction_messages from memos.parsers.factory import ParserFactory from memos.templates.mem_reader_prompts import ( CUSTOM_TAGS_INSTRUCTION, @@ -57,7 +58,9 @@ def _get_llm_response(self, mem_str: str, custom_tags: list[str] | None) -> dict if self.config.remove_prompt_example: # TODO unused prompt = prompt.replace(examples, "") - messages = [{"role": "user", "content": prompt}] + # Split into system + user messages — see simple_struct for the + # rationale (issue #1269). + messages = build_chat_extraction_messages(prompt) try: response_text = self.llm.generate(messages) response_json = self.parse_json_result(response_text) diff --git a/src/memos/mem_reader/utils.py b/src/memos/mem_reader/utils.py index c08e8a4ed..29694ee22 100644 --- a/src/memos/mem_reader/utils.py +++ b/src/memos/mem_reader/utils.py @@ -35,6 +35,62 @@ def derive_key(text: str, max_len: int = 80) -> str: return (sent[:max_len]).strip() +# Markers that introduce the rendered conversation block inside the +# mem_reader chat-extraction prompt templates (English + Chinese). +_CHAT_CONVO_MARKERS: tuple[str, ...] = ("\nConversation:\n", "\n对话:\n") +_CHAT_OUTPUT_SUFFIXES: tuple[str, ...] = ("Your Output:", "您的输出:") +_CHAT_JSON_ONLY_TRAILER: str = ( + "\n\nReturn ONLY the single JSON object described above. " + "Do not reply to or continue the conversation." 
+) + + +def build_chat_extraction_messages(prompt: str) -> list[dict[str, str]]: + """Split a rendered chat-extraction prompt into ``system`` + ``user`` messages. + + The mem_reader chat prompt templates end with:: + + ... + Conversation: + + + Your Output: + + Sending this whole block as a single ``user`` role message causes weak + instruction-following LLMs (small Ollama models, qwen2.5:1.5b, + phi4-mini, etc.) to *continue* the trailing ``user: ...`` lines as if + they were the live conversation, replying to the user's last message + instead of emitting the structured JSON. Putting the instructions / + examples / format spec in a ``system`` message and only the + conversation block (plus an explicit "JSON only, do not reply" + trailer) in the ``user`` message restores instruction-following on + those models (see issue #1269). + + If no conversation marker is found (doc / general_string templates or + a custom caller), fall back to a single ``user`` message so non-chat + call sites are unaffected. + """ + if not prompt: + return [{"role": "user", "content": prompt or ""}] + for marker in _CHAT_CONVO_MARKERS: + idx = prompt.find(marker) + if idx == -1: + continue + system_part = prompt[:idx].rstrip() + # Keep the leading newline trimmed; preserve the "Conversation:" + # header on the user side so the model still sees a familiar label. 
+ user_part = prompt[idx + 1 :].rstrip() + for suffix in _CHAT_OUTPUT_SUFFIXES: + if user_part.endswith(suffix): + user_part = user_part[: -len(suffix)].rstrip() + user_part = f"{user_part}{_CHAT_JSON_ONLY_TRAILER}" + return [ + {"role": "system", "content": system_part}, + {"role": "user", "content": user_part}, + ] + return [{"role": "user", "content": prompt}] + + def parse_json_result(response_text: str) -> dict: s = (response_text or "").strip() diff --git a/tests/mem_reader/test_simple_structure.py b/tests/mem_reader/test_simple_structure.py index 8243cc96e..e1924f847 100644 --- a/tests/mem_reader/test_simple_structure.py +++ b/tests/mem_reader/test_simple_structure.py @@ -7,7 +7,7 @@ from memos.embedders.factory import EmbedderFactory from memos.llms.factory import LLMFactory from memos.mem_reader.simple_struct import SimpleStructMemReader -from memos.mem_reader.utils import parse_json_result +from memos.mem_reader.utils import build_chat_extraction_messages, parse_json_result from memos.memories.textual.item import TextualMemoryItem @@ -118,6 +118,67 @@ def test_parse_json_result_failure(self): self.assertEqual(result, {}) + def test_get_llm_response_uses_system_and_user_messages(self): + """Regression for #1269: weak models reply to the conversation + instead of summarising because the entire prompt (instructions + + examples + conversation + ``Your Output:`` trailer) was being sent + as a single ``user`` message. The chat extraction path must split + the prompt into a system message (instructions / examples / format) + and a user message (conversation block + JSON-only trailer). 
+ """ + captured: list[list[dict]] = [] + + def fake_generate(messages): + captured.append(messages) + return ( + '{"memory list": [{"key": "k", "memory_type": "UserMemory", ' + '"value": "v", "tags": []}], "summary": "real summary"}' + ) + + self.reader.llm.generate.side_effect = fake_generate + self.reader.embedder.embed.return_value = [[0.0]] + + self.reader._get_llm_response( + "user: [2025-06-29 10:00]: Hello, how are you?\n" + "assistant: I'm fine, thanks. And you?\n" + "user: [2025-06-29 10:01]: Pretty good.\n", + custom_tags=None, + ) + + assert captured, "LLM was not called" + messages = captured[0] + roles = [m["role"] for m in messages] + self.assertEqual( + roles[:2], + ["system", "user"], + f"chat extraction must send a system+user pair, got {roles!r}", + ) + + system_content = messages[0]["content"] + user_content = messages[1]["content"] + + # Instructions / examples / format spec must live in the system + # message, not the user message. + self.assertIn("memory extraction expert", system_content) + self.assertIn('"summary"', system_content) + self.assertNotIn("memory extraction expert", user_content) + + # User message must carry the conversation block + an explicit + # "JSON only, do not reply" trailer that prevents small models + # from continuing the embedded chat. + self.assertIn("Conversation:", user_content) + self.assertIn("Pretty good.", user_content) + self.assertIn("JSON", user_content) + self.assertNotIn("Your Output:", user_content) + + def test_build_chat_extraction_messages_fallback(self): + """When no Conversation: marker is present (doc / general string + templates), the helper falls back to a single user message so that + callers outside the chat path are unaffected. + """ + msgs = build_chat_extraction_messages("plain prompt without marker") + self.assertEqual(msgs, [{"role": "user", "content": "plain prompt without marker"}]) + if __name__ == "__main__": unittest.main()