Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions src/memos/mem_reader/multi_modal_struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from memos.mem_reader.read_pref_memory.process_preference_memory import process_preference_fine
from memos.mem_reader.read_skill_memory.process_skill_memory import process_skill_memory_fine
from memos.mem_reader.simple_struct import PROMPT_DICT, SimpleStructMemReader
from memos.mem_reader.utils import parse_json_result
from memos.mem_reader.utils import build_chat_extraction_messages, parse_json_result
from memos.memories.textual.item import TextualMemoryItem, TreeNodeTextualMemoryMetadata
from memos.plugins.hook_defs import H
from memos.plugins.hooks import trigger_hook, trigger_single_hook
Expand Down Expand Up @@ -490,7 +490,12 @@ def _get_llm_response(

logger.info(f"[MultiModalParser] Process String Fine Prompt: {prompt}")

messages = [{"role": "user", "content": prompt}]
# Split into system + user messages for chat-mode prompts so weak
# LLMs don't continue the embedded ``user: ...`` conversation
# (issue #1269). Doc / general_string templates have no
# ``Conversation:`` marker; the helper passes them through as a
# single ``user`` message — call shape preserved.
messages = build_chat_extraction_messages(prompt)
try:
response_text = self.llm.generate(messages)
response_json = parse_json_result(response_text)
Expand Down
8 changes: 7 additions & 1 deletion src/memos/mem_reader/simple_struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from memos.types.general_types import UserContext
from memos.mem_reader.read_multi_modal import coerce_scene_data, detect_lang
from memos.mem_reader.utils import (
build_chat_extraction_messages,
count_tokens_text,
derive_key,
parse_json_result,
Expand Down Expand Up @@ -280,7 +281,12 @@ def _get_llm_response(self, mem_str: str, custom_tags: list[str] | None) -> dict

if self.config.remove_prompt_example:
prompt = prompt.replace(examples, "")
messages = [{"role": "user", "content": prompt}]
# Split into system + user messages so weak instruction-following
# LLMs do not interpret the embedded ``user: ...`` lines at the
# tail of the prompt as a chat they should continue, which would
# cause ``summary`` to be a chat reply instead of a real summary
# (issue #1269).
messages = build_chat_extraction_messages(prompt)

response_text = self._safe_generate(messages)
response_json = self._safe_parse(response_text)
Expand Down
5 changes: 4 additions & 1 deletion src/memos/mem_reader/strategy_struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from memos.configs.parser import ParserConfigFactory
from memos.mem_reader.read_multi_modal import detect_lang
from memos.mem_reader.simple_struct import SimpleStructMemReader
from memos.mem_reader.utils import build_chat_extraction_messages
from memos.parsers.factory import ParserFactory
from memos.templates.mem_reader_prompts import (
CUSTOM_TAGS_INSTRUCTION,
Expand Down Expand Up @@ -57,7 +58,9 @@ def _get_llm_response(self, mem_str: str, custom_tags: list[str] | None) -> dict

if self.config.remove_prompt_example: # TODO unused
prompt = prompt.replace(examples, "")
messages = [{"role": "user", "content": prompt}]
# Split into system + user messages — see simple_struct for the
# rationale (issue #1269).
messages = build_chat_extraction_messages(prompt)
try:
response_text = self.llm.generate(messages)
response_json = self.parse_json_result(response_text)
Expand Down
56 changes: 56 additions & 0 deletions src/memos/mem_reader/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,62 @@ def derive_key(text: str, max_len: int = 80) -> str:
return (sent[:max_len]).strip()


# Markers that introduce the rendered conversation block inside the
# mem_reader chat-extraction prompt templates (English + Chinese).
_CHAT_CONVO_MARKERS: tuple[str, ...] = ("\nConversation:\n", "\n对话:\n")
# Trailing "answer goes here" cues that follow the conversation block; they
# are stripped from the user message and replaced by the trailer below.
_CHAT_OUTPUT_SUFFIXES: tuple[str, ...] = ("Your Output:", "您的输出:")
# Explicit instruction appended to the user message in place of the cue.
_CHAT_JSON_ONLY_TRAILER: str = (
    "\n\nReturn ONLY the single JSON object described above. "
    "Do not reply to or continue the conversation."
)


def build_chat_extraction_messages(prompt: str) -> list[dict[str, str]]:
    """Split a rendered chat-extraction prompt into ``system`` + ``user`` messages.

    The mem_reader chat prompt templates end with::

        ...
        Conversation:
        <rendered conversation>

        Your Output:

    Sending this whole block as a single ``user`` role message causes weak
    instruction-following LLMs to *continue* the trailing ``user: ...``
    lines as if they were the live conversation instead of emitting the
    structured JSON (see issue #1269). This helper therefore puts the
    instructions / examples / format spec in a ``system`` message and only
    the conversation block, plus an explicit "JSON only, do not reply"
    trailer, in the ``user`` message.

    The split happens at the LAST conversation marker found in the prompt
    (whichever language it is in), because the live conversation is the
    trailing block of the template. Splitting at the first marker would
    move any worked example that carries its own ``Conversation:`` header,
    and everything after it, into the ``user`` message.

    Args:
        prompt: The fully rendered prompt text.

    Returns:
        ``[system, user]`` when a marker is found and some text precedes
        it; a single ``user`` message carrying the conversation block and
        trailer when nothing precedes the marker; otherwise a single
        ``user`` message with the prompt unchanged (doc / general_string
        templates or a custom caller), so non-chat call sites are
        unaffected.
    """
    if not prompt:
        return [{"role": "user", "content": prompt or ""}]

    # Position of the last marker in any language; -1 when none is present.
    # NOTE(review): a conversation whose own text contains a marker line
    # would be split inside the conversation — accepted trade-off so that
    # example blocks stay on the system side; confirm against the templates.
    split_at = max(
        (prompt.rfind(marker) for marker in _CHAT_CONVO_MARKERS),
        default=-1,
    )
    if split_at == -1:
        return [{"role": "user", "content": prompt}]

    system_part = prompt[:split_at].rstrip()
    # Skip only the marker's leading newline so the "Conversation:" header
    # stays on the user side and the model still sees a familiar label.
    user_part = prompt[split_at + 1 :].rstrip()
    for suffix in _CHAT_OUTPUT_SUFFIXES:
        if user_part.endswith(suffix):
            user_part = user_part[: -len(suffix)].rstrip()

    user_message = {
        "role": "user",
        "content": f"{user_part}{_CHAT_JSON_ONLY_TRAILER}",
    }
    if not system_part:
        # Nothing precedes the marker: an empty system message carries no
        # instructions, so send the user message alone.
        return [user_message]
    return [{"role": "system", "content": system_part}, user_message]


def parse_json_result(response_text: str) -> dict:
s = (response_text or "").strip()

Expand Down
63 changes: 62 additions & 1 deletion tests/mem_reader/test_simple_structure.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from memos.embedders.factory import EmbedderFactory
from memos.llms.factory import LLMFactory
from memos.mem_reader.simple_struct import SimpleStructMemReader
from memos.mem_reader.utils import parse_json_result
from memos.mem_reader.utils import build_chat_extraction_messages, parse_json_result
from memos.memories.textual.item import TextualMemoryItem


Expand Down Expand Up @@ -118,6 +118,67 @@ def test_parse_json_result_failure(self):

self.assertEqual(result, {})

def test_get_llm_response_uses_system_and_user_messages(self):
    """Regression test for #1269.

    The chat extraction path used to send the whole prompt as one ``user``
    message, which let weak models answer the conversation rather than
    summarise it. This checks that the LLM now receives a ``system``
    message with the instructions and format spec, followed by a ``user``
    message with the conversation block and a JSON-only trailer.
    """
    conversation = (
        "user: [2025-06-29 10:00]: Hello, how are you?\n"
        "assistant: I'm fine, thanks. And you?\n"
        "user: [2025-06-29 10:01]: Pretty good.\n"
    )
    canned_reply = (
        '{"memory list": [{"key": "k", "memory_type": "UserMemory", "value": "v", "tags": []}], '
        '"summary": "real summary"}'
    )
    captured: list[list[dict]] = []

    def record_and_reply(messages):
        # Remember what the reader sent, then answer with valid JSON.
        captured.append(messages)
        return canned_reply

    self.reader.embedder.embed.return_value = [[0.0]]
    self.reader.llm.generate.side_effect = record_and_reply

    self.reader._get_llm_response(conversation, custom_tags=None)

    assert captured, "LLM was not called"
    sent = captured[0]
    roles = [entry["role"] for entry in sent]
    self.assertEqual(
        roles[:2],
        ["system", "user"],
        f"chat extraction must send a system+user pair, got {roles!r}",
    )

    system_content, user_content = sent[0]["content"], sent[1]["content"]

    # The instructions and the format spec belong to the system message
    # and must not leak into the user message.
    self.assertIn("memory extraction expert", system_content)
    self.assertIn('"summary"', system_content)
    self.assertNotIn("memory extraction expert", user_content)

    # The user message holds the conversation block and the JSON-only
    # trailer, without the original "Your Output:" cue.
    self.assertIn("Conversation:", user_content)
    self.assertIn("Pretty good.", user_content)
    self.assertIn("JSON", user_content)
    self.assertNotIn("Your Output:", user_content)

def test_build_chat_extraction_messages_fallback(self):
    """A prompt with no conversation marker comes back as one ``user`` message.

    Doc / general string templates carry no ``Conversation:`` marker, so
    the helper must pass them through untouched.
    """
    plain_prompt = "plain prompt without marker"
    expected = [{"role": "user", "content": plain_prompt}]
    self.assertEqual(build_chat_extraction_messages(plain_prompt), expected)


if __name__ == "__main__":
unittest.main()
Loading