From 034c621f2e38ddb983f08be99770fb75169143f6 Mon Sep 17 00:00:00 2001 From: autodev Date: Tue, 30 Jun 2026 11:56:45 +0800 Subject: [PATCH] fix(mem_reader): use "memory list" key in LLM-unparseable fallback (#1355) When the LLM response could not be parsed as JSON, SimpleStructMemReader._get_llm_response returned a salvage dict whose sole memory item was keyed by "memory_list" (with an underscore). Downstream consumers in _process_chat_data / _process_transfer_chat_data read the item via `resp.get("memory list", [])` (with a space), so the salvaged UserMemory was silently dropped. Consequence: /product/add (sync mode) returned 200 with "Memory added successfully" while writing zero nodes to Neo4j / Qdrant. The typo was introduced by PR #913 (commit d7aaf57e) while refactoring the LLM exception path; both sibling readers (multi_modal_struct.py, strategy_struct.py) kept the original "memory list" spelling. Fix: restore the "memory list" key in the fallback and document the contract in an inline comment. Add two regression tests covering the unit-level fallback shape and the reader-stage end-to-end path. Closes #1355 --- src/memos/mem_reader/simple_struct.py | 8 ++- tests/mem_reader/test_simple_structure.py | 60 +++++++++++++++++++++++ 2 files changed, 67 insertions(+), 1 deletion(-) diff --git a/src/memos/mem_reader/simple_struct.py b/src/memos/mem_reader/simple_struct.py index 3c82350df..01f9ec6e8 100644 --- a/src/memos/mem_reader/simple_struct.py +++ b/src/memos/mem_reader/simple_struct.py @@ -286,8 +286,14 @@ def _get_llm_response(self, mem_str: str, custom_tags: list[str] | None) -> dict response_json = self._safe_parse(response_text) if not response_json: + # NOTE: the key MUST be ``"memory list"`` (with a space) — the + # downstream consumers in ``_process_chat_data`` / + # ``_process_transfer_chat_data`` read via + # ``resp.get("memory list", [])``. A typo here drops the + # salvaged item silently and causes ``/product/add`` to return + # 200 with zero memories written to Neo4j (bug #1355). return { - "memory_list": [ + "memory list": [ { "key": mem_str[:10], "memory_type": "UserMemory", diff --git a/tests/mem_reader/test_simple_structure.py b/tests/mem_reader/test_simple_structure.py index 8243cc96e..41272c0b2 100644 --- a/tests/mem_reader/test_simple_structure.py +++ b/tests/mem_reader/test_simple_structure.py @@ -118,6 +118,66 @@ def test_parse_json_result_failure(self): self.assertEqual(result, {}) + def test_get_llm_response_fallback_key_matches_consumer(self): + """Regression test for issue #1355. + + When the LLM response cannot be parsed as JSON, `_get_llm_response` + is expected to return a fallback dict whose ``"memory list"`` (with + a single space) entry contains one salvaged ``UserMemory`` item built + from the raw user input. Downstream consumers in + ``_process_chat_data`` read ``resp.get("memory list", [])`` — if the + fallback uses the wrong key (e.g. ``"memory_list"`` with an + underscore) the salvaged memory is silently dropped and the request + produces zero memories despite returning HTTP 200. + """ + # Force `_safe_parse` to return None so the fallback branch is taken. + self.reader.llm.generate.return_value = "not-valid-json" + self.reader.config.remove_prompt_example = False + + resp = self.reader._get_llm_response("test memory content", custom_tags=None) + + # The fallback must use the "memory list" (with space) key the + # rest of the pipeline reads; verify the salvaged item shape too. + salvaged = resp.get("memory list", []) + self.assertEqual( + len(salvaged), + 1, + f"Fallback memory list must contain exactly one salvaged item, got: {resp!r}", + ) + self.assertEqual(salvaged[0]["value"], "test memory content") + self.assertEqual(salvaged[0]["memory_type"], "UserMemory") + + def test_process_chat_data_fine_yields_node_when_llm_unparseable(self): + """Regression test for issue #1355 (end-to-end of the reader stage). + + With Kimi-style outputs that fail strict JSON parsing, + ``_process_chat_data`` (fine mode) should still emit a fallback + ``TextualMemoryItem`` so that the upstream `/product/add` request + ultimately writes at least one node to Neo4j instead of returning + 200 with zero stored memories. + """ + # Embedder produces a stable embedding for the salvaged item. + self.reader.embedder.embed = MagicMock(return_value=[[0.1, 0.2, 0.3]]) + # LLM returns garbage so `_safe_parse` returns None and the fallback + # in `_get_llm_response` is exercised. + self.reader.llm.generate.return_value = "I cannot produce JSON sorry" + self.reader.config.remove_prompt_example = False + + scene_data_info = [{"role": "user", "content": "test memory content"}] + info = {"user_id": "user1", "session_id": "session1"} + + result = self.reader._process_chat_data(scene_data_info, info, mode="fine") + + self.assertIsInstance(result, list) + self.assertEqual( + len(result), + 1, + "fine-mode _process_chat_data must produce one salvaged memory " + "item when LLM output is unparseable (bug #1355)", + ) + self.assertIsInstance(result[0], TextualMemoryItem) + self.assertIn("test memory content", result[0].memory) + if __name__ == "__main__": unittest.main()