From a18af8b0a3c49e5c5efd4b00f4903d55e937d26f Mon Sep 17 00:00:00 2001 From: hijzy Date: Sun, 25 Jan 2026 23:35:35 +0800 Subject: [PATCH 01/40] test: add mmr dedup --- src/memos/api/handlers/search_handler.py | 161 +++++++++++++++++++++-- src/memos/api/product_models.py | 4 +- src/memos/multi_mem_cube/single_cube.py | 8 +- 3 files changed, 155 insertions(+), 18 deletions(-) diff --git a/src/memos/api/handlers/search_handler.py b/src/memos/api/handlers/search_handler.py index 3774410dc..c8246c7cf 100644 --- a/src/memos/api/handlers/search_handler.py +++ b/src/memos/api/handlers/search_handler.py @@ -5,6 +5,7 @@ using dependency injection for better modularity and testability. """ +import math from typing import Any from memos.api.handlers.base_handler import BaseHandler, HandlerDependencies @@ -55,19 +56,55 @@ def handle_search_memories(self, search_req: APISearchRequest) -> SearchResponse """ self.logger.info(f"[SearchHandler] Search Req is: {search_req}") - # Increase recall pool if deduplication is enabled to ensure diversity original_top_k = search_req.top_k - if search_req.dedup == "sim": - search_req.top_k = original_top_k * 5 - - cube_view = self._build_cube_view(search_req) - - results = cube_view.search_memories(search_req) - if search_req.dedup == "sim": - results = self._dedup_text_memories(results, original_top_k) - self._strip_embeddings(results) - # Restore original top_k for downstream logic or response metadata - search_req.top_k = original_top_k + adjusted_top_k = False + + prev_text_mem_include_embedding: bool | None = None + prev_graph_retriever_include_embedding: bool | None = None + + search_req.dedup = "mmr" + + try: + if search_req.dedup == "sim": + search_req.top_k = original_top_k * 5 + adjusted_top_k = True + elif search_req.dedup == "mmr": + search_req.top_k = original_top_k * 5 + adjusted_top_k = True + + if search_req.dedup == "mmr": + text_mem = getattr(self.naive_mem_cube, "text_mem", None) + if text_mem is not None and hasattr(text_mem, "include_embedding"): + prev_text_mem_include_embedding = text_mem.include_embedding + text_mem.include_embedding = True + + graph_retriever = getattr(self.searcher, "graph_retriever", None) + if graph_retriever is not None and hasattr(graph_retriever, "include_embedding"): + prev_graph_retriever_include_embedding = graph_retriever.include_embedding + graph_retriever.include_embedding = True + + cube_view = self._build_cube_view(search_req) + results = cube_view.search_memories(search_req) + + if search_req.dedup == "sim": + results = self._dedup_text_memories(results, original_top_k) + self._strip_embeddings(results) + elif search_req.dedup == "mmr": + results = self._mmr_dedup_text_memories(results, original_top_k) + self._strip_embeddings(results) + finally: + if adjusted_top_k: + search_req.top_k = original_top_k + + if prev_text_mem_include_embedding is not None: + text_mem = getattr(self.naive_mem_cube, "text_mem", None) + if text_mem is not None and hasattr(text_mem, "include_embedding"): + text_mem.include_embedding = prev_text_mem_include_embedding + + if prev_graph_retriever_include_embedding is not None: + graph_retriever = getattr(self.searcher, "graph_retriever", None) + if graph_retriever is not None and hasattr(graph_retriever, "include_embedding"): + graph_retriever.include_embedding = prev_graph_retriever_include_embedding self.logger.info( f"[SearchHandler] Final search results: count={len(results)} results={results}" @@ -125,6 +162,78 @@ def _dedup_text_memories(self, results: dict[str, Any], target_top_k: 
int) -> di bucket["memories"] = [flat[i][1] for i in selected_indices] return results + def _mmr_dedup_text_memories(self, results: dict[str, Any], target_top_k: int) -> dict[str, Any]: + buckets = results.get("text_mem", []) + if not buckets: + return results + + flat: list[tuple[int, dict[str, Any], float]] = [] + for bucket_idx, bucket in enumerate(buckets): + for mem in bucket.get("memories", []): + score = mem.get("metadata", {}).get("relativity", 0.0) + flat.append((bucket_idx, mem, float(score) if score is not None else 0.0)) + + if len(flat) <= 1: + return results + + embeddings = self._extract_embeddings([mem for _, mem, _ in flat]) + if embeddings is None: + documents = [mem.get("memory", "") for _, mem, _ in flat] + embeddings = self.searcher.embedder.embed(documents) + + similarity_matrix = self._cosine_similarity_matrix_local(embeddings) + + indices_by_bucket: dict[int, list[int]] = {i: [] for i in range(len(buckets))} + for flat_index, (bucket_idx, _, _) in enumerate(flat): + indices_by_bucket[bucket_idx].append(flat_index) + + selected_global: list[int] = [] + selected_by_bucket: dict[int, list[int]] = {i: [] for i in range(len(buckets))} + + lambda_relevance = 0.8 + remaining = set(range(len(flat))) + while remaining: + best_idx: int | None = None + best_mmr: float | None = None + + for idx in remaining: + bucket_idx = flat[idx][0] + if len(selected_by_bucket[bucket_idx]) >= target_top_k: + continue + + relevance = flat[idx][2] + diversity = ( + 0.0 + if not selected_global + else max(similarity_matrix[idx][j] for j in selected_global) + ) + mmr_score = lambda_relevance * relevance - (1.0 - lambda_relevance) * diversity + + if best_mmr is None or mmr_score > best_mmr: + best_mmr = mmr_score + best_idx = idx + + if best_idx is None: + break + + selected_global.append(best_idx) + selected_by_bucket[flat[best_idx][0]].append(best_idx) + remaining.remove(best_idx) + + all_full = True + for bucket_idx, bucket_indices in indices_by_bucket.items(): + if len(selected_by_bucket[bucket_idx]) < min(target_top_k, len(bucket_indices)): + all_full = False + break + if all_full: + break + + for bucket_idx, bucket in enumerate(buckets): + selected_indices = selected_by_bucket.get(bucket_idx, []) + bucket["memories"] = [flat[i][1] for i in selected_indices] + + return results + @staticmethod def _is_unrelated( index: int, @@ -165,6 +274,34 @@ def _strip_embeddings(results: dict[str, Any]) -> None: if "embedding" in metadata: metadata["embedding"] = [] + @staticmethod + def _cosine_similarity_matrix_local(embeddings: list[list[float]]) -> list[list[float]]: + if not embeddings: + return [] + + normalized: list[list[float]] = [] + for vec in embeddings: + norm_sq = 0.0 + for x in vec: + xf = float(x) + norm_sq += xf * xf + denom = math.sqrt(norm_sq) if norm_sq > 0.0 else 1.0 + normalized.append([float(x) / denom for x in vec]) + + n = len(normalized) + sim: list[list[float]] = [[0.0] * n for _ in range(n)] + for i in range(n): + sim[i][i] = 1.0 + vi = normalized[i] + for j in range(i + 1, n): + vj = normalized[j] + dot = 0.0 + for a, b in zip(vi, vj, strict=False): + dot += a * b + sim[i][j] = dot + sim[j][i] = dot + return sim + def _resolve_cube_ids(self, search_req: APISearchRequest) -> list[str]: """ Normalize target cube ids from search_req. 
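For reference, the greedy loop added above is the textbook Maximal Marginal Relevance (MMR) rule: score(i) = lambda * relevance(i) - (1 - lambda) * max over selected j of sim(i, j). A minimal standalone Python sketch of that rule follows; the select_mmr helper and all numbers are illustrative assumptions, not code from this series:

# Standalone MMR sketch (illustrative only).
def select_mmr(relevance: list[float], sim: list[list[float]], k: int, lam: float = 0.8) -> list[int]:
    selected: list[int] = []
    remaining = set(range(len(relevance)))
    while remaining and len(selected) < k:
        # score = lam * relevance - (1 - lam) * max similarity to already-selected items
        def mmr(i: int) -> float:
            max_sim = max((sim[i][j] for j in selected), default=0.0)
            return lam * relevance[i] - (1.0 - lam) * max_sim
        best = max(remaining, key=mmr)
        selected.append(best)
        remaining.remove(best)
    return selected

relevance = [0.9, 0.7, 0.6]             # item 1 is a near-duplicate of item 0
sim = [[1.0, 0.95, 0.1],
       [0.95, 1.0, 0.1],
       [0.1, 0.1, 1.0]]
print(select_mmr(relevance, sim, k=2))  # [0, 2]: the near-duplicate loses to item 2

With lam near 1 the ranking tracks raw relevance; lowering it trades relevance for diversity, which is what the lambda_relevance sweeps (0.6-0.8) in the following commits explore.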
diff --git a/src/memos/api/product_models.py b/src/memos/api/product_models.py index d5f301c9d..0444a531f 100644 --- a/src/memos/api/product_models.py +++ b/src/memos/api/product_models.py @@ -319,11 +319,11 @@ class APISearchRequest(BaseRequest): description="Number of textual memories to retrieve (top-K). Default: 10.", ) - dedup: Literal["no", "sim"] | None = Field( + dedup: Literal["no", "sim", "mmr"] | None = Field( None, description=( "Optional dedup option for textual memories. " - "Use 'no' for no dedup, 'sim' for similarity dedup. " + "Use 'no' for no dedup, 'sim' for similarity dedup, 'mmr' for MMR-based dedup. " "If None, default exact-text dedup is applied." ), ) diff --git a/src/memos/multi_mem_cube/single_cube.py b/src/memos/multi_mem_cube/single_cube.py index 6c3cc0cc7..a063c56cb 100644 --- a/src/memos/multi_mem_cube/single_cube.py +++ b/src/memos/multi_mem_cube/single_cube.py @@ -265,7 +265,7 @@ def _deep_search( info=info, ) formatted_memories = [ - format_memory_item(data, include_embedding=search_req.dedup == "sim") + format_memory_item(data, include_embedding=search_req.dedup in ("sim", "mmr")) for data in enhanced_memories ] return formatted_memories @@ -277,7 +277,7 @@ def _agentic_search( search_req.query, user_id=user_context.mem_cube_id ) formatted_memories = [ - format_memory_item(data, include_embedding=search_req.dedup == "sim") + format_memory_item(data, include_embedding=search_req.dedup in ("sim", "mmr")) for data in deepsearch_results ] return formatted_memories @@ -389,7 +389,7 @@ def _dedup_by_content(memories: list) -> list: enhanced_memories if search_req.dedup == "no" else _dedup_by_content(enhanced_memories) ) formatted_memories = [ - format_memory_item(data, include_embedding=search_req.dedup == "sim") + format_memory_item(data, include_embedding=search_req.dedup in ("sim", "mmr")) for data in deduped_memories ] @@ -479,7 +479,7 @@ def _fast_search( ) formatted_memories = [ - format_memory_item(data, include_embedding=search_req.dedup == "sim") + format_memory_item(data, include_embedding=search_req.dedup in ("sim", "mmr")) for data in search_results ] From a2029d8d3fe645abe2451b5f34da1884d6f04d2d Mon Sep 17 00:00:00 2001 From: hijzy Date: Mon, 26 Jan 2026 00:07:23 +0800 Subject: [PATCH 02/40] test: decrease lambda --- src/memos/api/handlers/search_handler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/memos/api/handlers/search_handler.py b/src/memos/api/handlers/search_handler.py index c8246c7cf..60eeb90b9 100644 --- a/src/memos/api/handlers/search_handler.py +++ b/src/memos/api/handlers/search_handler.py @@ -190,7 +190,7 @@ def _mmr_dedup_text_memories(self, results: dict[str, Any], target_top_k: int) - selected_global: list[int] = [] selected_by_bucket: dict[int, list[int]] = {i: [] for i in range(len(buckets))} - lambda_relevance = 0.8 + lambda_relevance = 0.7 remaining = set(range(len(flat))) while remaining: best_idx: int | None = None From cb345c775915cdf43caa0cea60574afdc80fd77d Mon Sep 17 00:00:00 2001 From: hijzy Date: Mon, 26 Jan 2026 15:07:58 +0800 Subject: [PATCH 03/40] test: add tag penalty --- src/memos/api/handlers/search_handler.py | 29 +++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/src/memos/api/handlers/search_handler.py b/src/memos/api/handlers/search_handler.py index 60eeb90b9..d6108cc7c 100644 --- a/src/memos/api/handlers/search_handler.py +++ b/src/memos/api/handlers/search_handler.py @@ -191,6 +191,7 @@ def _mmr_dedup_text_memories(self, results: dict[str, Any], 
target_top_k: int) - selected_by_bucket: dict[int, list[int]] = {i: [] for i in range(len(buckets))} lambda_relevance = 0.7 + alpha_tag = 0.1 remaining = set(range(len(flat))) while remaining: best_idx: int | None = None @@ -207,7 +208,33 @@ def _mmr_dedup_text_memories(self, results: dict[str, Any], target_top_k: int) - if not selected_global else max(similarity_matrix[idx][j] for j in selected_global) ) - mmr_score = lambda_relevance * relevance - (1.0 - lambda_relevance) * diversity + tag_penalty = 0.0 + if selected_global: + current_tags = set( + flat[idx][1].get("metadata", {}).get("tags", []) or [] + ) + if current_tags: + max_jaccard = 0.0 + for j in selected_global: + other_tags = set( + flat[j][1].get("metadata", {}).get("tags", []) or [] + ) + if not other_tags: + continue + inter = current_tags.intersection(other_tags) + if not inter: + continue + union = current_tags.union(other_tags) + jaccard = float(len(inter)) / float(len(union)) if union else 0.0 + if jaccard > max_jaccard: + max_jaccard = jaccard + tag_penalty = max_jaccard + + mmr_score = ( + lambda_relevance * relevance + - (1.0 - lambda_relevance) * diversity + - alpha_tag * tag_penalty + ) if best_mmr is None or mmr_score > best_mmr: best_mmr = mmr_score From a0a6a354e44b7a22f265407cd4b243d6e2dfb8bf Mon Sep 17 00:00:00 2001 From: hijzy Date: Mon, 26 Jan 2026 16:36:38 +0800 Subject: [PATCH 04/40] test: increase lambda_relevance --- src/memos/api/handlers/search_handler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/memos/api/handlers/search_handler.py b/src/memos/api/handlers/search_handler.py index d6108cc7c..219a2ba65 100644 --- a/src/memos/api/handlers/search_handler.py +++ b/src/memos/api/handlers/search_handler.py @@ -190,7 +190,7 @@ def _mmr_dedup_text_memories(self, results: dict[str, Any], target_top_k: int) - selected_global: list[int] = [] selected_by_bucket: dict[int, list[int]] = {i: [] for i in range(len(buckets))} - lambda_relevance = 0.7 + lambda_relevance = 0.8 alpha_tag = 0.1 remaining = set(range(len(flat))) while remaining: From a40fa5a3926a2256f4207d2349c0c7098c7f53d3 Mon Sep 17 00:00:00 2001 From: hijzy Date: Tue, 27 Jan 2026 11:39:48 +0800 Subject: [PATCH 05/40] test: fix top 5 candidates --- src/memos/api/handlers/search_handler.py | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/src/memos/api/handlers/search_handler.py b/src/memos/api/handlers/search_handler.py index 219a2ba65..25040bad1 100644 --- a/src/memos/api/handlers/search_handler.py +++ b/src/memos/api/handlers/search_handler.py @@ -64,6 +64,9 @@ def handle_search_memories(self, search_req: APISearchRequest) -> SearchResponse search_req.dedup = "mmr" + # if getattr(search_req, "dedup", None) is None: + # search_req.dedup = "mmr" + try: if search_req.dedup == "sim": search_req.top_k = original_top_k * 5 @@ -190,9 +193,23 @@ def _mmr_dedup_text_memories(self, results: dict[str, Any], target_top_k: int) - selected_global: list[int] = [] selected_by_bucket: dict[int, list[int]] = {i: [] for i in range(len(buckets))} - lambda_relevance = 0.8 - alpha_tag = 0.1 - remaining = set(range(len(flat))) + prefill_top_n = min(5, target_top_k) + if prefill_top_n > 0: + ordered_by_relevance = sorted( + range(len(flat)), key=lambda idx: flat[idx][2], reverse=True + ) + for idx in ordered_by_relevance: + if len(selected_global) >= prefill_top_n: + break + bucket_idx = flat[idx][0] + if len(selected_by_bucket[bucket_idx]) >= target_top_k: + continue + selected_global.append(idx) + 
selected_by_bucket[bucket_idx].append(idx) + + lambda_relevance = 0.7 + alpha_tag = 0.15 + remaining = set(range(len(flat))) - set(selected_global) while remaining: best_idx: int | None = None best_mmr: float | None = None From 692d1f17cd81d0bcea926349e4c596e3e6c4a09f Mon Sep 17 00:00:00 2001 From: hijzy Date: Tue, 27 Jan 2026 14:03:56 +0800 Subject: [PATCH 06/40] test: adjust alpha_tag --- src/memos/api/handlers/search_handler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/memos/api/handlers/search_handler.py b/src/memos/api/handlers/search_handler.py index 25040bad1..695a7d5e8 100644 --- a/src/memos/api/handlers/search_handler.py +++ b/src/memos/api/handlers/search_handler.py @@ -208,7 +208,7 @@ def _mmr_dedup_text_memories(self, results: dict[str, Any], target_top_k: int) - selected_by_bucket[bucket_idx].append(idx) lambda_relevance = 0.7 - alpha_tag = 0.15 + alpha_tag = 0.1 remaining = set(range(len(flat))) - set(selected_global) while remaining: best_idx: int | None = None From 5ef4942e59728d341702ed93541c8706e39c9a95 Mon Sep 17 00:00:00 2001 From: hijzy Date: Tue, 27 Jan 2026 15:16:42 +0800 Subject: [PATCH 07/40] test: increase lambda_relevance --- src/memos/api/handlers/search_handler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/memos/api/handlers/search_handler.py b/src/memos/api/handlers/search_handler.py index 695a7d5e8..de2960676 100644 --- a/src/memos/api/handlers/search_handler.py +++ b/src/memos/api/handlers/search_handler.py @@ -207,7 +207,7 @@ def _mmr_dedup_text_memories(self, results: dict[str, Any], target_top_k: int) - selected_global.append(idx) selected_by_bucket[bucket_idx].append(idx) - lambda_relevance = 0.7 + lambda_relevance = 0.8 alpha_tag = 0.1 remaining = set(range(len(flat))) - set(selected_global) while remaining: From 2885d577c7aa8e3e99855deada563fbba2ac1ba1 Mon Sep 17 00:00:00 2001 From: hijzy Date: Tue, 27 Jan 2026 15:31:14 +0800 Subject: [PATCH 08/40] test: decrease lambda_relevance --- src/memos/api/handlers/search_handler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/memos/api/handlers/search_handler.py b/src/memos/api/handlers/search_handler.py index de2960676..d7813a133 100644 --- a/src/memos/api/handlers/search_handler.py +++ b/src/memos/api/handlers/search_handler.py @@ -207,7 +207,7 @@ def _mmr_dedup_text_memories(self, results: dict[str, Any], target_top_k: int) - selected_global.append(idx) selected_by_bucket[bucket_idx].append(idx) - lambda_relevance = 0.8 + lambda_relevance = 0.6 alpha_tag = 0.1 remaining = set(range(len(flat))) - set(selected_global) while remaining: From 0819a460f11e50e1c2c158f6c25d6b3d60051852 Mon Sep 17 00:00:00 2001 From: hijzy Date: Tue, 27 Jan 2026 15:44:57 +0800 Subject: [PATCH 09/40] test: adjust params --- src/memos/api/handlers/search_handler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/memos/api/handlers/search_handler.py b/src/memos/api/handlers/search_handler.py index d7813a133..4cb50a451 100644 --- a/src/memos/api/handlers/search_handler.py +++ b/src/memos/api/handlers/search_handler.py @@ -207,8 +207,8 @@ def _mmr_dedup_text_memories(self, results: dict[str, Any], target_top_k: int) - selected_global.append(idx) selected_by_bucket[bucket_idx].append(idx) - lambda_relevance = 0.6 - alpha_tag = 0.1 + lambda_relevance = 0.8 + alpha_tag = 0.2 remaining = set(range(len(flat))) - set(selected_global) while remaining: best_idx: int | None = None From 767a0cc3f0e53858767ea9bcae9932552f660b5b Mon Sep 17 
00:00:00 2001 From: hijzy Date: Tue, 27 Jan 2026 15:56:49 +0800 Subject: [PATCH 10/40] test: delete tag penalty --- src/memos/api/handlers/search_handler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/memos/api/handlers/search_handler.py b/src/memos/api/handlers/search_handler.py index 4cb50a451..39543e334 100644 --- a/src/memos/api/handlers/search_handler.py +++ b/src/memos/api/handlers/search_handler.py @@ -208,7 +208,7 @@ def _mmr_dedup_text_memories(self, results: dict[str, Any], target_top_k: int) - selected_by_bucket[bucket_idx].append(idx) lambda_relevance = 0.8 - alpha_tag = 0.2 + alpha_tag = 0 remaining = set(range(len(flat))) - set(selected_global) while remaining: best_idx: int | None = None From 69143b78260a6f9a80c570b9181147ebb71b36c0 Mon Sep 17 00:00:00 2001 From: hijzy Date: Tue, 27 Jan 2026 17:21:38 +0800 Subject: [PATCH 11/40] test: delete fix top5 --- src/memos/api/handlers/search_handler.py | 54 +++++++----------------- 1 file changed, 15 insertions(+), 39 deletions(-) diff --git a/src/memos/api/handlers/search_handler.py b/src/memos/api/handlers/search_handler.py index 39543e334..112e06ec7 100644 --- a/src/memos/api/handlers/search_handler.py +++ b/src/memos/api/handlers/search_handler.py @@ -176,6 +176,15 @@ def _mmr_dedup_text_memories(self, results: dict[str, Any], target_top_k: int) - score = mem.get("metadata", {}).get("relativity", 0.0) flat.append((bucket_idx, mem, float(score) if score is not None else 0.0)) + if len(flat) <= 1: + return results + + ordered_by_relevance = sorted(range(len(flat)), key=lambda idx: flat[idx][2], reverse=True) + candidate_pool_size = min(len(flat), target_top_k * 3) + candidate_indices = ordered_by_relevance[:candidate_pool_size] + flat = [flat[i] for i in candidate_indices] + ordered_by_relevance = sorted(range(len(flat)), key=lambda idx: flat[idx][2], reverse=True) + if len(flat) <= 1: return results @@ -193,23 +202,10 @@ def _mmr_dedup_text_memories(self, results: dict[str, Any], target_top_k: int) - selected_global: list[int] = [] selected_by_bucket: dict[int, list[int]] = {i: [] for i in range(len(buckets))} - prefill_top_n = min(5, target_top_k) - if prefill_top_n > 0: - ordered_by_relevance = sorted( - range(len(flat)), key=lambda idx: flat[idx][2], reverse=True - ) - for idx in ordered_by_relevance: - if len(selected_global) >= prefill_top_n: - break - bucket_idx = flat[idx][0] - if len(selected_by_bucket[bucket_idx]) >= target_top_k: - continue - selected_global.append(idx) - selected_by_bucket[bucket_idx].append(idx) - + # No prefill - let MMR handle all selections with threshold-based penalty lambda_relevance = 0.8 - alpha_tag = 0 - remaining = set(range(len(flat))) - set(selected_global) + similarity_threshold = 0.9 + remaining = set(range(len(flat))) while remaining: best_idx: int | None = None best_mmr: float | None = None @@ -225,32 +221,11 @@ def _mmr_dedup_text_memories(self, results: dict[str, Any], target_top_k: int) - if not selected_global else max(similarity_matrix[idx][j] for j in selected_global) ) - tag_penalty = 0.0 - if selected_global: - current_tags = set( - flat[idx][1].get("metadata", {}).get("tags", []) or [] - ) - if current_tags: - max_jaccard = 0.0 - for j in selected_global: - other_tags = set( - flat[j][1].get("metadata", {}).get("tags", []) or [] - ) - if not other_tags: - continue - inter = current_tags.intersection(other_tags) - if not inter: - continue - union = current_tags.union(other_tags) - jaccard = float(len(inter)) / float(len(union)) if union else 0.0 - if 
jaccard > max_jaccard: - max_jaccard = jaccard - tag_penalty = max_jaccard + diversity_penalty = max(0.0, diversity - similarity_threshold) mmr_score = ( lambda_relevance * relevance - - (1.0 - lambda_relevance) * diversity - - alpha_tag * tag_penalty + - (1.0 - lambda_relevance) * diversity_penalty ) if best_mmr is None or mmr_score > best_mmr: @@ -274,6 +249,7 @@ def _mmr_dedup_text_memories(self, results: dict[str, Any], target_top_k: int) - for bucket_idx, bucket in enumerate(buckets): selected_indices = selected_by_bucket.get(bucket_idx, []) + selected_indices = sorted(selected_indices, key=lambda i: flat[i][2], reverse=True) bucket["memories"] = [flat[i][1] for i in selected_indices] return results From ff9b03f91de54c0a40303689996e0a87e5cc1c6b Mon Sep 17 00:00:00 2001 From: hijzy Date: Tue, 27 Jan 2026 17:36:29 +0800 Subject: [PATCH 12/40] test: readd fix top5 --- src/memos/api/handlers/search_handler.py | 52 +++++++++++++++++++++++- 1 file changed, 50 insertions(+), 2 deletions(-) diff --git a/src/memos/api/handlers/search_handler.py b/src/memos/api/handlers/search_handler.py index 112e06ec7..5a43326b8 100644 --- a/src/memos/api/handlers/search_handler.py +++ b/src/memos/api/handlers/search_handler.py @@ -202,10 +202,23 @@ def _mmr_dedup_text_memories(self, results: dict[str, Any], target_top_k: int) - selected_global: list[int] = [] selected_by_bucket: dict[int, list[int]] = {i: [] for i in range(len(buckets))} - # No prefill - let MMR handle all selections with threshold-based penalty + # Prefill top5 by relevance to ensure high-relevance items are always selected + prefill_top_n = min(5, target_top_k) + if prefill_top_n > 0: + for idx in ordered_by_relevance: + if len(selected_global) >= prefill_top_n: + break + bucket_idx = flat[idx][0] + if len(selected_by_bucket[bucket_idx]) >= target_top_k: + continue + selected_global.append(idx) + selected_by_bucket[bucket_idx].append(idx) + + # MMR selection with threshold-based penalties lambda_relevance = 0.8 + alpha_tag = 0.1 similarity_threshold = 0.9 - remaining = set(range(len(flat))) + remaining = set(range(len(flat))) - set(selected_global) while remaining: best_idx: int | None = None best_mmr: float | None = None @@ -223,9 +236,44 @@ def _mmr_dedup_text_memories(self, results: dict[str, Any], target_top_k: int) - ) diversity_penalty = max(0.0, diversity - similarity_threshold) + # Tag penalty: compute max Jaccard similarity with selected memories + tag_penalty = 0.0 + if selected_global: + # Try metadata.tags first, fallback to memory_type + current_tags = set( + flat[idx][1].get("metadata", {}).get("tags", []) or [] + ) + if not current_tags: + # Fallback: use memory_type as a single-element tag set + mem_type = flat[idx][1].get("memory_type") + if mem_type: + current_tags = {mem_type} + + if current_tags: + max_jaccard = 0.0 + for j in selected_global: + other_tags = set( + flat[j][1].get("metadata", {}).get("tags", []) or [] + ) + if not other_tags: + other_mem_type = flat[j][1].get("memory_type") + if other_mem_type: + other_tags = {other_mem_type} + if not other_tags: + continue + inter = current_tags.intersection(other_tags) + if not inter: + continue + union = current_tags.union(other_tags) + jaccard = float(len(inter)) / float(len(union)) if union else 0.0 + if jaccard > max_jaccard: + max_jaccard = jaccard + tag_penalty = max_jaccard + mmr_score = ( lambda_relevance * relevance - (1.0 - lambda_relevance) * diversity_penalty + - alpha_tag * tag_penalty ) if best_mmr is None or mmr_score > best_mmr: From 
47ef19bc4834caa9fb827654ae0cf2c7ef2e8864 Mon Sep 17 00:00:00 2001 From: hijzy Date: Tue, 27 Jan 2026 17:50:54 +0800 Subject: [PATCH 13/40] test: delete threshold-based penalties --- src/memos/api/handlers/search_handler.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/memos/api/handlers/search_handler.py b/src/memos/api/handlers/search_handler.py index 5a43326b8..a71864838 100644 --- a/src/memos/api/handlers/search_handler.py +++ b/src/memos/api/handlers/search_handler.py @@ -214,10 +214,9 @@ def _mmr_dedup_text_memories(self, results: dict[str, Any], target_top_k: int) - selected_global.append(idx) selected_by_bucket[bucket_idx].append(idx) - # MMR selection with threshold-based penalties + # MMR selection with diversity and tag penalties lambda_relevance = 0.8 alpha_tag = 0.1 - similarity_threshold = 0.9 remaining = set(range(len(flat))) - set(selected_global) while remaining: best_idx: int | None = None @@ -234,7 +233,6 @@ def _mmr_dedup_text_memories(self, results: dict[str, Any], target_top_k: int) - if not selected_global else max(similarity_matrix[idx][j] for j in selected_global) ) - diversity_penalty = max(0.0, diversity - similarity_threshold) # Tag penalty: compute max Jaccard similarity with selected memories tag_penalty = 0.0 @@ -272,7 +270,7 @@ def _mmr_dedup_text_memories(self, results: dict[str, Any], target_top_k: int) - mmr_score = ( lambda_relevance * relevance - - (1.0 - lambda_relevance) * diversity_penalty + - (1.0 - lambda_relevance) * diversity - alpha_tag * tag_penalty ) From 0eaa06fe94a99c6b5b739b01be32b64c627dd719 Mon Sep 17 00:00:00 2001 From: hijzy Date: Tue, 27 Jan 2026 17:51:18 +0800 Subject: [PATCH 14/40] test: delete threshold-based penalties --- src/memos/api/handlers/search_handler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/memos/api/handlers/search_handler.py b/src/memos/api/handlers/search_handler.py index a71864838..78ee9d003 100644 --- a/src/memos/api/handlers/search_handler.py +++ b/src/memos/api/handlers/search_handler.py @@ -216,7 +216,7 @@ def _mmr_dedup_text_memories(self, results: dict[str, Any], target_top_k: int) - # MMR selection with diversity and tag penalties lambda_relevance = 0.8 - alpha_tag = 0.1 + alpha_tag = 0 remaining = set(range(len(flat))) - set(selected_global) while remaining: best_idx: int | None = None From a58777b1d9af84210d6ebac8df9789c4a81b7414 Mon Sep 17 00:00:00 2001 From: hijzy Date: Tue, 27 Jan 2026 18:41:54 +0800 Subject: [PATCH 15/40] test: restore best score version, add resort --- src/memos/api/handlers/search_handler.py | 28 ++++-------------------- 1 file changed, 4 insertions(+), 24 deletions(-) diff --git a/src/memos/api/handlers/search_handler.py b/src/memos/api/handlers/search_handler.py index 78ee9d003..083d3c08f 100644 --- a/src/memos/api/handlers/search_handler.py +++ b/src/memos/api/handlers/search_handler.py @@ -176,15 +176,6 @@ def _mmr_dedup_text_memories(self, results: dict[str, Any], target_top_k: int) - score = mem.get("metadata", {}).get("relativity", 0.0) flat.append((bucket_idx, mem, float(score) if score is not None else 0.0)) - if len(flat) <= 1: - return results - - ordered_by_relevance = sorted(range(len(flat)), key=lambda idx: flat[idx][2], reverse=True) - candidate_pool_size = min(len(flat), target_top_k * 3) - candidate_indices = ordered_by_relevance[:candidate_pool_size] - flat = [flat[i] for i in candidate_indices] - ordered_by_relevance = sorted(range(len(flat)), key=lambda idx: flat[idx][2], reverse=True) - if len(flat) <= 1: 
return results @@ -202,9 +193,11 @@ def _mmr_dedup_text_memories(self, results: dict[str, Any], target_top_k: int) - selected_global: list[int] = [] selected_by_bucket: dict[int, list[int]] = {i: [] for i in range(len(buckets))} - # Prefill top5 by relevance to ensure high-relevance items are always selected prefill_top_n = min(5, target_top_k) if prefill_top_n > 0: + ordered_by_relevance = sorted( + range(len(flat)), key=lambda idx: flat[idx][2], reverse=True + ) for idx in ordered_by_relevance: if len(selected_global) >= prefill_top_n: break @@ -214,7 +207,6 @@ def _mmr_dedup_text_memories(self, results: dict[str, Any], target_top_k: int) - selected_global.append(idx) selected_by_bucket[bucket_idx].append(idx) - # MMR selection with diversity and tag penalties lambda_relevance = 0.8 alpha_tag = 0 remaining = set(range(len(flat))) - set(selected_global) @@ -233,30 +225,17 @@ def _mmr_dedup_text_memories(self, results: dict[str, Any], target_top_k: int) - if not selected_global else max(similarity_matrix[idx][j] for j in selected_global) ) - - # Tag penalty: compute max Jaccard similarity with selected memories tag_penalty = 0.0 if selected_global: - # Try metadata.tags first, fallback to memory_type current_tags = set( flat[idx][1].get("metadata", {}).get("tags", []) or [] ) - if not current_tags: - # Fallback: use memory_type as a single-element tag set - mem_type = flat[idx][1].get("memory_type") - if mem_type: - current_tags = {mem_type} - if current_tags: max_jaccard = 0.0 for j in selected_global: other_tags = set( flat[j][1].get("metadata", {}).get("tags", []) or [] ) - if not other_tags: - other_mem_type = flat[j][1].get("memory_type") - if other_mem_type: - other_tags = {other_mem_type} if not other_tags: continue inter = current_tags.intersection(other_tags) @@ -295,6 +274,7 @@ def _mmr_dedup_text_memories(self, results: dict[str, Any], target_top_k: int) - for bucket_idx, bucket in enumerate(buckets): selected_indices = selected_by_bucket.get(bucket_idx, []) + # Re-sort by original relevance score (descending) for better generation quality selected_indices = sorted(selected_indices, key=lambda i: flat[i][2], reverse=True) bucket["memories"] = [flat[i][1] for i in selected_indices] From 99362554b62a99b98b54f36102ddcd4e4888f3d2 Mon Sep 17 00:00:00 2001 From: harvey_xiang Date: Tue, 27 Jan 2026 22:02:36 +0800 Subject: [PATCH 16/40] chore: update version to 2.0.4 --- pyproject.toml | 2 +- src/memos/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 3fbe4ced4..8dd1d90c6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ ############################################################################## name = "MemoryOS" -version = "2.0.3" +version = "2.0.4" description = "Intelligence Begins with Memory" license = {text = "Apache-2.0"} readme = "README.md" diff --git a/src/memos/__init__.py b/src/memos/__init__.py index 3c764db79..2d946cfbb 100644 --- a/src/memos/__init__.py +++ b/src/memos/__init__.py @@ -1,4 +1,4 @@ -__version__ = "2.0.3" +__version__ = "2.0.4" from memos.configs.mem_cube import GeneralMemCubeConfig from memos.configs.mem_os import MOSConfig From 9c0f8dfd1bade14b7ae63903d8c83c3a638b203a Mon Sep 17 00:00:00 2001 From: jiang Date: Tue, 27 Jan 2026 23:19:38 +0800 Subject: [PATCH 17/40] test: add diversity --- src/memos/api/handlers/search_handler.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/memos/api/handlers/search_handler.py 
b/src/memos/api/handlers/search_handler.py
index 083d3c08f..3375face8 100644
--- a/src/memos/api/handlers/search_handler.py
+++ b/src/memos/api/handlers/search_handler.py
@@ -209,6 +209,8 @@ def _mmr_dedup_text_memories(self, results: dict[str, Any], target_top_k: int) -

         lambda_relevance = 0.8
         alpha_tag = 0
+        beta_high_similarity = 5.0  # Penalty multiplier for similarity > 0.92
+        similarity_threshold = 0.92
         remaining = set(range(len(flat))) - set(selected_global)
         while remaining:
             best_idx: int | None = None
@@ -220,11 +222,17 @@ def _mmr_dedup_text_memories(self, results: dict[str, Any], target_top_k: int) -
                     continue

                 relevance = flat[idx][2]
-                diversity = (
+                max_sim = (
                     0.0
                     if not selected_global
                     else max(similarity_matrix[idx][j] for j in selected_global)
                 )
+
+                # Apply progressive penalty for high similarity (> 0.92)
+                if max_sim > similarity_threshold:
+                    diversity = max_sim + (max_sim - similarity_threshold) * beta_high_similarity
+                else:
+                    diversity = max_sim
                 tag_penalty = 0.0
                 if selected_global:
                     current_tags = set(

From 5772a0f2b7f0ea5d9b2ba9f76a068b956bc41710 Mon Sep 17 00:00:00 2001
From: jiang
Date: Tue, 27 Jan 2026 23:40:00 +0800
Subject: [PATCH 18/40] test: reformat

---
 src/memos/api/handlers/search_handler.py | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/src/memos/api/handlers/search_handler.py b/src/memos/api/handlers/search_handler.py
index 3375face8..15fbe4b44 100644
--- a/src/memos/api/handlers/search_handler.py
+++ b/src/memos/api/handlers/search_handler.py
@@ -165,7 +165,9 @@ def _dedup_text_memories(self, results: dict[str, Any], target_top_k: int) -> di
         bucket["memories"] = [flat[i][1] for i in selected_indices]
         return results

-    def _mmr_dedup_text_memories(self, results: dict[str, Any], target_top_k: int) -> dict[str, Any]:
+    def _mmr_dedup_text_memories(
+        self, results: dict[str, Any], target_top_k: int
+    ) -> dict[str, Any]:
         buckets = results.get("text_mem", [])
         if not buckets:
             return results
@@ -235,15 +237,11 @@ def _mmr_dedup_text_memories(self, results: dict[str, Any], target_top_k: int) -
                     diversity = max_sim
                 tag_penalty = 0.0
                 if selected_global:
-                    current_tags = set(
-                        flat[idx][1].get("metadata", {}).get("tags", []) or []
-                    )
+                    current_tags = set(flat[idx][1].get("metadata", {}).get("tags", []) or [])
                     if current_tags:
                         max_jaccard = 0.0
                         for j in selected_global:
-                            other_tags = set(
-                                flat[j][1].get("metadata", {}).get("tags", []) or []
-                            )
+                            other_tags = set(flat[j][1].get("metadata", {}).get("tags", []) or [])
                             if not other_tags:
                                 continue
                             inter = current_tags.intersection(other_tags)

From 07b48571b20bb9d209738b92b72f933f4c2cdecb Mon Sep 17 00:00:00 2001
From: CaralHsi
Date: Wed, 28 Jan 2026 15:22:42 +0800
Subject: [PATCH 19/40] fix: playground chat bug (#968)

* feat: add timer for split text
* feat: add chat_handler log
* feat: add chat_handler log
* fix: chat in playground bug: indexing into a null list
* chore: deprecated warning
* fix: we don't use query when searching in graph-db
---
 src/memos/api/handlers/chat_handler.py | 5 +++++
 src/memos/mem_reader/multi_modal_struct.py | 7 -------
 src/memos/mem_reader/read_multi_modal/base.py | 2 ++
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/memos/api/handlers/chat_handler.py b/src/memos/api/handlers/chat_handler.py
index 812cf2793..8292e027b 100644
--- a/src/memos/api/handlers/chat_handler.py
+++ b/src/memos/api/handlers/chat_handler.py
@@ -110,6 +110,7 @@ def handle_chat_complete(self, chat_req: APIChatCompleteRequest) -> dict[str, An
         Raises:
             HTTPException: If chat
fails """ + self.logger.info(f"[ChatHandler] Chat Req is: {chat_req}") try: # Resolve readable cube IDs (for search) readable_cube_ids = chat_req.readable_cube_ids or [chat_req.user_id] @@ -241,6 +242,7 @@ def handle_chat_stream(self, chat_req: ChatRequest) -> StreamingResponse: Raises: HTTPException: If stream initialization fails """ + self.logger.info(f"[ChatHandler] Chat Req is: {chat_req}") try: def generate_chat_response() -> Generator[str, None, None]: @@ -422,6 +424,7 @@ def handle_chat_stream_playground(self, chat_req: ChatPlaygroundRequest) -> Stre Raises: HTTPException: If stream initialization fails """ + self.logger.info(f"[ChatHandler] Chat Req is: {chat_req}") try: def generate_chat_response() -> Generator[str, None, None]: @@ -585,6 +588,8 @@ def generate_chat_response() -> Generator[str, None, None]: # get internet reference internet_reference = self._get_internet_reference( search_response.data.get("text_mem")[0]["memories"] + if search_response.data.get("text_mem") + else [] ) yield f"data: {json.dumps({'type': 'reference', 'data': reference}, ensure_ascii=False)}\n\n" diff --git a/src/memos/mem_reader/multi_modal_struct.py b/src/memos/mem_reader/multi_modal_struct.py index 9edcd0a55..d61b20d0f 100644 --- a/src/memos/mem_reader/multi_modal_struct.py +++ b/src/memos/mem_reader/multi_modal_struct.py @@ -464,13 +464,6 @@ def _get_maybe_merged_memory( status="activated", threshold=merge_threshold, user_name=user_name, - filter={ - "or": [ - {"memory_type": "LongTermMemory"}, - {"memory_type": "UserMemory"}, - {"memory_type": "WorkingMemory"}, - ] - }, ) if not search_results: diff --git a/src/memos/mem_reader/read_multi_modal/base.py b/src/memos/mem_reader/read_multi_modal/base.py index 1a756c5d0..95d427864 100644 --- a/src/memos/mem_reader/read_multi_modal/base.py +++ b/src/memos/mem_reader/read_multi_modal/base.py @@ -15,6 +15,7 @@ TextualMemoryItem, TreeNodeTextualMemoryMetadata, ) +from memos.utils import timed from .utils import detect_lang, get_text_splitter @@ -245,6 +246,7 @@ def parse( else: raise ValueError(f"Unknown mode: {mode}. Must be 'fast' or 'fine'") + @timed def _split_text(self, text: str, is_markdown: bool = False) -> list[str]: """ Split text into chunks using text splitter from utils. 
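Context for the next patch: cosine_similarity_matrix in retrieve_utils.py L2-normalizes the embedding matrix before taking dot products, so an all-zero embedding divides by zero and propagates NaN across its whole row of the similarity matrix. A small NumPy sketch of the failure mode and the guard the patch applies (the toy vectors are illustrative):

import numpy as np

embeddings = np.asarray([[1.0, 0.0], [0.0, 0.0]])  # second vector is all zeros
norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
# Naive version: embeddings / norms divides by zero and yields NaN rows.
norms[norms == 0] = 1.0                            # guard: zero vector keeps zero dot products
x_normalized = embeddings / norms
sim = np.dot(x_normalized, x_normalized.T)
sim = np.nan_to_num(sim, nan=0.0, posinf=0.0, neginf=0.0)
print(sim)  # [[1. 0.] [0. 0.]] -- no NaN; the zero vector has similarity 0 to everything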
From d6f596bb02a5ac0837dc3213404c7338ba1242b9 Mon Sep 17 00:00:00 2001
From: hijzy
Date: Wed, 28 Jan 2026 15:32:34 +0800
Subject: [PATCH 20/40] fix: fix NaN and zero embeddings

---
 .../textual/tree_text_memory/retrieve/retrieve_utils.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py b/src/memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py
index 5a82883c8..1c887355c 100644
--- a/src/memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py
+++ b/src/memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py
@@ -466,7 +466,12 @@ def find_best_unrelated_subgroup(sentences: list, similarity_matrix: list, bar:
 def cosine_similarity_matrix(embeddings: list[list[float]]) -> list[list[float]]:
-    norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
-    x_normalized = embeddings / norms
+    embeddings_array = np.asarray(embeddings)
+    norms = np.linalg.norm(embeddings_array, axis=1, keepdims=True)
+    # Handle zero vectors to avoid division by zero
+    norms[norms == 0] = 1.0
+    x_normalized = embeddings_array / norms
     similarity_matrix = np.dot(x_normalized, x_normalized.T)
+    # Handle any NaN or Inf values
+    similarity_matrix = np.nan_to_num(similarity_matrix, nan=0.0, posinf=0.0, neginf=0.0)
     return similarity_matrix

From 659231d9e429c9f2016676ac925e5d8290f3b94b Mon Sep 17 00:00:00 2001
From: hijzy
Date: Wed, 28 Jan 2026 15:32:40 +0800
Subject: [PATCH 21/40] feat: add mmr deduplication

---
 src/memos/api/handlers/search_handler.py | 134 ++++++++--------------
 1 file changed, 46 insertions(+), 88 deletions(-)

diff --git a/src/memos/api/handlers/search_handler.py b/src/memos/api/handlers/search_handler.py
index 15fbe4b44..0cd5399cf 100644
--- a/src/memos/api/handlers/search_handler.py
+++ b/src/memos/api/handlers/search_handler.py
@@ -5,7 +5,6 @@
 using dependency injection for better modularity and testability.
""" -import math from typing import Any from memos.api.handlers.base_handler import BaseHandler, HandlerDependencies @@ -57,24 +56,18 @@ def handle_search_memories(self, search_req: APISearchRequest) -> SearchResponse self.logger.info(f"[SearchHandler] Search Req is: {search_req}") original_top_k = search_req.top_k - adjusted_top_k = False - prev_text_mem_include_embedding: bool | None = None prev_graph_retriever_include_embedding: bool | None = None - search_req.dedup = "mmr" - - # if getattr(search_req, "dedup", None) is None: - # search_req.dedup = "mmr" + if getattr(search_req, "dedup", None) is None: + search_req.dedup = "mmr" try: - if search_req.dedup == "sim": + # Expand top_k for deduplication (5x to ensure enough candidates) + if search_req.dedup in ("sim", "mmr"): search_req.top_k = original_top_k * 5 - adjusted_top_k = True - elif search_req.dedup == "mmr": - search_req.top_k = original_top_k * 5 - adjusted_top_k = True + # Enable embeddings for MMR deduplication if search_req.dedup == "mmr": text_mem = getattr(self.naive_mem_cube, "text_mem", None) if text_mem is not None and hasattr(text_mem, "include_embedding"): @@ -86,6 +79,7 @@ def handle_search_memories(self, search_req: APISearchRequest) -> SearchResponse prev_graph_retriever_include_embedding = graph_retriever.include_embedding graph_retriever.include_embedding = True + # Search and deduplicate cube_view = self._build_cube_view(search_req) results = cube_view.search_memories(search_req) @@ -96,8 +90,8 @@ def handle_search_memories(self, search_req: APISearchRequest) -> SearchResponse results = self._mmr_dedup_text_memories(results, original_top_k) self._strip_embeddings(results) finally: - if adjusted_top_k: - search_req.top_k = original_top_k + # Restore original states + search_req.top_k = original_top_k if prev_text_mem_include_embedding is not None: text_mem = getattr(self.naive_mem_cube, "text_mem", None) @@ -168,10 +162,19 @@ def _dedup_text_memories(self, results: dict[str, Any], target_top_k: int) -> di def _mmr_dedup_text_memories( self, results: dict[str, Any], target_top_k: int ) -> dict[str, Any]: + """ + MMR-based deduplication with progressive penalty for high similarity. + + Algorithm: + 1. Prefill top 5 by relevance + 2. MMR selection: balance relevance vs diversity + 3. 
Re-sort by original relevance for better generation quality + """ buckets = results.get("text_mem", []) if not buckets: return results + # Flatten all memories with their scores flat: list[tuple[int, dict[str, Any], float]] = [] for bucket_idx, bucket in enumerate(buckets): for mem in bucket.get("memories", []): @@ -181,13 +184,17 @@ def _mmr_dedup_text_memories( if len(flat) <= 1: return results + # Get or compute embeddings embeddings = self._extract_embeddings([mem for _, mem, _ in flat]) if embeddings is None: documents = [mem.get("memory", "") for _, mem, _ in flat] embeddings = self.searcher.embedder.embed(documents) - similarity_matrix = self._cosine_similarity_matrix_local(embeddings) + # Compute similarity matrix using NumPy-optimized method + # Returns numpy array but compatible with list[i][j] indexing + similarity_matrix = cosine_similarity_matrix(embeddings) + # Initialize selection tracking indices_by_bucket: dict[int, list[int]] = {i: [] for i in range(len(buckets))} for flat_index, (bucket_idx, _, _) in enumerate(flat): indices_by_bucket[bucket_idx].append(flat_index) @@ -195,25 +202,26 @@ def _mmr_dedup_text_memories( selected_global: list[int] = [] selected_by_bucket: dict[int, list[int]] = {i: [] for i in range(len(buckets))} + # Phase 1: Prefill top N by relevance prefill_top_n = min(5, target_top_k) - if prefill_top_n > 0: - ordered_by_relevance = sorted( - range(len(flat)), key=lambda idx: flat[idx][2], reverse=True - ) - for idx in ordered_by_relevance: - if len(selected_global) >= prefill_top_n: - break - bucket_idx = flat[idx][0] - if len(selected_by_bucket[bucket_idx]) >= target_top_k: - continue - selected_global.append(idx) - selected_by_bucket[bucket_idx].append(idx) + ordered_by_relevance = sorted( + range(len(flat)), key=lambda idx: flat[idx][2], reverse=True + ) + for idx in ordered_by_relevance[:len(flat)]: + if len(selected_global) >= prefill_top_n: + break + bucket_idx = flat[idx][0] + if len(selected_by_bucket[bucket_idx]) >= target_top_k: + continue + selected_global.append(idx) + selected_by_bucket[bucket_idx].append(idx) + # Phase 2: MMR selection for remaining slots lambda_relevance = 0.8 - alpha_tag = 0 - beta_high_similarity = 5.0 # Penalty multiplier for similarity > 0.92 similarity_threshold = 0.92 + beta_high_similarity = 5.0 # Penalty multiplier for similarity > 0.92 remaining = set(range(len(flat))) - set(selected_global) + while remaining: best_idx: int | None = None best_mmr: float | None = None @@ -230,34 +238,13 @@ def _mmr_dedup_text_memories( else max(similarity_matrix[idx][j] for j in selected_global) ) - # Apply progressive penalty for high similarity (> 0.92) + # Progressive penalty for high similarity (> 0.92) if max_sim > similarity_threshold: diversity = max_sim + (max_sim - similarity_threshold) * beta_high_similarity else: diversity = max_sim - tag_penalty = 0.0 - if selected_global: - current_tags = set(flat[idx][1].get("metadata", {}).get("tags", []) or []) - if current_tags: - max_jaccard = 0.0 - for j in selected_global: - other_tags = set(flat[j][1].get("metadata", {}).get("tags", []) or []) - if not other_tags: - continue - inter = current_tags.intersection(other_tags) - if not inter: - continue - union = current_tags.union(other_tags) - jaccard = float(len(inter)) / float(len(union)) if union else 0.0 - if jaccard > max_jaccard: - max_jaccard = jaccard - tag_penalty = max_jaccard - - mmr_score = ( - lambda_relevance * relevance - - (1.0 - lambda_relevance) * diversity - - alpha_tag * tag_penalty - ) + + mmr_score = 
lambda_relevance * relevance - (1.0 - lambda_relevance) * diversity if best_mmr is None or mmr_score > best_mmr: best_mmr = mmr_score @@ -270,17 +257,16 @@ def _mmr_dedup_text_memories( selected_by_bucket[flat[best_idx][0]].append(best_idx) remaining.remove(best_idx) - all_full = True - for bucket_idx, bucket_indices in indices_by_bucket.items(): - if len(selected_by_bucket[bucket_idx]) < min(target_top_k, len(bucket_indices)): - all_full = False - break - if all_full: + # Early termination: all buckets are full + if all( + len(selected_by_bucket[b_idx]) >= min(target_top_k, len(bucket_indices)) + for b_idx, bucket_indices in indices_by_bucket.items() + ): break + # Phase 3: Re-sort by original relevance for bucket_idx, bucket in enumerate(buckets): selected_indices = selected_by_bucket.get(bucket_idx, []) - # Re-sort by original relevance score (descending) for better generation quality selected_indices = sorted(selected_indices, key=lambda i: flat[i][2], reverse=True) bucket["memories"] = [flat[i][1] for i in selected_indices] @@ -326,34 +312,6 @@ def _strip_embeddings(results: dict[str, Any]) -> None: if "embedding" in metadata: metadata["embedding"] = [] - @staticmethod - def _cosine_similarity_matrix_local(embeddings: list[list[float]]) -> list[list[float]]: - if not embeddings: - return [] - - normalized: list[list[float]] = [] - for vec in embeddings: - norm_sq = 0.0 - for x in vec: - xf = float(x) - norm_sq += xf * xf - denom = math.sqrt(norm_sq) if norm_sq > 0.0 else 1.0 - normalized.append([float(x) / denom for x in vec]) - - n = len(normalized) - sim: list[list[float]] = [[0.0] * n for _ in range(n)] - for i in range(n): - sim[i][i] = 1.0 - vi = normalized[i] - for j in range(i + 1, n): - vj = normalized[j] - dot = 0.0 - for a, b in zip(vi, vj, strict=False): - dot += a * b - sim[i][j] = dot - sim[j][i] = dot - return sim - def _resolve_cube_ids(self, search_req: APISearchRequest) -> list[str]: """ Normalize target cube ids from search_req. From 6645c3e5ef096718db278c25a9e2b693cab59634 Mon Sep 17 00:00:00 2001 From: hijzy Date: Wed, 28 Jan 2026 17:46:41 +0800 Subject: [PATCH 22/40] feat: add preference memory deduplication --- src/memos/api/handlers/search_handler.py | 128 ++++++++++++++++------- 1 file changed, 92 insertions(+), 36 deletions(-) diff --git a/src/memos/api/handlers/search_handler.py b/src/memos/api/handlers/search_handler.py index 0cd5399cf..b7d1e150b 100644 --- a/src/memos/api/handlers/search_handler.py +++ b/src/memos/api/handlers/search_handler.py @@ -87,7 +87,8 @@ def handle_search_memories(self, search_req: APISearchRequest) -> SearchResponse results = self._dedup_text_memories(results, original_top_k) self._strip_embeddings(results) elif search_req.dedup == "mmr": - results = self._mmr_dedup_text_memories(results, original_top_k) + pref_top_k = getattr(search_req, "pref_top_k", 6) + results = self._mmr_dedup_text_memories(results, original_top_k, pref_top_k) self._strip_embeddings(results) finally: # Restore original states @@ -160,61 +161,96 @@ def _dedup_text_memories(self, results: dict[str, Any], target_top_k: int) -> di return results def _mmr_dedup_text_memories( - self, results: dict[str, Any], target_top_k: int + self, results: dict[str, Any], text_top_k: int, pref_top_k: int = 6 ) -> dict[str, Any]: """ MMR-based deduplication with progressive penalty for high similarity. + Performs deduplication on both text_mem and preference memories together. + Other memory types (tool_mem, etc.) are not modified. 
+ + Args: + results: Search results containing text_mem and preference buckets + text_top_k: Target number of text memories to return per bucket + pref_top_k: Target number of preference memories to return per bucket + Algorithm: 1. Prefill top 5 by relevance 2. MMR selection: balance relevance vs diversity 3. Re-sort by original relevance for better generation quality """ - buckets = results.get("text_mem", []) - if not buckets: + text_buckets = results.get("text_mem", []) + pref_buckets = results.get("preference", []) + + # Early return if no memories to deduplicate + if not text_buckets and not pref_buckets: return results - # Flatten all memories with their scores - flat: list[tuple[int, dict[str, Any], float]] = [] - for bucket_idx, bucket in enumerate(buckets): + # Flatten all memories with their type and scores + # flat structure: (memory_type, bucket_idx, mem, score) + flat: list[tuple[str, int, dict[str, Any], float]] = [] + + # Flatten text memories + for bucket_idx, bucket in enumerate(text_buckets): for mem in bucket.get("memories", []): score = mem.get("metadata", {}).get("relativity", 0.0) - flat.append((bucket_idx, mem, float(score) if score is not None else 0.0)) + flat.append(("text", bucket_idx, mem, float(score) if score is not None else 0.0)) + + # Flatten preference memories + for bucket_idx, bucket in enumerate(pref_buckets): + for mem in bucket.get("memories", []): + score = mem.get("metadata", {}).get("relativity", 0.0) + flat.append(("preference", bucket_idx, mem, float(score) if score is not None else 0.0)) if len(flat) <= 1: return results # Get or compute embeddings - embeddings = self._extract_embeddings([mem for _, mem, _ in flat]) + embeddings = self._extract_embeddings([mem for _, _, mem, _ in flat]) if embeddings is None: - documents = [mem.get("memory", "") for _, mem, _ in flat] + documents = [mem.get("memory", "") for _, _, mem, _ in flat] embeddings = self.searcher.embedder.embed(documents) # Compute similarity matrix using NumPy-optimized method # Returns numpy array but compatible with list[i][j] indexing similarity_matrix = cosine_similarity_matrix(embeddings) - # Initialize selection tracking - indices_by_bucket: dict[int, list[int]] = {i: [] for i in range(len(buckets))} - for flat_index, (bucket_idx, _, _) in enumerate(flat): - indices_by_bucket[bucket_idx].append(flat_index) + # Initialize selection tracking for both text and preference + text_indices_by_bucket: dict[int, list[int]] = {i: [] for i in range(len(text_buckets))} + pref_indices_by_bucket: dict[int, list[int]] = {i: [] for i in range(len(pref_buckets))} + + for flat_index, (mem_type, bucket_idx, _, _) in enumerate(flat): + if mem_type == "text": + text_indices_by_bucket[bucket_idx].append(flat_index) + elif mem_type == "preference": + pref_indices_by_bucket[bucket_idx].append(flat_index) selected_global: list[int] = [] - selected_by_bucket: dict[int, list[int]] = {i: [] for i in range(len(buckets))} + text_selected_by_bucket: dict[int, list[int]] = {i: [] for i in range(len(text_buckets))} + pref_selected_by_bucket: dict[int, list[int]] = {i: [] for i in range(len(pref_buckets))} # Phase 1: Prefill top N by relevance - prefill_top_n = min(5, target_top_k) + # Use the smaller of text_top_k and pref_top_k for prefill count + prefill_top_n = min(5, text_top_k, pref_top_k) if pref_buckets else min(5, text_top_k) ordered_by_relevance = sorted( - range(len(flat)), key=lambda idx: flat[idx][2], reverse=True + range(len(flat)), key=lambda idx: flat[idx][3], reverse=True ) for idx in 
ordered_by_relevance[:len(flat)]: if len(selected_global) >= prefill_top_n: break - bucket_idx = flat[idx][0] - if len(selected_by_bucket[bucket_idx]) >= target_top_k: - continue - selected_global.append(idx) - selected_by_bucket[bucket_idx].append(idx) + mem_type, bucket_idx, _, _ = flat[idx] + + # Check bucket capacity with correct top_k for each type + if mem_type == "text": + if len(text_selected_by_bucket[bucket_idx]) >= text_top_k: + continue + selected_global.append(idx) + text_selected_by_bucket[bucket_idx].append(idx) + elif mem_type == "preference": + if len(pref_selected_by_bucket[bucket_idx]) >= pref_top_k: + continue + selected_global.append(idx) + pref_selected_by_bucket[bucket_idx].append(idx) # Phase 2: MMR selection for remaining slots lambda_relevance = 0.8 @@ -227,11 +263,17 @@ def _mmr_dedup_text_memories( best_mmr: float | None = None for idx in remaining: - bucket_idx = flat[idx][0] - if len(selected_by_bucket[bucket_idx]) >= target_top_k: - continue + mem_type, bucket_idx, _, _ = flat[idx] + + # Check bucket capacity + if mem_type == "text": + if len(text_selected_by_bucket[bucket_idx]) >= target_top_k: + continue + elif mem_type == "preference": + if len(pref_selected_by_bucket[bucket_idx]) >= target_top_k: + continue - relevance = flat[idx][2] + relevance = flat[idx][3] max_sim = ( 0.0 if not selected_global @@ -253,22 +295,36 @@ def _mmr_dedup_text_memories( if best_idx is None: break + mem_type, bucket_idx, _, _ = flat[best_idx] selected_global.append(best_idx) - selected_by_bucket[flat[best_idx][0]].append(best_idx) + if mem_type == "text": + text_selected_by_bucket[bucket_idx].append(best_idx) + elif mem_type == "preference": + pref_selected_by_bucket[bucket_idx].append(best_idx) remaining.remove(best_idx) # Early termination: all buckets are full - if all( - len(selected_by_bucket[b_idx]) >= min(target_top_k, len(bucket_indices)) - for b_idx, bucket_indices in indices_by_bucket.items() - ): + text_all_full = all( + len(text_selected_by_bucket[b_idx]) >= min(target_top_k, len(bucket_indices)) + for b_idx, bucket_indices in text_indices_by_bucket.items() + ) + pref_all_full = all( + len(pref_selected_by_bucket[b_idx]) >= min(target_top_k, len(bucket_indices)) + for b_idx, bucket_indices in pref_indices_by_bucket.items() + ) + if text_all_full and pref_all_full: break - # Phase 3: Re-sort by original relevance - for bucket_idx, bucket in enumerate(buckets): - selected_indices = selected_by_bucket.get(bucket_idx, []) - selected_indices = sorted(selected_indices, key=lambda i: flat[i][2], reverse=True) - bucket["memories"] = [flat[i][1] for i in selected_indices] + # Phase 3: Re-sort by original relevance and fill back to buckets + for bucket_idx, bucket in enumerate(text_buckets): + selected_indices = text_selected_by_bucket.get(bucket_idx, []) + selected_indices = sorted(selected_indices, key=lambda i: flat[i][3], reverse=True) + bucket["memories"] = [flat[i][2] for i in selected_indices] + + for bucket_idx, bucket in enumerate(pref_buckets): + selected_indices = pref_selected_by_bucket.get(bucket_idx, []) + selected_indices = sorted(selected_indices, key=lambda i: flat[i][3], reverse=True) + bucket["memories"] = [flat[i][2] for i in selected_indices] return results From d7bc36bacb992959fe76236d5600a8bd6fbadc49 Mon Sep 17 00:00:00 2001 From: hijzy Date: Wed, 28 Jan 2026 17:52:22 +0800 Subject: [PATCH 23/40] fix: recall less preference memory --- src/memos/api/handlers/search_handler.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git 
a/src/memos/api/handlers/search_handler.py b/src/memos/api/handlers/search_handler.py index b7d1e150b..e49b5e25b 100644 --- a/src/memos/api/handlers/search_handler.py +++ b/src/memos/api/handlers/search_handler.py @@ -265,12 +265,12 @@ def _mmr_dedup_text_memories( for idx in remaining: mem_type, bucket_idx, _, _ = flat[idx] - # Check bucket capacity + # Check bucket capacity with correct top_k for each type if mem_type == "text": - if len(text_selected_by_bucket[bucket_idx]) >= target_top_k: + if len(text_selected_by_bucket[bucket_idx]) >= text_top_k: continue elif mem_type == "preference": - if len(pref_selected_by_bucket[bucket_idx]) >= target_top_k: + if len(pref_selected_by_bucket[bucket_idx]) >= pref_top_k: continue relevance = flat[idx][3] @@ -305,11 +305,11 @@ def _mmr_dedup_text_memories( # Early termination: all buckets are full text_all_full = all( - len(text_selected_by_bucket[b_idx]) >= min(target_top_k, len(bucket_indices)) + len(text_selected_by_bucket[b_idx]) >= min(text_top_k, len(bucket_indices)) for b_idx, bucket_indices in text_indices_by_bucket.items() ) pref_all_full = all( - len(pref_selected_by_bucket[b_idx]) >= min(target_top_k, len(bucket_indices)) + len(pref_selected_by_bucket[b_idx]) >= min(pref_top_k, len(bucket_indices)) for b_idx, bucket_indices in pref_indices_by_bucket.items() ) if text_all_full and pref_all_full: From 0fc1b6ed2560b1335e402267998032eede4c1865 Mon Sep 17 00:00:00 2001 From: hijzy Date: Wed, 28 Jan 2026 18:33:42 +0800 Subject: [PATCH 24/40] test: memory text deduplication --- src/memos/api/handlers/search_handler.py | 26 ++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/src/memos/api/handlers/search_handler.py b/src/memos/api/handlers/search_handler.py index e49b5e25b..af038c660 100644 --- a/src/memos/api/handlers/search_handler.py +++ b/src/memos/api/handlers/search_handler.py @@ -228,6 +228,7 @@ def _mmr_dedup_text_memories( selected_global: list[int] = [] text_selected_by_bucket: dict[int, list[int]] = {i: [] for i in range(len(text_buckets))} pref_selected_by_bucket: dict[int, list[int]] = {i: [] for i in range(len(pref_buckets))} + selected_texts: set[str] = set() # Track exact text content to avoid duplicates # Phase 1: Prefill top N by relevance # Use the smaller of text_top_k and pref_top_k for prefill count @@ -238,7 +239,12 @@ def _mmr_dedup_text_memories( for idx in ordered_by_relevance[:len(flat)]: if len(selected_global) >= prefill_top_n: break - mem_type, bucket_idx, _, _ = flat[idx] + mem_type, bucket_idx, mem, _ = flat[idx] + + # Skip if exact text already exists in selected set + mem_text = mem.get("memory", "").strip() + if mem_text in selected_texts: + continue # Check bucket capacity with correct top_k for each type if mem_type == "text": @@ -246,16 +252,18 @@ def _mmr_dedup_text_memories( continue selected_global.append(idx) text_selected_by_bucket[bucket_idx].append(idx) + selected_texts.add(mem_text) elif mem_type == "preference": if len(pref_selected_by_bucket[bucket_idx]) >= pref_top_k: continue selected_global.append(idx) pref_selected_by_bucket[bucket_idx].append(idx) + selected_texts.add(mem_text) # Phase 2: MMR selection for remaining slots lambda_relevance = 0.8 similarity_threshold = 0.92 - beta_high_similarity = 5.0 # Penalty multiplier for similarity > 0.92 + beta_high_similarity = 12.0 # Penalty multiplier for similarity > 0.92 remaining = set(range(len(flat))) - set(selected_global) while remaining: @@ -263,7 +271,7 @@ def _mmr_dedup_text_memories( best_mmr: float | None 
= None

             for idx in remaining:
-                mem_type, bucket_idx, _, _ = flat[idx]
+                mem_type, bucket_idx, mem, _ = flat[idx]

                 # Check bucket capacity with correct top_k for each type
                 if mem_type == "text":
@@ -273,6 +281,11 @@ def _mmr_dedup_text_memories(
                 if len(pref_selected_by_bucket[bucket_idx]) >= pref_top_k:
                     continue

+            # Check if exact text already exists - if so, skip this candidate entirely
+            mem_text = mem.get("memory", "").strip()
+            if mem_text in selected_texts:
+                continue  # Skip duplicate text, don't participate in MMR competition
+
             relevance = flat[idx][3]
             max_sim = (
                 0.0
@@ -295,8 +308,13 @@ def _mmr_dedup_text_memories(
         if best_idx is None:
             break

-        mem_type, bucket_idx, _, _ = flat[best_idx]
+        mem_type, bucket_idx, mem, _ = flat[best_idx]
+
+        # Add to selected set and track text
+        mem_text = mem.get("memory", "").strip()
         selected_global.append(best_idx)
+        selected_texts.add(mem_text)
+
         if mem_type == "text":
             text_selected_by_bucket[bucket_idx].append(best_idx)
         elif mem_type == "preference":

From 5c52691b15c47e21acc92af9672e3d8736fbcf28 Mon Sep 17 00:00:00 2001
From: hijzy
Date: Wed, 28 Jan 2026 18:44:44 +0800
Subject: [PATCH 25/40] test: memory text deduplication

---
 src/memos/api/handlers/search_handler.py | 84 ++++++++++++++++++++++++
 1 file changed, 84 insertions(+)

diff --git a/src/memos/api/handlers/search_handler.py b/src/memos/api/handlers/search_handler.py
index af038c660..c146c3159 100644
--- a/src/memos/api/handlers/search_handler.py
+++ b/src/memos/api/handlers/search_handler.py
@@ -246,6 +246,10 @@ def _mmr_dedup_text_memories(
             if mem_text in selected_texts:
                 continue

+            # Skip if highly similar (85% LCS) to any selected text
+            if SearchHandler._is_text_highly_similar(mem_text, selected_texts, threshold=0.85):
+                continue
+
             # Check bucket capacity with correct top_k for each type
             if mem_type == "text":
                 if len(text_selected_by_bucket[bucket_idx]) >= text_top_k:
                     continue
@@ -286,6 +290,10 @@ def _mmr_dedup_text_memories(
             if mem_text in selected_texts:
                 continue  # Skip duplicate text, don't participate in MMR competition

+            # Skip if highly similar (95% LCS) to any selected text
+            if SearchHandler._is_text_highly_similar(mem_text, selected_texts, threshold=0.95):
+                continue  # Skip highly similar text, don't participate in MMR competition
+
             relevance = flat[idx][3]
             max_sim = (
                 0.0
@@ -346,6 +354,82 @@ def _mmr_dedup_text_memories(

         return results

+    @staticmethod
+    def _lcs_ratio(text1: str, text2: str) -> float:
+        """
+        Compute the ratio of the longest common subsequence (LCS) to the shorter text.
+        Uses a space-optimized dynamic programming algorithm that keeps only one row.
+
+        Args:
+            text1: First text
+            text2: Second text
+
+        Returns:
+            LCS length / min(len(text1), len(text2))
+        """
+        if not text1 or not text2:
+            return 0.0
+
+        m, n = len(text1), len(text2)
+        min_len = min(m, n)
+
+        # Optimization: if the length difference is too large (over 20%), 95% similarity is impossible
+        if abs(m - n) > min_len * 0.2:
+            return 0.0
+
+        # Space-optimized DP that keeps only one row
+        prev = [0] * (n + 1)
+
+        for i in range(1, m + 1):
+            curr = [0] * (n + 1)
+            for j in range(1, n + 1):
+                if text1[i-1] == text2[j-1]:
+                    curr[j] = prev[j-1] + 1
+                else:
+                    curr[j] = max(curr[j-1], prev[j])
+            prev = curr
+
+        lcs_len = prev[n]
+        return lcs_len / min_len if min_len > 0 else 0.0
+
+    @staticmethod
+    def _is_text_highly_similar(candidate: str, selected_texts: set[str], threshold: float = 0.85) -> bool:
+        """
+        Quickly check whether the candidate text is highly similar to any selected text (LCS-based).
+
+        Optimization strategy:
+        1. Check the length difference first (skip directly if it exceeds 20%)
+        2. Compute the LCS ratio; if >= threshold, treat the texts as highly similar
+
+        Args:
+            candidate: Candidate text
+            selected_texts: Set of already selected texts
+            threshold: Similarity threshold (default 0.85, i.e. 85% similar)
+
+        Returns:
+            True if highly similar, False otherwise
+        """
+        candidate = candidate.strip()
+        if not candidate:
+            return False
+
+        for selected in selected_texts:
+            selected = selected.strip()
+            if not selected:
+                continue
+
+            # Fast check: a length difference over 20% rules out 95% similarity
+            min_len = min(len(candidate), len(selected))
+            if abs(len(candidate) - len(selected)) > min_len * 0.2:
+                continue
+
+            # Compute the LCS ratio
+            lcs_ratio = SearchHandler._lcs_ratio(candidate, selected)
+            if lcs_ratio >= threshold:
+                return True
+
+        return False
+
     @staticmethod
     def _is_unrelated(
         index: int,

From ce8fb96f9b6ede940bb773869a53541329829a48 Mon Sep 17 00:00:00 2001
From: hijzy
Date: Wed, 28 Jan 2026 18:59:11 +0800
Subject: [PATCH 26/40] test: restore

---
 src/memos/api/handlers/search_handler.py | 110 +---------------------
 1 file changed, 4 insertions(+), 106 deletions(-)

diff --git a/src/memos/api/handlers/search_handler.py b/src/memos/api/handlers/search_handler.py
index c146c3159..e49b5e25b 100644
--- a/src/memos/api/handlers/search_handler.py
+++ b/src/memos/api/handlers/search_handler.py
@@ -228,7 +228,6 @@ def _mmr_dedup_text_memories(
         selected_global: list[int] = []
         text_selected_by_bucket: dict[int, list[int]] = {i: [] for i in range(len(text_buckets))}
         pref_selected_by_bucket: dict[int, list[int]] = {i: [] for i in range(len(pref_buckets))}
-        selected_texts: set[str] = set()  # Track exact text content to avoid duplicates

         # Phase 1: Prefill top N by relevance
         # Use the smaller of text_top_k and pref_top_k for prefill count
@@ -239,16 +238,7 @@ def _mmr_dedup_text_memories(
         for idx in ordered_by_relevance[:len(flat)]:
             if len(selected_global) >= prefill_top_n:
                 break
-            mem_type, bucket_idx, mem, _ = flat[idx]
-
-            # Skip if exact text already exists in selected set
-            mem_text = mem.get("memory", "").strip()
-            if mem_text in selected_texts:
-                continue
-
-            # Skip if highly similar (85% LCS) to any selected text
-            if SearchHandler._is_text_highly_similar(mem_text, selected_texts, threshold=0.85):
-                continue
+            mem_type, bucket_idx, _, _ = flat[idx]

             # Check bucket capacity with correct top_k for each type
             if mem_type == "text":
@@ -256,18 +246,16 @@ def _mmr_dedup_text_memories(
                     continue
                 selected_global.append(idx)
                 text_selected_by_bucket[bucket_idx].append(idx)
-                selected_texts.add(mem_text)
             elif mem_type == "preference":
                 if len(pref_selected_by_bucket[bucket_idx]) >= pref_top_k:
                     continue
                 selected_global.append(idx)
                 pref_selected_by_bucket[bucket_idx].append(idx)
-                selected_texts.add(mem_text)

         # Phase 2: MMR selection for remaining slots
         lambda_relevance = 0.8
         similarity_threshold = 0.92
-        beta_high_similarity = 12.0  # Penalty multiplier for similarity > 0.92
+        beta_high_similarity = 5.0  # Penalty multiplier for similarity > 0.92

         remaining = set(range(len(flat))) - set(selected_global)
         while remaining:
@@ -275,7 +263,7 @@ def _mmr_dedup_text_memories(
             best_mmr: float | None = None

             for idx in remaining:
-                mem_type, bucket_idx, mem, _ = flat[idx]
+                mem_type, bucket_idx, _, _ = flat[idx]

                 # Check bucket capacity with correct top_k for each type
                 if mem_type == "text":
@@ -285,15 +273,6 @@ def _mmr_dedup_text_memories(
                 if len(pref_selected_by_bucket[bucket_idx]) >= pref_top_k:
                     continue

-            # Check if exact text already exists - if so, skip this candidate entirely
-            mem_text = mem.get("memory", "").strip()
-            if mem_text in selected_texts:
-                continue  # Skip duplicate text, don't participate in MMR competition
-
-            # Skip if highly similar (95% LCS) to any selected text
-            if SearchHandler._is_text_highly_similar(mem_text, selected_texts, threshold=0.95):
-                continue  # Skip highly similar text, don't participate in MMR competition
-
             relevance = flat[idx][3]
             max_sim = (
                 0.0
@@ -316,13 +295,8 @@ def _mmr_dedup_text_memories(
         if best_idx is None:
             break

-        mem_type, bucket_idx, mem, _ = flat[best_idx]
-
-        # Add to selected set and track text
-        mem_text = mem.get("memory", "").strip()
+        mem_type, bucket_idx, _, _ = flat[best_idx]
         selected_global.append(best_idx)
-        selected_texts.add(mem_text)
-
         if mem_type == "text":
             text_selected_by_bucket[bucket_idx].append(best_idx)
         elif mem_type == "preference":
@@ -354,82 +328,6 @@ def _mmr_dedup_text_memories(

         return results

-    @staticmethod
-    def _lcs_ratio(text1: str, text2: str) -> float:
-        """
-        Compute the ratio of the longest common subsequence (LCS) to the shorter text.
-        Uses a space-optimized dynamic programming algorithm that keeps only one row.
-
-        Args:
-            text1: First text
-            text2: Second text
-
-        Returns:
-            LCS length / min(len(text1), len(text2))
-        """
-        if not text1 or not text2:
-            return 0.0
-
-        m, n = len(text1), len(text2)
-        min_len = min(m, n)
-
-        # Optimization: if the length difference is too large (over 20%), 95% similarity is impossible
-        if abs(m - n) > min_len * 0.2:
-            return 0.0
-
-        # Space-optimized DP that keeps only one row
-        prev = [0] * (n + 1)
-
-        for i in range(1, m + 1):
-            curr = [0] * (n + 1)
-            for j in range(1, n + 1):
-                if text1[i-1] == text2[j-1]:
-                    curr[j] = prev[j-1] + 1
-                else:
-                    curr[j] = max(curr[j-1], prev[j])
-            prev = curr
-
-        lcs_len = prev[n]
-        return lcs_len / min_len if min_len > 0 else 0.0
-
-    @staticmethod
-    def _is_text_highly_similar(candidate: str, selected_texts: set[str], threshold: float = 0.85) -> bool:
-        """
-        Quickly check whether the candidate text is highly similar to any selected text (LCS-based).
-
-        Optimization strategy:
-        1. Check the length difference first (skip directly if it exceeds 20%)
-        2. Compute the LCS ratio; if >= threshold, treat the texts as highly similar
-
-        Args:
-            candidate: Candidate text
-            selected_texts: Set of already selected texts
-            threshold: Similarity threshold (default 0.85, i.e. 85% similar)
-
-        Returns:
-            True if highly similar, False otherwise
-        """
-        candidate = candidate.strip()
-        if not candidate:
-            return False
-
-        for selected in selected_texts:
-            selected = selected.strip()
-            if not selected:
-                continue
-
-            # Fast check: a length difference over 20% rules out 95% similarity
-            min_len = min(len(candidate), len(selected))
-            if abs(len(candidate) - len(selected)) > min_len * 0.2:
-                continue
-
-            # Compute the LCS ratio
-            lcs_ratio = SearchHandler._lcs_ratio(candidate, selected)
-            if lcs_ratio >= threshold:
-                return True
-
-        return False
-
     @staticmethod
     def _is_unrelated(
         index: int,

From a4830110a4df80132fe77d2cd4b9693f84eeb210 Mon Sep 17 00:00:00 2001
From: hijzy
Date: Wed, 28 Jan 2026 19:02:37 +0800
Subject: [PATCH 27/40] test: restore

---
 src/memos/api/handlers/search_handler.py | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/src/memos/api/handlers/search_handler.py b/src/memos/api/handlers/search_handler.py
index e49b5e25b..229a13c0b 100644
--- a/src/memos/api/handlers/search_handler.py
+++ b/src/memos/api/handlers/search_handler.py
@@ -228,6 +228,7 @@ def _mmr_dedup_text_memories(
         selected_global: list[int] = []
         text_selected_by_bucket: dict[int, list[int]] = {i: [] for i in range(len(text_buckets))}
         pref_selected_by_bucket: dict[int, list[int]] = {i: [] for i in range(len(pref_buckets))}
+        selected_texts: set[str] = set()  # Track exact text content to avoid duplicates

         # Phase 1: Prefill top N by relevance
         # Use the smaller of text_top_k and pref_top_k for prefill count
@@ -238,7 +239,12 @@ def _mmr_dedup_text_memories(
         for idx in ordered_by_relevance[:len(flat)]:
             if len(selected_global) >= prefill_top_n:
                 break
-            mem_type, bucket_idx, _, _ = flat[idx]
+            mem_type, bucket_idx, mem, _ = flat[idx]
+
+            # Skip if exact text already exists in selected set
+            mem_text =
mem.get("memory", "").strip() + if mem_text in selected_texts: + continue # Check bucket capacity with correct top_k for each type if mem_type == "text": @@ -246,11 +252,13 @@ def _mmr_dedup_text_memories( continue selected_global.append(idx) text_selected_by_bucket[bucket_idx].append(idx) + selected_texts.add(mem_text) elif mem_type == "preference": if len(pref_selected_by_bucket[bucket_idx]) >= pref_top_k: continue selected_global.append(idx) pref_selected_by_bucket[bucket_idx].append(idx) + selected_texts.add(mem_text) # Phase 2: MMR selection for remaining slots lambda_relevance = 0.8 @@ -263,7 +271,7 @@ def _mmr_dedup_text_memories( best_mmr: float | None = None for idx in remaining: - mem_type, bucket_idx, _, _ = flat[idx] + mem_type, bucket_idx, mem, _ = flat[idx] # Check bucket capacity with correct top_k for each type if mem_type == "text": @@ -273,6 +281,11 @@ def _mmr_dedup_text_memories( if len(pref_selected_by_bucket[bucket_idx]) >= pref_top_k: continue + # Check if exact text already exists - if so, skip this candidate entirely + mem_text = mem.get("memory", "").strip() + if mem_text in selected_texts: + continue # Skip duplicate text, don't participate in MMR competition + relevance = flat[idx][3] max_sim = ( 0.0 @@ -295,8 +308,13 @@ def _mmr_dedup_text_memories( if best_idx is None: break - mem_type, bucket_idx, _, _ = flat[best_idx] + mem_type, bucket_idx, mem, _ = flat[best_idx] + + # Add to selected set and track text + mem_text = mem.get("memory", "").strip() selected_global.append(best_idx) + selected_texts.add(mem_text) + if mem_type == "text": text_selected_by_bucket[bucket_idx].append(best_idx) elif mem_type == "preference": From f0f685cbb85f7e57eb03efe8936b2db3facebf07 Mon Sep 17 00:00:00 2001 From: hijzy Date: Wed, 28 Jan 2026 19:18:58 +0800 Subject: [PATCH 28/40] test: add 2 gram dedup --- src/memos/api/handlers/search_handler.py | 85 ++++++++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/src/memos/api/handlers/search_handler.py b/src/memos/api/handlers/search_handler.py index 229a13c0b..e74100533 100644 --- a/src/memos/api/handlers/search_handler.py +++ b/src/memos/api/handlers/search_handler.py @@ -246,6 +246,12 @@ def _mmr_dedup_text_memories( if mem_text in selected_texts: continue + # Skip if highly similar (2-gram + embedding filter) + if SearchHandler._is_text_highly_similar_optimized( + idx, mem_text, selected_global, similarity_matrix, flat, threshold=0.60 + ): + continue + # Check bucket capacity with correct top_k for each type if mem_type == "text": if len(text_selected_by_bucket[bucket_idx]) >= text_top_k: @@ -286,6 +292,12 @@ def _mmr_dedup_text_memories( if mem_text in selected_texts: continue # Skip duplicate text, don't participate in MMR competition + # Skip if highly similar (2-gram + embedding filter) + if SearchHandler._is_text_highly_similar_optimized( + idx, mem_text, selected_global, similarity_matrix, flat, threshold=0.60 + ): + continue # Skip highly similar text, don't participate in MMR competition + relevance = flat[idx][3] max_sim = ( 0.0 @@ -386,6 +398,79 @@ def _strip_embeddings(results: dict[str, Any]) -> None: if "embedding" in metadata: metadata["embedding"] = [] + @staticmethod + def _bigram_similarity(text1: str, text2: str) -> float: + """ + Calculate character-level 2-gram Jaccard similarity (fast approximation). 
+ + Args: + text1: First text string + text2: Second text string + + Returns: + Jaccard similarity score between 0.0 and 1.0 + """ + if not text1 or not text2: + return 0.0 + + # Generate 2-grams + bigrams1 = {text1[i:i+2] for i in range(len(text1) - 1)} if len(text1) >= 2 else {text1} + bigrams2 = {text2[i:i+2] for i in range(len(text2) - 1)} if len(text2) >= 2 else {text2} + + intersection = len(bigrams1 & bigrams2) + union = len(bigrams1 | bigrams2) + + return intersection / union if union > 0 else 0.0 + + @staticmethod + def _is_text_highly_similar_optimized( + candidate_idx: int, + candidate_text: str, + selected_global: list[int], + similarity_matrix, + flat: list, + threshold: float = 0.60, + ) -> bool: + """ + Optimized text similarity check with two-stage filtering. + + Strategy: + 1. Only compare with the single highest embedding similarity item (not all 25) + 2. Only perform 2-gram comparison if embedding similarity > 0.80 + + This reduces comparisons from O(N) to O(1) per candidate, with embedding pre-filtering. + Expected speedup: 100-200x compared to LCS approach. + + Args: + candidate_idx: Index of candidate memory in flat list + candidate_text: Text content of candidate memory + selected_global: List of already selected memory indices + similarity_matrix: Precomputed embedding similarity matrix + flat: Flat list of all memories + threshold: 2-gram similarity threshold (default 0.60) + + Returns: + True if candidate is highly similar to any selected memory + """ + if not selected_global: + return False + + # Find the already-selected memory with highest embedding similarity + max_sim_idx = max(selected_global, key=lambda j: similarity_matrix[candidate_idx][j]) + max_sim = similarity_matrix[candidate_idx][max_sim_idx] + + # If highest embedding similarity < 0.80, skip text comparison entirely + if max_sim < 0.80: + return False + + # Get text of most similar memory + most_similar_mem = flat[max_sim_idx][2] + most_similar_text = most_similar_mem.get("memory", "").strip() + + # Calculate 2-gram similarity + bigram_sim = SearchHandler._bigram_similarity(candidate_text, most_similar_text) + return bigram_sim >= threshold + def _resolve_cube_ids(self, search_req: APISearchRequest) -> list[str]: """ Normalize target cube ids from search_req. From 55e028830b5c2c51bba4d79731582c40df63825c Mon Sep 17 00:00:00 2001 From: hijzy Date: Wed, 28 Jan 2026 20:03:57 +0800 Subject: [PATCH 29/40] test: add more dedup --- src/memos/api/handlers/search_handler.py | 127 +++++++++++++++++++---- 1 file changed, 109 insertions(+), 18 deletions(-) diff --git a/src/memos/api/handlers/search_handler.py b/src/memos/api/handlers/search_handler.py index e74100533..130b91e28 100644 --- a/src/memos/api/handlers/search_handler.py +++ b/src/memos/api/handlers/search_handler.py @@ -5,6 +5,7 @@ using dependency injection for better modularity and testability. 
""" +import math from typing import Any from memos.api.handlers.base_handler import BaseHandler, HandlerDependencies @@ -232,7 +233,7 @@ def _mmr_dedup_text_memories( # Phase 1: Prefill top N by relevance # Use the smaller of text_top_k and pref_top_k for prefill count - prefill_top_n = min(5, text_top_k, pref_top_k) if pref_buckets else min(5, text_top_k) + prefill_top_n = min(2, text_top_k, pref_top_k) if pref_buckets else min(2, text_top_k) ordered_by_relevance = sorted( range(len(flat)), key=lambda idx: flat[idx][3], reverse=True ) @@ -246,9 +247,9 @@ def _mmr_dedup_text_memories( if mem_text in selected_texts: continue - # Skip if highly similar (2-gram + embedding filter) + # Skip if highly similar (Dice + TF-IDF + 2-gram combined, with embedding filter) if SearchHandler._is_text_highly_similar_optimized( - idx, mem_text, selected_global, similarity_matrix, flat, threshold=0.60 + idx, mem_text, selected_global, similarity_matrix, flat, threshold=0.6 ): continue @@ -268,8 +269,8 @@ def _mmr_dedup_text_memories( # Phase 2: MMR selection for remaining slots lambda_relevance = 0.8 - similarity_threshold = 0.92 - beta_high_similarity = 5.0 # Penalty multiplier for similarity > 0.92 + similarity_threshold = 0.60 # Start exponential penalty from 0.80 (lowered from 0.92) + alpha_exponential = 20.0 # Exponential penalty coefficient remaining = set(range(len(flat))) - set(selected_global) while remaining: @@ -292,9 +293,9 @@ def _mmr_dedup_text_memories( if mem_text in selected_texts: continue # Skip duplicate text, don't participate in MMR competition - # Skip if highly similar (2-gram + embedding filter) + # Skip if highly similar (Dice + TF-IDF + 2-gram combined, with embedding filter) if SearchHandler._is_text_highly_similar_optimized( - idx, mem_text, selected_global, similarity_matrix, flat, threshold=0.60 + idx, mem_text, selected_global, similarity_matrix, flat, threshold=0.6 ): continue # Skip highly similar text, don't participate in MMR competition @@ -305,9 +306,10 @@ def _mmr_dedup_text_memories( else max(similarity_matrix[idx][j] for j in selected_global) ) - # Progressive penalty for high similarity (> 0.92) + # Exponential penalty for similarity > 0.80 if max_sim > similarity_threshold: - diversity = max_sim + (max_sim - similarity_threshold) * beta_high_similarity + penalty_multiplier = math.exp(alpha_exponential * (max_sim - similarity_threshold)) + diversity = max_sim * penalty_multiplier else: diversity = max_sim @@ -398,10 +400,37 @@ def _strip_embeddings(results: dict[str, Any]) -> None: if "embedding" in metadata: metadata["embedding"] = [] + @staticmethod + def _dice_similarity(text1: str, text2: str) -> float: + """ + Calculate Dice coefficient (character-level, fastest). + + Dice = 2 * |A ∩ B| / (|A| + |B|) + Speed: O(n + m), ~0.05-0.1ms per comparison + + Args: + text1: First text string + text2: Second text string + + Returns: + Dice similarity score between 0.0 and 1.0 + """ + if not text1 or not text2: + return 0.0 + + chars1 = set(text1) + chars2 = set(text2) + + intersection = len(chars1 & chars2) + return 2 * intersection / (len(chars1) + len(chars2)) + @staticmethod def _bigram_similarity(text1: str, text2: str) -> float: """ - Calculate character-level 2-gram Jaccard similarity (fast approximation). + Calculate character-level 2-gram Jaccard similarity. + + Speed: O(n + m), ~0.1-0.2ms per comparison + Considers local order (more strict than Dice). 
Args: text1: First text string @@ -422,6 +451,52 @@ def _bigram_similarity(text1: str, text2: str) -> float: return intersection / union if union > 0 else 0.0 + @staticmethod + def _tfidf_similarity(text1: str, text2: str) -> float: + """ + Calculate TF-IDF cosine similarity (character-level, no sklearn). + + Speed: O(n + m), ~0.3-0.5ms per comparison + Considers character frequency weighting. + + Args: + text1: First text string + text2: Second text string + + Returns: + Cosine similarity score between 0.0 and 1.0 + """ + if not text1 or not text2: + return 0.0 + + from collections import Counter + + # Character frequency (TF) + tf1 = Counter(text1) + tf2 = Counter(text2) + + # All unique characters (vocabulary) + vocab = set(tf1.keys()) | set(tf2.keys()) + + # Simple IDF: log(2 / df) where df is document frequency + # For two documents, IDF is log(2/1)=0.693 if char appears in one doc, + # or log(2/2)=0 if appears in both (we use log(2/1) for simplicity) + idf = {char: (1.0 if char in tf1 and char in tf2 else 1.5) for char in vocab} + + # TF-IDF vectors + vec1 = {char: tf1.get(char, 0) * idf[char] for char in vocab} + vec2 = {char: tf2.get(char, 0) * idf[char] for char in vocab} + + # Cosine similarity + dot_product = sum(vec1[char] * vec2[char] for char in vocab) + norm1 = math.sqrt(sum(v * v for v in vec1.values())) + norm2 = math.sqrt(sum(v * v for v in vec2.values())) + + if norm1 == 0 or norm2 == 0: + return 0.0 + + return dot_product / (norm1 * norm2) + @staticmethod def _is_text_highly_similar_optimized( candidate_idx: int, @@ -429,14 +504,21 @@ def _is_text_highly_similar_optimized( selected_global: list[int], similarity_matrix, flat: list, - threshold: float = 0.60, + threshold: float = 0.75, ) -> bool: """ - Optimized text similarity check with two-stage filtering. + Multi-algorithm text similarity check with embedding pre-filtering. Strategy: 1. Only compare with the single highest embedding similarity item (not all 25) - 2. Only perform 2-gram comparison if embedding similarity > 0.80 + 2. Only perform text comparison if embedding similarity > 0.60 + 3. Use weighted combination of three algorithms: + - Dice (40%): Fastest, character-level set similarity + - TF-IDF (35%): Considers character frequency weighting + - 2-gram (25%): Considers local character order + + Combined formula: + combined_score = 0.40 * dice + 0.35 * tfidf + 0.25 * bigram This reduces comparisons from O(N) to O(1) per candidate, with embedding pre-filtering. Expected speedup: 100-200x compared to LCS approach. 
@@ -447,7 +529,7 @@ def _is_text_highly_similar_optimized(
             selected_global: List of already selected memory indices
             similarity_matrix: Precomputed embedding similarity matrix
             flat: Flat list of all memories
-            threshold: 2-gram similarity threshold (default 0.60)
+            threshold: Combined similarity threshold (default 0.75)

         Returns:
             True if candidate is highly similar to any selected memory
@@ -459,17 +541,26 @@ def _is_text_highly_similar_optimized(
         max_sim_idx = max(selected_global, key=lambda j: similarity_matrix[candidate_idx][j])
         max_sim = similarity_matrix[candidate_idx][max_sim_idx]

-        # If highest embedding similarity < 0.80, skip text comparison entirely
-        if max_sim < 0.80:
+        # If highest embedding similarity < 0.60, skip text comparison entirely
+        if max_sim <= 0.60:
             return False

         # Get text of most similar memory
         most_similar_mem = flat[max_sim_idx][2]
         most_similar_text = most_similar_mem.get("memory", "").strip()

-        # Calculate 2-gram similarity
+        # Calculate three similarity scores
+        dice_sim = SearchHandler._dice_similarity(candidate_text, most_similar_text)
+        tfidf_sim = SearchHandler._tfidf_similarity(candidate_text, most_similar_text)
         bigram_sim = SearchHandler._bigram_similarity(candidate_text, most_similar_text)
-        return bigram_sim >= threshold
+
+        # Weighted combination: Dice (40%) + TF-IDF (35%) + 2-gram (25%)
+        # Dice has highest weight (fastest and most reliable)
+        # TF-IDF considers frequency (handles repeated characters well)
+        # 2-gram considers order (catches local pattern similarity)
+        combined_score = 0.40 * dice_sim + 0.35 * tfidf_sim + 0.25 * bigram_sim
+
+        return combined_score >= threshold

     def _resolve_cube_ids(self, search_req: APISearchRequest) -> list[str]:
         """

From 07cfce240297901a55bb061521ffe366ad1fbbb5 Mon Sep 17 00:00:00 2001
From: harvey_xiang
Date: Wed, 28 Jan 2026 20:04:52 +0800
Subject: [PATCH 30/40] chore: revert version to 2.0.3

---
 pyproject.toml        | 2 +-
 src/memos/__init__.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 8dd1d90c6..3fbe4ced4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@
 ##############################################################################

 name = "MemoryOS"
-version = "2.0.4"
+version = "2.0.3"
 description = "Intelligence Begins with Memory"
 license = {text = "Apache-2.0"}
 readme = "README.md"
diff --git a/src/memos/__init__.py b/src/memos/__init__.py
index 2d946cfbb..3c764db79 100644
--- a/src/memos/__init__.py
+++ b/src/memos/__init__.py
@@ -1,4 +1,4 @@
-__version__ = "2.0.4"
+__version__ = "2.0.3"

 from memos.configs.mem_cube import GeneralMemCubeConfig
 from memos.configs.mem_os import MOSConfig

From 9020198f5e8321d85479b0509bb5da1905091b36 Mon Sep 17 00:00:00 2001
From: hijzy
Date: Wed, 28 Jan 2026 20:21:52 +0800
Subject: [PATCH 31/40] test: increase prefill
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/memos/api/handlers/search_handler.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/memos/api/handlers/search_handler.py b/src/memos/api/handlers/search_handler.py
index 130b91e28..8ff5c6f8d 100644
--- a/src/memos/api/handlers/search_handler.py
+++ b/src/memos/api/handlers/search_handler.py
@@ -233,7 +233,7 @@ def _mmr_dedup_text_memories(

         # Phase 1: Prefill top N by relevance
         # Use the smaller of text_top_k and pref_top_k for prefill count
-        prefill_top_n = min(2, text_top_k, pref_top_k) if pref_buckets else min(2, text_top_k)
+        prefill_top_n = min(3, text_top_k, pref_top_k) if pref_buckets else min(3, text_top_k)
         ordered_by_relevance = sorted(
             range(len(flat)), key=lambda idx: flat[idx][3], reverse=True
         )

From 6060ce1c3bea093eb222dedc47f03e67c65c73c9 Mon Sep 17 00:00:00 2001
From: hijzy
Date: Wed, 28 Jan 2026 20:36:59 +0800
Subject: [PATCH 32/40] test: adjust threshold parameters
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/memos/api/handlers/search_handler.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/memos/api/handlers/search_handler.py b/src/memos/api/handlers/search_handler.py
index 8ff5c6f8d..a094d229e 100644
--- a/src/memos/api/handlers/search_handler.py
+++ b/src/memos/api/handlers/search_handler.py
@@ -148,7 +148,7 @@ def _dedup_text_memories(self, results: dict[str, Any], target_top_k: int) -> di
             if len(selected_by_bucket[bucket_idx]) >= target_top_k:
                 continue
             # Use 0.92 threshold strictly
-            if self._is_unrelated(idx, selected_global, similarity_matrix, 0.92):
+            if self._is_unrelated(idx, selected_global, similarity_matrix, 0.9):
                 selected_by_bucket[bucket_idx].append(idx)
                 selected_global.append(idx)
@@ -233,7 +233,7 @@ def _mmr_dedup_text_memories(

         # Phase 1: Prefill top N by relevance
         # Use the smaller of text_top_k and pref_top_k for prefill count
-        prefill_top_n = min(3, text_top_k, pref_top_k) if pref_buckets else min(3, text_top_k)
+        prefill_top_n = min(2, text_top_k, pref_top_k) if pref_buckets else min(2, text_top_k)
         ordered_by_relevance = sorted(
@@ -249,7 +249,7 @@ def _mmr_dedup_text_memories(

             # Skip if highly similar (Dice + TF-IDF + 2-gram combined, with embedding filter)
             if SearchHandler._is_text_highly_similar_optimized(
-                idx, mem_text, selected_global, similarity_matrix, flat, threshold=0.6
+                idx, mem_text, selected_global, similarity_matrix, flat, threshold=0.9
             ):
                 continue
@@ -269,8 +269,8 @@ def _mmr_dedup_text_memories(

         # Phase 2: MMR selection for remaining slots
         lambda_relevance = 0.8
-        similarity_threshold = 0.60  # Start exponential penalty from 0.60 (lowered from 0.92)
-        alpha_exponential = 20.0  # Exponential penalty coefficient
+        similarity_threshold = 0.9  # Start exponential penalty from 0.9 (raised from 0.60)
+        alpha_exponential = 10.0  # Exponential penalty coefficient
@@ -295,7 +295,7 @@ def _mmr_dedup_text_memories(

             # Skip if highly similar (Dice + TF-IDF + 2-gram combined, with embedding filter)
             if SearchHandler._is_text_highly_similar_optimized(
-                idx, mem_text, selected_global, similarity_matrix, flat, threshold=0.6
+                idx, mem_text, selected_global, similarity_matrix, flat, threshold=0.9
             ):
                 continue  # Skip highly similar text, don't participate in MMR competition
@@ -504,7 +504,7 @@ def _is_text_highly_similar_optimized(
         selected_global: list[int],
         similarity_matrix,
         flat: list,
-        threshold: float = 0.75,
+        threshold: float = 0.9,
     ) -> bool:
         """
         Multi-algorithm text similarity check with embedding pre-filtering.
@@ -542,7 +542,7 @@ def _is_text_highly_similar_optimized(
         max_sim_idx = max(selected_global, key=lambda j: similarity_matrix[candidate_idx][j])
         max_sim = similarity_matrix[candidate_idx][max_sim_idx]

-        # If highest embedding similarity < 0.60, skip text comparison entirely
-        if max_sim <= 0.60:
+        # If highest embedding similarity is at most 0.9, skip text comparison entirely
+        if max_sim <= 0.9:
             return False

From 4f96a8c0c9f9f5eb01d17ec252829f7b70aecc1c Mon Sep 17 00:00:00 2001
From: hijzy
Date: Wed, 28 Jan 2026 21:18:38 +0800
Subject: [PATCH 33/40] fix: reformat

---
 src/memos/api/handlers/search_handler.py | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/src/memos/api/handlers/search_handler.py b/src/memos/api/handlers/search_handler.py
index a094d229e..0f8ead64e 100644
--- a/src/memos/api/handlers/search_handler.py
+++ b/src/memos/api/handlers/search_handler.py
@@ -6,6 +6,7 @@
 """

 import math
+
 from typing import Any

 from memos.api.handlers.base_handler import BaseHandler, HandlerDependencies
@@ -201,7 +202,9 @@ def _mmr_dedup_text_memories(
         for bucket_idx, bucket in enumerate(pref_buckets):
             for mem in bucket.get("memories", []):
                 score = mem.get("metadata", {}).get("relativity", 0.0)
-                flat.append(("preference", bucket_idx, mem, float(score) if score is not None else 0.0))
+                flat.append(
+                    ("preference", bucket_idx, mem, float(score) if score is not None else 0.0)
+                )

         if len(flat) <= 1:
             return results
@@ -234,10 +237,8 @@ def _mmr_dedup_text_memories(

         # Phase 1: Prefill top N by relevance
         # Use the smaller of text_top_k and pref_top_k for prefill count
         prefill_top_n = min(2, text_top_k, pref_top_k) if pref_buckets else min(2, text_top_k)
-        ordered_by_relevance = sorted(
-            range(len(flat)), key=lambda idx: flat[idx][3], reverse=True
-        )
-        for idx in ordered_by_relevance[:len(flat)]:
+        ordered_by_relevance = sorted(range(len(flat)), key=lambda idx: flat[idx][3], reverse=True)
+        for idx in ordered_by_relevance[: len(flat)]:
             if len(selected_global) >= prefill_top_n:
                 break
             mem_type, bucket_idx, mem, _ = flat[idx]
@@ -249,7 +250,7 @@ def _mmr_dedup_text_memories(

             # Skip if highly similar (Dice + TF-IDF + 2-gram combined, with embedding filter)
             if SearchHandler._is_text_highly_similar_optimized(
-                idx, mem_text, selected_global, similarity_matrix, flat, threshold=0.9
+                idx, mem_text, selected_global, similarity_matrix, flat
             ):
                 continue
@@ -296,7 +296,7 @@ def _mmr_dedup_text_memories(

             # Skip if highly similar (Dice + TF-IDF + 2-gram combined, with embedding filter)
             if SearchHandler._is_text_highly_similar_optimized(
-                idx, mem_text, selected_global, similarity_matrix, flat, threshold=0.9
+                idx, mem_text, selected_global, similarity_matrix, flat
             ):
                 continue  # Skip highly similar text, don't participate in MMR competition
@@ -309,7 +309,9 @@ def _mmr_dedup_text_memories(

             # Exponential penalty for similarity > 0.80
             if max_sim > similarity_threshold:
-                penalty_multiplier = math.exp(alpha_exponential * (max_sim - similarity_threshold))
+                penalty_multiplier = math.exp(
+                    alpha_exponential * (max_sim - similarity_threshold)
+                )
                 diversity = max_sim * penalty_multiplier
             else:
                 diversity = max_sim
@@ -443,8 +446,8 @@ def _bigram_similarity(text1: str, text2: str) -> float:
             return 0.0

         # Generate 2-grams
-        bigrams1 = {text1[i:i+2] for i in range(len(text1) - 1)} if len(text1) >= 2 else {text1}
-        bigrams2 = {text2[i:i+2] for i in range(len(text2) - 1)} if len(text2) >= 2 else {text2}
+        bigrams1 = {text1[i : i + 2] for i in range(len(text1) - 1)} if len(text1) >= 2 else {text1}
+        bigrams2 = {text2[i : i + 2] for i in range(len(text2) - 1)} if len(text2) >= 2 else {text2}

         intersection = len(bigrams1
& bigrams2) union = len(bigrams1 | bigrams2) From 5c9532e2884d60f70c1c3c102786cf6b6fef8379 Mon Sep 17 00:00:00 2001 From: hijzy Date: Wed, 28 Jan 2026 21:19:49 +0800 Subject: [PATCH 34/40] fix: reformat --- src/memos/api/handlers/search_handler.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/memos/api/handlers/search_handler.py b/src/memos/api/handlers/search_handler.py index a844d451a..68d36955d 100644 --- a/src/memos/api/handlers/search_handler.py +++ b/src/memos/api/handlers/search_handler.py @@ -202,7 +202,9 @@ def _mmr_dedup_text_memories( for bucket_idx, bucket in enumerate(pref_buckets): for mem in bucket.get("memories", []): score = mem.get("metadata", {}).get("relativity", 0.0) - flat.append(("preference", bucket_idx, mem, float(score) if score is not None else 0.0)) + flat.append( + ("preference", bucket_idx, mem, float(score) if score is not None else 0.0) + ) if len(flat) <= 1: return results @@ -235,10 +237,8 @@ def _mmr_dedup_text_memories( # Phase 1: Prefill top N by relevance # Use the smaller of text_top_k and pref_top_k for prefill count prefill_top_n = min(2, text_top_k, pref_top_k) if pref_buckets else min(2, text_top_k) - ordered_by_relevance = sorted( - range(len(flat)), key=lambda idx: flat[idx][3], reverse=True - ) - for idx in ordered_by_relevance[:len(flat)]: + ordered_by_relevance = sorted(range(len(flat)), key=lambda idx: flat[idx][3], reverse=True) + for idx in ordered_by_relevance[: len(flat)]: if len(selected_global) >= prefill_top_n: break mem_type, bucket_idx, mem, _ = flat[idx] @@ -309,7 +309,9 @@ def _mmr_dedup_text_memories( # Exponential penalty for similarity > 0.80 if max_sim > similarity_threshold: - penalty_multiplier = math.exp(alpha_exponential * (max_sim - similarity_threshold)) + penalty_multiplier = math.exp( + alpha_exponential * (max_sim - similarity_threshold) + ) diversity = max_sim * penalty_multiplier else: diversity = max_sim @@ -444,8 +446,8 @@ def _bigram_similarity(text1: str, text2: str) -> float: return 0.0 # Generate 2-grams - bigrams1 = {text1[i:i+2] for i in range(len(text1) - 1)} if len(text1) >= 2 else {text1} - bigrams2 = {text2[i:i+2] for i in range(len(text2) - 1)} if len(text2) >= 2 else {text2} + bigrams1 = {text1[i : i + 2] for i in range(len(text1) - 1)} if len(text1) >= 2 else {text1} + bigrams2 = {text2[i : i + 2] for i in range(len(text2) - 1)} if len(text2) >= 2 else {text2} intersection = len(bigrams1 & bigrams2) union = len(bigrams1 | bigrams2) From 4dd692a198af7f2665c7910c24e9935647045b3a Mon Sep 17 00:00:00 2001 From: hijzy Date: Wed, 28 Jan 2026 21:23:10 +0800 Subject: [PATCH 35/40] fix: reformat --- src/memos/api/handlers/search_handler.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/memos/api/handlers/search_handler.py b/src/memos/api/handlers/search_handler.py index 68d36955d..cbf209d20 100644 --- a/src/memos/api/handlers/search_handler.py +++ b/src/memos/api/handlers/search_handler.py @@ -282,12 +282,10 @@ def _mmr_dedup_text_memories( mem_type, bucket_idx, mem, _ = flat[idx] # Check bucket capacity with correct top_k for each type - if mem_type == "text": - if len(text_selected_by_bucket[bucket_idx]) >= text_top_k: - continue - elif mem_type == "preference": - if len(pref_selected_by_bucket[bucket_idx]) >= pref_top_k: - continue + if mem_type == "text" and len(text_selected_by_bucket[bucket_idx]) >= text_top_k: + continue + elif mem_type == "preference" and len(pref_selected_by_bucket[bucket_idx]) >= pref_top_k: + continue # 
Check if exact text already exists - if so, skip this candidate entirely mem_text = mem.get("memory", "").strip() From 8039c02a00c856f17c77faa85b40e0074a79e6de Mon Sep 17 00:00:00 2001 From: hijzy Date: Wed, 28 Jan 2026 21:24:48 +0800 Subject: [PATCH 36/40] fix: reformat --- src/memos/api/handlers/search_handler.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/memos/api/handlers/search_handler.py b/src/memos/api/handlers/search_handler.py index cbf209d20..604916ddf 100644 --- a/src/memos/api/handlers/search_handler.py +++ b/src/memos/api/handlers/search_handler.py @@ -6,10 +6,10 @@ """ import math -import time + from typing import Any + from memos.api.handlers.base_handler import BaseHandler, HandlerDependencies -from memos.api.handlers.formatters_handler import rerank_knowledge_mem from memos.api.product_models import APISearchRequest, SearchResponse from memos.log import get_logger from memos.memories.textual.tree_text_memory.retrieve.retrieve_utils import ( @@ -282,9 +282,7 @@ def _mmr_dedup_text_memories( mem_type, bucket_idx, mem, _ = flat[idx] # Check bucket capacity with correct top_k for each type - if mem_type == "text" and len(text_selected_by_bucket[bucket_idx]) >= text_top_k: - continue - elif mem_type == "preference" and len(pref_selected_by_bucket[bucket_idx]) >= pref_top_k: + if (mem_type == "text" and len(text_selected_by_bucket[bucket_idx]) >= text_top_k) or (mem_type == "preference" and len(pref_selected_by_bucket[bucket_idx]) >= pref_top_k): continue # Check if exact text already exists - if so, skip this candidate entirely From 726b097093fa3077584e9cfe18537a1fc145e8c8 Mon Sep 17 00:00:00 2001 From: hijzy Date: Wed, 28 Jan 2026 21:29:31 +0800 Subject: [PATCH 37/40] fix: reformat --- src/memos/api/handlers/search_handler.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/memos/api/handlers/search_handler.py b/src/memos/api/handlers/search_handler.py index 604916ddf..4e21f0029 100644 --- a/src/memos/api/handlers/search_handler.py +++ b/src/memos/api/handlers/search_handler.py @@ -255,15 +255,11 @@ def _mmr_dedup_text_memories( continue # Check bucket capacity with correct top_k for each type - if mem_type == "text": - if len(text_selected_by_bucket[bucket_idx]) >= text_top_k: - continue + if mem_type == "text" and len(text_selected_by_bucket[bucket_idx]) < text_top_k: selected_global.append(idx) text_selected_by_bucket[bucket_idx].append(idx) selected_texts.add(mem_text) - elif mem_type == "preference": - if len(pref_selected_by_bucket[bucket_idx]) >= pref_top_k: - continue + elif mem_type == "preference" and len(pref_selected_by_bucket[bucket_idx]) < pref_top_k: selected_global.append(idx) pref_selected_by_bucket[bucket_idx].append(idx) selected_texts.add(mem_text) From 500928148808182718e56ff542c50184a0e6a30f Mon Sep 17 00:00:00 2001 From: hijzy Date: Wed, 28 Jan 2026 21:30:04 +0800 Subject: [PATCH 38/40] fix: reformat --- src/memos/api/handlers/search_handler.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/memos/api/handlers/search_handler.py b/src/memos/api/handlers/search_handler.py index 4e21f0029..e7459622b 100644 --- a/src/memos/api/handlers/search_handler.py +++ b/src/memos/api/handlers/search_handler.py @@ -278,7 +278,12 @@ def _mmr_dedup_text_memories( mem_type, bucket_idx, mem, _ = flat[idx] # Check bucket capacity with correct top_k for each type - if (mem_type == "text" and len(text_selected_by_bucket[bucket_idx]) >= text_top_k) or (mem_type == "preference" and 
len(pref_selected_by_bucket[bucket_idx]) >= pref_top_k): + if ( + mem_type == "text" and len(text_selected_by_bucket[bucket_idx]) >= text_top_k + ) or ( + mem_type == "preference" + and len(pref_selected_by_bucket[bucket_idx]) >= pref_top_k + ): continue # Check if exact text already exists - if so, skip this candidate entirely From e2c28bad42431039ba8a92cd22ba521251d195d4 Mon Sep 17 00:00:00 2001 From: hijzy Date: Thu, 29 Jan 2026 02:41:09 +0800 Subject: [PATCH 39/40] fix: use deepcopy, add log --- src/memos/api/handlers/search_handler.py | 96 +++++++++++------------- src/memos/api/product_models.py | 2 +- 2 files changed, 43 insertions(+), 55 deletions(-) diff --git a/src/memos/api/handlers/search_handler.py b/src/memos/api/handlers/search_handler.py index e7459622b..27d832f17 100644 --- a/src/memos/api/handlers/search_handler.py +++ b/src/memos/api/handlers/search_handler.py @@ -6,7 +6,7 @@ """ import math - +import copy from typing import Any from memos.api.handlers.base_handler import BaseHandler, HandlerDependencies @@ -57,54 +57,40 @@ def handle_search_memories(self, search_req: APISearchRequest) -> SearchResponse """ self.logger.info(f"[SearchHandler] Search Req is: {search_req}") - original_top_k = search_req.top_k - prev_text_mem_include_embedding: bool | None = None - prev_graph_retriever_include_embedding: bool | None = None - - if getattr(search_req, "dedup", None) is None: - search_req.dedup = "mmr" - - try: - # Expand top_k for deduplication (5x to ensure enough candidates) - if search_req.dedup in ("sim", "mmr"): - search_req.top_k = original_top_k * 5 - - # Enable embeddings for MMR deduplication - if search_req.dedup == "mmr": - text_mem = getattr(self.naive_mem_cube, "text_mem", None) - if text_mem is not None and hasattr(text_mem, "include_embedding"): - prev_text_mem_include_embedding = text_mem.include_embedding - text_mem.include_embedding = True - - graph_retriever = getattr(self.searcher, "graph_retriever", None) - if graph_retriever is not None and hasattr(graph_retriever, "include_embedding"): - prev_graph_retriever_include_embedding = graph_retriever.include_embedding - graph_retriever.include_embedding = True - - # Search and deduplicate - cube_view = self._build_cube_view(search_req) - results = cube_view.search_memories(search_req) - - if search_req.dedup == "sim": - results = self._dedup_text_memories(results, original_top_k) - self._strip_embeddings(results) - elif search_req.dedup == "mmr": - pref_top_k = getattr(search_req, "pref_top_k", 6) - results = self._mmr_dedup_text_memories(results, original_top_k, pref_top_k) - self._strip_embeddings(results) - finally: - # Restore original states - search_req.top_k = original_top_k - - if prev_text_mem_include_embedding is not None: - text_mem = getattr(self.naive_mem_cube, "text_mem", None) - if text_mem is not None and hasattr(text_mem, "include_embedding"): - text_mem.include_embedding = prev_text_mem_include_embedding - - if prev_graph_retriever_include_embedding is not None: - graph_retriever = getattr(self.searcher, "graph_retriever", None) - if graph_retriever is not None and hasattr(graph_retriever, "include_embedding"): - graph_retriever.include_embedding = prev_graph_retriever_include_embedding + # Use deepcopy to avoid modifying the original request object + search_req_local = copy.deepcopy(search_req) + original_top_k = search_req_local.top_k + + # Expand top_k for deduplication (5x to ensure enough candidates) + if search_req_local.dedup in ("sim", "mmr"): + search_req_local.top_k = original_top_k * 
5
+
+        # Create new searcher with include_embedding for MMR deduplication
+        searcher_to_use = self.searcher
+        if search_req_local.dedup == "mmr":
+            text_mem = getattr(self.naive_mem_cube, "text_mem", None)
+            if text_mem is not None:
+                # Create new searcher instance with include_embedding=True
+                searcher_to_use = text_mem.get_searcher(
+                    manual_close_internet=not getattr(self.searcher, "internet_retriever", None),
+                    moscube=False,
+                    process_llm=getattr(self.mem_reader, "llm", None),
+                )
+                # Override include_embedding for this searcher
+                if hasattr(searcher_to_use, "graph_retriever"):
+                    searcher_to_use.graph_retriever.include_embedding = True
+
+        # Search and deduplicate
+        cube_view = self._build_cube_view(search_req_local, searcher_to_use)
+        results = cube_view.search_memories(search_req_local)
+
+        if search_req_local.dedup == "sim":
+            results = self._dedup_text_memories(results, original_top_k)
+            self._strip_embeddings(results)
+        elif search_req_local.dedup == "mmr":
+            pref_top_k = getattr(search_req_local, "pref_top_k", 6)
+            results = self._mmr_dedup_text_memories(results, original_top_k, pref_top_k)
+            self._strip_embeddings(results)

         self.logger.info(
             f"[SearchHandler] Final search results: count={len(results)} results={results}"
@@ -149,7 +135,7 @@ def _dedup_text_memories(self, results: dict[str, Any], target_top_k: int) -> di
             if len(selected_by_bucket[bucket_idx]) >= target_top_k:
                 continue
             # Use 0.92 threshold strictly
-            if self._is_unrelated(idx, selected_global, similarity_matrix, 0.9):
+            if self._is_unrelated(idx, selected_global, similarity_matrix, 0.92):
                 selected_by_bucket[bucket_idx].append(idx)
                 selected_global.append(idx)
@@ -212,6 +198,7 @@ def _mmr_dedup_text_memories(
         # Get or compute embeddings
         embeddings = self._extract_embeddings([mem for _, _, mem, _ in flat])
         if embeddings is None:
+            self.logger.warning("[SearchHandler] Embedding is missing; recomputing embeddings")
             documents = [mem.get("memory", "") for _, _, mem, _ in flat]
             embeddings = self.searcher.embedder.embed(documents)
@@ -266,7 +253,7 @@ def _mmr_dedup_text_memories(

         # Phase 2: MMR selection for remaining slots
         lambda_relevance = 0.8
-        similarity_threshold = 0.9  # Start exponential penalty from 0.9 (raised from 0.60)
+        similarity_threshold = 0.9  # Start exponential penalty from 0.9
         alpha_exponential = 10.0  # Exponential penalty coefficient

         remaining = set(range(len(flat))) - set(selected_global)
@@ -574,8 +561,9 @@ def _resolve_cube_ids(self, search_req: APISearchRequest) -> list[str]:

         return [search_req.user_id]

-    def _build_cube_view(self, search_req: APISearchRequest) -> MemCubeView:
+    def _build_cube_view(self, search_req: APISearchRequest, searcher=None) -> MemCubeView:
         cube_ids = self._resolve_cube_ids(search_req)
+        searcher_to_use = searcher if searcher is not None else self.searcher

         if len(cube_ids) == 1:
             cube_id = cube_ids[0]
@@ -585,7 +573,7 @@ def _build_cube_view(self, search_req: APISearchRequest) -> MemCubeView:
                 mem_reader=self.mem_reader,
                 mem_scheduler=self.mem_scheduler,
                 logger=self.logger,
-                searcher=self.searcher,
+                searcher=searcher_to_use,
                 deepsearch_agent=self.deepsearch_agent,
             )
         else:
@@ -596,7 +584,7 @@ def _build_cube_view(self, search_req: APISearchRequest) -> MemCubeView:
                 mem_reader=self.mem_reader,
                 mem_scheduler=self.mem_scheduler,
                 logger=self.logger,
-                searcher=self.searcher,
+                searcher=searcher_to_use,
                 deepsearch_agent=self.deepsearch_agent,
             )
             for cube_id in cube_ids
diff --git a/src/memos/api/product_models.py b/src/memos/api/product_models.py
index
d143a6fa2..d8fa784a3 100644 --- a/src/memos/api/product_models.py +++ b/src/memos/api/product_models.py @@ -320,7 +320,7 @@ class APISearchRequest(BaseRequest): ) dedup: Literal["no", "sim", "mmr"] | None = Field( - None, + "mmr", description=( "Optional dedup option for textual memories. " "Use 'no' for no dedup, 'sim' for similarity dedup, 'mmr' for MMR-based dedup. " From e4a8831b6ceb5fdd541bbc54456922f26c7022a7 Mon Sep 17 00:00:00 2001 From: hijzy Date: Thu, 29 Jan 2026 02:42:37 +0800 Subject: [PATCH 40/40] fix: reformat --- src/memos/api/handlers/search_handler.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/memos/api/handlers/search_handler.py b/src/memos/api/handlers/search_handler.py index 27d832f17..93eff185b 100644 --- a/src/memos/api/handlers/search_handler.py +++ b/src/memos/api/handlers/search_handler.py @@ -5,8 +5,9 @@ using dependency injection for better modularity and testability. """ -import math import copy +import math + from typing import Any from memos.api.handlers.base_handler import BaseHandler, HandlerDependencies
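
The scoring rule that patches 29 through 39 converge on can be read in isolation: each remaining candidate gets a relevance term weighted by lambda_relevance = 0.8, minus a diversity term equal to its maximum embedding similarity against the already-selected set, multiplied by an exponential penalty once that similarity exceeds 0.9. A minimal standalone sketch — the three constants mirror the patched handler, while the function name and sample inputs are illustrative assumptions:

import math

LAMBDA_RELEVANCE = 0.8    # weight on relevance vs. diversity (patch 29 onward)
SIM_THRESHOLD = 0.9       # similarity at which the exponential penalty starts (patch 32)
ALPHA_EXPONENTIAL = 10.0  # steepness of the penalty (patch 32)

def mmr_score(relevance: float, max_sim: float) -> float:
    """Score one candidate given its relevance and its max similarity to selected items."""
    if max_sim > SIM_THRESHOLD:
        # Penalty grows exponentially past the threshold, e.g. e^(10 * 0.05) ~= 1.65 at 0.95
        diversity = max_sim * math.exp(ALPHA_EXPONENTIAL * (max_sim - SIM_THRESHOLD))
    else:
        diversity = max_sim
    return LAMBDA_RELEVANCE * relevance - (1.0 - LAMBDA_RELEVANCE) * diversity

# A near-duplicate (max_sim = 0.95) is pushed well below a moderately similar
# candidate (max_sim = 0.85), even though its relevance is slightly higher:
print(round(mmr_score(0.90, 0.85), 4))  # 0.55
print(round(mmr_score(0.92, 0.95), 4))  # ~0.4227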
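
The duplicate test that guards this scorer blends the three character-level similarity measures introduced in patch 29 (weights 0.40 Dice + 0.35 character TF-IDF cosine + 0.25 bigram Jaccard, compared against the 0.9 threshold the series settles on). A hedged, self-contained restatement — simplified from the handler's static methods, so the free-function names and sample strings are assumptions for the example:

import math
from collections import Counter

def dice(a: str, b: str) -> float:
    """Character-set Dice coefficient: 2|A∩B| / (|A| + |B|)."""
    if not a or not b:
        return 0.0
    sa, sb = set(a), set(b)
    return 2 * len(sa & sb) / (len(sa) + len(sb))

def bigram_jaccard(a: str, b: str) -> float:
    """Jaccard similarity over character 2-grams (captures local order)."""
    if not a or not b:
        return 0.0
    ga = {a[i : i + 2] for i in range(len(a) - 1)} if len(a) >= 2 else {a}
    gb = {b[i : i + 2] for i in range(len(b) - 1)} if len(b) >= 2 else {b}
    union = ga | gb
    return len(ga & gb) / len(union) if union else 0.0

def char_tfidf_cosine(a: str, b: str) -> float:
    """Cosine over character-frequency vectors with the patch's 1.0/1.5 pseudo-IDF."""
    if not a or not b:
        return 0.0
    tf1, tf2 = Counter(a), Counter(b)
    vocab = set(tf1) | set(tf2)
    idf = {c: (1.0 if c in tf1 and c in tf2 else 1.5) for c in vocab}
    v1 = {c: tf1.get(c, 0) * idf[c] for c in vocab}
    v2 = {c: tf2.get(c, 0) * idf[c] for c in vocab}
    dot = sum(v1[c] * v2[c] for c in vocab)
    n1 = math.sqrt(sum(x * x for x in v1.values()))
    n2 = math.sqrt(sum(x * x for x in v2.values()))
    return dot / (n1 * n2) if n1 and n2 else 0.0

def combined_similarity(a: str, b: str) -> float:
    """Weighted blend used as the near-duplicate test (threshold 0.9 in the final patches)."""
    return 0.40 * dice(a, b) + 0.35 * char_tfidf_cosine(a, b) + 0.25 * bigram_jaccard(a, b)

# Two lightly edited variants of the same memory score close to 1.0,
# while unrelated strings land far below the 0.9 cutoff:
print(combined_similarity("user prefers dark mode", "user prefers dark modes"))
print(combined_similarity("user prefers dark mode", "meeting moved to friday"))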
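
The pre-filter that keeps this blend cheap is also worth spelling out: instead of running string comparisons against every selected memory, the candidate is compared only against the single selected item with the highest embedding similarity, and the string metrics run only when that similarity clears the gate (0.60 in patch 29, 0.9 from patch 32 onward). A sketch under the same assumptions, reusing combined_similarity from the block above; the precomputed matrix stands in for the handler's cosine-similarity matrix:

def is_near_duplicate(
    candidate_idx: int,
    candidate_text: str,
    selected: list[int],
    sim: list[list[float]],
    texts: list[str],
    gate: float = 0.9,       # embedding-similarity gate before any string comparison
    threshold: float = 0.9,  # combined text-similarity threshold
) -> bool:
    """O(1) string comparisons per candidate: one argmax lookup, then at most one blend."""
    if not selected:
        return False
    # Only the single most embedding-similar selected item is considered
    nearest = max(selected, key=lambda j: sim[candidate_idx][j])
    if sim[candidate_idx][nearest] <= gate:
        return False  # embeddings already say "not close"; skip string metrics entirely
    return combined_similarity(candidate_text, texts[nearest]) >= threshold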