From a26c19a80a4593d5760e6f9337d5916e31ca1d6d Mon Sep 17 00:00:00 2001 From: itqop Date: Sat, 19 Jul 2025 16:56:09 +0300 Subject: [PATCH] Fix chunk finder --- src/graph/nodes/parse_output.py | 20 +++++++++++++++++++- src/services/retrieval.py | 16 +++++++--------- 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/src/graph/nodes/parse_output.py b/src/graph/nodes/parse_output.py index d96b9bb..82fd752 100644 --- a/src/graph/nodes/parse_output.py +++ b/src/graph/nodes/parse_output.py @@ -39,11 +39,29 @@ def parse_output_node(state: EmailGenerationState) -> EmailGenerationState: subject = _validate_subject(subject) body = _validate_body(body) + ranked_context = state.get("ranked_context") + real_chunk_ids = [] + + if isinstance(used_chunks, list) and ranked_context: + for chunk_idx in used_chunks: + try: + idx = int(chunk_idx) - 1 + if 0 <= idx < len(ranked_context.chunks): + chunk = ranked_context.chunks[idx] + chunk_info = f"{chunk.parent_doc_id}#{chunk.chunk_id}" + real_chunk_ids.append(chunk_info) + else: + real_chunk_ids.append(str(chunk_idx)) + except (ValueError, TypeError): + real_chunk_ids.append(str(chunk_idx)) + else: + real_chunk_ids = [] + email_draft = EmailDraft( subject=subject, body=body, short_reasoning=short_reasoning, - used_chunks=used_chunks if isinstance(used_chunks, list) else [], + used_chunks=real_chunk_ids, ) state["email_draft"] = email_draft diff --git a/src/services/retrieval.py b/src/services/retrieval.py index 8d8822f..24d20aa 100644 --- a/src/services/retrieval.py +++ b/src/services/retrieval.py @@ -70,12 +70,7 @@ class RetrievalService: text_query = " ".join(search_terms) - metadata_filters = {} - if lead_features.industry_tag != "other": - metadata_filters["$or"] = [ - {"industry": {"$contains": lead_features.industry_tag}}, - {"roles_relevant": {"$contains": lead_features.role_category}}, - ] + metadata_filters = None return RetrievalQuery( text_query=text_query, @@ -155,9 +150,12 @@ class RetrievalService: if chunk.metadata.get("metrics"): metrics = chunk.metadata["metrics"] metrics_parts = [] - for key, value in metrics.items(): - if isinstance(value, (int, float)): - metrics_parts.append(f"{key}: {value}") + if isinstance(metrics, dict): + for key, value in metrics.items(): + if isinstance(value, (int, float)): + metrics_parts.append(f"{key}: {value}") + elif isinstance(metrics, str): + metrics_parts.append(metrics) if metrics_parts: metrics_info = f" ({', '.join(metrics_parts)})"