diff --git a/docs/orchestration/address_truth_harness_phase64_human_vat_investigation_dialog.json b/docs/orchestration/address_truth_harness_phase64_human_vat_investigation_dialog.json new file mode 100644 index 0000000..86afaa5 --- /dev/null +++ b/docs/orchestration/address_truth_harness_phase64_human_vat_investigation_dialog.json @@ -0,0 +1,97 @@ +{ + "schema_version": "domain_truth_harness_spec_v1", + "scenario_id": "address_truth_harness_phase64_human_vat_investigation_dialog", + "domain": "address_phase64_human_vat_investigation_dialog", + "title": "Phase 64 human VAT investigation dialog", + "description": "Human-facing AGENT dialog for a finance user who first orients inside VAT-related 1C objects, then deliberately goes into VAT movements for one organization, pivots into supporting documents, and checks year-switch plus all-time continuity without dead filler turns or vague wording.", + "bindings": {}, + "steps": [ + { + "step_id": "step_01_metadata_orientation", + "title": "The user asks where VAT data lives in 1C", + "question": "Мне нужно понять, где в 1С по НДС вообще лежат данные. 
Какие объекты стоит смотреть по НДС?", + "allowed_reply_types": ["partial_coverage", "factual_with_explanation"], + "required_answer_patterns_all": [ + "(?i)metadata|метадан", + "(?i)ндс", + "(?i)документ|регистр" + ], + "criticality": "critical", + "semantic_tags": ["metadata_surface", "vat_orientation", "human_dialog"] + }, + { + "step_id": "step_02_choose_movements_with_org", + "title": "The user explicitly chooses the movement lane with an organization", + "question": "Хорошо, тогда покажи движения по ООО Альтернатива Плюс.", + "allowed_reply_types": ["clarification_required", "partial_coverage"], + "required_answer_patterns_all": [ + "(?i)движени|регистр|операц", + "(?i)период" + ], + "criticality": "critical", + "semantic_tags": ["movement_lane_after_metadata", "inline_organization_clarification", "human_dialog"] + }, + { + "step_id": "step_03_execute_movement_slice", + "title": "The user provides the year and the movement slice executes", + "question": "За 2020 год.", + "allowed_reply_types": ["factual", "factual_with_explanation", "partial_coverage"], + "required_answer_patterns_all": [ + "(?i)2020", + "(?i)ндс|движени|регистр|операц|платеж|поступлен|списан|строк" + ], + "criticality": "critical", + "semantic_tags": ["movement_lane_execution", "bounded_retrieval", "human_dialog"] + }, + { + "step_id": "step_04_document_pivot_same_scope", + "title": "The user pivots from movements to supporting documents on the same slice", + "question": "А теперь по документам?", + "allowed_reply_types": ["factual", "factual_with_explanation", "partial_coverage"], + "required_answer_patterns_all": [ + "(?i)документ|счет|сч[её]т[- ]?фактур|накладн|акт|строк" + ], + "forbidden_answer_patterns": [ + "(?i)уточните .*организац", + "(?i)уточните .*период", + "(?i)не найден контрагент" + ], + "criticality": "critical", + "semantic_tags": ["document_pivot_after_movement_retrieval", "scope_reuse", "human_dialog"] + }, + { + "step_id": "step_05_document_year_switch", + "title": "The 
user asks for the same document slice in another year", + "question": "А теперь за 2021 год?", + "allowed_reply_types": ["factual", "factual_with_explanation", "partial_coverage"], + "required_answer_patterns_all": [ + "(?i)2021", + "(?i)документ|счет|сч[её]т[- ]?фактур|накладн|акт|строк" + ], + "forbidden_answer_patterns": [ + "(?i)движени|регистр", + "(?i)уточните .*организац", + "(?i)уточните .*период" + ], + "criticality": "critical", + "semantic_tags": ["document_lane_continuity", "year_switch_after_pivot", "human_dialog"] + }, + { + "step_id": "step_06_document_all_time_followup", + "title": "The user broadens the same document slice to all available time", + "question": "А теперь за все время?", + "allowed_reply_types": ["factual", "factual_with_explanation", "partial_coverage"], + "required_answer_patterns_all": [ + "(?i)все доступное время|все время|весь период", + "(?i)документ|счет|сч[её]т[- ]?фактур|накладн|акт|строк" + ], + "forbidden_answer_patterns": [ + "(?i)за 2021", + "(?i)движени|регистр", + "(?i)уточните .*период" + ], + "criticality": "critical", + "semantic_tags": ["document_lane_continuity", "all_time_followup", "human_dialog"] + } + ] +} diff --git a/docs/orchestration/address_truth_harness_phase65_human_svk_money_dialog.json b/docs/orchestration/address_truth_harness_phase65_human_svk_money_dialog.json new file mode 100644 index 0000000..846f6d8 --- /dev/null +++ b/docs/orchestration/address_truth_harness_phase65_human_svk_money_dialog.json @@ -0,0 +1,97 @@ +{ + "schema_version": "domain_truth_harness_spec_v1", + "scenario_id": "address_truth_harness_phase65_human_svk_money_dialog", + "domain": "address_phase65_human_svk_money_dialog", + "title": "Phase 65 human SVK money dialog", + "description": "Human-facing AGENT dialog for a user who grounds one counterparty in 1C, checks incoming money, outgoing money, and net for one year, then naturally pivots into documents and movements without dead filler turns.", + "bindings": {}, + "steps": [ + 
{ + "step_id": "step_01_ground_counterparty", + "title": "The user asks to find the counterparty in 1C", + "question": "Хочу проверить одного контрагента. Найди в 1С Группу СВК.", + "allowed_reply_types": ["partial_coverage", "factual_with_explanation"], + "required_answer_patterns_all": [ + "(?i)свк", + "(?i)контрагент|каталог|1с" + ], + "criticality": "critical", + "semantic_tags": ["entity_grounding", "counterparty_resolution", "human_dialog"] + }, + { + "step_id": "step_02_incoming_value_flow", + "title": "The user asks for incoming money in 2020", + "question": "Посмотри, сколько денег мы получили от него за 2020 год.", + "allowed_reply_types": ["factual", "factual_with_explanation", "partial_coverage"], + "required_answer_patterns_all": [ + "(?i)2020", + "(?i)входящ|поступлен|получ" + ], + "criticality": "critical", + "semantic_tags": ["incoming_value_flow", "grounded_counterparty_followup", "human_dialog"] + }, + { + "step_id": "step_03_outgoing_value_flow", + "title": "The user pivots into outgoing money on the same counterparty and year", + "question": "А теперь сколько мы ему заплатили?", + "allowed_reply_types": ["factual", "factual_with_explanation", "partial_coverage"], + "required_answer_patterns_all": [ + "(?i)исходящ|списан|заплат" + ], + "criticality": "critical", + "semantic_tags": ["outgoing_value_flow", "grounded_counterparty_followup", "human_dialog"] + }, + { + "step_id": "step_04_net_value_flow", + "title": "The user asks for the same net flow", + "question": "А какое получилось нетто?", + "allowed_reply_types": ["factual", "factual_with_explanation", "partial_coverage"], + "required_answer_patterns_all": [ + "(?i)нетто|сальдо|чист" + ], + "criticality": "critical", + "semantic_tags": ["net_value_flow", "grounded_counterparty_followup", "human_dialog"] + }, + { + "step_id": "step_05_document_pivot", + "title": "The user asks for documents on the same grounded counterparty", + "question": "А по документам?", + "allowed_reply_types": 
["factual", "factual_with_explanation", "partial_coverage"], + "required_answer_patterns_all": [ + "(?i)документ|счет|сч[её]т[- ]?фактур|накладн|акт|строк" + ], + "forbidden_answer_patterns": [ + "(?i)уточните .*контрагент", + "(?i)не найден контрагент" + ], + "criticality": "critical", + "semantic_tags": ["document_pivot_after_value_flow", "grounded_counterparty_followup", "human_dialog"] + }, + { + "step_id": "step_06_movement_pivot", + "title": "The user asks for movements on the same grounded counterparty", + "question": "А по движениям?", + "allowed_reply_types": ["factual", "factual_with_explanation", "partial_coverage"], + "required_answer_patterns_all": [ + "(?i)движени|регистр|операц|платеж|поступлен|списан|строк" + ], + "forbidden_answer_patterns": [ + "(?i)уточните .*контрагент", + "(?i)не найден контрагент" + ], + "criticality": "critical", + "semantic_tags": ["movement_pivot_after_value_flow", "grounded_counterparty_followup", "human_dialog"] + }, + { + "step_id": "step_07_year_switch_same_counterparty", + "title": "The user asks for the same contour in 2021", + "question": "А теперь за 2021 год?", + "allowed_reply_types": ["factual", "factual_with_explanation", "partial_coverage"], + "required_answer_patterns_all": [ + "(?i)2021" + ], + "criticality": "critical", + "semantic_tags": ["year_switch", "grounded_counterparty_followup", "human_dialog"] + } + ] +} diff --git a/docs/orchestration/address_truth_harness_phase66_human_org_open_scope_dialog.json b/docs/orchestration/address_truth_harness_phase66_human_org_open_scope_dialog.json new file mode 100644 index 0000000..8af5f93 --- /dev/null +++ b/docs/orchestration/address_truth_harness_phase66_human_org_open_scope_dialog.json @@ -0,0 +1,104 @@ +{ + "schema_version": "domain_truth_harness_spec_v1", + "scenario_id": "address_truth_harness_phase66_human_org_open_scope_dialog", + "domain": "address_phase66_human_org_open_scope_dialog", + "title": "Phase 66 human open-scope organization money dialog", + 
"description": "Human-facing AGENT dialog for organization-scoped money analytics without a preselected counterparty: the assistant first asks for the organization, then the same dialog continues through all-time follow-up, comparison, and ranking over that one company.", + "bindings": {}, + "steps": [ + { + "step_id": "step_01_open_scope_incoming_total", + "title": "The user asks for incoming money without naming the organization yet", + "question": "Хочу быстрый денежный срез по одной организации без привязки к контрагенту. Сколько вообще входящих денег было за 2020 год?", + "allowed_reply_types": ["clarification_required", "partial_coverage"], + "required_answer_patterns_all": [ + "(?i)уточн|нужно", + "(?i)организац" + ], + "criticality": "critical", + "semantic_tags": ["open_scope_total", "organization_scope", "human_dialog"] + }, + { + "step_id": "step_02_all_time_same_open_scope", + "title": "The user selects the organization and gets the 2020 incoming total", + "question": "По ООО Альтернатива Плюс.", + "allowed_reply_types": ["factual", "factual_with_explanation", "partial_coverage"], + "required_answer_patterns_all": [ + "(?i)2020", + "(?i)входящ|поступлен|получ" + ], + "forbidden_answer_patterns": [ + "(?i)уточните .*контрагент", + "(?i)не найден контрагент", + "(?i)уточните .*организац" + ], + "criticality": "critical", + "semantic_tags": ["organization_clarification", "open_scope_total", "human_dialog"] + }, + { + "step_id": "step_03_all_time_same_open_scope", + "title": "The user broadens the same organization slice to all available time", + "question": "Понял, тогда за все время.", + "allowed_reply_types": ["factual", "factual_with_explanation", "partial_coverage"], + "required_answer_patterns_all": [ + "(?i)все доступное время|все время|весь период", + "(?i)входящ|поступлен|получ" + ], + "forbidden_answer_patterns": [ + "(?i)за 2020", + "(?i)уточните .*контрагент", + "(?i)уточните .*период", + "(?i)уточните .*организац" + ], + "criticality": 
"critical", + "semantic_tags": ["all_time_followup", "organization_scope", "human_dialog"] + }, + { + "step_id": "step_04_bidirectional_comparison", + "title": "The user asks which money direction is larger for the organization", + "question": "Хорошо. А что по ООО Альтернатива Плюс больше в 2020 году: входящие или исходящие деньги?", + "allowed_reply_types": ["factual", "factual_with_explanation", "partial_coverage"], + "required_answer_patterns_all": [ + "(?i)2020", + "(?i)входящ|исходящ|получ|заплат|больше" + ], + "criticality": "critical", + "semantic_tags": ["value_flow_comparison", "organization_scope", "human_dialog"] + }, + { + "step_id": "step_05_comparison_year_switch", + "title": "The user asks the same comparison for another year", + "question": "А что по ООО Альтернатива Плюс больше уже за 2021 год: входящие или исходящие деньги?", + "allowed_reply_types": ["factual", "factual_with_explanation", "partial_coverage"], + "required_answer_patterns_all": [ + "(?i)2021", + "(?i)входящ|исходящ|получ|заплат|больше" + ], + "criticality": "critical", + "semantic_tags": ["value_flow_comparison", "year_switch", "organization_scope", "human_dialog"] + }, + { + "step_id": "step_06_ranking_top_counterparty", + "title": "The user asks who brought the most money for the organization", + "question": "И кто больше всего принес денег этой организации в 2020 году?", + "allowed_reply_types": ["factual", "factual_with_explanation", "partial_coverage"], + "required_answer_patterns_all": [ + "(?i)2020", + "(?i)кто|контрагент|клиент|принес|доход" + ], + "criticality": "critical", + "semantic_tags": ["value_flow_ranking", "organization_scope", "human_dialog"] + }, + { + "step_id": "step_07_ranking_year_switch", + "title": "The user asks the same ranking for another year", + "question": "А в 2021 году?", + "allowed_reply_types": ["factual", "factual_with_explanation", "partial_coverage"], + "required_answer_patterns_all": [ + "(?i)2021" + ], + "criticality": "critical", + 
"semantic_tags": ["value_flow_ranking", "year_switch", "organization_scope", "human_dialog"] + } + ] +} diff --git a/docs/orchestration/address_truth_harness_phase67_svk_grounded_counterparty_integrity.json b/docs/orchestration/address_truth_harness_phase67_svk_grounded_counterparty_integrity.json new file mode 100644 index 0000000..4c51b93 --- /dev/null +++ b/docs/orchestration/address_truth_harness_phase67_svk_grounded_counterparty_integrity.json @@ -0,0 +1,120 @@ +{ + "schema_version": "domain_truth_harness_spec_v1", + "scenario_id": "address_truth_harness_phase67_svk_grounded_counterparty_integrity", + "domain": "address_phase67_svk_grounded_counterparty_integrity", + "title": "Phase 67 grounded counterparty integrity for SVK dialog", + "description": "Replay for the exact human dialog where one grounded counterparty must survive incoming, payout, net, documents, movements, and year-switch without being replaced by a stale focus object.", + "bindings": {}, + "steps": [ + { + "step_id": "step_01_ground_counterparty", + "title": "Ground the counterparty in 1C", + "question": "\u0425\u043e\u0447\u0443 \u043f\u0440\u043e\u0432\u0435\u0440\u0438\u0442\u044c \u043e\u0434\u043d\u043e\u0433\u043e \u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442\u0430. 
\u041d\u0430\u0439\u0434\u0438 \u0432 1\u0421 \u0413\u0440\u0443\u043f\u043f\u0443 \u0421\u0412\u041a.", + "allowed_reply_types": ["partial_coverage", "factual_with_explanation"], + "required_answer_patterns_all": [ + "(?i)\u0441\u0432\u043a", + "(?i)\u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442|\u043a\u0430\u0442\u0430\u043b\u043e\u0433|1\u0441" + ], + "criticality": "critical", + "semantic_tags": ["entity_grounding", "counterparty_resolution", "integrity_guard"] + }, + { + "step_id": "step_02_incoming_2020", + "title": "Ask about incoming money for 2020", + "question": "\u041f\u043e\u0441\u043c\u043e\u0442\u0440\u0438, \u0441\u043a\u043e\u043b\u044c\u043a\u043e \u0434\u0435\u043d\u0435\u0433 \u043c\u044b \u043f\u043e\u043b\u0443\u0447\u0438\u043b\u0438 \u043e\u0442 \u043d\u0435\u0433\u043e \u0437\u0430 2020 \u0433\u043e\u0434.", + "allowed_reply_types": ["factual", "factual_with_explanation", "partial_coverage"], + "required_answer_patterns_all": [ + "(?i)\u0441\u0432\u043a", + "(?i)2020", + "(?i)\u0432\u0445\u043e\u0434\u044f\u0449|\u043f\u043e\u0441\u0442\u0443\u043f\u043b\u0435\u043d|\u043f\u043e\u043b\u0443\u0447" + ], + "forbidden_answer_patterns": [ + "(?i)\u043d\u043e\u0440\u0442\u043e\u043d" + ], + "criticality": "critical", + "semantic_tags": ["incoming_value_flow", "grounded_counterparty_followup", "integrity_guard"] + }, + { + "step_id": "step_03_payout_2020_same_counterparty", + "title": "Ask how much we paid to the same counterparty", + "question": "\u0410 \u0442\u0435\u043f\u0435\u0440\u044c \u0441\u043a\u043e\u043b\u044c\u043a\u043e \u043c\u044b \u0435\u043c\u0443 \u0437\u0430\u043f\u043b\u0430\u0442\u0438\u043b\u0438?", + "allowed_reply_types": ["factual", "factual_with_explanation", "partial_coverage"], + "required_answer_patterns_all": [ + "(?i)\u0441\u0432\u043a", + "(?i)\u0437\u0430\u043f\u043b\u0430\u0442|\u0438\u0441\u0445\u043e\u0434\u044f\u0449|\u0441\u043f\u0438\u0441\u0430\u043d|\u043f\u043b\u0430\u0442\u0435\u0436" + ], + 
"forbidden_answer_patterns": [ + "(?i)\u043d\u043e\u0440\u0442\u043e\u043d", + "(?i)\u0441\u0435\u0440\u0432\u0438\u0441\u043a\u043e\u043d\u0441\u0430\u043b\u0442" + ], + "criticality": "critical", + "semantic_tags": ["outgoing_value_flow", "grounded_counterparty_followup", "integrity_guard"] + }, + { + "step_id": "step_04_net_same_counterparty", + "title": "Ask for the same net flow", + "question": "\u0410 \u043a\u0430\u043a\u043e\u0435 \u043f\u043e\u043b\u0443\u0447\u0438\u043b\u043e\u0441\u044c \u043d\u0435\u0442\u0442\u043e?", + "allowed_reply_types": ["factual", "factual_with_explanation", "partial_coverage"], + "required_answer_patterns_all": [ + "(?i)\u0441\u0432\u043a", + "(?i)\u043d\u0435\u0442\u0442\u043e|\u0441\u0430\u043b\u044c\u0434\u043e|\u0447\u0438\u0441\u0442" + ], + "forbidden_answer_patterns": [ + "(?i)\u043d\u043e\u0440\u0442\u043e\u043d", + "(?i)\u0441\u0435\u0440\u0432\u0438\u0441\u043a\u043e\u043d\u0441\u0430\u043b\u0442" + ], + "criticality": "critical", + "semantic_tags": ["net_value_flow", "grounded_counterparty_followup", "integrity_guard"] + }, + { + "step_id": "step_05_documents_same_counterparty", + "title": "Pivot into documents without losing the counterparty", + "question": "\u0410 \u043f\u043e \u0434\u043e\u043a\u0443\u043c\u0435\u043d\u0442\u0430\u043c?", + "allowed_reply_types": ["factual", "factual_with_explanation", "partial_coverage"], + "required_answer_patterns_all": [ + "(?i)\u0441\u0432\u043a", + "(?i)\u0434\u043e\u043a\u0443\u043c\u0435\u043d\u0442|\u0441\u0447\u0435\u0442|\u0441\u0447[\u0435\u0451]\u0442[- ]?\u0444\u0430\u043a\u0442\u0443\u0440|\u043d\u0430\u043a\u043b\u0430\u0434\u043d|\u0430\u043a\u0442|\u0441\u0442\u0440\u043e\u043a" + ], + "forbidden_answer_patterns": [ + "(?i)\u043d\u043e\u0440\u0442\u043e\u043d", + "(?i)\u0441\u0435\u0440\u0432\u0438\u0441\u043a\u043e\u043d\u0441\u0430\u043b\u0442", + "(?i)\u0443\u0442\u043e\u0447\u043d\u0438\u0442\u0435 .* 
\u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442" + ], + "criticality": "critical", + "semantic_tags": ["document_pivot_after_value_flow", "grounded_counterparty_followup", "integrity_guard"] + }, + { + "step_id": "step_06_movements_same_counterparty", + "title": "Pivot into movements without losing the counterparty", + "question": "\u0410 \u043f\u043e \u0434\u0432\u0438\u0436\u0435\u043d\u0438\u044f\u043c?", + "allowed_reply_types": ["factual", "factual_with_explanation", "partial_coverage"], + "required_answer_patterns_all": [ + "(?i)\u0441\u0432\u043a", + "(?i)\u0434\u0432\u0438\u0436\u0435\u043d\u0438|\u0440\u0435\u0433\u0438\u0441\u0442\u0440|\u043e\u043f\u0435\u0440\u0430\u0446|\u043f\u043b\u0430\u0442\u0435\u0436|\u043f\u043e\u0441\u0442\u0443\u043f\u043b\u0435\u043d|\u0441\u043f\u0438\u0441\u0430\u043d|\u0441\u0442\u0440\u043e\u043a" + ], + "forbidden_answer_patterns": [ + "(?i)\u043d\u043e\u0440\u0442\u043e\u043d", + "(?i)\u0441\u0435\u0440\u0432\u0438\u0441\u043a\u043e\u043d\u0441\u0430\u043b\u0442", + "(?i)\u0443\u0442\u043e\u0447\u043d\u0438\u0442\u0435 .* \u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442" + ], + "criticality": "critical", + "semantic_tags": ["movement_pivot_after_value_flow", "grounded_counterparty_followup", "integrity_guard"] + }, + { + "step_id": "step_07_year_switch_same_counterparty", + "title": "Switch to 2021 without losing the counterparty", + "question": "\u0410 \u0442\u0435\u043f\u0435\u0440\u044c \u0437\u0430 2021 \u0433\u043e\u0434?", + "allowed_reply_types": ["factual", "factual_with_explanation", "partial_coverage"], + "required_answer_patterns_all": [ + "(?i)\u0441\u0432\u043a", + "(?i)2021" + ], + "forbidden_answer_patterns": [ + "(?i)\u043d\u043e\u0440\u0442\u043e\u043d", + "(?i)\u0441\u0435\u0440\u0432\u0438\u0441\u043a\u043e\u043d\u0441\u0430\u043b\u0442" + ], + "criticality": "critical", + "semantic_tags": ["year_switch", "grounded_counterparty_followup", "integrity_guard"] + } + ] +} diff 
--git a/docs/orchestration/address_truth_harness_phase68_referential_document_followup_integrity.json b/docs/orchestration/address_truth_harness_phase68_referential_document_followup_integrity.json new file mode 100644 index 0000000..93a3ff7 --- /dev/null +++ b/docs/orchestration/address_truth_harness_phase68_referential_document_followup_integrity.json @@ -0,0 +1,41 @@ +{ + "schema_version": "domain_truth_harness_spec_v1", + "scenario_id": "address_truth_harness_phase68_referential_document_followup_integrity", + "domain": "address_phase68_referential_document_followup_integrity", + "title": "Phase 68 referential document follow-up integrity", + "description": "Replay for a human document drilldown where a referential follow-up like 'кроме этого документа...' must stay in the exact document contour, keep the prior counterparty, and avoid drifting into metadata discovery.", + "bindings": {}, + "steps": [ + { + "step_id": "step_01_documents_by_counterparty", + "title": "Open documents for the counterparty", + "question": "Покажи документы по Жуковке 51.", + "allowed_reply_types": ["factual", "factual_with_explanation", "partial_coverage"], + "required_answer_patterns_all": [ + "(?i)жуковк", + "(?i)документ|сч[её]т|акт|накладн|строк" + ], + "criticality": "critical", + "semantic_tags": ["documents_by_counterparty", "referential_followup_seed", "integrity_guard"] + }, + { + "step_id": "step_02_referential_document_followup", + "title": "Ask whether there are more documents besides this one", + "question": "Кроме этого документа есть еще что-то?", + "allowed_reply_types": ["factual", "factual_with_explanation", "partial_coverage"], + "required_answer_patterns_all": [ + "(?i)жуковк|контрагент", + "(?i)документ|сч[её]т|акт|накладн|еще" + ], + "forbidden_answer_patterns": [ + "(?i)метадан", + "(?i)схем", + "(?i)объект[а-я]* 1с", + "(?i)регистр", + "(?i)уточните .*контрагент" + ], + "criticality": "critical", + "semantic_tags": ["referential_document_followup", 
"counterparty_carryover", "integrity_guard"] + } + ] +} diff --git a/llm_normalizer/backend/dist/services/addressNavigationState.js b/llm_normalizer/backend/dist/services/addressNavigationState.js index 6e4eb27..790096f 100644 --- a/llm_normalizer/backend/dist/services/addressNavigationState.js +++ b/llm_normalizer/backend/dist/services/addressNavigationState.js @@ -5,6 +5,7 @@ exports.cloneAddressNavigationState = cloneAddressNavigationState; exports.normalizeAddressNavigationState = normalizeAddressNavigationState; exports.evolveAddressNavigationStateWithAssistantItem = evolveAddressNavigationStateWithAssistantItem; const nanoid_1 = require("nanoid"); +const assistantContinuityPolicy_1 = require("./assistantContinuityPolicy"); const addressNavigation_1 = require("../types/addressNavigation"); const MAX_RESULT_SETS = 40; const MAX_NAVIGATION_EVENTS = 120; @@ -242,24 +243,32 @@ function resolveNavigationAction(debug, hasFocusObject) { } return hasFocusObject ? "drilldown" : "open"; } -function buildFocusObjectFromDebug(debug, resultSetId, createdAt) { - const extractedFilters = toObject(debug.extracted_filters) ?? {}; - const rawValue = toNonEmptyString(debug.anchor_value_resolved) ?? - toNonEmptyString(debug.anchor_value_raw) ?? - toNonEmptyString(extractedFilters.item); - if (!rawValue) { - return null; - } - const objectType = toAddressFocusObjectType(debug.anchor_type); - const canonicalType = objectType === "unknown" ? 
inferDisplayEntityType(toAddressIntent(debug.detected_intent)) : objectType; +function buildFocusObject(objectType, label, resultSetId, createdAt) { return { - object_type: canonicalType, - object_id: `${canonicalType}:${rawValue}`.toLowerCase(), - label: rawValue, + object_type: objectType, + object_id: `${objectType}:${label}`.toLowerCase(), + label, provenance_result_set_id: resultSetId, selected_at: createdAt }; } +function buildFocusObjectFromDebug(debug, resultSetId, createdAt) { + const extractedFilters = toObject(debug.extracted_filters) ?? {}; + const objectType = toAddressFocusObjectType(debug.anchor_type); + const canonicalType = objectType === "unknown" ? inferDisplayEntityType(toAddressIntent(debug.detected_intent)) : objectType; + if (canonicalType === "item") { + const item = (0, assistantContinuityPolicy_1.readAddressDebugItem)(debug, toNonEmptyString); + return item ? buildFocusObject(canonicalType, item, resultSetId, createdAt) : null; + } + if (canonicalType === "counterparty" && debug.mcp_discovery_response_applied === true) { + const counterparty = (0, assistantContinuityPolicy_1.readAddressDebugCounterparty)(debug, toNonEmptyString); + return counterparty ? buildFocusObject(canonicalType, counterparty, resultSetId, createdAt) : null; + } + const rawValue = toNonEmptyString(debug.anchor_value_resolved) ?? + toNonEmptyString(debug.anchor_value_raw) ?? + toNonEmptyString(extractedFilters.item); + return rawValue ? 
buildFocusObject(canonicalType, rawValue, resultSetId, createdAt) : null; +} function capResultSets(resultSets) { if (resultSets.length <= MAX_RESULT_SETS) { return resultSets; diff --git a/llm_normalizer/backend/dist/services/addressTextRepair.js b/llm_normalizer/backend/dist/services/addressTextRepair.js new file mode 100644 index 0000000..c8ff57f --- /dev/null +++ b/llm_normalizer/backend/dist/services/addressTextRepair.js @@ -0,0 +1,73 @@ +"use strict"; +var __importDefault = (this && this.__importDefault) || function (mod) { + return (mod && mod.__esModule) ? mod : { "default": mod }; +}; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.repairAddressMojibakeText = repairAddressMojibakeText; +exports.normalizeRussianComparableText = normalizeRussianComparableText; +const iconv_lite_1 = __importDefault(require("iconv-lite")); +function compactWhitespace(value) { + return value.replace(/\s+/g, " ").trim(); +} +function textMojibakeScore(value) { + const source = String(value ?? ""); + const cyrillic = (source.match(/[\u0400-\u04ff]/g) ?? []).length; + const latin = (source.match(/[A-Za-z]/g) ?? []).length; + const replacement = (source.match(/[�]/g) ?? []).length; + const pairMarkers = (source.match(/(?:Р.|С.|Ð.|Ñ.)/g) ?? []).length; + const doubleEncodedMarkers = (source.match(/(?:Р“[Р-џ]|Р’[Р-џ]|Ã.|Â.)/gu) ?? []).length; + return cyrillic + latin - replacement * 3 - pairMarkers * 2 - doubleEncodedMarkers * 2; +} +function looksLikeAddressMojibake(value) { + const source = String(value ?? ""); + if (!source.trim()) { + return false; + } + if (/[�]/.test(source)) { + return true; + } + if ((source.match(/(?:Р.|С.|Ð.|Ñ.)/g) ?? []).length >= 2) { + return true; + } + if ((source.match(/(?:Р“[Р-џ]|Р’[Р-џ]|Ã.|Â.)/gu) ?? []).length >= 2) { + return true; + } + return false; +} +function repairAddressMojibakeText(value) { + const source = String(value ?? 
""); + if (!looksLikeAddressMojibake(source)) { + return source; + } + let candidate = source; + for (let pass = 0; pass < 3; pass += 1) { + let improved = false; + try { + const fromWin1251 = iconv_lite_1.default.encode(candidate, "win1251").toString("utf8"); + if (textMojibakeScore(fromWin1251) > textMojibakeScore(candidate)) { + candidate = fromWin1251; + improved = true; + } + } + catch { + // Ignore decode failures and keep the current candidate. + } + try { + const fromLatin1 = Buffer.from(candidate, "latin1").toString("utf8"); + if (textMojibakeScore(fromLatin1) > textMojibakeScore(candidate)) { + candidate = fromLatin1; + improved = true; + } + } + catch { + // Ignore decode failures and keep the current candidate. + } + if (!improved) { + break; + } + } + return candidate; +} +function normalizeRussianComparableText(value) { + return compactWhitespace(repairAddressMojibakeText(String(value ?? "")).toLowerCase()).replace(/ё/g, "е"); +} diff --git a/llm_normalizer/backend/dist/services/assistantAddressOrchestrationRuntimeAdapter.js b/llm_normalizer/backend/dist/services/assistantAddressOrchestrationRuntimeAdapter.js index b1024e2..b120072 100644 --- a/llm_normalizer/backend/dist/services/assistantAddressOrchestrationRuntimeAdapter.js +++ b/llm_normalizer/backend/dist/services/assistantAddressOrchestrationRuntimeAdapter.js @@ -77,12 +77,17 @@ function shouldPreferRawFollowupMessage(userMessage, addressInputMessage, carryo const previousIntent = toNonEmptyString(followupContext?.previous_intent); const rootIntent = toNonEmptyString(followupContext?.root_intent); const previousAnchorType = toNonEmptyString(followupContext?.previous_anchor_type); + const hasReferentialDocumentExclusionFollowupCue = 
/(?:\u043a\u0440\u043e\u043c\u0435|\u043f\u043e\u043c\u0438\u043c\u043e)\s+(?:\u044d\u0442\u043e\u0433\u043e|\u044d\u0442\u043e\u0439|\u044d\u0442\u043e\u0442|\u044d\u0442\u0443|\u044d\u0442\u0438\u0445)(?:\s+(?:\u0434\u043e\u043a\u0443\u043c\u0435\u043d\u0442\u0430|\u0434\u043e\u0433\u043e\u0432\u043e\u0440\u0430|\u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442\u0430))?/iu.test(rawMessage); const hasInventoryItemCarryover = previousAnchorType === "item" && isInventorySelectedObjectOrRootIntent(previousIntent); const hasInventoryFrameCarryover = isInventorySelectedObjectOrRootIntent(previousIntent) || isInventorySelectedObjectOrRootIntent(rootIntent); + const hasDocumentCarryover = previousIntent === "list_documents_by_counterparty" || previousIntent === "list_documents_by_contract"; if (mode === "unsupported" && intent === "unknown") { return true; } + if (hasDocumentCarryover && hasReferentialDocumentExclusionFollowupCue) { + return true; + } if (hasSameDateFollowupSignal(rawMessage) && hasExplicitCurrentDateSignal(canonicalMessage)) { return true; } diff --git a/llm_normalizer/backend/dist/services/assistantContinuityPolicy.js b/llm_normalizer/backend/dist/services/assistantContinuityPolicy.js index da1da90..6943d2f 100644 --- a/llm_normalizer/backend/dist/services/assistantContinuityPolicy.js +++ b/llm_normalizer/backend/dist/services/assistantContinuityPolicy.js @@ -51,6 +51,7 @@ exports.resolveAssistantContinuitySnapshot = resolveAssistantContinuitySnapshot; exports.resolveAssistantOrganizationAuthority = resolveAssistantOrganizationAuthority; exports.resolveOrganizationClarificationContinuation = resolveOrganizationClarificationContinuation; const assistantOrganizationMatcher_1 = require("./assistantOrganizationMatcher"); +const addressTextRepair_1 = require("./addressTextRepair"); function fallbackToNonEmptyString(value) { if (value === null || value === undefined) { return null; @@ -365,14 +366,45 @@ function readAddressDebugItem(debug, 
toNonEmptyString = fallbackToNonEmptyString ? toNonEmptyString(debug?.anchor_value_resolved) ?? toNonEmptyString(debug?.anchor_value_raw) : null)); } -function readAddressDebugCounterparty(debug, toNonEmptyString = fallbackToNonEmptyString) { - const extractedFilters = readAddressDebugFilters(debug); - if (toNonEmptyString(extractedFilters?.counterparty)) { - return toNonEmptyString(extractedFilters?.counterparty); +function isReferentialCounterpartyPlaceholder(value) { + if (!value) { + return false; } - if (String(debug?.anchor_type ?? "") === "counterparty") { - return toNonEmptyString(debug?.anchor_value_resolved) ?? toNonEmptyString(debug?.anchor_value_raw); + return new Set([ + "он", + "она", + "оно", + "они", + "ему", + "ней", + "нему", + "ним", + "ними", + "его", + "ее", + "их", + "этому", + "этой", + "этом", + "этим", + "эта", + "этот", + "эти" + ]).has((0, addressTextRepair_1.normalizeRussianComparableText)(value)); +} +function normalizeCounterpartyCandidate(value, toNonEmptyString) { + const text = toNonEmptyString(value); + if (!text || isReferentialCounterpartyPlaceholder(text)) { + return null; } + return text; +} +function sameCounterpartyCandidate(left, right) { + return Boolean(left && + right && + (0, addressTextRepair_1.normalizeRussianComparableText)(left) === (0, addressTextRepair_1.normalizeRussianComparableText)(right)); +} +function readGroundedDiscoveryCounterparty(debug, toNonEmptyString = fallbackToNonEmptyString) { const discoveryPilotScope = readAssistantMcpDiscoveryPilotScope(debug, toNonEmptyString); const suppressDiscoveryEntityCarryover = discoveryPilotScope === "metadata_inspection_v1" || readAssistantMcpDiscoveryLoopSubjectResolutionOptional(debug); @@ -381,12 +413,27 @@ function readAddressDebugCounterparty(debug, toNonEmptyString = fallbackToNonEmp } const discoveryEntities = collectAssistantMcpDiscoveryEntityCandidates(debug, toNonEmptyString); for (const entity of discoveryEntities) { - const text = toNonEmptyString(entity); 
- if (text) { - return text; + const normalized = normalizeCounterpartyCandidate(entity, toNonEmptyString); + if (normalized) { + return normalized; } } - return null; + return normalizeCounterpartyCandidate(readAssistantMcpDiscoveryLoopMetadataScopeHint(debug, toNonEmptyString), toNonEmptyString); +} +function readAddressDebugCounterparty(debug, toNonEmptyString = fallbackToNonEmptyString) { + const extractedFilters = readAddressDebugFilters(debug); + const extractedCounterparty = normalizeCounterpartyCandidate(extractedFilters?.counterparty, toNonEmptyString); + const anchorCounterparty = String(debug?.anchor_type ?? "") === "counterparty" + ? normalizeCounterpartyCandidate(toNonEmptyString(debug?.anchor_value_resolved) ?? toNonEmptyString(debug?.anchor_value_raw), toNonEmptyString) + : null; + const groundedDiscoveryCounterparty = readGroundedDiscoveryCounterparty(debug, toNonEmptyString); + if (hasGroundedDiscoveryBusinessAnswer(debug, toNonEmptyString) && groundedDiscoveryCounterparty) { + if (!extractedCounterparty || !sameCounterpartyCandidate(extractedCounterparty, groundedDiscoveryCounterparty)) { + return groundedDiscoveryCounterparty; + } + return extractedCounterparty; + } + return extractedCounterparty ?? anchorCounterparty ?? groundedDiscoveryCounterparty; } function readAddressDebugIntent(debug, toNonEmptyString = fallbackToNonEmptyString) { const detectedIntent = toNonEmptyString(debug?.detected_intent); @@ -431,8 +478,16 @@ function readAddressDebugTemporalScope(debug, toNonEmptyString = fallbackToNonEm } function resolveAddressDebugAnchorContext(debug, toNonEmptyString = fallbackToNonEmptyString) { const explicitAnchorType = toNonEmptyString(debug?.anchor_type); - const explicitAnchorValue = toNonEmptyString(debug?.anchor_value_resolved) ?? toNonEmptyString(debug?.anchor_value_raw); - if (explicitAnchorType || explicitAnchorValue) { + const explicitAnchorValueRaw = toNonEmptyString(debug?.anchor_value_resolved) ?? 
toNonEmptyString(debug?.anchor_value_raw); + const explicitAnchorValue = explicitAnchorType === "counterparty" + ? normalizeCounterpartyCandidate(explicitAnchorValueRaw, toNonEmptyString) + : explicitAnchorValueRaw; + const groundedDiscoveryCounterparty = readGroundedDiscoveryCounterparty(debug, toNonEmptyString); + const shouldPreferDiscoveryCounterparty = explicitAnchorType === "counterparty" && + Boolean(groundedDiscoveryCounterparty && + hasGroundedDiscoveryBusinessAnswer(debug, toNonEmptyString) && + (!explicitAnchorValue || !sameCounterpartyCandidate(explicitAnchorValue, groundedDiscoveryCounterparty))); + if ((explicitAnchorType || explicitAnchorValue) && !shouldPreferDiscoveryCounterparty) { return { anchorType: explicitAnchorType, anchorValue: explicitAnchorValue @@ -446,8 +501,11 @@ function resolveAddressDebugAnchorContext(debug, toNonEmptyString = fallbackToNo anchorValue: item }; } - const counterparty = toNonEmptyString(extractedFilters?.counterparty); - if (counterparty) { + const counterparty = normalizeCounterpartyCandidate(extractedFilters?.counterparty, toNonEmptyString); + if (counterparty && + !(groundedDiscoveryCounterparty && + hasGroundedDiscoveryBusinessAnswer(debug, toNonEmptyString) && + !sameCounterpartyCandidate(counterparty, groundedDiscoveryCounterparty))) { return { anchorType: "counterparty", anchorValue: counterparty @@ -512,7 +570,9 @@ function resolveAddressDebugCarryoverFilters(debug, toNonEmptyString = fallbackT Boolean(discoveryDateScope.asOfDate || discoveryDateScope.periodFrom || discoveryDateScope.periodTo); const counterparty = readAddressDebugCounterparty(debug, toNonEmptyString); const organization = readAddressDebugOrganization(debug, toNonEmptyString); - if (counterparty && !toNonEmptyString(nextFilters.counterparty)) { + const preferGroundedDiscoveryCounterparty = hasGroundedDiscoveryBusinessAnswer(debug, toNonEmptyString) && Boolean(counterparty); + const existingCounterparty = 
normalizeCounterpartyCandidate(nextFilters.counterparty, toNonEmptyString); + if (counterparty && (preferGroundedDiscoveryCounterparty || !existingCounterparty)) { nextFilters.counterparty = counterparty; } if (organization && !toNonEmptyString(nextFilters.organization)) { diff --git a/llm_normalizer/backend/dist/services/assistantMcpDiscoveryTurnInputAdapter.js b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryTurnInputAdapter.js index 9b1b2ab..63b0998 100644 --- a/llm_normalizer/backend/dist/services/assistantMcpDiscoveryTurnInputAdapter.js +++ b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryTurnInputAdapter.js @@ -3,6 +3,7 @@ Object.defineProperty(exports, "__esModule", { value: true }); exports.ASSISTANT_MCP_DISCOVERY_TURN_INPUT_SCHEMA_VERSION = void 0; exports.buildAssistantMcpDiscoveryTurnInput = buildAssistantMcpDiscoveryTurnInput; const assistantMcpDiscoveryDataNeedGraph_1 = require("./assistantMcpDiscoveryDataNeedGraph"); +const addressTextRepair_1 = require("./addressTextRepair"); exports.ASSISTANT_MCP_DISCOVERY_TURN_INPUT_SCHEMA_VERSION = "assistant_mcp_discovery_turn_input_v1"; function toRecordObject(value) { if (!value || typeof value !== "object" || Array.isArray(value)) { @@ -38,7 +39,32 @@ function pushUnique(target, value) { } } function isReferentialEntityPlaceholder(value) { - return /^(?:\u043d\u0435\u043c\u0443|\u043d\u0435\u0439|\u043d\u0438\u043c|\u043d\u0438\u043c\u0438|\u0435\u0433\u043e|\u0435\u0435|\u0435\u0451|\u0438\u0445|\u044d\u0442\u043e\u043c\u0443|\u044d\u0442\u043e\u0439|\u044d\u0442\u0438\u043c|\u044d\u0442\u0438\u043c\u0438|\u044d\u0442\u043e\u043c)$/iu.test(value.trim()); + return new Set([ + "он", + "она", + "оно", + "они", + "ему", + "ней", + "нему", + "ним", + "ними", + "его", + "ее", + "их", + "этому", + "этой", + "этим", + "этими", + "этом" + ]).has((0, addressTextRepair_1.normalizeRussianComparableText)(value)); +} +function normalizeFollowupCounterpartyCandidate(value) { + const text = 
candidateValue(value); + if (!text || isReferentialEntityPlaceholder(text)) { + return null; + } + return text; } function pushScopedEntityCandidate(target, value, groundedFollowupEntity) { const text = candidateValue(value); @@ -347,14 +373,22 @@ function collectFollowupDiscoverySeed(followupContext) { const entityResolutionAmbiguityCandidates = collectEntityCandidates(followupContext?.previous_discovery_entity_ambiguity_candidates); const ambiguityBlocksImplicitGrounding = effectivePilotScope === "entity_resolution_search_v1" && entityResolutionStatus === "ambiguous"; const metadataPilotCarriesScopeOnly = effectivePilotScope === "metadata_inspection_v1" || loopSubjectResolutionOptional; + const normalizedDiscoveryEntities = discoveryEntities + .map((entity) => normalizeFollowupCounterpartyCandidate(entity)) + .filter((entity) => Boolean(entity)); + const groundedDiscoveryCounterparty = ambiguityBlocksImplicitGrounding || metadataPilotCarriesScopeOnly + ? null + : normalizedDiscoveryEntities[0] ?? normalizeFollowupCounterpartyCandidate(loopMetadataScopeHint); const metadataScopeHint = loopMetadataScopeHint ?? - (loopSubjectResolutionOptional ? discoveryEntities[0] ?? null : null); - const counterparty = toNonEmptyString(previousFilters?.counterparty) ?? - toNonEmptyString(rootFilters?.counterparty) ?? - (toNonEmptyString(followupContext?.previous_anchor_type) === "counterparty" - ? toNonEmptyString(followupContext?.previous_anchor_value) - : null) ?? - (ambiguityBlocksImplicitGrounding || metadataPilotCarriesScopeOnly ? null : discoveryEntities[0] ?? null); + (loopSubjectResolutionOptional ? normalizedDiscoveryEntities[0] ?? null : null); + const previousFiltersCounterparty = normalizeFollowupCounterpartyCandidate(previousFilters?.counterparty); + const rootFiltersCounterparty = normalizeFollowupCounterpartyCandidate(rootFilters?.counterparty); + const previousAnchorCounterparty = toNonEmptyString(followupContext?.previous_anchor_type) === "counterparty" + ? 
normalizeFollowupCounterpartyCandidate(followupContext?.previous_anchor_value) + : null; + const counterparty = groundedDiscoveryCounterparty + ? groundedDiscoveryCounterparty + : previousFiltersCounterparty ?? rootFiltersCounterparty ?? previousAnchorCounterparty; const organization = toNonEmptyString(previousFilters?.organization) ?? toNonEmptyString(rootFilters?.organization) ?? (toNonEmptyString(followupContext?.previous_anchor_type) === "organization" @@ -372,7 +406,7 @@ function collectFollowupDiscoverySeed(followupContext) { loopPendingAxes, loopProvidedAxes, counterparty, - discoveryEntity: ambiguityBlocksImplicitGrounding || loopSubjectResolutionOptional ? null : discoveryEntities[0] ?? null, + discoveryEntity: ambiguityBlocksImplicitGrounding || loopSubjectResolutionOptional ? null : normalizedDiscoveryEntities[0] ?? null, entityResolutionStatus, entityResolutionAmbiguityCandidates, rankingNeed: toNonEmptyString(followupContext?.previous_discovery_ranking_need), @@ -472,6 +506,9 @@ function hasMetadataSignal(text) { return (/(?:\u043e\u0431\u044a\u0435\u043a\u0442(?:\u044b|\u0430|\u043e\u0432)?|\u0440\u0435\u0433\u0438\u0441\u0442\u0440\u044b|\u0434\u043e\u043a\u0443\u043c\u0435\u043d\u0442\u044b|\u0441\u043f\u0440\u0430\u0432\u043e\u0447\u043d\u0438\u043a\u0438|\u043f\u043e\u043b(?:\u0435|\u044f)|objects?|registers?|documents?|catalogs?|fields?)/iu.test(text) && /(?:\u0435\u0441\u0442\u044c|\u043a\u0430\u043a\u0438\u0435|\u0434\u043e\u0441\u0442\u0443\u043f\u043d|\u0432\s+1\u0441|1\u0441|available|exist|which)/iu.test(text)); } +function hasReferentialDocumentExclusionFollowupSignal(text) { + return 
/(?:\u043a\u0440\u043e\u043c\u0435|\u043f\u043e\u043c\u0438\u043c\u043e)\s+(?:\u044d\u0442\u043e\u0433\u043e|\u044d\u0442\u043e\u0439|\u044d\u0442\u043e\u0442|\u044d\u0442\u0443|\u044d\u0442\u0438\u0445)(?:\s+(?:\u0434\u043e\u043a\u0443\u043c\u0435\u043d\u0442\u0430|\u0434\u043e\u0433\u043e\u0432\u043e\u0440\u0430|\u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442\u0430))?/iu.test(text); +} function hasMetadataObjectHint(text) { return /(?:\u043e\u0431\u044a\u0435\u043a\u0442(?:\u044b|\u0430|\u043e\u0432)?|\u0440\u0435\u0433\u0438\u0441\u0442\u0440(?:\u044b)?|\u0434\u043e\u043a\u0443\u043c\u0435\u043d\u0442(?:\u044b)?|\u0441\u043f\u0440\u0430\u0432\u043e\u0447\u043d\u0438\u043a(?:\u0438)?|\u043f\u043e\u043b(?:\u0435|\u044f)|objects?|registers?|documents?|catalogs?|fields?)/iu.test(text); } @@ -732,14 +769,20 @@ function buildAssistantMcpDiscoveryTurnInput(input) { const reasonCodes = []; const rawUserText = toNonEmptyString(input.userMessage); const rawEffectiveText = toNonEmptyString(input.effectiveMessage); - const rawSignalSourceText = `${rawUserText ?? ""} ${rawEffectiveText ?? ""}`.trim(); - const rawEntitySourceText = rawUserText ?? rawEffectiveText ?? rawSignalSourceText; + const repairedUserText = rawUserText ? (0, addressTextRepair_1.repairAddressMojibakeText)(rawUserText) : null; + const repairedEffectiveText = rawEffectiveText ? (0, addressTextRepair_1.repairAddressMojibakeText)(rawEffectiveText) : null; + const rawSignalSourceText = `${repairedUserText ?? rawUserText ?? ""} ${repairedEffectiveText ?? rawEffectiveText ?? ""}`.trim(); + const rawEntitySourceText = repairedUserText ?? rawUserText ?? repairedEffectiveText ?? rawEffectiveText ?? rawSignalSourceText; const rawText = compactLower(rawSignalSourceText); + const rawReferentialDocumentExclusionSignal = hasReferentialDocumentExclusionFollowupSignal(repairedUserText ?? rawUserText ?? 
""); const rawLifecycleSignal = hasLifecycleSignal(rawText); const rawBidirectionalValueFlowSignal = !rawLifecycleSignal && hasBidirectionalValueFlowSignal(rawText); const rawValueFlowSignal = !rawLifecycleSignal && (hasValueFlowSignal(rawText) || hasValueRankingSignal(rawText) || rawBidirectionalValueFlowSignal); - const rawMetadataSignal = !rawLifecycleSignal && !rawValueFlowSignal && hasMetadataSignal(rawText); + const rawMetadataSignal = !rawLifecycleSignal && + !rawValueFlowSignal && + !rawReferentialDocumentExclusionSignal && + hasMetadataSignal(rawText); const rawEntityResolutionSignal = !rawLifecycleSignal && !rawValueFlowSignal && !rawMetadataSignal && hasEntityResolutionSignal(rawText); const rawPayoutSignal = rawValueFlowSignal && !rawBidirectionalValueFlowSignal && hasPayoutSignal(rawText); const monthlyAggregationSignal = hasMonthlyAggregationSignal(rawText); diff --git a/llm_normalizer/backend/dist/services/assistantRoutePolicy.js b/llm_normalizer/backend/dist/services/assistantRoutePolicy.js index 11fa352..11a18be 100644 --- a/llm_normalizer/backend/dist/services/assistantRoutePolicy.js +++ b/llm_normalizer/backend/dist/services/assistantRoutePolicy.js @@ -495,12 +495,35 @@ function createAssistantRoutePolicy(deps) { !effectiveAddressFollowupSignal && resolvedModeDetection.mode === "unsupported" && resolvedIntentResolution.intent === "unknown"); + const groundedValueFlowFollowupContextDetected = Boolean(followupContext && + [ + "counterparty_value_flow_query_movements_v1", + "counterparty_supplier_payout_query_movements_v1", + "counterparty_bidirectional_value_flow_query_movements_v1" + ].includes(String(toNonEmptyString(followupContext?.previous_discovery_pilot_scope) ?? "")) && + !dangerOrCoercionSignal && + (toNonEmptyString(assistantTurnMeaning?.asked_domain_family) === "counterparty_value" || + [ + "turnover", + "payout", + "net_value_flow" + ].includes(String(toNonEmptyString(assistantTurnMeaning?.asked_action_family) ?? 
"")) || + /(?:нетто|сальдо|сколько\s+мы\s+(?:получили|заплатили)|incoming|outgoing)/iu.test(analyticsSample))); const baseToolGatePreservesAddressLane = Boolean(baseToolGate?.runAddressLane && - ["address_intent_resolver_detected", "address_mode_classifier_detected", "address_signal_detected", "llm_canonical_data_signal_detected"].includes(String(baseToolGate?.reason ?? ""))); + [ + "address_intent_resolver_detected", + "address_mode_classifier_detected", + "address_signal_detected", + "llm_canonical_data_signal_detected" + ].includes(String(baseToolGate?.reason ?? ""))) || + Boolean(baseToolGate?.runAddressLane && + String(baseToolGate?.reason ?? "") === "followup_context_detected" && + groundedValueFlowFollowupContextDetected); const nonDomainQueryIndexed = Boolean(!llmFirstAddressCandidate && deterministicNonDomainGuard && (llmFirstUnsupportedCandidate || llmContractMode === null) && !baseToolGatePreservesAddressLane && + !groundedValueFlowFollowupContextDetected && !protectedInventoryShortFollowup && !organizationClarificationContinuationDetected); const lastAddressAssistantDebug = sessionItems @@ -664,9 +687,11 @@ function createAssistantRoutePolicy(deps) { const unsupportedCurrentTurnMeaningBoundary = Boolean(assistantTurnMeaning?.unsupported_but_understood_family && assistantTurnMeaning?.stale_replay_forbidden === true && !turnMeaningIntentCandidate && + !aggregateBusinessAnalyticsSignal && !dataScopeMetaQuery && !capabilityMetaQuery && !dangerOrCoercionSignal && + !groundedValueFlowFollowupContextDetected && !organizationClarificationContinuationDetected); if (unsupportedCurrentTurnMeaningBoundary) { return { diff --git a/llm_normalizer/backend/src/services/addressNavigationState.ts b/llm_normalizer/backend/src/services/addressNavigationState.ts index f3611bc..8ce68ff 100644 --- a/llm_normalizer/backend/src/services/addressNavigationState.ts +++ b/llm_normalizer/backend/src/services/addressNavigationState.ts @@ -1,6 +1,10 @@ import { nanoid } from "nanoid"; 
import type { AssistantConversationItem } from "../types/assistant"; import type { AddressIntent } from "../types/addressQuery"; +import { + readAddressDebugCounterparty, + readAddressDebugItem +} from "./assistantContinuityPolicy"; import { ADDRESS_NAVIGATION_STATE_SCHEMA_VERSION, type AddressFocusObject, @@ -277,24 +281,38 @@ function resolveNavigationAction(debug: Record, hasFocusObject: return hasFocusObject ? "drilldown" : "open"; } +function buildFocusObject( + objectType: AddressFocusObjectType, + label: string, + resultSetId: string, + createdAt: string +): AddressFocusObject { + return { + object_type: objectType, + object_id: `${objectType}:${label}`.toLowerCase(), + label, + provenance_result_set_id: resultSetId, + selected_at: createdAt + }; +} + function buildFocusObjectFromDebug(debug: Record, resultSetId: string, createdAt: string): AddressFocusObject | null { const extractedFilters = toObject(debug.extracted_filters) ?? {}; + const objectType = toAddressFocusObjectType(debug.anchor_type); + const canonicalType = objectType === "unknown" ? inferDisplayEntityType(toAddressIntent(debug.detected_intent)) : objectType; + if (canonicalType === "item") { + const item = readAddressDebugItem(debug, toNonEmptyString); + return item ? buildFocusObject(canonicalType, item, resultSetId, createdAt) : null; + } + if (canonicalType === "counterparty" && debug.mcp_discovery_response_applied === true) { + const counterparty = readAddressDebugCounterparty(debug, toNonEmptyString); + return counterparty ? buildFocusObject(canonicalType, counterparty, resultSetId, createdAt) : null; + } const rawValue = toNonEmptyString(debug.anchor_value_resolved) ?? toNonEmptyString(debug.anchor_value_raw) ?? toNonEmptyString(extractedFilters.item); - if (!rawValue) { - return null; - } - const objectType = toAddressFocusObjectType(debug.anchor_type); - const canonicalType = objectType === "unknown" ? 
inferDisplayEntityType(toAddressIntent(debug.detected_intent)) : objectType; - return { - object_type: canonicalType, - object_id: `${canonicalType}:${rawValue}`.toLowerCase(), - label: rawValue, - provenance_result_set_id: resultSetId, - selected_at: createdAt - }; + return rawValue ? buildFocusObject(canonicalType, rawValue, resultSetId, createdAt) : null; } function capResultSets(resultSets: AddressResultSet[]): AddressResultSet[] { diff --git a/llm_normalizer/backend/src/services/addressTextRepair.ts b/llm_normalizer/backend/src/services/addressTextRepair.ts new file mode 100644 index 0000000..4cc3bb0 --- /dev/null +++ b/llm_normalizer/backend/src/services/addressTextRepair.ts @@ -0,0 +1,74 @@ +import iconv from "iconv-lite"; + +function compactWhitespace(value: string): string { + return value.replace(/\s+/g, " ").trim(); +} + +function textMojibakeScore(value: string): number { + const source = String(value ?? ""); + const cyrillic = (source.match(/[\u0400-\u04ff]/g) ?? []).length; + const latin = (source.match(/[A-Za-z]/g) ?? []).length; + const replacement = (source.match(/[�]/g) ?? []).length; + const pairMarkers = (source.match(/(?:Р.|С.|Ð.|Ñ.)/g) ?? []).length; + const doubleEncodedMarkers = (source.match(/(?:Р“[Р-џ]|Р’[Р-џ]|Ã.|Â.)/gu) ?? []).length; + return cyrillic + latin - replacement * 3 - pairMarkers * 2 - doubleEncodedMarkers * 2; +} + +function looksLikeAddressMojibake(value: string): boolean { + const source = String(value ?? ""); + if (!source.trim()) { + return false; + } + if (/[�]/.test(source)) { + return true; + } + if ((source.match(/(?:Р.|С.|Ð.|Ñ.)/g) ?? []).length >= 2) { + return true; + } + if ((source.match(/(?:Р“[Р-џ]|Р’[Р-џ]|Ã.|Â.)/gu) ?? []).length >= 2) { + return true; + } + return false; +} + +export function repairAddressMojibakeText(value: string): string { + const source = String(value ?? 
""); + if (!looksLikeAddressMojibake(source)) { + return source; + } + + let candidate = source; + for (let pass = 0; pass < 3; pass += 1) { + let improved = false; + + try { + const fromWin1251 = iconv.encode(candidate, "win1251").toString("utf8"); + if (textMojibakeScore(fromWin1251) > textMojibakeScore(candidate)) { + candidate = fromWin1251; + improved = true; + } + } catch { + // Ignore decode failures and keep the current candidate. + } + + try { + const fromLatin1 = Buffer.from(candidate, "latin1").toString("utf8"); + if (textMojibakeScore(fromLatin1) > textMojibakeScore(candidate)) { + candidate = fromLatin1; + improved = true; + } + } catch { + // Ignore decode failures and keep the current candidate. + } + + if (!improved) { + break; + } + } + + return candidate; +} + +export function normalizeRussianComparableText(value: unknown): string { + return compactWhitespace(repairAddressMojibakeText(String(value ?? "")).toLowerCase()).replace(/ё/g, "е"); +} diff --git a/llm_normalizer/backend/src/services/assistantAddressOrchestrationRuntimeAdapter.ts b/llm_normalizer/backend/src/services/assistantAddressOrchestrationRuntimeAdapter.ts index b9393b1..3486772 100644 --- a/llm_normalizer/backend/src/services/assistantAddressOrchestrationRuntimeAdapter.ts +++ b/llm_normalizer/backend/src/services/assistantAddressOrchestrationRuntimeAdapter.ts @@ -178,16 +178,25 @@ function shouldPreferRawFollowupMessage( const previousIntent = toNonEmptyString(followupContext?.previous_intent); const rootIntent = toNonEmptyString(followupContext?.root_intent); const previousAnchorType = toNonEmptyString(followupContext?.previous_anchor_type); + const hasReferentialDocumentExclusionFollowupCue = 
/(?:\u043a\u0440\u043e\u043c\u0435|\u043f\u043e\u043c\u0438\u043c\u043e)\s+(?:\u044d\u0442\u043e\u0433\u043e|\u044d\u0442\u043e\u0439|\u044d\u0442\u043e\u0442|\u044d\u0442\u0443|\u044d\u0442\u0438\u0445)(?:\s+(?:\u0434\u043e\u043a\u0443\u043c\u0435\u043d\u0442\u0430|\u0434\u043e\u0433\u043e\u0432\u043e\u0440\u0430|\u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442\u0430))?/iu.test( + rawMessage + ); const hasInventoryItemCarryover = previousAnchorType === "item" && isInventorySelectedObjectOrRootIntent(previousIntent); const hasInventoryFrameCarryover = isInventorySelectedObjectOrRootIntent(previousIntent) || isInventorySelectedObjectOrRootIntent(rootIntent); + const hasDocumentCarryover = + previousIntent === "list_documents_by_counterparty" || previousIntent === "list_documents_by_contract"; if (mode === "unsupported" && intent === "unknown") { return true; } + if (hasDocumentCarryover && hasReferentialDocumentExclusionFollowupCue) { + return true; + } + if (hasSameDateFollowupSignal(rawMessage) && hasExplicitCurrentDateSignal(canonicalMessage)) { return true; } diff --git a/llm_normalizer/backend/src/services/assistantContinuityPolicy.ts b/llm_normalizer/backend/src/services/assistantContinuityPolicy.ts index 11b4736..f469c8c 100644 --- a/llm_normalizer/backend/src/services/assistantContinuityPolicy.ts +++ b/llm_normalizer/backend/src/services/assistantContinuityPolicy.ts @@ -2,6 +2,7 @@ import { mergeKnownOrganizations as mergeKnownOrganizationsFromMatcher, normalizeOrganizationScopeValue as normalizeOrganizationScopeValueFromMatcher } from "./assistantOrganizationMatcher"; +import { normalizeRussianComparableText } from "./addressTextRepair"; export interface AssistantContinuitySnapshotInput { sessionItems?: unknown[]; @@ -567,17 +568,61 @@ export function readAddressDebugItem( ); } -export function readAddressDebugCounterparty( +function isReferentialCounterpartyPlaceholder( + value: string | null +): boolean { + if (!value) { + return false; + } + 
return new Set([ + "он", + "она", + "оно", + "они", + "ему", + "ней", + "нему", + "ним", + "ними", + "его", + "ее", + "их", + "этому", + "этой", + "этом", + "этим", + "эта", + "этот", + "эти" + ]).has(normalizeRussianComparableText(value)); +} + +function normalizeCounterpartyCandidate( + value: unknown, + toNonEmptyString: (value: unknown) => string | null +): string | null { + const text = toNonEmptyString(value); + if (!text || isReferentialCounterpartyPlaceholder(text)) { + return null; + } + return text; +} + +function sameCounterpartyCandidate( + left: string | null, + right: string | null +): boolean { + return Boolean( + left && + right && + normalizeRussianComparableText(left) === normalizeRussianComparableText(right) + ); +} + +function readGroundedDiscoveryCounterparty( debug: Record | null, toNonEmptyString: (value: unknown) => string | null = fallbackToNonEmptyString ): string | null { - const extractedFilters = readAddressDebugFilters(debug); - if (toNonEmptyString(extractedFilters?.counterparty)) { - return toNonEmptyString(extractedFilters?.counterparty); - } - if (String(debug?.anchor_type ?? "") === "counterparty") { - return toNonEmptyString(debug?.anchor_value_resolved) ?? 
toNonEmptyString(debug?.anchor_value_raw); - } const discoveryPilotScope = readAssistantMcpDiscoveryPilotScope(debug, toNonEmptyString); const suppressDiscoveryEntityCarryover = discoveryPilotScope === "metadata_inspection_v1" || @@ -587,12 +632,38 @@ export function readAddressDebugCounterparty( } const discoveryEntities = collectAssistantMcpDiscoveryEntityCandidates(debug, toNonEmptyString); for (const entity of discoveryEntities) { - const text = toNonEmptyString(entity); - if (text) { - return text; + const normalized = normalizeCounterpartyCandidate(entity, toNonEmptyString); + if (normalized) { + return normalized; } } - return null; + return normalizeCounterpartyCandidate( + readAssistantMcpDiscoveryLoopMetadataScopeHint(debug, toNonEmptyString), + toNonEmptyString + ); +} + +export function readAddressDebugCounterparty( + debug: Record | null, + toNonEmptyString: (value: unknown) => string | null = fallbackToNonEmptyString +): string | null { + const extractedFilters = readAddressDebugFilters(debug); + const extractedCounterparty = normalizeCounterpartyCandidate(extractedFilters?.counterparty, toNonEmptyString); + const anchorCounterparty = + String(debug?.anchor_type ?? "") === "counterparty" + ? normalizeCounterpartyCandidate( + toNonEmptyString(debug?.anchor_value_resolved) ?? toNonEmptyString(debug?.anchor_value_raw), + toNonEmptyString + ) + : null; + const groundedDiscoveryCounterparty = readGroundedDiscoveryCounterparty(debug, toNonEmptyString); + if (hasGroundedDiscoveryBusinessAnswer(debug, toNonEmptyString) && groundedDiscoveryCounterparty) { + if (!extractedCounterparty || !sameCounterpartyCandidate(extractedCounterparty, groundedDiscoveryCounterparty)) { + return groundedDiscoveryCounterparty; + } + return extractedCounterparty; + } + return extractedCounterparty ?? anchorCounterparty ?? 
groundedDiscoveryCounterparty; } export function readAddressDebugIntent( @@ -664,9 +735,21 @@ export function resolveAddressDebugAnchorContext( toNonEmptyString: (value: unknown) => string | null = fallbackToNonEmptyString ): AssistantAddressDebugAnchorContext { const explicitAnchorType = toNonEmptyString(debug?.anchor_type); - const explicitAnchorValue = + const explicitAnchorValueRaw = toNonEmptyString(debug?.anchor_value_resolved) ?? toNonEmptyString(debug?.anchor_value_raw); - if (explicitAnchorType || explicitAnchorValue) { + const explicitAnchorValue = + explicitAnchorType === "counterparty" + ? normalizeCounterpartyCandidate(explicitAnchorValueRaw, toNonEmptyString) + : explicitAnchorValueRaw; + const groundedDiscoveryCounterparty = readGroundedDiscoveryCounterparty(debug, toNonEmptyString); + const shouldPreferDiscoveryCounterparty = + explicitAnchorType === "counterparty" && + Boolean( + groundedDiscoveryCounterparty && + hasGroundedDiscoveryBusinessAnswer(debug, toNonEmptyString) && + (!explicitAnchorValue || !sameCounterpartyCandidate(explicitAnchorValue, groundedDiscoveryCounterparty)) + ); + if ((explicitAnchorType || explicitAnchorValue) && !shouldPreferDiscoveryCounterparty) { return { anchorType: explicitAnchorType, anchorValue: explicitAnchorValue @@ -681,8 +764,15 @@ export function resolveAddressDebugAnchorContext( anchorValue: item }; } - const counterparty = toNonEmptyString(extractedFilters?.counterparty); - if (counterparty) { + const counterparty = normalizeCounterpartyCandidate(extractedFilters?.counterparty, toNonEmptyString); + if ( + counterparty && + !( + groundedDiscoveryCounterparty && + hasGroundedDiscoveryBusinessAnswer(debug, toNonEmptyString) && + !sameCounterpartyCandidate(counterparty, groundedDiscoveryCounterparty) + ) + ) { return { anchorType: "counterparty", anchorValue: counterparty @@ -761,7 +851,10 @@ export function resolveAddressDebugCarryoverFilters( Boolean(discoveryDateScope.asOfDate || discoveryDateScope.periodFrom 
|| discoveryDateScope.periodTo); const counterparty = readAddressDebugCounterparty(debug, toNonEmptyString); const organization = readAddressDebugOrganization(debug, toNonEmptyString); - if (counterparty && !toNonEmptyString(nextFilters.counterparty)) { + const preferGroundedDiscoveryCounterparty = + hasGroundedDiscoveryBusinessAnswer(debug, toNonEmptyString) && Boolean(counterparty); + const existingCounterparty = normalizeCounterpartyCandidate(nextFilters.counterparty, toNonEmptyString); + if (counterparty && (preferGroundedDiscoveryCounterparty || !existingCounterparty)) { nextFilters.counterparty = counterparty; } if (organization && !toNonEmptyString(nextFilters.organization)) { diff --git a/llm_normalizer/backend/src/services/assistantMcpDiscoveryTurnInputAdapter.ts b/llm_normalizer/backend/src/services/assistantMcpDiscoveryTurnInputAdapter.ts index cbf3f86..b49108a 100644 --- a/llm_normalizer/backend/src/services/assistantMcpDiscoveryTurnInputAdapter.ts +++ b/llm_normalizer/backend/src/services/assistantMcpDiscoveryTurnInputAdapter.ts @@ -3,6 +3,7 @@ import { buildAssistantMcpDiscoveryDataNeedGraph, type AssistantMcpDiscoveryDataNeedGraphContract } from "./assistantMcpDiscoveryDataNeedGraph"; +import { normalizeRussianComparableText, repairAddressMojibakeText } from "./addressTextRepair"; import type { AssistantMcpDiscoveryMetadataRecommendedPrimitive, AssistantMcpDiscoveryMetadataRouteFamily, @@ -80,9 +81,33 @@ function pushUnique(target: string[], value: unknown): void { } function isReferentialEntityPlaceholder(value: string): boolean { - return /^(?:\u043d\u0435\u043c\u0443|\u043d\u0435\u0439|\u043d\u0438\u043c|\u043d\u0438\u043c\u0438|\u0435\u0433\u043e|\u0435\u0435|\u0435\u0451|\u0438\u0445|\u044d\u0442\u043e\u043c\u0443|\u044d\u0442\u043e\u0439|\u044d\u0442\u0438\u043c|\u044d\u0442\u0438\u043c\u0438|\u044d\u0442\u043e\u043c)$/iu.test( - value.trim() - ); + return new Set([ + "он", + "она", + "оно", + "они", + "ему", + "ней", + "нему", + "ним", + 
"ними", + "его", + "ее", + "их", + "этому", + "этой", + "этим", + "этими", + "этом" + ]).has(normalizeRussianComparableText(value)); +} + +function normalizeFollowupCounterpartyCandidate(value: unknown): string | null { + const text = candidateValue(value); + if (!text || isReferentialEntityPlaceholder(text)) { + return null; + } + return text; } function pushScopedEntityCandidate( @@ -482,16 +507,25 @@ function collectFollowupDiscoverySeed(followupContext: Record | effectivePilotScope === "entity_resolution_search_v1" && entityResolutionStatus === "ambiguous"; const metadataPilotCarriesScopeOnly = effectivePilotScope === "metadata_inspection_v1" || loopSubjectResolutionOptional; + const normalizedDiscoveryEntities = discoveryEntities + .map((entity) => normalizeFollowupCounterpartyCandidate(entity)) + .filter((entity): entity is string => Boolean(entity)); + const groundedDiscoveryCounterparty = + ambiguityBlocksImplicitGrounding || metadataPilotCarriesScopeOnly + ? null + : normalizedDiscoveryEntities[0] ?? normalizeFollowupCounterpartyCandidate(loopMetadataScopeHint); const metadataScopeHint = loopMetadataScopeHint ?? - (loopSubjectResolutionOptional ? discoveryEntities[0] ?? null : null); - const counterparty = - toNonEmptyString(previousFilters?.counterparty) ?? - toNonEmptyString(rootFilters?.counterparty) ?? - (toNonEmptyString(followupContext?.previous_anchor_type) === "counterparty" - ? toNonEmptyString(followupContext?.previous_anchor_value) - : null) ?? - (ambiguityBlocksImplicitGrounding || metadataPilotCarriesScopeOnly ? null : discoveryEntities[0] ?? null); + (loopSubjectResolutionOptional ? normalizedDiscoveryEntities[0] ?? 
null : null); + const previousFiltersCounterparty = normalizeFollowupCounterpartyCandidate(previousFilters?.counterparty); + const rootFiltersCounterparty = normalizeFollowupCounterpartyCandidate(rootFilters?.counterparty); + const previousAnchorCounterparty = + toNonEmptyString(followupContext?.previous_anchor_type) === "counterparty" + ? normalizeFollowupCounterpartyCandidate(followupContext?.previous_anchor_value) + : null; + const counterparty = groundedDiscoveryCounterparty + ? groundedDiscoveryCounterparty + : previousFiltersCounterparty ?? rootFiltersCounterparty ?? previousAnchorCounterparty; const organization = toNonEmptyString(previousFilters?.organization) ?? toNonEmptyString(rootFilters?.organization) ?? @@ -512,7 +546,7 @@ function collectFollowupDiscoverySeed(followupContext: Record | loopProvidedAxes, counterparty, discoveryEntity: - ambiguityBlocksImplicitGrounding || loopSubjectResolutionOptional ? null : discoveryEntities[0] ?? null, + ambiguityBlocksImplicitGrounding || loopSubjectResolutionOptional ? null : normalizedDiscoveryEntities[0] ?? 
null, entityResolutionStatus, entityResolutionAmbiguityCandidates, rankingNeed: toNonEmptyString(followupContext?.previous_discovery_ranking_need), @@ -672,6 +706,12 @@ function hasMetadataSignal(text: string): boolean { ); } +function hasReferentialDocumentExclusionFollowupSignal(text: string): boolean { + return /(?:\u043a\u0440\u043e\u043c\u0435|\u043f\u043e\u043c\u0438\u043c\u043e)\s+(?:\u044d\u0442\u043e\u0433\u043e|\u044d\u0442\u043e\u0439|\u044d\u0442\u043e\u0442|\u044d\u0442\u0443|\u044d\u0442\u0438\u0445)(?:\s+(?:\u0434\u043e\u043a\u0443\u043c\u0435\u043d\u0442\u0430|\u0434\u043e\u0433\u043e\u0432\u043e\u0440\u0430|\u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442\u0430))?/iu.test( + text + ); +} + function hasMetadataObjectHint(text: string): boolean { return /(?:\u043e\u0431\u044a\u0435\u043a\u0442(?:\u044b|\u0430|\u043e\u0432)?|\u0440\u0435\u0433\u0438\u0441\u0442\u0440(?:\u044b)?|\u0434\u043e\u043a\u0443\u043c\u0435\u043d\u0442(?:\u044b)?|\u0441\u043f\u0440\u0430\u0432\u043e\u0447\u043d\u0438\u043a(?:\u0438)?|\u043f\u043e\u043b(?:\u0435|\u044f)|objects?|registers?|documents?|catalogs?|fields?)/iu.test( text @@ -1007,15 +1047,24 @@ export function buildAssistantMcpDiscoveryTurnInput( const reasonCodes: string[] = []; const rawUserText = toNonEmptyString(input.userMessage); const rawEffectiveText = toNonEmptyString(input.effectiveMessage); - const rawSignalSourceText = `${rawUserText ?? ""} ${rawEffectiveText ?? ""}`.trim(); - const rawEntitySourceText = rawUserText ?? rawEffectiveText ?? rawSignalSourceText; + const repairedUserText = rawUserText ? repairAddressMojibakeText(rawUserText) : null; + const repairedEffectiveText = rawEffectiveText ? repairAddressMojibakeText(rawEffectiveText) : null; + const rawSignalSourceText = `${repairedUserText ?? rawUserText ?? ""} ${repairedEffectiveText ?? rawEffectiveText ?? ""}`.trim(); + const rawEntitySourceText = repairedUserText ?? rawUserText ?? repairedEffectiveText ?? rawEffectiveText ?? 
rawSignalSourceText; const rawText = compactLower(rawSignalSourceText); + const rawReferentialDocumentExclusionSignal = hasReferentialDocumentExclusionFollowupSignal( + repairedUserText ?? rawUserText ?? "" + ); const rawLifecycleSignal = hasLifecycleSignal(rawText); const rawBidirectionalValueFlowSignal = !rawLifecycleSignal && hasBidirectionalValueFlowSignal(rawText); const rawValueFlowSignal = !rawLifecycleSignal && (hasValueFlowSignal(rawText) || hasValueRankingSignal(rawText) || rawBidirectionalValueFlowSignal); - const rawMetadataSignal = !rawLifecycleSignal && !rawValueFlowSignal && hasMetadataSignal(rawText); + const rawMetadataSignal = + !rawLifecycleSignal && + !rawValueFlowSignal && + !rawReferentialDocumentExclusionSignal && + hasMetadataSignal(rawText); const rawEntityResolutionSignal = !rawLifecycleSignal && !rawValueFlowSignal && !rawMetadataSignal && hasEntityResolutionSignal(rawText); const rawPayoutSignal = rawValueFlowSignal && !rawBidirectionalValueFlowSignal && hasPayoutSignal(rawText); diff --git a/llm_normalizer/backend/src/services/assistantRoutePolicy.ts b/llm_normalizer/backend/src/services/assistantRoutePolicy.ts index 4063b92..6af6e28 100644 --- a/llm_normalizer/backend/src/services/assistantRoutePolicy.ts +++ b/llm_normalizer/backend/src/services/assistantRoutePolicy.ts @@ -579,12 +579,35 @@ export function createAssistantRoutePolicy(deps) { !effectiveAddressFollowupSignal && resolvedModeDetection.mode === "unsupported" && resolvedIntentResolution.intent === "unknown"); + const groundedValueFlowFollowupContextDetected = Boolean(followupContext && + [ + "counterparty_value_flow_query_movements_v1", + "counterparty_supplier_payout_query_movements_v1", + "counterparty_bidirectional_value_flow_query_movements_v1" + ].includes(String(toNonEmptyString(followupContext?.previous_discovery_pilot_scope) ?? 
"")) && + !dangerOrCoercionSignal && + (toNonEmptyString(assistantTurnMeaning?.asked_domain_family) === "counterparty_value" || + [ + "turnover", + "payout", + "net_value_flow" + ].includes(String(toNonEmptyString(assistantTurnMeaning?.asked_action_family) ?? "")) || + /(?:нетто|сальдо|сколько\s+мы\s+(?:получили|заплатили)|incoming|outgoing)/iu.test(analyticsSample))); const baseToolGatePreservesAddressLane = Boolean(baseToolGate?.runAddressLane && - ["address_intent_resolver_detected", "address_mode_classifier_detected", "address_signal_detected", "llm_canonical_data_signal_detected"].includes(String(baseToolGate?.reason ?? ""))); + [ + "address_intent_resolver_detected", + "address_mode_classifier_detected", + "address_signal_detected", + "llm_canonical_data_signal_detected" + ].includes(String(baseToolGate?.reason ?? ""))) || + Boolean(baseToolGate?.runAddressLane && + String(baseToolGate?.reason ?? "") === "followup_context_detected" && + groundedValueFlowFollowupContextDetected); const nonDomainQueryIndexed = Boolean(!llmFirstAddressCandidate && deterministicNonDomainGuard && (llmFirstUnsupportedCandidate || llmContractMode === null) && !baseToolGatePreservesAddressLane && + !groundedValueFlowFollowupContextDetected && !protectedInventoryShortFollowup && !organizationClarificationContinuationDetected); const lastAddressAssistantDebug = sessionItems @@ -749,9 +772,11 @@ export function createAssistantRoutePolicy(deps) { assistantTurnMeaning?.unsupported_but_understood_family && assistantTurnMeaning?.stale_replay_forbidden === true && !turnMeaningIntentCandidate && + !aggregateBusinessAnalyticsSignal && !dataScopeMetaQuery && !capabilityMetaQuery && !dangerOrCoercionSignal && + !groundedValueFlowFollowupContextDetected && !organizationClarificationContinuationDetected); if (unsupportedCurrentTurnMeaningBoundary) { return { diff --git a/llm_normalizer/backend/tests/addressNavigationState.test.ts b/llm_normalizer/backend/tests/addressNavigationState.test.ts index 
4d28ff1..fdf87b5 100644 --- a/llm_normalizer/backend/tests/addressNavigationState.test.ts +++ b/llm_normalizer/backend/tests/addressNavigationState.test.ts @@ -112,6 +112,89 @@ describe("address navigation state", () => { expect(evolved.session_context.date_scope.period_to).toBe("2020-12-31"); }); + it("prefers grounded discovery counterparty over stale referential anchor when updating focus", () => { + const initial = normalizeAddressNavigationState( + { + schema_version: "address_navigation_state_v1", + session_id: "asst-3b", + updated_at: "2026-04-12T10:00:00.000Z", + session_context: { + active_result_set_id: "rs-prev", + active_focus_object: { + object_type: "counterparty", + object_id: "counterparty:нортон", + label: "НОРТОН", + provenance_result_set_id: "rs-prev", + selected_at: "2026-04-12T09:59:00.000Z" + }, + last_confirmed_route: "address_customer_revenue_and_payments_v1", + date_scope: { + as_of_date: null, + period_from: "2020-01-01", + period_to: "2020-12-31" + }, + organization_scope: "ООО Альтернатива Плюс" + }, + result_sets: [], + navigation_history: [] + } as any, + "asst-3b" + ); + const assistantItem = { + message_id: "msg-a2b", + session_id: "asst-3b", + role: "assistant", + text: "По Группа СВК подтверждены исходящие платежи за 2020 год.", + reply_type: "factual", + created_at: "2026-04-12T10:02:30.000Z", + trace_id: "address-456b", + debug: { + detected_mode: "address_query", + detected_intent: "supplier_payouts_profile", + selected_recipe: "address_supplier_payouts_profile_v1", + extracted_filters: { + counterparty: "НОРТОН", + period_from: "2020-01-01", + period_to: "2020-12-31" + }, + anchor_type: "counterparty", + anchor_value_raw: "он", + anchor_value_resolved: "он", + mcp_discovery_response_applied: true, + assistant_mcp_discovery_entry_point_v1: { + schema_version: "assistant_mcp_discovery_runtime_entry_point_v1", + entry_status: "bridge_executed", + turn_input: { + turn_meaning_ref: { + asked_domain_family: "counterparty_value", + 
asked_action_family: "payout", + explicit_entity_candidates: ["Группа СВК"] + } + }, + bridge: { + bridge_status: "answer_draft_ready", + business_fact_answer_allowed: true, + pilot: { + pilot_scope: "counterparty_supplier_payout_query_movements_v1" + }, + answer_draft: { + answer_mode: "confirmed_factual" + } + } + }, + dialog_continuation_contract_v2: { + decision: "continue_previous" + } + } + } as any; + + const evolved = evolveAddressNavigationStateWithAssistantItem(initial, assistantItem, 4); + expect(evolved.session_context.active_focus_object?.object_type).toBe("counterparty"); + expect(evolved.session_context.active_focus_object?.label).toBe("Группа СВК"); + expect(evolved.session_context.active_focus_object?.object_id).toBe("counterparty:группа свк"); + expect(evolved.navigation_history[0]?.target_object_id).toBe("counterparty:группа свк"); + }); + it("captures item focus from inventory answers when no anchor is materialized", () => { const base = createEmptyAddressNavigationState("asst-4", "2026-04-12T10:00:00.000Z"); const assistantItem = { diff --git a/llm_normalizer/backend/tests/assistantAddressFollowupContext.test.ts b/llm_normalizer/backend/tests/assistantAddressFollowupContext.test.ts index 97303a1..c9018f6 100644 --- a/llm_normalizer/backend/tests/assistantAddressFollowupContext.test.ts +++ b/llm_normalizer/backend/tests/assistantAddressFollowupContext.test.ts @@ -731,13 +731,14 @@ describe("assistant address follow-up carryover", () => { expect(second.reply_type).toBe("factual"); expect(calls).toHaveLength(2); + expect(calls[1].message).toBe(followupMessage); expect(calls[1].options?.followupContext?.previous_anchor_type).toBe("counterparty"); expect(String(calls[1].options?.followupContext?.previous_anchor_value ?? "")).toContain("Жуковка 51"); expect(String(calls[1].options?.followupContext?.previous_filters?.counterparty ?? 
"")).toContain("жуковке 51"); expect(normalizerService.normalize).not.toHaveBeenCalled(); }); - it("retries with raw user message after rewrite degraded anchor and returns factual follow-up result", async () => { + it("prefers the raw referential document follow-up over a degraded rewrite and returns factual follow-up result", async () => { const calls: Array<{ message: string; options?: any }> = []; const firstMessage = "покажи документы по жуковке 51"; const followupMessage = "кроме этого документа есть еще чтото?"; @@ -826,12 +827,10 @@ describe("assistant address follow-up carryover", () => { } as any); expect(second.ok).toBe(true); expect(second.reply_type).toBe("factual"); - expect(second.debug?.address_retry_audit?.attempted).toBe(true); - expect(second.debug?.address_retry_audit?.initial_limited_category).toBe("missing_anchor"); - expect(second.debug?.address_retry_audit?.retry_message).toBe(followupMessage); + expect(second.debug?.address_retry_audit?.attempted).toBe(false); - expect(calls.some((entry) => String(entry.message).toLowerCase().startsWith("документы по контрагенту"))).toBe(true); - expect(calls.some((entry) => String(entry.message).toLowerCase() === followupMessage)).toBe(true); + expect(calls).toHaveLength(2); + expect(calls[1].message).toBe(followupMessage); expect(normalizerService.normalize).not.toHaveBeenCalled(); }); diff --git a/llm_normalizer/backend/tests/assistantContinuityPolicy.test.ts b/llm_normalizer/backend/tests/assistantContinuityPolicy.test.ts index c2ee728..de1d3ff 100644 --- a/llm_normalizer/backend/tests/assistantContinuityPolicy.test.ts +++ b/llm_normalizer/backend/tests/assistantContinuityPolicy.test.ts @@ -230,6 +230,102 @@ describe("assistantContinuityPolicy organization authority", () => { }); }); + it("replaces referential counterparty placeholders with grounded discovery entity during carryover", () => { + const debug = { + execution_lane: "address_query", + extracted_filters: { + counterparty: "РѕРЅ", + 
period_from: "2020-01-01", + period_to: "2020-12-31" + }, + anchor_type: "counterparty", + anchor_value_raw: "РѕРЅ", + anchor_value_resolved: "РѕРЅ", + mcp_discovery_response_applied: true, + assistant_mcp_discovery_entry_point_v1: { + schema_version: "assistant_mcp_discovery_runtime_entry_point_v1", + entry_status: "bridge_executed", + turn_input: { + turn_meaning_ref: { + asked_domain_family: "counterparty_value", + asked_action_family: "turnover", + explicit_entity_candidates: ["Группа РЎР’Рљ"], + explicit_date_scope: "2020" + } + }, + bridge: { + bridge_status: "answer_draft_ready", + business_fact_answer_allowed: true, + pilot: { + pilot_scope: "counterparty_value_flow_query_movements_v1" + }, + answer_draft: { + answer_mode: "confirmed_with_bounded_inference" + } + } + } + }; + + expect(readAddressDebugCounterparty(debug)).toBe("Группа РЎР’Рљ"); + expect(resolveAddressDebugCarryoverFilters(debug)).toEqual({ + counterparty: "Группа РЎР’Рљ", + period_from: "2020-01-01", + period_to: "2020-12-31" + }); + expect(resolveAddressDebugAnchorContext(debug)).toEqual({ + anchorType: "counterparty", + anchorValue: "Группа РЎР’Рљ" + }); + }); + + it("prefers grounded discovery metadata scope over conflicting stale exact-route counterparty", () => { + const debug = { + execution_lane: "address_query", + extracted_filters: { + counterparty: "РќРћР РўРћРќ", + period_from: "2020-01-01", + period_to: "2020-12-31" + }, + anchor_type: "counterparty", + anchor_value_resolved: "СервисКонсалт, РћРћРћ", + mcp_discovery_response_applied: true, + assistant_mcp_discovery_entry_point_v1: { + schema_version: "assistant_mcp_discovery_runtime_entry_point_v1", + entry_status: "bridge_executed", + turn_input: { + turn_meaning_ref: { + asked_domain_family: "counterparty_value", + asked_action_family: "payout", + explicit_entity_candidates: ["РѕРЅ"], + metadata_scope_hint: "Группа РЎР’Рљ", + explicit_date_scope: "2020" + } + }, + bridge: { + bridge_status: "answer_draft_ready", + 
business_fact_answer_allowed: true, + pilot: { + pilot_scope: "counterparty_supplier_payout_query_movements_v1" + }, + answer_draft: { + answer_mode: "confirmed_with_bounded_inference" + } + } + } + }; + + expect(readAddressDebugCounterparty(debug)).toBe("Группа РЎР’Рљ"); + expect(resolveAddressDebugCarryoverFilters(debug)).toEqual({ + counterparty: "Группа РЎР’Рљ", + period_from: "2020-01-01", + period_to: "2020-12-31" + }); + expect(resolveAddressDebugAnchorContext(debug)).toEqual({ + anchorType: "counterparty", + anchorValue: "Группа РЎР’Рљ" + }); + }); + it("prefers the resolved entity from grounded entity-resolution discovery for counterparty carryover", () => { const debug = { execution_lane: "living_chat", diff --git a/llm_normalizer/backend/tests/assistantLivingRouter.test.ts b/llm_normalizer/backend/tests/assistantLivingRouter.test.ts index bcddf89..c68dad5 100644 --- a/llm_normalizer/backend/tests/assistantLivingRouter.test.ts +++ b/llm_normalizer/backend/tests/assistantLivingRouter.test.ts @@ -235,6 +235,40 @@ describe("assistant orchestration contract", () => { expect(decision.orchestrationContract?.hard_meta_mode).toBe("non_domain"); }); + it("keeps address lane for a short net follow-up over grounded value-flow context", () => { + const decision = resolveAssistantOrchestrationDecision({ + rawUserMessage: "Рђ какое получилось нетто?", + effectiveAddressUserMessage: "Рђ какое получилось нетто?", + followupContext: { + previous_intent: "customer_revenue_and_payments", + previous_filters: { + counterparty: "Группа РЎР’Рљ", + period_from: "2020-01-01", + period_to: "2020-12-31" + }, + previous_anchor_type: "counterparty", + previous_anchor_value: "Группа РЎР’Рљ", + previous_discovery_pilot_scope: "counterparty_supplier_payout_query_movements_v1" + }, + llmPreDecomposeMeta: { + applied: false, + reason: "no_usable_fragment", + predecomposeContract: { + mode: "unsupported", + mode_confidence: "low", + intent: "unknown", + intent_confidence: "low" + } + } as 
any, + useMock: false + }); + + expect(decision.runAddressLane).toBe(true); + expect(decision.toolGateDecision).toBe("run_address_lane"); + expect(decision.toolGateReason).not.toBe("non_domain_query_indexed"); + expect(decision.livingMode).toBe("address_data"); + }); + it("routes historical capability follow-up over grounded inventory answer to contextual chat", () => { const decision = resolveAssistantOrchestrationDecision({ rawUserMessage: "а исторические данные ты можешь же показать?", diff --git a/llm_normalizer/backend/tests/assistantMcpDiscoveryTurnInputAdapter.test.ts b/llm_normalizer/backend/tests/assistantMcpDiscoveryTurnInputAdapter.test.ts index 78f363f..007e02c 100644 --- a/llm_normalizer/backend/tests/assistantMcpDiscoveryTurnInputAdapter.test.ts +++ b/llm_normalizer/backend/tests/assistantMcpDiscoveryTurnInputAdapter.test.ts @@ -585,6 +585,41 @@ describe("assistant MCP discovery turn input adapter", () => { expect(result.reason_codes).not.toContain("mcp_discovery_not_applicable_for_supported_exact_turn"); }); + it("prefers grounded discovery metadata scope over stale conflicting counterparty in a short net follow-up", () => { + const result = buildAssistantMcpDiscoveryTurnInput({ + userMessage: "Р° какое нетто?", + assistantTurnMeaning: { + asked_domain_family: "counterparty", + asked_action_family: "turnover", + explicit_intent_candidate: "customer_revenue_and_payments" + }, + followupContext: { + previous_discovery_pilot_scope: "counterparty_supplier_payout_query_movements_v1", + previous_filters: { + counterparty: "РќРћР РўРћРќ", + period_from: "2020-01-01", + period_to: "2020-12-31" + }, + previous_anchor_type: "counterparty", + previous_anchor_value: "СервисКонсалт, РћРћРћ", + previous_discovery_loop_metadata_scope_hint: "Группа РЎР’Рљ" + } + }); + + expect(result.adapter_status).toBe("ready"); + expect(result.should_run_discovery).toBe(true); + expect(result.turn_meaning_ref).toMatchObject({ + asked_domain_family: "counterparty_value", + 
asked_action_family: "net_value_flow", + explicit_entity_candidates: ["Группа РЎР’Рљ"], + explicit_date_scope: "2020", + unsupported_but_understood_family: "counterparty_bidirectional_value_flow_or_netting", + stale_replay_forbidden: true + }); + expect(result.turn_meaning_ref?.explicit_entity_candidates).not.toContain("РќРћР РўРћРќ"); + expect(result.reason_codes).toContain("mcp_discovery_grounded_value_flow_followup"); + }); + it.skip("switches from a grounded exact value-flow answer into document evidence without restating the counterparty", () => { const result = buildAssistantMcpDiscoveryTurnInput({ userMessage: "Р° РїРѕ документам?", @@ -1268,6 +1303,35 @@ describe("assistant MCP discovery turn input adapter", () => { expect(result.reason_codes).toContain("mcp_discovery_not_applicable_for_supported_exact_turn"); }); + it("does not bootstrap metadata discovery from a referential document exclusion follow-up over exact document context", () => { + const result = buildAssistantMcpDiscoveryTurnInput({ + userMessage: "кроме этого документа есть еще что-то?", + effectiveMessage: "документы по контрагенту документа", + assistantTurnMeaning: { + asked_domain_family: "counterparty", + asked_action_family: "list_documents", + explicit_intent_candidate: "list_documents_by_counterparty", + explicit_entity_candidates: [{ value: "ТСЖ \"Жуковка 51\"" }] + }, + followupContext: { + previous_intent: "list_documents_by_counterparty", + target_intent: "list_documents_by_counterparty", + previous_anchor_type: "counterparty", + previous_anchor_value: "ТСЖ \"Жуковка 51\"", + previous_filters: { + counterparty: "жуковке 51" + } + } + }); + + expect(result.adapter_status).toBe("not_applicable"); + expect(result.should_run_discovery).toBe(false); + expect(result.turn_meaning_ref).toBeNull(); + expect(result.reason_codes).toContain("mcp_discovery_not_applicable_for_supported_exact_turn"); + expect(result.reason_codes).not.toContain("mcp_discovery_metadata_signal_detected"); + 
expect(result.reason_codes).not.toContain("mcp_discovery_metadata_scope_hint_from_raw_text"); + }); + it("never serializes object candidates as [object Object]", () => { const result = buildAssistantMcpDiscoveryTurnInput({ assistantTurnMeaning: { diff --git a/llm_normalizer/data/autorun_generators/history.json b/llm_normalizer/data/autorun_generators/history.json index 074d534..c8b783d 100644 --- a/llm_normalizer/data/autorun_generators/history.json +++ b/llm_normalizer/data/autorun_generators/history.json @@ -1,4 +1,147 @@ [ + { + "generation_id": "gen-ag04231336-3d4cc9", + "created_at": "2026-04-23T13:36:22+00:00", + "mode": "saved_user_sessions", + "title": "AGENT | Живой диалог по организации: денежный срез, сравнение и рейтинг", + "count": 7, + "domain": "address_phase66_human_org_open_scope_dialog", + "questions": [ + "Хочу быстрый денежный срез по одной организации без привязки к контрагенту. Сколько вообще входящих денег было за 2020 год?", + "По ООО Альтернатива Плюс.", + "Понял, тогда за все время.", + "Хорошо. А что по ООО Альтернатива Плюс больше в 2020 году: входящие или исходящие деньги?", + "А что по ООО Альтернатива Плюс больше уже за 2021 год: входящие или исходящие деньги?", + "И кто больше всего принес денег этой организации в 2020 году?", + "А в 2021 году?" 
+ ], + "generated_by": "codex_agent", + "saved_case_set_file": "assistant_autogen_saved_user_sessions_20260423133622_gen-ag04231336-3d4cc9.json", + "context": { + "llm_provider": null, + "model": null, + "assistant_prompt_version": null, + "decomposition_prompt_version": null, + "prompt_fingerprint": null, + "autogen_personality_id": null, + "autogen_personality_prompt": null, + "source_session_id": null, + "saved_session_file": "assistant_saved_session_20260423133622_gen-ag04231336-3d4cc9.json", + "saved_case_set_kind": "agent_semantic_scenario", + "agent_run": true, + "agent_focus": "Human organization open-scope dialog: org clarification, all-time incoming total, comparison, ranking", + "architecture_phase": "turnaround_11", + "source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\address_truth_harness_phase66_human_org_open_scope_dialog.json", + "scenario_id": "address_truth_harness_phase66_human_org_open_scope_dialog", + "semantic_tags": [ + "all_time_followup", + "human_dialog", + "open_scope_total", + "organization_clarification", + "organization_scope", + "value_flow_comparison", + "value_flow_ranking", + "year_switch" + ] + } + }, + { + "generation_id": "gen-ag04231336-db78b3", + "created_at": "2026-04-23T13:36:22+00:00", + "mode": "saved_user_sessions", + "title": "AGENT | Живой диалог по СВК: деньги, нетто, документы и движения", + "count": 7, + "domain": "address_phase65_human_svk_money_dialog", + "questions": [ + "Хочу проверить одного контрагента. Найди в 1С Группу СВК.", + "Посмотри, сколько денег мы получили от него за 2020 год.", + "А теперь сколько мы ему заплатили?", + "А какое получилось нетто?", + "А по документам?", + "А по движениям?", + "А теперь за 2021 год?" 
+ ], + "generated_by": "codex_agent", + "saved_case_set_file": "assistant_autogen_saved_user_sessions_20260423133622_gen-ag04231336-db78b3.json", + "context": { + "llm_provider": null, + "model": null, + "assistant_prompt_version": null, + "decomposition_prompt_version": null, + "prompt_fingerprint": null, + "autogen_personality_id": null, + "autogen_personality_prompt": null, + "source_session_id": null, + "saved_session_file": "assistant_saved_session_20260423133622_gen-ag04231336-db78b3.json", + "saved_case_set_kind": "agent_semantic_scenario", + "agent_run": true, + "agent_focus": "Human SVK counterparty dialog: grounding, incoming, outgoing, net, documents, movements", + "architecture_phase": "turnaround_11", + "source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\address_truth_harness_phase65_human_svk_money_dialog.json", + "scenario_id": "address_truth_harness_phase65_human_svk_money_dialog", + "semantic_tags": [ + "counterparty_resolution", + "document_pivot_after_value_flow", + "entity_grounding", + "grounded_counterparty_followup", + "human_dialog", + "incoming_value_flow", + "movement_pivot_after_value_flow", + "net_value_flow", + "outgoing_value_flow", + "year_switch" + ] + } + }, + { + "generation_id": "gen-ag04231336-4fa660", + "created_at": "2026-04-23T13:36:22+00:00", + "mode": "saved_user_sessions", + "title": "AGENT | Живой диалог по НДС: от ориентации до документов", + "count": 6, + "domain": "address_phase64_human_vat_investigation_dialog", + "questions": [ + "Мне нужно понять, где в 1С по НДС вообще лежат данные. Какие объекты стоит смотреть по НДС?", + "Хорошо, тогда покажи движения по ООО Альтернатива Плюс.", + "За 2020 год.", + "А теперь по документам?", + "А теперь за 2021 год?", + "А теперь за все время?" 
+ ], + "generated_by": "codex_agent", + "saved_case_set_file": "assistant_autogen_saved_user_sessions_20260423133622_gen-ag04231336-4fa660.json", + "context": { + "llm_provider": null, + "model": null, + "assistant_prompt_version": null, + "decomposition_prompt_version": null, + "prompt_fingerprint": null, + "autogen_personality_id": null, + "autogen_personality_prompt": null, + "source_session_id": null, + "saved_session_file": "assistant_saved_session_20260423133622_gen-ag04231336-4fa660.json", + "saved_case_set_kind": "agent_semantic_scenario", + "agent_run": true, + "agent_focus": "Human VAT dialog: metadata orientation, movement lane, document pivot, year switch, all-time continuity", + "architecture_phase": "turnaround_11", + "source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\address_truth_harness_phase64_human_vat_investigation_dialog.json", + "scenario_id": "address_truth_harness_phase64_human_vat_investigation_dialog", + "semantic_tags": [ + "all_time_followup", + "bounded_retrieval", + "document_lane_continuity", + "document_pivot_after_movement_retrieval", + "human_dialog", + "inline_organization_clarification", + "metadata_surface", + "movement_lane_after_metadata", + "movement_lane_execution", + "scope_reuse", + "vat_orientation", + "year_switch_after_pivot" + ] + } + }, { "generation_id": "gen-moa1y0lw-m30gdsz", "created_at": "2026-04-22T12:51:54.657Z", @@ -1080,4 +1223,4 @@ "latest_acceptance": null } } -] \ No newline at end of file +] diff --git a/llm_normalizer/data/autorun_generators/saved_sessions/assistant_saved_session_20260423133622_gen-ag04231336-3d4cc9.json b/llm_normalizer/data/autorun_generators/saved_sessions/assistant_saved_session_20260423133622_gen-ag04231336-3d4cc9.json new file mode 100644 index 0000000..f704e52 --- /dev/null +++ b/llm_normalizer/data/autorun_generators/saved_sessions/assistant_saved_session_20260423133622_gen-ag04231336-3d4cc9.json @@ -0,0 +1,125 @@ +{ + "saved_at": "2026-04-23T13:36:22+00:00", + 
"generation_id": "gen-ag04231336-3d4cc9", + "mode": "saved_user_sessions", + "title": "AGENT | Живой диалог по организации: денежный срез, сравнение и рейтинг", + "agent_run": true, + "questions": [ + "Хочу быстрый денежный срез по одной организации без привязки к контрагенту. Сколько вообще входящих денег было за 2020 год?", + "По ООО Альтернатива Плюс.", + "Понял, тогда за все время.", + "Хорошо. А что по ООО Альтернатива Плюс больше в 2020 году: входящие или исходящие деньги?", + "А что по ООО Альтернатива Плюс больше уже за 2021 год: входящие или исходящие деньги?", + "И кто больше всего принес денег этой организации в 2020 году?", + "А в 2021 году?" + ], + "metadata": { + "assistant_prompt_version": null, + "decomposition_prompt_version": null, + "prompt_fingerprint": null, + "agent_focus": "Human organization open-scope dialog: org clarification, all-time incoming total, comparison, ranking", + "architecture_phase": "turnaround_11", + "source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\address_truth_harness_phase66_human_org_open_scope_dialog.json", + "scenario_id": "address_truth_harness_phase66_human_org_open_scope_dialog", + "semantic_tags": [ + "all_time_followup", + "human_dialog", + "open_scope_total", + "organization_clarification", + "organization_scope", + "value_flow_comparison", + "value_flow_ranking", + "year_switch" + ] + }, + "source_session_id": null, + "session": { + "session_id": null, + "mode": "agent_semantic_run", + "items": [ + { + "message_id": "agent-user-001", + "role": "user", + "text": "Хочу быстрый денежный срез по одной организации без привязки к контрагенту. 
Сколько вообще входящих денег было за 2020 год?", + "created_at": "2026-04-23T13:36:22+00:00", + "reply_type": null, + "trace_id": null, + "debug": null + }, + { + "message_id": "agent-user-002", + "role": "user", + "text": "По ООО Альтернатива Плюс.", + "created_at": "2026-04-23T13:36:22+00:00", + "reply_type": null, + "trace_id": null, + "debug": null + }, + { + "message_id": "agent-user-003", + "role": "user", + "text": "Понял, тогда за все время.", + "created_at": "2026-04-23T13:36:22+00:00", + "reply_type": null, + "trace_id": null, + "debug": null + }, + { + "message_id": "agent-user-004", + "role": "user", + "text": "Хорошо. А что по ООО Альтернатива Плюс больше в 2020 году: входящие или исходящие деньги?", + "created_at": "2026-04-23T13:36:22+00:00", + "reply_type": null, + "trace_id": null, + "debug": null + }, + { + "message_id": "agent-user-005", + "role": "user", + "text": "А что по ООО Альтернатива Плюс больше уже за 2021 год: входящие или исходящие деньги?", + "created_at": "2026-04-23T13:36:22+00:00", + "reply_type": null, + "trace_id": null, + "debug": null + }, + { + "message_id": "agent-user-006", + "role": "user", + "text": "И кто больше всего принес денег этой организации в 2020 году?", + "created_at": "2026-04-23T13:36:22+00:00", + "reply_type": null, + "trace_id": null, + "debug": null + }, + { + "message_id": "agent-user-007", + "role": "user", + "text": "А в 2021 году?", + "created_at": "2026-04-23T13:36:22+00:00", + "reply_type": null, + "trace_id": null, + "debug": null + } + ], + "agent_run": true, + "metadata": { + "assistant_prompt_version": null, + "decomposition_prompt_version": null, + "prompt_fingerprint": null, + "agent_focus": "Human organization open-scope dialog: org clarification, all-time incoming total, comparison, ranking", + "architecture_phase": "turnaround_11", + "source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\address_truth_harness_phase66_human_org_open_scope_dialog.json", + "scenario_id": 
"address_truth_harness_phase66_human_org_open_scope_dialog", + "semantic_tags": [ + "all_time_followup", + "human_dialog", + "open_scope_total", + "organization_clarification", + "organization_scope", + "value_flow_comparison", + "value_flow_ranking", + "year_switch" + ] + } + } +} diff --git a/llm_normalizer/data/autorun_generators/saved_sessions/assistant_saved_session_20260423133622_gen-ag04231336-4fa660.json b/llm_normalizer/data/autorun_generators/saved_sessions/assistant_saved_session_20260423133622_gen-ag04231336-4fa660.json new file mode 100644 index 0000000..5aa2fe9 --- /dev/null +++ b/llm_normalizer/data/autorun_generators/saved_sessions/assistant_saved_session_20260423133622_gen-ag04231336-4fa660.json @@ -0,0 +1,123 @@ +{ + "saved_at": "2026-04-23T13:36:22+00:00", + "generation_id": "gen-ag04231336-4fa660", + "mode": "saved_user_sessions", + "title": "AGENT | Живой диалог по НДС: от ориентации до документов", + "agent_run": true, + "questions": [ + "Мне нужно понять, где в 1С по НДС вообще лежат данные. Какие объекты стоит смотреть по НДС?", + "Хорошо, тогда покажи движения по ООО Альтернатива Плюс.", + "За 2020 год.", + "А теперь по документам?", + "А теперь за 2021 год?", + "А теперь за все время?" 
+ ], + "metadata": { + "assistant_prompt_version": null, + "decomposition_prompt_version": null, + "prompt_fingerprint": null, + "agent_focus": "Human VAT dialog: metadata orientation, movement lane, document pivot, year switch, all-time continuity", + "architecture_phase": "turnaround_11", + "source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\address_truth_harness_phase64_human_vat_investigation_dialog.json", + "scenario_id": "address_truth_harness_phase64_human_vat_investigation_dialog", + "semantic_tags": [ + "all_time_followup", + "bounded_retrieval", + "document_lane_continuity", + "document_pivot_after_movement_retrieval", + "human_dialog", + "inline_organization_clarification", + "metadata_surface", + "movement_lane_after_metadata", + "movement_lane_execution", + "scope_reuse", + "vat_orientation", + "year_switch_after_pivot" + ] + }, + "source_session_id": null, + "session": { + "session_id": null, + "mode": "agent_semantic_run", + "items": [ + { + "message_id": "agent-user-001", + "role": "user", + "text": "Мне нужно понять, где в 1С по НДС вообще лежат данные. 
Какие объекты стоит смотреть по НДС?", + "created_at": "2026-04-23T13:36:22+00:00", + "reply_type": null, + "trace_id": null, + "debug": null + }, + { + "message_id": "agent-user-002", + "role": "user", + "text": "Хорошо, тогда покажи движения по ООО Альтернатива Плюс.", + "created_at": "2026-04-23T13:36:22+00:00", + "reply_type": null, + "trace_id": null, + "debug": null + }, + { + "message_id": "agent-user-003", + "role": "user", + "text": "За 2020 год.", + "created_at": "2026-04-23T13:36:22+00:00", + "reply_type": null, + "trace_id": null, + "debug": null + }, + { + "message_id": "agent-user-004", + "role": "user", + "text": "А теперь по документам?", + "created_at": "2026-04-23T13:36:22+00:00", + "reply_type": null, + "trace_id": null, + "debug": null + }, + { + "message_id": "agent-user-005", + "role": "user", + "text": "А теперь за 2021 год?", + "created_at": "2026-04-23T13:36:22+00:00", + "reply_type": null, + "trace_id": null, + "debug": null + }, + { + "message_id": "agent-user-006", + "role": "user", + "text": "А теперь за все время?", + "created_at": "2026-04-23T13:36:22+00:00", + "reply_type": null, + "trace_id": null, + "debug": null + } + ], + "agent_run": true, + "metadata": { + "assistant_prompt_version": null, + "decomposition_prompt_version": null, + "prompt_fingerprint": null, + "agent_focus": "Human VAT dialog: metadata orientation, movement lane, document pivot, year switch, all-time continuity", + "architecture_phase": "turnaround_11", + "source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\address_truth_harness_phase64_human_vat_investigation_dialog.json", + "scenario_id": "address_truth_harness_phase64_human_vat_investigation_dialog", + "semantic_tags": [ + "all_time_followup", + "bounded_retrieval", + "document_lane_continuity", + "document_pivot_after_movement_retrieval", + "human_dialog", + "inline_organization_clarification", + "metadata_surface", + "movement_lane_after_metadata", + "movement_lane_execution", + "scope_reuse", + 
"vat_orientation", + "year_switch_after_pivot" + ] + } + } +} diff --git a/llm_normalizer/data/autorun_generators/saved_sessions/assistant_saved_session_20260423133622_gen-ag04231336-db78b3.json b/llm_normalizer/data/autorun_generators/saved_sessions/assistant_saved_session_20260423133622_gen-ag04231336-db78b3.json new file mode 100644 index 0000000..7c804b0 --- /dev/null +++ b/llm_normalizer/data/autorun_generators/saved_sessions/assistant_saved_session_20260423133622_gen-ag04231336-db78b3.json @@ -0,0 +1,129 @@ +{ + "saved_at": "2026-04-23T13:36:22+00:00", + "generation_id": "gen-ag04231336-db78b3", + "mode": "saved_user_sessions", + "title": "AGENT | Живой диалог по СВК: деньги, нетто, документы и движения", + "agent_run": true, + "questions": [ + "Хочу проверить одного контрагента. Найди в 1С Группу СВК.", + "Посмотри, сколько денег мы получили от него за 2020 год.", + "А теперь сколько мы ему заплатили?", + "А какое получилось нетто?", + "А по документам?", + "А по движениям?", + "А теперь за 2021 год?" 
+ ], + "metadata": { + "assistant_prompt_version": null, + "decomposition_prompt_version": null, + "prompt_fingerprint": null, + "agent_focus": "Human SVK counterparty dialog: grounding, incoming, outgoing, net, documents, movements", + "architecture_phase": "turnaround_11", + "source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\address_truth_harness_phase65_human_svk_money_dialog.json", + "scenario_id": "address_truth_harness_phase65_human_svk_money_dialog", + "semantic_tags": [ + "counterparty_resolution", + "document_pivot_after_value_flow", + "entity_grounding", + "grounded_counterparty_followup", + "human_dialog", + "incoming_value_flow", + "movement_pivot_after_value_flow", + "net_value_flow", + "outgoing_value_flow", + "year_switch" + ] + }, + "source_session_id": null, + "session": { + "session_id": null, + "mode": "agent_semantic_run", + "items": [ + { + "message_id": "agent-user-001", + "role": "user", + "text": "Хочу проверить одного контрагента. Найди в 1С Группу СВК.", + "created_at": "2026-04-23T13:36:22+00:00", + "reply_type": null, + "trace_id": null, + "debug": null + }, + { + "message_id": "agent-user-002", + "role": "user", + "text": "Посмотри, сколько денег мы получили от него за 2020 год.", + "created_at": "2026-04-23T13:36:22+00:00", + "reply_type": null, + "trace_id": null, + "debug": null + }, + { + "message_id": "agent-user-003", + "role": "user", + "text": "А теперь сколько мы ему заплатили?", + "created_at": "2026-04-23T13:36:22+00:00", + "reply_type": null, + "trace_id": null, + "debug": null + }, + { + "message_id": "agent-user-004", + "role": "user", + "text": "А какое получилось нетто?", + "created_at": "2026-04-23T13:36:22+00:00", + "reply_type": null, + "trace_id": null, + "debug": null + }, + { + "message_id": "agent-user-005", + "role": "user", + "text": "А по документам?", + "created_at": "2026-04-23T13:36:22+00:00", + "reply_type": null, + "trace_id": null, + "debug": null + }, + { + "message_id": "agent-user-006", + 
"role": "user", + "text": "А по движениям?", + "created_at": "2026-04-23T13:36:22+00:00", + "reply_type": null, + "trace_id": null, + "debug": null + }, + { + "message_id": "agent-user-007", + "role": "user", + "text": "А теперь за 2021 год?", + "created_at": "2026-04-23T13:36:22+00:00", + "reply_type": null, + "trace_id": null, + "debug": null + } + ], + "agent_run": true, + "metadata": { + "assistant_prompt_version": null, + "decomposition_prompt_version": null, + "prompt_fingerprint": null, + "agent_focus": "Human SVK counterparty dialog: grounding, incoming, outgoing, net, documents, movements", + "architecture_phase": "turnaround_11", + "source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\address_truth_harness_phase65_human_svk_money_dialog.json", + "scenario_id": "address_truth_harness_phase65_human_svk_money_dialog", + "semantic_tags": [ + "counterparty_resolution", + "document_pivot_after_value_flow", + "entity_grounding", + "grounded_counterparty_followup", + "human_dialog", + "incoming_value_flow", + "movement_pivot_after_value_flow", + "net_value_flow", + "outgoing_value_flow", + "year_switch" + ] + } + } +} diff --git a/llm_normalizer/data/eval_cases/assistant_autogen_saved_user_sessions_20260423133622_gen-ag04231336-3d4cc9.json b/llm_normalizer/data/eval_cases/assistant_autogen_saved_user_sessions_20260423133622_gen-ag04231336-3d4cc9.json new file mode 100644 index 0000000..309344c --- /dev/null +++ b/llm_normalizer/data/eval_cases/assistant_autogen_saved_user_sessions_20260423133622_gen-ag04231336-3d4cc9.json @@ -0,0 +1,46 @@ +{ + "suite_id": "assistant_saved_session_gen-ag04231336-3d4cc9", + "suite_version": "0.1.0", + "schema_version": "assistant_saved_session_suite_v0_1", + "generated_at": "2026-04-23T13:36:22+00:00", + "generation_id": "gen-ag04231336-3d4cc9", + "mode": "saved_user_sessions", + "title": "AGENT | Живой диалог по организации: денежный срез, сравнение и рейтинг", + "domain": "address_phase66_human_org_open_scope_dialog", + 
"scenario_count": 1, + "case_ids": [ + "SAVED-001" + ], + "cases": [ + { + "case_id": "SAVED-001", + "scenario_tag": "agent_saved_user_sessions", + "title": "AGENT | Живой диалог по организации: денежный срез, сравнение и рейтинг", + "question_type": "followup", + "broadness_level": "medium", + "turns": [ + { + "user_message": "Хочу быстрый денежный срез по одной организации без привязки к контрагенту. Сколько вообще входящих денег было за 2020 год?" + }, + { + "user_message": "По ООО Альтернатива Плюс." + }, + { + "user_message": "Понял, тогда за все время." + }, + { + "user_message": "Хорошо. А что по ООО Альтернатива Плюс больше в 2020 году: входящие или исходящие деньги?" + }, + { + "user_message": "А что по ООО Альтернатива Плюс больше уже за 2021 год: входящие или исходящие деньги?" + }, + { + "user_message": "И кто больше всего принес денег этой организации в 2020 году?" + }, + { + "user_message": "А в 2021 году?" + } + ] + } + ] +} diff --git a/llm_normalizer/data/eval_cases/assistant_autogen_saved_user_sessions_20260423133622_gen-ag04231336-4fa660.json b/llm_normalizer/data/eval_cases/assistant_autogen_saved_user_sessions_20260423133622_gen-ag04231336-4fa660.json new file mode 100644 index 0000000..d0e8a7b --- /dev/null +++ b/llm_normalizer/data/eval_cases/assistant_autogen_saved_user_sessions_20260423133622_gen-ag04231336-4fa660.json @@ -0,0 +1,43 @@ +{ + "suite_id": "assistant_saved_session_gen-ag04231336-4fa660", + "suite_version": "0.1.0", + "schema_version": "assistant_saved_session_suite_v0_1", + "generated_at": "2026-04-23T13:36:22+00:00", + "generation_id": "gen-ag04231336-4fa660", + "mode": "saved_user_sessions", + "title": "AGENT | Живой диалог по НДС: от ориентации до документов", + "domain": "address_phase64_human_vat_investigation_dialog", + "scenario_count": 1, + "case_ids": [ + "SAVED-001" + ], + "cases": [ + { + "case_id": "SAVED-001", + "scenario_tag": "agent_saved_user_sessions", + "title": "AGENT | Живой диалог по НДС: от ориентации до 
документов", + "question_type": "followup", + "broadness_level": "medium", + "turns": [ + { + "user_message": "Мне нужно понять, где в 1С по НДС вообще лежат данные. Какие объекты стоит смотреть по НДС?" + }, + { + "user_message": "Хорошо, тогда покажи движения по ООО Альтернатива Плюс." + }, + { + "user_message": "За 2020 год." + }, + { + "user_message": "А теперь по документам?" + }, + { + "user_message": "А теперь за 2021 год?" + }, + { + "user_message": "А теперь за все время?" + } + ] + } + ] +} diff --git a/llm_normalizer/data/eval_cases/assistant_autogen_saved_user_sessions_20260423133622_gen-ag04231336-db78b3.json b/llm_normalizer/data/eval_cases/assistant_autogen_saved_user_sessions_20260423133622_gen-ag04231336-db78b3.json new file mode 100644 index 0000000..78d9a53 --- /dev/null +++ b/llm_normalizer/data/eval_cases/assistant_autogen_saved_user_sessions_20260423133622_gen-ag04231336-db78b3.json @@ -0,0 +1,46 @@ +{ + "suite_id": "assistant_saved_session_gen-ag04231336-db78b3", + "suite_version": "0.1.0", + "schema_version": "assistant_saved_session_suite_v0_1", + "generated_at": "2026-04-23T13:36:22+00:00", + "generation_id": "gen-ag04231336-db78b3", + "mode": "saved_user_sessions", + "title": "AGENT | Живой диалог по СВК: деньги, нетто, документы и движения", + "domain": "address_phase65_human_svk_money_dialog", + "scenario_count": 1, + "case_ids": [ + "SAVED-001" + ], + "cases": [ + { + "case_id": "SAVED-001", + "scenario_tag": "agent_saved_user_sessions", + "title": "AGENT | Живой диалог по СВК: деньги, нетто, документы и движения", + "question_type": "followup", + "broadness_level": "medium", + "turns": [ + { + "user_message": "Хочу проверить одного контрагента. Найди в 1С Группу СВК." + }, + { + "user_message": "Посмотри, сколько денег мы получили от него за 2020 год." + }, + { + "user_message": "А теперь сколько мы ему заплатили?" + }, + { + "user_message": "А какое получилось нетто?" + }, + { + "user_message": "А по документам?" 
+ }, + { + "user_message": "А по движениям?" + }, + { + "user_message": "А теперь за 2021 год?" + } + ] + } + ] +}