diff --git a/docs/orchestration/active_domain_contract.json b/docs/orchestration/active_domain_contract.json index 0022727..1454e88 100644 --- a/docs/orchestration/active_domain_contract.json +++ b/docs/orchestration/active_domain_contract.json @@ -41,8 +41,8 @@ "buyer_candidate": "Департамент капитального ремонта города Москвы" }, "question_pool": { - "total_questions": 26, - "core_questions_total": 17, + "total_questions": 30, + "core_questions_total": 21, "followup_checkpoints_total": 9, "questions": [ { @@ -90,6 +90,15 @@ "wording_family": "canonical", "semantic_goal": "получить номенклатурный состав складского остатка на дату" }, + { + "question_id": "Q28", + "text": "что лежит у нас на складе в марте 2019", + "layer": "root_snapshot", + "node_id": "N01_stock_snapshot", + "role": "root_variant", + "wording_family": "colloquial", + "semantic_goal": "проверить разговорный root snapshot с предлогом `в` и месячной словоформой без развала в годовой диапазон" + }, { "question_id": "Q06", "text": "От какого поставщика куплен товар ...", @@ -261,6 +270,33 @@ "wording_family": "pronoun_followup", "semantic_goal": "проверить короткий supplier follow-up по уже активному selected object без повторного item anchor" }, + { + "question_id": "Q27", + "text": "По выбранному объекту \"...\": у кого купили", + "layer": "selected_item_provenance", + "node_id": "N03_selected_item_supplier", + "role": "critical_child", + "wording_family": "ui_selected_object_colloquial", + "semantic_goal": "проверить разговорный selected-object follow-up про поставщика без слова `поставщик`" + }, + { + "question_id": "Q29", + "text": "По выбранному объекту \"...\": где мы купили это", + "layer": "selected_item_provenance", + "node_id": "N03_selected_item_supplier", + "role": "critical_child", + "wording_family": "ui_selected_object_colloquial", + "semantic_goal": "проверить разговорный selected-object follow-up с локационной формулировкой, который по смыслу должен вернуться к supplier provenance, а не в документы" + }, + { + "question_id": "Q30", + "text": "По выбранному объекту \"...\": где куплено!!", + "layer": "selected_item_provenance", + "node_id": "N03_selected_item_supplier", + "role": "critical_child", + "wording_family": "ui_selected_object_colloquial", + "semantic_goal": "проверить короткий selected-object follow-up с emphatic punctuation и усеченной формулировкой `где куплено`" + }, { "question_id": "Q24", "text": "По выбранному объекту \"...\": кому был продан товар", @@ -315,7 +351,8 @@ "covers_question_ids": [ "Q01", "Q02", - "Q05" + "Q05", + "Q28" ], "expected_intents": [ "inventory_on_hand_as_of_date" @@ -360,7 +397,10 @@ "Q06", "Q19", "Q22", - "Q23" + "Q23", + "Q27", + "Q29", + "Q30" ], "expected_intents": [ "inventory_purchase_provenance_for_item" @@ -679,6 +719,7 @@ "question_ids": [ "Q01", "Q02", + "Q28", "Q03", "Q04", "Q05" @@ -732,6 +773,31 @@ "expected_capability": "confirmed_inventory_on_hand_as_of_date", "expected_result_mode": "confirmed_balance" }, + { + "step_id": "step_02b_stock_on_named_month_prepositional", + "question_id": "Q28", + "node_id": "N01_stock_snapshot", + "node_role": "root_variant", + "paraphrase_family": "colloquial", + "title": "Historical stock slice with prepositional month wording", + "question": "что лежит у нас на складе в марте 2019", + "analysis_context": { + "as_of_date": "2019-03-31", + "source": "binding_target_date_historical" + }, + "expected_capability": "confirmed_inventory_on_hand_as_of_date", + "expected_result_mode": "confirmed_balance", + "required_filters": { + "as_of_date": "2019-03-31", + "period_from": "2019-03-01", + "period_to": "2019-03-31" + }, + "invariant_severity": { + "wrong_as_of_date": "P0", + "wrong_period_from": "P0", + "wrong_period_to": "P0" + } + }, { "step_id": "step_03_account_41_now", "question_id": "Q03", @@ -788,6 +854,9 @@ "Q26", "Q10", "Q19", + "Q27", + "Q29", + "Q30", "Q20", "Q21", "Q22", @@ -888,6 +957,105 @@ "organization_scope" ] }, + { + "step_id": "step_03b_selected_item_supplier_u_kogo_kupili", + "question_id": "Q27", + "node_id": "N03_selected_item_supplier", + "node_role": "critical_child", + "paraphrase_family": "ui_selected_object_colloquial", + "title": "Selected item supplier colloquial without supplier noun", + "question": "По выбранному объекту \"{{bindings.focus_item_historical}}\": у кого купили", + "depends_on": [ + "step_01_snapshot_historical" + ], + "analysis_context": { + "as_of_date": "2019-03-31", + "source": "binding_target_date_historical" + }, + "expected_capability": "inventory_purchase_provenance_for_item", + "forbidden_capabilities": [ + "confirmed_inventory_on_hand_as_of_date" + ], + "forbidden_recipes": [ + "address_inventory_on_hand_as_of_date_v1" + ], + "required_state_objects": [ + "focus_object" + ], + "required_carryover_invariants": [ + "selected_object", + "focus_object", + "date_scope", + "warehouse_scope", + "organization_scope" + ] + }, + { + "step_id": "step_03c_selected_item_supplier_gde_my_kupili", + "question_id": "Q29", + "node_id": "N03_selected_item_supplier", + "node_role": "critical_child", + "paraphrase_family": "ui_selected_object_colloquial", + "title": "Selected item supplier colloquial with where-bought wording", + "question": "По выбранному объекту \"{{bindings.focus_item_historical}}\": где мы купили это", + "depends_on": [ + "step_01_snapshot_historical" + ], + "analysis_context": { + "as_of_date": "2019-03-31", + "source": "binding_target_date_historical" + }, + "expected_capability": "inventory_purchase_provenance_for_item", + "forbidden_capabilities": [ + "confirmed_inventory_on_hand_as_of_date" + ], + "forbidden_recipes": [ + "address_inventory_on_hand_as_of_date_v1" + ], + "required_state_objects": [ + "focus_object" + ], + "required_carryover_invariants": [ + "selected_object", + "focus_object", + "date_scope", + "warehouse_scope", + "organization_scope" + ] + }, + { + "step_id": "step_03d_selected_item_supplier_gde_kupleno", + "question_id": "Q30", + "node_id": "N03_selected_item_supplier", + "node_role": "critical_child", + "paraphrase_family": "ui_selected_object_colloquial", + "title": "Selected item supplier terse where-bought wording", + "question": "По выбранному объекту \"{{bindings.focus_item_historical}}\": где куплено!!", + "depends_on": [ + "step_01_snapshot_historical" + ], + "analysis_context": { + "as_of_date": "2019-03-31", + "source": "binding_target_date_historical" + }, + "expected_capability": "inventory_purchase_provenance_for_item", + "forbidden_capabilities": [ + "confirmed_inventory_on_hand_as_of_date" + ], + "forbidden_recipes": [ + "address_inventory_on_hand_as_of_date_v1" + ], + "required_state_objects": [ + "focus_object" + ], + "required_carryover_invariants": [ + "selected_object", + "focus_object", + "date_scope", + "warehouse_scope", + "organization_scope" + ] + }, { "step_id": "step_04_selected_item_supplier_pronoun", "question_id": "Q23", @@ -904,6 +1072,15 @@ "source": "binding_target_date_historical" }, "expected_capability": "inventory_purchase_provenance_for_item", + "forbidden_capabilities": [ + "confirmed_inventory_on_hand_as_of_date" + ], + "forbidden_recipes": [ + "address_inventory_on_hand_as_of_date_v1" + ], + "required_state_objects": [ + "focus_object" + ], "required_carryover_invariants": [ "selected_object", "focus_object", @@ -926,7 +1103,16 @@ "as_of_date": "2019-03-31", "source": "binding_target_date_historical" }, - "expected_capability": "inventory_purchase_provenance_for_item" + "expected_capability": "inventory_purchase_provenance_for_item", + "forbidden_capabilities": [ + "confirmed_inventory_on_hand_as_of_date" + ], + "forbidden_recipes": [ + "address_inventory_on_hand_as_of_date_v1" + ], + "required_state_objects": [ + "focus_object" + ] }, { "step_id": "step_06_selected_item_purchase_date", @@ -939,6 +1125,15 @@ "depends_on": [ "step_01_snapshot_historical", "step_02_selected_item_supplier_ui" + ], + "required_state_objects": [ + "focus_object" + ], + "forbidden_capabilities": [ + "confirmed_inventory_on_hand_as_of_date" + ], + "forbidden_recipes": [ + "address_inventory_on_hand_as_of_date_v1" ] }, { @@ -957,6 +1152,15 @@ "source": "binding_target_date_historical" }, "expected_capability": "inventory_purchase_provenance_for_item", + "forbidden_capabilities": [ + "confirmed_inventory_on_hand_as_of_date" + ], + "forbidden_recipes": [ + "address_inventory_on_hand_as_of_date_v1" + ], + "required_state_objects": [ + "focus_object" + ], "required_carryover_invariants": [ "selected_object", "focus_object", @@ -982,6 +1186,17 @@ "source": "binding_target_date_historical" }, "expected_capability": "inventory_purchase_documents_for_item", + "required_state_objects": [ + "focus_object" + ], + "forbidden_capabilities": [ + "confirmed_inventory_on_hand_as_of_date", + "inventory_purchase_provenance_for_item" + ], + "forbidden_recipes": [ + "address_inventory_on_hand_as_of_date_v1", + "address_inventory_purchase_provenance_for_item_v1" + ], "required_carryover_invariants": [ "selected_object", "focus_object", @@ -1007,6 +1222,17 @@ "source": "binding_target_date_historical" }, "expected_capability": "inventory_purchase_documents_for_item", + "required_state_objects": [ + "focus_object" + ], + "forbidden_capabilities": [ + "confirmed_inventory_on_hand_as_of_date", + "inventory_purchase_provenance_for_item" + ], + "forbidden_recipes": [ + "address_inventory_on_hand_as_of_date_v1", + "address_inventory_purchase_provenance_for_item_v1" + ], "required_carryover_invariants": [ "selected_object", "focus_object", @@ -1031,7 +1257,18 @@ "as_of_date": "2019-03-31", "source": "binding_target_date_historical" }, - "expected_capability": "inventory_purchase_documents_for_item" + "expected_capability": "inventory_purchase_documents_for_item", + "required_state_objects": [ + "focus_object" + ], + "forbidden_capabilities": [ + "confirmed_inventory_on_hand_as_of_date", + "inventory_purchase_provenance_for_item" + ], + "forbidden_recipes": [ + "address_inventory_on_hand_as_of_date_v1", + "address_inventory_purchase_provenance_for_item_v1" + ] } ] }, @@ -1541,6 +1778,21 @@ "symptom": "a narrow business follow-up opens with `Блок 1/2/3` scaffolding instead of a compact direct answer", "defect_class": "business_utility_gap" }, + { + "pattern_id": "F12b_selected_object_supplier_u_kogo_kupili_misroute", + "symptom": "selected-object follow-up such as `у кого купили` stays on the root stock snapshot instead of switching to selected-item provenance", + "defect_class": "followup_action_resolution_gap" + }, + { + "pattern_id": "F12c_root_named_month_prepositional_misparsed", + "symptom": "root stock snapshot wording such as `что лежит у нас на складе в мае 2016` collapses to the full year boundary instead of the requested month window", + "defect_class": "temporal_honesty_gap" + }, + { + "pattern_id": "F12d_selected_object_supplier_gde_kupili_misroute", + "symptom": "selected-object follow-up such as `где мы купили это` or `где куплено!!` stays on the root stock snapshot or drifts into generic documents instead of switching to selected-item provenance", + "defect_class": "followup_action_resolution_gap" + }, { "pattern_id": "F13_selected_item_sale_followup_misroute", "symptom": "selected-item follow-up such as `кому в итоге мы продали этот товар` drifts back into purchase provenance and answers about supplier instead of buyer", diff --git a/llm_normalizer/backend/dist/services/addressFilterExtractor.js b/llm_normalizer/backend/dist/services/addressFilterExtractor.js index 1e0888e..ae804ec 100644 --- a/llm_normalizer/backend/dist/services/addressFilterExtractor.js +++ b/llm_normalizer/backend/dist/services/addressFilterExtractor.js @@ -22,10 +22,10 @@ const YEAR_PERIOD_SHORT_PATTERN = /(?:^|[\s,.;:!?()\-])(\d{2})\s*(?:г(?:од|о const YEAR_PERIOD_SHORT_ORDINAL_PATTERN = /(?:^|[\s,.;:!?()\-])(?:за|for|на|in)?\s*(\d{2})\s*(?:[-\s]?(?:й|ый|ой|th))(?:\s*(?:г(?:од|ода)?\.?|year|period|период))?(?=$|[\s,.;:!?()\-])/iu; const YEAR_PERIOD_SHORT_BARE_PATTERN = /(?:^|[\s,.;:!?()\-])(?:за|for|на|in)\s*(\d{2})(?=$|[\s,.;:!?()\-])/iu; const YEAR_PERIOD_ANY_PATTERN = /(?:^|[\s,.;:!?()\-])((?:19|20)\d{2})(?!\s*(?:[-‐‑‒–—―−]|до|to|по)\s*(?:19|20)\d{2})(?![.\/-]\d)(?:\s*(?:г(?:од|ода)?\.?|year))?(?=$|[\s,.;:!?()\-])/iu; -const MONTH_PERIOD_NUMERIC_MONTH_YEAR_PATTERN = /(?:^|[\s,.;:!?()\-])(?:за|for|на|in)?\s*(0?[1-9]|1[0-2])[.\/-](20\d{2})(?=$|[\s,.;:!?()\-])/iu; -const MONTH_PERIOD_NUMERIC_YEAR_MONTH_PATTERN = /(?:^|[\s,.;:!?()\-])(?:за|for|на|in)?\s*(20\d{2})[.\/-](0?[1-9]|1[0-2])(?=$|[\s,.;:!?()\-])/iu; -const MONTH_PERIOD_NAME_PATTERN = /(?:^|[\s,.;:!?()\-])(?:за|for|на|in)?\s*([a-zа-яё]+)\s+(20\d{2})(?:\s*г(?:од|ода|\\.)?)?(?=$|[\s,.;:!?()\-])/iu; -const MONTH_PERIOD_NAME_YEAR_FIRST_PATTERN = /(?:^|[\s,.;:!?()\-])(?:за|for|на|in)?\s*(20\d{2})(?:\s*г(?:од|ода|\\.)?)?\s+([a-zа-яё]+)(?=$|[\s,.;:!?()\-])/iu; +const MONTH_PERIOD_NUMERIC_MONTH_YEAR_PATTERN = /(?:^|[\s,.;:!?()\-])(?:за|for|на|в|во|in)?\s*(0?[1-9]|1[0-2])[.\/-](20\d{2})(?=$|[\s,.;:!?()\-])/iu; +const MONTH_PERIOD_NUMERIC_YEAR_MONTH_PATTERN = /(?:^|[\s,.;:!?()\-])(?:за|for|на|в|во|in)?\s*(20\d{2})[.\/-](0?[1-9]|1[0-2])(?=$|[\s,.;:!?()\-])/iu; +const MONTH_PERIOD_NAME_PATTERN = /(?:^|[\s,.;:!?()\-])(?:за|for|на|в|во|in)?\s*([a-zа-яё]+)\s+(20\d{2})(?:\s*г(?:од|ода|\\.)?)?(?=$|[\s,.;:!?()\-])/iu; +const MONTH_PERIOD_NAME_YEAR_FIRST_PATTERN = /(?:^|[\s,.;:!?()\-])(?:за|for|на|в|во|in)?\s*(20\d{2})(?:\s*г(?:од|ода|\\.)?)?\s+([a-zа-яё]+)(?=$|[\s,.;:!?()\-])/iu; const DOC_SIGNAL_PATTERN = "(?:док(?:и|умент|ументы|ументов|умам|ума)|docs?|documents?|docy|doci|doki|dokument(?:y|ov|am|a)?)"; const COUNTERPARTY_TOKEN_NOISE = new Set([ "за", @@ -249,7 +249,7 @@ function resolveMonthByName(rawMonthName) { return 3; if (/^апр|^april|^apr/.test(token)) return 4; - if (/^ма[йя]|^may/.test(token)) + if (/^ма(?:й|я|е)|^may/.test(token)) return 5; if (/^июн|^june|^jun/.test(token)) return 6; @@ -1062,6 +1062,18 @@ function usesAsOfPrimaryWindow(intent) { intent === "receivables_confirmed_as_of_date" || intent === "vat_payable_confirmed_as_of_date"); } +function shouldDefaultAsOfDateToToday(intent) { + return (intent === "account_balance_snapshot" || + intent === "documents_forming_balance" || + intent === "inventory_on_hand_as_of_date" || + intent === "inventory_supplier_stock_overlap_as_of_date" || + intent === "open_items_by_counterparty_or_contract" || + intent === "list_open_contracts" || + intent === "open_contracts_confirmed_as_of_date" || + intent === "payables_confirmed_as_of_date" || + intent === "receivables_confirmed_as_of_date" || + intent === "vat_payable_confirmed_as_of_date"); +} function extractAddressFilters(userMessage, intent) { const rawText = String(userMessage ?? "").trim(); const text = normalizeMojibakeString(rawText); @@ -1255,25 +1267,13 @@ function extractAddressFilters(userMessage, intent) { // For balance-style intents we force as_of_date deterministically: // - explicit as_of has priority; // - else use period_to boundary when provided; - // - else default to today. - if ((intent === "account_balance_snapshot" || - intent === "documents_forming_balance" || - intent === "inventory_on_hand_as_of_date" || - intent === "inventory_purchase_provenance_for_item" || - intent === "inventory_purchase_documents_for_item" || - intent === "inventory_supplier_stock_overlap_as_of_date" || - intent === "inventory_sale_trace_for_item" || - intent === "inventory_purchase_to_sale_chain" || - intent === "inventory_aging_by_purchase_date" || - intent === "payables_confirmed_as_of_date" || - intent === "receivables_confirmed_as_of_date" || - intent === "vat_payable_confirmed_as_of_date") && - !filters.as_of_date) { + // - for strict snapshot intents only, default to today. + if (usesAsOfPrimaryWindow(intent) && !filters.as_of_date) { if (filters.period_to) { filters.as_of_date = filters.period_to; warnings.push("as_of_date_derived_from_period_to"); } - else { + else if (shouldDefaultAsOfDateToToday(intent)) { filters.as_of_date = new Date().toISOString().slice(0, 10); warnings.push("as_of_date_defaulted_today"); } diff --git a/llm_normalizer/backend/dist/services/addressIntentResolver.js b/llm_normalizer/backend/dist/services/addressIntentResolver.js index 02d4638..2fecb07 100644 --- a/llm_normalizer/backend/dist/services/addressIntentResolver.js +++ b/llm_normalizer/backend/dist/services/addressIntentResolver.js @@ -1345,7 +1345,7 @@ function hasSelectedObjectInventoryCue(text) { } function hasSelectedObjectInventoryProvenanceSignal(text) { return (hasSelectedObjectInventoryCue(text) && - /(?:кто\s+(?:(?:это|этот\s+товар|эту\s+позицию)\s+)?(?:нам\s+)?поставил|кто\s+(?:нам\s+)?поставил\s+(?:это|этот\s+товар|эту\s+позицию)|от\s+какого\s+поставщика|у\s+какого\s+поставщика|от\s+кого\s+куплен|supplier|vendor|поставщик)/iu.test(text)); + /(?:кто\s+(?:(?:это|этот\s+товар|эту\s+позицию)\s+)?(?:нам\s+)?поставил|кто\s+(?:нам\s+)?поставил\s+(?:это|этот\s+товар|эту\s+позицию)|от\s+какого\s+поставщика|у\s+какого\s+поставщика|от\s+кого\s+куплен|у\s+кого\s+купили|где\s+(?:мы\s+)?купили(?:\s+(?:это|его|этот\s+товар|эту\s+позицию))?|где\s+куплено|supplier|vendor|поставщик)/iu.test(text)); } function hasSelectedObjectInventoryPurchaseDocumentsSignal(text) { return (hasSelectedObjectInventoryCue(text) && @@ -1353,8 +1353,8 @@ function hasSelectedObjectInventoryPurchaseDocumentsSignal(text) { } function hasInventoryProvenanceSignalV2(text) { const hasItemCue = /(?:товар|номенклатур|sku|item|product|остат(?:ок|ки)|склад)/iu.test(text); - const hasSupplierCue = /(?:от\s+какого\s+поставщика|у\s+какого\s+поставщика|от\s+кого\s+куплен|кто\s+(?:нам\s+)?поставил|кем\s+поставлен|поставщик|supplier|vendor)/iu.test(text); - const hasPurchaseCue = /(?:куплен(?:ы|а)?|закупк|происхождени|откуда|когда\s+был\s+куплен|когда\s+куплен|дата\s+закупк|кто\s+(?:нам\s+)?поставил|кем\s+поставлен|поставлен(?:ы|а)?|purchase\s+provenance|purchase\s+date)/iu.test(text); + const hasSupplierCue = /(?:от\s+какого\s+поставщика|у\s+какого\s+поставщика|от\s+кого\s+куплен|у\s+кого\s+купили|где\s+(?:мы\s+)?купили(?:\s+(?:это|его|товар|позицию))?|где\s+куплено|кто\s+(?:нам\s+)?поставил|кем\s+поставлен|поставщик|supplier|vendor)/iu.test(text); + const hasPurchaseCue = /(?:куплен(?:ы|а|о)?|закупк|происхождени|откуда|где\s+(?:мы\s+)?купили(?:\s+(?:это|его|товар|позицию))?|где\s+куплено|когда\s+был\s+куплен|когда\s+куплен|дата\s+закупк|кто\s+(?:нам\s+)?поставил|кем\s+поставлен|поставлен(?:ы|а)?|purchase\s+provenance|purchase\s+date)/iu.test(text); return hasItemCue && hasSupplierCue && hasPurchaseCue; } function hasInventoryPurchaseDateSignal(text) { diff --git a/llm_normalizer/backend/dist/services/addressQueryClassifier.js b/llm_normalizer/backend/dist/services/addressQueryClassifier.js index d819f0e..c230035 100644 --- a/llm_normalizer/backend/dist/services/addressQueryClassifier.js +++ b/llm_normalizer/backend/dist/services/addressQueryClassifier.js @@ -265,6 +265,12 @@ function hasAddressFollowupSignal(text) { } return false; } +function hasSelectedObjectInventoryFollowupSignal(text) { + if (!/(?:по\s+выбранному\s+объекту|по\s+выбранной\s+позиции)/iu.test(text)) { + return false; + } + return /(?:у\s+кого\s+купили|где\s+(?:мы\s+)?купили(?:\s+(?:это|его|товар|позицию))?|где\s+куплено|кто\s+(?:поставил|продал)|кому\s+(?:продали|реализовали)|когда\s+(?:примерно\s+)?купили|по\s+каким\s+документам\s+.*купили)/iu.test(text); +} function hasDocsOrBankSignal(text) { return /(?:док(?:и|умент|ументы|ументов)|docs?|documents?|банк|выписк|платеж|платёж|оплат|поступлен|списан|транзак|transactions?|bank\s+ops|bank\s+operations?)/iu.test(text); } @@ -377,6 +383,7 @@ function detectAddressQuestionMode(userMessage) { const hasManagementSignal = hasManagementProfileSignal(text); const hasLooseByAnchor = hasLooseByAnchorMention(text); const hasFollowupSignal = hasAddressFollowupSignal(text); + const hasSelectedObjectInventoryFollowup = hasSelectedObjectInventoryFollowupSignal(text); const hasAccountCode = hasAccountCodeAnchor(text); if (hasAddressAction && (hasAddressEntity || hasAccountCode) && !hasDeepReasoning) { return { @@ -385,6 +392,13 @@ function detectAddressQuestionMode(userMessage) { reasons: ["address_action_detected", "address_entity_detected"] }; } + if (hasSelectedObjectInventoryFollowup && !hasDeepReasoning) { + return { + mode: "address_query", + confidence: "medium", + reasons: ["selected_object_inventory_followup_detected"] + }; + } if (hasManagementSignal && !hasDeepReasoning) { return { mode: "address_query", diff --git a/llm_normalizer/backend/dist/services/address_runtime/decomposeStage.js b/llm_normalizer/backend/dist/services/address_runtime/decomposeStage.js index e6f192f..9d0fe27 100644 --- a/llm_normalizer/backend/dist/services/address_runtime/decomposeStage.js +++ b/llm_normalizer/backend/dist/services/address_runtime/decomposeStage.js @@ -257,7 +257,7 @@ function hasSelectedObjectInventorySignal(text) { return /(?:по\s+выбранному\s+объекту|for\s+selected\s+object)/iu.test(String(text ?? "")); } function hasInventorySupplierFollowupCue(text) { - return /(?:кто\s+(?:(?:это|этот\s+товар|эту\s+позицию)\s+)?(?:нам\s+)?поставил|кто\s+(?:нам\s+)?поставил\s+(?:это|этот\s+товар|эту\s+позицию)|от\s+какого\s+поставщика|у\s+какого\s+поставщика|от\s+кого\s+куплен|supplier|vendor|поставщик)/iu.test(String(text ?? "")); + return /(?:кто\s+(?:(?:это|этот\s+товар|эту\s+позицию)\s+)?(?:нам\s+)?поставил|кто\s+(?:нам\s+)?поставил\s+(?:это|этот\s+товар|эту\s+позицию)|от\s+какого\s+поставщика|у\s+какого\s+поставщика|от\s+кого\s+куплен|у\s+кого\s+купили|где\s+(?:мы\s+)?купили(?:\s+(?:это|его|этот\s+товар|эту\s+позицию))?|где\s+куплено|supplier|vendor|поставщик)/iu.test(String(text ?? "")); } function hasInventoryPurchaseDocumentsFollowupCue(text) { return /(?:по\s+каким\s+документам\s+(?:это|его|этот\s+товар|эту\s+позицию)\s+купили|по\s+каким\s+документам\s+(?:был\s+)?куплен|какими\s+документами\s+(?:это|его|этот\s+товар|эту\s+позицию)\s+купили|какими\s+документами\s+(?:был\s+)?куплен|покажи\s+документы\s+по\s+(?:этой\s+позиции|этому\s+товару|ней|нему)|документы\s+по\s+(?:этой\s+позиции|этому\s+товару|ней|нему)|purchase\s+documents|documents\s+of\s+purchase|through\s+which\s+documents)/iu.test(String(text ?? "")); @@ -674,6 +674,8 @@ function deriveIntentWithFollowupContext(detectedIntent, userMessage, followupCo const inventorySelectedObjectFollowup = hasSelectedObjectInventorySignal(normalizedMessage) || (previousIsInventoryFamily && hasFollowupSignal); if (inventorySelectedObjectFollowup && hasInventorySupplierFollowupCue(normalizedMessage)) { if (detectedIntent.intent === "unknown" || + detectedIntent.intent === "list_documents_by_counterparty" || + detectedIntent.intent === "list_documents_by_contract" || detectedIntent.intent === "inventory_on_hand_as_of_date" || detectedIntent.intent === previousIntent) { return { diff --git a/llm_normalizer/backend/dist/services/assistantService.js b/llm_normalizer/backend/dist/services/assistantService.js index 1742578..eade237 100644 --- a/llm_normalizer/backend/dist/services/assistantService.js +++ b/llm_normalizer/backend/dist/services/assistantService.js @@ -3278,6 +3278,12 @@ function hasPredecomposeExplicitDrilldownSignal(text) { const source = String(text ?? ""); return ADDRESS_DOCS_SIGNAL_PATTERN.test(source) || ADDRESS_BANK_SIGNAL_PATTERN.test(source) || ADDRESS_CONTRACT_SIGNAL_PATTERN.test(source); } +function hasSelectedObjectInventoryFollowupSignalForPredecompose(text) { + return /(?:по\s+выбранному\s+объекту|по\s+этой\s+позиции|по\s+этому\s+товару|selected\s+object)/iu.test(String(text ?? "")); +} +function isInventorySelectedObjectFollowupIntent(intent) { + return intent === "inventory_purchase_provenance_for_item" || intent === "inventory_purchase_documents_for_item"; +} function hasSameDateAccountFollowupSignalForPredecompose(text) { const source = String(text ?? ""); const hasSameDate = /(?:на\s+ту\s+же\s+дат[ауеы]|на\s+эту\s+же\s+дат[ауеы]|та\s+же\s+дата|same\s+date|the\s+same\s+date|as\s+of\s+same\s+date)/iu.test(source); @@ -3453,6 +3459,26 @@ async function runAddressLlmPreDecompose(normalizerService, payload, userMessage sanitizedUserMessage }, userMessage); } + const sourceHasSelectedObjectInventoryFollowup = hasSelectedObjectInventoryFollowupSignalForPredecompose(repairedSourceMessage || userMessage); + const candidateHasSelectedObjectInventoryFollowup = hasSelectedObjectInventoryFollowupSignalForPredecompose(candidate); + const candidateInjectsGenericDocsIntent = candidateIntentResolution.intent === "list_documents_by_counterparty" || + candidateIntentResolution.intent === "list_documents_by_contract"; + if (sourceHasSelectedObjectInventoryFollowup && + isInventorySelectedObjectFollowupIntent(sourceIntentResolution.intent) && + !candidateHasSelectedObjectInventoryFollowup && + candidateInjectsGenericDocsIntent) { + return attachAddressPredecomposeContract({ + ...baseMeta, + attempted: true, + applied: false, + traceId: normalized?.trace_id ?? null, + llmCanonicalCandidateDetected: true, + effectiveMessage: userMessage, + reason: "normalized_fragment_rejected_selected_object_context_loss", + fallbackRuleHit: null, + sanitizedUserMessage + }, userMessage); + } const sourceAnchorQuality = evaluateAddressAnchorQuality(repairedSourceMessage || userMessage); const candidateAnchorQuality = evaluateAddressAnchorQuality(candidate); const sameIntentForAnchorSafety = sourceAnchorQuality.intent !== "unknown" && sourceAnchorQuality.intent === candidateAnchorQuality.intent; diff --git a/llm_normalizer/backend/src/services/addressFilterExtractor.ts b/llm_normalizer/backend/src/services/addressFilterExtractor.ts index b1eb296..a158b01 100644 --- a/llm_normalizer/backend/src/services/addressFilterExtractor.ts +++ b/llm_normalizer/backend/src/services/addressFilterExtractor.ts @@ -29,13 +29,13 @@ const YEAR_PERIOD_SHORT_BARE_PATTERN = const YEAR_PERIOD_ANY_PATTERN = /(?:^|[\s,.;:!?()\-])((?:19|20)\d{2})(?!\s*(?:[-‐‑‒–—―−]|до|to|по)\s*(?:19|20)\d{2})(?![.\/-]\d)(?:\s*(?:г(?:од|ода)?\.?|year))?(?=$|[\s,.;:!?()\-])/iu; const MONTH_PERIOD_NUMERIC_MONTH_YEAR_PATTERN = - /(?:^|[\s,.;:!?()\-])(?:за|for|на|in)?\s*(0?[1-9]|1[0-2])[.\/-](20\d{2})(?=$|[\s,.;:!?()\-])/iu; + /(?:^|[\s,.;:!?()\-])(?:за|for|на|в|во|in)?\s*(0?[1-9]|1[0-2])[.\/-](20\d{2})(?=$|[\s,.;:!?()\-])/iu; const MONTH_PERIOD_NUMERIC_YEAR_MONTH_PATTERN = - /(?:^|[\s,.;:!?()\-])(?:за|for|на|in)?\s*(20\d{2})[.\/-](0?[1-9]|1[0-2])(?=$|[\s,.;:!?()\-])/iu; + /(?:^|[\s,.;:!?()\-])(?:за|for|на|в|во|in)?\s*(20\d{2})[.\/-](0?[1-9]|1[0-2])(?=$|[\s,.;:!?()\-])/iu; const MONTH_PERIOD_NAME_PATTERN = - /(?:^|[\s,.;:!?()\-])(?:за|for|на|in)?\s*([a-zа-яё]+)\s+(20\d{2})(?:\s*г(?:од|ода|\\.)?)?(?=$|[\s,.;:!?()\-])/iu; + /(?:^|[\s,.;:!?()\-])(?:за|for|на|в|во|in)?\s*([a-zа-яё]+)\s+(20\d{2})(?:\s*г(?:од|ода|\\.)?)?(?=$|[\s,.;:!?()\-])/iu; const MONTH_PERIOD_NAME_YEAR_FIRST_PATTERN = - /(?:^|[\s,.;:!?()\-])(?:за|for|на|in)?\s*(20\d{2})(?:\s*г(?:од|ода|\\.)?)?\s+([a-zа-яё]+)(?=$|[\s,.;:!?()\-])/iu; + /(?:^|[\s,.;:!?()\-])(?:за|for|на|в|во|in)?\s*(20\d{2})(?:\s*г(?:од|ода|\\.)?)?\s+([a-zа-яё]+)(?=$|[\s,.;:!?()\-])/iu; const DOC_SIGNAL_PATTERN = "(?:док(?:и|умент|ументы|ументов|умам|ума)|docs?|documents?|docy|doci|doki|dokument(?:y|ov|am|a)?)"; const COUNTERPARTY_TOKEN_NOISE = new Set([ @@ -282,7 +282,7 @@ function resolveMonthByName(rawMonthName: string): number | undefined { if (/^фев|^february|^feb/.test(token)) return 2; if (/^мар|^march|^mar/.test(token)) return 3; if (/^апр|^april|^apr/.test(token)) return 4; - if (/^ма[йя]|^may/.test(token)) return 5; + if (/^ма(?:й|я|е)|^may/.test(token)) return 5; if (/^июн|^june|^jun/.test(token)) return 6; if (/^июл|^july|^jul/.test(token)) return 7; if (/^авг|^august|^aug/.test(token)) return 8; @@ -1211,6 +1211,8 @@ function requiredFiltersByIntent(intent: AddressIntent): Array { expect(executeAddressMcpQueryMock).toHaveBeenCalledTimes(2); }); + it("uses analysis date hint for canonical item provenance wording without explicit date", async () => { + executeAddressMcpQueryMock.mockResolvedValueOnce({ + fetched_rows: 1, + matched_rows: 1, + raw_rows: [ + { + Period: "2019-02-12T00:00:00Z", + Registrator: "Поступление товаров и услуг 00000000003 от 12.02.2019 0:00:00", + AccountDt: "41.01", + AccountKt: "60.01", + Amount: 3724.17, + SubcontoDt1: "Столешница 600*3050*26 дуб ниагара", + SubcontoDt3: "Основной склад", + SubcontoKt1: "Торговый дом \\Союз", + SubcontoKt2: "Договор поставки № 12 от 01.02.2019", + Organization: "ООО \\Альтернатива Плюс\\" + } + ], + rows: [], + error: null + }); + + const service = new AddressQueryService(); + const result = await service.tryHandle("От какого поставщика куплен товар Столешница 600*3050*26 дуб ниагара", { + analysisDateHint: "2019-03-31" + }); + + expect(result?.handled).toBe(true); + expect(result?.response_type).toBe("FACTUAL_SUMMARY"); + expect(result?.debug.detected_intent).toBe("inventory_purchase_provenance_for_item"); + expect(result?.debug.extracted_filters?.item).toBe("Столешница 600*3050*26 дуб ниагара"); + expect(result?.debug.extracted_filters?.as_of_date).toBe("2019-03-31"); + expect(result?.debug.reasons).toContain("as_of_date_from_analysis_context"); + expect(String(result?.reply_text ?? "")).toContain("Торговый дом \\Союз"); + }); + it("handles selected-object supplier slang 'кто это поставил нам' as provenance follow-up", async () => { executeAddressMcpQueryMock.mockResolvedValueOnce({ fetched_rows: 1, @@ -160,6 +196,153 @@ describe("inventory selected-object follow-up", () => { expect(String(result?.reply_text ?? "")).toContain("Торговый дом \\Союз МСК\\"); }); + it("handles selected-object colloquial supplier wording 'у кого купили' as provenance follow-up", async () => { + executeAddressMcpQueryMock.mockResolvedValueOnce({ + fetched_rows: 1, + matched_rows: 1, + raw_rows: [ + { + Period: "2016-07-20T00:00:00Z", + Registrator: "Поступление товаров и услуг 00000000011 от 20.07.2016 0:00:00", + AccountDt: "41.01", + AccountKt: "60.01", + Amount: 695360, + SubcontoDt1: "Рабочая станция универсального специалиста (индивидуальное изготовление)", + SubcontoDt3: "Основной склад", + SubcontoKt1: "ООО \\Производство мебели\\", + SubcontoKt2: "Договор поставки № 7 от 15.07.2016", + Organization: "ООО \\Альтернатива Плюс\\" + } + ], + rows: [], + error: null + }); + + const service = new AddressQueryService(); + const result = await service.tryHandle( + 'По выбранному объекту "Рабочая станция универсального специалиста (индивидуальное изготовление)": у кого купили', + { + followupContext: { + previous_intent: "inventory_on_hand_as_of_date", + previous_filters: { + as_of_date: "2016-07-31", + period_from: "2016-07-01", + period_to: "2016-07-31", + warehouse: "Основной склад", + organization: "ООО \\Альтернатива Плюс\\" + }, + previous_anchor_type: "unknown", + previous_anchor_value: null + } + } + ); + + expect(result?.handled).toBe(true); + expect(result?.response_type).toBe("FACTUAL_SUMMARY"); + expect(result?.debug.detected_intent).toBe("inventory_purchase_provenance_for_item"); + expect(result?.debug.extracted_filters?.item).toBe("Рабочая станция универсального специалиста (индивидуальное изготовление)"); + expect(result?.debug.extracted_filters?.as_of_date).toBe("2016-07-31"); + expect(String(result?.reply_text ?? "")).toContain("ООО \\Производство мебели\\"); + }); + + it("handles selected-object wording 'где мы купили это' as provenance follow-up", async () => { + executeAddressMcpQueryMock.mockResolvedValueOnce({ + fetched_rows: 1, + matched_rows: 1, + raw_rows: [ + { + Period: "2016-05-20T00:00:00Z", + Registrator: "Поступление товаров и услуг 00000000009 от 20.05.2016 0:00:00", + AccountDt: "41.01", + AccountKt: "60.01", + Amount: 695360, + SubcontoDt1: "Рабочая станция универсального специалиста (индивидуальное изготовление)", + SubcontoDt3: "Основной склад", + SubcontoKt1: "ООО \\Производство мебели\\", + SubcontoKt2: "Договор поставки № 5 от 16.05.2016", + Organization: "ООО \\Альтернатива Плюс\\" + } + ], + rows: [], + error: null + }); + + const service = new AddressQueryService(); + const result = await service.tryHandle( + 'По выбранному объекту "Рабочая станция универсального специалиста (индивидуальное изготовление)": где мы купили это', + { + followupContext: { + previous_intent: "inventory_on_hand_as_of_date", + previous_filters: { + as_of_date: "2016-05-31", + period_from: "2016-05-01", + period_to: "2016-05-31", + warehouse: "Основной склад", + organization: "ООО \\Альтернатива Плюс\\" + }, + previous_anchor_type: "unknown", + previous_anchor_value: null + } + } + ); + + expect(result?.handled).toBe(true); + expect(result?.response_type).toBe("FACTUAL_SUMMARY"); + expect(result?.debug.detected_intent).toBe("inventory_purchase_provenance_for_item"); + expect(result?.debug.extracted_filters?.item).toBe("Рабочая станция универсального специалиста (индивидуальное изготовление)"); + expect(result?.debug.extracted_filters?.as_of_date).toBe("2016-05-31"); + expect(String(result?.reply_text ?? "")).toContain("ООО \\Производство мебели\\"); + }); + + it("handles selected-object wording 'где куплено!!' as provenance follow-up", async () => { + executeAddressMcpQueryMock.mockResolvedValueOnce({ + fetched_rows: 1, + matched_rows: 1, + raw_rows: [ + { + Period: "2016-05-20T00:00:00Z", + Registrator: "Поступление товаров и услуг 00000000009 от 20.05.2016 0:00:00", + AccountDt: "41.01", + AccountKt: "60.01", + Amount: 695360, + SubcontoDt1: "Рабочая станция универсального специалиста (индивидуальное изготовление)", + SubcontoDt3: "Основной склад", + SubcontoKt1: "ООО \\Производство мебели\\", + SubcontoKt2: "Договор поставки № 5 от 16.05.2016", + Organization: "ООО \\Альтернатива Плюс\\" + } + ], + rows: [], + error: null + }); + + const service = new AddressQueryService(); + const result = await service.tryHandle( + 'По выбранному объекту "Рабочая станция универсального специалиста (индивидуальное изготовление)": где куплено!!', + { + followupContext: { + previous_intent: "inventory_on_hand_as_of_date", + previous_filters: { + as_of_date: "2016-05-31", + period_from: "2016-05-01", + period_to: "2016-05-31", + warehouse: "Основной склад", + organization: "ООО \\Альтернатива Плюс\\" + }, + previous_anchor_type: "unknown", + previous_anchor_value: null + } + } + ); + + expect(result?.handled).toBe(true); + expect(result?.response_type).toBe("FACTUAL_SUMMARY"); + expect(result?.debug.detected_intent).toBe("inventory_purchase_provenance_for_item"); + expect(result?.debug.extracted_filters?.item).toBe("Рабочая станция универсального специалиста (индивидуальное изготовление)"); + expect(result?.debug.extracted_filters?.as_of_date).toBe("2016-05-31"); + expect(String(result?.reply_text ?? "")).toContain("ООО \\Производство мебели\\"); + }); + it("handles selected-object purchase-doc slang 'по каким документам это купили' as exact purchase-doc follow-up", async () => { executeAddressMcpQueryMock.mockResolvedValueOnce({ fetched_rows: 1, diff --git a/llm_normalizer/backend/tests/addressQueryRuntimeM23.test.ts b/llm_normalizer/backend/tests/addressQueryRuntimeM23.test.ts index d6471f9..cca8395 100644 --- a/llm_normalizer/backend/tests/addressQueryRuntimeM23.test.ts +++ b/llm_normalizer/backend/tests/addressQueryRuntimeM23.test.ts @@ -203,6 +203,39 @@ describe("address query shape classifier", () => { expect(result.intent).toBe("inventory_purchase_provenance_for_item"); }); + it("keeps selected-object colloquial supplier wording 'у кого купили' in inventory provenance intent", () => { + const mode = detectAddressQuestionMode( + 'По выбранному объекту "Рабочая станция универсального специалиста (индивидуальное изготовление)": у кого купили' + ); + const result = resolveAddressIntent( + 'По выбранному объекту "Рабочая станция универсального специалиста (индивидуальное изготовление)": у кого купили' + ); + expect(mode.mode).toBe("address_query"); + expect(result.intent).toBe("inventory_purchase_provenance_for_item"); + }); + + it("keeps selected-object colloquial supplier wording 'где мы купили это' in inventory provenance intent", () => { + const mode = detectAddressQuestionMode( + 'По выбранному объекту "Рабочая станция универсального специалиста (индивидуальное изготовление)": где мы купили это' + ); + const result = resolveAddressIntent( + 'По выбранному объекту "Рабочая станция универсального специалиста (индивидуальное изготовление)": где мы купили это' + ); + expect(mode.mode).toBe("address_query"); + expect(result.intent).toBe("inventory_purchase_provenance_for_item"); + }); + + it("keeps selected-object terse supplier wording 'где куплено!!' in inventory provenance intent", () => { + const mode = detectAddressQuestionMode( + 'По выбранному объекту "Рабочая станция универсального специалиста (индивидуальное изготовление)": где куплено!!' + ); + const result = resolveAddressIntent( + 'По выбранному объекту "Рабочая станция универсального специалиста (индивидуальное изготовление)": где куплено!!' + ); + expect(mode.mode).toBe("address_query"); + expect(result.intent).toBe("inventory_purchase_provenance_for_item"); + }); + it("keeps selected-object purchase-doc slang with 'по каким документам это купили' in purchase-doc intent", () => { const mode = detectAddressQuestionMode( 'По выбранному объекту "Столешница 600*3050*26 дуб ниагара": по каким документам это купили' @@ -2845,6 +2878,14 @@ describe("address filter extraction for balance drilldown", () => { expect(result.warnings).toContain("as_of_date_derived_from_period_to"); }); + it("derives month period for inventory snapshot from prepositional month wording 'в мае 2016'", () => { + const result = extractAddressFilters("Что лежит на складе в мае 2016 года?", "inventory_on_hand_as_of_date"); + expect(result.extracted_filters.period_from).toBe("2016-05-01"); + expect(result.extracted_filters.period_to).toBe("2016-05-31"); + expect(result.extracted_filters.as_of_date).toBe("2016-05-31"); + expect(result.warnings).toContain("period_derived_from_month_phrase"); + }); + it("extracts dotted account by heuristic for docs-forming phrasing without 'счет' keyword", () => { const result = extractAddressFilters( "раскрой остаток 60.01 по документам на конец июля 2020", @@ -3139,6 +3180,16 @@ describe("address filter extraction for balance drilldown", () => { }); describe("address query limited taxonomy and stage diagnostics", { timeout: 15000 }, () => { + it("does not default standalone item provenance questions to today without explicit temporal cue", () => { + const result = extractAddressFilters( + "От какого поставщика куплен товар Столешница 600*3050*26 дуб ниагара", + "inventory_purchase_provenance_for_item" + ); + expect(result.extracted_filters.item).toBe("Столешница 600*3050*26 дуб ниагара"); + expect(result.extracted_filters.as_of_date).toBeUndefined(); + expect(result.warnings).not.toContain("as_of_date_defaulted_today"); + }); + it("injects as_of_date from analysis context when user message has no explicit period", async () => { const service = new AddressQueryService(); const result = await service.tryHandle("Покажи контрагентов с незакрытыми хвостами", { @@ -3924,6 +3975,78 @@ describe("address decompose stage follow-up carryover", () => { expect(result).not.toBeNull(); expect(result?.intent.intent).toBe("inventory_purchase_provenance_for_item"); expect(result?.filters.extracted_filters.as_of_date).toBe("2019-03-31"); + expect( + result?.baseReasons?.includes("intent_adjusted_to_inventory_followup_context") || + result?.intent.reasons.includes("inventory_selected_object_provenance_signal_detected") + ).toBe(true); + }); + + it("promotes selected-object wording 'у кого купили' into inventory provenance with inherited date context", () => { + const result = runAddressDecomposeStage( + 'По выбранному объекту "Рабочая станция универсального специалиста (индивидуальное изготовление)": у кого купили', + { + previous_intent: "inventory_on_hand_as_of_date", + previous_filters: { + as_of_date: "2016-07-31", + period_from: "2016-07-01", + period_to: "2016-07-31", + warehouse: "Основной склад" + }, + previous_anchor_type: "unknown", + previous_anchor_value: null + } + ); + expect(result).not.toBeNull(); + expect(result?.intent.intent).toBe("inventory_purchase_provenance_for_item"); + expect(result?.filters.extracted_filters.as_of_date).toBe("2016-07-31"); + expect( + result?.baseReasons?.includes("intent_adjusted_to_inventory_followup_context") || + result?.intent.reasons.includes("inventory_selected_object_provenance_signal_detected") + ).toBe(true); + }); + + it("promotes selected-object wording 'где мы купили это' into inventory provenance with inherited date context", () => { + const result = runAddressDecomposeStage( + 'По выбранному объекту "Рабочая станция универсального специалиста (индивидуальное изготовление)": где мы купили это', + { + previous_intent: "inventory_on_hand_as_of_date", + previous_filters: { + as_of_date: "2016-05-31", + period_from: "2016-05-01", + period_to: "2016-05-31", + warehouse: "Основной склад" + }, + previous_anchor_type: "unknown", + previous_anchor_value: null + } + ); + expect(result).not.toBeNull(); + expect(result?.intent.intent).toBe("inventory_purchase_provenance_for_item"); + expect(result?.filters.extracted_filters.as_of_date).toBe("2016-05-31"); + expect( + result?.baseReasons?.includes("intent_adjusted_to_inventory_followup_context") || + result?.intent.reasons.includes("inventory_selected_object_provenance_signal_detected") + ).toBe(true); + }); + + it("promotes selected-object wording 'где куплено!!' into inventory provenance with inherited date context", () => { + const result = runAddressDecomposeStage( + 'По выбранному объекту "Рабочая станция универсального специалиста (индивидуальное изготовление)": где куплено!!', + { + previous_intent: "inventory_on_hand_as_of_date", + previous_filters: { + as_of_date: "2016-05-31", + period_from: "2016-05-01", + period_to: "2016-05-31", + warehouse: "Основной склад" + }, + previous_anchor_type: "unknown", + previous_anchor_value: null + } + ); + expect(result).not.toBeNull(); + expect(result?.intent.intent).toBe("inventory_purchase_provenance_for_item"); + expect(result?.filters.extracted_filters.as_of_date).toBe("2016-05-31"); expect( result?.baseReasons?.includes("intent_adjusted_to_inventory_followup_context") || result?.intent.reasons.includes("inventory_selected_object_provenance_signal_detected") diff --git a/llm_normalizer/backend/tests/assistantAddressLlmPredecompose.test.ts b/llm_normalizer/backend/tests/assistantAddressLlmPredecompose.test.ts index d10e5f1..e47a50d 100644 --- a/llm_normalizer/backend/tests/assistantAddressLlmPredecompose.test.ts +++ b/llm_normalizer/backend/tests/assistantAddressLlmPredecompose.test.ts @@ -350,6 +350,105 @@ describe("assistant address llm pre-decompose candidate preference", () => { expect(response.debug?.llm_decomposition_reason).not.toBe("normalized_fragment_applied"); }); + it("rejects selected-object rewrite that drops object context and injects generic documents intent", async () => { + const calls: Array<{ message: string }> = []; + const addressQueryService = { + tryHandle: vi.fn(async (message: string) => { + calls.push({ message }); + return buildAddressLaneResult(message); + }) + } as any; + + const normalizerService = { + normalize: vi.fn(async () => ({ + trace_id: "norm-predecompose-selected-object-context-loss", + ok: true, + normalized: { + schema_version: "normalized_query_v2_0_2", + user_message_raw: 'По выбранному объекту "Рабочая станция универсального специалиста (индивидуальное изготовление)": где мы купили это', + message_in_scope: true, + scope_confidence: "medium", + contains_multiple_tasks: false, + fragments: [ + { + fragment_id: "F1", + raw_fragment_text: 'По выбранному объекту "Рабочая станция универсального специалиста (индивидуальное изготовление)": где мы купили это', + normalized_fragment_text: "Покупка рабочей станции универсального специалиста (индивидуальное изготовление). Кто поставщик и какие документы подтверждают?", + domain_relevance: "in_scope", + business_scope: "company_specific_accounting", + entity_hints: [], + account_hints: [], + document_hints: [], + register_hints: [], + time_scope: { + type: "missing", + value: null, + confidence: "low" + }, + flags: { + has_multi_entity_scope: false, + asks_for_chain_explanation: false, + asks_for_ranking_or_top: false, + asks_for_period_summary: false, + asks_for_rule_check: false, + asks_for_anomaly_scan: false, + asks_for_exact_object_trace: true, + asks_for_evidence: false, + mentions_period_close_context: false + }, + candidate_labels: ["simple_factual"], + confidence: "medium", + execution_readiness: "executable", + clarification_reason: null, + soft_assumption_used: [], + route_status: "routed", + no_route_reason: null + } + ], + discarded_fragments: [], + global_notes: { + needs_clarification: false, + clarification_reason: null + } + }, + raw_model_output: null, + validation: { passed: true, errors: [] }, + usage: { input_tokens: 1, output_tokens: 1, total_tokens: 2 }, + latency_ms: 10, + prompt_version: "normalizer_v2_0_2", + schema_version: "v2_0_2", + request_count_for_case: 1 + })) + } as any; + + const sessions = new AssistantSessionStore(); + const service = new AssistantService( + normalizerService, + sessions as any, + {} as any, + { persistSession: vi.fn() } as any, + addressQueryService + ); + + const response = await service.handleMessage({ + session_id: `asst-predecompose-selected-object-${Date.now()}`, + user_message: 'По выбранному объекту "Рабочая станция универсального специалиста (индивидуальное изготовление)": где мы купили это', + llmProvider: "local", + useMock: false + } as any); + + expect(response.ok).toBe(true); + expect(response.reply_type).toBe("factual"); + expect(calls).toHaveLength(1); + expect(calls[0].message).toBe( + 'По выбранному объекту "Рабочая станция универсального специалиста (индивидуальное изготовление)": где мы купили это' + ); + expect([ + "normalized_fragment_rejected_intent_conflict", + "normalized_fragment_rejected_selected_object_context_loss" + ]).toContain(response.debug?.llm_decomposition_reason); + }); + it("does not treat service verb as counterparty anchor when llm rewrites noisy bank phrase", async () => { const calls: Array<{ message: string }> = []; const addressQueryService = { diff --git a/scripts/domain_case_loop.py b/scripts/domain_case_loop.py index 6e4049c..b831a1c 100644 --- a/scripts/domain_case_loop.py +++ b/scripts/domain_case_loop.py @@ -35,6 +35,31 @@ SCENARIO_PACK_SCHEMA_VERSION = "domain_scenario_pack_v1" ACTIVE_DOMAIN_CONTRACT_SCHEMA_VERSION = "active_domain_contract_v1" AUTONOMOUS_LOOP_SCHEMA_VERSION = "domain_autonomous_loop_v1" +TOP_LEVEL_NOISE_PATTERNS = ( + re.compile(r"^(?:status|статус(?: результата)?)\b", re.IGNORECASE), + re.compile(r"^(?:что учтено|сводка)\b", re.IGNORECASE), + re.compile(r"^блок\s+\d+\b", re.IGNORECASE), + re.compile(r"^(?:подтверждение|опорные документы|сервисно)\b", re.IGNORECASE), +) + +DEFAULT_INVARIANT_SEVERITY: dict[str, str] = { + "wrong_intent": "P0", + "wrong_capability": "P0", + "wrong_followup_action": "P0", + "wrong_recipe": "P0", + "wrong_result_mode": "P0", + "wrong_as_of_date": "P0", + "wrong_period_from": "P0", + "wrong_period_to": "P0", + "missing_required_filter": "P0", + "forbidden_capability_selected": "P0", + "forbidden_recipe_selected": "P0", + "focus_object_missing": "P0", + "wrong_date_scope_state": "P0", + "direct_answer_missing": "P0", + "top_level_noise_present": "P0", +} + def dump_json(payload: Any) -> str: return json.dumps(payload, ensure_ascii=False, indent=2) @@ -268,6 +293,115 @@ def normalize_string_list(raw_values: Any) -> list[str]: return values +def normalize_validation_filters(raw_filters: Any) -> dict[str, str]: + if not isinstance(raw_filters, dict): + return {} + normalized: dict[str, str] = {} + for raw_key, raw_value in raw_filters.items(): + key = str(raw_key or "").strip() + if not key: + continue + if key in {"as_of_date", "period_from", "period_to"}: + normalized_value = normalize_iso_date(raw_value) + if normalized_value: + normalized[key] = normalized_value + continue + text_value = str(raw_value or "").strip() + if text_value: + normalized[key] = text_value + return normalized + + +def normalize_invariant_severity(raw_mapping: Any) -> dict[str, str]: + if not isinstance(raw_mapping, dict): + return {} + normalized: dict[str, str] = {} + for raw_key, raw_value in raw_mapping.items(): + key = str(raw_key or "").strip() + value = str(raw_value or "").strip().upper() + if not key or value not in {"P0", "P1", "WARNING"}: + continue + normalized[key] = value + return normalized + + +def normalize_identifier(value: Any) -> str: + return str(value or "").strip().lower() + + +def identifiers_match(actual: Any, expected: Any) -> bool: + actual_id = normalize_identifier(actual) + expected_id = normalize_identifier(expected) + if not actual_id or not expected_id: + return False + return actual_id == expected_id or actual_id.endswith(expected_id) or expected_id.endswith(actual_id) + + +def identifier_in_list(actual: Any, expected_values: list[str]) -> bool: + return any(identifiers_match(actual, expected) for expected in expected_values if expected) + + +def first_non_empty_lines(text: str, limit: int = 3) -> list[str]: + output: list[str] = [] + for raw_line in str(text or "").splitlines(): + cleaned = raw_line.strip() + if not cleaned: + continue + output.append(cleaned) + if len(output) >= limit: + break + return output + + +def build_node_contract_index(raw_contract: dict[str, Any]) -> dict[str, dict[str, Any]]: + scenario_tree = raw_contract.get("scenario_tree") + if not isinstance(scenario_tree, dict): + return {} + node_index: dict[str, dict[str, Any]] = {} + for section_key in ("root_nodes", "critical_nodes", "supporting_nodes"): + raw_nodes = scenario_tree.get(section_key) + if not isinstance(raw_nodes, list): + continue + for raw_node in raw_nodes: + if not isinstance(raw_node, dict): + continue + node_id = str(raw_node.get("node_id") or "").strip() + if not node_id: + continue + node_index[node_id] = { + "expected_intents": normalize_string_list(raw_node.get("expected_intents")), + "required_answer_shape": str(raw_node.get("expected_answer_shape") or "").strip() or None, + "required_carryover_invariants": normalize_string_list(raw_node.get("required_carryover_invariants")), + "ordering_rule": str(raw_node.get("ordering_rule") or "").strip() or None, + } + return node_index + + +def enrich_step_with_node_contract(raw_step: Any, node_contract_index: dict[str, dict[str, Any]]) -> Any: + if not isinstance(raw_step, dict): + return raw_step + node_id = str(raw_step.get("node_id") or "").strip() + node_defaults = node_contract_index.get(node_id) or {} + if not node_defaults: + return raw_step + enriched = dict(raw_step) + if not enriched.get("expected_intents") and node_defaults.get("expected_intents"): + enriched["expected_intents"] = list(node_defaults["expected_intents"]) + if not enriched.get("required_answer_shape") and node_defaults.get("required_answer_shape"): + enriched["required_answer_shape"] = node_defaults["required_answer_shape"] + merged_invariants = list( + dict.fromkeys( + normalize_string_list(node_defaults.get("required_carryover_invariants")) + + normalize_string_list(enriched.get("required_carryover_invariants")) + ) + ) + if merged_invariants: + enriched["required_carryover_invariants"] = merged_invariants + if not enriched.get("ordering_rule") and node_defaults.get("ordering_rule"): + enriched["ordering_rule"] = node_defaults["ordering_rule"] + return enriched + + def drop_none_values(payload: dict[str, Any]) -> dict[str, Any]: return {key: value for key, value in payload.items() if value is not None} @@ -777,8 +911,18 @@ def build_failed_step_state( "depends_on": step["depends_on"], "question_template": step["question_template"], "question_resolved": question_resolved, + "expected_intents": step.get("expected_intents") or [], "expected_capability": step.get("expected_capability"), + "expected_recipe": step.get("expected_recipe"), "expected_result_mode": step.get("expected_result_mode"), + "required_filters": step.get("required_filters") or {}, + "forbidden_capabilities": step.get("forbidden_capabilities") or [], + "forbidden_recipes": step.get("forbidden_recipes") or [], + "required_state_objects": step.get("required_state_objects") or [], + "required_answer_shape": step.get("required_answer_shape"), + "forbidden_answer_patterns": step.get("forbidden_answer_patterns") or [], + "required_carryover_invariants": step.get("required_carryover_invariants") or [], + "invariant_severity": step.get("invariant_severity") or {}, "reply_type": "backend_error" if status == "blocked" else "unresolved_followup", "assistant_message_id": None, "trace_id": None, @@ -790,6 +934,11 @@ def build_failed_step_state( "route_expectation_status": None, "result_mode": None, "response_type": None, + "assistant_text": "", + "top_non_empty_lines": [], + "actual_direct_answer": None, + "extracted_filters": {}, + "focus_object": None, "fallback_type": failure_type, "mcp_call_status": None, "balance_confirmed": None, @@ -798,6 +947,11 @@ def build_failed_step_state( "date_scope": None, "organization_scope": None, "entries": [], + "execution_status": status, + "acceptance_status": status, + "violated_invariants": [], + "warnings": [], + "hard_fail": status == "blocked", "status": status, "failure_type": failure_type, "error_message": error_message, @@ -816,13 +970,22 @@ def normalize_step_definition(index: int, raw_step: Any) -> dict[str, Any]: "question_template": question_template, "depends_on": [], "analysis_context": {}, + "expected_intents": [], "expected_capability": None, + "expected_recipe": None, "expected_result_mode": None, "question_id": None, "node_id": None, "node_role": None, "paraphrase_family": None, + "required_filters": {}, + "forbidden_capabilities": [], + "forbidden_recipes": [], + "required_state_objects": [], + "required_answer_shape": None, + "forbidden_answer_patterns": [], "required_carryover_invariants": [], + "invariant_severity": {}, "ordering_rule": None, } if not isinstance(raw_step, dict): @@ -845,13 +1008,24 @@ def normalize_step_definition(index: int, raw_step: Any) -> dict[str, Any]: "question_template": question_template, "depends_on": depends_on, "analysis_context": normalize_analysis_context(raw_step.get("analysis_context")), + "expected_intents": normalize_string_list(raw_step.get("expected_intents") or raw_step.get("expected_intent")), "expected_capability": str(raw_step.get("expected_capability") or "").strip() or None, + "expected_recipe": str(raw_step.get("expected_recipe") or raw_step.get("expected_selected_recipe") or "").strip() or None, "expected_result_mode": str(raw_step.get("expected_result_mode") or "").strip() or None, "question_id": str(raw_step.get("question_id") or "").strip() or None, "node_id": str(raw_step.get("node_id") or "").strip() or None, "node_role": str(raw_step.get("node_role") or raw_step.get("role") or "").strip() or None, "paraphrase_family": str(raw_step.get("paraphrase_family") or raw_step.get("wording_family") or "").strip() or None, + "required_filters": normalize_validation_filters(raw_step.get("required_filters")), + "forbidden_capabilities": normalize_string_list(raw_step.get("forbidden_capabilities")), + "forbidden_recipes": normalize_string_list(raw_step.get("forbidden_recipes")), + "required_state_objects": normalize_string_list(raw_step.get("required_state_objects")), + "required_answer_shape": ( + str(raw_step.get("required_answer_shape") or raw_step.get("expected_answer_shape") or "").strip() or None + ), + "forbidden_answer_patterns": normalize_string_list(raw_step.get("forbidden_answer_patterns")), "required_carryover_invariants": normalize_string_list(raw_step.get("required_carryover_invariants")), + "invariant_severity": normalize_invariant_severity(raw_step.get("invariant_severity")), "ordering_rule": str(raw_step.get("ordering_rule") or "").strip() or None, } @@ -936,6 +1110,7 @@ def convert_active_domain_contract_to_pack(raw_contract: dict[str, Any]) -> dict raise RuntimeError("Active domain contract must define `runtime_domain`") domain_id = str(raw_contract.get("domain_id") or runtime_domain).strip() or runtime_domain + node_contract_index = build_node_contract_index(raw_contract) bindings = build_active_contract_bindings(raw_contract) bindings.update(normalize_bindings(orchestration_pack.get("bindings"))) @@ -947,6 +1122,18 @@ def convert_active_domain_contract_to_pack(raw_contract: dict[str, Any]) -> dict pack_id = str(orchestration_pack.get("pack_id") or "").strip() or slugify_case_id(domain_id, None) title = str(orchestration_pack.get("title") or raw_contract.get("title") or domain_id).strip() or domain_id description = str(orchestration_pack.get("description") or raw_contract.get("domain_goal") or "").strip() or None + enriched_scenarios: list[Any] = [] + for raw_scenario in raw_scenarios: + if not isinstance(raw_scenario, dict): + enriched_scenarios.append(raw_scenario) + continue + enriched_scenario = dict(raw_scenario) + raw_steps = enriched_scenario.get("steps") + if isinstance(raw_steps, list): + enriched_scenario["steps"] = [ + enrich_step_with_node_contract(raw_step, node_contract_index) for raw_step in raw_steps + ] + enriched_scenarios.append(enriched_scenario) return { "schema_version": SCENARIO_PACK_SCHEMA_VERSION, @@ -958,7 +1145,7 @@ def convert_active_domain_contract_to_pack(raw_contract: dict[str, Any]) -> dict "description": description, "analysis_context": analysis_context, "bindings": bindings, - "scenarios": raw_scenarios, + "scenarios": enriched_scenarios, "scenario_tree": raw_contract.get("scenario_tree") if isinstance(raw_contract.get("scenario_tree"), dict) else {}, "acceptance_contract": ( raw_contract.get("acceptance_contract") if isinstance(raw_contract.get("acceptance_contract"), dict) else {} @@ -1135,7 +1322,7 @@ def extract_structured_entries(answer_text: str) -> list[dict[str, Any]]: return entries -def derive_step_status(reply_type: str | None, debug_payload: dict[str, Any]) -> str: +def derive_step_execution_status(reply_type: str | None, debug_payload: dict[str, Any]) -> str: if reply_type == "backend_error": return "blocked" capability_route_mode = str(debug_payload.get("capability_route_mode") or "").strip() @@ -1156,6 +1343,153 @@ def derive_step_status(reply_type: str | None, debug_payload: dict[str, Any]) -> return "needs_exact_capability" +def derive_step_status(reply_type: str | None, debug_payload: dict[str, Any]) -> str: + return derive_step_execution_status(reply_type, debug_payload) + + +def should_require_direct_answer(step_state: dict[str, Any]) -> bool: + required_answer_shape = str(step_state.get("required_answer_shape") or "").strip() + if required_answer_shape: + return True + return str(step_state.get("node_role") or "").strip() in {"root", "critical_child"} + + +def is_top_level_noise_line(line: str) -> bool: + cleaned = str(line or "").strip() + if not cleaned: + return False + return any(pattern.search(cleaned) for pattern in TOP_LEVEL_NOISE_PATTERNS) + + +def derive_invariant_severity(step_state: dict[str, Any], violation_code: str) -> str: + overrides = step_state.get("invariant_severity") + if isinstance(overrides, dict): + override = str(overrides.get(violation_code) or "").strip().upper() + if override in {"P0", "P1", "WARNING"}: + return override + return DEFAULT_INVARIANT_SEVERITY.get(violation_code, "P1") + + +def acceptance_status_from_execution(execution_status: str, hard_fail: bool) -> str: + if execution_status == "blocked": + return "blocked" + if execution_status == "needs_exact_capability": + return "needs_exact_capability" + if hard_fail: + return "rejected" + if execution_status == "exact": + return "validated" + return "rejected" + + +def validate_step_contract(step_state: dict[str, Any]) -> dict[str, Any]: + state = dict(step_state) + execution_status = str(state.get("execution_status") or state.get("status") or "").strip() or "needs_exact_capability" + actual_direct_answer = str(state.get("actual_direct_answer") or "").strip() + top_non_empty_lines = state.get("top_non_empty_lines") if isinstance(state.get("top_non_empty_lines"), list) else [] + extracted_filters = state.get("extracted_filters") if isinstance(state.get("extracted_filters"), dict) else {} + date_scope = state.get("date_scope") if isinstance(state.get("date_scope"), dict) else {} + violated_invariants: list[str] = [] + warnings: list[str] = [] + + expected_intents = normalize_string_list(state.get("expected_intents")) + if expected_intents and not identifier_in_list(state.get("detected_intent"), expected_intents): + violated_invariants.append("wrong_intent") + + expected_capability = state.get("expected_capability") + if expected_capability and not identifiers_match(state.get("capability_id"), expected_capability): + required_state_objects = set(normalize_string_list(state.get("required_state_objects"))) + required_state_objects.update(normalize_string_list(state.get("required_carryover_invariants"))) + violation_code = "wrong_followup_action" if "focus_object" in required_state_objects else "wrong_capability" + violated_invariants.append(violation_code) + + expected_recipe = state.get("expected_recipe") + if expected_recipe and not identifiers_match(state.get("selected_recipe"), expected_recipe): + violated_invariants.append("wrong_recipe") + + expected_result_mode = str(state.get("expected_result_mode") or "").strip() + actual_result_mode = str(state.get("result_mode") or "").strip() + if expected_result_mode and actual_result_mode and normalize_identifier(actual_result_mode) != normalize_identifier(expected_result_mode): + violated_invariants.append("wrong_result_mode") + + for forbidden_capability in normalize_string_list(state.get("forbidden_capabilities")): + if identifiers_match(state.get("capability_id"), forbidden_capability): + violated_invariants.append("forbidden_capability_selected") + break + + for forbidden_recipe in normalize_string_list(state.get("forbidden_recipes")): + if identifiers_match(state.get("selected_recipe"), forbidden_recipe): + violated_invariants.append("forbidden_recipe_selected") + break + + required_filters = normalize_validation_filters(state.get("required_filters")) + required_as_of_date_from_context = normalize_iso_date( + (state.get("analysis_context") or {}).get("as_of_date") if isinstance(state.get("analysis_context"), dict) else None + ) + if required_as_of_date_from_context and "as_of_date" not in required_filters: + required_filters["as_of_date"] = required_as_of_date_from_context + + for filter_key, expected_value in required_filters.items(): + actual_value = "" + if filter_key in {"as_of_date", "period_from", "period_to"}: + actual_value = normalize_iso_date(extracted_filters.get(filter_key)) + else: + actual_value = str(extracted_filters.get(filter_key) or "").strip() + if not actual_value: + violated_invariants.append("missing_required_filter") + continue + if actual_value != expected_value: + if filter_key == "as_of_date": + violated_invariants.append("wrong_as_of_date") + elif filter_key == "period_from": + violated_invariants.append("wrong_period_from") + elif filter_key == "period_to": + violated_invariants.append("wrong_period_to") + else: + violated_invariants.append("missing_required_filter") + + required_state_objects = set(normalize_string_list(state.get("required_state_objects"))) + required_state_objects.update( + item + for item in normalize_string_list(state.get("required_carryover_invariants")) + if item in {"focus_object"} + ) + focus_object = state.get("focus_object") if isinstance(state.get("focus_object"), dict) else {} + if "focus_object" in required_state_objects: + has_focus_object = bool(str(focus_object.get("object_id") or "").strip() or str(focus_object.get("label") or "").strip()) + if not has_focus_object: + violated_invariants.append("focus_object_missing") + + if "date_scope" in normalize_string_list(state.get("required_carryover_invariants")) and required_filters.get("as_of_date"): + current_date_scope = normalize_iso_date(date_scope.get("as_of_date")) + if current_date_scope and current_date_scope != required_filters["as_of_date"]: + violated_invariants.append("wrong_date_scope_state") + + if should_require_direct_answer(state): + if not actual_direct_answer or is_top_level_noise_line(actual_direct_answer): + violated_invariants.append("direct_answer_missing") + + first_top_line = str(top_non_empty_lines[0] if top_non_empty_lines else "").strip() + if first_top_line and is_top_level_noise_line(first_top_line): + violated_invariants.append("top_level_noise_present") + + forbidden_answer_patterns = normalize_string_list(state.get("forbidden_answer_patterns")) + if forbidden_answer_patterns and top_non_empty_lines: + joined_top_block = "\n".join(str(line) for line in top_non_empty_lines) + for pattern in forbidden_answer_patterns: + if pattern and re.search(pattern, joined_top_block, flags=re.IGNORECASE): + warnings.append(f"forbidden_answer_pattern:{pattern}") + + unique_violations = list(dict.fromkeys(violated_invariants)) + hard_fail = any(derive_invariant_severity(state, code) == "P0" for code in unique_violations) + state["violated_invariants"] = unique_violations + state["warnings"] = list(dict.fromkeys(warnings)) + state["hard_fail"] = hard_fail + state["acceptance_status"] = acceptance_status_from_execution(execution_status, hard_fail) + state["status"] = state["acceptance_status"] + return state + + def build_scenario_step_state( *, scenario_id: str, @@ -1167,6 +1501,9 @@ def build_scenario_step_state( entries: list[dict[str, Any]], ) -> dict[str, Any]: debug_payload = turn_artifact.get("technical_debug_payload") + if not isinstance(debug_payload, dict): + assistant_debug = turn_artifact.get("assistant_message", {}).get("debug") if isinstance(turn_artifact.get("assistant_message"), dict) else None + debug_payload = assistant_debug if isinstance(assistant_debug, dict) else {} debug = debug_payload if isinstance(debug_payload, dict) else {} session_summary = turn_artifact.get("session_summary") summary = session_summary if isinstance(session_summary, dict) else {} @@ -1177,6 +1514,9 @@ def build_scenario_step_state( assistant_message = turn_artifact.get("assistant_message") assistant_item = assistant_message if isinstance(assistant_message, dict) else {} reply_type = assistant_item.get("reply_type") + assistant_text = str(assistant_item.get("text") or "") + top_non_empty = first_non_empty_lines(assistant_text, limit=3) + analysis_context = step.get("analysis_context") if isinstance(step.get("analysis_context"), dict) else {} step_state = { "schema_version": SCENARIO_STEP_STATE_SCHEMA_VERSION, @@ -1188,8 +1528,19 @@ def build_scenario_step_state( "depends_on": step["depends_on"], "question_template": step["question_template"], "question_resolved": question_resolved, + "analysis_context": analysis_context, + "expected_intents": step.get("expected_intents") or [], "expected_capability": step.get("expected_capability"), + "expected_recipe": step.get("expected_recipe"), "expected_result_mode": step.get("expected_result_mode"), + "required_filters": step.get("required_filters") or {}, + "forbidden_capabilities": step.get("forbidden_capabilities") or [], + "forbidden_recipes": step.get("forbidden_recipes") or [], + "required_state_objects": step.get("required_state_objects") or [], + "required_answer_shape": step.get("required_answer_shape"), + "forbidden_answer_patterns": step.get("forbidden_answer_patterns") or [], + "required_carryover_invariants": step.get("required_carryover_invariants") or [], + "invariant_severity": step.get("invariant_severity") or {}, "reply_type": reply_type, "assistant_message_id": assistant_item.get("message_id"), "trace_id": assistant_item.get("trace_id"), @@ -1201,6 +1552,11 @@ def build_scenario_step_state( "route_expectation_status": debug.get("route_expectation_status"), "result_mode": debug.get("result_mode"), "response_type": debug.get("response_type"), + "assistant_text": assistant_text, + "top_non_empty_lines": top_non_empty, + "actual_direct_answer": top_non_empty[0] if top_non_empty else None, + "extracted_filters": debug.get("extracted_filters") if isinstance(debug.get("extracted_filters"), dict) else {}, + "focus_object": context.get("active_focus_object") if isinstance(context.get("active_focus_object"), dict) else None, "fallback_type": debug.get("fallback_type"), "mcp_call_status": debug.get("mcp_call_status"), "balance_confirmed": debug.get("balance_confirmed"), @@ -1210,8 +1566,10 @@ def build_scenario_step_state( "organization_scope": context.get("organization_scope"), "entries": entries, } - step_state["status"] = derive_step_status(reply_type if isinstance(reply_type, str) else None, debug) - return step_state + step_state["execution_status"] = derive_step_execution_status(reply_type if isinstance(reply_type, str) else None, debug) + step_state["acceptance_status"] = step_state["execution_status"] + step_state["status"] = step_state["acceptance_status"] + return validate_step_contract(step_state) def save_scenario_step_bundle( @@ -1233,8 +1591,8 @@ def save_scenario_step_bundle( write_text(step_dir / "resolved_question.txt", f"{step_state['question_resolved']}\n") -def derive_scenario_status(step_outputs: dict[str, dict[str, Any]]) -> str: - statuses = [str(item.get("status") or "") for item in step_outputs.values()] +def derive_scenario_execution_status(step_outputs: dict[str, dict[str, Any]]) -> str: + statuses = [str(item.get("execution_status") or item.get("status") or "") for item in step_outputs.values()] if not statuses: return "blocked" if any(status == "blocked" for status in statuses): @@ -1243,10 +1601,23 @@ def derive_scenario_status(step_outputs: dict[str, dict[str, Any]]) -> str: return "needs_exact_capability" if any(status == "partial" for status in statuses): return "partial" - return "accepted" + return "exact" -def build_scenario_summary(manifest: dict[str, Any], scenario_state: dict[str, Any], final_status: str) -> str: +def derive_scenario_status(step_outputs: dict[str, dict[str, Any]]) -> str: + statuses = [str(item.get("acceptance_status") or item.get("status") or "") for item in step_outputs.values()] + if not statuses: + return "blocked" + if any(status == "blocked" for status in statuses): + return "blocked" + if any(status == "needs_exact_capability" for status in statuses): + return "needs_exact_capability" + if any(status in {"partial", "rejected"} for status in statuses): + return "partial" + return "accepted" if all(status == "validated" for status in statuses) else "partial" + + +def build_scenario_summary(manifest: dict[str, Any], scenario_state: dict[str, Any], final_status: str, execution_status: str) -> str: lines = [ "# Scenario summary", "", @@ -1254,6 +1625,7 @@ def build_scenario_summary(manifest: dict[str, Any], scenario_state: dict[str, A f"- domain: `{manifest['domain']}`", f"- title: {manifest['title']}", f"- session_id: `{scenario_state.get('session_id') or 'n/a'}`", + f"- execution_status: `{execution_status}`", f"- final_status: `{final_status}`", "", "## Steps", @@ -1263,20 +1635,27 @@ def build_scenario_summary(manifest: dict[str, Any], scenario_state: dict[str, A lines.extend( [ f"{index}. `{step['step_id']}` - {step['question_template']}", - f"status: `{step_output.get('status') or 'n/a'}`", + f"execution_status: `{step_output.get('execution_status') or 'n/a'}`", + f"acceptance_status: `{step_output.get('acceptance_status') or step_output.get('status') or 'n/a'}`", f"question_resolved: {step_output.get('question_resolved') or 'n/a'}", f"intent: `{step_output.get('detected_intent') or 'n/a'}`", f"recipe: `{step_output.get('selected_recipe') or 'n/a'}`", f"capability: `{step_output.get('capability_id') or 'n/a'}`", f"result_mode: `{step_output.get('result_mode') or 'n/a'}`", f"result_set: `{step_output.get('active_result_set_id') or 'n/a'}`", + f"violated_invariants: {', '.join(step_output.get('violated_invariants') or []) or 'none'}", "", ] ) return "\n".join(lines).strip() + "\n" -def build_scenario_final_status(manifest: dict[str, Any], scenario_state: dict[str, Any], final_status: str) -> str: +def build_scenario_final_status( + manifest: dict[str, Any], + scenario_state: dict[str, Any], + final_status: str, + execution_status: str, +) -> str: reason = { "accepted": "all scenario steps executed in one assistant session with no unresolved route or capability gaps", "partial": "scenario captured successfully, but at least one step still needs exact capability enablement or route hardening", @@ -1288,6 +1667,7 @@ def build_scenario_final_status(manifest: dict[str, Any], scenario_state: dict[s # Final status - status: `{final_status}` + - execution_status: `{execution_status}` - scenario_id: `{manifest['scenario_id']}` - session_id: `{scenario_state.get('session_id') or 'n/a'}` - reason: {reason} @@ -1492,14 +1872,15 @@ def execute_scenario_manifest( f"{step['step_id']} -> {result['step_state']['status']}" ) + execution_status = derive_scenario_execution_status(scenario_state["step_outputs"]) final_status = derive_scenario_status(scenario_state["step_outputs"]) write_text(scenario_dir / "scenario_output.md", last_export_markdown or "") - write_text(scenario_dir / "scenario_summary.md", build_scenario_summary(manifest, scenario_state, final_status)) - write_text(scenario_dir / "final_status.md", build_scenario_final_status(manifest, scenario_state, final_status)) + write_text(scenario_dir / "scenario_summary.md", build_scenario_summary(manifest, scenario_state, final_status, execution_status)) + write_text(scenario_dir / "final_status.md", build_scenario_final_status(manifest, scenario_state, final_status, execution_status)) if scenario_state.get("session_id"): write_text(scenario_dir / "session_id.txt", f"{scenario_state['session_id']}\n") print(f"[domain-case-loop] saved scenario artifacts to {scenario_dir}") - print(f"[domain-case-loop] final_status={final_status}") + print(f"[domain-case-loop] execution_status={execution_status} final_status={final_status}") return scenario_state, final_status @@ -1645,13 +2026,19 @@ def handle_run_scenario(args: argparse.Namespace) -> int: return 0 -def build_pack_summary(pack: dict[str, Any], scenario_results: list[dict[str, Any]], final_status: str) -> str: +def build_pack_summary( + pack: dict[str, Any], + scenario_results: list[dict[str, Any]], + final_status: str, + execution_status: str, +) -> str: lines = [ "# Pack summary", "", f"- pack_id: `{pack['pack_id']}`", f"- domain: `{pack['domain']}`", f"- title: {pack['title']}", + f"- execution_status: `{execution_status}`", f"- final_status: `{final_status}`", "", "## Scenarios", @@ -1660,7 +2047,8 @@ def build_pack_summary(pack: dict[str, Any], scenario_results: list[dict[str, An lines.extend( [ f"{index}. `{item['scenario_id']}` - {item['title']}", - f"status: `{item['final_status']}`", + f"execution_status: `{item.get('execution_status') or 'n/a'}`", + f"acceptance_status: `{item['final_status']}`", f"session_id: `{item.get('session_id') or 'n/a'}`", f"artifact_dir: `{item['artifact_dir']}`", "", @@ -1669,7 +2057,12 @@ def build_pack_summary(pack: dict[str, Any], scenario_results: list[dict[str, An return "\n".join(lines).strip() + "\n" -def build_pack_final_status(pack: dict[str, Any], scenario_results: list[dict[str, Any]], final_status: str) -> str: +def build_pack_final_status( + pack: dict[str, Any], + scenario_results: list[dict[str, Any]], + final_status: str, + execution_status: str, +) -> str: expected_scenarios = len(pack.get("scenarios") or []) executed_scenarios = len(scenario_results) has_missing_scenarios = executed_scenarios < expected_scenarios @@ -1689,6 +2082,7 @@ def build_pack_final_status(pack: dict[str, Any], scenario_results: list[dict[st # Final status - status: `{final_status}` + - execution_status: `{execution_status}` - pack_id: `{pack['pack_id']}` - domain: `{pack['domain']}` - reason: {reason} @@ -1709,6 +2103,19 @@ def derive_coverage_status(statuses: list[str]) -> str: return "partial" +def derive_pack_execution_status(scenario_results: list[dict[str, Any]]) -> str: + aggregate_statuses = [str(item.get("execution_status") or "") for item in scenario_results if isinstance(item, dict)] + if not aggregate_statuses: + return "blocked" + if any(status == "blocked" for status in aggregate_statuses): + return "blocked" + if any(status == "needs_exact_capability" for status in aggregate_statuses): + return "needs_exact_capability" + if any(status == "partial" for status in aggregate_statuses): + return "partial" + return "exact" + + def derive_pack_final_status(pack: dict[str, Any], scenario_results: list[dict[str, Any]]) -> str: aggregate_statuses = [item["final_status"] for item in scenario_results] if not aggregate_statuses: @@ -2099,6 +2506,8 @@ def compact_step_output_for_review(step_output: Any) -> dict[str, Any]: entry_titles_sample.append(title) return { "status": step_output.get("status"), + "execution_status": step_output.get("execution_status"), + "acceptance_status": step_output.get("acceptance_status"), "question_resolved": step_output.get("question_resolved"), "detected_intent": step_output.get("detected_intent"), "selected_recipe": step_output.get("selected_recipe"), @@ -2106,6 +2515,8 @@ def compact_step_output_for_review(step_output: Any) -> dict[str, Any]: "result_mode": step_output.get("result_mode"), "answer_shape": step_output.get("answer_shape"), "actual_direct_answer": step_output.get("actual_direct_answer"), + "violated_invariants": step_output.get("violated_invariants"), + "warnings": step_output.get("warnings"), "fallback_type": step_output.get("fallback_type"), "mcp_call_status": step_output.get("mcp_call_status"), "failure_type": step_output.get("failure_type"), @@ -2140,6 +2551,7 @@ def build_pack_review_bundle(pack_dir: Path) -> str: "pack_id": pack_state.get("pack_id"), "domain": pack_state.get("domain"), "title": pack_state.get("title"), + "execution_status": pack_state.get("execution_status"), "final_status": pack_state.get("final_status"), "scenario_results": pack_state.get("scenario_results"), }, @@ -2387,12 +2799,14 @@ def handle_run_pack(args: argparse.Namespace) -> int: { "scenario_id": scenario_manifest["scenario_id"], "title": scenario_manifest["title"], + "execution_status": derive_scenario_execution_status(scenario_state.get("step_outputs") or {}), "final_status": scenario_final_status, "session_id": scenario_state.get("session_id"), "artifact_dir": str(scenario_dir), } ) + execution_status = derive_pack_execution_status(scenario_results) final_status = derive_pack_final_status(pack, scenario_results) pack_state = { @@ -2403,15 +2817,16 @@ def handle_run_pack(args: argparse.Namespace) -> int: "analysis_context": pack.get("analysis_context") or {}, "bindings": pack.get("bindings") or {}, "scenario_results": scenario_results, + "execution_status": execution_status, "final_status": final_status, "updated_at": datetime.now(timezone.utc).replace(microsecond=0).isoformat(), } write_text(pack_dir / "scenario_acceptance_matrix.md", build_scenario_acceptance_matrix(pack, scenario_results)) write_json(pack_dir / "pack_state.json", pack_state) - write_text(pack_dir / "pack_summary.md", build_pack_summary(pack, scenario_results, final_status)) - write_text(pack_dir / "final_status.md", build_pack_final_status(pack, scenario_results, final_status)) + write_text(pack_dir / "pack_summary.md", build_pack_summary(pack, scenario_results, final_status, execution_status)) + write_text(pack_dir / "final_status.md", build_pack_final_status(pack, scenario_results, final_status, execution_status)) print(f"[domain-case-loop] saved pack artifacts to {pack_dir}") - print(f"[domain-case-loop] final_status={final_status}") + print(f"[domain-case-loop] execution_status={execution_status} final_status={final_status}") return 0 diff --git a/tests/test_domain_case_loop.py b/tests/test_domain_case_loop.py index 85e5a0a..c2f07e2 100644 --- a/tests/test_domain_case_loop.py +++ b/tests/test_domain_case_loop.py @@ -13,6 +13,7 @@ from scripts.domain_case_loop import ( evaluate_analyst_gate, load_scenario_pack, merge_scenario_date_scope, + validate_step_contract, ) @@ -134,6 +135,65 @@ def test_load_scenario_pack_accepts_active_domain_contract(tmp_path) -> None: assert pack["scenarios"][0]["steps"][1]["question_id"] == "Q19" +def test_load_scenario_pack_enriches_step_with_node_contract_defaults(tmp_path) -> None: + manifest_path = tmp_path / "active_domain_contract.json" + manifest_path.write_text( + json.dumps( + { + "schema_version": "active_domain_contract_v1", + "status": "active", + "domain_id": "inventory_stock_supplier_provenance", + "runtime_domain": "inventory_stock", + "title": "Warehouse domain", + "question_pool": { + "questions": [ + {"question_id": "Q19", "node_id": "N03_selected_item_supplier", "text": "Q19"}, + ] + }, + "scenario_tree": { + "critical_nodes": [ + { + "node_id": "N03_selected_item_supplier", + "expected_intents": ["inventory_purchase_provenance_for_item"], + "expected_answer_shape": "direct_supplier_answer_first_then_evidence", + "required_carryover_invariants": ["focus_object", "date_scope"], + } + ] + }, + "orchestration_pack": { + "pack_id": "inventory_active_contract_smoke", + "scenarios": [ + { + "scenario_id": "inventory_selected_item_provenance", + "title": "Selected item provenance", + "steps": [ + { + "step_id": "step_02_supplier", + "question_id": "Q19", + "node_id": "N03_selected_item_supplier", + "question": "По выбранному объекту \"...\": кто это поставил нам", + } + ], + } + ], + }, + }, + ensure_ascii=False, + indent=2, + ) + + "\n", + encoding="utf-8", + ) + + pack = load_scenario_pack(manifest_path) + step = pack["scenarios"][0]["steps"][0] + + assert step["expected_intents"] == ["inventory_purchase_provenance_for_item"] + assert step["required_answer_shape"] == "direct_supplier_answer_first_then_evidence" + assert "focus_object" in step["required_carryover_invariants"] + assert "date_scope" in step["required_carryover_invariants"] + + def test_build_scenario_acceptance_matrix_marks_green_edge_when_covering_scenario_is_accepted() -> None: pack = { "pack_id": "inventory_active_contract_smoke", @@ -323,3 +383,119 @@ def test_evaluate_analyst_gate_requires_temporal_honesty_field_truth_and_layerin assert requires_user_decision is False assert user_decision_type == "none" assert user_decision_prompt is None + + +def test_validate_step_contract_rejects_wrong_month_filter_even_when_execution_is_exact() -> None: + validated = validate_step_contract( + { + "execution_status": "exact", + "status": "exact", + "node_role": "root", + "analysis_context": {"as_of_date": "2016-05-31"}, + "expected_intents": ["inventory_on_hand_as_of_date"], + "detected_intent": "inventory_on_hand_as_of_date", + "expected_capability": "confirmed_inventory_on_hand_as_of_date", + "capability_id": "confirmed_inventory_on_hand_as_of_date", + "expected_recipe": "address_inventory_on_hand_as_of_date_v1", + "selected_recipe": "address_inventory_on_hand_as_of_date_v1", + "expected_result_mode": "confirmed_balance", + "result_mode": "confirmed_balance", + "required_filters": { + "as_of_date": "2016-05-31", + "period_from": "2016-05-01", + "period_to": "2016-05-31", + }, + "required_answer_shape": "item_list_with_quantity_cost_warehouse_organization", + "required_carryover_invariants": [], + "required_state_objects": [], + "forbidden_capabilities": [], + "forbidden_recipes": [], + "actual_direct_answer": "На 31.12.2016 на складе подтверждено 4 позиций.", + "top_non_empty_lines": ["На 31.12.2016 на складе подтверждено 4 позиций."], + "extracted_filters": { + "as_of_date": "2016-12-31", + "period_from": "2016-01-01", + "period_to": "2016-12-31", + }, + "date_scope": {"as_of_date": "2016-12-31"}, + "focus_object": None, + } + ) + + assert validated["acceptance_status"] == "rejected" + assert "wrong_as_of_date" in validated["violated_invariants"] + assert "wrong_period_from" in validated["violated_invariants"] + assert "wrong_period_to" in validated["violated_invariants"] + assert validated["hard_fail"] is True + + +def test_validate_step_contract_rejects_selected_object_followup_without_focus_object_and_with_wrong_route() -> None: + validated = validate_step_contract( + { + "execution_status": "exact", + "status": "exact", + "node_role": "critical_child", + "analysis_context": {"as_of_date": "2019-03-31"}, + "expected_intents": ["inventory_purchase_provenance_for_item"], + "detected_intent": "inventory_on_hand_as_of_date", + "expected_capability": "inventory_purchase_provenance_for_item", + "capability_id": "confirmed_inventory_on_hand_as_of_date", + "expected_result_mode": "confirmed_balance", + "result_mode": "confirmed_balance", + "required_filters": {"as_of_date": "2019-03-31"}, + "required_answer_shape": "direct_supplier_answer_first_then_evidence", + "required_carryover_invariants": ["focus_object", "date_scope"], + "required_state_objects": [], + "forbidden_capabilities": ["confirmed_inventory_on_hand_as_of_date"], + "forbidden_recipes": ["address_inventory_on_hand_as_of_date_v1"], + "selected_recipe": "address_inventory_on_hand_as_of_date_v1", + "actual_direct_answer": "На 31.03.2019 на складе подтверждено 16 позиций.", + "top_non_empty_lines": ["На 31.03.2019 на складе подтверждено 16 позиций."], + "extracted_filters": {"as_of_date": "2019-03-31"}, + "date_scope": {"as_of_date": "2019-03-31"}, + "focus_object": None, + } + ) + + assert validated["acceptance_status"] == "rejected" + assert "wrong_intent" in validated["violated_invariants"] + assert "wrong_followup_action" in validated["violated_invariants"] + assert "forbidden_capability_selected" in validated["violated_invariants"] + assert "forbidden_recipe_selected" in validated["violated_invariants"] + assert "focus_object_missing" in validated["violated_invariants"] + + +def test_validate_step_contract_rejects_top_level_noise_as_direct_answer() -> None: + validated = validate_step_contract( + { + "execution_status": "exact", + "status": "exact", + "node_role": "critical_child", + "analysis_context": {"as_of_date": "2019-03-31"}, + "expected_intents": ["inventory_purchase_provenance_for_item"], + "detected_intent": "inventory_purchase_provenance_for_item", + "expected_capability": "inventory_purchase_provenance_for_item", + "capability_id": "inventory_inventory_purchase_provenance_for_item", + "expected_result_mode": "confirmed_balance", + "result_mode": "confirmed_balance", + "required_filters": {"as_of_date": "2019-03-31"}, + "required_answer_shape": "direct_supplier_answer_first_then_evidence", + "required_carryover_invariants": [], + "required_state_objects": [], + "forbidden_capabilities": [], + "forbidden_recipes": [], + "selected_recipe": "address_inventory_purchase_provenance_for_item_v1", + "actual_direct_answer": "Статус результата: подтверждено.", + "top_non_empty_lines": [ + "Статус результата: подтверждено.", + "Поставщик: Торговый дом \\Союз\\.", + ], + "extracted_filters": {"as_of_date": "2019-03-31"}, + "date_scope": {"as_of_date": "2019-03-31"}, + "focus_object": {"object_id": "item:1", "label": "Столешница"}, + } + ) + + assert validated["acceptance_status"] == "rejected" + assert "direct_answer_missing" in validated["violated_invariants"] + assert "top_level_noise_present" in validated["violated_invariants"]