From 98afdd39c4c4014c01d592ac43f2bce44f133e6f Mon Sep 17 00:00:00 2001 From: dctouch Date: Tue, 5 May 2026 17:09:26 +0300 Subject: [PATCH] =?UTF-8?q?Post-F:=20=D1=83=D0=BA=D1=80=D0=B5=D0=BF=D0=B8?= =?UTF-8?q?=D1=82=D1=8C=20VAT,=20=D0=B4=D0=B0=D1=82=D1=8B=20=D0=BE=D1=81?= =?UTF-8?q?=D1=82=D0=B0=D1=82=D0=BA=D0=BE=D0=B2=20=D0=B8=20SVK=20follow-up?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ...rld_semantic_control_gate_ehmo_subset.json | 481 ++++++++++++++++++ .../dist/services/addressFilterExtractor.js | 14 +- .../dist/services/addressIntentResolver.js | 16 +- .../address_runtime/decomposeStage.js | 15 +- .../assistantMcpDiscoveryTurnInputAdapter.js | 55 +- .../backend/dist/services/assistantService.js | 15 +- .../src/services/addressFilterExtractor.ts | 14 +- .../src/services/addressIntentResolver.ts | 28 +- .../address_runtime/decomposeStage.ts | 16 +- .../assistantMcpDiscoveryTurnInputAdapter.ts | 57 ++- .../backend/src/services/assistantService.ts | 15 +- .../addressFilterExtractorRegression.test.ts | 21 + .../addressFollowupTemporalRegression.test.ts | 51 ++ .../tests/addressVatConfirmedRoute.test.ts | 9 + ...istantMcpDiscoveryTurnInputAdapter.test.ts | 30 ++ 15 files changed, 815 insertions(+), 22 deletions(-) create mode 100644 docs/orchestration/address_truth_harness_phase89_open_world_semantic_control_gate_ehmo_subset.json diff --git a/docs/orchestration/address_truth_harness_phase89_open_world_semantic_control_gate_ehmo_subset.json b/docs/orchestration/address_truth_harness_phase89_open_world_semantic_control_gate_ehmo_subset.json new file mode 100644 index 0000000..820f479 --- /dev/null +++ b/docs/orchestration/address_truth_harness_phase89_open_world_semantic_control_gate_ehmo_subset.json @@ -0,0 +1,481 @@ +{ + "schema_version": "domain_truth_harness_spec_v1", + "scenario_id": "address_truth_harness_phase89_open_world_semantic_control_gate_ehmo_subset", + "domain": 
"open_world_bounded_autonomy_breadth_semantic_control_gate", + "title": "Phase 89 Open-World Semantic Control Gate EHMO critical subset", + "description": "Strict live subset derived from assistant-stage1-EHMOy3lNFt. Covers business overview continuity, wrong-lane prevention, stale frame reset, SVK pivot integrity, metadata continuation, VAT metadata, selected-object profitability, and final summary semantics.", + "source_export": "docs\\orchestration\\manual_qa_open_world_breadth_99_fat_gui_pack_20260505.json", + "bindings": {}, + "steps": [ + { + "step_id": "step_001_smalltalk_sanity", + "title": "001_smalltalk_sanity", + "question": "привет, ты на связи? перед большим прогоном отвечай живо, но не теряй потом бизнес-контекст", + "criticality": "info", + "semantic_tags": [ + "human_answer", + "meta_smalltalk", + "context_guard" + ], + "notes": "EHMO subset source=001_smalltalk_sanity; review_focus=Ассистент должен ответить нормально и не начать преждевременно искать данные 1С.", + "forbidden_answer_patterns": [ + "(?i)business_overview_route_template_v1", + "(?i)mcp_discovery", + "(?i)runtime_", + "(?i)query_documents", + "(?i)query_movements", + "(?i)capability_id", + "(?i)selected_chain_id" + ] + }, + { + "step_id": "step_002_business_overview_2020_full", + "title": "002_business_overview_2020_full", + "question": "Дай взрослый бизнес-обзор ООО Альтернатива Плюс за 2020 год по данным 1С: обороты, входящие и исходящие деньги, нетто, НДС, дебиторка, кредиторка, склад, клиенты, поставщики, договоры, документы, что подтверждено и что пока нельзя утверждать.", + "criticality": "critical", + "semantic_tags": [ + "business_overview", + "organization_scope", + "explicit_period", + "analyst_synthesis" + ], + "notes": "EHMO subset source=002_business_overview_2020_full; review_focus=Проверить полноту бизнес-обзора и честность границ: нетто не должно выдаваться за прибыль.", + "forbidden_answer_patterns": [ + "(?i)business_overview_route_template_v1", + 
"(?i)mcp_discovery", + "(?i)runtime_", + "(?i)query_documents", + "(?i)query_movements", + "(?i)capability_id", + "(?i)selected_chain_id" + ], + "required_answer_patterns_all": [ + "2020" + ] + }, + { + "step_id": "step_003_money_breakdown", + "title": "003_money_breakdown", + "question": "Раскрой деньги подробнее: сколько всего получили, сколько заплатили, какой чистый денежный поток, кто главный клиент и кто главный поставщик в 2020.", + "criticality": "critical", + "semantic_tags": [ + "business_overview", + "money_flow", + "top_customer", + "top_supplier", + "followup_reuse" + ], + "notes": "EHMO subset source=003_money_breakdown; review_focus=Должен сохраняться scope ООО Альтернатива Плюс и период 2020.", + "forbidden_answer_patterns": [ + "(?i)business_overview_route_template_v1", + "(?i)mcp_discovery", + "(?i)runtime_", + "(?i)query_documents", + "(?i)query_movements", + "(?i)capability_id", + "(?i)selected_chain_id" + ], + "required_answer_patterns_all": [ + "2020" + ] + }, + { + "step_id": "step_005_profit_margin_boundary", + "title": "005_profit_margin_boundary", + "question": "Можно ли по этим данным посчитать нормальную прибыль и маржу компании? 
Если нет, дай proxy-анализ и объясни, каких учетных доказательств не хватает.", + "criticality": "critical", + "semantic_tags": [ + "business_overview", + "profit_margin_boundary", + "missing_proof_families" + ], + "notes": "EHMO subset source=005_profit_margin_boundary; review_focus=Ответ не должен фантазировать exact P&L; должен назвать missing proof families.", + "forbidden_answer_patterns": [ + "(?i)business_overview_route_template_v1", + "(?i)mcp_discovery", + "(?i)runtime_", + "(?i)query_documents", + "(?i)query_movements", + "(?i)capability_id", + "(?i)selected_chain_id" + ] + }, + { + "step_id": "step_008_vat_2020", + "title": "008_vat_2020", + "question": "Что с НДС за 2020 год по Альтернативе Плюс: какая позиция видна, на чем она основана и чего не хватает для налогового вывода?", + "criticality": "critical", + "semantic_tags": [ + "business_overview", + "vat", + "explicit_period", + "tax_boundary" + ], + "notes": "EHMO subset source=008_vat_2020; review_focus=VAT-период должен быть 2020, без materialization gap и без выдуманного налогового заключения.", + "forbidden_answer_patterns": [ + "(?i)business_overview_route_template_v1", + "(?i)mcp_discovery", + "(?i)runtime_", + "(?i)query_documents", + "(?i)query_movements", + "(?i)capability_id", + "(?i)selected_chain_id", + "2000-01-01" + ], + "required_answer_patterns_all": [ + "2020" + ] + }, + { + "step_id": "step_013_inventory_date", + "title": "013_inventory_date", + "question": "Покажи складской срез Альтернативы Плюс на 2026-04-16: что есть в остатках, какие самые заметные позиции, и что это говорит о бизнесе.", + "criticality": "critical", + "semantic_tags": [ + "business_overview", + "inventory_position", + "explicit_date", + "inventory_boundary" + ], + "notes": "EHMO subset source=013_inventory_date; review_focus=Нужен складской факт на дату без превращения его в полное здоровье бизнеса.", + "forbidden_answer_patterns": [ + "(?i)business_overview_route_template_v1", + "(?i)mcp_discovery", + 
"(?i)runtime_", + "(?i)query_documents", + "(?i)query_movements", + "(?i)capability_id", + "(?i)selected_chain_id" + ], + "required_answer_patterns_all": [ + "2026|2026-04-16|16[.-]04" + ] + }, + { + "step_id": "step_014_inventory_reserve_boundary", + "title": "014_inventory_reserve_boundary", + "question": "Можно ли из этого сказать, что склад ликвидный или что надо создавать резервы/списывать неликвид? Если нет, что именно подтверждено и чего не хватает?", + "criticality": "critical", + "semantic_tags": [ + "business_overview", + "inventory_reserve_boundary", + "missing_proof_families" + ], + "notes": "EHMO subset source=014_inventory_reserve_boundary; review_focus=Нельзя выдавать reserve/liquidation evidence без подтвержденных маршрутов.", + "forbidden_answer_patterns": [ + "(?i)business_overview_route_template_v1", + "(?i)mcp_discovery", + "(?i)runtime_", + "(?i)query_documents", + "(?i)query_movements", + "(?i)capability_id", + "(?i)selected_chain_id" + ] + }, + { + "step_id": "step_016_contract_counterparty_profile", + "title": "016_contract_counterparty_profile", + "question": "Сколько реально активных контрагентов и договоров видно по Альтернативе Плюс, какие роли у контрагентов, и какие договоры используются чаще всего?", + "criticality": "critical", + "semantic_tags": [ + "business_overview", + "counterparty_population", + "contract_usage_profile" + ], + "notes": "EHMO subset source=016_contract_counterparty_profile; review_focus=Должен быть профиль, а не generic metadata ответ.", + "forbidden_answer_patterns": [ + "(?i)business_overview_route_template_v1", + "(?i)mcp_discovery", + "(?i)runtime_", + "(?i)query_documents", + "(?i)query_movements", + "(?i)capability_id", + "(?i)selected_chain_id" + ] + }, + { + "step_id": "step_018_business_audit_synthesis", + "title": "018_business_audit_synthesis", + "question": "Собери это как нормальный бизнес-аудит: сильные стороны, риски, что уже можно сказать уверенно, что только proxy, и что директору проверить 
руками.", + "criticality": "critical", + "semantic_tags": [ + "business_overview", + "analyst_synthesis", + "human_answer_quality" + ], + "notes": "EHMO subset source=018_business_audit_synthesis; review_focus=Нужен взрослый аналитический ответ, а не короткий высер или debug.", + "forbidden_answer_patterns": [ + "(?i)business_overview_route_template_v1", + "(?i)mcp_discovery", + "(?i)runtime_", + "(?i)query_documents", + "(?i)query_movements", + "(?i)capability_id", + "(?i)selected_chain_id" + ] + }, + { + "step_id": "step_019_pivot_find_svk", + "title": "019_pivot_find_svk", + "question": "Теперь резко переключаемся: найди в 1С контрагента СВК.", + "criticality": "critical", + "semantic_tags": [ + "entity_resolution", + "counterparty_pivot", + "stale_scope_guard" + ], + "notes": "EHMO subset source=019_pivot_find_svk; review_focus=Должен смениться focus с организации на контрагента, без залипания Альтернативы как контрагента.", + "forbidden_answer_patterns": [ + "(?i)business_overview_route_template_v1", + "(?i)mcp_discovery", + "(?i)runtime_", + "(?i)query_documents", + "(?i)query_movements", + "(?i)capability_id", + "(?i)selected_chain_id" + ] + }, + { + "step_id": "step_020_svk_incoming_2020", + "title": "020_svk_incoming_2020", + "question": "Сколько получили по нему за 2020 год?", + "criticality": "critical", + "semantic_tags": [ + "value_flow", + "incoming_value_flow", + "followup_anchor", + "explicit_period" + ], + "notes": "EHMO subset source=020_svk_incoming_2020; review_focus=Scope: выбранный СВК как контрагент, период 2020.", + "forbidden_answer_patterns": [ + "(?i)business_overview_route_template_v1", + "(?i)mcp_discovery", + "(?i)runtime_", + "(?i)query_documents", + "(?i)query_movements", + "(?i)capability_id", + "(?i)selected_chain_id" + ], + "required_answer_patterns_all": [ + "2020" + ] + }, + { + "step_id": "step_021_svk_outgoing_2020", + "title": "021_svk_outgoing_2020", + "question": "А теперь сколько заплатили?", + "criticality": "critical", + 
"semantic_tags": [ + "value_flow", + "outgoing_value_flow", + "followup_reuse", + "date_carryover" + ], + "notes": "EHMO subset source=021_svk_outgoing_2020; review_focus=Проверить payout switch и carryover периода/контрагента.", + "forbidden_answer_patterns": [ + "(?i)business_overview_route_template_v1", + "(?i)mcp_discovery", + "(?i)runtime_", + "(?i)query_documents", + "(?i)query_movements", + "(?i)capability_id", + "(?i)selected_chain_id" + ], + "required_answer_patterns_all": [ + "2020" + ] + }, + { + "step_id": "step_022_svk_net", + "title": "022_svk_net", + "question": "А какое нетто по СВК: сколько получили минус сколько заплатили?", + "criticality": "critical", + "semantic_tags": [ + "value_flow_comparison", + "net_value_flow", + "followup_reuse" + ], + "notes": "EHMO subset source=022_svk_net; review_focus=Нетто должно быть по СВК, не по организации в целом.", + "forbidden_answer_patterns": [ + "(?i)business_overview_route_template_v1", + "(?i)mcp_discovery", + "(?i)runtime_", + "(?i)query_documents", + "(?i)query_movements", + "(?i)capability_id", + "(?i)selected_chain_id" + ] + }, + { + "step_id": "step_023_svk_documents", + "title": "023_svk_documents", + "question": "А по документам СВК что видно?", + "criticality": "critical", + "semantic_tags": [ + "document_evidence", + "counterparty_followup", + "document_pivot" + ], + "notes": "EHMO subset source=023_svk_documents; review_focus=Переход value-flow -> documents не должен терять selected counterparty.", + "forbidden_answer_patterns": [ + "(?i)business_overview_route_template_v1", + "(?i)mcp_discovery", + "(?i)runtime_", + "(?i)query_documents", + "(?i)query_movements", + "(?i)capability_id", + "(?i)selected_chain_id" + ] + }, + { + "step_id": "step_024_svk_movements", + "title": "024_svk_movements", + "question": "А по движениям?", + "criticality": "critical", + "semantic_tags": [ + "movement_evidence", + "document_pivot", + "followup_reuse" + ], + "notes": "EHMO subset source=024_svk_movements; 
review_focus=Движения должны относиться к текущему СВК/document context.", + "forbidden_answer_patterns": [ + "(?i)business_overview_route_template_v1", + "(?i)mcp_discovery", + "(?i)runtime_", + "(?i)query_documents", + "(?i)query_movements", + "(?i)capability_id", + "(?i)selected_chain_id" + ] + }, + { + "step_id": "step_039_metadata_counterparty_catalogs", + "title": "039_metadata_counterparty_catalogs", + "question": "Какие справочники 1С есть по контрагентам?", + "criticality": "critical", + "semantic_tags": [ + "phase83_canary", + "catalog_metadata_surface" + ], + "notes": "EHMO subset source=039_metadata_counterparty_catalogs; review_focus=Metadata lane должен ответить полезно, не ломая бизнес-контекст.", + "forbidden_answer_patterns": [ + "(?i)business_overview_route_template_v1", + "(?i)mcp_discovery", + "(?i)runtime_", + "(?i)query_documents", + "(?i)query_movements", + "(?i)capability_id", + "(?i)selected_chain_id" + ] + }, + { + "step_id": "step_040_metadata_drilldown_neutral", + "title": "040_metadata_drilldown_neutral", + "question": "давай дальше", + "criticality": "critical", + "semantic_tags": [ + "phase83_canary", + "neutral_followup", + "catalog_drilldown" + ], + "notes": "EHMO subset source=040_metadata_drilldown_neutral; review_focus=Нейтральный follow-up должен продолжить metadata drilldown, а не предыдущий деньги/Жуковку.", + "forbidden_answer_patterns": [ + "(?i)business_overview_route_template_v1", + "(?i)mcp_discovery", + "(?i)runtime_", + "(?i)query_documents", + "(?i)query_movements", + "(?i)capability_id", + "(?i)selected_chain_id" + ] + }, + { + "step_id": "step_041_metadata_document_fields", + "title": "041_metadata_document_fields", + "question": "Какие поля и связи стоит смотреть у документов реализации и поступления, если я хочу потом идти в продажи, закупки, оплату и движения?", + "criticality": "critical", + "semantic_tags": [ + "metadata_surface", + "dynamic_schema_traversal", + "route_planning" + ], + "notes": "EHMO subset 
source=041_metadata_document_fields; review_focus=Проверить полезность маршрутизации без жесткой скрепки.", + "forbidden_answer_patterns": [ + "(?i)business_overview_route_template_v1", + "(?i)mcp_discovery", + "(?i)runtime_", + "(?i)query_documents", + "(?i)query_movements", + "(?i)capability_id", + "(?i)selected_chain_id" + ] + }, + { + "step_id": "step_046_vat_metadata", + "title": "046_vat_metadata", + "question": "Мне нужно понять, где в 1С по НДС вообще лежат данные. Какие объекты стоит смотреть по НДС?", + "criticality": "critical", + "semantic_tags": [ + "post_f_canary", + "vat_metadata", + "dynamic_schema_traversal" + ], + "notes": "EHMO subset source=046_vat_metadata; review_focus=Metadata answer should be useful and not block VAT facts incorrectly.", + "forbidden_answer_patterns": [ + "(?i)business_overview_route_template_v1", + "(?i)mcp_discovery", + "(?i)runtime_", + "(?i)query_documents", + "(?i)query_movements", + "(?i)capability_id", + "(?i)selected_chain_id", + "2000-01-01" + ], + "required_answer_patterns_all": [ + "(?i)1C|1С" 
+ ] + }, + { + "step_id": "step_056_selected_item_profitability", + "title": "056_selected_item_profitability", + "question": "По выбранному объекту \"Четки Пост (84*117)\": сколько заработали на продаже, какие закупочные и продажные документы это подтверждают?", + "criticality": "critical", + "semantic_tags": [ + "selected_object_continuity", + "inventory_item_profitability", + "profit_boundary" + ], + "notes": "EHMO subset source=056_selected_item_profitability; review_focus=Selected-item profitability should avoid company-level profit confusion.", + "forbidden_answer_patterns": [ + "(?i)business_overview_route_template_v1", + "(?i)mcp_discovery", + "(?i)runtime_", + "(?i)query_documents", + "(?i)query_movements", + "(?i)capability_id", + "(?i)selected_chain_id" + ] + }, + { + "step_id": "step_061_final_manual_review_summary", + "title": "061_final_manual_review_summary", + "question": "Финально собери executive summary по всему диалогу: где ответы были подтвержденными, где proxy, где не хватило доказательств, и какие места мне руками смотреть особенно внимательно.", + "criticality": "critical", + "semantic_tags": [ + "manual_review_summary", + "context_integrity", + "analyst_synthesis" + ], + "notes": "EHMO subset source=061_final_manual_review_summary; review_focus=Финальный ответ должен удержать контекст всего прогона и честно выделить рискованные зоны.", + "forbidden_answer_patterns": [ + "(?i)business_overview_route_template_v1", + "(?i)mcp_discovery", + "(?i)runtime_", + "(?i)query_documents", + "(?i)query_movements", + "(?i)capability_id", + "(?i)selected_chain_id" + ] + } + ] +} diff --git a/llm_normalizer/backend/dist/services/addressFilterExtractor.js b/llm_normalizer/backend/dist/services/addressFilterExtractor.js index 68b11b6..fdadc79 100644 --- a/llm_normalizer/backend/dist/services/addressFilterExtractor.js +++ b/llm_normalizer/backend/dist/services/addressFilterExtractor.js @@ -73,6 +73,15 @@ const COUNTERPARTY_TOKEN_NOISE = new Set([ "показать", 
"скажи", "выведи", + "видно", + "документам", + "документами", + "движение", + "движения", + "движениям", + "операциям", + "проверить", + "проверь", "show", "list", "контра", @@ -99,7 +108,10 @@ function isCounterpartyFillerToken(token) { if (/^(?:бл[яе]|блять|нах|нахуй|епт|ёпт|епта)$/iu.test(normalized)) { return true; } - if (/^(?:док(?:и|ам|ами|умент(?:ы|ов)?)?|docs?|docy|doci|doki|dokument(?:y|ov|am|a)?)$/iu.test(normalized)) { + if (/^(?:док(?:и|ам|ами|умент(?:ы|ов|ам|ами)?)?|docs?|docy|doci|doki|dokument(?:y|ov|am|a)?)$/iu.test(normalized)) { + return true; + } + if (/^(?:движени[еяям]*|операци[яиюеям]*|проверить|проверь|видно)$/iu.test(normalized)) { return true; } if (/^(?:pokazh?|pokazhi|pokaji|pokezh|kakie|kakoi|kakaya|est|za|po|na|s|vse|all|poka)$/iu.test(normalized)) { diff --git a/llm_normalizer/backend/dist/services/addressIntentResolver.js b/llm_normalizer/backend/dist/services/addressIntentResolver.js index 6030d76..2137dec 100644 --- a/llm_normalizer/backend/dist/services/addressIntentResolver.js +++ b/llm_normalizer/backend/dist/services/addressIntentResolver.js @@ -1661,7 +1661,18 @@ function hasBidirectionalValueFlowComparisonSignal(text) { const hasOutgoingCue = /(?:\u0438\u0441\u0445\u043e\u0434\u044f\u0449|\u0441\u043f\u0438\u0441\u0430\u043d|\u0437\u0430\u043f\u043b\u0430\u0442|\u043f\u043b\u0430\u0442\u0438\u043b|\u043e\u043f\u043b\u0430\u0442|outflow|outgoing|payout)/iu.test(normalized); const hasComparisonCue = /(?:\u0431\u043e\u043b\u044c\u0448|\u043c\u0435\u043d\u044c\u0448|\u0441\u0440\u0430\u0432|\u0438\u043b\u0438|\u043d\u0435\u0442\u0442\u043e|\u0441\u0430\u043b\u044c\u0434\u043e|vs|versus)/iu.test(normalized); const hasValueFlowCue = /(?:\u0434\u0435\u043d\u044c\u0433|\u0434\u0435\u043d\u0435\u0433|\u0434\u0435\u043d\u0435\u0436|\u043f\u043e\u0442\u043e\u043a|\u043e\u0431\u043e\u0440\u043e\u0442|money|cash|flow)/iu.test(normalized); - return hasIncomingCue && hasOutgoingCue && hasComparisonCue && hasValueFlowCue; + const 
hasNetAmountCue = /(?:сколько|сумм|итог|нетто|сальдо|минус|net|total|sum)/iu.test(normalized); + return hasIncomingCue && hasOutgoingCue && hasComparisonCue && (hasValueFlowCue || hasNetAmountCue); +} +function hasVatPeriodInspectionBridgeSignal(text) { + const normalized = String(text ?? "").trim().toLowerCase(); + if (!/(?:ндс|vat)/iu.test(normalized)) { + return false; + } + const hasPeriodCue = /(?:\b(?:19|20)\d{2}\b|за\s+(?:\d{4}|год|период|квартал|месяц|январ|феврал|март|апрел|ма[йя]|июн|июл|август|сентябр|октябр|ноябр|декабр)|\b[1-4]\s*(?:кв|квартал))/iu.test(normalized); + const hasInspectionCue = /(?:что\s+с|позици|основан|не\s+хватает|налогов[а-яё]*\s+вывод|вывод|декларац|книга\s+(?:продаж|покупок)|расшифр|разбор)/iu.test(normalized); + const forecastOnlyCue = /(?:прогноз|план|примерн|ориентировочн)/iu.test(normalized) && !hasInspectionCue; + return hasPeriodCue && hasInspectionCue && !forecastOnlyCue; } function resolveUnicodeAddressIntentBridge(text) { const normalized = String(text ?? 
"").trim().toLowerCase(); @@ -1780,6 +1791,9 @@ function resolveUnicodeAddressIntentBridge(text) { /(?:покупател|клиент|заказ|отгрузк|товар|услуг|задолженн|сальдо|не\s+плат|не\s+оплат|не\s+оплачен|неоплачен|просроч)/iu.test(normalized)) { return unicodeBridgeResolution("list_receivables_counterparties", "high", "receivables_debt_lifecycle_signal_detected"); } + if (hasVatPeriodInspectionBridgeSignal(normalized)) { + return unicodeBridgeResolution("vat_liability_confirmed_for_tax_period", "high", "vat_period_inspection_bridge_signal_detected"); + } const inventoryBridgeIntent = (0, addressInventoryIntentSignals_1.resolveInventoryAddressIntent)(normalized); if (inventoryBridgeIntent) { if (inventoryBridgeIntent.intent === "inventory_aging_by_purchase_date") { diff --git a/llm_normalizer/backend/dist/services/address_runtime/decomposeStage.js b/llm_normalizer/backend/dist/services/address_runtime/decomposeStage.js index 93d7e9a..78ebb86 100644 --- a/llm_normalizer/backend/dist/services/address_runtime/decomposeStage.js +++ b/llm_normalizer/backend/dist/services/address_runtime/decomposeStage.js @@ -189,7 +189,15 @@ const FOLLOWUP_LOW_QUALITY_COUNTERPARTY_TOKENS = new Set([ "сводную", "сводном", "сводного", - "сводному" + "сводному", + "видно", + "документам", + "документами", + "движение", + "движения", + "движениям", + "проверить", + "проверь" ]); const FOLLOWUP_LOW_QUALITY_CONTRACT_TOKENS = new Set([ "за", @@ -661,6 +669,10 @@ function mergeFollowupFilters(current, intent, userMessage, followupContext) { const hasFollowupSignal = hasAddressFollowupContextSignal(userMessage); const hasExplicitPeriodInMessage = hasExplicitPeriodLiteral(userMessage); const hasExplicitCurrentDateInMessage = hasExplicitCurrentDateHint(userMessage); + const currentHasExplicitTemporalScope = hasExplicitPeriodWindow(merged) || + Boolean(toNonEmptyString(merged.as_of_date)) || + hasExplicitPeriodInMessage || + hasExplicitCurrentDateInMessage; const explicitQuotedItem = 
extractSelectedObjectItemFromFollowupText(userMessage); if (!toNonEmptyString(merged.organization) && previousOrganization) { merged.organization = previousOrganization; @@ -945,6 +957,7 @@ function mergeFollowupFilters(current, intent, userMessage, followupContext) { } if (!sameDateRequested && (intent === "inventory_on_hand_as_of_date" || intent === "inventory_supplier_stock_overlap_as_of_date") && + !currentHasExplicitTemporalScope && hasOpenItemsHint(userMessage)) { const inheritedAsOfDate = previousAsOfDate ?? previousPeriodTo ?? previousPeriodFrom; if (inheritedAsOfDate && merged.as_of_date !== inheritedAsOfDate) { diff --git a/llm_normalizer/backend/dist/services/assistantMcpDiscoveryTurnInputAdapter.js b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryTurnInputAdapter.js index 890d2e2..aebaca6 100644 --- a/llm_normalizer/backend/dist/services/assistantMcpDiscoveryTurnInputAdapter.js +++ b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryTurnInputAdapter.js @@ -119,7 +119,28 @@ function isGarbageSemanticAnchorCandidate(value) { "прокси", "proxy", "summary", - "overall" + "overall", + "деньги", + "денег", + "деньгам", + "деньгами", + "ооо", + "ип", + "ао", + "пао", + "зао", + "llc", + "inc", + "corp", + "документам", + "документами", + "движение", + "движения", + "движениям", + "операциям", + "проверить", + "проверь", + "видно" ]).has(compact)) { return true; } @@ -756,6 +777,20 @@ function rawEntityResolutionCandidate(text) { } return null; } +function rawScopedEntityCandidateFromText(text) { + const source = (0, addressTextRepair_1.repairAddressMojibakeText)(String(text ?? "")); + const patterns = [ + /(?:^|[\s,.;:!?])(?:по|у|для|for|by)\s+([\p{L}\d._-]{2,})(?=$|[\s,.;:!?])/iu, + /(?:документ(?:ам|ы)?|движени(?:ям|я)?|операци(?:ям|и)?|плат[её]ж(?:ам|и)?)\s+([\p{L}\d._-]{2,})(?=$|[\s,.;:!?])/iu + ]; + for (const pattern of patterns) { + const candidate = normalizeEntityResolutionCandidate(source.match(pattern)?.[1] ?? 
""); + if (candidate.length >= 2 && !isInvalidEntityCandidate(candidate)) { + return candidate; + } + } + return null; +} function resolveEntityResolutionAmbiguityChoice(text, candidates) { const normalizedText = canonicalizeEntityResolutionCandidate(text); if (!normalizedText || candidates.length <= 0) { @@ -1013,6 +1048,11 @@ function buildAssistantMcpDiscoveryTurnInput(input) { const rawMetadataScopeHint = rawMetadataSignal ? metadataScopeHintFromRawText(rawText) : null; const rawTopicSwitchSignal = hasExplicitTopicSwitchSignal(rawText); const rawEntityCandidate = rawEntityResolutionSignal ? rawEntityResolutionCandidate(rawEntitySourceText) : null; + const rawScopedEntityCandidate = !predecomposeEntities.counterparty && + !predecomposeEntities.organization && + (rawValueFlowSignal || rawLifecycleSignal || rawMetadataSignal || rawBusinessOverviewSignal) + ? rawScopedEntityCandidateFromText(rawEntitySourceText) + : null; const entityResolutionClarificationCandidate = followupSeed.pilotScope === "entity_resolution_search_v1" && followupSeed.entityResolutionStatus === "ambiguous" ? resolveEntityResolutionAmbiguityChoice(rawEntitySourceText, followupSeed.entityResolutionAmbiguityCandidates) @@ -1379,8 +1419,9 @@ function buildAssistantMcpDiscoveryTurnInput(input) { !metadataGroundedDocumentLaneApplicable && !metadataGroundedMovementLaneApplicable }); - const explicitCurrentCounterpartyCandidate = normalizedPredecomposeCounterparty && !isReferentialEntityPlaceholder(normalizedPredecomposeCounterparty) - ? normalizedPredecomposeCounterparty + const explicitCurrentCounterpartyCandidate = (normalizedPredecomposeCounterparty ?? rawScopedEntityCandidate) && + !isReferentialEntityPlaceholder(normalizedPredecomposeCounterparty ?? rawScopedEntityCandidate ?? "") + ? normalizedPredecomposeCounterparty ?? 
rawScopedEntityCandidate : null; const explicitCurrentCounterpartyOverridesFollowupEntity = Boolean(explicitCurrentCounterpartyCandidate && (effectiveFollowupCounterparty || followupSeed.discoveryEntity) && @@ -1434,6 +1475,7 @@ function buildAssistantMcpDiscoveryTurnInput(input) { pushScopedEntityCandidate(entityCandidates, candidate, groundedFollowupEntity); } pushScopedEntityCandidate(entityCandidates, normalizedPredecomposeCounterparty, groundedFollowupEntity); + pushScopedEntityCandidate(entityCandidates, rawScopedEntityCandidate, groundedFollowupEntity); if (!groundedFollowupEntity) { if (!rawMetadataScopeOverridesFollowupEntity) { pushScopedEntityCandidate(entityCandidates, effectiveFollowupCounterparty, null); @@ -1452,7 +1494,7 @@ function buildAssistantMcpDiscoveryTurnInput(input) { } pushUnique(entityCandidates, rawMetadataScopeHint); } - const openScopeValueFlowWithoutCounterparty = valueFlowSignal && !normalizedPredecomposeCounterparty && !effectiveFollowupCounterparty; + const openScopeValueFlowWithoutCounterparty = valueFlowSignal && !explicitCurrentCounterpartyCandidate && !effectiveFollowupCounterparty; const valueFlowOrganizationStaysScope = openScopeValueFlowWithoutCounterparty && Boolean(bidirectionalValueFlowSignal || hasValueRankingSignal(rawText) || @@ -1460,7 +1502,7 @@ function buildAssistantMcpDiscoveryTurnInput(input) { explicitOrganizationScopeSignal || organizationClarificationFollowupApplicable || followupSeed.organization); - const openScopeValueFlowWithoutResolvedCounterparty = Boolean(valueFlowSignal && !normalizedPredecomposeCounterparty && !effectiveFollowupCounterparty); + const openScopeValueFlowWithoutResolvedCounterparty = Boolean(valueFlowSignal && !explicitCurrentCounterpartyCandidate && !effectiveFollowupCounterparty); if (openScopeValueFlowWithoutCounterparty && !valueFlowOrganizationStaysScope) { pushUnique(entityCandidates, predecomposeEntities.organization); pushUnique(entityCandidates, followupSeed.organization); @@ 
-1864,6 +1906,9 @@ function buildAssistantMcpDiscoveryTurnInput(input) { normalizedPredecomposeCounterparty) { pushReason(reasonCodes, "mcp_discovery_counterparty_from_predecompose"); } + if (rawScopedEntityCandidate && !normalizedPredecomposeCounterparty) { + pushReason(reasonCodes, "mcp_discovery_counterparty_from_raw_scope"); + } if (effectiveFollowupCounterparty && !rawEntitySearchOverridesStaleScope && !rawMetadataScopeOverridesFollowupEntity) { diff --git a/llm_normalizer/backend/dist/services/assistantService.js b/llm_normalizer/backend/dist/services/assistantService.js index 55a284f..f803445 100644 --- a/llm_normalizer/backend/dist/services/assistantService.js +++ b/llm_normalizer/backend/dist/services/assistantService.js @@ -1429,13 +1429,22 @@ const ADDRESS_PREDECOMPOSE_NOISE_TOKENS = new Set([ "dokumenty", "документ", "документы", + "документам", + "документами", "документов", "банк", "банковские", "операции", + "операциям", + "движение", + "движения", + "движениям", "платеж", "платёж", "платежи", + "проверить", + "проверь", + "видно", "контрагент", "контрагенту", "контрагента", @@ -2615,10 +2624,10 @@ function hasAddressFollowupContextSignal(userMessage) { if (ultraShortFollowup && hasAny(/^(?:давай|показывай|показывыай|ещ[её]|also|again|go|ok|okay)(?=$|[\s,.;:!?])/iu)) { return true; } - const shortValueFlowRetargetCue = shortFollowup && + const valueFlowRetargetFollowup = minTokens <= 14 && (hasMarker() || hasPointer() || hasAny(/^(?:Р°|a|Рё|i|also|then|now)(?=$|[\s,.;:!?])/iu)) && - hasAny(/(?:нетто|сальдо|разниц|получил|заплатил|поступ|РІС…РѕРґСЏС‰|РёСЃС…РѕРґСЏС‰|РѕР±РѕСЂРѕС‚|выручк|денеж)/iu); - if (shortValueFlowRetargetCue) { + hasAny(/(?:нетто|сальдо|разниц|получил|заплатил|поступ|входящ|исходящ|оборот|выручк|денеж|нетто|сальдо|разниц|получил|заплатил|поступ|РІС…РѕРґСЏС‰|РёСЃС…РѕРґСЏС‰|РѕР±РѕСЂРѕС‚|выручк|денеж)/iu); + if (valueFlowRetargetFollowup) { return true; } if (hasStandaloneAddressTopicSignal(rawText || repairedText)) { diff --git 
a/llm_normalizer/backend/src/services/addressFilterExtractor.ts b/llm_normalizer/backend/src/services/addressFilterExtractor.ts index e14e4eb..0bc0a97 100644 --- a/llm_normalizer/backend/src/services/addressFilterExtractor.ts +++ b/llm_normalizer/backend/src/services/addressFilterExtractor.ts @@ -81,6 +81,15 @@ const COUNTERPARTY_TOKEN_NOISE = new Set([ "показать", "скажи", "выведи", + "видно", + "документам", + "документами", + "движение", + "движения", + "движениям", + "операциям", + "проверить", + "проверь", "show", "list", "контра", @@ -108,7 +117,10 @@ function isCounterpartyFillerToken(token: string): boolean { if (/^(?:бл[яе]|блять|нах|нахуй|епт|ёпт|епта)$/iu.test(normalized)) { return true; } - if (/^(?:док(?:и|ам|ами|умент(?:ы|ов)?)?|docs?|docy|doci|doki|dokument(?:y|ov|am|a)?)$/iu.test(normalized)) { + if (/^(?:док(?:и|ам|ами|умент(?:ы|ов|ам|ами)?)?|docs?|docy|doci|doki|dokument(?:y|ov|am|a)?)$/iu.test(normalized)) { + return true; + } + if (/^(?:движени[еяям]*|операци[яиюеям]*|проверить|проверь|видно)$/iu.test(normalized)) { return true; } if (/^(?:pokazh?|pokazhi|pokaji|pokezh|kakie|kakoi|kakaya|est|za|po|na|s|vse|all|poka)$/iu.test(normalized)) { diff --git a/llm_normalizer/backend/src/services/addressIntentResolver.ts b/llm_normalizer/backend/src/services/addressIntentResolver.ts index 029a111..7602101 100644 --- a/llm_normalizer/backend/src/services/addressIntentResolver.ts +++ b/llm_normalizer/backend/src/services/addressIntentResolver.ts @@ -2147,8 +2147,26 @@ function hasBidirectionalValueFlowComparisonSignal(text: string): boolean { /(?:\u0434\u0435\u043d\u044c\u0433|\u0434\u0435\u043d\u0435\u0433|\u0434\u0435\u043d\u0435\u0436|\u043f\u043e\u0442\u043e\u043a|\u043e\u0431\u043e\u0440\u043e\u0442|money|cash|flow)/iu.test( normalized ); + const hasNetAmountCue = /(?:сколько|сумм|итог|нетто|сальдо|минус|net|total|sum)/iu.test(normalized); - return hasIncomingCue && hasOutgoingCue && hasComparisonCue && hasValueFlowCue; + return hasIncomingCue && 
hasOutgoingCue && hasComparisonCue && (hasValueFlowCue || hasNetAmountCue); +} + +function hasVatPeriodInspectionBridgeSignal(text: string): boolean { + const normalized = String(text ?? "").trim().toLowerCase(); + if (!/(?:ндс|vat)/iu.test(normalized)) { + return false; + } + const hasPeriodCue = + /(?:\b(?:19|20)\d{2}\b|за\s+(?:\d{4}|год|период|квартал|месяц|январ|феврал|март|апрел|ма[йя]|июн|июл|август|сентябр|октябр|ноябр|декабр)|\b[1-4]\s*(?:кв|квартал))/iu.test( + normalized + ); + const hasInspectionCue = + /(?:что\s+с|позици|основан|не\s+хватает|налогов[а-яё]*\s+вывод|вывод|декларац|книга\s+(?:продаж|покупок)|расшифр|разбор)/iu.test( + normalized + ); + const forecastOnlyCue = /(?:прогноз|план|примерн|ориентировочн)/iu.test(normalized) && !hasInspectionCue; + return hasPeriodCue && hasInspectionCue && !forecastOnlyCue; } function resolveUnicodeAddressIntentBridge(text: string): AddressIntentResolution | null { @@ -2381,6 +2399,14 @@ function resolveUnicodeAddressIntentBridge(text: string): AddressIntentResolutio ); } + if (hasVatPeriodInspectionBridgeSignal(normalized)) { + return unicodeBridgeResolution( + "vat_liability_confirmed_for_tax_period", + "high", + "vat_period_inspection_bridge_signal_detected" + ); + } + const inventoryBridgeIntent = resolveInventoryAddressIntent(normalized); if (inventoryBridgeIntent) { if (inventoryBridgeIntent.intent === "inventory_aging_by_purchase_date") { diff --git a/llm_normalizer/backend/src/services/address_runtime/decomposeStage.ts b/llm_normalizer/backend/src/services/address_runtime/decomposeStage.ts index 8549ead..484ead9 100644 --- a/llm_normalizer/backend/src/services/address_runtime/decomposeStage.ts +++ b/llm_normalizer/backend/src/services/address_runtime/decomposeStage.ts @@ -283,7 +283,15 @@ const FOLLOWUP_LOW_QUALITY_COUNTERPARTY_TOKENS = new Set([ "сводную", "сводном", "сводного", - "сводному" + "сводному", + "видно", + "документам", + "документами", + "движение", + "движения", + "движениям", + 
"проверить", + "проверь" ]); const FOLLOWUP_LOW_QUALITY_CONTRACT_TOKENS = new Set([ @@ -858,6 +866,11 @@ function mergeFollowupFilters( const hasFollowupSignal = hasAddressFollowupContextSignal(userMessage); const hasExplicitPeriodInMessage = hasExplicitPeriodLiteral(userMessage); const hasExplicitCurrentDateInMessage = hasExplicitCurrentDateHint(userMessage); + const currentHasExplicitTemporalScope = + hasExplicitPeriodWindow(merged) || + Boolean(toNonEmptyString(merged.as_of_date)) || + hasExplicitPeriodInMessage || + hasExplicitCurrentDateInMessage; const explicitQuotedItem = extractSelectedObjectItemFromFollowupText(userMessage); if (!toNonEmptyString(merged.organization) && previousOrganization) { merged.organization = previousOrganization; @@ -1196,6 +1209,7 @@ function mergeFollowupFilters( if ( !sameDateRequested && (intent === "inventory_on_hand_as_of_date" || intent === "inventory_supplier_stock_overlap_as_of_date") && + !currentHasExplicitTemporalScope && hasOpenItemsHint(userMessage) ) { const inheritedAsOfDate = previousAsOfDate ?? previousPeriodTo ?? 
previousPeriodFrom; diff --git a/llm_normalizer/backend/src/services/assistantMcpDiscoveryTurnInputAdapter.ts b/llm_normalizer/backend/src/services/assistantMcpDiscoveryTurnInputAdapter.ts index c717251..050d2f1 100644 --- a/llm_normalizer/backend/src/services/assistantMcpDiscoveryTurnInputAdapter.ts +++ b/llm_normalizer/backend/src/services/assistantMcpDiscoveryTurnInputAdapter.ts @@ -169,7 +169,28 @@ function isGarbageSemanticAnchorCandidate(value: string | null): boolean { "прокси", "proxy", "summary", - "overall" + "overall", + "деньги", + "денег", + "деньгам", + "деньгами", + "ооо", + "ип", + "ао", + "пао", + "зао", + "llc", + "inc", + "corp", + "документам", + "документами", + "движение", + "движения", + "движениям", + "операциям", + "проверить", + "проверь", + "видно" ]).has(compact) ) { return true; @@ -1099,6 +1120,21 @@ function rawEntityResolutionCandidate(text: string): string | null { return null; } +function rawScopedEntityCandidateFromText(text: string): string | null { + const source = repairAddressMojibakeText(String(text ?? "")); + const patterns = [ + /(?:^|[\s,.;:!?])(?:по|у|для|for|by)\s+([\p{L}\d._-]{2,})(?=$|[\s,.;:!?])/iu, + /(?:документ(?:ам|ы)?|движени(?:ям|я)?|операци(?:ям|и)?|плат[её]ж(?:ам|и)?)\s+([\p{L}\d._-]{2,})(?=$|[\s,.;:!?])/iu + ]; + for (const pattern of patterns) { + const candidate = normalizeEntityResolutionCandidate(source.match(pattern)?.[1] ?? ""); + if (candidate.length >= 2 && !isInvalidEntityCandidate(candidate)) { + return candidate; + } + } + return null; +} + function resolveEntityResolutionAmbiguityChoice(text: string, candidates: string[]): string | null { const normalizedText = canonicalizeEntityResolutionCandidate(text); if (!normalizedText || candidates.length <= 0) { @@ -1419,6 +1455,12 @@ export function buildAssistantMcpDiscoveryTurnInput( const rawMetadataScopeHint = rawMetadataSignal ? 
metadataScopeHintFromRawText(rawText) : null; const rawTopicSwitchSignal = hasExplicitTopicSwitchSignal(rawText); const rawEntityCandidate = rawEntityResolutionSignal ? rawEntityResolutionCandidate(rawEntitySourceText) : null; + const rawScopedEntityCandidate = + !predecomposeEntities.counterparty && + !predecomposeEntities.organization && + (rawValueFlowSignal || rawLifecycleSignal || rawMetadataSignal || rawBusinessOverviewSignal) + ? rawScopedEntityCandidateFromText(rawEntitySourceText) + : null; const entityResolutionClarificationCandidate = followupSeed.pilotScope === "entity_resolution_search_v1" && followupSeed.entityResolutionStatus === "ambiguous" @@ -1870,8 +1912,9 @@ export function buildAssistantMcpDiscoveryTurnInput( !metadataGroundedMovementLaneApplicable }); const explicitCurrentCounterpartyCandidate = - normalizedPredecomposeCounterparty && !isReferentialEntityPlaceholder(normalizedPredecomposeCounterparty) - ? normalizedPredecomposeCounterparty + (normalizedPredecomposeCounterparty ?? rawScopedEntityCandidate) && + !isReferentialEntityPlaceholder(normalizedPredecomposeCounterparty ?? rawScopedEntityCandidate ?? "") + ? normalizedPredecomposeCounterparty ?? 
rawScopedEntityCandidate : null; const explicitCurrentCounterpartyOverridesFollowupEntity = Boolean( explicitCurrentCounterpartyCandidate && @@ -1929,6 +1972,7 @@ export function buildAssistantMcpDiscoveryTurnInput( pushScopedEntityCandidate(entityCandidates, candidate, groundedFollowupEntity); } pushScopedEntityCandidate(entityCandidates, normalizedPredecomposeCounterparty, groundedFollowupEntity); + pushScopedEntityCandidate(entityCandidates, rawScopedEntityCandidate, groundedFollowupEntity); if (!groundedFollowupEntity) { if (!rawMetadataScopeOverridesFollowupEntity) { pushScopedEntityCandidate(entityCandidates, effectiveFollowupCounterparty, null); @@ -1950,7 +1994,7 @@ export function buildAssistantMcpDiscoveryTurnInput( pushUnique(entityCandidates, rawMetadataScopeHint); } const openScopeValueFlowWithoutCounterparty = - valueFlowSignal && !normalizedPredecomposeCounterparty && !effectiveFollowupCounterparty; + valueFlowSignal && !explicitCurrentCounterpartyCandidate && !effectiveFollowupCounterparty; const valueFlowOrganizationStaysScope = openScopeValueFlowWithoutCounterparty && Boolean( @@ -1962,7 +2006,7 @@ export function buildAssistantMcpDiscoveryTurnInput( followupSeed.organization ); const openScopeValueFlowWithoutResolvedCounterparty = Boolean( - valueFlowSignal && !normalizedPredecomposeCounterparty && !effectiveFollowupCounterparty + valueFlowSignal && !explicitCurrentCounterpartyCandidate && !effectiveFollowupCounterparty ); if (openScopeValueFlowWithoutCounterparty && !valueFlowOrganizationStaysScope) { pushUnique(entityCandidates, predecomposeEntities.organization); @@ -2409,6 +2453,9 @@ export function buildAssistantMcpDiscoveryTurnInput( ) { pushReason(reasonCodes, "mcp_discovery_counterparty_from_predecompose"); } + if (rawScopedEntityCandidate && !normalizedPredecomposeCounterparty) { + pushReason(reasonCodes, "mcp_discovery_counterparty_from_raw_scope"); + } if ( effectiveFollowupCounterparty && !rawEntitySearchOverridesStaleScope && diff 
--git a/llm_normalizer/backend/src/services/assistantService.ts b/llm_normalizer/backend/src/services/assistantService.ts index 726ba0c..c0a67fe 100644 --- a/llm_normalizer/backend/src/services/assistantService.ts +++ b/llm_normalizer/backend/src/services/assistantService.ts @@ -1383,13 +1383,22 @@ const ADDRESS_PREDECOMPOSE_NOISE_TOKENS = new Set([ "dokumenty", "документ", "документы", + "документам", + "документами", "документов", "банк", "банковские", "операции", + "операциям", + "движение", + "движения", + "движениям", "платеж", "платёж", "платежи", + "проверить", + "проверь", + "видно", "контрагент", "контрагенту", "контрагента", @@ -2571,10 +2580,10 @@ function hasAddressFollowupContextSignal(userMessage) { if (ultraShortFollowup && hasAny(/^(?:давай|показывай|показывыай|ещ[её]|also|again|go|ok|okay)(?=$|[\s,.;:!?])/iu)) { return true; } - const shortValueFlowRetargetCue = shortFollowup && + const valueFlowRetargetFollowup = minTokens <= 14 && (hasMarker() || hasPointer() || hasAny(/^(?:Р°|a|Рё|i|also|then|now)(?=$|[\s,.;:!?])/iu)) && - hasAny(/(?:нетто|сальдо|разниц|получил|заплатил|поступ|РІС…РѕРґСЏС‰|РёСЃС…РѕРґСЏС‰|РѕР±РѕСЂРѕС‚|выручк|денеж)/iu); - if (shortValueFlowRetargetCue) { + hasAny(/(?:нетто|сальдо|разниц|получил|заплатил|поступ|входящ|исходящ|оборот|выручк|денеж|нетто|сальдо|разниц|получил|заплатил|поступ|РІС…РѕРґСЏС‰|РёСЃС…РѕРґСЏС‰|РѕР±РѕСЂРѕС‚|выручк|денеж)/iu); + if (valueFlowRetargetFollowup) { return true; } if (hasStandaloneAddressTopicSignal(rawText || repairedText)) { diff --git a/llm_normalizer/backend/tests/addressFilterExtractorRegression.test.ts b/llm_normalizer/backend/tests/addressFilterExtractorRegression.test.ts index a212cf0..e6570e7 100644 --- a/llm_normalizer/backend/tests/addressFilterExtractorRegression.test.ts +++ b/llm_normalizer/backend/tests/addressFilterExtractorRegression.test.ts @@ -23,4 +23,25 @@ describe("address filter extractor regressions", () => { 
expect(extracted.extracted_filters.counterparty).toBe("\u0441\u0432\u043a"); expect(extracted.warnings).toContain("counterparty_anchor_derived_from_revenue_phrase"); }); + + it("drops document and movement service words as low-quality counterparty anchors", () => { + const documentNoise = extractAddressFilters( + "документы по контрагенту документам", + "list_documents_by_counterparty" + ); + const movementNoise = extractAddressFilters( + "Проверить движение по счетам или документам", + "list_documents_by_counterparty" + ); + const explicitCheckNoise = extractAddressFilters( + "документы по контрагенту Проверить", + "list_documents_by_counterparty" + ); + + expect(documentNoise.extracted_filters.counterparty).toBeUndefined(); + expect(documentNoise.warnings).toContain("counterparty_anchor_dropped_low_quality"); + expect(movementNoise.extracted_filters.counterparty).toBeUndefined(); + expect(explicitCheckNoise.extracted_filters.counterparty).toBeUndefined(); + expect(explicitCheckNoise.warnings).toContain("counterparty_anchor_dropped_low_quality"); + }); }); diff --git a/llm_normalizer/backend/tests/addressFollowupTemporalRegression.test.ts b/llm_normalizer/backend/tests/addressFollowupTemporalRegression.test.ts index b33ccb2..0963689 100644 --- a/llm_normalizer/backend/tests/addressFollowupTemporalRegression.test.ts +++ b/llm_normalizer/backend/tests/addressFollowupTemporalRegression.test.ts @@ -144,6 +144,31 @@ describe("address follow-up temporal regressions", () => { expect(result?.baseReasons).toContain("period_to_from_followup_context"); }); + it("does not inherit stale inventory as-of date over an explicit fresh snapshot date", () => { + const result = runAddressDecomposeStage( + "Покажи складской срез Альтернативы Плюс на 2026-04-16: что есть в остатках, какие самые заметные позиции, и что это говорит о бизнесе.", + { + previous_intent: "inventory_on_hand_as_of_date", + target_intent: "inventory_on_hand_as_of_date", + previous_filters: { + organization: "ООО 
Альтернатива Плюс", + period_from: "2020-01-01", + period_to: "2020-12-31", + as_of_date: "2020-12-31" + }, + previous_anchor_type: "organization", + previous_anchor_value: "ООО Альтернатива Плюс" + } + ); + + expect(result).not.toBeNull(); + expect(result?.intent.intent).toBe("inventory_on_hand_as_of_date"); + expect(result?.filters.extracted_filters.as_of_date).toBe("2026-04-16"); + expect(result?.filters.extracted_filters.period_from).toBe("2026-04-01"); + expect(result?.filters.extracted_filters.period_to).toBe("2026-04-30"); + expect(result?.baseReasons).not.toContain("as_of_date_from_open_items_followup_context"); + }); + it("retargets inventory purchase-date VAT bridge into confirmed VAT period with inherited purchase month", () => { const result = runAddressDecomposeStage("ндс можешь прикинуть на дату покупки рабочей станции?", { previous_intent: "inventory_purchase_provenance_for_item", @@ -240,4 +265,30 @@ describe("address follow-up temporal regressions", () => { expect(result?.filters.extracted_filters.period_from).toBeUndefined(); expect(result?.filters.extracted_filters.period_to).toBeUndefined(); }); + + it("replaces document and movement service-word anchors from counterparty follow-up context", () => { + const followupContext = { + previous_intent: "customer_revenue_and_payments" as const, + target_intent: "list_documents_by_counterparty" as const, + previous_filters: { + organization: "ООО Альтернатива Плюс", + counterparty: "Группа СВК", + period_from: "2020-01-01", + period_to: "2020-12-31" + }, + previous_anchor_type: "counterparty" as const, + previous_anchor_value: "Группа СВК", + resolved_counterparty_from_display: true + }; + + const documents = runAddressDecomposeStage("документы по контрагенту документам", followupContext); + const movements = runAddressDecomposeStage("Проверить движение по счетам или документам", followupContext); + + expect(documents?.intent.intent).toBe("list_documents_by_counterparty"); + 
expect(documents?.filters.extracted_filters.counterparty).toBe("Группа СВК"); + expect(documents?.baseReasons).toContain("counterparty_from_followup_context"); + expect(movements?.intent.intent).toBe("list_documents_by_counterparty"); + expect(movements?.filters.extracted_filters.counterparty).toBe("Группа СВК"); + expect(movements?.baseReasons).toContain("counterparty_from_followup_context"); + }); }); diff --git a/llm_normalizer/backend/tests/addressVatConfirmedRoute.test.ts b/llm_normalizer/backend/tests/addressVatConfirmedRoute.test.ts index c5cb31e..088ad50 100644 --- a/llm_normalizer/backend/tests/addressVatConfirmedRoute.test.ts +++ b/llm_normalizer/backend/tests/addressVatConfirmedRoute.test.ts @@ -28,6 +28,15 @@ describe("vat payable confirmed as-of route", () => { expect(result.reasons).toContain("vat_liability_colloquial_bridge_signal_detected"); }); + it("keeps VAT period-inspection wording out of inventory snapshot arbitration", () => { + const result = resolveAddressIntent( + "Что с НДС за 2020 год по Альтернативе Плюс: какая позиция видна, на чем она основана и чего не хватает для налогового вывода?" 
+ ); + + expect(result.intent).toBe("vat_liability_confirmed_for_tax_period"); + expect(result.reasons).toContain("vat_period_inspection_bridge_signal_detected"); + }); + it("keeps VAT forecast intent when explicit forecast wording is used", () => { const result = resolveAddressIntent("мож прикинусь плиз скока ндс надо заплатить на 15 марта 2020 года"); expect(result.intent).toBe("vat_payable_forecast"); diff --git a/llm_normalizer/backend/tests/assistantMcpDiscoveryTurnInputAdapter.test.ts b/llm_normalizer/backend/tests/assistantMcpDiscoveryTurnInputAdapter.test.ts index 4b85b79..2341a5c 100644 --- a/llm_normalizer/backend/tests/assistantMcpDiscoveryTurnInputAdapter.test.ts +++ b/llm_normalizer/backend/tests/assistantMcpDiscoveryTurnInputAdapter.test.ts @@ -133,6 +133,36 @@ describe("assistant MCP discovery turn input adapter", () => { expect(result.reason_codes).not.toContain("mcp_discovery_payout_signal_detected"); }); + it("extracts compact scoped counterparty from net follow-up wording when LLM entities are empty", () => { + const orgName = "ООО Альтернатива Плюс"; + const result = buildAssistantMcpDiscoveryTurnInput({ + userMessage: "А какое нетто по СВК: сколько получили минус сколько заплатили?", + followupContext: { + previous_discovery_pilot_scope: "counterparty_value_flow_query_movements_v1", + previous_filters: { + organization: orgName, + period_from: "2020-01-01", + period_to: "2020-12-31" + } + } + }); + + expect(result.adapter_status).toBe("ready"); + expect(result.should_run_discovery).toBe(true); + expect(result.semantic_data_need).toBe("counterparty value-flow evidence"); + expect(result.turn_meaning_ref).toMatchObject({ + asked_domain_family: "counterparty_value", + asked_action_family: "net_value_flow", + explicit_entity_candidates: ["СВК"], + explicit_organization_scope: orgName, + explicit_date_scope: "2020", + unsupported_but_understood_family: "counterparty_bidirectional_value_flow_or_netting", + stale_replay_forbidden: true + }); + 
expect(result.reason_codes).toContain("mcp_discovery_counterparty_from_raw_scope"); + expect(result.reason_codes).toContain("mcp_discovery_bidirectional_value_flow_signal_detected"); + }); + it("overrides a supported exact current-turn payout route when the question asks for a payment amount", () => { const result = buildAssistantMcpDiscoveryTurnInput({ userMessage: