diff --git a/docs/ADDRESS/1.txt b/docs/ADDRESS/1.txt index d0fe72f..c769077 100644 --- a/docs/ADDRESS/1.txt +++ b/docs/ADDRESS/1.txt @@ -1,494 +1,33 @@ # Assistant conversation export -session_id: asst--iAIydc_EC +session_id: asst-HwpNV6T2XO export_mode: technical -exported_at: 2026-04-01T12:33:31.795Z +exported_at: 2026-04-01T16:22:16.775Z ## 1. user -message_id: msg-2d0MoaXT6W -created_at: 2026-04-01T12:32:28.939Z +message_id: msg-LXFxFvHMjy +created_at: 2026-04-01T16:20:29.759Z reply_type: n/a -Какой остаток по счету 60 на дату 2020-07-31? +Какой остаток по счету 60 на май 2020 ## 2. assistant -message_id: msg-MVrv4gqMpT -created_at: 2026-04-01T12:32:29.818Z -reply_type: partial_coverage -trace_id: address-Q_QdUVA19F - -В live-данных по текущему фильтру записи не найдены. -Причина: по выбранным фильтрам в live-выборке нет строк. -Что нужно уточнить: уточните период, контрагента, договор или снимите часть фильтров. - -### technical_debug_payload_json -```json -{ - "trace_id": "address-Q_QdUVA19F", - "prompt_version": "address_query_runtime_v1", - "schema_version": "address_query_runtime_v1", - "fallback_type": "partial", - "route_summary": null, - "fragments": [], - "requirements_extracted": [], - "coverage_report": { - "requirements_total": 0, - "requirements_covered": 0, - "requirements_uncovered": [], - "requirements_partially_covered": [], - "clarification_needed_for": [], - "out_of_scope_requirements": [] - }, - "routes": [], - "retrieval_status": [], - "retrieval_results": [], - "answer_grounding_check": { - "status": "partial", - "route_subject_match": true, - "missing_requirements": [], - "reasons": [ - "address_action_detected", - "address_entity_detected", - "aggregate_signal_detected", - "account_balance_signal_detected" - ], - "why_included_summary": [], - "selection_reason_summary": [] - }, - "dropped_intent_segments": [], - "detected_mode": "address_query", - "detected_mode_confidence": "high", - "query_shape": "AGGREGATE_LOOKUP", - "query_shape_confidence": "high", - "detected_intent": "account_balance_snapshot", - "detected_intent_confidence": "high", - "extracted_filters": { - "sort": "period_desc", - "limit": 20, - "account": "60", - "as_of_date": "2020-07-31" - }, - "missing_required_filters": [], - "selected_recipe": "address_movements_account_snapshot_v1", - "mcp_call_status_legacy": "raw_rows_received_but_not_materialized", - "account_scope_mode": "strict", - "account_scope_fallback_applied": false, - "anchor_type": "account", - "anchor_value_raw": "60", - "anchor_value_resolved": "60", - "resolver_confidence": "high", - "ambiguity_count": 0, - "match_failure_stage": "none", - "match_failure_reason": null, - "mcp_call_status": "raw_rows_received_but_not_materialized", - "rows_fetched": 20, - "raw_rows_received": 20, - "rows_after_account_scope": 0, - "rows_after_recipe_filter": 0, - "rows_materialized": 0, - "rows_matched": 0, - "raw_row_keys_sample": [ - "Период", - "Регистратор", - "СчетДт", - "СчетКт", - "Сумма", - "Period", - "Registrator", - "AccountDt", - "AccountKt", - "Amount" - ], - "materialization_drop_reason": "dropped_by_account_scope_filter", - "account_token_raw": "60", - "account_token_normalized": "60", - "account_scope_fields_checked": [ - "account_dt", - "account_kt", - "registrator", - "analytics" - ], - "account_scope_match_strategy": "account_code_regex_plus_alias_map_v1", - "account_scope_drop_reason": "no_rows_after_scope_filter", - "runtime_readiness": "LIVE_QUERYABLE_WITH_LIMITS", - "limited_reason_category": "empty_match", - "response_type": "LIMITED_WITH_REASON", - "answer_structure_v11": null, - "investigation_state_snapshot": null, - "normalized": null, - "normalizer_output": null -} -``` - -## 3. user -message_id: msg-tPODcsFT2j -created_at: 2026-04-01T12:32:43.787Z -reply_type: n/a - -Какой остаток по счету 76 на дату 2020-12-31? - -## 4. assistant -message_id: msg-sbyXymS8Tu -created_at: 2026-04-01T12:32:44.036Z -reply_type: partial_coverage -trace_id: address-tWfGlq9Bif - -В live-данных по текущему фильтру записи не найдены. -Причина: по выбранным фильтрам в live-выборке нет строк. -Что нужно уточнить: уточните период, контрагента, договор или снимите часть фильтров. - -### technical_debug_payload_json -```json -{ - "trace_id": "address-tWfGlq9Bif", - "prompt_version": "address_query_runtime_v1", - "schema_version": "address_query_runtime_v1", - "fallback_type": "partial", - "route_summary": null, - "fragments": [], - "requirements_extracted": [], - "coverage_report": { - "requirements_total": 0, - "requirements_covered": 0, - "requirements_uncovered": [], - "requirements_partially_covered": [], - "clarification_needed_for": [], - "out_of_scope_requirements": [] - }, - "routes": [], - "retrieval_status": [], - "retrieval_results": [], - "answer_grounding_check": { - "status": "partial", - "route_subject_match": true, - "missing_requirements": [], - "reasons": [ - "address_action_detected", - "address_entity_detected", - "aggregate_signal_detected", - "account_balance_signal_detected" - ], - "why_included_summary": [], - "selection_reason_summary": [] - }, - "dropped_intent_segments": [], - "detected_mode": "address_query", - "detected_mode_confidence": "high", - "query_shape": "AGGREGATE_LOOKUP", - "query_shape_confidence": "high", - "detected_intent": "account_balance_snapshot", - "detected_intent_confidence": "high", - "extracted_filters": { - "sort": "period_desc", - "limit": 20, - "account": "76", - "as_of_date": "2020-12-31" - }, - "missing_required_filters": [], - "selected_recipe": "address_movements_account_snapshot_v1", - "mcp_call_status_legacy": "raw_rows_received_but_not_materialized", - "account_scope_mode": "strict", - "account_scope_fallback_applied": false, - "anchor_type": "account", - "anchor_value_raw": "76", - "anchor_value_resolved": "76", - "resolver_confidence": "high", - "ambiguity_count": 0, - "match_failure_stage": "none", - "match_failure_reason": null, - "mcp_call_status": "raw_rows_received_but_not_materialized", - "rows_fetched": 20, - "raw_rows_received": 20, - "rows_after_account_scope": 0, - "rows_after_recipe_filter": 0, - "rows_materialized": 0, - "rows_matched": 0, - "raw_row_keys_sample": [ - "Период", - "Регистратор", - "СчетДт", - "СчетКт", - "Сумма", - "Period", - "Registrator", - "AccountDt", - "AccountKt", - "Amount" - ], - "materialization_drop_reason": "dropped_by_account_scope_filter", - "account_token_raw": "76", - "account_token_normalized": "76", - "account_scope_fields_checked": [ - "account_dt", - "account_kt", - "registrator", - "analytics" - ], - "account_scope_match_strategy": "account_code_regex_plus_alias_map_v1", - "account_scope_drop_reason": "no_rows_after_scope_filter", - "runtime_readiness": "LIVE_QUERYABLE_WITH_LIMITS", - "limited_reason_category": "empty_match", - "response_type": "LIMITED_WITH_REASON", - "answer_structure_v11": null, - "investigation_state_snapshot": null, - "normalized": null, - "normalizer_output": null -} -``` - -## 5. user -message_id: msg-_0vMruaosw -created_at: 2026-04-01T12:32:52.525Z -reply_type: n/a - -Какие документы формируют остаток по счету 62 на дату 2020-07-31? - -## 6. assistant -message_id: msg-vDgQf1_P6u -created_at: 2026-04-01T12:32:53.206Z -reply_type: partial_coverage -trace_id: address-D2al1c11rV - -В live-данных по текущему фильтру записи не найдены. -Причина: по выбранным фильтрам в live-выборке нет строк. -Что нужно уточнить: уточните период, контрагента, договор или снимите часть фильтров. - -### technical_debug_payload_json -```json -{ - "trace_id": "address-D2al1c11rV", - "prompt_version": "address_query_runtime_v1", - "schema_version": "address_query_runtime_v1", - "fallback_type": "partial", - "route_summary": null, - "fragments": [], - "requirements_extracted": [], - "coverage_report": { - "requirements_total": 0, - "requirements_covered": 0, - "requirements_uncovered": [], - "requirements_partially_covered": [], - "clarification_needed_for": [], - "out_of_scope_requirements": [] - }, - "routes": [], - "retrieval_status": [], - "retrieval_results": [], - "answer_grounding_check": { - "status": "partial", - "route_subject_match": true, - "missing_requirements": [], - "reasons": [ - "address_action_detected", - "address_entity_detected", - "drilldown_signal_detected", - "documents_forming_balance_signal_detected" - ], - "why_included_summary": [], - "selection_reason_summary": [] - }, - "dropped_intent_segments": [], - "detected_mode": "address_query", - "detected_mode_confidence": "high", - "query_shape": "DRILLDOWN_REQUEST", - "query_shape_confidence": "high", - "detected_intent": "documents_forming_balance", - "detected_intent_confidence": "high", - "extracted_filters": { - "sort": "period_desc", - "limit": 20, - "account": "62", - "as_of_date": "2020-07-31" - }, - "missing_required_filters": [], - "selected_recipe": "address_documents_forming_balance_v1", - "mcp_call_status_legacy": "raw_rows_received_but_not_materialized", - "account_scope_mode": "strict", - "account_scope_fallback_applied": false, - "anchor_type": "account", - "anchor_value_raw": "62", - "anchor_value_resolved": "62", - "resolver_confidence": "high", - "ambiguity_count": 0, - "match_failure_stage": "none", - "match_failure_reason": null, - "mcp_call_status": "raw_rows_received_but_not_materialized", - "rows_fetched": 20, - "raw_rows_received": 20, - "rows_after_account_scope": 0, - "rows_after_recipe_filter": 0, - "rows_materialized": 0, - "rows_matched": 0, - "raw_row_keys_sample": [ - "Период", - "Регистратор", - "СчетДт", - "СчетКт", - "Сумма", - "Period", - "Registrator", - "AccountDt", - "AccountKt", - "Amount" - ], - "materialization_drop_reason": "dropped_by_account_scope_filter", - "account_token_raw": "62", - "account_token_normalized": "62", - "account_scope_fields_checked": [ - "account_dt", - "account_kt", - "registrator", - "analytics" - ], - "account_scope_match_strategy": "account_code_regex_plus_alias_map_v1", - "account_scope_drop_reason": "no_rows_after_scope_filter", - "runtime_readiness": "LIVE_QUERYABLE_WITH_LIMITS", - "limited_reason_category": "empty_match", - "response_type": "LIMITED_WITH_REASON", - "answer_structure_v11": null, - "investigation_state_snapshot": null, - "normalized": null, - "normalizer_output": null -} -``` - -## 7. user -message_id: msg-n4DAnggWmb -created_at: 2026-04-01T12:33:02.902Z -reply_type: n/a - -Что формирует остаток по счету 76 на дату 2020-12-31? - -## 8. assistant -message_id: msg-46fXowQ8cs -created_at: 2026-04-01T12:33:03.352Z -reply_type: partial_coverage -trace_id: address-o7q7qa-FOF - -В live-данных по текущему фильтру записи не найдены. -Причина: по выбранным фильтрам в live-выборке нет строк. -Что нужно уточнить: уточните период, контрагента, договор или снимите часть фильтров. - -### technical_debug_payload_json -```json -{ - "trace_id": "address-o7q7qa-FOF", - "prompt_version": "address_query_runtime_v1", - "schema_version": "address_query_runtime_v1", - "fallback_type": "partial", - "route_summary": null, - "fragments": [], - "requirements_extracted": [], - "coverage_report": { - "requirements_total": 0, - "requirements_covered": 0, - "requirements_uncovered": [], - "requirements_partially_covered": [], - "clarification_needed_for": [], - "out_of_scope_requirements": [] - }, - "routes": [], - "retrieval_status": [], - "retrieval_results": [], - "answer_grounding_check": { - "status": "partial", - "route_subject_match": true, - "missing_requirements": [], - "reasons": [ - "address_action_detected", - "address_entity_detected", - "aggregate_signal_detected", - "account_balance_signal_detected" - ], - "why_included_summary": [], - "selection_reason_summary": [] - }, - "dropped_intent_segments": [], - "detected_mode": "address_query", - "detected_mode_confidence": "high", - "query_shape": "AGGREGATE_LOOKUP", - "query_shape_confidence": "high", - "detected_intent": "account_balance_snapshot", - "detected_intent_confidence": "high", - "extracted_filters": { - "sort": "period_desc", - "limit": 20, - "account": "76", - "as_of_date": "2020-12-31" - }, - "missing_required_filters": [], - "selected_recipe": "address_movements_account_snapshot_v1", - "mcp_call_status_legacy": "raw_rows_received_but_not_materialized", - "account_scope_mode": "strict", - "account_scope_fallback_applied": false, - "anchor_type": "account", - "anchor_value_raw": "76", - "anchor_value_resolved": "76", - "resolver_confidence": "high", - "ambiguity_count": 0, - "match_failure_stage": "none", - "match_failure_reason": null, - "mcp_call_status": "raw_rows_received_but_not_materialized", - "rows_fetched": 20, - "raw_rows_received": 20, - "rows_after_account_scope": 0, - "rows_after_recipe_filter": 0, - "rows_materialized": 0, - "rows_matched": 0, - "raw_row_keys_sample": [ - "Период", - "Регистратор", - "СчетДт", - "СчетКт", - "Сумма", - "Period", - "Registrator", - "AccountDt", - "AccountKt", - "Amount" - ], - "materialization_drop_reason": "dropped_by_account_scope_filter", - "account_token_raw": "76", - "account_token_normalized": "76", - "account_scope_fields_checked": [ - "account_dt", - "account_kt", - "registrator", - "analytics" - ], - "account_scope_match_strategy": "account_code_regex_plus_alias_map_v1", - "account_scope_drop_reason": "no_rows_after_scope_filter", - "runtime_readiness": "LIVE_QUERYABLE_WITH_LIMITS", - "limited_reason_category": "empty_match", - "response_type": "LIMITED_WITH_REASON", - "answer_structure_v11": null, - "investigation_state_snapshot": null, - "normalized": null, - "normalizer_output": null -} -``` - -## 9. user -message_id: msg-ATLBoTgwlD -created_at: 2026-04-01T12:33:20.966Z -reply_type: n/a - -Покажи банковские операции по контрагенту СВК с 2020-07-01 по 2020-07-31 - -## 10. assistant -message_id: msg-p1biGQOTcr -created_at: 2026-04-01T12:33:21.620Z +message_id: msg-f4QDcIh8Ra +created_at: 2026-04-01T16:20:35.415Z reply_type: factual -trace_id: address-JwD-3OLHgc +trace_id: address-wfjQbLaPF0 -Собран список банковских операций по контрагенту (live address lane). -Строк отобрано: 2. -1. 2020-07-30T12:00:00Z | Поступление на расчетный счет 00000000029 от 30.07.2020 12:00:00 | 0 / 0 | 20000 | аналитика: Группа СВК -2. 2020-07-27T12:00:00Z | Поступление на расчетный счет 00000000028 от 27.07.2020 12:00:00 | 0 / 0 | 40860 | аналитика: Группа СВК +Адресный срез по счету собран (по движениям live MCP). +Строк отобрано: 6. +Сумма по отобранным движениям: 849499. +1. 2020-05-22T10:14:43Z | Списание с расчетного счета 00000000148 от 22.05.2020 10:14:43 | 60.02 / 51 | 142299 +2. 2020-05-22T10:12:50Z | Списание с расчетного счета 00000000147 от 22.05.2020 10:12:50 | 60.01 / 51 | 75367 +3. 2020-05-22T10:12:50Z | Списание с расчетного счета 00000000147 от 22.05.2020 10:12:50 | 60.01 / 51 | 211833 +4. 2020-05-18T12:00:02Z | Списание с расчетного счета 00000000145 от 18.05.2020 12:00:02 | 60.01 / 51 | 187967 ### technical_debug_payload_json ```json { - "trace_id": "address-JwD-3OLHgc", + "trace_id": "address-wfjQbLaPF0", "prompt_version": "address_query_runtime_v1", "schema_version": "address_query_runtime_v1", "fallback_type": "none", @@ -513,8 +52,8 @@ trace_id: address-JwD-3OLHgc "reasons": [ "address_action_detected", "address_entity_detected", - "object_signal_detected", - "bank_ops_by_counterparty_signal_detected" + "aggregate_signal_detected", + "account_balance_signal_detected" ], "why_included_summary": [], "selection_reason_summary": [] @@ -522,50 +61,52 @@ trace_id: address-JwD-3OLHgc "dropped_intent_segments": [], "detected_mode": "address_query", "detected_mode_confidence": "high", - "query_shape": "OBJECT_LOOKUP", - "query_shape_confidence": "medium", - "detected_intent": "bank_operations_by_counterparty", - "detected_intent_confidence": "medium", + "query_shape": "AGGREGATE_LOOKUP", + "query_shape_confidence": "high", + "detected_intent": "account_balance_snapshot", + "detected_intent_confidence": "high", "extracted_filters": { "sort": "period_desc", "limit": 20, - "counterparty": "СВК", - "period_from": "2020-07-01", - "period_to": "2020-07-31" + "account": "60", + "period_from": "2020-05-01", + "period_to": "2020-05-31", + "as_of_date": "2020-05-31" }, "missing_required_filters": [], - "selected_recipe": "address_bank_operations_by_counterparty_v1", + "selected_recipe": "address_movements_account_snapshot_v1", "mcp_call_status_legacy": "matched_non_empty", - "account_scope_mode": "preferred", + "account_scope_mode": "strict", "account_scope_fallback_applied": false, - "anchor_type": "counterparty", - "anchor_value_raw": "СВК", - "anchor_value_resolved": "Группа СВК", - "resolver_confidence": "medium", + "anchor_type": "account", + "anchor_value_raw": "60", + "anchor_value_resolved": "60", + "resolver_confidence": "high", "ambiguity_count": 0, "match_failure_stage": "none", "match_failure_reason": null, "mcp_call_status": "matched_non_empty", - "rows_fetched": 19, - "raw_rows_received": 19, - "rows_after_account_scope": 3, - "rows_after_recipe_filter": 2, - "rows_materialized": 3, - "rows_matched": 2, + "rows_fetched": 6, + "raw_rows_received": 6, + "rows_after_account_scope": 6, + "rows_after_recipe_filter": 6, + "rows_materialized": 6, + "rows_matched": 6, "raw_row_keys_sample": [ "Период", "Регистратор", "СчетДт", "СчетКт", "Сумма", - "Контрагент", "Period", "Registrator", + "AccountDt", + "AccountKt", "Amount" ], "materialization_drop_reason": "none", - "account_token_raw": null, - "account_token_normalized": null, + "account_token_raw": "60", + "account_token_normalized": "60", "account_scope_fields_checked": [ "account_dt", "account_kt", @@ -573,13 +114,690 @@ trace_id: address-JwD-3OLHgc "analytics" ], "account_scope_match_strategy": "account_code_regex_plus_alias_map_v1", - "account_scope_drop_reason": "not_applicable", + "account_scope_drop_reason": "rows_remaining_after_scope_filter", "runtime_readiness": "LIVE_QUERYABLE_WITH_LIMITS", "limited_reason_category": null, - "response_type": "FACTUAL_LIST", + "response_type": "FACTUAL_SUMMARY", + "execution_lane": "address_query", + "llm_decomposition_applied": true, + "llm_decomposition_attempted": true, + "llm_provider_used": "local", + "llm_decomposition_trace_id": "i0pMbD6A0Kfwwe", + "llm_decomposition_effective_message": "Определить остаток по счету 60 на май 2020 года.", + "llm_decomposition_reason": "raw_fragment_applied", "answer_structure_v11": null, "investigation_state_snapshot": null, "normalized": null, - "normalizer_output": null + "normalizer_output": { + "trace_id": "i0pMbD6A0Kfwwe", + "prompt_version": "normalizer_v2_0_2", + "applied": true, + "effective_message": "Определить остаток по счету 60 на май 2020 года." + } +} +``` + +## 3. user +message_id: msg-bSI5HDsrEG +created_at: 2026-04-01T16:20:41.904Z +reply_type: n/a + +Какой остаток по счету 60 на 2020.05 + +## 4. assistant +message_id: msg-R9Bdu1_mwQ +created_at: 2026-04-01T16:20:47.626Z +reply_type: factual +trace_id: address-HGr18_l4u9 + +Адресный срез по счету собран (по движениям live MCP). +Строк отобрано: 6. +Сумма по отобранным движениям: 849499. +1. 2020-05-22T10:14:43Z | Списание с расчетного счета 00000000148 от 22.05.2020 10:14:43 | 60.02 / 51 | 142299 +2. 2020-05-22T10:12:50Z | Списание с расчетного счета 00000000147 от 22.05.2020 10:12:50 | 60.01 / 51 | 75367 +3. 2020-05-22T10:12:50Z | Списание с расчетного счета 00000000147 от 22.05.2020 10:12:50 | 60.01 / 51 | 211833 +4. 2020-05-18T12:00:02Z | Списание с расчетного счета 00000000145 от 18.05.2020 12:00:02 | 60.01 / 51 | 187967 + +### technical_debug_payload_json +```json +{ + "trace_id": "address-HGr18_l4u9", + "prompt_version": "address_query_runtime_v1", + "schema_version": "address_query_runtime_v1", + "fallback_type": "none", + "route_summary": null, + "fragments": [], + "requirements_extracted": [], + "coverage_report": { + "requirements_total": 0, + "requirements_covered": 0, + "requirements_uncovered": [], + "requirements_partially_covered": [], + "clarification_needed_for": [], + "out_of_scope_requirements": [] + }, + "routes": [], + "retrieval_status": [], + "retrieval_results": [], + "answer_grounding_check": { + "status": "grounded", + "route_subject_match": true, + "missing_requirements": [], + "reasons": [ + "address_action_detected", + "address_entity_detected", + "aggregate_signal_detected", + "account_balance_signal_detected" + ], + "why_included_summary": [], + "selection_reason_summary": [] + }, + "dropped_intent_segments": [], + "detected_mode": "address_query", + "detected_mode_confidence": "high", + "query_shape": "AGGREGATE_LOOKUP", + "query_shape_confidence": "high", + "detected_intent": "account_balance_snapshot", + "detected_intent_confidence": "high", + "extracted_filters": { + "sort": "period_desc", + "limit": 20, + "account": "60", + "period_from": "2020-05-01", + "period_to": "2020-05-31", + "as_of_date": "2020-05-31" + }, + "missing_required_filters": [], + "selected_recipe": "address_movements_account_snapshot_v1", + "mcp_call_status_legacy": "matched_non_empty", + "account_scope_mode": "strict", + "account_scope_fallback_applied": false, + "anchor_type": "account", + "anchor_value_raw": "60", + "anchor_value_resolved": "60", + "resolver_confidence": "high", + "ambiguity_count": 0, + "match_failure_stage": "none", + "match_failure_reason": null, + "mcp_call_status": "matched_non_empty", + "rows_fetched": 6, + "raw_rows_received": 6, + "rows_after_account_scope": 6, + "rows_after_recipe_filter": 6, + "rows_materialized": 6, + "rows_matched": 6, + "raw_row_keys_sample": [ + "Период", + "Регистратор", + "СчетДт", + "СчетКт", + "Сумма", + "Period", + "Registrator", + "AccountDt", + "AccountKt", + "Amount" + ], + "materialization_drop_reason": "none", + "account_token_raw": "60", + "account_token_normalized": "60", + "account_scope_fields_checked": [ + "account_dt", + "account_kt", + "registrator", + "analytics" + ], + "account_scope_match_strategy": "account_code_regex_plus_alias_map_v1", + "account_scope_drop_reason": "rows_remaining_after_scope_filter", + "runtime_readiness": "LIVE_QUERYABLE_WITH_LIMITS", + "limited_reason_category": null, + "response_type": "FACTUAL_SUMMARY", + "execution_lane": "address_query", + "llm_decomposition_applied": true, + "llm_decomposition_attempted": true, + "llm_provider_used": "local", + "llm_decomposition_trace_id": "F9Z2ar78WSI6_p", + "llm_decomposition_effective_message": "Определить остаток по счету 60 на конец мая 2020 года.", + "llm_decomposition_reason": "raw_fragment_applied", + "answer_structure_v11": null, + "investigation_state_snapshot": null, + "normalized": null, + "normalizer_output": { + "trace_id": "F9Z2ar78WSI6_p", + "prompt_version": "normalizer_v2_0_2", + "applied": true, + "effective_message": "Определить остаток по счету 60 на конец мая 2020 года." + } +} +``` + +## 5. user +message_id: msg-jG7p8LlecZ +created_at: 2026-04-01T16:20:54.188Z +reply_type: n/a + +Какой остаток по счету 60 на 2020 май + +## 6. assistant +message_id: msg-K1BNDw4OQ_ +created_at: 2026-04-01T16:20:59.871Z +reply_type: factual +trace_id: address-LDNpl6ws4o + +Адресный срез по счету собран (по движениям live MCP). +Строк отобрано: 6. +Сумма по отобранным движениям: 849499. +1. 2020-05-22T10:14:43Z | Списание с расчетного счета 00000000148 от 22.05.2020 10:14:43 | 60.02 / 51 | 142299 +2. 2020-05-22T10:12:50Z | Списание с расчетного счета 00000000147 от 22.05.2020 10:12:50 | 60.01 / 51 | 75367 +3. 2020-05-22T10:12:50Z | Списание с расчетного счета 00000000147 от 22.05.2020 10:12:50 | 60.01 / 51 | 211833 +4. 2020-05-18T12:00:02Z | Списание с расчетного счета 00000000145 от 18.05.2020 12:00:02 | 60.01 / 51 | 187967 + +### technical_debug_payload_json +```json +{ + "trace_id": "address-LDNpl6ws4o", + "prompt_version": "address_query_runtime_v1", + "schema_version": "address_query_runtime_v1", + "fallback_type": "none", + "route_summary": null, + "fragments": [], + "requirements_extracted": [], + "coverage_report": { + "requirements_total": 0, + "requirements_covered": 0, + "requirements_uncovered": [], + "requirements_partially_covered": [], + "clarification_needed_for": [], + "out_of_scope_requirements": [] + }, + "routes": [], + "retrieval_status": [], + "retrieval_results": [], + "answer_grounding_check": { + "status": "grounded", + "route_subject_match": true, + "missing_requirements": [], + "reasons": [ + "address_action_detected", + "address_entity_detected", + "aggregate_signal_detected", + "account_balance_signal_detected" + ], + "why_included_summary": [], + "selection_reason_summary": [] + }, + "dropped_intent_segments": [], + "detected_mode": "address_query", + "detected_mode_confidence": "high", + "query_shape": "AGGREGATE_LOOKUP", + "query_shape_confidence": "high", + "detected_intent": "account_balance_snapshot", + "detected_intent_confidence": "high", + "extracted_filters": { + "sort": "period_desc", + "limit": 20, + "account": "60", + "period_from": "2020-05-01", + "period_to": "2020-05-31", + "as_of_date": "2020-05-31" + }, + "missing_required_filters": [], + "selected_recipe": "address_movements_account_snapshot_v1", + "mcp_call_status_legacy": "matched_non_empty", + "account_scope_mode": "strict", + "account_scope_fallback_applied": false, + "anchor_type": "account", + "anchor_value_raw": "60", + "anchor_value_resolved": "60", + "resolver_confidence": "high", + "ambiguity_count": 0, + "match_failure_stage": "none", + "match_failure_reason": null, + "mcp_call_status": "matched_non_empty", + "rows_fetched": 6, + "raw_rows_received": 6, + "rows_after_account_scope": 6, + "rows_after_recipe_filter": 6, + "rows_materialized": 6, + "rows_matched": 6, + "raw_row_keys_sample": [ + "Период", + "Регистратор", + "СчетДт", + "СчетКт", + "Сумма", + "Period", + "Registrator", + "AccountDt", + "AccountKt", + "Amount" + ], + "materialization_drop_reason": "none", + "account_token_raw": "60", + "account_token_normalized": "60", + "account_scope_fields_checked": [ + "account_dt", + "account_kt", + "registrator", + "analytics" + ], + "account_scope_match_strategy": "account_code_regex_plus_alias_map_v1", + "account_scope_drop_reason": "rows_remaining_after_scope_filter", + "runtime_readiness": "LIVE_QUERYABLE_WITH_LIMITS", + "limited_reason_category": null, + "response_type": "FACTUAL_SUMMARY", + "execution_lane": "address_query", + "llm_decomposition_applied": true, + "llm_decomposition_attempted": true, + "llm_provider_used": "local", + "llm_decomposition_trace_id": "lT4cg7WUCFBLgw", + "llm_decomposition_effective_message": "Определить остаток по счету 60 на май 2020 года.", + "llm_decomposition_reason": "raw_fragment_applied", + "answer_structure_v11": null, + "investigation_state_snapshot": null, + "normalized": null, + "normalizer_output": { + "trace_id": "lT4cg7WUCFBLgw", + "prompt_version": "normalizer_v2_0_2", + "applied": true, + "effective_message": "Определить остаток по счету 60 на май 2020 года." + } +} +``` + +## 7. user +message_id: msg-yAb2z9q4f8 +created_at: 2026-04-01T16:21:27.684Z +reply_type: n/a + +Покажи сальдо по счету 60.01 на дату 2020-07-31 + +## 8. assistant +message_id: msg-phnVMmFBTg +created_at: 2026-04-01T16:21:33.413Z +reply_type: factual +trace_id: address-gZwPQVl-EQ + +Адресный срез по счету собран (по движениям live MCP). +Строк отобрано: 5. +Сумма по отобранным движениям: 216200. +1. 2020-07-13T12:00:02Z | Поступление товаров и услуг 00000000029 от 13.07.2020 12:00:02 | 41.01 / 60.01 | 74400 +2. 2020-07-13T12:00:02Z | Поступление товаров и услуг 00000000029 от 13.07.2020 12:00:02 | 41.01 / 60.01 | 57600 +3. 2020-07-13T12:00:02Z | Поступление товаров и услуг 00000000029 от 13.07.2020 12:00:02 | 41.01 / 60.01 | 42400 +4. 2020-07-13T12:00:02Z | Поступление товаров и услуг 00000000029 от 13.07.2020 12:00:02 | 41.01 / 60.01 | 34200 + +### technical_debug_payload_json +```json +{ + "trace_id": "address-gZwPQVl-EQ", + "prompt_version": "address_query_runtime_v1", + "schema_version": "address_query_runtime_v1", + "fallback_type": "none", + "route_summary": null, + "fragments": [], + "requirements_extracted": [], + "coverage_report": { + "requirements_total": 0, + "requirements_covered": 0, + "requirements_uncovered": [], + "requirements_partially_covered": [], + "clarification_needed_for": [], + "out_of_scope_requirements": [] + }, + "routes": [], + "retrieval_status": [], + "retrieval_results": [], + "answer_grounding_check": { + "status": "grounded", + "route_subject_match": true, + "missing_requirements": [], + "reasons": [ + "address_entity_detected", + "aggregate_signal_detected", + "account_balance_signal_detected" + ], + "why_included_summary": [], + "selection_reason_summary": [] + }, + "dropped_intent_segments": [], + "detected_mode": "address_query", + "detected_mode_confidence": "medium", + "query_shape": "AGGREGATE_LOOKUP", + "query_shape_confidence": "high", + "detected_intent": "account_balance_snapshot", + "detected_intent_confidence": "high", + "extracted_filters": { + "sort": "period_desc", + "limit": 20, + "account": "60.01", + "period_from": "2020-07-01", + "period_to": "2020-07-31", + "as_of_date": "2020-07-31" + }, + "missing_required_filters": [], + "selected_recipe": "address_movements_account_snapshot_v1", + "mcp_call_status_legacy": "matched_non_empty", + "account_scope_mode": "strict", + "account_scope_fallback_applied": false, + "anchor_type": "account", + "anchor_value_raw": "60.01", + "anchor_value_resolved": "60.01", + "resolver_confidence": "high", + "ambiguity_count": 0, + "match_failure_stage": "none", + "match_failure_reason": null, + "mcp_call_status": "matched_non_empty", + "rows_fetched": 5, + "raw_rows_received": 5, + "rows_after_account_scope": 5, + "rows_after_recipe_filter": 5, + "rows_materialized": 5, + "rows_matched": 5, + "raw_row_keys_sample": [ + "Период", + "Регистратор", + "СчетДт", + "СчетКт", + "Сумма", + "Period", + "Registrator", + "AccountDt", + "AccountKt", + "Amount" + ], + "materialization_drop_reason": "none", + "account_token_raw": "60.01", + "account_token_normalized": "60.1", + "account_scope_fields_checked": [ + "account_dt", + "account_kt", + "registrator", + "analytics" + ], + "account_scope_match_strategy": "account_code_regex_plus_alias_map_v1", + "account_scope_drop_reason": "rows_remaining_after_scope_filter", + "runtime_readiness": "LIVE_QUERYABLE_WITH_LIMITS", + "limited_reason_category": null, + "response_type": "FACTUAL_SUMMARY", + "execution_lane": "address_query", + "llm_decomposition_applied": true, + "llm_decomposition_attempted": true, + "llm_provider_used": "local", + "llm_decomposition_trace_id": "SCPdcYqxpPIQId", + "llm_decomposition_effective_message": "Показать сальдо по счету 60.01 на дату 2020-07-31", + "llm_decomposition_reason": "raw_fragment_applied", + "answer_structure_v11": null, + "investigation_state_snapshot": null, + "normalized": null, + "normalizer_output": { + "trace_id": "SCPdcYqxpPIQId", + "prompt_version": "normalizer_v2_0_2", + "applied": true, + "effective_message": "Показать сальдо по счету 60.01 на дату 2020-07-31" + } +} +``` + +## 9. user +message_id: msg-5feV35vJpK +created_at: 2026-04-01T16:21:45.871Z +reply_type: n/a + +Какие документы формируют остаток по счету 60.01 на дату 2020-07-31 + +## 10. assistant +message_id: msg-uNew3MaEgs +created_at: 2026-04-01T16:21:51.764Z +reply_type: factual +trace_id: address-MUotfXkAFf + +Адресный срез по счету собран (по движениям live MCP). +Строк отобрано: 5. +Сумма по отобранным движениям: 216200. +1. 2020-07-13T12:00:02Z | Поступление товаров и услуг 00000000029 от 13.07.2020 12:00:02 | 41.01 / 60.01 | 74400 +2. 2020-07-13T12:00:02Z | Поступление товаров и услуг 00000000029 от 13.07.2020 12:00:02 | 41.01 / 60.01 | 57600 +3. 2020-07-13T12:00:02Z | Поступление товаров и услуг 00000000029 от 13.07.2020 12:00:02 | 41.01 / 60.01 | 42400 +4. 2020-07-13T12:00:02Z | Поступление товаров и услуг 00000000029 от 13.07.2020 12:00:02 | 41.01 / 60.01 | 34200 + +### technical_debug_payload_json +```json +{ + "trace_id": "address-MUotfXkAFf", + "prompt_version": "address_query_runtime_v1", + "schema_version": "address_query_runtime_v1", + "fallback_type": "none", + "route_summary": null, + "fragments": [], + "requirements_extracted": [], + "coverage_report": { + "requirements_total": 0, + "requirements_covered": 0, + "requirements_uncovered": [], + "requirements_partially_covered": [], + "clarification_needed_for": [], + "out_of_scope_requirements": [] + }, + "routes": [], + "retrieval_status": [], + "retrieval_results": [], + "answer_grounding_check": { + "status": "grounded", + "route_subject_match": true, + "missing_requirements": [], + "reasons": [ + "address_action_detected", + "address_entity_detected", + "document_list_signal_detected", + "account_balance_signal_detected" + ], + "why_included_summary": [], + "selection_reason_summary": [] + }, + "dropped_intent_segments": [], + "detected_mode": "address_query", + "detected_mode_confidence": "high", + "query_shape": "DOCUMENT_LIST", + "query_shape_confidence": "medium", + "detected_intent": "account_balance_snapshot", + "detected_intent_confidence": "high", + "extracted_filters": { + "sort": "period_desc", + "limit": 20, + "account": "60.01", + "period_from": "2020-07-01", + "period_to": "2020-07-31", + "as_of_date": "2020-07-31" + }, + "missing_required_filters": [], + "selected_recipe": "address_movements_account_snapshot_v1", + "mcp_call_status_legacy": "matched_non_empty", + "account_scope_mode": "strict", + "account_scope_fallback_applied": false, + "anchor_type": "account", + "anchor_value_raw": "60.01", + "anchor_value_resolved": "60.01", + "resolver_confidence": "high", + "ambiguity_count": 0, + "match_failure_stage": "none", + "match_failure_reason": null, + "mcp_call_status": "matched_non_empty", + "rows_fetched": 5, + "raw_rows_received": 5, + "rows_after_account_scope": 5, + "rows_after_recipe_filter": 5, + "rows_materialized": 5, + "rows_matched": 5, + "raw_row_keys_sample": [ + "Период", + "Регистратор", + "СчетДт", + "СчетКт", + "Сумма", + "Period", + "Registrator", + "AccountDt", + "AccountKt", + "Amount" + ], + "materialization_drop_reason": "none", + "account_token_raw": "60.01", + "account_token_normalized": "60.1", + "account_scope_fields_checked": [ + "account_dt", + "account_kt", + "registrator", + "analytics" + ], + "account_scope_match_strategy": "account_code_regex_plus_alias_map_v1", + "account_scope_drop_reason": "rows_remaining_after_scope_filter", + "runtime_readiness": "LIVE_QUERYABLE_WITH_LIMITS", + "limited_reason_category": null, + "response_type": "FACTUAL_SUMMARY", + "execution_lane": "address_query", + "llm_decomposition_applied": true, + "llm_decomposition_attempted": true, + "llm_provider_used": "local", + "llm_decomposition_trace_id": "K3_LT-taCw3G6_", + "llm_decomposition_effective_message": "Показать документы, формирующие остаток по счету 60.01 на дату 2020-07-31", + "llm_decomposition_reason": "raw_fragment_applied", + "answer_structure_v11": null, + "investigation_state_snapshot": null, + "normalized": null, + "normalizer_output": { + "trace_id": "K3_LT-taCw3G6_", + "prompt_version": "normalizer_v2_0_2", + "applied": true, + "effective_message": "Показать документы, формирующие остаток по счету 60.01 на дату 2020-07-31" + } +} +``` + +## 11. user +message_id: msg-jLJS0_9JeA +created_at: 2026-04-01T16:22:02.102Z +reply_type: n/a + +Покажи банковские операции по контрагенту СВК за 2020 год + +## 12. assistant +message_id: msg-6ITp6xDQ5N +created_at: 2026-04-01T16:22:08.109Z +reply_type: partial_coverage +trace_id: address-UkUDVPyDcS + +Для точного адресного поиска не хватает обязательного якоря. +Причина: якорь контрагента/договора не найден в материализованных live-строках. +Что нужно уточнить: уточните контрагента точным именем или добавьте ИНН/договор. + +### technical_debug_payload_json +```json +{ + "trace_id": "address-UkUDVPyDcS", + "prompt_version": "address_query_runtime_v1", + "schema_version": "address_query_runtime_v1", + "fallback_type": "partial", + "route_summary": null, + "fragments": [], + "requirements_extracted": [], + "coverage_report": { + "requirements_total": 0, + "requirements_covered": 0, + "requirements_uncovered": [], + "requirements_partially_covered": [], + "clarification_needed_for": [], + "out_of_scope_requirements": [] + }, + "routes": [], + "retrieval_status": [], + "retrieval_results": [], + "answer_grounding_check": { + "status": "partial", + "route_subject_match": true, + "missing_requirements": [], + "reasons": [ + "address_entity_detected", + "object_signal_detected", + "account_balance_signal_detected" + ], + "why_included_summary": [], + "selection_reason_summary": [] + }, + "dropped_intent_segments": [], + "detected_mode": "address_query", + "detected_mode_confidence": "medium", + "query_shape": "OBJECT_LOOKUP", + "query_shape_confidence": "medium", + "detected_intent": "account_balance_snapshot", + "detected_intent_confidence": "high", + "extracted_filters": { + "sort": "period_desc", + "limit": 20, + "account": "51", + "counterparty": "СВК", + "period_from": "2020-01-01", + "period_to": "2020-12-31", + "as_of_date": "2020-12-31" + }, + "missing_required_filters": [], + "selected_recipe": "address_movements_account_snapshot_v1", + "mcp_call_status_legacy": "materialized_but_not_matched", + "account_scope_mode": "strict", + "account_scope_fallback_applied": false, + "anchor_type": "account", + "anchor_value_raw": "51", + "anchor_value_resolved": "51", + "resolver_confidence": "high", + "ambiguity_count": 0, + "match_failure_stage": "materialized_but_not_anchor_matched", + "match_failure_reason": "counterparty_anchor_not_matched_in_materialized_rows", + "mcp_call_status": "materialized_but_not_anchor_matched", + "rows_fetched": 200, + "raw_rows_received": 200, + "rows_after_account_scope": 200, + "rows_after_recipe_filter": 0, + "rows_materialized": 200, + "rows_matched": 0, + "raw_row_keys_sample": [ + "Период", + "Регистратор", + "СчетДт", + "СчетКт", + "Сумма", + "Period", + "Registrator", + "AccountDt", + "AccountKt", + "Amount" + ], + "materialization_drop_reason": "none", + "account_token_raw": "51", + "account_token_normalized": "51", + "account_scope_fields_checked": [ + "account_dt", + "account_kt", + "registrator", + "analytics" + ], + "account_scope_match_strategy": "account_code_regex_plus_alias_map_v1", + "account_scope_drop_reason": "rows_remaining_after_scope_filter", + "runtime_readiness": "LIVE_QUERYABLE_WITH_LIMITS", + "limited_reason_category": "missing_anchor", + "response_type": "LIMITED_WITH_REASON", + "execution_lane": "address_query", + "llm_decomposition_applied": true, + "llm_decomposition_attempted": true, + "llm_provider_used": "local", + "llm_decomposition_trace_id": "0cLF7I15mPRopo", + "llm_decomposition_effective_message": "Показать банковские операции (счета 51, 62) для контрагента СВК за период 2020 год.", + "llm_decomposition_reason": "raw_fragment_applied", + "answer_structure_v11": null, + "investigation_state_snapshot": null, + "normalized": null, + "normalizer_output": { + "trace_id": "0cLF7I15mPRopo", + "prompt_version": "normalizer_v2_0_2", + "applied": true, + "effective_message": "Показать банковские операции (счета 51, 62) для контрагента СВК за период 2020 год." + } } ``` diff --git a/docs/ADDRESS/address_query/README.md b/docs/ADDRESS/address_query/README.md index aa2101a..ba9acce 100644 --- a/docs/ADDRESS/address_query/README.md +++ b/docs/ADDRESS/address_query/README.md @@ -25,4 +25,5 @@ - `docs/ADDRESS/runs/2026-03-29_Address_Query_Runtime_V1_M2_3B_AccountScope_Mode_Tuning/` - `docs/ADDRESS/runs/2026-03-29_Address_Query_Runtime_V1_M2_3C_Resolver_Filter_Tuning_And_AccountScope_Audit/` - `docs/ADDRESS/runs/2026-04-01_Address_Query_Runtime_V1_M2_3D_Query_Variants_Expansion/` +- `docs/ADDRESS/runs/2026-04-01_Address_Query_Runtime_V1_M2_3E_Stability_Hardening_AccountQueryScope/` diff --git a/docs/ADDRESS/runs/2026-04-01_Address_Query_Runtime_V1_M2_3E_Stability_Hardening_AccountQueryScope/README.md b/docs/ADDRESS/runs/2026-04-01_Address_Query_Runtime_V1_M2_3E_Stability_Hardening_AccountQueryScope/README.md new file mode 100644 index 0000000..2b8c934 --- /dev/null +++ b/docs/ADDRESS/runs/2026-04-01_Address_Query_Runtime_V1_M2_3E_Stability_Hardening_AccountQueryScope/README.md @@ -0,0 +1,59 @@ +# Address Query Runtime V1 - M2.3e Stability Hardening (Noise + Account Query Scope) + +Date: 2026-04-01 (updated) + +## Goal + +Finalize the stage before local LLM switch with production-safe stability: + +- robust noisy phrase handling; +- deterministic month/account filtering; +- safer LLM pre-decompose integration without semantic drift. + +## Implemented + +1. Noise hardening for counterparty extraction: +- slang/noise tails (`плс`, `pls`, profanity markers) are ignored as anchors; +- free-text heuristic for counterparty in document/bank requests without strict `по `. + +2. Relaxed year extraction: +- compact forms like `20 год` -> `2020-01-01..2020-12-31`; +- standalone year mentions in noisy phrasing. + +3. Month period hardening for balance intents: +- equivalent mapping for: + - `на май 2020` + - `на 2020.05` + - `на 2020 май` +- all forms now converge to `period_from=2020-05-01`, `period_to=2020-05-31`, `as_of_date=2020-05-31`. + +4. Account query scope hardening in recipe: +- account condition injected into movements query using account code fields; +- removed fragile presentation-based filtering path for account subaccounts. + +5. LLM pre-decompose salvage path: +- when strict normalized payload has no usable fragments, runtime now extracts a safe fragment from raw model JSON; +- decomposition reason now can be `raw_fragment_applied` instead of always falling back to deterministic parsing. + +6. Intent routing hardening against LLM drift: +- `bank_operations_by_counterparty` has higher priority than generic account fallback when party/bank signals are present; +- strengthened Russian phrasing detection for `documents_forming_balance` (including participle forms like `формирующие остаток`). + +7. Test coverage expansion: +- month parsing regressions (`на май 2020`, `на 2020.05`, `на 2020 май`); +- bank-ops intent priority regression with injected account hints; +- documents-forming-balance participle phrasing regression; +- existing noisy-query and account-scope regressions retained. + +## Verification + +- `npx vitest tests/addressQueryRuntimeM23.test.ts` -> PASS (`46` tests) +- `npx vitest tests/assistantAddressFollowupContext.test.ts` -> PASS (`1` test) +- `npm run build` -> PASS + +## Notes + +- Architecture remains hybrid and stable: deterministic parser + guarded heuristics + optional LLM pre-decompose. +- LLM pre-decompose is now resilient to schema drift in local model output. +- No free-form query builder added. +- No deep-analysis lane changes. diff --git a/docs/ADDRESS/runs/2026-04-01_Address_Query_Runtime_V1_M2_3E_Stability_Hardening_AccountQueryScope/before_after_metrics.json b/docs/ADDRESS/runs/2026-04-01_Address_Query_Runtime_V1_M2_3E_Stability_Hardening_AccountQueryScope/before_after_metrics.json new file mode 100644 index 0000000..963a316 --- /dev/null +++ b/docs/ADDRESS/runs/2026-04-01_Address_Query_Runtime_V1_M2_3E_Stability_Hardening_AccountQueryScope/before_after_metrics.json @@ -0,0 +1,29 @@ +{ + "run_id": "2026-04-01_Address_Query_Runtime_V1_M2_3E_Stability_Hardening_AccountQueryScope", + "comparison": { + "baseline_ref": "2026-04-01_Address_Query_Runtime_V1_M2_3D_Query_Variants_Expansion", + "current_ref": "workspace (after M2.3e stability update)" + }, + "metrics": { + "address_m23_test_cases": { + "before": 39, + "after": 46, + "delta": 7 + }, + "address_m23_test_failures": { + "before": 0, + "after": 0, + "delta": 0 + }, + "llm_predecompose_signal": { + "before": "attempted_but_often_no_usable_fragment", + "after": "raw_fragment_salvage_available", + "delta": "stability_improved" + } + }, + "notes": [ + "delta includes month-format parsing regressions and intent-priority regressions", + "added fallback extraction from raw local LLM JSON when strict normalized fragments are empty", + "added regression checks for: 'на 2020.05', 'на 2020 май', bank ops with account hints" + ] +} diff --git a/docs/ADDRESS/runs/2026-04-01_Address_Query_Runtime_V1_M2_3E_Stability_Hardening_AccountQueryScope/changed_files.txt b/docs/ADDRESS/runs/2026-04-01_Address_Query_Runtime_V1_M2_3E_Stability_Hardening_AccountQueryScope/changed_files.txt new file mode 100644 index 0000000..71b689a --- /dev/null +++ b/docs/ADDRESS/runs/2026-04-01_Address_Query_Runtime_V1_M2_3E_Stability_Hardening_AccountQueryScope/changed_files.txt @@ -0,0 +1,11 @@ +llm_normalizer/backend/src/services/addressFilterExtractor.ts +llm_normalizer/backend/src/services/addressIntentResolver.ts +llm_normalizer/backend/src/services/addressRecipeCatalog.ts +llm_normalizer/backend/src/services/assistantService.ts +llm_normalizer/backend/tests/addressQueryRuntimeM23.test.ts + +docs/ADDRESS/runs/2026-04-01_Address_Query_Runtime_V1_M2_3E_Stability_Hardening_AccountQueryScope/README.md +docs/ADDRESS/runs/2026-04-01_Address_Query_Runtime_V1_M2_3E_Stability_Hardening_AccountQueryScope/run_summary.json +docs/ADDRESS/runs/2026-04-01_Address_Query_Runtime_V1_M2_3E_Stability_Hardening_AccountQueryScope/before_after_metrics.json +docs/ADDRESS/runs/2026-04-01_Address_Query_Runtime_V1_M2_3E_Stability_Hardening_AccountQueryScope/smoke_checks.md +docs/ADDRESS/runs/2026-04-01_Address_Query_Runtime_V1_M2_3E_Stability_Hardening_AccountQueryScope/changed_files.txt diff --git a/docs/ADDRESS/runs/2026-04-01_Address_Query_Runtime_V1_M2_3E_Stability_Hardening_AccountQueryScope/run_summary.json b/docs/ADDRESS/runs/2026-04-01_Address_Query_Runtime_V1_M2_3E_Stability_Hardening_AccountQueryScope/run_summary.json new file mode 100644 index 0000000..3af909b --- /dev/null +++ b/docs/ADDRESS/runs/2026-04-01_Address_Query_Runtime_V1_M2_3E_Stability_Hardening_AccountQueryScope/run_summary.json @@ -0,0 +1,43 @@ +{ + "run_id": "2026-04-01_Address_Query_Runtime_V1_M2_3E_Stability_Hardening_AccountQueryScope", + "date": "2026-04-01", + "stage": "Address Query Runtime V1", + "wave": "M2.3e", + "goal": "Stability hardening before local LLM switch: noisy phrase robustness + account/query period determinism + LLM pre-decompose resilience", + "status": "COMPLETED_UPDATED", + "scope": { + "new_intents": false, + "deep_analysis_changes": false, + "focus": [ + "noisy phrase anchor stability", + "relaxed year extraction", + "month format normalization (month-year/year-month/year month)", + "account query scope in movements recipe", + "LLM pre-decompose raw-fragment salvage", + "intent priority hardening for bank ops vs account fallback", + "regression/unit coverage expansion" + ] + }, + "checks": { + "test_command_primary": "npx vitest tests/addressQueryRuntimeM23.test.ts", + "test_command_secondary": "npx vitest tests/assistantAddressFollowupContext.test.ts", + "build_command": "npm run build", + "tests_passed": 47, + "tests_failed": 0 + }, + "guardrails": { + "false_factual_rate_target": 0, + "free_form_query_builder": "not_added", + "whitelist_recipe_policy": "unchanged" + }, + "key_changes": { + "noise_tokens_not_used_as_anchor": true, + "free_text_counterparty_heuristic": true, + "compact_year_support": true, + "month_year_variants_supported": true, + "movements_query_account_condition_injected": true, + "llm_predecompose_raw_salvage": true, + "intent_priority_bank_ops_over_account_fallback": true, + "documents_forming_balance_participle_detection": true + } +} diff --git a/docs/ADDRESS/runs/2026-04-01_Address_Query_Runtime_V1_M2_3E_Stability_Hardening_AccountQueryScope/smoke_checks.md b/docs/ADDRESS/runs/2026-04-01_Address_Query_Runtime_V1_M2_3E_Stability_Hardening_AccountQueryScope/smoke_checks.md new file mode 100644 index 0000000..f0ae657 --- /dev/null +++ b/docs/ADDRESS/runs/2026-04-01_Address_Query_Runtime_V1_M2_3E_Stability_Hardening_AccountQueryScope/smoke_checks.md @@ -0,0 +1,23 @@ +# Smoke Checks + +## Backend tests + +- Command: `npx vitest tests/addressQueryRuntimeM23.test.ts` +- Result: PASS +- Details: `1 passed file`, `46 passed tests`, `0 failed` + +- Command: `npx vitest tests/assistantAddressFollowupContext.test.ts` +- Result: PASS +- Details: `1 passed file`, `1 passed test`, `0 failed` + +## Build + +- Command: `npm run build` +- Result: PASS +- Details: TypeScript build completed without errors. + +## Manual spot-check (address lane) + +- File: `docs/ADDRESS/1.txt` +- Result: month-variant account questions produce consistent May-2020 window. +- Result: last bank-operations question now covered by intent-priority hardening (expected recipe: bank ops by counterparty). diff --git a/llm_normalizer/backend/dist/services/addressFilterExtractor.js b/llm_normalizer/backend/dist/services/addressFilterExtractor.js index b2068c2..821677a 100644 --- a/llm_normalizer/backend/dist/services/addressFilterExtractor.js +++ b/llm_normalizer/backend/dist/services/addressFilterExtractor.js @@ -14,8 +14,10 @@ const YEAR_RANGE_LOOSE_PATTERN = /\b(20\d{2})\b\s*(?:[-‐‑‒–—―−]|д const YEAR_PERIOD_PATTERN = /(?:за|for)\s*(20\d{2})(?!\s*(?:[-‐‑‒–—―−]|до|to|по)\s*20\d{2})\s*(?:г(?:од|ода)?\.?|year)?/iu; const YEAR_PERIOD_SHORT_PATTERN = /(?:^|[\s,.;:!?()\-])(\d{2})\s*(?:г(?:од|ода)?\.?|year)(?=$|[\s,.;:!?()\-])/iu; const YEAR_PERIOD_ANY_PATTERN = /(?:^|[\s,.;:!?()\-])((?:19|20)\d{2})(?!\s*(?:[-‐‑‒–—―−]|до|to|по)\s*(?:19|20)\d{2})(?![.\/-]\d)(?:\s*(?:г(?:од|ода)?\.?|year))?(?=$|[\s,.;:!?()\-])/iu; -const MONTH_PERIOD_NUMERIC_PATTERN = /(?:за|for)\s*(0?[1-9]|1[0-2])[.\/-](20\d{2})/i; -const MONTH_PERIOD_NAME_PATTERN = /(?:за|for)\s+([a-zа-яё]+)\s+(20\d{2})(?:\s*г(?:од|ода|\\.)?)?/iu; +const MONTH_PERIOD_NUMERIC_MONTH_YEAR_PATTERN = /(?:^|[\s,.;:!?()\-])(?:за|for|на|in)?\s*(0?[1-9]|1[0-2])[.\/-](20\d{2})(?=$|[\s,.;:!?()\-])/iu; +const MONTH_PERIOD_NUMERIC_YEAR_MONTH_PATTERN = /(?:^|[\s,.;:!?()\-])(?:за|for|на|in)?\s*(20\d{2})[.\/-](0?[1-9]|1[0-2])(?=$|[\s,.;:!?()\-])/iu; +const MONTH_PERIOD_NAME_PATTERN = /(?:^|[\s,.;:!?()\-])(?:за|for|на|in)?\s*([a-zа-яё]+)\s+(20\d{2})(?:\s*г(?:од|ода|\\.)?)?(?=$|[\s,.;:!?()\-])/iu; +const MONTH_PERIOD_NAME_YEAR_FIRST_PATTERN = /(?:^|[\s,.;:!?()\-])(?:за|for|на|in)?\s*(20\d{2})(?:\s*г(?:од|ода|\\.)?)?\s+([a-zа-яё]+)(?=$|[\s,.;:!?()\-])/iu; function toIsoDate(year, month, day) { if (!Number.isInteger(year) || !Number.isInteger(month) || !Number.isInteger(day)) { return null; @@ -100,10 +102,22 @@ function resolveMonthByName(rawMonthName) { return undefined; } function extractMonthPeriod(text) { - const numericMatch = text.match(MONTH_PERIOD_NUMERIC_PATTERN); - if (numericMatch) { - const month = Number(numericMatch[1]); - const year = Number(numericMatch[2]); + const numericMonthYearMatch = text.match(MONTH_PERIOD_NUMERIC_MONTH_YEAR_PATTERN); + if (numericMonthYearMatch) { + const month = Number(numericMonthYearMatch[1]); + const year = Number(numericMonthYearMatch[2]); + if (month >= 1 && month <= 12 && year >= 2000 && year <= 2099) { + const lastDay = new Date(Date.UTC(year, month, 0)).getUTCDate(); + return { + period_from: `${year}-${String(month).padStart(2, "0")}-01`, + period_to: `${year}-${String(month).padStart(2, "0")}-${String(lastDay).padStart(2, "0")}` + }; + } + } + const numericYearMonthMatch = text.match(MONTH_PERIOD_NUMERIC_YEAR_MONTH_PATTERN); + if (numericYearMonthMatch) { + const year = Number(numericYearMonthMatch[1]); + const month = Number(numericYearMonthMatch[2]); if (month >= 1 && month <= 12 && year >= 2000 && year <= 2099) { const lastDay = new Date(Date.UTC(year, month, 0)).getUTCDate(); return { @@ -124,6 +138,18 @@ function extractMonthPeriod(text) { }; } } + const byNameYearFirstMatch = text.match(MONTH_PERIOD_NAME_YEAR_FIRST_PATTERN); + if (byNameYearFirstMatch) { + const year = Number(byNameYearFirstMatch[1]); + const month = resolveMonthByName(String(byNameYearFirstMatch[2])); + if (month && year >= 2000 && year <= 2099) { + const lastDay = new Date(Date.UTC(year, month, 0)).getUTCDate(); + return { + period_from: `${year}-${String(month).padStart(2, "0")}-01`, + period_to: `${year}-${String(month).padStart(2, "0")}-${String(lastDay).padStart(2, "0")}` + }; + } + } return {}; } function extractPeriodRange(text) { diff --git a/llm_normalizer/backend/dist/services/addressIntentResolver.js b/llm_normalizer/backend/dist/services/addressIntentResolver.js index c6ba454..9489f03 100644 --- a/llm_normalizer/backend/dist/services/addressIntentResolver.js +++ b/llm_normalizer/backend/dist/services/addressIntentResolver.js @@ -100,6 +100,19 @@ const BANK_OPERATIONS_BY_COUNTERPARTY_HINTS = [ function hasAny(text, patterns) { return patterns.some((item) => text.includes(item)); } +function hasDocumentsFormingBalanceSignal(text) { + if (hasAny(text, DOCUMENTS_FORMING_BALANCE_HINTS)) { + return true; + } + const hasDocLexeme = text.includes("документ") || text.includes("доки"); + const hasFormingLexeme = text.includes("формир"); + const hasBalanceLexeme = text.includes("остат"); + const hasAccountLexeme = text.includes("счет") || text.includes("счёт") || hasAccountNumberAnchor(text); + if (hasDocLexeme && hasFormingLexeme && hasBalanceLexeme && hasAccountLexeme) { + return true; + } + return hasBalanceLexeme && hasAccountLexeme && text.includes("из чего состоит"); +} function isLikelyCounterpartyToken(rawToken) { const token = String(rawToken ?? "").trim().toLowerCase(); if (!token || token.length < 2) { @@ -276,20 +289,13 @@ function resolveAddressIntent(userMessage) { reasons: ["payables_signal_detected"] }; } - if (hasAny(text, DOCUMENTS_FORMING_BALANCE_HINTS) && (hasAccountNumberAnchor(text) || text.includes("счет"))) { + if (hasDocumentsFormingBalanceSignal(text) && (hasAccountNumberAnchor(text) || text.includes("счет"))) { return { intent: "documents_forming_balance", confidence: "high", reasons: ["documents_forming_balance_signal_detected"] }; } - if (hasAny(text, ACCOUNT_BALANCE_HINTS) || hasAccountNumberAnchor(text)) { - return { - intent: "account_balance_snapshot", - confidence: "high", - reasons: ["account_balance_signal_detected"] - }; - } if (hasAny(text, BANK_OPERATIONS_BY_COUNTERPARTY_HINTS) && (hasPartyAnchorMention(text) || hasLooseByAnchorMention(text) || hasHeuristicCounterpartyAnchor(text))) { return { @@ -309,6 +315,13 @@ function resolveAddressIntent(userMessage) { reasons: ["documents_by_counterparty_signal_detected"] }; } + if (hasAny(text, ACCOUNT_BALANCE_HINTS) || hasAccountNumberAnchor(text)) { + return { + intent: "account_balance_snapshot", + confidence: "high", + reasons: ["account_balance_signal_detected"] + }; + } if (hasLooseByAnchorMention(text) && hasGenericAddressLookupSignal(text)) { return { intent: "list_documents_by_counterparty", diff --git a/llm_normalizer/backend/dist/services/addressRecipeCatalog.js b/llm_normalizer/backend/dist/services/addressRecipeCatalog.js index 632fff1..13091ff 100644 --- a/llm_normalizer/backend/dist/services/addressRecipeCatalog.js +++ b/llm_normalizer/backend/dist/services/addressRecipeCatalog.js @@ -140,7 +140,7 @@ function toDateTimeExpr(isoDate, endOfDay) { const second = endOfDay ? 59 : 0; return `ДАТАВРЕМЯ(${year}, ${month}, ${day}, ${hour}, ${minute}, ${second})`; } -function buildWhereClause(filters, fieldPath) { +function buildWhereClause(filters, fieldPath, extraConditions = []) { const periodFromExpr = typeof filters.period_from === "string" && filters.period_from.trim().length > 0 ? toDateTimeExpr(filters.period_from, false) : null; @@ -150,20 +150,71 @@ function buildWhereClause(filters, fieldPath) { const asOfExpr = typeof filters.as_of_date === "string" && filters.as_of_date.trim().length > 0 ? toDateTimeExpr(filters.as_of_date, true) : null; + const conditions = []; if (periodFromExpr && periodToExpr) { - return `ГДЕ\n ${fieldPath} МЕЖДУ ${periodFromExpr} И ${periodToExpr}`; + conditions.push(`${fieldPath} МЕЖДУ ${periodFromExpr} И ${periodToExpr}`); } - if (periodFromExpr) { - return `ГДЕ\n ${fieldPath} >= ${periodFromExpr}`; + else if (periodFromExpr) { + conditions.push(`${fieldPath} >= ${periodFromExpr}`); } - if (periodToExpr) { - return `ГДЕ\n ${fieldPath} <= ${periodToExpr}`; + else if (periodToExpr) { + conditions.push(`${fieldPath} <= ${periodToExpr}`); } - if (asOfExpr) { - return `ГДЕ\n ${fieldPath} <= ${asOfExpr}`; + else if (asOfExpr) { + conditions.push(`${fieldPath} <= ${asOfExpr}`); + } + for (const condition of extraConditions) { + const value = String(condition ?? "").trim(); + if (value) { + conditions.push(value); + } + } + if (conditions.length > 0) { + return `ГДЕ\n ${conditions.join("\n И ")}`; } return ""; } +function normalizeAccountTokenForQuery(value) { + const source = String(value ?? "").trim().replace(",", "."); + const match = source.match(/^(\d{2})(?:\.(\d{1,2}))?/); + if (!match) { + return source; + } + const base = match[1]; + if (!match[2]) { + return base; + } + return `${base}.${match[2]}`; +} +function buildMovementAccountCondition(filters) { + const raw = typeof filters.account === "string" ? filters.account.trim() : ""; + if (!raw) { + return null; + } + const normalized = normalizeAccountTokenForQuery(raw); + const match = normalized.match(/^(\d{2})(?:\.(\d{1,2}))?/); + if (!match) { + return null; + } + const base = match[1]; + const subRaw = match[2] ?? null; + const patterns = new Set(); + if (!subRaw) { + patterns.add(base); + } + else { + patterns.add(`${base}.${subRaw}`); + patterns.add(`${base}.${String(Number(subRaw))}`); + } + const clauses = Array.from(patterns) + .map((pattern) => pattern.trim()) + .filter((pattern) => pattern.length > 0) + .map((pattern) => `(Движения.СчетДт.Код ПОДОБНО "${pattern}%" ИЛИ Движения.СчетКт.Код ПОДОБНО "${pattern}%")`); + if (clauses.length === 0) { + return null; + } + return clauses.length === 1 ? clauses[0] : `(${clauses.join(" ИЛИ ")})`; +} function shouldBoostLimitForAllTimeCounterparty(filters) { const hasCounterparty = typeof filters.counterparty === "string" && filters.counterparty.trim().length > 0; if (!hasCounterparty) { @@ -224,7 +275,14 @@ function buildAddressRecipePlan(recipe, filters) { .replaceAll("__LIMIT__", String(resolvedLimit)) .replace("__WHERE_OUT__", buildWhereClause(filters, "БанкСписание.Дата")) .replace("__WHERE_IN__", buildWhereClause(filters, "БанкПоступление.Дата")) - : MOVEMENTS_QUERY_TEMPLATE.replace("__LIMIT__", String(resolvedLimit)).replace("__WHERE_CLAUSE__", buildWhereClause(filters, "Движения.Период")); + : MOVEMENTS_QUERY_TEMPLATE.replace("__LIMIT__", String(resolvedLimit)).replace("__WHERE_CLAUSE__", (() => { + const extraConditions = []; + const accountCondition = buildMovementAccountCondition(filters); + if (accountCondition) { + extraConditions.push(accountCondition); + } + return buildWhereClause(filters, "Движения.Период", extraConditions); + })()); return { recipe, query, diff --git a/llm_normalizer/backend/dist/services/assistantService.js b/llm_normalizer/backend/dist/services/assistantService.js index 920ff19..6136180 100644 --- a/llm_normalizer/backend/dist/services/assistantService.js +++ b/llm_normalizer/backend/dist/services/assistantService.js @@ -1923,6 +1923,119 @@ function extractAddressQuestionFromNormalized(normalized) { } return null; } +function stripMarkdownJsonFence(text) { + return String(text ?? "") + .trim() + .replace(/^```json\s*/i, "") + .replace(/^```\s*/i, "") + .replace(/```$/i, "") + .trim(); +} +function safeParseLooseJson(text) { + const fenced = stripMarkdownJsonFence(text); + if (!fenced) { + return null; + } + try { + return JSON.parse(fenced); + } + catch (_error) { + // Local OpenAI-compatible models often wrap JSON with extra text. + // Try extracting the first top-level JSON object defensively. + const start = fenced.indexOf("{"); + const end = fenced.lastIndexOf("}"); + if (start < 0 || end < 0 || end <= start) { + return null; + } + const candidate = fenced.slice(start, end + 1).trim(); + try { + return JSON.parse(candidate); + } + catch (_nestedError) { + return null; + } + } +} +function extractOutputTextFromRawNormalizerOutput(raw) { + if (!raw || typeof raw !== "object") { + return null; + } + const source = raw; + if (typeof source.output_text === "string" && source.output_text.trim().length > 0) { + return source.output_text; + } + if (Array.isArray(source.output)) { + for (const item of source.output) { + if (!item || typeof item !== "object") { + continue; + } + const content = item.content; + if (!Array.isArray(content)) { + continue; + } + for (const block of content) { + if (!block || typeof block !== "object") { + continue; + } + if (typeof block.text === "string" && block.text.trim().length > 0) { + return block.text; + } + } + } + } + if (source.response && typeof source.response === "object") { + const nested = source.response; + if (typeof nested.output_text === "string" && nested.output_text.trim().length > 0) { + return nested.output_text; + } + } + if (Array.isArray(source.choices) && source.choices.length > 0) { + const first = source.choices[0]; + if (first && typeof first === "object" && first.message && typeof first.message === "object") { + const message = first.message; + if (typeof message.content === "string" && message.content.trim().length > 0) { + return message.content; + } + } + } + return null; +} +function extractAddressQuestionFromRawNormalizerOutput(rawModelOutput) { + const outputText = extractOutputTextFromRawNormalizerOutput(rawModelOutput); + if (!outputText) { + return null; + } + const parsed = safeParseLooseJson(outputText); + if (!parsed || typeof parsed !== "object") { + return null; + } + const source = parsed; + const fragments = Array.isArray(source.fragments) ? source.fragments : []; + for (const item of fragments) { + if (!item || typeof item !== "object") { + continue; + } + const fragment = item; + const domainRelevance = fragment.domain_relevance; + if (typeof domainRelevance === "string" && domainRelevance.trim().toLowerCase() === "out_of_scope") { + continue; + } + if (domainRelevance === false) { + continue; + } + const readiness = String(fragment.execution_readiness ?? "").trim().toLowerCase(); + if (readiness === "no_route") { + continue; + } + const normalizedText = toNonEmptyString(fragment.normalized_fragment_text); + const rawText = toNonEmptyString(fragment.raw_fragment_text); + const candidate = compactWhitespace(normalizedText ?? rawText ?? ""); + if (candidate.length >= 3 && candidate.length <= 500) { + return candidate; + } + } + return null; +} async function runAddressLlmPreDecompose(normalizerService, payload, userMessage) { const provider = payload?.llmProvider === "local" ? "local" : payload?.llmProvider === "openai" ? "openai" : null; const baseMeta = { @@ -1960,8 +2073,10 @@ async function runAddressLlmPreDecompose(normalizerService, payload, userMessage }; try { const normalized = await normalizerService.normalize(normalizePayload); - const candidate = extractAddressQuestionFromNormalized(normalized?.normalized); - if (!normalized?.ok || !candidate) { + const candidateFromNormalized = extractAddressQuestionFromNormalized(normalized?.normalized); + const candidateFromRaw = candidateFromNormalized ? null : extractAddressQuestionFromRawNormalizerOutput(normalized?.raw_model_output); + const candidate = candidateFromNormalized ?? candidateFromRaw; + if (!candidate) { return { ...baseMeta, attempted: true, @@ -1972,13 +2087,25 @@ async function runAddressLlmPreDecompose(normalizerService, payload, userMessage const sourceCompact = compactWhitespace(String(userMessage ?? "").toLowerCase()); const candidateCompact = compactWhitespace(candidate.toLowerCase()); const applied = sourceCompact !== candidateCompact; + const candidateSource = candidateFromNormalized ? "normalized" : "raw"; + const reason = candidateSource === "normalized" + ? applied + ? "normalized_fragment_applied" + : "normalized_fragment_same" + : normalized?.ok + ? applied + ? "raw_fragment_applied" + : "raw_fragment_same" + : applied + ? "raw_fragment_applied_after_normalize_failed" + : "raw_fragment_same_after_normalize_failed"; return { attempted: true, applied, provider, traceId: normalized?.trace_id ?? null, effectiveMessage: applied ? candidate : userMessage, - reason: applied ? "normalized_fragment_applied" : "normalized_fragment_same" + reason }; } catch (error) { diff --git a/llm_normalizer/backend/src/services/addressFilterExtractor.ts b/llm_normalizer/backend/src/services/addressFilterExtractor.ts index f3e91b5..e9f27a4 100644 --- a/llm_normalizer/backend/src/services/addressFilterExtractor.ts +++ b/llm_normalizer/backend/src/services/addressFilterExtractor.ts @@ -18,8 +18,14 @@ const YEAR_PERIOD_PATTERN = const YEAR_PERIOD_SHORT_PATTERN = /(?:^|[\s,.;:!?()\-])(\d{2})\s*(?:г(?:од|ода)?\.?|year)(?=$|[\s,.;:!?()\-])/iu; const YEAR_PERIOD_ANY_PATTERN = /(?:^|[\s,.;:!?()\-])((?:19|20)\d{2})(?!\s*(?:[-‐‑‒–—―−]|до|to|по)\s*(?:19|20)\d{2})(?![.\/-]\d)(?:\s*(?:г(?:од|ода)?\.?|year))?(?=$|[\s,.;:!?()\-])/iu; -const MONTH_PERIOD_NUMERIC_PATTERN = /(?:за|for)\s*(0?[1-9]|1[0-2])[.\/-](20\d{2})/i; -const MONTH_PERIOD_NAME_PATTERN = /(?:за|for)\s+([a-zа-яё]+)\s+(20\d{2})(?:\s*г(?:од|ода|\\.)?)?/iu; +const MONTH_PERIOD_NUMERIC_MONTH_YEAR_PATTERN = + /(?:^|[\s,.;:!?()\-])(?:за|for|на|in)?\s*(0?[1-9]|1[0-2])[.\/-](20\d{2})(?=$|[\s,.;:!?()\-])/iu; +const MONTH_PERIOD_NUMERIC_YEAR_MONTH_PATTERN = + /(?:^|[\s,.;:!?()\-])(?:за|for|на|in)?\s*(20\d{2})[.\/-](0?[1-9]|1[0-2])(?=$|[\s,.;:!?()\-])/iu; +const MONTH_PERIOD_NAME_PATTERN = + /(?:^|[\s,.;:!?()\-])(?:за|for|на|in)?\s*([a-zа-яё]+)\s+(20\d{2})(?:\s*г(?:од|ода|\\.)?)?(?=$|[\s,.;:!?()\-])/iu; +const MONTH_PERIOD_NAME_YEAR_FIRST_PATTERN = + /(?:^|[\s,.;:!?()\-])(?:за|for|на|in)?\s*(20\d{2})(?:\s*г(?:од|ода|\\.)?)?\s+([a-zа-яё]+)(?=$|[\s,.;:!?()\-])/iu; function toIsoDate(year: number, month: number, day: number): string | null { if (!Number.isInteger(year) || !Number.isInteger(month) || !Number.isInteger(day)) { @@ -101,10 +107,23 @@ function resolveMonthByName(rawMonthName: string): number | undefined { } function extractMonthPeriod(text: string): { period_from?: string; period_to?: string } { - const numericMatch = text.match(MONTH_PERIOD_NUMERIC_PATTERN); - if (numericMatch) { - const month = Number(numericMatch[1]); - const year = Number(numericMatch[2]); + const numericMonthYearMatch = text.match(MONTH_PERIOD_NUMERIC_MONTH_YEAR_PATTERN); + if (numericMonthYearMatch) { + const month = Number(numericMonthYearMatch[1]); + const year = Number(numericMonthYearMatch[2]); + if (month >= 1 && month <= 12 && year >= 2000 && year <= 2099) { + const lastDay = new Date(Date.UTC(year, month, 0)).getUTCDate(); + return { + period_from: `${year}-${String(month).padStart(2, "0")}-01`, + period_to: `${year}-${String(month).padStart(2, "0")}-${String(lastDay).padStart(2, "0")}` + }; + } + } + + const numericYearMonthMatch = text.match(MONTH_PERIOD_NUMERIC_YEAR_MONTH_PATTERN); + if (numericYearMonthMatch) { + const year = Number(numericYearMonthMatch[1]); + const month = Number(numericYearMonthMatch[2]); if (month >= 1 && month <= 12 && year >= 2000 && year <= 2099) { const lastDay = new Date(Date.UTC(year, month, 0)).getUTCDate(); return { @@ -127,6 +146,19 @@ function extractMonthPeriod(text: string): { period_from?: string; period_to?: s } } + const byNameYearFirstMatch = text.match(MONTH_PERIOD_NAME_YEAR_FIRST_PATTERN); + if (byNameYearFirstMatch) { + const year = Number(byNameYearFirstMatch[1]); + const month = resolveMonthByName(String(byNameYearFirstMatch[2])); + if (month && year >= 2000 && year <= 2099) { + const lastDay = new Date(Date.UTC(year, month, 0)).getUTCDate(); + return { + period_from: `${year}-${String(month).padStart(2, "0")}-01`, + period_to: `${year}-${String(month).padStart(2, "0")}-${String(lastDay).padStart(2, "0")}` + }; + } + } + return {}; } diff --git a/llm_normalizer/backend/src/services/addressIntentResolver.ts b/llm_normalizer/backend/src/services/addressIntentResolver.ts index 145966f..861b471 100644 --- a/llm_normalizer/backend/src/services/addressIntentResolver.ts +++ b/llm_normalizer/backend/src/services/addressIntentResolver.ts @@ -108,6 +108,20 @@ function hasAny(text: string, patterns: string[]): boolean { return patterns.some((item) => text.includes(item)); } +function hasDocumentsFormingBalanceSignal(text: string): boolean { + if (hasAny(text, DOCUMENTS_FORMING_BALANCE_HINTS)) { + return true; + } + const hasDocLexeme = text.includes("документ") || text.includes("доки"); + const hasFormingLexeme = text.includes("формир"); + const hasBalanceLexeme = text.includes("остат"); + const hasAccountLexeme = text.includes("счет") || text.includes("счёт") || hasAccountNumberAnchor(text); + if (hasDocLexeme && hasFormingLexeme && hasBalanceLexeme && hasAccountLexeme) { + return true; + } + return hasBalanceLexeme && hasAccountLexeme && text.includes("из чего состоит"); +} + function isLikelyCounterpartyToken(rawToken: string): boolean { const token = String(rawToken ?? "").trim().toLowerCase(); if (!token || token.length < 2) { @@ -307,7 +321,7 @@ export function resolveAddressIntent(userMessage: string): AddressIntentResoluti }; } - if (hasAny(text, DOCUMENTS_FORMING_BALANCE_HINTS) && (hasAccountNumberAnchor(text) || text.includes("счет"))) { + if (hasDocumentsFormingBalanceSignal(text) && (hasAccountNumberAnchor(text) || text.includes("счет"))) { return { intent: "documents_forming_balance", confidence: "high", @@ -315,14 +329,6 @@ export function resolveAddressIntent(userMessage: string): AddressIntentResoluti }; } - if (hasAny(text, ACCOUNT_BALANCE_HINTS) || hasAccountNumberAnchor(text)) { - return { - intent: "account_balance_snapshot", - confidence: "high", - reasons: ["account_balance_signal_detected"] - }; - } - if ( hasAny(text, BANK_OPERATIONS_BY_COUNTERPARTY_HINTS) && (hasPartyAnchorMention(text) || hasLooseByAnchorMention(text) || hasHeuristicCounterpartyAnchor(text)) @@ -348,6 +354,14 @@ export function resolveAddressIntent(userMessage: string): AddressIntentResoluti }; } + if (hasAny(text, ACCOUNT_BALANCE_HINTS) || hasAccountNumberAnchor(text)) { + return { + intent: "account_balance_snapshot", + confidence: "high", + reasons: ["account_balance_signal_detected"] + }; + } + if (hasLooseByAnchorMention(text) && hasGenericAddressLookupSignal(text)) { return { intent: "list_documents_by_counterparty", diff --git a/llm_normalizer/backend/src/services/addressRecipeCatalog.ts b/llm_normalizer/backend/src/services/addressRecipeCatalog.ts index e294767..8454ed8 100644 --- a/llm_normalizer/backend/src/services/addressRecipeCatalog.ts +++ b/llm_normalizer/backend/src/services/addressRecipeCatalog.ts @@ -156,7 +156,7 @@ function toDateTimeExpr(isoDate: string, endOfDay: boolean): string | null { return `ДАТАВРЕМЯ(${year}, ${month}, ${day}, ${hour}, ${minute}, ${second})`; } -function buildWhereClause(filters: AddressFilterSet, fieldPath: string): string { +function buildWhereClause(filters: AddressFilterSet, fieldPath: string, extraConditions: string[] = []): string { const periodFromExpr = typeof filters.period_from === "string" && filters.period_from.trim().length > 0 ? toDateTimeExpr(filters.period_from, false) @@ -170,22 +170,76 @@ function buildWhereClause(filters: AddressFilterSet, fieldPath: string): string ? toDateTimeExpr(filters.as_of_date, true) : null; + const conditions: string[] = []; if (periodFromExpr && periodToExpr) { - return `ГДЕ\n ${fieldPath} МЕЖДУ ${periodFromExpr} И ${periodToExpr}`; + conditions.push(`${fieldPath} МЕЖДУ ${periodFromExpr} И ${periodToExpr}`); + } else if (periodFromExpr) { + conditions.push(`${fieldPath} >= ${periodFromExpr}`); + } else if (periodToExpr) { + conditions.push(`${fieldPath} <= ${periodToExpr}`); + } else if (asOfExpr) { + conditions.push(`${fieldPath} <= ${asOfExpr}`); } - if (periodFromExpr) { - return `ГДЕ\n ${fieldPath} >= ${periodFromExpr}`; - } - if (periodToExpr) { - return `ГДЕ\n ${fieldPath} <= ${periodToExpr}`; - } - if (asOfExpr) { - return `ГДЕ\n ${fieldPath} <= ${asOfExpr}`; + for (const condition of extraConditions) { + const value = String(condition ?? "").trim(); + if (value) { + conditions.push(value); + } } + if (conditions.length > 0) { + return `ГДЕ\n ${conditions.join("\n И ")}`; + } return ""; } +function normalizeAccountTokenForQuery(value: string): string { + const source = String(value ?? "").trim().replace(",", "."); + const match = source.match(/^(\d{2})(?:\.(\d{1,2}))?/); + if (!match) { + return source; + } + const base = match[1]; + if (!match[2]) { + return base; + } + return `${base}.${match[2]}`; +} + +function buildMovementAccountCondition(filters: AddressFilterSet): string | null { + const raw = typeof filters.account === "string" ? filters.account.trim() : ""; + if (!raw) { + return null; + } + const normalized = normalizeAccountTokenForQuery(raw); + const match = normalized.match(/^(\d{2})(?:\.(\d{1,2}))?/); + if (!match) { + return null; + } + const base = match[1]; + const subRaw = match[2] ?? null; + + const patterns = new Set(); + if (!subRaw) { + patterns.add(base); + } else { + patterns.add(`${base}.${subRaw}`); + patterns.add(`${base}.${String(Number(subRaw))}`); + } + + const clauses = Array.from(patterns) + .map((pattern) => pattern.trim()) + .filter((pattern) => pattern.length > 0) + .map( + (pattern) => + `(Движения.СчетДт.Код ПОДОБНО "${pattern}%" ИЛИ Движения.СчетКт.Код ПОДОБНО "${pattern}%")` + ); + if (clauses.length === 0) { + return null; + } + return clauses.length === 1 ? clauses[0] : `(${clauses.join(" ИЛИ ")})`; +} + function shouldBoostLimitForAllTimeCounterparty(filters: AddressFilterSet): boolean { const hasCounterparty = typeof filters.counterparty === "string" && filters.counterparty.trim().length > 0; if (!hasCounterparty) { @@ -262,10 +316,14 @@ export function buildAddressRecipePlan( .replaceAll("__LIMIT__", String(resolvedLimit)) .replace("__WHERE_OUT__", buildWhereClause(filters, "БанкСписание.Дата")) .replace("__WHERE_IN__", buildWhereClause(filters, "БанкПоступление.Дата")) - : MOVEMENTS_QUERY_TEMPLATE.replace("__LIMIT__", String(resolvedLimit)).replace( - "__WHERE_CLAUSE__", - buildWhereClause(filters, "Движения.Период") - ); + : MOVEMENTS_QUERY_TEMPLATE.replace("__LIMIT__", String(resolvedLimit)).replace("__WHERE_CLAUSE__", (() => { + const extraConditions: string[] = []; + const accountCondition = buildMovementAccountCondition(filters); + if (accountCondition) { + extraConditions.push(accountCondition); + } + return buildWhereClause(filters, "Движения.Период", extraConditions); + })()); return { recipe, diff --git a/llm_normalizer/backend/src/services/assistantService.ts b/llm_normalizer/backend/src/services/assistantService.ts index b19b76b..da16776 100644 --- a/llm_normalizer/backend/src/services/assistantService.ts +++ b/llm_normalizer/backend/src/services/assistantService.ts @@ -1885,6 +1885,119 @@ function extractAddressQuestionFromNormalized(normalized) { } return null; } +function stripMarkdownJsonFence(text) { + return String(text ?? "") + .trim() + .replace(/^```json\s*/i, "") + .replace(/^```\s*/i, "") + .replace(/```$/i, "") + .trim(); +} +function safeParseLooseJson(text) { + const fenced = stripMarkdownJsonFence(text); + if (!fenced) { + return null; + } + try { + return JSON.parse(fenced); + } + catch (_error) { + // Local OpenAI-compatible models often wrap JSON with extra text. + // Try extracting the first top-level JSON object defensively. + const start = fenced.indexOf("{"); + const end = fenced.lastIndexOf("}"); + if (start < 0 || end < 0 || end <= start) { + return null; + } + const candidate = fenced.slice(start, end + 1).trim(); + try { + return JSON.parse(candidate); + } + catch (_nestedError) { + return null; + } + } +} +function extractOutputTextFromRawNormalizerOutput(raw) { + if (!raw || typeof raw !== "object") { + return null; + } + const source = raw; + if (typeof source.output_text === "string" && source.output_text.trim().length > 0) { + return source.output_text; + } + if (Array.isArray(source.output)) { + for (const item of source.output) { + if (!item || typeof item !== "object") { + continue; + } + const content = item.content; + if (!Array.isArray(content)) { + continue; + } + for (const block of content) { + if (!block || typeof block !== "object") { + continue; + } + if (typeof block.text === "string" && block.text.trim().length > 0) { + return block.text; + } + } + } + } + if (source.response && typeof source.response === "object") { + const nested = source.response; + if (typeof nested.output_text === "string" && nested.output_text.trim().length > 0) { + return nested.output_text; + } + } + if (Array.isArray(source.choices) && source.choices.length > 0) { + const first = source.choices[0]; + if (first && typeof first === "object" && first.message && typeof first.message === "object") { + const message = first.message; + if (typeof message.content === "string" && message.content.trim().length > 0) { + return message.content; + } + } + } + return null; +} +function extractAddressQuestionFromRawNormalizerOutput(rawModelOutput) { + const outputText = extractOutputTextFromRawNormalizerOutput(rawModelOutput); + if (!outputText) { + return null; + } + const parsed = safeParseLooseJson(outputText); + if (!parsed || typeof parsed !== "object") { + return null; + } + const source = parsed; + const fragments = Array.isArray(source.fragments) ? source.fragments : []; + for (const item of fragments) { + if (!item || typeof item !== "object") { + continue; + } + const fragment = item; + const domainRelevance = fragment.domain_relevance; + if (typeof domainRelevance === "string" && domainRelevance.trim().toLowerCase() === "out_of_scope") { + continue; + } + if (domainRelevance === false) { + continue; + } + const readiness = String(fragment.execution_readiness ?? "").trim().toLowerCase(); + if (readiness === "no_route") { + continue; + } + const normalizedText = toNonEmptyString(fragment.normalized_fragment_text); + const rawText = toNonEmptyString(fragment.raw_fragment_text); + const candidate = compactWhitespace(normalizedText ?? rawText ?? ""); + if (candidate.length >= 3 && candidate.length <= 500) { + return candidate; + } + } + return null; +} async function runAddressLlmPreDecompose(normalizerService, payload, userMessage) { const provider = payload?.llmProvider === "local" ? "local" : payload?.llmProvider === "openai" ? "openai" : null; const baseMeta = { @@ -1922,8 +2035,10 @@ async function runAddressLlmPreDecompose(normalizerService, payload, userMessage }; try { const normalized = await normalizerService.normalize(normalizePayload); - const candidate = extractAddressQuestionFromNormalized(normalized?.normalized); - if (!normalized?.ok || !candidate) { + const candidateFromNormalized = extractAddressQuestionFromNormalized(normalized?.normalized); + const candidateFromRaw = candidateFromNormalized ? null : extractAddressQuestionFromRawNormalizerOutput(normalized?.raw_model_output); + const candidate = candidateFromNormalized ?? candidateFromRaw; + if (!candidate) { return { ...baseMeta, attempted: true, @@ -1934,13 +2049,25 @@ async function runAddressLlmPreDecompose(normalizerService, payload, userMessage const sourceCompact = compactWhitespace(String(userMessage ?? "").toLowerCase()); const candidateCompact = compactWhitespace(candidate.toLowerCase()); const applied = sourceCompact !== candidateCompact; + const candidateSource = candidateFromNormalized ? "normalized" : "raw"; + const reason = candidateSource === "normalized" + ? applied + ? "normalized_fragment_applied" + : "normalized_fragment_same" + : normalized?.ok + ? applied + ? "raw_fragment_applied" + : "raw_fragment_same" + : applied + ? "raw_fragment_applied_after_normalize_failed" + : "raw_fragment_same_after_normalize_failed"; return { attempted: true, applied, provider, traceId: normalized?.trace_id ?? null, effectiveMessage: applied ? candidate : userMessage, - reason: applied ? "normalized_fragment_applied" : "normalized_fragment_same" + reason }; } catch (error) { diff --git a/llm_normalizer/backend/tests/addressQueryRuntimeM23.test.ts b/llm_normalizer/backend/tests/addressQueryRuntimeM23.test.ts index 4b07e39..6ce7821 100644 --- a/llm_normalizer/backend/tests/addressQueryRuntimeM23.test.ts +++ b/llm_normalizer/backend/tests/addressQueryRuntimeM23.test.ts @@ -50,6 +50,11 @@ describe("address intent resolver expansion (M2.3a)", () => { expect(result.intent).toBe("documents_forming_balance"); }); + it("resolves documents forming balance for russian participle phrasing", () => { + const result = resolveAddressIntent("Показать документы, формирующие остаток по счету 60.01 на дату 2020-07-31"); + expect(result.intent).toBe("documents_forming_balance"); + }); + it("resolves documents by company phrase as counterparty intent", () => { const result = resolveAddressIntent("Какие документы доступны по компании СВК за 2021 год?"); expect(result.intent).toBe("list_documents_by_counterparty"); @@ -60,6 +65,11 @@ describe("address intent resolver expansion (M2.3a)", () => { expect(result.intent).toBe("bank_operations_by_counterparty"); }); + it("keeps bank_operations_by_counterparty even when account hints are present", () => { + const result = resolveAddressIntent("Показать банковские операции (счета 51, 62) для контрагента СВК за 2020 год"); + expect(result.intent).toBe("bank_operations_by_counterparty"); + }); + it("resolves documents by client phrase", () => { const result = resolveAddressIntent("Выведи документы по клиенту Бета за 2020-07"); expect(result.intent).toBe("list_documents_by_counterparty"); @@ -137,6 +147,36 @@ describe("address filter extraction for balance drilldown", () => { expect(result.warnings).toContain("period_derived_from_month_phrase"); }); + it("derives month period for balance snapshot from 'на май 2020'", () => { + const result = extractAddressFilters("Какой остаток по счету 60 на май 2020", "account_balance_snapshot"); + expect(result.extracted_filters.account).toBe("60"); + expect(result.extracted_filters.period_from).toBe("2020-05-01"); + expect(result.extracted_filters.period_to).toBe("2020-05-31"); + expect(result.extracted_filters.as_of_date).toBe("2020-05-31"); + expect(result.warnings).toContain("period_derived_from_month_phrase"); + expect(result.warnings).toContain("as_of_date_derived_from_period_to"); + }); + + it("derives month period for balance snapshot from 'на 2020.05'", () => { + const result = extractAddressFilters("Какой остаток по счету 60 на 2020.05", "account_balance_snapshot"); + expect(result.extracted_filters.account).toBe("60"); + expect(result.extracted_filters.period_from).toBe("2020-05-01"); + expect(result.extracted_filters.period_to).toBe("2020-05-31"); + expect(result.extracted_filters.as_of_date).toBe("2020-05-31"); + expect(result.warnings).toContain("period_derived_from_month_phrase"); + expect(result.warnings).toContain("as_of_date_derived_from_period_to"); + }); + + it("derives month period for balance snapshot from 'на 2020 май'", () => { + const result = extractAddressFilters("Какой остаток по счету 60 на 2020 май", "account_balance_snapshot"); + expect(result.extracted_filters.account).toBe("60"); + expect(result.extracted_filters.period_from).toBe("2020-05-01"); + expect(result.extracted_filters.period_to).toBe("2020-05-31"); + expect(result.extracted_filters.as_of_date).toBe("2020-05-31"); + expect(result.warnings).toContain("period_derived_from_month_phrase"); + expect(result.warnings).toContain("as_of_date_derived_from_period_to"); + }); + it("treats 'за весь период' as all-time hint and does not force 90-day default", () => { const result = extractAddressFilters( "Покажи банковские операции по клиенту Бета за весь период", @@ -379,4 +419,30 @@ describe("address recipe catalog counterparty filtering", () => { expect(plan.limit).toBe(200); }); + + it("injects account condition into movements query for account snapshot", () => { + const filters = extractAddressFilters( + "Какой остаток по счету 60 на дату 2020-07-31", + "account_balance_snapshot" + ).extracted_filters; + const selected = selectAddressRecipe("account_balance_snapshot", filters); + expect(selected.selected_recipe).toBeTruthy(); + const plan = buildAddressRecipePlan(selected.selected_recipe!, filters); + + expect(plan.query).toContain("Движения.СчетДт.Код"); + expect(plan.query).toContain("ПОДОБНО \"60%\""); + }); + + it("injects subaccount condition variants into movements query for documents_forming_balance", () => { + const filters = extractAddressFilters( + "Какие документы формируют остаток по счету 60.01 на дату 2020-07-31", + "documents_forming_balance" + ).extracted_filters; + const selected = selectAddressRecipe("documents_forming_balance", filters); + expect(selected.selected_recipe).toBeTruthy(); + const plan = buildAddressRecipePlan(selected.selected_recipe!, filters); + + expect(plan.query).toContain("ПОДОБНО \"60.01%\""); + expect(plan.query).toContain("ПОДОБНО \"60.1%\""); + }); });