63 lines
36 KiB
Plaintext
63 lines
36 KiB
Plaintext
|
||
> llm-normalizer-backend@0.1.0 test
|
||
> vitest run tests/assistantAddressLlmPredecompose.test.ts -t uses llm canonical candidate as gate signal --testTimeout=30000
|
||
|
||
|
||
[1m[46m RUN [49m[22m [36mv3.2.4 [39m[90mX:/1C/NDC_1C/llm_normalizer/backend[39m
|
||
|
||
{"timestamp":"2026-04-11T10:47:29.831Z","level":"info","service":"assistant_loop","message":"assistant_address_tool_gate_skip","sessionId":"asst-predecompose-llm-gate-signal-1775904449796","details":{"session_id":"asst-predecompose-llm-gate-signal-1775904449796","user_message":"svk gib list","effective_address_user_message":"заказчики компании svk","address_llm_predecompose_attempted":true,"address_llm_predecompose_applied":true,"address_llm_predecompose_reason":"normalized_fragment_applied","address_fallback_rule_hit":null,"address_sanitized_user_message":"svk gib list","assistant_orchestration_contract_v1":{"schema_version":"assistant_orchestration_contract_v1","hard_meta_mode":null,"address_mode":"unsupported","address_mode_confidence":"low","address_intent":"unknown","address_intent_confidence":"low","strong_data_signal_detected":true,"data_retrieval_signal_detected":false,"followup_context_detected":false,"unsupported_address_intent_fallback_to_deep":false,"deep_analysis_signal_fallback_to_deep":false,"final_decision":{"run_address_lane":false,"tool_gate_decision":"skip_address_lane","tool_gate_reason":"no_address_signal_after_l0","living_mode":"chat","living_reason":"predecompose_unsupported_mode"}},"address_tool_gate_decision":"skip_address_lane","address_tool_gate_reason":"no_address_signal_after_l0","address_llm_predecompose_contract_intent":"unknown","address_llm_predecompose_contract_aggregation_profile":"unknown","address_llm_predecompose_contract_period_scope":"unspecified"}}
|
||
{"timestamp":"2026-04-11T10:47:30.356Z","level":"warn","service":"assistant_loop","message":"assistant_living_chat_failed_fallback_to_deep","sessionId":"asst-predecompose-llm-gate-signal-1775904449796","details":{"session_id":"asst-predecompose-llm-gate-signal-1775904449796","user_message":"svk gib list","reason":"Incorrect API key provided: local-de***oken. You can find your API key at https://platform.openai.com/account/api-keys."}}
|
||
{"timestamp":"2026-04-11T10:47:30.382Z","level":"info","service":"assistant_loop","message":"assistant_message_processed","sessionId":"asst-predecompose-llm-gate-signal-1775904449796","eventType":"assistant_message","details":{"session_id":"asst-predecompose-llm-gate-signal-1775904449796","message_id":"msg-m2MKDpDJ2I","user_message":"svk gib list","normalizer_output":{"schema_version":"normalized_query_v2_0_2","user_message_raw":"svk gib list","message_in_scope":true,"scope_confidence":"medium","contains_multiple_tasks":false,"fragments":[{"fragment_id":"F1","raw_fragment_text":"svk gib list","normalized_fragment_text":"заказчики компании svk","domain_relevance":"in_scope","confidence":"medium","execution_readiness":"no_route","route_status":"no_route"}]},"execution_plan":[],"resolved_execution_state":[{"fragment_id":"F1","execution_readiness":"no_route","route_status":"no_route","no_route_reason":null}],"routes":[],"retrieval_calls":[],"retrieval_results_raw":[],"retrieval_results_normalized":[],"requirements_extracted":[{"requirement_id":"R1","source_fragment_id":null,"requirement_text":"svk gib list","subject_tokens":[],"status":"clarification_needed","route":null}],"requirements_total":1,"requirements_covered":0,"requirements_uncovered":[],"coverage_status":"partial_or_limited","answer_grounding_status":"no_grounded_answer","reply_semantic_type":"clarification_required","why_included_summary":[],"selection_reason_summary":[],"route_subject_match":true,"clarification_target":["R1"],"dropped_intent_segments":[],"question_type_class":"unknown","company_anchors":{"contract_numbers":[],"document_numbers":[],"dates":[],"amounts":[],"accounts":[],"periods":[],"document_types":[],"all":[]},"analysis_context_applied":false,"analysis_context":null,"business_scope_raw":[],"business_scope_resolved":["company_specific_accounting"],"company_grounding_applied":true,"scope_resolution_reason":["settlement_claim_company_scope_recovery"],"company_scope_resolution_reason":["settlement_claim_company_scope_recovery"],"raw_time_anchor":null,"raw_time_scope":null,"resolved_time_anchor":null,"resolved_primary_period":null,"effective_primary_period":null,"temporal_guard_input":null,"temporal_alignment_status":"conflicting","temporal_resolution_source":"normalized_time_scope:missing","temporal_guard_basis":"none","temporal_guard_applied":false,"temporal_guard_outcome":"passed","temporal_guard":{"raw_time_anchor":null,"raw_time_scope":null,"resolved_time_anchor":null,"resolved_primary_period":null,"effective_primary_period":null,"temporal_guard_input":null,"temporal_alignment_status":"conflicting","temporal_resolution_source":"normalized_time_scope:missing","temporal_guard_basis":"none","temporal_guard_applied":false,"temporal_guard_outcome":"passed","primary_period_window":null,"allowed_context_window":null,"controlled_temporal_expansion_enabled":false,"context_expansion_reasons_allowed":["prehistory","carryover","post_period_closure","long_running_contract_context"],"normalized_anchor_drift_detected":false,"reason_codes":["missing_resolved_primary_period"]},"raw_numeric_tokens":[],"classified_numeric_tokens":[],"rejected_as_non_accounts":[],"resolved_account_anchors":[],"domain_polarity_guard":{"applied":false,"polarity":"not_applicable","outcome":"not_applicable","supplier_score":0,"customer_score":0,"account_scope":[],"raw_numeric_tokens":[],"classified_numeric_tokens":[],"rejected_as_non_accounts":[],"resolved_account_anchors":[],"rejected_problem_units":0,"rejected_evidence":0,"critical_contradiction":false,"reason_codes":[]},"claim_anchor_audit":{"claim_type":"prove_settlement_closure_state","settlement_role":"unknown","settlement_role_resolution_reason":["settlement_role_resolved_unknown"],"polarity_resolution_status":"unknown","required_anchors":["period","account_scope","counterparty_scope_or_contract","closure_signal"],"resolved_anchors":{"period":[],"account_scope":[],"amounts":[],"contract":[],"document_numbers":[],"document_types":[],"counterparty_scope":[],"advance_signal":[],"closure_signal":[],"vat_signal":[],"chain_signal":[],"close_signal":[],"cost_scope":[],"rbp_signal":[],"writeoff_signal":[],"fixed_asset_signal":[],"amortization_signal":[],"expected_fa_set":[],"actual_fa_set":[]},"missing_anchors":["period","account_scope","counterparty_scope_or_contract","closure_signal"],"claim_anchor_resolution_rate":0,"primary_period":null,"allowed_context_window":null,"context_expansion_reasons_allowed":["prehistory","carryover","post_period_closure","long_running_contract_context"],"reason_codes":["claim_missing_required_anchors","claim_anchor_resolution_low","unresolved_supplier_customer_polarity"]},"settlement_role":"unknown","settlement_role_resolution_reason":["settlement_role_resolved_unknown"],"polarity_resolution_status":"unknown","targeted_evidence_acquisition":{"claim_type":"prove_settlement_closure_state","required_checks":["payment_document_found","contract_matched","settlement_object_matched","closing_document_found","register_closure_entry_found","posting_link_found"],"check_status":{"payment_document_found":"not_found","contract_matched":"not_found","settlement_object_matched":"not_found","closing_document_found":"not_found","register_closure_entry_found":"not_found","posting_link_found":"not_found"},"targeted_item_hits":0,"targeted_evidence_hits":0,"targeted_evidence_hit_rate":0,"targeted_evidence_source_refs":[],"reason_codes":["targeted_evidence_not_found","targeted_evidence_hit_rate_low"]},"evidence_admissibility_gate":{"candidate_evidence_total":0,"admissible_evidence_count":0,"rejected_evidence_count":0,"rejected_item_count":0,"reject_breakdown":{"wrong_period":0,"wrong_domain":0,"wrong_account_scope":0,"weak_source_mapping":0,"zero_live_match":0,"future_dated_or_out_of_window":0},"category_breakdown":{"hard_evidence":0,"supporting_signal":0,"inadmissible_noise":0},"reason_codes":["no_admissible_evidence_for_grounded_answer"]},"eligibility_time_basis":"none","grounded_answer_eligibility_guard":{"eligible":false,"temporal_passed":true,"eligibility_time_basis":"none","business_scope_passed":true,"polarity_passed":true,"claim_anchors_passed":false,"claim_anchor_resolution_rate":0,"missing_required_anchors":4,"admissible_evidence_count":0,"critical_contradiction":false,"outcome":"limited_or_insufficient_evidence","grounding_mode":"limited_or_insufficient_evidence","reason_codes":["claim_anchor_coverage_insufficient","admissible_evidence_count_zero","targeted_evidence_hit_rate_zero"]},"problem_centric_answer_applied":false,"problem_units_used_count":0,"problem_answer_mode":"stage1_policy_v11","assistant_outcome_class_v1":"BLOCKED_BY_AMBIGUITY","assistant_orchestration_contracts_v1":{"query_frame":{"schema_version":"assistant_query_frame_v1","original_user_question":"svk gib list","normalized_question":"svk gib list","route_summary_mode":"none","fragments_total":1,"dropped_intent_segments":[],"analysis_context":null},"execution_plan":{"schema_version":"assistant_execution_plan_v1","steps":[],"requirements_total":1},"evidence_bundle":{"schema_version":"assistant_evidence_bundle_v1","retrieval_calls_total":0,"retrieval_results_total":0,"retrieval_status_breakdown":{"ok":0,"partial":0,"empty":0,"error":0},"evidence_total":0,"source_refs_total":0,"limitation_total":0,"error_total":0},"coverage":{"schema_version":"assistant_coverage_contract_v1","coverage_report":{"requirements_total":1,"requirements_covered":0,"requirements_uncovered":[],"requirements_partially_covered":[],"clarification_needed_for":["R1"],"out_of_scope_requirements":[]},"grounding":{"status":"no_grounded_answer","route_subject_match":true,"missing_requirements":["R1"],"reasons":["Ни одно требование не получило подтвержденного покрытия.","В текущей выборке не хватает явных подтверждений, почему записи попали в ответ.","Не хватает ключевых ориентиров в вопросе (период, объект или контрагент).","Недостаточно подтвержденных данных для уверенного ответа.","Не хватило целевых подтверждений по выбранному сценарию."],"why_included_summary":[],"selection_reason_summary":[]},"outcome_class":"BLOCKED_BY_AMBIGUITY"}},"answer_structure_v11":{"schema_version":"answer_structure_v1_1","answer_summary":"Нужны уточнения: без сужения фокуса надежный вывод невозможен.","direct_answer":"Есть признаки проблемы, но без уточнений по периоду и объекту вывод ненадежен.","mechanism_block":{"status":"unresolved","mechanism_notes":[],"limitation_reason_codes":[]},"evidence_block":{"evidence_ids":[],"source_refs":[],"mechanism_notes":[],"coverage_note":"coverage_partial_or_limited"},"uncertainty_block":{"open_uncertainties":["R1","missing_anchor:period","missing_anchor:account","missing_anchor:document_or_object","missing_anchor:counterparty"],"limitations":["Ни одно требование не получило подтвержденного покрытия.","В текущей выборке не хватает явных подтверждений, почему записи попали в ответ.","Не хватает ключевых ориентиров в вопросе (период, объект или контрагент).","Недостаточно подтвержденных данных для уверенного ответа.","Не хватило целевых подтверждений по выбранному сценарию."]},"next_step_block":{"recommended_actions":["Дайте недостающие якоря (период/счет/объект), иначе сильный factual вывод невозможен."],"clarification_questions":["Уточните период проверки (например, июль 2020).","Уточните счет или группу счетов (например, 19, 60, 62).","Укажите документ/GUID/конкретный объект для трассировки.","Укажите контрагента или группу контрагентов.","Закройте уточнения для требований: R1."]}},"investigation_state_snapshot":{"schema_version":"investigation_state_v1","session_id":"asst-predecompose-llm-gate-signal-1775904449796","status":"active","turn_index":1,"updated_at":"2026-04-11T10:47:30.380Z","question_id":"msg-W7haLVq5Wq","question_scope_id":"s:svk gib list","scope_origin":"underspecified","focus":{"domain":null,"period":null,"primary_accounts":[],"active_query_subject":"svk gib list"},"narrowing_status":"unknown","evidence_refs":[],"open_uncertainties":["clarify:R1"],"last_answer_mode":"clarification_required","followup_context":{"previous_question_id":null,"last_user_message":"svk gib list","referenced_requirement_ids":["R1"],"active_domain":null,"active_requirement_ids":["R1"],"uncovered_requirement_ids":["R1"],"last_problem_unit_id":null,"settlement_next_actions":[],"evidence_summary":[],"question_scope_id":"s:svk gib list","scope_origin":"underspecified"},"query_mode_hint":"investigation_candidate"},"fallback_type":"clarification","assistant_reply":"Коротко: Проблема подтверждается частично; для уверенного вывода нужны уточнения. Что сломано:\n- Есть признаки проблемы, но без уточнений по периоду и объекту вывод ненадежен. Почему это похоже на проблему:\n- Сигнал проблемы есть, но механизм подтвержден не полностью. На чем это основано:\n- Опора частичная: часть требований покрыта не полностью.\n- Отдельно не подтверждено или покрыто частично: R1. Что проверить первым:\n- Уточните период проверки, чтобы подтвердить проблему без лишнего шума.\n- Проверьте связку документов и проводок по проблемному участку в указанном периоде.\n- Уточните период проверки (например, июль 2020).\n- Уточните счет или группу счетов (например, 19, 60, 62). Ограничения:\n- Ни одно требование не получило подтвержденного покрытия.\n- В текущей выборке не хватает явных подтверждений, почему записи попали в ответ.\n- Не хватает ключевых ориентиров в вопросе (период, объект или контрагент).\n- Недостаточно подтвержденных данных для уверенного ответа.\n- Не хватило целевых подтверждений по выбранному сценарию.","reply_type":"clarification_required","trace_id":"norm-predecompose-llm-gate-signal"}}
|
||
[31m❯[39m tests/assistantAddressLlmPredecompose.test.ts [2m([22m[2m16 tests[22m[2m | [22m[31m1 failed[39m[2m | [22m[33m15 skipped[39m[2m)[22m[33m 593[2mms[22m[39m
|
||
[2m[90m↓[39m[22m assistant address llm pre-decompose candidate preference[2m > [22mprefers raw fragment when normalized fragment loses counterparty anchor
|
||
[2m[90m↓[39m[22m assistant address llm pre-decompose candidate preference[2m > [22mkeeps counterparty anchor for docy typo when llm fragment drops anchor
|
||
[2m[90m↓[39m[22m assistant address llm pre-decompose candidate preference[2m > [22mdoes not treat service verb as counterparty anchor when llm rewrites noisy bank phrase
|
||
[2m[90m↓[39m[22m assistant address llm pre-decompose candidate preference[2m > [22mrejects llm fragment when counterparty anchor is substituted by unrelated noun
|
||
[2m[90m↓[39m[22m assistant address llm pre-decompose candidate preference[2m > [22mrejects follow-up intent injection when llm adds documents to same-date account prompt
|
||
[2m[90m↓[39m[22m assistant address llm pre-decompose candidate preference[2m > [22mapplies deterministic fallback rule when llm fragment is unusable
|
||
[2m[90m↓[39m[22m assistant address llm pre-decompose candidate preference[2m > [22mkeeps contract anchor in deterministic fallback when llm output is unusable
|
||
[2m[90m↓[39m[22m assistant address llm pre-decompose candidate preference[2m > [22mkeeps bank-by-contract intent in deterministic fallback when llm output is unusable
|
||
[2m[90m↓[39m[22m assistant address llm pre-decompose candidate preference[2m > [22mkeeps month scope for balance fallback in 'year month' phrasing
|
||
[2m[90m↓[39m[22m assistant address llm pre-decompose candidate preference[2m > [22mdoes not pick service words as counterparty anchor in noisy docs query
|
||
[2m[90m↓[39m[22m assistant address llm pre-decompose candidate preference[2m > [22mrewrites payment-style counterparty phrasing to bank operations
|
||
[2m[90m↓[39m[22m assistant address llm pre-decompose candidate preference[2m > [22mrewrites shorthand bank/contract slang phrase to bank operations by contract
|
||
[2m[90m↓[39m[22m assistant address llm pre-decompose candidate preference[2m > [22mkeeps loose all-time colloquial lookup in address lane without forcing rewrite
|
||
[31m [31m×[31m assistant address llm pre-decompose candidate preference[2m > [22muses llm canonical candidate as gate signal when regex path has no address markers[39m[33m 592[2mms[22m[39m
|
||
[31m → expected 'clarification_required' to be 'factual' // Object.is equality[39m
|
||
[2m[90m↓[39m[22m assistant address llm pre-decompose candidate preference[2m > [22mnormalizes short ordinal year like '20й' in noisy docs phrasing
|
||
[2m[90m↓[39m[22m assistant address llm pre-decompose candidate preference[2m > [22mdoes not treat date fragments as account in balance fallback
|
||
npm.cmd :
|
||
At line:2 char:45
|
||
+ ... _TESTS='1'; npm.cmd test -- tests/assistantAddressLlmPredecompose.tes ...
|
||
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||
+ CategoryInfo : NotSpecified: (:String) [], RemoteException
|
||
+ FullyQualifiedErrorId : NativeCommandError
|
||
|
||
[31m⎯⎯⎯⎯⎯⎯⎯[39m[1m[41m Failed Tests 1 [49m[22m[31m⎯⎯⎯⎯⎯⎯⎯[39m
|
||
|
||
[41m[1m FAIL [22m[49m tests/assistantAddressLlmPredecompose.test.ts[2m > [22massistant address llm pre-decompose
|
||
candidate preference[2m > [22muses llm canonical candidate as gate signal when regex path has no address markers
|
||
[31m[1mAssertionError[22m: expected 'clarification_required' to be 'factual' // Object.is equality[39m
|
||
|
||
Expected: [32m"factual"[39m
|
||
Received: [31m"clarification_required"[39m
|
||
|
||
[36m [2m❯[22m tests/assistantAddressLlmPredecompose.test.ts:[2m1050:33[22m[39m
|
||
[90m1048| [39m
|
||
[90m1049| [39m [34mexpect[39m(response[33m.[39mok)[33m.[39m[34mtoBe[39m([35mtrue[39m)[33m;[39m
|
||
[90m1050| [39m [34mexpect[39m(response[33m.[39mreply_type)[33m.[39m[34mtoBe[39m([32m"factual"[39m)[
|
||
33m;[39m
|
||
[90m | [39m [31m^[39m
|
||
[90m1051| [39m [34mexpect[39m(calls)[33m.[39m[34mtoHaveLength[39m([34m1[39m)[33m;[39m
|
||
[90m1052| [39m [34mexpect[39m(calls[[34m0[39m][33m.[39mmessage)[33m.[39m[34mtoBe[39m([32m"заказчики
|
||
компании svk"[39m)[33m;[39m
|
||
|
||
[31m[2m⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯⎯[1/1]⎯[22m[39m
|
||
|
||
|
||
[2m Test Files [22m [1m[31m1 failed[39m[22m[90m (1)[39m
|
||
[2m Tests [22m [1m[31m1 failed[39m[22m[2m | [22m[33m15 skipped[39m[90m (16)[39m
|
||
[2m Start at [22m 13:47:28
|
||
[2m Duration [22m 2.09s[2m (transform 859ms, setup 0ms, collect 1.27s, tests 593ms, environment 0ms, prepare 56ms)[22m
|
||
|