diff --git a/docs/ARCH/11 - architecture_turnaround/14 - semantic_dialog_authority_recovery_plan_2026-04-19.md b/docs/ARCH/11 - architecture_turnaround/14 - semantic_dialog_authority_recovery_plan_2026-04-19.md index 6edfec6..e47d136 100644 --- a/docs/ARCH/11 - architecture_turnaround/14 - semantic_dialog_authority_recovery_plan_2026-04-19.md +++ b/docs/ARCH/11 - architecture_turnaround/14 - semantic_dialog_authority_recovery_plan_2026-04-19.md @@ -454,6 +454,26 @@ The architecture should not be considered corrected until the following invarian 7. `answer_top_block_matches_current_user_intent` 8. `meta_interrupt_does_not_corrupt_business_thread` +## Progress Update - 2026-04-20 + +Phase 18 added a live AGENT replay for the answer/acceptance layer: + +- [address_truth_harness_phase18_semantic_dialog_authority.json](/x:/1C/NDC_1C/docs/orchestration/address_truth_harness_phase18_semantic_dialog_authority.json:1) +- live replay artifact: `artifacts/domain_runs/address_truth_harness_phase18_semantic_dialog_authority_live_rerun` +- saved autorun: `AGENT | Phase 18 semantic dialog authority replay` + +Accepted live result: + +- `steps_passed`: 7/7 +- `final_status`: `accepted` +- `critical_path_green`: `true` +- green invariants include direct-answer quality, truth gate, human-answer quality, and meta-context integrity. + +Important semantic conclusion: + +- the turn `какой оборот был свк` now routes to `customer_revenue_and_payments` and opens with direct turnover for `Группа СВК`, not with stale `Чепурнов` documents or a generic top-client ranking; +- off-domain living-chat turns after a business answer are accepted when they stay live and do not replay stale business context. + ## Execution Rule Do not implement this plan as: diff --git a/docs/orchestration/address_truth_harness_phase18_semantic_dialog_authority.json b/docs/orchestration/address_truth_harness_phase18_semantic_dialog_authority.json new file mode 100644 index 0000000..a329012 --- /dev/null +++ b/docs/orchestration/address_truth_harness_phase18_semantic_dialog_authority.json @@ -0,0 +1,161 @@ +{ + "schema_version": "domain_truth_harness_spec_v1", + "scenario_id": "address_truth_harness_phase18_semantic_dialog_authority", + "domain": "address_phase18_semantic_dialog_authority", + "title": "Phase 18 semantic dialog authority replay", + "description": "Targeted AGENT replay for the semantic-dialog-authority recovery plan. The scenario validates that human/meta turns do not corrupt the business thread, explicit current-turn object/action authority overrides stale counterparty carryover, unsupported current meaning does not replay old exact answers, and the top block of the answer matches the user's current question.", + "bindings": {}, + "steps": [ + { + "step_id": "step_01_human_smalltalk_entry", + "title": "Smalltalk stays human before business routing starts", + "question": "приветик - че как там дела", + "required_answer_patterns_any": [ + "(?i)привет|дела|помоч|готов" + ], + "forbidden_answer_patterns": [ + "(?i)mcp", + "(?i)read_only", + "(?i)tool_gate_reason", + "(?i)snapshot_items" + ], + "criticality": "important", + "semantic_tags": [ + "smalltalk_entry", + "human_answer" + ] + }, + { + "step_id": "step_02_capability_meta", + "title": "Capability meta answer stays useful and does not expose service internals", + "question": "расскажи что можешь интересного", + "required_answer_patterns_any": [ + "(?i)могу|умею|помогу", + "(?i)ндс|документ|контрагент|долг|склад|остат" + ], + "forbidden_answer_patterns": [ + "(?i)mcp", + "(?i)read_only", + "(?i)tool_gate_reason", + "(?i)snapshot_items" + ], + "criticality": "important", + "semantic_tags": [ + "capability_meta", + "human_answer" + ] + }, + { + "step_id": "step_03_data_scope_meta", + "title": "Data-scope meta turn exposes organization choice without technical garbage", + "question": "по какой компании мы сейчас работаем?", + "required_answer_patterns_all": [ + "(?i)компан|организац|контур", + "(?i)альтернатива плюс|лайсвуд|райм" + ], + "forbidden_answer_patterns": [ + "(?i)mcp", + "(?i)read_only", + "(?i)tool_gate_reason", + "(?i)snapshot_items" + ], + "criticality": "critical", + "semantic_tags": [ + "data_scope_meta", + "multi_company_entry" + ] + }, + { + "step_id": "step_04_choose_organization", + "title": "Explicit organization selection fixes the session scope", + "question": "Альтернатива Плюс", + "required_answer_patterns_all": [ + "(?i)альтернатива плюс|зафиксир|работаем по|рабочую организац" + ], + "forbidden_answer_patterns": [ + "(?i)mcp", + "(?i)read_only", + "(?i)не могу определить" + ], + "criticality": "critical", + "semantic_tags": [ + "organization_authority", + "company_selected" + ] + }, + { + "step_id": "step_05_counterparty_documents_root", + "title": "Counterparty document root establishes a stale entity that must not dominate later turns", + "question": "по чепурнову покажи все доки", + "allowed_reply_types": [ + "factual", + "factual_with_explanation" + ], + "expected_intents": [ + "list_documents_by_counterparty" + ], + "required_direct_answer_patterns_any": [ + "(?i)чепурнов", + "(?i)документ|поступление|счет" + ], + "forbidden_direct_answer_patterns": [ + "(?i)mcp", + "(?i)tool_gate_reason", + "(?i)уточните организац" + ], + "criticality": "critical", + "semantic_tags": [ + "counterparty_documents", + "stale_entity_seed" + ] + }, + { + "step_id": "step_06_svk_turnover_overrides_stale_documents", + "title": "Explicit turnover question for SVK overrides stale Chepurnov documents", + "question": "какой оборот был свк", + "allowed_reply_types": [ + "factual", + "factual_with_explanation" + ], + "expected_intents": [ + "customer_revenue_and_payments" + ], + "required_direct_answer_patterns_all": [ + "(?i)оборот.*свк|свк.*оборот", + "(?i)денежн.*поток|входящ" + ], + "forbidden_direct_answer_patterns": [ + "(?i)самый доходный клиент", + "(?i)топ-", + "(?i)чепурнов", + "(?i)^контрагент:.*документ" + ], + "criticality": "critical", + "semantic_tags": [ + "current_turn_entity_authority", + "current_turn_action_authority", + "answer_top_block_matches_current_user_intent" + ] + }, + { + "step_id": "step_07_unsupported_current_meaning_boundary", + "title": "Off-domain current meaning stays live and does not replay the previous exact business answer", + "question": "а чем капибара отличается от утки?", + "required_answer_patterns_any": [ + "(?i)капибара.*утк|утк.*капибар", + "(?i)млекопита|птиц|грызун" + ], + "forbidden_answer_patterns": [ + "(?i)оборот.*свк", + "(?i)чепурнов", + "(?i)документ.*контрагент", + "(?i)самый доходный клиент" + ], + "criticality": "critical", + "semantic_tags": [ + "off_domain_living_chat", + "stale_replay_forbidden" + ] + } + ] +} diff --git a/llm_normalizer/data/autorun_generators/history.json b/llm_normalizer/data/autorun_generators/history.json index debb6c7..08742ae 100644 --- a/llm_normalizer/data/autorun_generators/history.json +++ b/llm_normalizer/data/autorun_generators/history.json @@ -1,4 +1,56 @@ [ + { + "generation_id": "gen-ag04200344-0dc69f", + "created_at": "2026-04-20T03:44:04+00:00", + "mode": "saved_user_sessions", + "title": "AGENT | Phase 18 semantic dialog authority replay", + "count": 7, + "domain": "address_phase18_semantic_dialog_authority", + "questions": [ + "приветик - че как там дела", + "расскажи что можешь интересного", + "по какой компании мы сейчас работаем?", + "Альтернатива Плюс", + "по чепурнову покажи все доки", + "какой оборот был свк", + "а чем капибара отличается от утки?" + ], + "generated_by": "codex_agent", + "saved_case_set_file": "assistant_autogen_saved_user_sessions_20260420034404_gen-ag04200344-0dc69f.json", + "context": { + "llm_provider": null, + "model": null, + "assistant_prompt_version": null, + "decomposition_prompt_version": null, + "prompt_fingerprint": null, + "autogen_personality_id": null, + "autogen_personality_prompt": null, + "source_session_id": null, + "saved_session_file": "assistant_saved_session_20260420034404_gen-ag04200344-0dc69f.json", + "saved_case_set_kind": "agent_semantic_scenario", + "agent_run": true, + "agent_focus": "semantic_dialog_authority_answer_acceptance", + "architecture_phase": "turnaround_11_phase18", + "source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\address_truth_harness_phase18_semantic_dialog_authority.json", + "scenario_id": "address_truth_harness_phase18_semantic_dialog_authority", + "semantic_tags": [ + "answer_top_block_matches_current_user_intent", + "capability_meta", + "company_selected", + "counterparty_documents", + "current_turn_action_authority", + "current_turn_entity_authority", + "data_scope_meta", + "human_answer", + "multi_company_entry", + "off_domain_living_chat", + "organization_authority", + "smalltalk_entry", + "stale_entity_seed", + "stale_replay_forbidden" + ] + } + }, { "generation_id": "gen-ag04171508-760111", "created_at": "2026-04-17T15:08:06+00:00", @@ -962,4 +1014,4 @@ "latest_acceptance": null } } -] \ No newline at end of file +] diff --git a/llm_normalizer/data/autorun_generators/saved_sessions/assistant_saved_session_20260420034404_gen-ag04200344-0dc69f.json b/llm_normalizer/data/autorun_generators/saved_sessions/assistant_saved_session_20260420034404_gen-ag04200344-0dc69f.json new file mode 100644 index 0000000..28bd880 --- /dev/null +++ b/llm_normalizer/data/autorun_generators/saved_sessions/assistant_saved_session_20260420034404_gen-ag04200344-0dc69f.json @@ -0,0 +1,137 @@ +{ + "saved_at": "2026-04-20T03:44:04+00:00", + "generation_id": "gen-ag04200344-0dc69f", + "mode": "saved_user_sessions", + "title": "AGENT | Phase 18 semantic dialog authority replay", + "agent_run": true, + "questions": [ + "приветик - че как там дела", + "расскажи что можешь интересного", + "по какой компании мы сейчас работаем?", + "Альтернатива Плюс", + "по чепурнову покажи все доки", + "какой оборот был свк", + "а чем капибара отличается от утки?" + ], + "metadata": { + "assistant_prompt_version": null, + "decomposition_prompt_version": null, + "prompt_fingerprint": null, + "agent_focus": "semantic_dialog_authority_answer_acceptance", + "architecture_phase": "turnaround_11_phase18", + "source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\address_truth_harness_phase18_semantic_dialog_authority.json", + "scenario_id": "address_truth_harness_phase18_semantic_dialog_authority", + "semantic_tags": [ + "answer_top_block_matches_current_user_intent", + "capability_meta", + "company_selected", + "counterparty_documents", + "current_turn_action_authority", + "current_turn_entity_authority", + "data_scope_meta", + "human_answer", + "multi_company_entry", + "off_domain_living_chat", + "organization_authority", + "smalltalk_entry", + "stale_entity_seed", + "stale_replay_forbidden" + ] + }, + "source_session_id": null, + "session": { + "session_id": null, + "mode": "agent_semantic_run", + "items": [ + { + "message_id": "agent-user-001", + "role": "user", + "text": "приветик - че как там дела", + "created_at": "2026-04-20T03:44:04+00:00", + "reply_type": null, + "trace_id": null, + "debug": null + }, + { + "message_id": "agent-user-002", + "role": "user", + "text": "расскажи что можешь интересного", + "created_at": "2026-04-20T03:44:04+00:00", + "reply_type": null, + "trace_id": null, + "debug": null + }, + { + "message_id": "agent-user-003", + "role": "user", + "text": "по какой компании мы сейчас работаем?", + "created_at": "2026-04-20T03:44:04+00:00", + "reply_type": null, + "trace_id": null, + "debug": null + }, + { + "message_id": "agent-user-004", + "role": "user", + "text": "Альтернатива Плюс", + "created_at": "2026-04-20T03:44:04+00:00", + "reply_type": null, + "trace_id": null, + "debug": null + }, + { + "message_id": "agent-user-005", + "role": "user", + "text": "по чепурнову покажи все доки", + "created_at": "2026-04-20T03:44:04+00:00", + "reply_type": null, + "trace_id": null, + "debug": null + }, + { + "message_id": "agent-user-006", + "role": "user", + "text": "какой оборот был свк", + "created_at": "2026-04-20T03:44:04+00:00", + "reply_type": null, + "trace_id": null, + "debug": null + }, + { + "message_id": "agent-user-007", + "role": "user", + "text": "а чем капибара отличается от утки?", + "created_at": "2026-04-20T03:44:04+00:00", + "reply_type": null, + "trace_id": null, + "debug": null + } + ], + "agent_run": true, + "metadata": { + "assistant_prompt_version": null, + "decomposition_prompt_version": null, + "prompt_fingerprint": null, + "agent_focus": "semantic_dialog_authority_answer_acceptance", + "architecture_phase": "turnaround_11_phase18", + "source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\address_truth_harness_phase18_semantic_dialog_authority.json", + "scenario_id": "address_truth_harness_phase18_semantic_dialog_authority", + "semantic_tags": [ + "answer_top_block_matches_current_user_intent", + "capability_meta", + "company_selected", + "counterparty_documents", + "current_turn_action_authority", + "current_turn_entity_authority", + "data_scope_meta", + "human_answer", + "multi_company_entry", + "off_domain_living_chat", + "organization_authority", + "smalltalk_entry", + "stale_entity_seed", + "stale_replay_forbidden" + ] + } + } +} diff --git a/llm_normalizer/data/eval_cases/assistant_autogen_saved_user_sessions_20260420034404_gen-ag04200344-0dc69f.json b/llm_normalizer/data/eval_cases/assistant_autogen_saved_user_sessions_20260420034404_gen-ag04200344-0dc69f.json new file mode 100644 index 0000000..e704724 --- /dev/null +++ b/llm_normalizer/data/eval_cases/assistant_autogen_saved_user_sessions_20260420034404_gen-ag04200344-0dc69f.json @@ -0,0 +1,46 @@ +{ + "suite_id": "assistant_saved_session_gen-ag04200344-0dc69f", + "suite_version": "0.1.0", + "schema_version": "assistant_saved_session_suite_v0_1", + "generated_at": "2026-04-20T03:44:04+00:00", + "generation_id": "gen-ag04200344-0dc69f", + "mode": "saved_user_sessions", + "title": "AGENT | Phase 18 semantic dialog authority replay", + "domain": "address_phase18_semantic_dialog_authority", + "scenario_count": 1, + "case_ids": [ + "SAVED-001" + ], + "cases": [ + { + "case_id": "SAVED-001", + "scenario_tag": "agent_saved_user_sessions", + "title": "AGENT | Phase 18 semantic dialog authority replay", + "question_type": "followup", + "broadness_level": "medium", + "turns": [ + { + "user_message": "приветик - че как там дела" + }, + { + "user_message": "расскажи что можешь интересного" + }, + { + "user_message": "по какой компании мы сейчас работаем?" + }, + { + "user_message": "Альтернатива Плюс" + }, + { + "user_message": "по чепурнову покажи все доки" + }, + { + "user_message": "какой оборот был свк" + }, + { + "user_message": "а чем капибара отличается от утки?" + } + ] + } + ] +}