From 5f7e2011909db6b7d654249912c8c230947f37f2 Mon Sep 17 00:00:00 2001 From: dctouch Date: Sat, 23 May 2026 15:34:11 +0300 Subject: [PATCH] =?UTF-8?q?=D0=98=D1=81=D0=BF=D1=80=D0=B0=D0=B2=D0=B8?= =?UTF-8?q?=D1=82=D1=8C=20=D0=BF=D1=80=D1=8F=D0=BC=D0=BE=D0=B9=20=D0=BE?= =?UTF-8?q?=D1=82=D0=B2=D0=B5=D1=82=20=D0=BE=20=D1=87=D0=B8=D1=81=D1=82?= =?UTF-8?q?=D0=BE=D0=B9=20=D0=BF=D1=80=D0=B8=D0=B1=D1=8B=D0=BB=D0=B8=201?= =?UTF-8?q?=D0=A1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../agent_profit_direct_answer_20260523.json | 96 +++++++++++++++ .../assistantMcpDiscoveryResponseCandidate.js | 24 +++- .../assistantMcpDiscoveryResponseCandidate.ts | 28 ++++- ...stantMcpDiscoveryResponseCandidate.test.ts | 56 +++++++++ .../data/autorun_generators/history.json | 42 +++++++ ..._20260523123202_gen-ag05231232-9ef058.json | 115 ++++++++++++++++++ ..._20260523123202_gen-ag05231232-9ef058.json | 34 ++++++ 7 files changed, 391 insertions(+), 4 deletions(-) create mode 100644 docs/orchestration/agent_profit_direct_answer_20260523.json create mode 100644 llm_normalizer/data/autorun_generators/saved_sessions/assistant_saved_session_20260523123202_gen-ag05231232-9ef058.json create mode 100644 llm_normalizer/data/eval_cases/assistant_autogen_saved_user_sessions_20260523123202_gen-ag05231232-9ef058.json diff --git a/docs/orchestration/agent_profit_direct_answer_20260523.json b/docs/orchestration/agent_profit_direct_answer_20260523.json new file mode 100644 index 0000000..b234759 --- /dev/null +++ b/docs/orchestration/agent_profit_direct_answer_20260523.json @@ -0,0 +1,96 @@ +{ + "schema_version": "domain_truth_harness_spec_v1", + "scenario_id": "agent_profit_direct_answer_20260523", + "domain": "cashflow_profit_boundary", + "title": "AGENT | Direct net profit answer shape", + "description": "Targeted replay for direct clean-profit questions: follow-up boundary may say cashflow is not profit, but a standalone net-profit question must answer with the accounting result first.", + "bindings": {}, + "steps": [ + { + "step_id": "step_01_cashflow_money_not_profit", + "title": "Cashflow question asks money and profit boundary", + "question": "Сколько денег Альтернатива заработала за 2020 год? Ответь коротко: получили, заплатили, денежное нетто, это прибыль или нет.", + "allowed_reply_types": [ + "partial_coverage", + "factual_with_explanation", + "factual" + ], + "required_answer_patterns_all": [ + "47\\s*628\\s*853,03", + "43\\s*763\\s*351,53", + "3\\s*865\\s*501,50", + "не\\s+чистая\\s+прибыль|не\\s+бухгалтерский\\s+финрезультат" + ], + "forbidden_answer_patterns": [ + "7\\s*136\\s*815,85.*получили", + "runtime_", + "planner_", + "primitive" + ], + "criticality": "critical", + "semantic_tags": [ + "cashflow", + "profit_boundary" + ] + }, + { + "step_id": "step_02_followup_this_is_profit", + "title": "Follow-up asks whether cashflow is clean profit", + "question": "а это чистая прибыль?", + "allowed_reply_types": [ + "partial_coverage", + "factual_with_explanation", + "factual" + ], + "required_answer_patterns_all": [ + "нет", + "90/91/99", + "7\\s*136\\s*815,85", + "убыт" + ], + "forbidden_answer_patterns": [ + "получили\\s+47\\s*628\\s*853,03.*чистая\\s+прибыль", + "runtime_", + "planner_", + "primitive" + ], + "criticality": "critical", + "semantic_tags": [ + "profit_boundary", + "followup" + ] + }, + { + "step_id": "step_03_direct_net_profit_question", + "title": "Standalone direct net profit question answers with result first", + "question": "какая чистая прибыль по Альтернативе за 2020?", + "allowed_reply_types": [ + "partial_coverage", + "factual_with_explanation", + "factual" + ], + "required_answer_patterns_all": [ + "90/91/99", + "7\\s*136\\s*815,85", + "убыт", + "не\\s+денежный\\s+поток|учетный\\s+финрезультат" + ], + "forbidden_answer_patterns": [ + "Коротко:\\s*нет,\\s*денежное\\s+операционное\\s+нетто", + "runtime_", + "planner_", + "primitive" + ], + "criticality": "critical", + "semantic_tags": [ + "direct_profit", + "answer_shape" + ] + } + ], + "acceptance": { + "min_score": 80, + "max_unresolved_p0": 0, + "require_all_critical_steps_pass": true + } +} diff --git a/llm_normalizer/backend/dist/services/assistantMcpDiscoveryResponseCandidate.js b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryResponseCandidate.js index 6569e81..a2f0279 100644 --- a/llm_normalizer/backend/dist/services/assistantMcpDiscoveryResponseCandidate.js +++ b/llm_normalizer/backend/dist/services/assistantMcpDiscoveryResponseCandidate.js @@ -58,6 +58,21 @@ function requestsCashflowPolarityAnswer(turnMeaning, graph) { ].join(" ")); return /(?:\u043f\u043e\s+\u0434\u0435\u043d\p{L}*|\u0434\u0435\u043d\p{L}*)[\s\S]{0,80}(?:\u043f\u043b\u044e\u0441|\u043c\u0438\u043d\u0443\u0441)|(?:\u043f\u043b\u044e\u0441|\u043c\u0438\u043d\u0443\u0441)[\s\S]{0,80}(?:\u043f\u043e\s+\u0434\u0435\u043d\p{L}*|\u0434\u0435\u043d\p{L}*)/iu.test(text); } +function requestsDirectAccountingProfitAnswer(turnMeaning, graph) { + const text = normalizeQuestionText([ + turnMeaning?.raw_message, + turnMeaning?.effective_message, + graph?.source_message, + graph?.question + ].join(" ")); + if (!/(?:чист\p{L}{0,8}\s+прибыл|финрезультат|финансов\p{L}{0,12}\s+результат)/iu.test(text)) { + return false; + } + if (/(?:^|\s)(?:а\s+)?это\s+чист\p{L}{0,8}\s+прибыл|это\s+прибыл\p{L}{0,4}\s+или\s+нет/iu.test(text)) { + return false; + } + return /(?:какая|какой|сколько|посчитай|рассчитай|покажи|дай|определить|определи|найди|итог|за\s+\d{4})/iu.test(text); +} function toStringList(value) { if (!Array.isArray(value)) { return []; @@ -768,6 +783,7 @@ function buildCompactBusinessOverviewReply(entryPoint, draft) { const inventoryReserveBoundary = actionFamily === "inventory_reserve_boundary" || unsupportedFamily === "inventory_reserve_liquidation_boundary"; const compactCashflowRequested = directMoneyAnswer && requestsCompactCashflowAnswer(turnMeaning, graph); const cashflowPolarityRequested = compactCashflowRequested && requestsCashflowPolarityAnswer(turnMeaning, graph); + const directAccountingProfitRequested = requestsDirectAccountingProfitAnswer(turnMeaning, graph); if (compactCashflowRequested && !rankingNeed && (incomingAmount || outgoingAmount || netAmount)) { const netDisplay = sentenceAmount(netAmount) ?? netAmount ?? "0 \u0440\u0443\u0431."; const signedNetDisplay = cashflowPolarityRequested && netDisplay && !String(netDisplay).trim().startsWith("-") @@ -802,8 +818,12 @@ function buildCompactBusinessOverviewReply(entryPoint, draft) { ? `минус ${amount}` : amount : "сумма не распознана"; - lines.push(`Коротко: нет, денежное операционное нетто не стоит считать чистой прибылью. Отдельно по закрытию счетов 90/91/99 в 1С за ${periodScope} подтвержден ${directionText}: ${amountText}${marginPct ? `; маржа к подтвержденной выручке ${marginPct}` : "; маржа к подтвержденной выручке не рассчитана"}.`); - lines.push("Это учетный финрезультат по найденным строкам закрытия периода в 1С, а не внешний аудит и не юридически подтвержденная отчетность."); + lines.push(directAccountingProfitRequested + ? `Коротко: за ${periodScope} по закрытию счетов 90/91/99 в 1С подтвержден ${directionText}: ${amountText}${marginPct ? `; маржа к подтвержденной выручке ${marginPct}` : "; маржа к подтвержденной выручке не рассчитана"}.` + : `Коротко: нет, денежное операционное нетто не стоит считать чистой прибылью. Отдельно по закрытию счетов 90/91/99 в 1С за ${periodScope} подтвержден ${directionText}: ${amountText}${marginPct ? `; маржа к подтвержденной выручке ${marginPct}` : "; маржа к подтвержденной выручке не рассчитана"}.`); + lines.push(directAccountingProfitRequested + ? "Это учетный финрезультат по найденным строкам закрытия периода в 1С, не денежный поток, не внешний аудит и не юридически подтвержденная отчетность." + : "Это учетный финрезультат по найденным строкам закрытия периода в 1С, а не внешний аудит и не юридически подтвержденная отчетность."); return joinBusinessReplyLines(lines); } const headline = toNonEmptyString(draft.headline); diff --git a/llm_normalizer/backend/src/services/assistantMcpDiscoveryResponseCandidate.ts b/llm_normalizer/backend/src/services/assistantMcpDiscoveryResponseCandidate.ts index 510f754..38f67ef 100644 --- a/llm_normalizer/backend/src/services/assistantMcpDiscoveryResponseCandidate.ts +++ b/llm_normalizer/backend/src/services/assistantMcpDiscoveryResponseCandidate.ts @@ -100,6 +100,25 @@ function requestsCashflowPolarityAnswer( ); } +function requestsDirectAccountingProfitAnswer( + turnMeaning: Record | null, + graph: Record | null +): boolean { + const text = normalizeQuestionText([ + turnMeaning?.raw_message, + turnMeaning?.effective_message, + graph?.source_message, + graph?.question + ].join(" ")); + if (!/(?:чист\p{L}{0,8}\s+прибыл|финрезультат|финансов\p{L}{0,12}\s+результат)/iu.test(text)) { + return false; + } + if (/(?:^|\s)(?:а\s+)?это\s+чист\p{L}{0,8}\s+прибыл|это\s+прибыл\p{L}{0,4}\s+или\s+нет/iu.test(text)) { + return false; + } + return /(?:какая|какой|сколько|посчитай|рассчитай|покажи|дай|определить|определи|найди|итог|за\s+\d{4})/iu.test(text); +} + function toStringList(value: unknown): string[] { if (!Array.isArray(value)) { return []; @@ -914,6 +933,7 @@ function buildCompactBusinessOverviewReply( actionFamily === "inventory_reserve_boundary" || unsupportedFamily === "inventory_reserve_liquidation_boundary"; const compactCashflowRequested = directMoneyAnswer && requestsCompactCashflowAnswer(turnMeaning, graph); const cashflowPolarityRequested = compactCashflowRequested && requestsCashflowPolarityAnswer(turnMeaning, graph); + const directAccountingProfitRequested = requestsDirectAccountingProfitAnswer(turnMeaning, graph); if (compactCashflowRequested && !rankingNeed && (incomingAmount || outgoingAmount || netAmount)) { const netDisplay = sentenceAmount(netAmount) ?? netAmount ?? "0 \u0440\u0443\u0431."; @@ -958,10 +978,14 @@ function buildCompactBusinessOverviewReply( : amount : "сумма не распознана"; lines.push( - `Коротко: нет, денежное операционное нетто не стоит считать чистой прибылью. Отдельно по закрытию счетов 90/91/99 в 1С за ${periodScope} подтвержден ${directionText}: ${amountText}${marginPct ? `; маржа к подтвержденной выручке ${marginPct}` : "; маржа к подтвержденной выручке не рассчитана"}.` + directAccountingProfitRequested + ? `Коротко: за ${periodScope} по закрытию счетов 90/91/99 в 1С подтвержден ${directionText}: ${amountText}${marginPct ? `; маржа к подтвержденной выручке ${marginPct}` : "; маржа к подтвержденной выручке не рассчитана"}.` + : `Коротко: нет, денежное операционное нетто не стоит считать чистой прибылью. Отдельно по закрытию счетов 90/91/99 в 1С за ${periodScope} подтвержден ${directionText}: ${amountText}${marginPct ? `; маржа к подтвержденной выручке ${marginPct}` : "; маржа к подтвержденной выручке не рассчитана"}.` ); lines.push( - "Это учетный финрезультат по найденным строкам закрытия периода в 1С, а не внешний аудит и не юридически подтвержденная отчетность." + directAccountingProfitRequested + ? "Это учетный финрезультат по найденным строкам закрытия периода в 1С, не денежный поток, не внешний аудит и не юридически подтвержденная отчетность." + : "Это учетный финрезультат по найденным строкам закрытия периода в 1С, а не внешний аудит и не юридически подтвержденная отчетность." ); return joinBusinessReplyLines(lines); } diff --git a/llm_normalizer/backend/tests/assistantMcpDiscoveryResponseCandidate.test.ts b/llm_normalizer/backend/tests/assistantMcpDiscoveryResponseCandidate.test.ts index 0de26c3..4e9cd76 100644 --- a/llm_normalizer/backend/tests/assistantMcpDiscoveryResponseCandidate.test.ts +++ b/llm_normalizer/backend/tests/assistantMcpDiscoveryResponseCandidate.test.ts @@ -262,6 +262,62 @@ describe("assistant MCP discovery response candidate", () => { expect(candidate.reply_text).not.toContain("маржа к выручке 90.01"); }); + it("answers direct net-profit questions with the accounting result first instead of saying no", () => { + const candidate = buildAssistantMcpDiscoveryResponseCandidate( + entryPoint({ + turn_input: { + adapter_status: "ready", + turn_meaning_ref: { + raw_message: "какая чистая прибыль по Альтернативе за 2020?", + effective_message: "Определить чистую прибыль компании Альтернатива за 2020 год", + asked_domain_family: "business_overview", + asked_action_family: "profit_margin_boundary", + unsupported_but_understood_family: "profit_margin_boundary", + explicit_date_scope: "2020" + }, + data_need_graph: { + business_fact_family: "business_overview", + ranking_need: null, + reason_codes: ["data_need_graph_family_business_overview"] + } + }, + bridge: { + bridge_status: "answer_draft_ready", + user_facing_response_allowed: true, + business_fact_answer_allowed: true, + requires_user_clarification: false, + pilot: { + pilot_scope: "business_overview_route_template_v1", + derived_business_overview: { + accounting_financial_result: { + period_scope: "2020", + final_result_direction: "loss", + final_result_amount_human_ru: "7 136 815,85 руб.", + net_margin_to_revenue_pct: -59.41 + } + } + }, + answer_draft: { + answer_mode: "confirmed_with_bounded_inference", + headline: "Коротко: по бухгалтерскому маршруту 90/91/99 за 2020 подтвержден учетный убыток.", + confirmed_lines: [], + inference_lines: [], + unknown_lines: [], + limitation_lines: [], + next_step_line: null + } + } + }) + ); + + const firstLine = String(candidate.reply_text ?? "").split("\n")[0] ?? ""; + expect(firstLine).toContain("за 2020 по закрытию счетов 90/91/99"); + expect(firstLine).toContain("учетный убыток"); + expect(firstLine).toContain("минус 7 136 815,85 руб."); + expect(firstLine).not.toContain("нет, денежное операционное нетто"); + expect(candidate.reply_text).toContain("не денежный поток"); + }); + it("keeps vendor-risk boundary answers direct instead of compacting into a money overview", () => { const candidate = buildAssistantMcpDiscoveryResponseCandidate( entryPoint({ diff --git a/llm_normalizer/data/autorun_generators/history.json b/llm_normalizer/data/autorun_generators/history.json index 2258005..e2362cc 100644 --- a/llm_normalizer/data/autorun_generators/history.json +++ b/llm_normalizer/data/autorun_generators/history.json @@ -1,4 +1,46 @@ [ + { + "generation_id": "gen-ag05231232-9ef058", + "created_at": "2026-05-23T12:32:02+00:00", + "mode": "saved_user_sessions", + "title": "AGENT | Direct net profit answer shape", + "count": 3, + "domain": "cashflow_profit_boundary", + "questions": [ + "Сколько денег Альтернатива заработала за 2020 год? Ответь коротко: получили, заплатили, денежное нетто, это прибыль или нет.", + "а это чистая прибыль?", + "какая чистая прибыль по Альтернативе за 2020?" + ], + "generated_by": "codex_agent", + "saved_case_set_file": "assistant_autogen_saved_user_sessions_20260523123202_gen-ag05231232-9ef058.json", + "context": { + "llm_provider": null, + "model": null, + "assistant_prompt_version": null, + "decomposition_prompt_version": null, + "prompt_fingerprint": null, + "autogen_personality_id": null, + "autogen_personality_prompt": null, + "source_session_id": null, + "saved_session_file": "assistant_saved_session_20260523123202_gen-ag05231232-9ef058.json", + "saved_case_set_kind": "agent_semantic_scenario", + "agent_run": true, + "agent_focus": "Targeted replay for direct clean-profit questions: follow-up boundary may say cashflow is not profit, but a standalone net-profit question must answer with the accounting result first.", + "architecture_phase": "turnaround_11", + "source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\agent_profit_direct_answer_20260523.json", + "scenario_id": "agent_profit_direct_answer_20260523", + "semantic_tags": [ + "answer_shape", + "cashflow", + "direct_profit", + "followup", + "profit_boundary" + ], + "validation_status": "accepted_live_replay", + "validated_run_dir": "artifacts\\domain_runs\\agent_profit_direct_answer_live1", + "saved_after_validated_replay": true + } + }, { "generation_id": "gen-ag05231107-464a28", "created_at": "2026-05-23T11:07:35+00:00", diff --git a/llm_normalizer/data/autorun_generators/saved_sessions/assistant_saved_session_20260523123202_gen-ag05231232-9ef058.json b/llm_normalizer/data/autorun_generators/saved_sessions/assistant_saved_session_20260523123202_gen-ag05231232-9ef058.json new file mode 100644 index 0000000..e722690 --- /dev/null +++ b/llm_normalizer/data/autorun_generators/saved_sessions/assistant_saved_session_20260523123202_gen-ag05231232-9ef058.json @@ -0,0 +1,115 @@ +{ + "saved_at": "2026-05-23T12:32:02+00:00", + "generation_id": "gen-ag05231232-9ef058", + "mode": "saved_user_sessions", + "title": "AGENT | Direct net profit answer shape", + "agent_run": true, + "questions": [ + "Сколько денег Альтернатива заработала за 2020 год? Ответь коротко: получили, заплатили, денежное нетто, это прибыль или нет.", + "а это чистая прибыль?", + "какая чистая прибыль по Альтернативе за 2020?" + ], + "metadata": { + "assistant_prompt_version": null, + "decomposition_prompt_version": null, + "prompt_fingerprint": null, + "agent_focus": "Targeted replay for direct clean-profit questions: follow-up boundary may say cashflow is not profit, but a standalone net-profit question must answer with the accounting result first.", + "architecture_phase": "turnaround_11", + "source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\agent_profit_direct_answer_20260523.json", + "scenario_id": "agent_profit_direct_answer_20260523", + "semantic_tags": [ + "answer_shape", + "cashflow", + "direct_profit", + "followup", + "profit_boundary" + ], + "validation_status": "accepted_live_replay", + "validated_run_dir": "artifacts\\domain_runs\\agent_profit_direct_answer_live1", + "saved_after_validated_replay": true, + "save_gate": { + "schema_version": "agent_semantic_save_gate_v1", + "validation_status": "accepted_live_replay", + "validated_run_dir": "artifacts\\domain_runs\\agent_profit_direct_answer_live1", + "final_status": "accepted", + "review_overall_status": "pass", + "business_overall_status": "pass", + "steps_total": 3, + "steps_passed": 3, + "steps_failed": 0, + "steps_with_business_failures": 0, + "steps_with_business_warnings": 0, + "acceptance_gate_passed": true, + "saved_after_validated_replay": true + } + }, + "source_session_id": null, + "session": { + "session_id": null, + "mode": "agent_semantic_run", + "items": [ + { + "message_id": "agent-user-001", + "role": "user", + "text": "Сколько денег Альтернатива заработала за 2020 год? Ответь коротко: получили, заплатили, денежное нетто, это прибыль или нет.", + "created_at": "2026-05-23T12:32:02+00:00", + "reply_type": null, + "trace_id": null, + "debug": null + }, + { + "message_id": "agent-user-002", + "role": "user", + "text": "а это чистая прибыль?", + "created_at": "2026-05-23T12:32:02+00:00", + "reply_type": null, + "trace_id": null, + "debug": null + }, + { + "message_id": "agent-user-003", + "role": "user", + "text": "какая чистая прибыль по Альтернативе за 2020?", + "created_at": "2026-05-23T12:32:02+00:00", + "reply_type": null, + "trace_id": null, + "debug": null + } + ], + "agent_run": true, + "metadata": { + "assistant_prompt_version": null, + "decomposition_prompt_version": null, + "prompt_fingerprint": null, + "agent_focus": "Targeted replay for direct clean-profit questions: follow-up boundary may say cashflow is not profit, but a standalone net-profit question must answer with the accounting result first.", + "architecture_phase": "turnaround_11", + "source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\agent_profit_direct_answer_20260523.json", + "scenario_id": "agent_profit_direct_answer_20260523", + "semantic_tags": [ + "answer_shape", + "cashflow", + "direct_profit", + "followup", + "profit_boundary" + ], + "validation_status": "accepted_live_replay", + "validated_run_dir": "artifacts\\domain_runs\\agent_profit_direct_answer_live1", + "saved_after_validated_replay": true, + "save_gate": { + "schema_version": "agent_semantic_save_gate_v1", + "validation_status": "accepted_live_replay", + "validated_run_dir": "artifacts\\domain_runs\\agent_profit_direct_answer_live1", + "final_status": "accepted", + "review_overall_status": "pass", + "business_overall_status": "pass", + "steps_total": 3, + "steps_passed": 3, + "steps_failed": 0, + "steps_with_business_failures": 0, + "steps_with_business_warnings": 0, + "acceptance_gate_passed": true, + "saved_after_validated_replay": true + } + } + } +} diff --git a/llm_normalizer/data/eval_cases/assistant_autogen_saved_user_sessions_20260523123202_gen-ag05231232-9ef058.json b/llm_normalizer/data/eval_cases/assistant_autogen_saved_user_sessions_20260523123202_gen-ag05231232-9ef058.json new file mode 100644 index 0000000..178beed --- /dev/null +++ b/llm_normalizer/data/eval_cases/assistant_autogen_saved_user_sessions_20260523123202_gen-ag05231232-9ef058.json @@ -0,0 +1,34 @@ +{ + "suite_id": "assistant_saved_session_gen-ag05231232-9ef058", + "suite_version": "0.1.0", + "schema_version": "assistant_saved_session_suite_v0_1", + "generated_at": "2026-05-23T12:32:02+00:00", + "generation_id": "gen-ag05231232-9ef058", + "mode": "saved_user_sessions", + "title": "AGENT | Direct net profit answer shape", + "domain": "cashflow_profit_boundary", + "scenario_count": 1, + "case_ids": [ + "SAVED-001" + ], + "cases": [ + { + "case_id": "SAVED-001", + "scenario_tag": "agent_saved_user_sessions", + "title": "AGENT | Direct net profit answer shape", + "question_type": "followup", + "broadness_level": "medium", + "turns": [ + { + "user_message": "Сколько денег Альтернатива заработала за 2020 год? Ответь коротко: получили, заплатили, денежное нетто, это прибыль или нет." + }, + { + "user_message": "а это чистая прибыль?" + }, + { + "user_message": "какая чистая прибыль по Альтернативе за 2020?" + } + ] + } + ] +}