Усилить агентный ревью-гейт маржинальности 1С

2026-05-23 22:35:47 +03:00 · 2026-05-23 22:35:47 +03:00 · f56bde3a45
parent 6ddd61f975
commit f56bde3a45
3 changed files with 255 additions and 11 deletions
--- a/scripts/domain_case_loop.py
+++ b/scripts/domain_case_loop.py
@ -137,9 +137,68 @@ BUSINESS_TECHNICAL_GARBAGE_MARKERS = (
    "probe ",
    "query_movements",
    "query_documents",
    "surrogate-формул",
    "vat-объект",
    "truth gate",
    "coverage",
    "checked_sources_only",
    "строк с суммой",
    "строк в выборке",
    "эвристический shortlist",
 )
 # NOTE(review): presumably the soft length cap for answers to direct business
 # questions (feeding the "business_answer_too_verbose" check); the unit and the
 # comparison that consumes it are outside this hunk — confirm.
 BUSINESS_DIRECT_ANSWER_SOFT_LIMIT = 1800
 # Lower-case Russian phrases ("could not", "cannot confirm", "not confirmed",
 # "need a period", ...) that flag an assistant answer as limited/insufficient.
 # build_business_first_review tests them against the assistant text through
 # _has_any_marker to derive `limited_answer`.
 # NOTE(review): _marker_hits tests `marker in lowered`, i.e. substring
 # containment, so "не подтвержден" also matches inflected forms such as
 # "не подтверждена". The phrase "не могу подтвердить" contains "могу", which is
 # itself listed in BUSINESS_NEXT_ACTION_MARKERS — an answer that only says
 # "не могу подтвердить" would therefore also count as offering a next action.
 # Confirm whether that overlap is intended.
 BUSINESS_LIMITED_ANSWER_MARKERS = (
    "не удалось",
    "не могу подтвердить",
    "не подтвержден",
    "не подтверждён",
    "не хватает",
    "нужен период",
    "нужна организация",
    "нужно уточнить",
    "нельзя честно",
    "нельзя построить",
    "нет достаточной базы",
 )
 # Lower-case Russian phrases ("I can", "please clarify", "next step", "I suggest",
 # "expand", "calculate by", ...) taken as evidence that the answer offers the
 # user a follow-up action. build_business_first_review tests them against the
 # assistant text through _has_any_marker to derive `has_next_action`.
 # NOTE(review): matching is by substring (`marker in lowered` in _marker_hits),
 # so short entries such as "могу", "можно" and "дальше" are very permissive;
 # "могу" is also a substring of the limited-answer marker "не могу подтвердить".
 # Confirm the false-negative risk for "business_next_step_missing" is acceptable.
 BUSINESS_NEXT_ACTION_MARKERS = (
    "могу",
    "уточните",
    "следующий шаг",
    "что проверить дальше",
    "дальше",
    "можно",
    "предлагаю",
    "показать найден",
    "расширить",
    "проверить 90",
    "посчитать по",
    "построить по",
 )
 # Word stems (period, revenue, cost of sales, gross, margin) expected in an
 # answer to a nomenclature-margin question. build_business_first_review collects
 # the stems found in the assistant text and raises "accounting_contract_missing"
 # when fewer than two distinct stems are present in a margin context.
 NOMENCLATURE_MARGIN_EXPECTED_ANSWER_MARKERS = (
    "период",
    "выручк",
    "себестоим",
    "валов",
    "марж",
 )
 # Stems and identifiers (depreciation, "объект ос", card, stuck payment,
 # settlement closing, bank write-off, settlement/payment_document tokens) that
 # should not appear in an answer to a nomenclature-margin question. Any hit in
 # the assistant text, when the step is a margin context, makes
 # build_business_first_review raise "domain_leak_accounting_route".
 # NOTE(review): matching is by substring, and stems such as "основн" and
 # "карточк" are broad (e.g. "в основном", "карточка номенклатуры" would match) —
 # confirm these do not produce false P0 findings on legitimate margin answers.
 NOMENCLATURE_MARGIN_WRONG_DOMAIN_ANSWER_MARKERS = (
    "амортизац",
    "основн",
    "объект ос",
    "карточк",
    "оплата завис",
    "зависш",
    "закрытие расчет",
    "закрытие расчёт",
    "списание с расчетного",
    "списание с расчётного",
    "банковск",
    "settlement",
    "payment_document",
    "unresolved settlement",
 )
 GUARDED_INSUFFICIENCY_PRIMARY_MARKERS = (
    "\u0442\u043e\u0447\u043d\u044b\u0439",
    "\u0442\u043e\u0447\u043d\u044b\u0435",
@ -188,8 +247,11 @@ DEFAULT_INVARIANT_SEVERITY: dict[str, str] = {
    "business_direct_answer_missing": "P0",
    "technical_garbage_in_answer": "P0",
    "counterparty_value_flow_misrouted_to_company_profit": "P0",
    "domain_leak_accounting_route": "P0",
    "answer_layering_noise": "P1",
    "accounting_contract_missing": "P1",
    "business_answer_too_verbose": "P1",
    "business_next_step_missing": "P2",
 }
 # Numeric rank per severity label; "P0" has the smallest rank.
 # NOTE(review): presumably a sort key for repair targets — the consumer is not
 # visible here. derive_invariant_severity can also return "WARNING", which has
 # no rank in this mapping; confirm the consumer handles a missing key.
 REPAIR_TARGET_SEVERITY_ORDER = {"P0": 0, "P1": 1, "P2": 2}
@ -1845,6 +1907,10 @@ def _marker_hits(text: str, markers: tuple[str, ...]) -> list[str]:
    return [marker for marker in markers if marker and marker in lowered]
 def _has_any_marker(text: str, markers: tuple[str, ...]) -> bool:
    """Report whether ``text`` matches at least one entry of ``markers``.

    Delegates the matching to ``_marker_hits`` and only checks whether that
    call produced any hit.
    """
    found = _marker_hits(text, markers)
    return True if found else False
 def is_report_style_business_question(question: str) -> bool:
    """Report whether ``question`` hits any ``BUSINESS_REPORT_REQUEST_MARKERS`` entry.

    The marker matching itself is performed by ``_marker_hits``.
    """
    report_hits = _marker_hits(question, BUSINESS_REPORT_REQUEST_MARKERS)
    return True if report_hits else False
@ -1855,6 +1921,20 @@ def is_direct_style_business_question(question: str) -> bool:
    return bool(_marker_hits(question, BUSINESS_DIRECT_QUESTION_MARKERS))
 def is_nomenclature_margin_context(step_state: dict[str, Any], question: str) -> bool:
    """Decide whether a step is about margin ranking by nomenclature.

    The step qualifies when its ``detected_intent`` or ``capability_id`` equals
    the margin-ranking identifier. Otherwise the question text (normalized with
    ``_review_text``) must contain a subject stem, a margin/sales stem and a
    ranking stem at the same time.

    Args:
        step_state: Step dictionary; only ``detected_intent`` and
            ``capability_id`` are read.
        question: The user question used for the textual fallback.

    Returns:
        ``True`` when either identifier matches or all three textual signals
        are present, ``False`` otherwise.
    """
    intent, capability = (
        str(step_state.get(key) or "").strip()
        for key in ("detected_intent", "capability_id")
    )
    if intent == "inventory_margin_ranking_for_nomenclature":
        return True
    if capability == "inventory_inventory_margin_ranking_for_nomenclature":
        return True

    # Textual fallback: every signal group needs at least one substring match.
    normalized = _review_text(question)

    def _mentions(stems: tuple[str, ...]) -> bool:
        return any(stem in normalized for stem in stems)

    if not _mentions(("номенклатур", "товар")):
        return False
    if not _mentions(("прибыл", "марж", "реализован", "реализац")):
        return False
    return _mentions(("высок", "низк", "какая", "какие"))
 def build_business_first_review(step_state: dict[str, Any]) -> dict[str, Any]:
    question = str(step_state.get("question_resolved") or step_state.get("question_template") or "").strip()
    assistant_text = str(step_state.get("assistant_text") or "")
@ -1882,11 +1962,38 @@ def build_business_first_review(step_state: dict[str, Any]) -> dict[str, Any]:
    if too_verbose_for_direct:
        issue_codes.append("business_answer_too_verbose")
    limited_answer = _has_any_marker(assistant_text, BUSINESS_LIMITED_ANSWER_MARKERS)
    has_next_action = _has_any_marker(assistant_text, BUSINESS_NEXT_ACTION_MARKERS)
    nomenclature_margin_context = is_nomenclature_margin_context(step_state, question)
    wrong_margin_domain_hits = (
        _marker_hits(assistant_text, NOMENCLATURE_MARGIN_WRONG_DOMAIN_ANSWER_MARKERS)
        if nomenclature_margin_context
        else []
    )
    margin_contract_hits = (
        _marker_hits(assistant_text, NOMENCLATURE_MARGIN_EXPECTED_ANSWER_MARKERS)
        if nomenclature_margin_context
        else []
    )
    if wrong_margin_domain_hits:
        issue_codes.append("domain_leak_accounting_route")
    if nomenclature_margin_context and len(set(margin_contract_hits)) < 2:
        issue_codes.append("accounting_contract_missing")
    if nomenclature_margin_context and limited_answer and not has_next_action:
        issue_codes.append("business_next_step_missing")
    root_cause_layers: list[str] = []
    if "business_direct_answer_missing" in issue_codes or "answer_layering_noise" in issue_codes:
        root_cause_layers.append("answer_shape_mismatch")
    if "business_answer_too_verbose" in issue_codes or "technical_garbage_in_answer" in issue_codes:
        root_cause_layers.append("business_utility_gap")
    if "domain_leak_accounting_route" in issue_codes:
        root_cause_layers.append("domain_purity_gap")
        root_cause_layers.append("route_gap")
    if "accounting_contract_missing" in issue_codes:
        root_cause_layers.append("accounting_contract_gap")
    if "business_next_step_missing" in issue_codes:
        root_cause_layers.append("business_utility_gap")
    return {
        "schema_version": "business_first_step_review_v1",
@ -1903,8 +2010,13 @@ def build_business_first_review(step_state: dict[str, Any]) -> dict[str, Any]:
        "top_line_scaffold_present": bool(scaffold_hits or top_noise),
        "top_line_scaffold_hits": scaffold_hits,
        "too_verbose_for_direct_question": too_verbose_for_direct,
        "limited_answer_detected": limited_answer,
        "next_action_present": has_next_action,
        "nomenclature_margin_context": nomenclature_margin_context,
        "domain_leak_hits": wrong_margin_domain_hits,
        "accounting_contract_hits": margin_contract_hits,
        "business_usefulness_ok": not issue_codes,
-        "issue_codes": issue_codes,
+        "issue_codes": list(dict.fromkeys(issue_codes)),
        "suggested_root_cause_layers": list(dict.fromkeys(root_cause_layers)),
    }
@ -1920,7 +2032,7 @@ def derive_invariant_severity(step_state: dict[str, Any], violation_code: str) -
    overrides = step_state.get("invariant_severity")
    if isinstance(overrides, dict):
        override = str(overrides.get(violation_code) or "").strip().upper()
-        if override in {"P0", "P1", "WARNING"}:
+        if override in {"P0", "P1", "P2", "WARNING"}:
            return override
    return DEFAULT_INVARIANT_SEVERITY.get(violation_code, "P1")
--- a/scripts/domain_truth_harness.py
+++ b/scripts/domain_truth_harness.py
@ -351,16 +351,22 @@ BUSINESS_REVIEW_FINDING_MESSAGES = {
    "technical_garbage_in_answer": "User-facing answer leaked internal runtime or MCP identifiers.",
    "business_direct_answer_missing": "The answer did not put the direct business answer first.",
    "counterparty_value_flow_misrouted_to_company_profit": "Counterparty received/paid/net flow question was answered with company profit instead of counterparty cashflow.",
    "domain_leak_accounting_route": "The answer leaked into the wrong accounting domain for the user's business question.",
    "accounting_contract_missing": "The answer did not expose the required accounting contract for the requested business calculation.",
    "answer_layering_noise": "The answer opened with scaffolding or report framing instead of a clean business result.",
    "business_answer_too_verbose": "The answer is too verbose for a direct business question.",
    "business_next_step_missing": "A bounded or insufficient answer did not offer a useful next action.",
 }
 # Severity label ("critical" or "warning") for each business-review issue code.
 # The keys are the same issue codes that BUSINESS_REVIEW_FINDING_MESSAGES
 # describes.
 # NOTE(review): scripts/domain_case_loop.py's DEFAULT_INVARIANT_SEVERITY rates
 # "answer_layering_noise" as P1, the same level as "accounting_contract_missing"
 # and "business_answer_too_verbose", while this table marks the first as
 # "critical" and the other two as "warning" — confirm the two scales are meant
 # to diverge.
 BUSINESS_REVIEW_FINDING_SEVERITY = {
    "technical_garbage_in_answer": "critical",
    "business_direct_answer_missing": "critical",
    "counterparty_value_flow_misrouted_to_company_profit": "critical",
    "domain_leak_accounting_route": "critical",
    "accounting_contract_missing": "warning",
    "answer_layering_noise": "critical",
    "business_answer_too_verbose": "warning",
    "business_next_step_missing": "warning",
 }
@ -1191,15 +1197,20 @@ def build_business_review_summary(spec: dict[str, Any], scenario_state: dict[str
                "suggested_root_cause_layers": business_review.get("suggested_root_cause_layers") or [],
            }
        )
-    failed = sum(
+    failure_issues = {
-        1
+        "technical_garbage_in_answer",
-        for step in steps
+        "business_direct_answer_missing",
-        if any(
+        "answer_layering_noise",
-            issue in {"technical_garbage_in_answer", "business_direct_answer_missing", "answer_layering_noise"}
+        "counterparty_value_flow_misrouted_to_company_profit",
-            for issue in step["issue_codes"]
+        "domain_leak_accounting_route",
-        )
+    }
-    )
+    warning_issues = {
-    warnings = sum(1 for step in steps if "business_answer_too_verbose" in step["issue_codes"])
+        "business_answer_too_verbose",
        "accounting_contract_missing",
        "business_next_step_missing",
    }
    failed = sum(1 for step in steps if any(issue in failure_issues for issue in step["issue_codes"]))
    warnings = sum(1 for step in steps if any(issue in warning_issues for issue in step["issue_codes"]))
    semantic_status = "fail" if failed or review_failures else ("warning" if warnings or review_warnings else "pass")
    return {
        "schema_version": "business_first_run_review_v1",
--- a/scripts/test_review_assistant_stage1_run.py
+++ b/scripts/test_review_assistant_stage1_run.py
@ -268,6 +268,127 @@ class AssistantStage1RunReviewTests(unittest.TestCase):
        self.assertGreaterEqual(review["tag_counts"]["contextual_followup"], 3)
        self.assertGreaterEqual(review["tag_counts"]["direct_business_question"], 2)
    def test_review_flags_nomenclature_margin_answer_that_leaks_to_os_and_settlements(self) -> None:
        # Scenario: a nomenclature-margin question is answered with depreciation /
        # stuck-payment / settlement content. The run review must fail overall and
        # expose a P0 repair target for "domain_leak_accounting_route".
        # (A comment rather than a docstring, so unittest's test description is unchanged.)
        run_id = "assistant-stage1-margin-domain-leak"
        user_turn = {
            "role": "user",
            "text": "Какая номенклатура товара реализована с высокой прибылью какая с низкой",
        }
        assistant_turn = {
            "role": "assistant",
            "text": "По объекту ОС видна амортизация и зависшая оплата. Проверьте карточку ОС и закрытие расчетов.",
            "reply_type": "factual_with_explanation",
            "message_id": "a-margin-leak",
            "trace_id": "trace-margin-leak",
            "debug": {"detected_intent": "inventory_margin_ranking_for_nomenclature"},
        }
        with tempfile.TemporaryDirectory() as workdir:
            base = Path(workdir)
            session_path = base / "sessions" / f"{run_id}-SAVED-001.json"
            report_path = base / "reports" / f"{run_id}.md"
            write_json(session_path, session_payload([user_turn, assistant_turn]))
            report_path.parent.mkdir(parents=True, exist_ok=True)
            report_path.write_text(f"# Assistant Stage 1 Eval Run\n\n- run_id: {run_id}\n", encoding="utf-8")
            review = reviewer.build_run_review(
                run_id=run_id,
                session_files=[session_path],
                report_path=report_path,
            )
        # Assertions run after the temporary directory is removed, as in the original.
        summary = review["summary"]
        self.assertEqual(summary["overall_business_status"], "fail")
        self.assertIn("domain_leak_accounting_route", summary["issue_counts"])
        targets: dict = {}
        for target in review["repair_targets"]:
            targets[target["issue_code"]] = target
        self.assertEqual(targets["domain_leak_accounting_route"]["severity"], "P0")
    def test_review_warns_when_limited_business_answer_has_no_next_action(self) -> None:
        # Scenario: the assistant gives a limited answer to a nomenclature-margin
        # question without proposing any follow-up. The run review must count
        # "business_next_step_missing" and rate its repair target as P2.
        # (A comment rather than a docstring, so unittest's test description is unchanged.)
        run_id = "assistant-stage1-limited-no-next-action"
        user_turn = {
            "role": "user",
            "text": "Какая номенклатура товара реализована с высокой прибылью какая с низкой",
        }
        assistant_turn = {
            "role": "assistant",
            "text": "За май 2020 рейтинг прибыльности номенклатуры построить нельзя: не подтверждена себестоимость реализации.",
            "reply_type": "partial_coverage",
            "message_id": "a-limited-no-next",
            "trace_id": "trace-limited-no-next",
            "debug": {"detected_intent": "inventory_margin_ranking_for_nomenclature"},
        }
        with tempfile.TemporaryDirectory() as workdir:
            base = Path(workdir)
            session_path = base / "sessions" / f"{run_id}-SAVED-001.json"
            report_path = base / "reports" / f"{run_id}.md"
            write_json(session_path, session_payload([user_turn, assistant_turn]))
            report_path.parent.mkdir(parents=True, exist_ok=True)
            report_path.write_text(f"# Assistant Stage 1 Eval Run\n\n- run_id: {run_id}\n", encoding="utf-8")
            review = reviewer.build_run_review(
                run_id=run_id,
                session_files=[session_path],
                report_path=report_path,
            )
        # Assertions run after the temporary directory is removed, as in the original.
        self.assertIn("business_next_step_missing", review["summary"]["issue_counts"])
        targets: dict = {}
        for target in review["repair_targets"]:
            targets[target["issue_code"]] = target
        self.assertEqual(targets["business_next_step_missing"]["severity"], "P2")
    def test_review_accepts_margin_clarification_with_accounting_contract_and_next_action(self) -> None:
        # Scenario: the assistant asks for the period, names the accounting
        # measures it can compute and offers a next action. None of the three
        # margin-gate issue codes may appear in the run summary.
        # (A comment rather than a docstring, so unittest's test description is unchanged.)
        run_id = "assistant-stage1-margin-clarification-clean"
        user_turn = {
            "role": "user",
            "text": "Какая номенклатура товара реализована с высокой прибылью какая с низкой",
        }
        assistant_turn = {
            "role": "assistant",
            "text": "Для рейтинга прибыльности нужен период. Могу посчитать по номенклатуре: выручку без НДС, себестоимость реализации, валовую прибыль и маржинальность. Уточните месяц, квартал или год.",
            "reply_type": "partial_coverage",
            "message_id": "a-margin-clean",
            "trace_id": "trace-margin-clean",
            "debug": {"detected_intent": "inventory_margin_ranking_for_nomenclature"},
        }
        with tempfile.TemporaryDirectory() as workdir:
            base = Path(workdir)
            session_path = base / "sessions" / f"{run_id}-SAVED-001.json"
            report_path = base / "reports" / f"{run_id}.md"
            write_json(session_path, session_payload([user_turn, assistant_turn]))
            report_path.parent.mkdir(parents=True, exist_ok=True)
            report_path.write_text(f"# Assistant Stage 1 Eval Run\n\n- run_id: {run_id}\n", encoding="utf-8")
            review = reviewer.build_run_review(
                run_id=run_id,
                session_files=[session_path],
                report_path=report_path,
            )
        # Assertions run after the temporary directory is removed, as in the original.
        # Same check order as the original: leak, contract, next step.
        for unexpected_code in (
            "domain_leak_accounting_route",
            "accounting_contract_missing",
            "business_next_step_missing",
        ):
            self.assertNotIn(unexpected_code, review["summary"]["issue_counts"])
 if __name__ == "__main__":
    unittest.main()