ГЛОБАЛЬНЫЙ РЕФАКТОРИНГ АРХИТЕКТУРЫ - Рефакторинг этапов Stage 3.4: расширен soft-refusal в deep/living, чтобы при слабом/непокрытом домене не падать в шаблонный Коротко/Что сломано.

2026-04-11 17:41:07 +03:00 · 2026-04-11 17:41:07 +03:00 · 88d5561fad
parent a0d0f95dde
commit 88d5561fad
5 changed files with 386 additions and 15 deletions
--- a/docs/TECH/1CLLMARCH-FACT.md
+++ b/docs/TECH/1CLLMARCH-FACT.md
@@ -2401,6 +2401,21 @@ Implemented in current pass (Stage 3.3 soft-refusal + anti-template limited repl
   - Extended regression pack: `11` files / `344` tests passed.
   - Type build: `npm --prefix llm_normalizer/backend run build` passed.

+Implemented in current pass (Stage 3.4 deep/living soft-refusal boundary widening):
+1. Extended boundary fallback trigger in policy composer:
+   - `shouldUseBoundaryFallbackReply(...)` now supports `broad_partial` mode when domain coverage is weak/unconfirmed.
+   - Added domain-aware gate (`focus_domain == null` or `focusDomainGroundingBlocked`) + weak-evidence envelope checks.
+2. Reduced template leakage for off-domain partial replies:
+   - Cases with weak partial support but no confirmed domain grounding now return soft-refusal boundary reply instead of rigid `Коротко/Что сломано/...` template blocks.
+3. Improved boundary fallback UX:
+   - Added deterministic heading variants to reduce repetitive wording.
+   - Added quick actionable line (`Что могу сделать сейчас`) sourced from nearest supported capability set.
+4. Added targeted regression coverage:
+   - Rewrote/normalized `assistantBoundaryFallbackReply.test.ts` in UTF-8 and added case for `broad_partial + weak evidence + uncovered domain`.
+5. Validation snapshot:
+   - Stage 3 focused suite (+boundary): `12` files / `348` tests passed.
+   - Type build: `npm --prefix llm_normalizer/backend run build` passed.
+
 Acceptance (Stage 3):
 1. LLM outputs strictly validated schema for extraction/decomposition (no free-form).
 2. Deterministic guards can block or downgrade answers when evidence insufficient.
--- a/llm_normalizer/backend/dist/services/answerComposer.js
+++ b/llm_normalizer/backend/dist/services/answerComposer.js
@@ -1601,6 +1601,16 @@ function formatNarrativeDomainLabel(domain) {
    }
    return "доступного учетного контура";
 }
+function pickDeterministicBoundaryVariant(seed, variants) {
+    if (variants.length === 0) {
+        return "";
+    }
+    let score = 0;
+    for (const char of String(seed ?? "")) {
+        score = (score + char.charCodeAt(0)) % 104_729;
+    }
+    return variants[score % variants.length];
+}
 function pickBoundaryCapabilityLines(userMessage, limit = 3) {
    const text = String(userMessage ?? "").toLowerCase();
    const scored = BOUNDARY_CAPABILITY_SUGGESTIONS.map((item, index) => ({
@@ -1615,6 +1625,16 @@ function pickBoundaryCapabilityLines(userMessage, limit = 3) {
    const selected = ranked.slice(0, Math.max(2, limit));
    return uniqueStrings(selected.map((item) => `${item.label}: ${item.helpText}`), limit);
 }
+function buildBoundaryQuickActionLine(capabilities) {
+    const actions = capabilities
+        .slice(0, 2)
+        .map((item) => item.replace(/:\s*/u, " — ").trim())
+        .filter((item) => item.length > 0);
+    if (actions.length === 0) {
+        return null;
+    }
+    return `Что могу сделать сейчас: ${actions.join("; ")}.`;
+}
 function buildNaturalClarificationHints(input) {
    const hints = [];
    if (input.missingAnchors.period) {
@@ -1641,7 +1661,7 @@ function shouldUseBoundaryFallbackReply(input) {
    if (input.mode === "out_of_scope") {
        return true;
    }
-    if (input.mode !== "clarification_required" && input.mode !== "no_grounded") {
+    if (input.mode !== "clarification_required" && input.mode !== "no_grounded" && input.mode !== "broad_partial") {
        return false;
    }
    const hasNoEvidenceRoutes = input.okResultsCount === 0 && input.partialResultsCount === 0;
@@ -1650,14 +1670,31 @@ function shouldUseBoundaryFallbackReply(input) {
    const groundingBlocked = input.groundingCheck.status === "no_grounded_answer" ||
        input.groundingCheck.status === "partial" ||
        input.groundingCheck.status === "route_mismatch_blocked";
-    return hasNoEvidenceRoutes && hasNoConfirmedCoverage && groundingBlocked;
+    if (hasNoEvidenceRoutes && hasNoConfirmedCoverage && groundingBlocked) {
+        return true;
+    }
+    const domainNotCovered = input.focusDomain === null || input.focusDomainGroundingBlocked;
+    const weakEvidenceEnvelope = input.okResultsCount === 0 &&
+        (input.partialResultsCount === 0 ||
+            input.aggregateEvidenceConfidence === "low" ||
+            input.hasCriticalEvidenceLimitation);
+    if (domainNotCovered && hasNoConfirmedCoverage && groundingBlocked && weakEvidenceEnvelope) {
+        return true;
+    }
+    return false;
 }
 function buildBoundaryFallbackReply(input) {
    const nearbyCapabilities = pickBoundaryCapabilityLines(input.userMessage, 3);
+    const quickActionLine = buildBoundaryQuickActionLine(nearbyCapabilities);
    if (input.focusDomain === null) {
-        return sanitizeUserFacingReply([
+        const heading = pickDeterministicBoundaryVariant(input.userMessage, [
            "По этому запросу у меня нет надежного доменного покрытия, поэтому даю мягкий отказ вместо технического шаблона.",
+            "Сейчас у меня нет надежного доменного маршрута по этому запросу, поэтому даю мягкий отказ вместо шаблонной технички."
+        ]);
+        return sanitizeUserFacingReply([
+            heading,
            nearbyCapabilities.length > 0 ? `Что могу сделать рядом по смыслу:\n${formatList(nearbyCapabilities)}` : "",
+            quickActionLine ?? "",
            "Переформулируй вопрос через один из вариантов выше, и я сразу перейду к проверке по данным 1С."
        ]
            .filter(Boolean)
@@ -1667,10 +1704,15 @@ function buildBoundaryFallbackReply(input) {
        missingAnchors: input.missingAnchors,
        coverageReport: input.coverageReport
    });
-    return sanitizeUserFacingReply([
+    const domainHeading = pickDeterministicBoundaryVariant(`${input.userMessage}|${input.focusDomain}`, [
        `Сейчас не могу надежно ответить по сценарию ${formatNarrativeDomainLabel(input.focusDomain)}: не хватает опоры.`,
+        `По сценарию ${formatNarrativeDomainLabel(input.focusDomain)} пока не хватает подтвержденной опоры для надежного вывода.`
+    ]);
+    return sanitizeUserFacingReply([
+        domainHeading,
        clarificationHints.length > 0 ? `Чтобы сразу перейти к проверке, уточни:\n${formatList(clarificationHints)}` : "",
-        nearbyCapabilities.length > 0 ? `Если удобнее, могу начать с близкого сценария:\n${formatList(nearbyCapabilities.slice(0, 2))}` : ""
+        nearbyCapabilities.length > 0 ? `Если удобнее, могу начать с близкого сценария:\n${formatList(nearbyCapabilities.slice(0, 2))}` : "",
+        quickActionLine ?? ""
    ]
        .filter(Boolean)
        .join("\n\n"));
@@ -3659,7 +3701,11 @@ function composeAssistantAnswerV11(input) {
        groundingCheck: input.groundingCheck,
        coverageReport: input.coverageReport,
        okResultsCount: okResults.length,
-        partialResultsCount: partialResults.length
+        partialResultsCount: partialResults.length,
+        focusDomain: focusNarrativeDomain,
+        focusDomainGroundingBlocked,
+        aggregateEvidenceConfidence,
+        hasCriticalEvidenceLimitation
    });
    const hasProblemWeakSignal = policySignals.narrowing_strength !== "strong" ||
        policySignals.minimum_evidence_failed ||
--- a/llm_normalizer/backend/src/services/answerComposer.ts
+++ b/llm_normalizer/backend/src/services/answerComposer.ts
@@ -1895,6 +1895,17 @@ function formatNarrativeDomainLabel(domain: P0NarrativeDomain): string {
  return "доступного учетного контура";
 }

+function pickDeterministicBoundaryVariant(seed: string, variants: string[]): string {
+  if (variants.length === 0) {
+    return "";
+  }
+  let score = 0;
+  for (const char of String(seed ?? "")) {
+    score = (score + char.charCodeAt(0)) % 104_729;
+  }
+  return variants[score % variants.length];
+}
+
 function pickBoundaryCapabilityLines(userMessage: string, limit = 3): string[] {
  const text = String(userMessage ?? "").toLowerCase();
  const scored = BOUNDARY_CAPABILITY_SUGGESTIONS.map((item, index) => ({
@@ -1910,6 +1921,17 @@ function pickBoundaryCapabilityLines(userMessage: string, limit = 3): string[] {
  return uniqueStrings(selected.map((item) => `${item.label}: ${item.helpText}`), limit);
 }

+function buildBoundaryQuickActionLine(capabilities: string[]): string | null {
+  const actions = capabilities
+    .slice(0, 2)
+    .map((item) => item.replace(/:\s*/u, " — ").trim())
+    .filter((item) => item.length > 0);
+  if (actions.length === 0) {
+    return null;
+  }
+  return `Что могу сделать сейчас: ${actions.join("; ")}.`;
+}
+
 function buildNaturalClarificationHints(input: {
  missingAnchors: MissingAnchors;
  coverageReport: RequirementCoverageReport;
@@ -1942,11 +1964,15 @@ function shouldUseBoundaryFallbackReply(input: {
  coverageReport: RequirementCoverageReport;
  okResultsCount: number;
  partialResultsCount: number;
+  focusDomain: P0NarrativeDomain;
+  focusDomainGroundingBlocked: boolean;
+  aggregateEvidenceConfidence: EvidenceConfidence;
+  hasCriticalEvidenceLimitation: boolean;
 }): boolean {
  if (input.mode === "out_of_scope") {
    return true;
  }
-  if (input.mode !== "clarification_required" && input.mode !== "no_grounded") {
+  if (input.mode !== "clarification_required" && input.mode !== "no_grounded" && input.mode !== "broad_partial") {
    return false;
  }
  const hasNoEvidenceRoutes = input.okResultsCount === 0 && input.partialResultsCount === 0;
@@ -1957,7 +1983,19 @@ function shouldUseBoundaryFallbackReply(input: {
    input.groundingCheck.status === "no_grounded_answer" ||
    input.groundingCheck.status === "partial" ||
    input.groundingCheck.status === "route_mismatch_blocked";
-  return hasNoEvidenceRoutes && hasNoConfirmedCoverage && groundingBlocked;
+  if (hasNoEvidenceRoutes && hasNoConfirmedCoverage && groundingBlocked) {
+    return true;
+  }
+  const domainNotCovered = input.focusDomain === null || input.focusDomainGroundingBlocked;
+  const weakEvidenceEnvelope =
+    input.okResultsCount === 0 &&
+    (input.partialResultsCount === 0 ||
+      input.aggregateEvidenceConfidence === "low" ||
+      input.hasCriticalEvidenceLimitation);
+  if (domainNotCovered && hasNoConfirmedCoverage && groundingBlocked && weakEvidenceEnvelope) {
+    return true;
+  }
+  return false;
 }

 function buildBoundaryFallbackReply(input: {
@@ -1967,11 +2005,20 @@ function buildBoundaryFallbackReply(input: {
  coverageReport: RequirementCoverageReport;
 }): string {
  const nearbyCapabilities = pickBoundaryCapabilityLines(input.userMessage, 3);
+  const quickActionLine = buildBoundaryQuickActionLine(nearbyCapabilities);
  if (input.focusDomain === null) {
-    return sanitizeUserFacingReply(
+    const heading = pickDeterministicBoundaryVariant(
+      input.userMessage,
      [
        "По этому запросу у меня нет надежного доменного покрытия, поэтому даю мягкий отказ вместо технического шаблона.",
+        "Сейчас у меня нет надежного доменного маршрута по этому запросу, поэтому даю мягкий отказ вместо шаблонной технички."
+      ]
+    );
+    return sanitizeUserFacingReply(
+      [
+        heading,
        nearbyCapabilities.length > 0 ? `Что могу сделать рядом по смыслу:\n${formatList(nearbyCapabilities)}` : "",
+        quickActionLine ?? "",
        "Переформулируй вопрос через один из вариантов выше, и я сразу перейду к проверке по данным 1С."
      ]
        .filter(Boolean)
@@ -1983,11 +2030,19 @@ function buildBoundaryFallbackReply(input: {
    missingAnchors: input.missingAnchors,
    coverageReport: input.coverageReport
  });
-  return sanitizeUserFacingReply(
+  const domainHeading = pickDeterministicBoundaryVariant(
+    `${input.userMessage}|${input.focusDomain}`,
    [
      `Сейчас не могу надежно ответить по сценарию ${formatNarrativeDomainLabel(input.focusDomain)}: не хватает опоры.`,
+      `По сценарию ${formatNarrativeDomainLabel(input.focusDomain)} пока не хватает подтвержденной опоры для надежного вывода.`
+    ]
+  );
+  return sanitizeUserFacingReply(
+    [
+      domainHeading,
      clarificationHints.length > 0 ? `Чтобы сразу перейти к проверке, уточни:\n${formatList(clarificationHints)}` : "",
-      nearbyCapabilities.length > 0 ? `Если удобнее, могу начать с близкого сценария:\n${formatList(nearbyCapabilities.slice(0, 2))}` : ""
+      nearbyCapabilities.length > 0 ? `Если удобнее, могу начать с близкого сценария:\n${formatList(nearbyCapabilities.slice(0, 2))}` : "",
+      quickActionLine ?? ""
    ]
      .filter(Boolean)
      .join("\n\n")
@@ -4364,7 +4419,11 @@ function composeAssistantAnswerV11(input: ComposeAnswerInput): ComposeAnswerOutp
    groundingCheck: input.groundingCheck,
    coverageReport: input.coverageReport,
    okResultsCount: okResults.length,
-    partialResultsCount: partialResults.length
+    partialResultsCount: partialResults.length,
+    focusDomain: focusNarrativeDomain,
+    focusDomainGroundingBlocked,
+    aggregateEvidenceConfidence,
+    hasCriticalEvidenceLimitation
  });
  const hasProblemWeakSignal =
    policySignals.narrowing_strength !== "strong" ||
--- a/llm_normalizer/backend/tests/assistantBoundaryFallbackReply.test.ts
+++ b/llm_normalizer/backend/tests/assistantBoundaryFallbackReply.test.ts
@@ -54,6 +54,7 @@ describe("assistant boundary fallback reply", () => {
    expect(output.reply_type).toBe("clarification_required");
    expect(output.assistant_reply).toMatch(/мягкий отказ/i);
    expect(output.assistant_reply).toContain("Что могу сделать рядом по смыслу:");
+    expect(output.assistant_reply).toContain("Что могу сделать сейчас:");
    expect(output.assistant_reply).not.toContain("Что сломано:");
  });

@@ -76,9 +77,10 @@ describe("assistant boundary fallback reply", () => {
    });

    expect(output.reply_type).toBe("clarification_required");
-    expect(output.assistant_reply).toMatch(/не могу надежно ответить по сценарию/i);
+    expect(output.assistant_reply).toMatch(/не могу надежно ответить по сценарию|не хватает подтвержденной опоры/i);
    expect(output.assistant_reply).toContain("Чтобы сразу перейти к проверке, уточни:");
    expect(output.assistant_reply).toContain("Если удобнее, могу начать с близкого сценария:");
+    expect(output.assistant_reply).toContain("Что могу сделать сейчас:");
    expect(output.assistant_reply).not.toContain("Что сломано:");
  });

@@ -101,7 +103,66 @@ describe("assistant boundary fallback reply", () => {
    });

    expect(output.reply_type).toBe("out_of_scope");
-    expect(output.assistant_reply).toMatch(/мягкий отказ|не могу надежно/i);
+    expect(output.assistant_reply).toMatch(/мягкий отказ|не могу надежно|не хватает подтвержденной опоры/i);
+    expect(output.assistant_reply).toContain("Что могу сделать сейчас:");
+    expect(output.assistant_reply).not.toContain("Что сломано:");
+  });
+
+  it("uses soft refusal in broad_partial mode when domain is not covered and evidence is weak", () => {
+    const output = composeAssistantAnswer({
+      userMessage: "What is the expected inflation next quarter and FX trend?",
+      routeSummary: buildRouteSummary("none"),
+      retrievalResults: [
+        {
+          fragment_id: "F1",
+          requirement_ids: ["R1"],
+          route: "store_canonical",
+          status: "partial",
+          result_type: "summary",
+          items: [{ note: "weak candidate" }],
+          summary: { note: "weak" },
+          evidence: [],
+          why_included: [],
+          selection_reason: [],
+          risk_factors: [],
+          business_interpretation: [],
+          confidence: "low",
+          limitations: ["weak_source_mapping"],
+          errors: []
+        } as any
+      ],
+      requirements: [
+        {
+          requirement_id: "R1",
+          source_fragment_id: "F1",
+          requirement_text: "forecast and trend",
+          subject_tokens: [],
+          status: "clarification_needed",
+          route: null
+        }
+      ],
+      coverageReport: {
+        requirements_total: 1,
+        requirements_covered: 0,
+        requirements_uncovered: [],
+        requirements_partially_covered: [],
+        clarification_needed_for: ["R1"],
+        out_of_scope_requirements: []
+      },
+      groundingCheck: {
+        status: "partial",
+        route_subject_match: false,
+        missing_requirements: ["R1"],
+        reasons: ["weak_source_mapping"],
+        why_included_summary: [],
+        selection_reason_summary: []
+      },
+      enableAnswerPolicyV11: true
+    });
+
+    expect(output.reply_type).toBe("partial_coverage");
+    expect(output.assistant_reply).toContain("Что могу сделать рядом по смыслу:");
+    expect(output.assistant_reply).toContain("Что могу сделать сейчас:");
    expect(output.assistant_reply).not.toContain("Что сломано:");
  });
 });
--- a/llm_normalizer/data/eval_cases/assistant_autogen_runtime_job-lwG5gg6svR.json
+++ b/llm_normalizer/data/eval_cases/assistant_autogen_runtime_job-lwG5gg6svR.json
@@ -0,0 +1,190 @@
+{
+  "suite_id": "assistant_autogen_runtime_job-lwG5gg6svR",
+  "suite_version": "0.1.0",
+  "schema_version": "assistant_autogen_runtime_v0_1",
+  "scenario_count": 15,
+  "case_ids": [
+    "AUTO-001",
+    "AUTO-002",
+    "AUTO-003",
+    "AUTO-004",
+    "AUTO-005",
+    "AUTO-006",
+    "AUTO-007",
+    "AUTO-008",
+    "AUTO-009",
+    "AUTO-010",
+    "AUTO-011",
+    "AUTO-012",
+    "AUTO-013",
+    "AUTO-014",
+    "AUTO-015"
+  ],
+  "cases": [
+    {
+      "case_id": "AUTO-001",
+      "scenario_tag": "autogen_runtime",
+      "question_type": "direct",
+      "broadness_level": "medium",
+      "turns": [
+        {
+          "user_message": "Кому из контрагентов мы уже месяц отдаем товары, но на счетах все еще красуется минусовое сальдо - это реально зеленый свет для ручного вмешательства?"
+        }
+      ]
+    },
+    {
+      "case_id": "AUTO-002",
+      "scenario_tag": "autogen_runtime",
+      "question_type": "direct",
+      "broadness_level": "medium",
+      "turns": [
+        {
+          "user_message": "Где у нас накопились авансы к отгрузкам, которые уже давно пора закрыть или хотя бы перепроверить, чтобы не подозревать худшее?"
+        }
+      ]
+    },
+    {
+      "case_id": "AUTO-003",
+      "scenario_tag": "autogen_runtime",
+      "question_type": "direct",
+      "broadness_level": "medium",
+      "turns": [
+        {
+          "user_message": "Покажи контрагентов, по которым сальдо у нас выглядит так, будто оно врет - ну точно не совпадает с тем, что они нам прислали. Это уже критично для сверки."
+        }
+      ]
+    },
+    {
+      "case_id": "AUTO-004",
+      "scenario_tag": "autogen_runtime",
+      "question_type": "direct",
+      "broadness_level": "medium",
+      "turns": [
+        {
+          "user_message": "Сколько заказчиков у нас на этот момент могут считаться долгожителями по своим задолженностям?"
+        }
+      ]
+    },
+    {
+      "case_id": "AUTO-005",
+      "scenario_tag": "autogen_runtime",
+      "question_type": "direct",
+      "broadness_level": "medium",
+      "turns": [
+        {
+          "user_message": "В каких случаях мы видим ситуацию, когда документы есть, а денег - нет и пока не предвидится?"
+        }
+      ]
+    },
+    {
+      "case_id": "AUTO-006",
+      "scenario_tag": "autogen_runtime",
+      "question_type": "direct",
+      "broadness_level": "medium",
+      "turns": [
+        {
+          "user_message": "Какие контрагенты висят с закрытыми отгрузками, но с открытыми документами оплаты, что явно выглядит как кейс для ручной проверки?"
+        }
+      ]
+    },
+    {
+      "case_id": "AUTO-007",
+      "scenario_tag": "autogen_runtime",
+      "question_type": "direct",
+      "broadness_level": "medium",
+      "turns": [
+        {
+          "user_message": "Покажи контрагентов, у которых есть неоплаченные задолженности по договорам на конец месяца - это уже красный свет для бухгалтера."
+        }
+      ]
+    },
+    {
+      "case_id": "AUTO-008",
+      "scenario_tag": "autogen_runtime",
+      "question_type": "direct",
+      "broadness_level": "medium",
+      "turns": [
+        {
+          "user_message": "По каким заказчикам мы можем выделить непростую картину: сальдо нулевое, а история платежей явно говорит о том, что все не так просто?"
+        }
+      ]
+    },
+    {
+      "case_id": "AUTO-009",
+      "scenario_tag": "autogen_runtime",
+      "question_type": "direct",
+      "broadness_level": "medium",
+      "turns": [
+        {
+          "user_message": "Какие контрагенты у нас на этом моменте могут быть причислены к тем, кто вообще не платит уже несколько месяцев?"
+        }
+      ]
+    },
+    {
+      "case_id": "AUTO-010",
+      "scenario_tag": "autogen_runtime",
+      "question_type": "direct",
+      "broadness_level": "medium",
+      "turns": [
+        {
+          "user_message": "В каких случаях мы видим зависшие отгрузки, которые уже давно пора закрыть - это грозит проблемами в отчетности."
+        }
+      ]
+    },
+    {
+      "case_id": "AUTO-011",
+      "scenario_tag": "autogen_runtime",
+      "question_type": "direct",
+      "broadness_level": "medium",
+      "turns": [
+        {
+          "user_message": "Покажи контрагентов, по которым на конец месяца сальдо выглядит так, будто документы собраны криво и их нужно перепроверить."
+        }
+      ]
+    },
+    {
+      "case_id": "AUTO-012",
+      "scenario_tag": "autogen_runtime",
+      "question_type": "direct",
+      "broadness_level": "medium",
+      "turns": [
+        {
+          "user_message": "Какие у нас зависшие авансы или предоплаты уже давно пора либо закрыть, либо хотя бы проверить - это уже не просто вопрос времени?"
+        }
+      ]
+    },
+    {
+      "case_id": "AUTO-013",
+      "scenario_tag": "autogen_runtime",
+      "question_type": "direct",
+      "broadness_level": "medium",
+      "turns": [
+        {
+          "user_message": "По каким контрагентам мы можем заметить такую картину: оплачено меньше, чем отгружено, и это явно требует вмешательства бухгалтера."
+        }
+      ]
+    },
+    {
+      "case_id": "AUTO-014",
+      "scenario_tag": "autogen_runtime",
+      "question_type": "direct",
+      "broadness_level": "medium",
+      "turns": [
+        {
+          "user_message": "Какие незакрытые документы по договорам у нас уже давно пора проверить - это грозит серьезными проблемами?"
+        }
+      ]
+    },
+    {
+      "case_id": "AUTO-015",
+      "scenario_tag": "autogen_runtime",
+      "question_type": "direct",
+      "broadness_level": "medium",
+      "turns": [
+        {
+          "user_message": "Покажи контрагентов, чьи заказы на отгрузку еще не оплачены, но сальдо уже отрицательное - это явный признак того, что нужно вмешаться."
+        }
+      ]
+    }
+  ]
+}