diff --git a/docs/ARCH/11 - architecture_turnaround/11 - continuity_stabilization_plan_2026-04-17.md b/docs/ARCH/11 - architecture_turnaround/11 - continuity_stabilization_plan_2026-04-17.md index 3e618f6..7c96081 100644 --- a/docs/ARCH/11 - architecture_turnaround/11 - continuity_stabilization_plan_2026-04-17.md +++ b/docs/ARCH/11 - architecture_turnaround/11 - continuity_stabilization_plan_2026-04-17.md @@ -325,6 +325,10 @@ Still open after the accepted phase12 replay: - root supplier tails anomaly questions re-enter `hybrid_store_plus_live` with grounded fragments and non-empty deterministic route summaries; - narrowing follow-up for `2020-06 / account 60` now keeps hybrid/batch routing instead of collapsing into empty clarification; - the broader hybrid investigation contour is therefore back under explicit runtime authority rather than ambient luck. + - the remaining translit root seam is now also closed in the same contour: + - transliterated supplier-tail wording no longer loses the causal tail during predecompose entry handling; + - live replay `address_truth_harness_phase13_hybrid_followup_authority_live_20260418_rerun4` is accepted with the translit root step returning `factual_with_explanation` and staying inside hybrid investigation routing; + - endpoint coverage now explicitly requires the translit account-60 tail question to keep every routed fragment in `hybrid_store_plus_live`, so future refactors cannot silently split the same question back into `hybrid + store_canonical`. ## Next Execution Slice (2026-04-18) @@ -340,15 +344,17 @@ This next slice should be executed in the following order: 1. Finish continuity authority convergence in the hot runtime path. 2. Widen saved-session replay coverage beyond the already repaired flagship chains. -3. Prove that non-flagship hybrid/deep-turn contours survive the same runtime contracts instead of silently inheriting wrong prompt/schema assumptions. -4. Only after that, continue secondary answer-shaping cleanup where it materially affects acceptance. -5. Only after that, begin controlled domain-by-domain expansion toward the multi-domain stage. +3. Reduce duplicated state reconstruction that still lives outside the shared continuity authority. +4. Prove that non-flagship hybrid/deep-turn contours survive the same runtime contracts across more than one repaired scenario pack. +5. Only after that, continue secondary answer-shaping cleanup where it materially affects acceptance. +6. Only after that, begin controlled domain-by-domain expansion toward the multi-domain stage. Current explicit goals for this slice: - fewer owners independently reconstruct `active context`; - more replay breadth before any large expansion claim; - fewer hidden runtime contract mismatches between entry prompts, schema validation, and deep-turn orchestration; +- fewer hybrid/deep entry seams that still depend on fragment luck instead of explicit runtime contracts; - cleaner user-facing business answers on already-correct truth paths; - lower risk that new domains multiply orchestration chaos faster than capability growth. diff --git a/llm_normalizer/backend/dist/services/assistantService.js b/llm_normalizer/backend/dist/services/assistantService.js index e55e4c1..6e7404b 100644 --- a/llm_normalizer/backend/dist/services/assistantService.js +++ b/llm_normalizer/backend/dist/services/assistantService.js @@ -2040,6 +2040,29 @@ function selectPreferredAddressFragmentCandidate(rawText, normalizedText) { } return normalizedCandidate; } +function hasAddressDeepInvestigationSignalForPredecompose(text) { + const normalized = compactWhitespace(repairAddressMojibake(String(text ?? "")).toLowerCase()); + if (!normalized) { + return false; + } + return /(?:разрыв|хвост|цепоч|механизм|аномал|риск|не\s+закрыва|не\s+закрыт|где\s+разрыв|какие\s+документы\s+не\s+закрыва|tail|tails|chain|root\s*cause|anomal|risk|gde\s+tail|kakie\s+dokumenty\s+ne\s+zakryv|ne\s+zakryvayut\s+oplat|razryv|cepoch|prover.*tail)/iu.test(normalized); +} +function hasAddressTranslitSignalForPredecompose(text) { + const source = String(text ?? "").trim(); + if (!source) { + return false; + } + return /[a-z]/i.test(source); +} +function joinAddressFragmentCandidates(candidates) { + const unique = Array.from(new Set(candidates + .map((item) => compactWhitespace(String(item ?? ""))) + .filter(Boolean))); + if (unique.length === 0) { + return null; + } + return compactWhitespace(unique.join(". ")); +} function readAddressFilterString(addressDebug, key) { const filters = addressDebug?.extracted_filters; if (!filters || typeof filters !== "object") { @@ -2892,7 +2915,8 @@ function normalizeAddressSemanticHintsFromFragment(fragment) { selected_object_scope_detected: hints.selected_object_scope_detected === true || normalizedScopeTargetKind === "selected_object" }; } -function extractAddressPredecomposeCandidateFromFragments(fragments) { +function extractAddressPredecomposeCandidateFromFragments(fragments, sourceMessage = null) { + const candidates = []; for (const item of Array.isArray(fragments) ? fragments : []) { if (!item || typeof item !== "object") { continue; @@ -2909,19 +2933,39 @@ function extractAddressPredecomposeCandidateFromFragments(fragments) { continue; } if (candidate.length >= 3 && candidate.length <= 500) { - return { + candidates.push({ candidate, semanticHints: normalizeAddressSemanticHintsFromFragment(fragment) - }; + }); } } - return null; + if (candidates.length === 0) { + return null; + } + const primaryCandidate = candidates[0]; + const combinedCandidate = joinAddressFragmentCandidates(candidates.map((item) => item.candidate)); + const sourceHasDeepInvestigationSignal = hasAddressDeepInvestigationSignalForPredecompose(sourceMessage); + const sourceHasTranslitSignal = hasAddressTranslitSignalForPredecompose(sourceMessage); + const primaryHasDeepInvestigationSignal = hasAddressDeepInvestigationSignalForPredecompose(primaryCandidate.candidate); + const combinedHasDeepInvestigationSignal = hasAddressDeepInvestigationSignalForPredecompose(combinedCandidate); + if (combinedCandidate && + combinedCandidate.length >= 3 && + combinedCandidate.length <= 500 && + sourceHasTranslitSignal && + combinedHasDeepInvestigationSignal && + (sourceHasDeepInvestigationSignal || !primaryHasDeepInvestigationSignal)) { + return { + candidate: combinedCandidate, + semanticHints: primaryCandidate.semanticHints + }; + } + return primaryCandidate; } -function extractAddressPredecomposeCandidateFromNormalized(normalized) { +function extractAddressPredecomposeCandidateFromNormalized(normalized, sourceMessage = null) { if (!normalized || typeof normalized !== "object") { return null; } - return extractAddressPredecomposeCandidateFromFragments(normalized.fragments); + return extractAddressPredecomposeCandidateFromFragments(normalized.fragments, sourceMessage); } function stripMarkdownJsonFence(text) { return String(text ?? "") @@ -3000,7 +3044,7 @@ function extractOutputTextFromRawNormalizerOutput(raw) { } return null; } -function extractAddressPredecomposeCandidateFromRawNormalizerOutput(rawModelOutput) { +function extractAddressPredecomposeCandidateFromRawNormalizerOutput(rawModelOutput, sourceMessage = null) { const outputText = extractOutputTextFromRawNormalizerOutput(rawModelOutput); if (!outputText) { return null; @@ -3009,7 +3053,7 @@ function extractAddressPredecomposeCandidateFromRawNormalizerOutput(rawModelOutp if (!parsed || typeof parsed !== "object") { return null; } - return extractAddressPredecomposeCandidateFromFragments(parsed.fragments); + return extractAddressPredecomposeCandidateFromFragments(parsed.fragments, sourceMessage); } const ADDRESS_PREDECOMPOSE_LOW_QUALITY_COUNTERPARTY_TOKENS = new Set([ "есть", @@ -3346,8 +3390,8 @@ async function runAddressLlmPreDecompose(normalizerService, payload, userMessage }; try { const normalized = await normalizerService.normalize(normalizePayload); - const candidateFromNormalized = extractAddressPredecomposeCandidateFromNormalized(normalized?.normalized); - const candidateFromRaw = candidateFromNormalized ? null : extractAddressPredecomposeCandidateFromRawNormalizerOutput(normalized?.raw_model_output); + const candidateFromNormalized = extractAddressPredecomposeCandidateFromNormalized(normalized?.normalized, userMessage); + const candidateFromRaw = candidateFromNormalized ? null : extractAddressPredecomposeCandidateFromRawNormalizerOutput(normalized?.raw_model_output, userMessage); const candidateMeta = candidateFromNormalized ?? candidateFromRaw; const candidate = candidateMeta?.candidate ?? null; if (!candidate) { diff --git a/llm_normalizer/backend/src/services/assistantService.ts b/llm_normalizer/backend/src/services/assistantService.ts index dd4f3ec..c227603 100644 --- a/llm_normalizer/backend/src/services/assistantService.ts +++ b/llm_normalizer/backend/src/services/assistantService.ts @@ -1995,6 +1995,29 @@ function selectPreferredAddressFragmentCandidate(rawText, normalizedText) { } return normalizedCandidate; } +function hasAddressDeepInvestigationSignalForPredecompose(text) { + const normalized = compactWhitespace(repairAddressMojibake(String(text ?? "")).toLowerCase()); + if (!normalized) { + return false; + } + return /(?:разрыв|хвост|цепоч|механизм|аномал|риск|не\s+закрыва|не\s+закрыт|где\s+разрыв|какие\s+документы\s+не\s+закрыва|tail|tails|chain|root\s*cause|anomal|risk|gde\s+tail|kakie\s+dokumenty\s+ne\s+zakryv|ne\s+zakryvayut\s+oplat|razryv|cepoch|prover.*tail)/iu.test(normalized); +} +function hasAddressTranslitSignalForPredecompose(text) { + const source = String(text ?? "").trim(); + if (!source) { + return false; + } + return /[a-z]/i.test(source); +} +function joinAddressFragmentCandidates(candidates) { + const unique = Array.from(new Set(candidates + .map((item) => compactWhitespace(String(item ?? ""))) + .filter(Boolean))); + if (unique.length === 0) { + return null; + } + return compactWhitespace(unique.join(". ")); +} function readAddressFilterString(addressDebug, key) { const filters = addressDebug?.extracted_filters; if (!filters || typeof filters !== "object") { @@ -2847,7 +2870,8 @@ function normalizeAddressSemanticHintsFromFragment(fragment) { selected_object_scope_detected: hints.selected_object_scope_detected === true || normalizedScopeTargetKind === "selected_object" }; } -function extractAddressPredecomposeCandidateFromFragments(fragments) { +function extractAddressPredecomposeCandidateFromFragments(fragments, sourceMessage = null) { + const candidates = []; for (const item of Array.isArray(fragments) ? fragments : []) { if (!item || typeof item !== "object") { continue; @@ -2864,19 +2888,39 @@ function extractAddressPredecomposeCandidateFromFragments(fragments) { continue; } if (candidate.length >= 3 && candidate.length <= 500) { - return { + candidates.push({ candidate, semanticHints: normalizeAddressSemanticHintsFromFragment(fragment) - }; + }); } } - return null; + if (candidates.length === 0) { + return null; + } + const primaryCandidate = candidates[0]; + const combinedCandidate = joinAddressFragmentCandidates(candidates.map((item) => item.candidate)); + const sourceHasDeepInvestigationSignal = hasAddressDeepInvestigationSignalForPredecompose(sourceMessage); + const sourceHasTranslitSignal = hasAddressTranslitSignalForPredecompose(sourceMessage); + const primaryHasDeepInvestigationSignal = hasAddressDeepInvestigationSignalForPredecompose(primaryCandidate.candidate); + const combinedHasDeepInvestigationSignal = hasAddressDeepInvestigationSignalForPredecompose(combinedCandidate); + if (combinedCandidate && + combinedCandidate.length >= 3 && + combinedCandidate.length <= 500 && + sourceHasTranslitSignal && + combinedHasDeepInvestigationSignal && + (sourceHasDeepInvestigationSignal || !primaryHasDeepInvestigationSignal)) { + return { + candidate: combinedCandidate, + semanticHints: primaryCandidate.semanticHints + }; + } + return primaryCandidate; } -function extractAddressPredecomposeCandidateFromNormalized(normalized) { +function extractAddressPredecomposeCandidateFromNormalized(normalized, sourceMessage = null) { if (!normalized || typeof normalized !== "object") { return null; } - return extractAddressPredecomposeCandidateFromFragments(normalized.fragments); + return extractAddressPredecomposeCandidateFromFragments(normalized.fragments, sourceMessage); } function stripMarkdownJsonFence(text) { return String(text ?? "") @@ -2955,7 +2999,7 @@ function extractOutputTextFromRawNormalizerOutput(raw) { } return null; } -function extractAddressPredecomposeCandidateFromRawNormalizerOutput(rawModelOutput) { +function extractAddressPredecomposeCandidateFromRawNormalizerOutput(rawModelOutput, sourceMessage = null) { const outputText = extractOutputTextFromRawNormalizerOutput(rawModelOutput); if (!outputText) { return null; @@ -2964,7 +3008,7 @@ function extractAddressPredecomposeCandidateFromRawNormalizerOutput(rawModelOutp if (!parsed || typeof parsed !== "object") { return null; } - return extractAddressPredecomposeCandidateFromFragments(parsed.fragments); + return extractAddressPredecomposeCandidateFromFragments(parsed.fragments, sourceMessage); } const ADDRESS_PREDECOMPOSE_LOW_QUALITY_COUNTERPARTY_TOKENS = new Set([ "есть", @@ -3301,8 +3345,8 @@ async function runAddressLlmPreDecompose(normalizerService, payload, userMessage }; try { const normalized = await normalizerService.normalize(normalizePayload); - const candidateFromNormalized = extractAddressPredecomposeCandidateFromNormalized(normalized?.normalized); - const candidateFromRaw = candidateFromNormalized ? null : extractAddressPredecomposeCandidateFromRawNormalizerOutput(normalized?.raw_model_output); + const candidateFromNormalized = extractAddressPredecomposeCandidateFromNormalized(normalized?.normalized, userMessage); + const candidateFromRaw = candidateFromNormalized ? null : extractAddressPredecomposeCandidateFromRawNormalizerOutput(normalized?.raw_model_output, userMessage); const candidateMeta = candidateFromNormalized ?? candidateFromRaw; const candidate = candidateMeta?.candidate ?? null; if (!candidate) { diff --git a/llm_normalizer/backend/tests/assistantAddressLlmPredecompose.test.ts b/llm_normalizer/backend/tests/assistantAddressLlmPredecompose.test.ts index ec9cac5..5b78dad 100644 --- a/llm_normalizer/backend/tests/assistantAddressLlmPredecompose.test.ts +++ b/llm_normalizer/backend/tests/assistantAddressLlmPredecompose.test.ts @@ -1791,5 +1791,6 @@ describe("assistant address llm pre-decompose candidate preference", () => { expect(calls[0].message).not.toContain("остаток по счету 07"); expect(calls[0].message.toLowerCase()).toContain("договору 1-пм/2020"); }); + }); diff --git a/llm_normalizer/backend/tests/assistantEndpoint.test.ts b/llm_normalizer/backend/tests/assistantEndpoint.test.ts index 3a6289a..9ab8ab0 100644 --- a/llm_normalizer/backend/tests/assistantEndpoint.test.ts +++ b/llm_normalizer/backend/tests/assistantEndpoint.test.ts @@ -163,9 +163,17 @@ describe("assistant mode API", () => { expect(response.body.reply_type).not.toBe("out_of_scope"); expect(response.body.debug?.route_summary?.message_in_scope).toBe(true); expect(Array.isArray(response.body.debug?.routes)).toBe(true); - expect(response.body.debug?.routes.some((item: { route?: string }) => item.route !== "no_route")).toBe(true); + expect(response.body.debug?.routes.length).toBeGreaterThan(0); + expect( + response.body.debug?.routes.every((item: { route?: string }) => item.route === "hybrid_store_plus_live") + ).toBe(true); expect(Array.isArray(response.body.debug?.retrieval_results)).toBe(true); - expect(response.body.debug?.retrieval_results.some((item: { status?: string }) => item.status === "ok")).toBe(true); + expect( + response.body.debug?.retrieval_results.some( + (item: { status?: string; route?: string }) => + item.status === "ok" && item.route === "hybrid_store_plus_live" + ) + ).toBe(true); }); it("avoids false route mismatch when supported evidence exists for bounded answer", async () => {