АРЧ АП11 - Архитектура после регресса: Архитектура: закрыть translit hybrid-entry seam в phase13 и закрепить routing в endpoint-регрессии

This commit is contained in:
dctouch 2026-04-18 19:45:55 +03:00
parent 823cb56a63
commit 0ecee2b360
5 changed files with 128 additions and 25 deletions

View File

@ -325,6 +325,10 @@ Still open after the accepted phase12 replay:
- root supplier-tail anomaly questions re-enter `hybrid_store_plus_live` with grounded fragments and non-empty deterministic route summaries;
- narrowing follow-up for `2020-06 / account 60` now keeps hybrid/batch routing instead of collapsing into empty clarification;
- the broader hybrid investigation contour is therefore back under explicit runtime authority rather than ambient luck.
- the remaining translit root seam is now also closed in the same contour:
- transliterated supplier-tail wording no longer loses the causal tail during predecompose entry handling;
- live replay `address_truth_harness_phase13_hybrid_followup_authority_live_20260418_rerun4` is accepted with the translit root step returning `factual_with_explanation` and staying inside hybrid investigation routing;
- endpoint coverage now explicitly requires the translit account-60 tail question to keep every routed fragment in `hybrid_store_plus_live`, so future refactors cannot silently split the same question back into `hybrid + store_canonical`.
## Next Execution Slice (2026-04-18)
@ -340,15 +344,17 @@ This next slice should be executed in the following order:
1. Finish continuity authority convergence in the hot runtime path.
2. Widen saved-session replay coverage beyond the already repaired flagship chains.
3. Prove that non-flagship hybrid/deep-turn contours survive the same runtime contracts instead of silently inheriting wrong prompt/schema assumptions.
4. Only after that, continue secondary answer-shaping cleanup where it materially affects acceptance.
5. Only after that, begin controlled domain-by-domain expansion toward the multi-domain stage.
3. Reduce duplicated state reconstruction that still lives outside the shared continuity authority.
4. Prove that non-flagship hybrid/deep-turn contours survive the same runtime contracts across more than one repaired scenario pack.
5. Only after that, continue secondary answer-shaping cleanup where it materially affects acceptance.
6. Only after that, begin controlled domain-by-domain expansion toward the multi-domain stage.
Current explicit goals for this slice:
- fewer owners independently reconstruct `active context`;
- more replay breadth before any large expansion claim;
- fewer hidden runtime contract mismatches between entry prompts, schema validation, and deep-turn orchestration;
- fewer hybrid/deep entry seams that still depend on fragment luck instead of explicit runtime contracts;
- cleaner user-facing business answers on already-correct truth paths;
- lower risk that new domains multiply orchestration chaos faster than capability growth.

View File

@ -2040,6 +2040,29 @@ function selectPreferredAddressFragmentCandidate(rawText, normalizedText) {
}
return normalizedCandidate;
}
/**
 * Reports whether the given text contains deep-investigation wording.
 *
 * The input is coerced to a string (null/undefined become ""), passed through
 * `repairAddressMojibake`, lower-cased, and then through `compactWhitespace`
 * (both helpers are defined elsewhere in this file). The result is matched
 * against a fixed list of Cyrillic stems and Latin/transliterated keywords.
 *
 * @param text Any value; it is stringified before inspection.
 * @returns `true` when the prepared text matches the keyword pattern,
 *          `false` when it does not or when the prepared text is empty.
 */
function hasAddressDeepInvestigationSignalForPredecompose(text) {
  const deepInvestigationPattern = /(?:разрыв|хвост|цепоч|механизм|аномал|риск|не\s+закрыва|не\s+закрыт|где\s+разрыв|какие\s+документы\s+не\s+закрыва|tail|tails|chain|root\s*cause|anomal|risk|gde\s+tail|kakie\s+dokumenty\s+ne\s+zakryv|ne\s+zakryvayut\s+oplat|razryv|cepoch|prover.*tail)/iu;
  const rawText = String(text ?? "");
  const repairedLowercase = repairAddressMojibake(rawText).toLowerCase();
  const haystack = compactWhitespace(repairedLowercase);
  // Nothing left after preparation means there is nothing to match against.
  return haystack ? deepInvestigationPattern.test(haystack) : false;
}
/**
 * Reports whether the given text contains at least one ASCII Latin letter.
 *
 * The check is deliberately coarse: any `a-z` / `A-Z` character counts, so it
 * does not distinguish transliterated wording from other Latin text.
 *
 * @param text Any value; it is stringified (null/undefined become "") and trimmed.
 * @returns `true` if the trimmed text is non-empty and contains a Latin letter,
 *          otherwise `false`.
 */
function hasAddressTranslitSignalForPredecompose(text) {
  const trimmed = String(text ?? "").trim();
  return trimmed.length > 0 && /[a-z]/i.test(trimmed);
}
/**
 * Merges fragment candidate strings into a single ". "-separated string.
 *
 * Each entry is stringified (null/undefined become "") and passed through
 * `compactWhitespace` (defined elsewhere in this file). Falsy results are
 * dropped and duplicates are removed, keeping first-occurrence order.
 *
 * @param candidates Array of candidate values.
 * @returns The joined text, passed once more through `compactWhitespace`,
 *          or `null` when no usable candidate remains.
 */
function joinAddressFragmentCandidates(candidates) {
  const cleaned = candidates.map((item) => compactWhitespace(String(item ?? "")));
  // A Set iterates in insertion order, so the first occurrence of each value wins.
  const distinct = [...new Set(cleaned.filter(Boolean))];
  return distinct.length > 0 ? compactWhitespace(distinct.join(". ")) : null;
}
function readAddressFilterString(addressDebug, key) {
const filters = addressDebug?.extracted_filters;
if (!filters || typeof filters !== "object") {
@ -2892,7 +2915,8 @@ function normalizeAddressSemanticHintsFromFragment(fragment) {
selected_object_scope_detected: hints.selected_object_scope_detected === true || normalizedScopeTargetKind === "selected_object"
};
}
function extractAddressPredecomposeCandidateFromFragments(fragments) {
function extractAddressPredecomposeCandidateFromFragments(fragments, sourceMessage = null) {
const candidates = [];
for (const item of Array.isArray(fragments) ? fragments : []) {
if (!item || typeof item !== "object") {
continue;
@ -2909,19 +2933,39 @@ function extractAddressPredecomposeCandidateFromFragments(fragments) {
continue;
}
if (candidate.length >= 3 && candidate.length <= 500) {
return {
candidates.push({
candidate,
semanticHints: normalizeAddressSemanticHintsFromFragment(fragment)
};
});
}
}
if (candidates.length === 0) {
return null;
}
function extractAddressPredecomposeCandidateFromNormalized(normalized) {
const primaryCandidate = candidates[0];
const combinedCandidate = joinAddressFragmentCandidates(candidates.map((item) => item.candidate));
const sourceHasDeepInvestigationSignal = hasAddressDeepInvestigationSignalForPredecompose(sourceMessage);
const sourceHasTranslitSignal = hasAddressTranslitSignalForPredecompose(sourceMessage);
const primaryHasDeepInvestigationSignal = hasAddressDeepInvestigationSignalForPredecompose(primaryCandidate.candidate);
const combinedHasDeepInvestigationSignal = hasAddressDeepInvestigationSignalForPredecompose(combinedCandidate);
if (combinedCandidate &&
combinedCandidate.length >= 3 &&
combinedCandidate.length <= 500 &&
sourceHasTranslitSignal &&
combinedHasDeepInvestigationSignal &&
(sourceHasDeepInvestigationSignal || !primaryHasDeepInvestigationSignal)) {
return {
candidate: combinedCandidate,
semanticHints: primaryCandidate.semanticHints
};
}
return primaryCandidate;
}
function extractAddressPredecomposeCandidateFromNormalized(normalized, sourceMessage = null) {
if (!normalized || typeof normalized !== "object") {
return null;
}
return extractAddressPredecomposeCandidateFromFragments(normalized.fragments);
return extractAddressPredecomposeCandidateFromFragments(normalized.fragments, sourceMessage);
}
function stripMarkdownJsonFence(text) {
return String(text ?? "")
@ -3000,7 +3044,7 @@ function extractOutputTextFromRawNormalizerOutput(raw) {
}
return null;
}
function extractAddressPredecomposeCandidateFromRawNormalizerOutput(rawModelOutput) {
function extractAddressPredecomposeCandidateFromRawNormalizerOutput(rawModelOutput, sourceMessage = null) {
const outputText = extractOutputTextFromRawNormalizerOutput(rawModelOutput);
if (!outputText) {
return null;
@ -3009,7 +3053,7 @@ function extractAddressPredecomposeCandidateFromRawNormalizerOutput(rawModelOutp
if (!parsed || typeof parsed !== "object") {
return null;
}
return extractAddressPredecomposeCandidateFromFragments(parsed.fragments);
return extractAddressPredecomposeCandidateFromFragments(parsed.fragments, sourceMessage);
}
const ADDRESS_PREDECOMPOSE_LOW_QUALITY_COUNTERPARTY_TOKENS = new Set([
"есть",
@ -3346,8 +3390,8 @@ async function runAddressLlmPreDecompose(normalizerService, payload, userMessage
};
try {
const normalized = await normalizerService.normalize(normalizePayload);
const candidateFromNormalized = extractAddressPredecomposeCandidateFromNormalized(normalized?.normalized);
const candidateFromRaw = candidateFromNormalized ? null : extractAddressPredecomposeCandidateFromRawNormalizerOutput(normalized?.raw_model_output);
const candidateFromNormalized = extractAddressPredecomposeCandidateFromNormalized(normalized?.normalized, userMessage);
const candidateFromRaw = candidateFromNormalized ? null : extractAddressPredecomposeCandidateFromRawNormalizerOutput(normalized?.raw_model_output, userMessage);
const candidateMeta = candidateFromNormalized ?? candidateFromRaw;
const candidate = candidateMeta?.candidate ?? null;
if (!candidate) {

View File

@ -1995,6 +1995,29 @@ function selectPreferredAddressFragmentCandidate(rawText, normalizedText) {
}
return normalizedCandidate;
}
/**
 * Reports whether the given text contains deep-investigation wording.
 *
 * The input is coerced to a string (null/undefined become ""), passed through
 * `repairAddressMojibake`, lower-cased, and then through `compactWhitespace`
 * (both helpers are defined elsewhere in this file). The result is matched
 * against a fixed list of Cyrillic stems and Latin/transliterated keywords.
 *
 * @param text Any value; it is stringified before inspection.
 * @returns `true` when the prepared text matches the keyword pattern,
 *          `false` when it does not or when the prepared text is empty.
 */
function hasAddressDeepInvestigationSignalForPredecompose(text) {
  const deepInvestigationPattern = /(?:разрыв|хвост|цепоч|механизм|аномал|риск|не\s+закрыва|не\s+закрыт|где\s+разрыв|какие\s+документы\s+не\s+закрыва|tail|tails|chain|root\s*cause|anomal|risk|gde\s+tail|kakie\s+dokumenty\s+ne\s+zakryv|ne\s+zakryvayut\s+oplat|razryv|cepoch|prover.*tail)/iu;
  const rawText = String(text ?? "");
  const repairedLowercase = repairAddressMojibake(rawText).toLowerCase();
  const haystack = compactWhitespace(repairedLowercase);
  // Nothing left after preparation means there is nothing to match against.
  return haystack ? deepInvestigationPattern.test(haystack) : false;
}
/**
 * Reports whether the given text contains at least one ASCII Latin letter.
 *
 * The check is deliberately coarse: any `a-z` / `A-Z` character counts, so it
 * does not distinguish transliterated wording from other Latin text.
 *
 * @param text Any value; it is stringified (null/undefined become "") and trimmed.
 * @returns `true` if the trimmed text is non-empty and contains a Latin letter,
 *          otherwise `false`.
 */
function hasAddressTranslitSignalForPredecompose(text) {
  const trimmed = String(text ?? "").trim();
  return trimmed.length > 0 && /[a-z]/i.test(trimmed);
}
/**
 * Merges fragment candidate strings into a single ". "-separated string.
 *
 * Each entry is stringified (null/undefined become "") and passed through
 * `compactWhitespace` (defined elsewhere in this file). Falsy results are
 * dropped and duplicates are removed, keeping first-occurrence order.
 *
 * @param candidates Array of candidate values.
 * @returns The joined text, passed once more through `compactWhitespace`,
 *          or `null` when no usable candidate remains.
 */
function joinAddressFragmentCandidates(candidates) {
  const cleaned = candidates.map((item) => compactWhitespace(String(item ?? "")));
  // A Set iterates in insertion order, so the first occurrence of each value wins.
  const distinct = [...new Set(cleaned.filter(Boolean))];
  return distinct.length > 0 ? compactWhitespace(distinct.join(". ")) : null;
}
function readAddressFilterString(addressDebug, key) {
const filters = addressDebug?.extracted_filters;
if (!filters || typeof filters !== "object") {
@ -2847,7 +2870,8 @@ function normalizeAddressSemanticHintsFromFragment(fragment) {
selected_object_scope_detected: hints.selected_object_scope_detected === true || normalizedScopeTargetKind === "selected_object"
};
}
function extractAddressPredecomposeCandidateFromFragments(fragments) {
function extractAddressPredecomposeCandidateFromFragments(fragments, sourceMessage = null) {
const candidates = [];
for (const item of Array.isArray(fragments) ? fragments : []) {
if (!item || typeof item !== "object") {
continue;
@ -2864,19 +2888,39 @@ function extractAddressPredecomposeCandidateFromFragments(fragments) {
continue;
}
if (candidate.length >= 3 && candidate.length <= 500) {
return {
candidates.push({
candidate,
semanticHints: normalizeAddressSemanticHintsFromFragment(fragment)
};
});
}
}
if (candidates.length === 0) {
return null;
}
function extractAddressPredecomposeCandidateFromNormalized(normalized) {
const primaryCandidate = candidates[0];
const combinedCandidate = joinAddressFragmentCandidates(candidates.map((item) => item.candidate));
const sourceHasDeepInvestigationSignal = hasAddressDeepInvestigationSignalForPredecompose(sourceMessage);
const sourceHasTranslitSignal = hasAddressTranslitSignalForPredecompose(sourceMessage);
const primaryHasDeepInvestigationSignal = hasAddressDeepInvestigationSignalForPredecompose(primaryCandidate.candidate);
const combinedHasDeepInvestigationSignal = hasAddressDeepInvestigationSignalForPredecompose(combinedCandidate);
if (combinedCandidate &&
combinedCandidate.length >= 3 &&
combinedCandidate.length <= 500 &&
sourceHasTranslitSignal &&
combinedHasDeepInvestigationSignal &&
(sourceHasDeepInvestigationSignal || !primaryHasDeepInvestigationSignal)) {
return {
candidate: combinedCandidate,
semanticHints: primaryCandidate.semanticHints
};
}
return primaryCandidate;
}
function extractAddressPredecomposeCandidateFromNormalized(normalized, sourceMessage = null) {
if (!normalized || typeof normalized !== "object") {
return null;
}
return extractAddressPredecomposeCandidateFromFragments(normalized.fragments);
return extractAddressPredecomposeCandidateFromFragments(normalized.fragments, sourceMessage);
}
function stripMarkdownJsonFence(text) {
return String(text ?? "")
@ -2955,7 +2999,7 @@ function extractOutputTextFromRawNormalizerOutput(raw) {
}
return null;
}
function extractAddressPredecomposeCandidateFromRawNormalizerOutput(rawModelOutput) {
function extractAddressPredecomposeCandidateFromRawNormalizerOutput(rawModelOutput, sourceMessage = null) {
const outputText = extractOutputTextFromRawNormalizerOutput(rawModelOutput);
if (!outputText) {
return null;
@ -2964,7 +3008,7 @@ function extractAddressPredecomposeCandidateFromRawNormalizerOutput(rawModelOutp
if (!parsed || typeof parsed !== "object") {
return null;
}
return extractAddressPredecomposeCandidateFromFragments(parsed.fragments);
return extractAddressPredecomposeCandidateFromFragments(parsed.fragments, sourceMessage);
}
const ADDRESS_PREDECOMPOSE_LOW_QUALITY_COUNTERPARTY_TOKENS = new Set([
"есть",
@ -3301,8 +3345,8 @@ async function runAddressLlmPreDecompose(normalizerService, payload, userMessage
};
try {
const normalized = await normalizerService.normalize(normalizePayload);
const candidateFromNormalized = extractAddressPredecomposeCandidateFromNormalized(normalized?.normalized);
const candidateFromRaw = candidateFromNormalized ? null : extractAddressPredecomposeCandidateFromRawNormalizerOutput(normalized?.raw_model_output);
const candidateFromNormalized = extractAddressPredecomposeCandidateFromNormalized(normalized?.normalized, userMessage);
const candidateFromRaw = candidateFromNormalized ? null : extractAddressPredecomposeCandidateFromRawNormalizerOutput(normalized?.raw_model_output, userMessage);
const candidateMeta = candidateFromNormalized ?? candidateFromRaw;
const candidate = candidateMeta?.candidate ?? null;
if (!candidate) {

View File

@ -1791,5 +1791,6 @@ describe("assistant address llm pre-decompose candidate preference", () => {
expect(calls[0].message).not.toContain("остаток по счету 07");
expect(calls[0].message.toLowerCase()).toContain("договору 1-пм/2020");
});
});

View File

@ -163,9 +163,17 @@ describe("assistant mode API", () => {
expect(response.body.reply_type).not.toBe("out_of_scope");
expect(response.body.debug?.route_summary?.message_in_scope).toBe(true);
expect(Array.isArray(response.body.debug?.routes)).toBe(true);
expect(response.body.debug?.routes.some((item: { route?: string }) => item.route !== "no_route")).toBe(true);
expect(response.body.debug?.routes.length).toBeGreaterThan(0);
expect(
response.body.debug?.routes.every((item: { route?: string }) => item.route === "hybrid_store_plus_live")
).toBe(true);
expect(Array.isArray(response.body.debug?.retrieval_results)).toBe(true);
expect(response.body.debug?.retrieval_results.some((item: { status?: string }) => item.status === "ok")).toBe(true);
expect(
response.body.debug?.retrieval_results.some(
(item: { status?: string; route?: string }) =>
item.status === "ok" && item.route === "hybrid_store_plus_live"
)
).toBe(true);
});
it("avoids false route mismatch when supported evidence exists for bounded answer", async () => {