diff --git a/llm_normalizer/backend/dist/services/answerComposer.js b/llm_normalizer/backend/dist/services/answerComposer.js index 72e9a0a..970372c 100644 --- a/llm_normalizer/backend/dist/services/answerComposer.js +++ b/llm_normalizer/backend/dist/services/answerComposer.js @@ -1078,7 +1078,7 @@ function isProblemUnitAlignedWithNarrativeDomain(unit, domain) { if (accounts.some((item) => isVatAccountToken(item))) { return true; } - return /(vat|ндс|invoice|book_entry|register|книг|счет[\s-]?фактур|сч[её]т[\s-]?фактур)/i.test(corpus); + return /(vat|ндс|invoice|book_entry|register|книг|сч[её]т(?:а|у|ом|е)?[\s-]?фактур(?:а|ы|е|у|ой)?|вычет|налогов(?:ый|ого)?\s+эффект)/i.test(corpus); } if (domain === "month_close_costs_20_44") { const foreignMonthCloseDomain = ["vat_flow", "bank_settlement", "customer_settlement", "fixed_asset"].includes(String(unit.lifecycle_domain ?? "")); @@ -2030,11 +2030,11 @@ function hasStrongNarrativeDomainSignalInText(userMessage, domain) { const accountTokens = extractAccountNumbersFromNarrativeText(text); if (domain === "settlements_60_62") { return (accountTokens.some((item) => isSettlementAccountToken(item)) || - /(60\.0[12]|62\.0[12]|долг|аванс|зач[её]т|взаимозач|расч[её]т)/i.test(text)); + /(60\.0[12]|62\.0[12]|долг|аванс|зач[её]т|взаимозач|расч[её]т|оплат|плат[её]ж|деньг[аи])/i.test(text)); } if (domain === "vat_document_register_book") { return (accountTokens.some((item) => isVatAccountToken(item)) || - /(ндс|vat|счет[-\s]?фактур|сч[её]т[-\s]?фактур|книг[аи]|регистр)/i.test(text)); + /(ндс|vat|сч[её]т(?:а|у|ом|е)?[-\s]?фактур(?:а|ы|е|у|ой)?|книг[аи]|регистр|вычет|налогов(?:ый|ого)?\s+эффект)/i.test(text)); } if (domain === "month_close_costs_20_44") { return (accountTokens.some((item) => isCloseCostsAccountToken(item)) || @@ -2401,7 +2401,7 @@ function buildProblemCentricAnswerStructure(input) { } function limitationReasonToUserText(code) { if (code === "snapshot_only") - return "Вывод сделан по snapshot и может не включать самые свежие изменения."; + return "Оценка построена на snapshot-срезе и может не включать самые свежие изменения."; if (code === "heuristic_inference") return "Часть вывода построена эвристически и требует проверки в базе."; if (code === "missing_mechanism") @@ -2430,7 +2430,10 @@ function inferNarrativeDomainFromText(value) { if (/(долг|аванс|взаимозач|зачет|зачёт|62\.01|62\.02|60\.01|60\.02|не\s+сход)/i.test(text)) { settlementScore += 2; } - if (/(ндс|vat|счет[-\s]?фактур|сч[её]т[-\s]?фактур|книг[аи]|регистр)/i.test(text)) { + if (/(расч[её]т|оплат|плат[её]ж|деньг[аи]|закрыти[ея]\s+расч)/i.test(text)) { + settlementScore += 2; + } + if (/(ндс|vat|сч[её]т(?:а|у|ом|е)?[-\s]?фактур(?:а|ы|е|у|ой)?|книг[аи]|регистр|вычет|налогов(?:ый|ого)?\s+эффект)/i.test(text)) { vatScore += 3; } if (/(закрыти[ея]\s+месяц|закрытие\s+счетов|регламентн|косвенн|затрат|распределени|рбп|амортиз|финансовых\s+результат|month\s*close|period\s*close|close\s+operation)/i.test(text)) { @@ -2595,8 +2598,8 @@ function buildEvidenceSectionLines(structure, questionType = "unknown") { function buildDefaultChecksByDomain(domain) { if (domain === "settlements_60_62") { return [ - "Проверьте договор, объект расчетов, регистр расчетов и документ закрытия (зачет аванса или взаимозачет).", - "Сверьте связку платеж -> расчетный документ -> проводки по 60/62/76 и подтверждение закрытия хвоста." + "Сверьте договор и объект расчетов, затем подтвердите запись в регистре расчетов и документ зачета.", + "Проверьте связку платеж -> расчетный документ -> проводки по 60/62/76 и факт закрытия хвоста." ]; } if (domain === "vat_document_register_book") { @@ -2753,11 +2756,12 @@ function buildChecksSectionLines(structure, context) { lines.push(...domainFallback.slice(0, 2)); lines.push(...actionLines.slice(0, 2)); } - else if (actionLines.length > 0) { - lines.push(...actionLines.slice(0, 2)); - } else { - lines.push(...domainFallback.slice(0, 2)); + lines.push(...domainFallback.slice(0, 1)); + lines.push(...actionLines.slice(0, 2)); + if (lines.length < 2) { + lines.push(...domainFallback.slice(1, 2)); + } } } if (hasMissingPeriod) { @@ -2795,7 +2799,7 @@ function humanizeLimitationToken(value) { if (normalized === "settlement_primary_evidence_not_confirmed") return "Опора по расчетному контуру не подтверждена: в приоритете были сигналы из смежных доменов."; if (normalized.includes("snapshot")) - return "Вывод сделан по snapshot и может не включать часть цепочки."; + return "Оценка сделана на snapshot-срезе и может не включать часть цепочки."; if (normalized.includes("heuristic")) return "Часть вывода основана на эвристике."; if (normalized.includes("weak_source_mapping")) @@ -2823,7 +2827,7 @@ function humanizeLimitationToken(value) { if (/weak mechanism evidence/i.test(raw)) return "Доказательность механизма слабая, нужен ручной контроль."; if (/evidence is snapshot-only/i.test(raw)) - return "Вывод сделан по snapshot и может не включать самые свежие изменения."; + return "Оценка сделана на snapshot-срезе и может не включать самые свежие изменения."; if (/source-of-record/i.test(raw)) return "Часть цепочки нужно подтвердить в исходной учетной базе."; if (/[a-z]/i.test(raw) && !/[а-яё]/iu.test(raw)) @@ -2881,6 +2885,18 @@ function buildQuestionTypeShortLine(context) { if (context.questionType === "what_to_check_first") { return `\u041a\u043e\u0440\u043e\u0442\u043a\u0438\u0439 \u043c\u0430\u0440\u0448\u0440\u0443\u0442 \u043f\u0435\u0440\u0432\u044b\u0445 \u043f\u0440\u043e\u0432\u0435\u0440\u043e\u043a \u0432\u043d\u0443\u0442\u0440\u0438 ${domainName}.`; } + if (context.questionType === "why_breaks") { + if (context.focusDomain === "settlements_60_62") { + return "Наиболее вероятная причина: переход от оплаты к закрытию расчета подтвержден не полностью."; + } + if (context.focusDomain === "vat_document_register_book") { + return "Наиболее вероятная причина: переход документа НДС к регистру и книге подтвержден частично."; + } + if (context.focusDomain === "month_close_costs_20_44") { + return "Наиболее вероятная причина: цепочка распределения затрат и закрытия месяца подтверждена не полностью."; + } + return "Наиболее вероятный механизм проблемы подтвержден частично и требует первичной проверки."; + } return null; } function buildQuestionTypeBrokenLine(context) { diff --git a/llm_normalizer/backend/dist/services/assistantDataLayer.js b/llm_normalizer/backend/dist/services/assistantDataLayer.js index 91929c7..0374f71 100644 --- a/llm_normalizer/backend/dist/services/assistantDataLayer.js +++ b/llm_normalizer/backend/dist/services/assistantDataLayer.js @@ -245,10 +245,11 @@ const P0_DOMAIN_CARDS = [ symptom_markers: [ /\bvat\b/i, /\u043d\u0434\u0441/i, - /\u0441\u0447[её]т.?фактур/i, + /\u0441\u0447[её]т(?:а|у|ом|е)?.?фактур/i, /\u043a\u043d\u0438\u0433[аи]\s+\u043f\u043e\u043a\u0443\u043f/i, /\u043a\u043d\u0438\u0433[аи]\s+\u043f\u0440\u043e\u0434\u0430\u0436/i, - /\u0432\u044b\u0447\u0435\u0442/i + /\u0432\u044b\u0447\u0435\u0442/i, + /\u043d\u0430\u043b\u043e\u0433\u043e\u0432(?:\u044b\u0439|\u043e\u0433\u043e)?\s+\u044d\u0444\u0444\u0435\u043a\u0442/i ] }, { @@ -857,6 +858,35 @@ function collectDateLikeSpans(text) { } return spans; } +function collectAmountLikeSpans(text) { + const spans = []; + const patterns = [ + /\b\d{1,3}(?:[ \u00A0]\d{3})+(?:[.,]\d{2})?\b/g, + /\b\d+[.,]\d{2}\b/g + ]; + for (const pattern of patterns) { + let match = null; + while ((match = pattern.exec(text)) !== null) { + spans.push({ + start: match.index, + end: match.index + match[0].length + }); + } + } + return spans; +} +function collectPercentLikeSpans(text) { + const spans = []; + const pattern = /\b\d{1,3}(?:[.,]\d+)?\s*%/g; + let match = null; + while ((match = pattern.exec(text)) !== null) { + spans.push({ + start: match.index, + end: match.index + match[0].length + }); + } + return spans; +} function intersectsSpan(start, end, spans) { return spans.some((span) => start < span.end && end > span.start); } @@ -867,7 +897,11 @@ function hasAccountContextAround(text, start, end) { } function extractAccountScopeFromText(text) { const lower = String(text ?? "").toLowerCase(); - const dateSpans = collectDateLikeSpans(lower); + const blockedSpans = [ + ...collectDateLikeSpans(lower), + ...collectAmountLikeSpans(lower), + ...collectPercentLikeSpans(lower) + ]; const accounts = []; const pushAccount = (raw) => { const prefix = String(raw ?? "").trim().match(/^(\d{2})/)?.[1]; @@ -912,7 +946,7 @@ function extractAccountScopeFromText(text) { const token = suffixAnchorMatch[0]; const start = suffixAnchorMatch.index; const end = start + token.length; - if (intersectsSpan(start, end, dateSpans)) { + if (intersectsSpan(start, end, blockedSpans)) { continue; } pushAccount(token); @@ -924,7 +958,7 @@ function extractAccountScopeFromText(text) { const token = explicitMatch[0]; const start = explicitMatch.index; const end = start + token.length; - if (intersectsSpan(start, end, dateSpans)) { + if (intersectsSpan(start, end, blockedSpans)) { continue; } const prefix = token.match(/^(\d{2})/)?.[1]; @@ -1090,7 +1124,7 @@ function buildSemanticRetrievalProfile(fragmentText) { pushMany(entityTypes, ["counterparty", "contract", "document", "posting"]); pushMany(relationPatterns, ["payment_to_settlement", "contract_to_documents"]); } - if (/РЅРґСЃ|ндс|vat|РєРЅРёРіР° РїРѕРєСѓРїРѕРє|РєРЅРёРіР° продаж|счет.?фактур|книг[аи]\s+покуп|книг[аи]\s+продаж|сч[её]т.?фактур/i.test(lower) || + if (/РЅРґСЃ|ндс|vat|РєРЅРёРіР° РїРѕРєСѓРїРѕРє|РєРЅРёРіР° продаж|счет.?фактур|книг[аи]\s+покуп|книг[аи]\s+продаж|сч[её]т(?:а|у|ом|е)?[-\s]?фактур(?:а|ы|е|у|ой)?|вычет|налогов(?:ый|ого)?\s+эффект/i.test(lower) || hasVatAccountScope) { pushMany(domainScope, ["vat", "taxes"]); pushMany(documentTypes, ["invoice", "vat_document"]); @@ -1207,12 +1241,24 @@ function cardResolutionScore(card, fragmentText, profile) { if (hasExplicitAccountScope && accountMatches.length === 0) { return 0; } - const hasHardAnchor = accountMatches.length > 0 || markerHit; + const hasVatSoftAnchor = card.id === "vat_document_register_book" && hasStrongVatDomainSignal(fragmentText, profile); + const hasHardAnchor = accountMatches.length > 0 || markerHit || hasVatSoftAnchor; if (!hasHardAnchor) { return 0; } return accountMatches.length * 4 + domainMatches.length * 3 + (markerHit ? 2 : 0); } +function hasStrongVatDomainSignal(fragmentText, profile) { + const text = String(fragmentText ?? ""); + const hasVatLexicalAnchor = /(?:ндс|vat|сч[её]т(?:а|у|ом|е)?[-\s]?фактур(?:а|ы|е|у|ой)?|книг[аи]\s+(?:покуп|продаж)|вычет|налогов(?:ый|ого)?\s+эффект)/iu.test(text); + return (hasVatLexicalAnchor || + profile.account_scope.some((account) => account === "19" || account === "68") || + profile.domain_scope.some((domain) => domain === "vat" || domain === "taxes") || + profile.relation_patterns.some((pattern) => ["invoice_to_vat", "register_to_book", "book_entry_generated", "deduction_posted"].includes(pattern))); +} +function hasStrongSettlementAccountSignal(profile) { + return profile.account_scope.some((account) => account === "51" || account === "60" || account === "62" || account === "76"); +} function resolveP0DomainCard(fragmentText, profile) { const resolved = P0_DOMAIN_CARDS.map((card) => ({ card, @@ -1225,6 +1271,11 @@ function resolveP0DomainCard(fragmentText, profile) { } const [first, second] = resolved; if (second && second.score === first.score) { + const pair = new Set([first.card.id, second.card.id]); + const hasVatSettlementTie = pair.has("vat_document_register_book") && pair.has("settlements_60_62"); + if (hasVatSettlementTie && hasStrongVatDomainSignal(fragmentText, profile) && !hasStrongSettlementAccountSignal(profile)) { + return resolved.find((item) => item.card.id === "vat_document_register_book") ?? null; + } return null; } return first; @@ -2823,7 +2874,39 @@ class AssistantDataLayer { } executeBatch(fragmentText, data) { const semanticProfile = buildSemanticRetrievalProfile(fragmentText); - const source = [...data.problemCases, ...data.keyFields, ...data.docs]; + const resolvedDomain = resolveP0DomainCard(fragmentText, semanticProfile); + const domainCard = resolvedDomain?.card ?? null; + const fallbackSources = ["problemCases", "keyFields", "docs"]; + const sourceScope = domainCard + ? uniqueStrings([...domainCard.allowed_evidence_sources.risk, ...domainCard.allowed_evidence_sources.canonical]) + : fallbackSources; + const sourcePool = collectSourceRecords(data, sourceScope); + const strictForbidden = Boolean(domainCard); + let sourceGate = domainCard + ? applyDomainPuritySourceGate(sourcePool, domainCard, semanticProfile, { strict_forbidden: strictForbidden }) + : { + accepted: sourcePool.map((item) => ({ + ...item, + signals: inferRecordSignals(item.record), + purity: { + allowed: true, + account_match: true, + domain_match: true, + entity_match: true, + edge_match: true, + forbidden_domains: [], + cross_domain_overlap: [] + } + })), + rejected_total: 0, + rejected_forbidden: 0 + }; + let sourceStrictFallbackUsed = false; + if (domainCard && strictForbidden && sourceGate.accepted.length === 0 && sourcePool.length > 0) { + sourceGate = applyDomainPuritySourceGate(sourcePool, domainCard, semanticProfile, { strict_forbidden: false }); + sourceStrictFallbackUsed = true; + } + const source = sourceGate.accepted.map((item) => item.record); const byEntity = new Map(); for (const record of source) { byEntity.set(record.source_entity, (byEntity.get(record.source_entity) ?? 0) + 1); @@ -2836,29 +2919,55 @@ class AssistantDataLayer { entity, records_count: count })); + const puritySummary = { + enabled: Boolean(domainCard), + domain_card_id: domainCard?.id ?? null, + domain_card_title: domainCard?.title ?? null, + source_scope: sourceScope, + source_pool_records: sourcePool.length, + source_selection_allowed: sourceGate.accepted.length, + source_selection_rejected: sourceGate.rejected_total, + source_selection_rejected_forbidden: sourceGate.rejected_forbidden, + top1_pure: domainCard ? topOnePurityHolds(sourceGate.accepted) : true, + top3_pure: domainCard ? topThreePurityHolds(sourceGate.accepted) : true, + strict_forbidden_mode: strictForbidden, + strict_forbidden_fallback_source: sourceStrictFallbackUsed + }; return { status: items.length > 0 ? "ok" : "empty", result_type: "ranking", items, summary: { - checked_records: source.length, + checked_records: sourcePool.length, ranked_entities: items.length, query_subject: semanticProfile.query_subject, semantic_profile: semanticProfile, - ranking_basis: semanticProfile.ranking_basis + ranking_basis: semanticProfile.ranking_basis, + domain_purity_guard: puritySummary }, evidence: items.slice(0, 5).map((item) => ({ entity: item.entity, records_count: item.records_count })), - why_included: items.length > 0 ? ["Показаны сущности СЃ максимальным количеством записей."] : [], - selection_reason: ["Ранжирование выполнено РїРѕ records_count РїРѕ убыванию."], + why_included: items.length > 0 + ? [ + "Показаны сущности СЃ максимальным количеством записей.", + domainCard ? `P0 domain purity enforced for ${domainCard.id}.` : "P0 domain purity was not enforced." + ] + : [], + selection_reason: [ + "Ранжирование выполнено РїРѕ records_count РїРѕ убыванию.", + domainCard ? `Domain gate source scope: ${sourceScope.join(", ")}.` : "Domain gate source scope not applied." + ], risk_factors: uniqueStrings(["entity_volume_spike", ...semanticProfile.anomaly_patterns]), business_interpretation: [ "Top entities by volume highlight where lifecycle-focused review should start first." ], confidence: "medium", - limitations: ["Ранжирование РїРѕ объему РЅРµ всегда эквивалентно бизнес-СЂРёСЃРєСѓ."], + limitations: [ + "Ранжирование РїРѕ объему РЅРµ всегда эквивалентно бизнес-СЂРёСЃРєСѓ.", + domainCard ? "Domain purity guardrail может исключить cross-domain записи на batch-слое." : "Domain purity guardrail не применялся." + ], errors: [] }; } diff --git a/llm_normalizer/backend/dist/services/questionTypeResolver.js b/llm_normalizer/backend/dist/services/questionTypeResolver.js index 459a686..97474c7 100644 --- a/llm_normalizer/backend/dist/services/questionTypeResolver.js +++ b/llm_normalizer/backend/dist/services/questionTypeResolver.js @@ -64,11 +64,24 @@ function countRuleHits(text, rule) { } return hits; } +function hasProofIntent(text) { + return /(?:\bprove\b|\bguess\b|доказан|доказано|доказуем|гипотез|догад|связан\s+ли|зач[её]л(?:ся|ось)\s+ли)/iu.test(text); +} +function hasExplicitChainSplitIntent(text) { + return /(?:какие(?:\s+\S+){0,4}\s+цепочк[аи]|which\s+chains?|complete\s+vs\s+incomplete|что\s+закрыто.*что\s+нет)/iu.test(text); +} +function hasGroundingIntent(text) { + return /(?:на\s+ч(?:е|ё)м[^?!.]{0,40}основан|чем\s+подтвержда|какие\s+основани|what\s+evidence|grounded\s+on|based\s+on)/iu.test(text); +} function resolveQuestionType(input) { const text = String(input ?? "").trim(); if (!text) { return "unknown"; } + // Guard against collapsing proof-intent questions into chain classification. + if (hasProofIntent(text) && !hasExplicitChainSplitIntent(text) && !hasGroundingIntent(text)) { + return "prove_or_guess"; + } let bestType = "unknown"; let bestHits = 0; let bestPriority = Number.POSITIVE_INFINITY; diff --git a/llm_normalizer/backend/src/services/answerComposer.ts b/llm_normalizer/backend/src/services/answerComposer.ts index ae7bc7f..5e49b39 100644 --- a/llm_normalizer/backend/src/services/answerComposer.ts +++ b/llm_normalizer/backend/src/services/answerComposer.ts @@ -1262,7 +1262,9 @@ function isProblemUnitAlignedWithNarrativeDomain(unit: ProblemUnit, domain: P0Na if (accounts.some((item) => isVatAccountToken(item))) { return true; } - return /(vat|ндс|invoice|book_entry|register|книг|счет[\s-]?фактур|сч[её]т[\s-]?фактур)/i.test(corpus); + return /(vat|ндс|invoice|book_entry|register|книг|сч[её]т(?:а|у|ом|е)?[\s-]?фактур(?:а|ы|е|у|ой)?|вычет|налогов(?:ый|ого)?\s+эффект)/i.test( + corpus + ); } if (domain === "month_close_costs_20_44") { @@ -2387,13 +2389,15 @@ function hasStrongNarrativeDomainSignalInText(userMessage: string, domain: P0Nar if (domain === "settlements_60_62") { return ( accountTokens.some((item) => isSettlementAccountToken(item)) || - /(60\.0[12]|62\.0[12]|долг|аванс|зач[её]т|взаимозач|расч[её]т)/i.test(text) + /(60\.0[12]|62\.0[12]|долг|аванс|зач[её]т|взаимозач|расч[её]т|оплат|плат[её]ж|деньг[аи])/i.test(text) ); } if (domain === "vat_document_register_book") { return ( accountTokens.some((item) => isVatAccountToken(item)) || - /(ндс|vat|счет[-\s]?фактур|сч[её]т[-\s]?фактур|книг[аи]|регистр)/i.test(text) + /(ндс|vat|сч[её]т(?:а|у|ом|е)?[-\s]?фактур(?:а|ы|е|у|ой)?|книг[аи]|регистр|вычет|налогов(?:ый|ого)?\s+эффект)/i.test( + text + ) ); } if (domain === "month_close_costs_20_44") { @@ -2855,7 +2859,7 @@ function buildProblemCentricAnswerStructure(input: { } function limitationReasonToUserText(code: EvidenceLimitationReasonCode): string { - if (code === "snapshot_only") return "Вывод сделан по snapshot и может не включать самые свежие изменения."; + if (code === "snapshot_only") return "Оценка построена на snapshot-срезе и может не включать самые свежие изменения."; if (code === "heuristic_inference") return "Часть вывода построена эвристически и требует проверки в базе."; if (code === "missing_mechanism") return "Механизм проблемы подтвержден не полностью."; if (code === "weak_source_mapping") return "Связка между источниками подтверждена частично."; @@ -2884,7 +2888,14 @@ function inferNarrativeDomainFromText(value: string): P0NarrativeDomain { if (/(долг|аванс|взаимозач|зачет|зачёт|62\.01|62\.02|60\.01|60\.02|не\s+сход)/i.test(text)) { settlementScore += 2; } - if (/(ндс|vat|счет[-\s]?фактур|сч[её]т[-\s]?фактур|книг[аи]|регистр)/i.test(text)) { + if (/(расч[её]т|оплат|плат[её]ж|деньг[аи]|закрыти[ея]\s+расч)/i.test(text)) { + settlementScore += 2; + } + if ( + /(ндс|vat|сч[её]т(?:а|у|ом|е)?[-\s]?фактур(?:а|ы|е|у|ой)?|книг[аи]|регистр|вычет|налогов(?:ый|ого)?\s+эффект)/i.test( + text + ) + ) { vatScore += 3; } if ( @@ -3080,8 +3091,8 @@ function buildEvidenceSectionLines( function buildDefaultChecksByDomain(domain: P0NarrativeDomain): string[] { if (domain === "settlements_60_62") { return [ - "Проверьте договор, объект расчетов, регистр расчетов и документ закрытия (зачет аванса или взаимозачет).", - "Сверьте связку платеж -> расчетный документ -> проводки по 60/62/76 и подтверждение закрытия хвоста." + "Сверьте договор и объект расчетов, затем подтвердите запись в регистре расчетов и документ зачета.", + "Проверьте связку платеж -> расчетный документ -> проводки по 60/62/76 и факт закрытия хвоста." ]; } if (domain === "vat_document_register_book") { @@ -3248,10 +3259,12 @@ function buildChecksSectionLines(structure: AnswerStructureV11, context?: Answer if (domain === "settlements_60_62") { lines.push(...domainFallback.slice(0, 2)); lines.push(...actionLines.slice(0, 2)); - } else if (actionLines.length > 0) { - lines.push(...actionLines.slice(0, 2)); } else { - lines.push(...domainFallback.slice(0, 2)); + lines.push(...domainFallback.slice(0, 1)); + lines.push(...actionLines.slice(0, 2)); + if (lines.length < 2) { + lines.push(...domainFallback.slice(1, 2)); + } } } if (hasMissingPeriod) { @@ -3283,7 +3296,7 @@ function humanizeLimitationToken(value: string): string | null { return "Целевой механизм активного домена подтвержден частично; вывод ограничен."; if (normalized === "settlement_primary_evidence_not_confirmed") return "Опора по расчетному контуру не подтверждена: в приоритете были сигналы из смежных доменов."; - if (normalized.includes("snapshot")) return "Вывод сделан по snapshot и может не включать часть цепочки."; + if (normalized.includes("snapshot")) return "Оценка сделана на snapshot-срезе и может не включать часть цепочки."; if (normalized.includes("heuristic")) return "Часть вывода основана на эвристике."; if (normalized.includes("weak_source_mapping")) return "Связка между источниками подтверждена частично."; if (normalized.includes("missing_mechanism")) return "Механизм проблемы подтвержден не полностью."; @@ -3297,7 +3310,7 @@ function humanizeLimitationToken(value: string): string | null { if (/broad query support is limited/i.test(raw)) return "Запрос широкий, поэтому вывод ограничен частичной опорой."; if (/broad ranking output was tightened/i.test(raw)) return "Часть ранжирования ограничена, чтобы избежать ложной точности."; if (/weak mechanism evidence/i.test(raw)) return "Доказательность механизма слабая, нужен ручной контроль."; - if (/evidence is snapshot-only/i.test(raw)) return "Вывод сделан по snapshot и может не включать самые свежие изменения."; + if (/evidence is snapshot-only/i.test(raw)) return "Оценка сделана на snapshot-срезе и может не включать самые свежие изменения."; if (/source-of-record/i.test(raw)) return "Часть цепочки нужно подтвердить в исходной учетной базе."; if (/[a-z]/i.test(raw) && !/[а-яё]/iu.test(raw)) return null; @@ -3361,6 +3374,18 @@ function buildQuestionTypeShortLine(context: AnswerRenderContext): string | null if (context.questionType === "what_to_check_first") { return `\u041a\u043e\u0440\u043e\u0442\u043a\u0438\u0439 \u043c\u0430\u0440\u0448\u0440\u0443\u0442 \u043f\u0435\u0440\u0432\u044b\u0445 \u043f\u0440\u043e\u0432\u0435\u0440\u043e\u043a \u0432\u043d\u0443\u0442\u0440\u0438 ${domainName}.`; } + if (context.questionType === "why_breaks") { + if (context.focusDomain === "settlements_60_62") { + return "Наиболее вероятная причина: переход от оплаты к закрытию расчета подтвержден не полностью."; + } + if (context.focusDomain === "vat_document_register_book") { + return "Наиболее вероятная причина: переход документа НДС к регистру и книге подтвержден частично."; + } + if (context.focusDomain === "month_close_costs_20_44") { + return "Наиболее вероятная причина: цепочка распределения затрат и закрытия месяца подтверждена не полностью."; + } + return "Наиболее вероятный механизм проблемы подтвержден частично и требует первичной проверки."; + } return null; } diff --git a/llm_normalizer/backend/src/services/assistantDataLayer.ts b/llm_normalizer/backend/src/services/assistantDataLayer.ts index efe65ad..15e891a 100644 --- a/llm_normalizer/backend/src/services/assistantDataLayer.ts +++ b/llm_normalizer/backend/src/services/assistantDataLayer.ts @@ -388,10 +388,11 @@ const P0_DOMAIN_CARDS: P0DomainCard[] = [ symptom_markers: [ /\bvat\b/i, /\u043d\u0434\u0441/i, - /\u0441\u0447[её]т.?фактур/i, + /\u0441\u0447[её]т(?:а|у|ом|е)?.?фактур/i, /\u043a\u043d\u0438\u0433[аи]\s+\u043f\u043e\u043a\u0443\u043f/i, /\u043a\u043d\u0438\u0433[аи]\s+\u043f\u0440\u043e\u0434\u0430\u0436/i, - /\u0432\u044b\u0447\u0435\u0442/i + /\u0432\u044b\u0447\u0435\u0442/i, + /\u043d\u0430\u043b\u043e\u0433\u043e\u0432(?:\u044b\u0439|\u043e\u0433\u043e)?\s+\u044d\u0444\u0444\u0435\u043a\u0442/i ] }, { @@ -1209,6 +1210,37 @@ function collectDateLikeSpans(text: string): Array<{ start: number; end: number return spans; } +function collectAmountLikeSpans(text: string): Array<{ start: number; end: number }> { + const spans: Array<{ start: number; end: number }> = []; + const patterns = [ + /\b\d{1,3}(?:[ \u00A0]\d{3})+(?:[.,]\d{2})?\b/g, + /\b\d+[.,]\d{2}\b/g + ]; + for (const pattern of patterns) { + let match: RegExpExecArray | null = null; + while ((match = pattern.exec(text)) !== null) { + spans.push({ + start: match.index, + end: match.index + match[0].length + }); + } + } + return spans; +} + +function collectPercentLikeSpans(text: string): Array<{ start: number; end: number }> { + const spans: Array<{ start: number; end: number }> = []; + const pattern = /\b\d{1,3}(?:[.,]\d+)?\s*%/g; + let match: RegExpExecArray | null = null; + while ((match = pattern.exec(text)) !== null) { + spans.push({ + start: match.index, + end: match.index + match[0].length + }); + } + return spans; +} + function intersectsSpan(start: number, end: number, spans: Array<{ start: number; end: number }>): boolean { return spans.some((span) => start < span.end && end > span.start); } @@ -1221,7 +1253,11 @@ function hasAccountContextAround(text: string, start: number, end: number): bool function extractAccountScopeFromText(text: string): string[] { const lower = String(text ?? "").toLowerCase(); - const dateSpans = collectDateLikeSpans(lower); + const blockedSpans = [ + ...collectDateLikeSpans(lower), + ...collectAmountLikeSpans(lower), + ...collectPercentLikeSpans(lower) + ]; const accounts: string[] = []; const pushAccount = (raw: string): void => { const prefix = String(raw ?? "").trim().match(/^(\d{2})/)?.[1]; @@ -1271,7 +1307,7 @@ function extractAccountScopeFromText(text: string): string[] { const token = suffixAnchorMatch[0]; const start = suffixAnchorMatch.index; const end = start + token.length; - if (intersectsSpan(start, end, dateSpans)) { + if (intersectsSpan(start, end, blockedSpans)) { continue; } pushAccount(token); @@ -1286,7 +1322,7 @@ function extractAccountScopeFromText(text: string): string[] { const token = explicitMatch[0]; const start = explicitMatch.index; const end = start + token.length; - if (intersectsSpan(start, end, dateSpans)) { + if (intersectsSpan(start, end, blockedSpans)) { continue; } const prefix = token.match(/^(\d{2})/)?.[1]; @@ -1465,7 +1501,7 @@ function buildSemanticRetrievalProfile(fragmentText: string): SemanticRetrievalP pushMany(relationPatterns, ["payment_to_settlement", "contract_to_documents"]); } if ( - /РЅРґСЃ|ндс|vat|РєРЅРёРіР° РїРѕРєСѓРїРѕРє|РєРЅРёРіР° продаж|счет.?фактур|книг[аи]\s+покуп|книг[аи]\s+продаж|сч[её]т.?фактур/i.test( + /РЅРґСЃ|ндс|vat|РєРЅРёРіР° РїРѕРєСѓРїРѕРє|РєРЅРёРіР° продаж|счет.?фактур|книг[аи]\s+покуп|книг[аи]\s+продаж|сч[её]т(?:а|у|ом|е)?[-\s]?фактур(?:а|ы|е|у|ой)?|вычет|налогов(?:ый|ого)?\s+эффект/i.test( lower ) || hasVatAccountScope @@ -1599,7 +1635,8 @@ function cardResolutionScore(card: P0DomainCard, fragmentText: string, profile: return 0; } - const hasHardAnchor = accountMatches.length > 0 || markerHit; + const hasVatSoftAnchor = card.id === "vat_document_register_book" && hasStrongVatDomainSignal(fragmentText, profile); + const hasHardAnchor = accountMatches.length > 0 || markerHit || hasVatSoftAnchor; if (!hasHardAnchor) { return 0; } @@ -1607,6 +1644,26 @@ function cardResolutionScore(card: P0DomainCard, fragmentText: string, profile: return accountMatches.length * 4 + domainMatches.length * 3 + (markerHit ? 2 : 0); } +function hasStrongVatDomainSignal(fragmentText: string, profile: SemanticRetrievalProfile): boolean { + const text = String(fragmentText ?? ""); + const hasVatLexicalAnchor = + /(?:ндс|vat|сч[её]т(?:а|у|ом|е)?[-\s]?фактур(?:а|ы|е|у|ой)?|книг[аи]\s+(?:покуп|продаж)|вычет|налогов(?:ый|ого)?\s+эффект)/iu.test( + text + ); + return ( + hasVatLexicalAnchor || + profile.account_scope.some((account) => account === "19" || account === "68") || + profile.domain_scope.some((domain) => domain === "vat" || domain === "taxes") || + profile.relation_patterns.some((pattern) => + ["invoice_to_vat", "register_to_book", "book_entry_generated", "deduction_posted"].includes(pattern) + ) + ); +} + +function hasStrongSettlementAccountSignal(profile: SemanticRetrievalProfile): boolean { + return profile.account_scope.some((account) => account === "51" || account === "60" || account === "62" || account === "76"); +} + function resolveP0DomainCard(fragmentText: string, profile: SemanticRetrievalProfile): P0DomainResolution | null { const resolved = P0_DOMAIN_CARDS.map((card) => ({ card, @@ -1620,6 +1677,11 @@ function resolveP0DomainCard(fragmentText: string, profile: SemanticRetrievalPro } const [first, second] = resolved; if (second && second.score === first.score) { + const pair = new Set([first.card.id, second.card.id]); + const hasVatSettlementTie = pair.has("vat_document_register_book") && pair.has("settlements_60_62"); + if (hasVatSettlementTie && hasStrongVatDomainSignal(fragmentText, profile) && !hasStrongSettlementAccountSignal(profile)) { + return resolved.find((item) => item.card.id === "vat_document_register_book") ?? null; + } return null; } return first; @@ -3443,7 +3505,39 @@ export class AssistantDataLayer { private executeBatch(fragmentText: string, data: DatasetBundle): RawRetrievalResult { const semanticProfile = buildSemanticRetrievalProfile(fragmentText); - const source = [...data.problemCases, ...data.keyFields, ...data.docs]; + const resolvedDomain = resolveP0DomainCard(fragmentText, semanticProfile); + const domainCard = resolvedDomain?.card ?? null; + const fallbackSources: DatasetSourceName[] = ["problemCases", "keyFields", "docs"]; + const sourceScope = domainCard + ? (uniqueStrings([...domainCard.allowed_evidence_sources.risk, ...domainCard.allowed_evidence_sources.canonical]) as DatasetSourceName[]) + : fallbackSources; + const sourcePool = collectSourceRecords(data, sourceScope); + const strictForbidden = Boolean(domainCard); + let sourceGate = domainCard + ? applyDomainPuritySourceGate(sourcePool, domainCard, semanticProfile, { strict_forbidden: strictForbidden }) + : { + accepted: sourcePool.map((item) => ({ + ...item, + signals: inferRecordSignals(item.record), + purity: { + allowed: true, + account_match: true, + domain_match: true, + entity_match: true, + edge_match: true, + forbidden_domains: [], + cross_domain_overlap: [] + } + })), + rejected_total: 0, + rejected_forbidden: 0 + }; + let sourceStrictFallbackUsed = false; + if (domainCard && strictForbidden && sourceGate.accepted.length === 0 && sourcePool.length > 0) { + sourceGate = applyDomainPuritySourceGate(sourcePool, domainCard, semanticProfile, { strict_forbidden: false }); + sourceStrictFallbackUsed = true; + } + const source = sourceGate.accepted.map((item) => item.record); const byEntity = new Map(); for (const record of source) { byEntity.set(record.source_entity, (byEntity.get(record.source_entity) ?? 0) + 1); @@ -3458,29 +3552,56 @@ export class AssistantDataLayer { records_count: count })); + const puritySummary = { + enabled: Boolean(domainCard), + domain_card_id: domainCard?.id ?? null, + domain_card_title: domainCard?.title ?? null, + source_scope: sourceScope, + source_pool_records: sourcePool.length, + source_selection_allowed: sourceGate.accepted.length, + source_selection_rejected: sourceGate.rejected_total, + source_selection_rejected_forbidden: sourceGate.rejected_forbidden, + top1_pure: domainCard ? topOnePurityHolds(sourceGate.accepted) : true, + top3_pure: domainCard ? topThreePurityHolds(sourceGate.accepted) : true, + strict_forbidden_mode: strictForbidden, + strict_forbidden_fallback_source: sourceStrictFallbackUsed + }; + return { status: items.length > 0 ? "ok" : "empty", result_type: "ranking", items, summary: { - checked_records: source.length, + checked_records: sourcePool.length, ranked_entities: items.length, query_subject: semanticProfile.query_subject, semantic_profile: semanticProfile, - ranking_basis: semanticProfile.ranking_basis + ranking_basis: semanticProfile.ranking_basis, + domain_purity_guard: puritySummary }, evidence: items.slice(0, 5).map((item) => ({ entity: item.entity, records_count: item.records_count })), - why_included: items.length > 0 ? ["Показаны сущности СЃ максимальным количеством записей."] : [], - selection_reason: ["Ранжирование выполнено РїРѕ records_count РїРѕ убыванию."], + why_included: items.length > 0 + ? [ + "Показаны сущности СЃ максимальным количеством записей.", + domainCard ? `P0 domain purity enforced for ${domainCard.id}.` : "P0 domain purity was not enforced." + ] + : [], + selection_reason: [ + "Ранжирование выполнено РїРѕ records_count РїРѕ убыванию.", + domainCard ? `Domain gate source scope: ${sourceScope.join(", ")}.` : "Domain gate source scope not applied." + ], risk_factors: uniqueStrings(["entity_volume_spike", ...semanticProfile.anomaly_patterns]), business_interpretation: [ "Top entities by volume highlight where lifecycle-focused review should start first." ], confidence: "medium", - limitations: ["Ранжирование РїРѕ объему РЅРµ всегда эквивалентно бизнес-СЂРёСЃРєСѓ."], + limitations: [ + "Ранжирование РїРѕ объему РЅРµ всегда эквивалентно бизнес-СЂРёСЃРєСѓ.", + domainCard ? "Domain purity guardrail может исключить cross-domain записи на batch-слое." : "Domain purity guardrail не применялся." + ], errors: [] }; } diff --git a/llm_normalizer/backend/src/services/questionTypeResolver.ts b/llm_normalizer/backend/src/services/questionTypeResolver.ts index f5aaf1c..e4b73ca 100644 --- a/llm_normalizer/backend/src/services/questionTypeResolver.ts +++ b/llm_normalizer/backend/src/services/questionTypeResolver.ts @@ -78,12 +78,33 @@ function countRuleHits(text: string, rule: QuestionTypeRule): number { return hits; } +function hasProofIntent(text: string): boolean { + return /(?:\bprove\b|\bguess\b|доказан|доказано|доказуем|гипотез|догад|связан\s+ли|зач[её]л(?:ся|ось)\s+ли)/iu.test(text); +} + +function hasExplicitChainSplitIntent(text: string): boolean { + return /(?:какие(?:\s+\S+){0,4}\s+цепочк[аи]|which\s+chains?|complete\s+vs\s+incomplete|что\s+закрыто.*что\s+нет)/iu.test( + text + ); +} + +function hasGroundingIntent(text: string): boolean { + return /(?:на\s+ч(?:е|ё)м[^?!.]{0,40}основан|чем\s+подтвержда|какие\s+основани|what\s+evidence|grounded\s+on|based\s+on)/iu.test( + text + ); +} + export function resolveQuestionType(input: string): QuestionTypeClass { const text = String(input ?? "").trim(); if (!text) { return "unknown"; } + // Guard against collapsing proof-intent questions into chain classification. + if (hasProofIntent(text) && !hasExplicitChainSplitIntent(text) && !hasGroundingIntent(text)) { + return "prove_or_guess"; + } + let bestType: QuestionTypeClass = "unknown"; let bestHits = 0; let bestPriority = Number.POSITIVE_INFINITY; diff --git a/llm_normalizer/backend/tests/assistantWave16ResidualCleanupRegression.test.ts b/llm_normalizer/backend/tests/assistantWave16ResidualCleanupRegression.test.ts new file mode 100644 index 0000000..3974452 --- /dev/null +++ b/llm_normalizer/backend/tests/assistantWave16ResidualCleanupRegression.test.ts @@ -0,0 +1,431 @@ +import fs from "fs"; +import os from "os"; +import path from "path"; +import { afterEach, describe, expect, it } from "vitest"; +import { AssistantDataLayer } from "../src/services/assistantDataLayer"; +import { composeAssistantAnswer } from "../src/services/answerComposer"; +import { resolveQuestionType } from "../src/services/questionTypeResolver"; +import type { AnswerGroundingCheck, RequirementCoverageReport, UnifiedRetrievalResult } from "../src/types/assistant"; +import type { ProblemUnit } from "../src/types/stage2ProblemUnits"; + +const TEMP_DIRS: string[] = []; + +function cleanupTempDirs(): void { + for (const dir of TEMP_DIRS.splice(0)) { + fs.rmSync(dir, { recursive: true, force: true }); + } +} + +function createSnapshotRoot(records: Array>): string { + const root = fs.mkdtempSync(path.join(os.tmpdir(), "assistant-wave16-")); + TEMP_DIRS.push(root); + const payload = JSON.stringify({ records }, null, 2); + const write = (name: string, content = payload): void => { + fs.writeFileSync(path.resolve(root, name), content, "utf-8"); + }; + write("09_samples_key_fields_Recorder_Ref_Supplier_Buyer_Responsible.json"); + write("03_snapshot_fragment_problem_cases.json"); + write("07_samples_DocumentJournals.json"); + write("08_samples_NDS_registers.json"); + write("04_samples_SpisanieSRaschetnogoScheta.json"); + write("05_samples_RealizaciyaTovarovUslug.json"); + write("06_samples_PostuplenieTovarovUslug.json"); + return root; +} + +function buildMixedDomainRecord(id: string, description: string, account?: string): Record { + return { + source_entity: "Document", + source_id: id, + display_name: description, + unknown_link_count: 0, + attributes: { + Description: description, + Account: account ?? "", + Period: "2020-07-15T00:00:00", + Amount: 276873.6 + }, + links: [ + { + relation: "document_has_counterparty", + target_entity: "Counterparty", + target_id: `CP-${id}`, + source_field: "Counterparty" + } + ] + }; +} + +function buildRouteSummary() { + return { + mode: "deterministic_v2" as const, + message_in_scope: true, + scope_confidence: "high" as const, + planner: { + total_fragments: 1, + in_scope_fragments: 1, + out_of_scope_fragments: 0, + discarded_fragments: 0, + contains_multiple_tasks: false + }, + decisions: [], + fallback: { + type: "none" as const, + message: null + } + }; +} + +function buildCoverage(input?: Partial): RequirementCoverageReport { + return { + requirements_total: 1, + requirements_covered: 1, + requirements_uncovered: [], + requirements_partially_covered: [], + clarification_needed_for: [], + out_of_scope_requirements: [], + ...input + }; +} + +function buildGrounding(input?: Partial): AnswerGroundingCheck { + return { + status: "partial", + route_subject_match: true, + missing_requirements: [], + reasons: [], + why_included_summary: ["wave16-test"], + selection_reason_summary: ["wave16-test"], + ...input + }; +} + +function buildProblemUnit(input: { + id: string; + type: ProblemUnit["problem_unit_type"]; + account: string; + defect: string; + lifecycleDomain: ProblemUnit["lifecycle_domain"]; +}): ProblemUnit { + return { + schema_version: "problem_unit_v0_1", + problem_unit_id: input.id, + problem_unit_type: input.type, + title: "Wave16 problem unit", + mechanism_summary: `Mechanism candidate: ${input.defect}.`, + business_defect_class: input.defect, + severity: { + score: 0.73, + grade: "high" + }, + confidence: { + score: 0.62, + grade: "medium" + }, + lifecycle_domain: input.lifecycleDomain, + affected_entities: ["Document:DOC-1"], + affected_documents: ["Document:DOC-1"], + affected_postings: ["Posting:POST-1"], + affected_accounts: [input.account], + affected_counterparties: ["Counterparty:CP-1"], + affected_contracts: ["Contract:CTR-1"], + failed_expected_edge: input.defect, + period_impact: { + is_period_sensitive: true, + impact_class: "close_risk" + }, + evidence_pack: ["cand-1"], + entity_backlinks: [{ entity: "Document", id: "DOC-1" }], + snapshot_limitations: [] + }; +} + +function buildRetrieval(input: { + status: UnifiedRetrievalResult["status"]; + accountScope?: string[]; + domainScope?: string[]; + relationPatterns?: string[]; + units?: ProblemUnit[]; +}): UnifiedRetrievalResult { + const accountScope = input.accountScope ?? []; + const domainScope = input.domainScope ?? ["settlements"]; + const relationPatterns = input.relationPatterns ?? ["payment_to_settlement"]; + const units = input.units ?? []; + return { + fragment_id: "F1", + requirement_ids: ["R1"], + route: "hybrid_store_plus_live", + status: input.status, + result_type: "chain", + items: + input.status === "empty" + ? [] + : [ + { + source_entity: "Document", + source_id: "DOC-1", + display_name: "Счет №4 от 07.07.20", + account_context: accountScope, + graph_domain_scope: domainScope, + relation_pattern_hits: relationPatterns, + period: "2020-07", + amount: "276 873,60" + } + ], + summary: { + semantic_profile: { + account_scope: accountScope, + domain_scope: domainScope, + relation_patterns: relationPatterns, + period_scope: { + from: "2020-07-01", + to: "2020-07-31", + granularity: "month" + } + }, + domain_purity_guard: { + domain_card_id: domainScope.includes("vat") ? "vat_document_register_book" : "settlements_60_62" + }, + broad_query_detected: false, + broad_result_flag: false, + minimum_evidence_failed: false, + narrowing_strength: "strong", + degraded_to: null + }, + evidence: + input.status === "empty" + ? [] + : [ + { + evidence_id: "ev-R1", + claim_ref: "requirement:R1", + source_type: "retrieval_item", + source_ref: { + schema_version: "evidence_source_ref_v1", + namespace: "snapshot_2020_07", + entity: "document", + id: "DOC-1", + period: "2020-07", + canonical_ref: "evidence_source_ref_v1|snapshot_2020_07|document|doc-1|2020-07" + }, + pointer: { + fragment_id: "F1", + route: "hybrid_store_plus_live", + source: { + namespace: "snapshot_2020_07", + entity: "document", + id: "DOC-1", + period: "2020-07" + }, + locator: { + field_path: "risk_score", + item_index: 0 + } + }, + evidence_kind: "mechanism_link", + mechanism_note: relationPatterns[0], + confidence: "medium", + limitation: null, + payload: { + contract: "договор № 01/19-ПТ" + } + } + ], + candidate_evidence: [], + problem_units: units, + problem_unit_summary: + units.length > 0 + ? { + schema_version: "problem_unit_summary_v0_1", + units_total: units.length, + duplicate_collapses: 0, + unit_types: units.map((unit) => unit.problem_unit_type), + type_distribution: { + [units[0]?.problem_unit_type ?? "broken_chain_segment"]: units.length + }, + severity_distribution: { + low: 0, + medium: 0, + high: units.length + }, + confidence_distribution: { + low: 0, + medium: units.length, + high: 0 + }, + primary_unit_type: units[0]?.problem_unit_type ?? null + } + : null, + why_included: ["wave16-test"], + selection_reason: ["wave16-test"], + risk_factors: ["wave16"], + business_interpretation: ["wave16"], + confidence: input.status === "ok" ? "medium" : "low", + limitations: [], + errors: [] + }; +} + +afterEach(() => { + cleanupTempDirs(); +}); + +describe("wave16 residual fail cleanup", () => { + it("q10-style VAT query must not derive account scope from percent/amount fragments", () => { + const dataLayer = new AssistantDataLayer( + createSnapshotRoot([ + buildMixedDomainRecord("SET-1", "Оплата покупателя по договору", "62.01"), + buildMixedDomainRecord("VAT-1", "Счет-фактура и НДС в книге продаж", "68.02") + ]) + ); + const result = dataLayer.executeRoute( + "hybrid_store_plus_live", + "По оплате от 13 июля на 276 873,60 указан НДС 20% = 46 145,60. Докажи отражение НДС." + ); + + expect(result.status).toBe("ok"); + const summary = result.summary as Record; + const semanticProfile = (summary.semantic_profile ?? {}) as Record; + const accountScope = Array.isArray(semanticProfile.account_scope) ? semanticProfile.account_scope : []; + expect(accountScope).not.toContain("20"); + expect(accountScope).not.toContain("60"); + const domainGuard = (summary.domain_purity_guard ?? {}) as Record; + expect(domainGuard.domain_card_id).toBe("vat_document_register_book"); + }); + + it("q13-style broad VAT query on batch route must stay VAT-domain", () => { + const dataLayer = new AssistantDataLayer( + createSnapshotRoot([ + buildMixedDomainRecord("SET-2", "Платеж и расчеты с покупателем", "62.02"), + buildMixedDomainRecord("VAT-2", "Полученный счет-фактура и налоговый эффект", "19.03") + ]) + ); + const result = dataLayer.executeRoute( + "batch_refresh_then_store", + "Есть ли в июльском срезе покупки, где есть товар/услуга, но не видно счета-фактуры или налогового эффекта?" + ); + + const summary = result.summary as Record; + const domainGuard = (summary.domain_purity_guard ?? {}) as Record; + expect(domainGuard.domain_card_id).toBe("vat_document_register_book"); + }); + + it("q06 first-check should be settlement-specific for prove_or_guess without explicit account tokens", () => { + const settlementUnit = buildProblemUnit({ + id: "pu-settlement-wave16", + type: "broken_chain_segment", + account: "62.02", + defect: "failed_edge:payment_to_settlement", + lifecycleDomain: "customer_settlement" + }); + + const output = composeAssistantAnswer({ + userMessage: + "Есть ли в июльском срезе ситуация, где деньги уже пришли, но закрытие расчётов не подтверждено тем документом?", + routeSummary: buildRouteSummary(), + retrievalResults: [buildRetrieval({ status: "ok", units: [settlementUnit], domainScope: ["settlements"] })], + requirements: [ + { + requirement_id: "R1", + source_fragment_id: "F1", + requirement_text: "Wave16 requirement", + subject_tokens: [], + status: "covered", + route: "hybrid_store_plus_live" + } + ], + coverageReport: buildCoverage(), + groundingCheck: buildGrounding(), + focusDomainHint: null, + questionTypeHint: "prove_or_guess", + enableAnswerPolicyV11: true, + enableProblemCentricAnswerV1: true, + enableLifecycleAnswerV1: true + }); + + expect(output.assistant_reply).toMatch(/регистр расчет|60\/62\/76|договор|объект расчет/i); + }); + + it("q12-like question should resolve prove_or_guess instead of chain classification", () => { + const resolved = resolveQuestionType( + "Связан ли полученный 31 июля счёт-фактура с услугой так, чтобы вычет был корректен, и это доказано или только предположение?" + ); + expect(resolved).toBe("prove_or_guess"); + }); + + it("why_breaks short line should avoid generic collapse and keep domain-specific mechanism", () => { + const settlementUnit = buildProblemUnit({ + id: "pu-settlement-wave16-2", + type: "broken_chain_segment", + account: "62.01", + defect: "failed_edge:payment_to_settlement", + lifecycleDomain: "customer_settlement" + }); + const output = composeAssistantAnswer({ + userMessage: "Почему по расчетам долг остался после оплаты?", + routeSummary: buildRouteSummary(), + retrievalResults: [buildRetrieval({ status: "ok", units: [settlementUnit], domainScope: ["settlements"] })], + requirements: [ + { + requirement_id: "R1", + source_fragment_id: "F1", + requirement_text: "Wave16 requirement", + subject_tokens: [], + status: "covered", + route: "hybrid_store_plus_live" + } + ], + coverageReport: buildCoverage({ + requirements_covered: 0, + requirements_partially_covered: ["R1"] + }), + groundingCheck: buildGrounding({ + status: "partial", + reasons: ["Mechanism is unresolved for part of the evidence."] + }), + focusDomainHint: "settlements_60_62", + questionTypeHint: "why_breaks", + enableAnswerPolicyV11: true, + enableProblemCentricAnswerV1: true, + enableLifecycleAnswerV1: true + }); + + expect(output.assistant_reply).toMatch(/наиболее вероятн/i); + expect(output.assistant_reply).not.toContain("Коротко: Проблема с закрытием расчета подтверждается частично."); + }); + + it("VAT why_breaks first-check must include VAT-specific checks", () => { + const vatUnit = buildProblemUnit({ + id: "pu-vat-wave16", + type: "cross_branch_inconsistency_cluster", + account: "68.02", + defect: "invoice_to_vat", + lifecycleDomain: "vat_flow" + }); + const output = composeAssistantAnswer({ + userMessage: "Почему по НДС не видно ожидаемого налогового эффекта?", + routeSummary: buildRouteSummary(), + retrievalResults: [buildRetrieval({ status: "ok", units: [vatUnit], domainScope: ["vat", "taxes"] })], + requirements: [ + { + requirement_id: "R1", + source_fragment_id: "F1", + requirement_text: "Wave16 requirement", + subject_tokens: [], + status: "covered", + route: "hybrid_store_plus_live" + } + ], + coverageReport: buildCoverage(), + groundingCheck: buildGrounding(), + focusDomainHint: "vat_document_register_book", + questionTypeHint: "why_breaks", + enableAnswerPolicyV11: true, + enableProblemCentricAnswerV1: true, + enableLifecycleAnswerV1: true + }); + + expect(output.assistant_reply).toMatch(/счет-?фактур|регистр НДС|19\/68|книг/i); + }); +}); + diff --git a/llm_normalizer/docs/runs/2026-03-28_Stage_04_Wave_16_Residual_FAIL_Cleanup_Generic_Answer_Squeeze.zip b/llm_normalizer/docs/runs/2026-03-28_Stage_04_Wave_16_Residual_FAIL_Cleanup_Generic_Answer_Squeeze.zip new file mode 100644 index 0000000..733a41d Binary files /dev/null and b/llm_normalizer/docs/runs/2026-03-28_Stage_04_Wave_16_Residual_FAIL_Cleanup_Generic_Answer_Squeeze.zip differ