diff --git a/llm_normalizer/backend/dist/services/answerComposer.js b/llm_normalizer/backend/dist/services/answerComposer.js index 3e7d3e4..d36e114 100644 --- a/llm_normalizer/backend/dist/services/answerComposer.js +++ b/llm_normalizer/backend/dist/services/answerComposer.js @@ -10,6 +10,112 @@ function fallbackFromSummary(routeSummary) { function uniqueStrings(values, limit = 6) { return Array.from(new Set(values.map((item) => item.trim()).filter(Boolean))).slice(0, limit); } +function withUniquePush(target, value) { + const normalized = String(value ?? "").trim(); + if (!normalized) { + return; + } + if (!target.includes(normalized)) { + target.push(normalized); + } +} +function normalizeAnchorForMatch(value) { + return String(value ?? "") + .toLowerCase() + .replace(/[^\p{L}\p{N}.:/-]+/gu, " ") + .replace(/\s+/g, " ") + .trim(); +} +function collectCompanyAnchorTokens(anchors) { + if (!anchors) { + return []; + } + const tokens = []; + for (const item of anchors.contract_numbers ?? []) + withUniquePush(tokens, item); + for (const item of anchors.document_numbers ?? []) + withUniquePush(tokens, item); + for (const item of anchors.dates ?? []) + withUniquePush(tokens, item); + for (const item of anchors.amounts ?? []) + withUniquePush(tokens, item); + for (const item of anchors.accounts ?? []) + withUniquePush(tokens, `\u0441\u0447\u0435\u0442 ${item}`); + for (const item of anchors.accounts ?? []) + withUniquePush(tokens, item); + for (const item of anchors.periods ?? []) + withUniquePush(tokens, item); + for (const item of anchors.document_types ?? []) + withUniquePush(tokens, item); + for (const item of anchors.all ?? []) + withUniquePush(tokens, item); + return uniqueStrings(tokens, 48); +} +function collectRetrievalCorpus(results) { + const chunks = []; + for (const result of results) { + chunks.push(JSON.stringify(result.summary ?? {})); + for (const item of result.items.slice(0, 10)) { + chunks.push(JSON.stringify(item)); + } + for (const evidence of result.evidence.slice(0, 16)) { + chunks.push(JSON.stringify(evidence)); + } + chunks.push(...result.why_included.slice(0, 16)); + chunks.push(...result.selection_reason.slice(0, 16)); + chunks.push(...result.business_interpretation.slice(0, 16)); + } + return chunks.join(" ").toLowerCase(); +} +function isAnchorMatchedInCorpus(anchor, corpus) { + const normalized = normalizeAnchorForMatch(anchor); + if (!normalized) { + return false; + } + if (normalized.length < 3) { + return false; + } + if (corpus.includes(normalized)) { + return true; + } + const withoutPrefix = normalized + .replace(/^(?:\u0434\u043e\u0433\u043e\u0432\u043e\u0440|document|account|period|doc_type)\s*[:№#]?\s*/iu, "") + .trim(); + if (withoutPrefix.length >= 3 && corpus.includes(withoutPrefix)) { + return true; + } + if (/^\d+(?:[.,]\d{2})?$/.test(withoutPrefix)) { + const normalizedAmount = withoutPrefix.replace(",", "."); + return corpus.includes(withoutPrefix) || corpus.includes(normalizedAmount); + } + return false; +} +function evaluateCompanyAnchorUsage(anchors, retrievalResults) { + const present = collectCompanyAnchorTokens(anchors); + if (present.length === 0) { + return { + present: [], + used: [], + unused: [] + }; + } + const corpus = normalizeAnchorForMatch(collectRetrievalCorpus(retrievalResults)); + const used = []; + const unused = []; + for (const anchor of present) { + if (isAnchorMatchedInCorpus(anchor, corpus)) { + withUniquePush(used, anchor); + } + else { + withUniquePush(unused, anchor); + } + } + return { + present: uniqueStrings(present, 24), + used: uniqueStrings(used, 12), + unused: uniqueStrings(unused, 12) + }; +} const UUID_PATTERN = /\b[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\b/gi; const LONG_HEX_PATTERN = /\b[0-9a-f]{24,}\b/gi; const RAW_REF_BLOB_PATTERN = /\bevidence_source_ref_v1\|[^\s,;]+/gi; @@ -962,6 +1068,10 @@ function isProblemUnitAlignedWithNarrativeDomain(unit, domain) { return /(payment_to_settlement|settlement_closed|settlement|аванс|зачет|зачёт|расчет|расч[её]т|оплат)/i.test(corpus); } if (domain === "vat_document_register_book") { + const foreignVatDomain = ["period_close", "deferred_expense", "fixed_asset", "bank_settlement", "customer_settlement"].includes(String(unit.lifecycle_domain ?? "")); + if (foreignVatDomain && !hasControlledCrossDomainHandoff(unit)) { + return false; + } if (unit.lifecycle_domain === "vat_flow") { return true; } @@ -971,6 +1081,10 @@ function isProblemUnitAlignedWithNarrativeDomain(unit, domain) { return /(vat|ндс|invoice|book_entry|register|книг|счет[\s-]?фактур|сч[её]т[\s-]?фактур)/i.test(corpus); } if (domain === "month_close_costs_20_44") { + const foreignMonthCloseDomain = ["vat_flow", "bank_settlement", "customer_settlement", "fixed_asset"].includes(String(unit.lifecycle_domain ?? "")); + if (foreignMonthCloseDomain && !hasControlledCrossDomainHandoff(unit)) { + return false; + } if (unit.lifecycle_domain === "period_close" || unit.lifecycle_domain === "deferred_expense" || unit.lifecycle_domain === "fixed_asset") { @@ -1514,12 +1628,159 @@ function mapDefectTokenToNarrative(value) { } return null; } +const KNOWN_ACCOUNT_PREFIXES = new Set([ + "01", + "02", + "07", + "08", + "10", + "13", + "19", + "20", + "21", + "23", + "25", + "26", + "28", + "29", + "41", + "43", + "44", + "45", + "50", + "51", + "52", + "55", + "57", + "58", + "60", + "62", + "66", + "67", + "68", + "69", + "70", + "71", + "73", + "76", + "90", + "91", + "94", + "96", + "97" +]); +function collectDateLikeSpansForNarrative(text) { + const spans = []; + const patterns = [ + /\b20\d{2}[./-](?:0[1-9]|1[0-2])(?:[./-](?:0[1-9]|[12]\d|3[01]))?\b/g, + /\b(?:0?[1-9]|[12]\d|3[01])[./-](?:0?[1-9]|1[0-2])[./-](?:\d{2}|\d{4})\b/g, + /\b(?:0?[1-9]|[12]\d|3[01])\s+(?:января|февраля|марта|апреля|мая|июня|июля|августа|сентября|октября|ноября|декабря)\b/giu + ]; + for (const pattern of patterns) { + let match = null; + while ((match = pattern.exec(text)) !== null) { + spans.push({ + start: match.index, + end: match.index + match[0].length + }); + } + } + return spans; +} +function collectAmountLikeSpansForNarrative(text) { + const spans = []; + const pattern = /\b\d{1,3}(?:[ \u00A0]\d{3})+(?:[.,]\d{2})?\b/g; + let match = null; + while ((match = pattern.exec(text)) !== null) { + spans.push({ + start: match.index, + end: match.index + match[0].length + }); + } + return spans; +} +function intersectsNarrativeSpan(start, end, spans) { + return spans.some((span) => start < span.end && end > span.start); +} +function hasAccountContextMarker(text, start, end) { + const left = text.slice(Math.max(0, start - 24), start); + const right = text.slice(end, Math.min(text.length, end + 24)); + return /(?:счет|сч\.?|account|schet|по\s+60|по\s+62|по\s+19|по\s+68|по\s+20|по\s+25|по\s+26|по\s+44|расчет|ндс|закрыти|рбп|амортиз|settlement|vat|close)/iu.test(`${left} ${right}`); +} +function toKnownAccountToken(value) { + const token = String(value ?? "").trim(); + const prefix = token.match(/^(\d{2})/)?.[1]; + if (!prefix || !KNOWN_ACCOUNT_PREFIXES.has(prefix)) { + return null; + } + return token; +} function extractAccountNumbers(values) { - const numbers = values.flatMap((value) => { - const matches = String(value ?? "").match(/\b\d{2}(?:\.\d{1,2})?\b/g); - return matches ?? []; - }); - return uniqueStrings(numbers, 12); + const tokens = []; + for (const value of values) { + const raw = String(value ?? ""); + const matches = raw.match(/\b\d{2}(?:\.\d{1,2})?\b/g) ?? []; + for (const match of matches) { + const account = toKnownAccountToken(match); + if (account) { + tokens.push(account); + } + } + } + return uniqueStrings(tokens, 16); +} +function extractAccountNumbersFromNarrativeText(value) { + const text = String(value ?? "").toLowerCase(); + if (!text.trim()) { + return []; + } + const result = []; + const dateSpans = collectDateLikeSpansForNarrative(text); + const amountSpans = collectAmountLikeSpansForNarrative(text); + const blockedSpans = [...dateSpans, ...amountSpans]; + const contextualPattern = /(?:\b(?:счет(?:а|у|ом|ов)?|сч\.?|account(?:s)?|schet(?:a|u|om|ov)?)\b)\s*(?:№|#|:)?\s*([0-9./,\sиand]{2,96})/giu; + let contextualMatch = null; + while ((contextualMatch = contextualPattern.exec(text)) !== null) { + const chunk = String(contextualMatch[1] ?? ""); + const chunkTokens = chunk.match(/\b\d{2}(?:\.\d{1,2})?\b/g) ?? []; + for (const token of chunkTokens) { + const account = toKnownAccountToken(token); + if (account) { + result.push(account); + } + } + } + const accountPairPattern = /\b(\d{2}(?:\.\d{1,2})?)\s*\/\s*(\d{2}(?:\.\d{1,2})?)\b/g; + let pairMatch = null; + while ((pairMatch = accountPairPattern.exec(text)) !== null) { + const left = toKnownAccountToken(String(pairMatch[1] ?? "")); + const right = toKnownAccountToken(String(pairMatch[2] ?? "")); + if (left) { + result.push(left); + } + if (right) { + result.push(right); + } + } + const explicitPattern = /\b\d{2}(?:\.\d{1,2})?\b/g; + let explicitMatch = null; + while ((explicitMatch = explicitPattern.exec(text)) !== null) { + const token = String(explicitMatch[0] ?? ""); + const account = toKnownAccountToken(token); + if (!account) { + continue; + } + const start = explicitMatch.index; + const end = start + token.length; + if (intersectsNarrativeSpan(start, end, blockedSpans)) { + continue; + } + if (!hasAccountContextMarker(text, start, end)) { + continue; + } + result.push(account); + } + return uniqueStrings(result, 16); } function inferP0NarrativeDomain(units) { const allAccounts = extractAccountNumbers(units.flatMap((unit) => unit.affected_accounts ?? [])); @@ -1642,9 +1903,21 @@ function hasControlledCrossDomainHandoffInResult(result) { function isSettlementDomainToken(value) { return /(?:bank_settlement|customer_settlement|settlements?|supplier_payments|suppliers?|customers?)/i.test(String(value ?? "")); } +function isVatDomainToken(value) { + return /(?:vat_flow|vat|nds|taxes?|purchase_book|sales_book|invoice|book_entry|register)/i.test(String(value ?? "")); +} +function isMonthCloseDomainToken(value) { + return /(?:period_close|month_close|close_operation|cost_close|cost_allocation|deferred_expense)/i.test(String(value ?? "")); +} function isForeignToSettlementDomainToken(value) { return /(?:vat_flow|vat|deferred_expense|period_close|fixed_asset|fixed_assets|taxes?)/i.test(String(value ?? "")); } +function isForeignToVatDomainToken(value) { + return /(?:bank_settlement|customer_settlement|settlements?|period_close|deferred_expense|fixed_asset|fixed_assets|month_close)/i.test(String(value ?? "")); +} +function isForeignToMonthCloseDomainToken(value) { + return /(?:bank_settlement|customer_settlement|settlements?|vat_flow|vat|fixed_asset|fixed_assets)/i.test(String(value ?? "")); +} function collectResultAccounts(result) { const accounts = []; const semanticProfile = summaryValue(result, "semantic_profile"); @@ -1687,11 +1960,19 @@ function isSubstantiveResult(result) { } return result.items.length > 0 || result.evidence.length > 0; } -function evaluateSettlementEvidenceGrounding(results) { +function evaluateP0DomainEvidenceGrounding(results, focusDomain) { + if (!focusDomain) { + return { + has_primary: false, + has_foreign_primary: false, + foreign_primary_domains: [], + blocked: false + }; + } const substantive = results.filter((item) => isSubstantiveResult(item)); if (substantive.length === 0) { return { - has_settlement_primary: false, + has_primary: false, has_foreign_primary: false, foreign_primary_domains: [], blocked: false @@ -1701,42 +1982,91 @@ function evaluateSettlementEvidenceGrounding(results) { const accounts = collectResultAccounts(result); const domains = collectResultDomains(result); const relations = collectResultRelations(result); - const settlement = accounts.some((item) => isSettlementAccountToken(item) || /^(?:51|76)(?:\.|$)/.test(item)) || - domains.some((item) => isSettlementDomainToken(item)) || - relations.some((item) => /payment_to_settlement|statement_to_document|contract_to_documents/.test(item)); - const foreignDomains = domains.filter((item) => isForeignToSettlementDomainToken(item)); + let inDomain = false; + let foreignDomains = []; + if (focusDomain === "settlements_60_62") { + inDomain = + accounts.some((item) => isSettlementAccountToken(item) || /^(?:51|76)(?:\.|$)/.test(item)) || + domains.some((item) => isSettlementDomainToken(item)) || + relations.some((item) => /payment_to_settlement|statement_to_document|contract_to_documents|linked_to_settlement|settlement_closed/.test(item)); + foreignDomains = domains.filter((item) => isForeignToSettlementDomainToken(item)); + } + else if (focusDomain === "vat_document_register_book") { + inDomain = + accounts.some((item) => isVatAccountToken(item)) || + domains.some((item) => isVatDomainToken(item)) || + relations.some((item) => /invoice_to_vat|source_doc_present|invoice_linked|book_entry_generated|deduction_posted|register_to_book|vat_/i.test(item)); + foreignDomains = domains.filter((item) => isForeignToVatDomainToken(item)); + } + else if (focusDomain === "month_close_costs_20_44") { + inDomain = + accounts.some((item) => isCloseCostsAccountToken(item)) || + domains.some((item) => isMonthCloseDomainToken(item)) || + relations.some((item) => /costs_accumulated|allocation_rules_resolved|close_operation_runs|residuals_zero|close_operation|period_close|allocation|writeoff/i.test(item)); + foreignDomains = domains.filter((item) => isForeignToMonthCloseDomainToken(item)); + } return { - settlement, + inDomain, foreignDomains: uniqueStrings(foreignDomains, 8) }; }; const top = substantive[0]; const topClass = classify(top); - const hasAnySettlement = substantive.some((item) => classify(item).settlement); - const hasForeignPrimary = topClass.foreignDomains.length > 0 && !topClass.settlement; - const blocked = hasForeignPrimary && !hasAnySettlement && !hasControlledCrossDomainHandoffInResult(top); + const hasAnyPrimary = substantive.some((item) => classify(item).inDomain); + const hasForeignPrimary = topClass.foreignDomains.length > 0 && !topClass.inDomain; + const blocked = hasForeignPrimary && !hasAnyPrimary && !hasControlledCrossDomainHandoffInResult(top); return { - has_settlement_primary: hasAnySettlement, + has_primary: hasAnyPrimary, has_foreign_primary: hasForeignPrimary, foreign_primary_domains: topClass.foreignDomains, blocked }; } +function hasStrongNarrativeDomainSignalInText(userMessage, domain) { + if (!domain) { + return false; + } + const text = String(userMessage ?? "").toLowerCase(); + const accountTokens = extractAccountNumbersFromNarrativeText(text); + if (domain === "settlements_60_62") { + return (accountTokens.some((item) => isSettlementAccountToken(item)) || + /(60\.0[12]|62\.0[12]|долг|аванс|зач[её]т|взаимозач|расч[её]т)/i.test(text)); + } + if (domain === "vat_document_register_book") { + return (accountTokens.some((item) => isVatAccountToken(item)) || + /(ндс|vat|счет[-\s]?фактур|сч[её]т[-\s]?фактур|книг[аи]|регистр)/i.test(text)); + } + if (domain === "month_close_costs_20_44") { + return (accountTokens.some((item) => isCloseCostsAccountToken(item)) || + /(закрыти[ея]\s+месяц|закрытие\s+счетов|регламентн|косвенн|затрат|распределени|рбп|амортиз|финансовых\s+результат|month\s*close|period\s*close|close\s+operation)/i.test(text)); + } + return false; +} function inferP0FocusNarrativeDomain(userMessage, results, units, focusDomainHint) { const fromHint = p0NarrativeDomainFromHint(focusDomainHint); + const fromMessage = inferNarrativeDomainFromText(userMessage); + const strongFromMessage = Boolean(fromMessage && hasStrongNarrativeDomainSignalInText(userMessage, fromMessage)); + const fromDomainGuard = inferP0NarrativeDomainFromDomainGuards(results); + if (fromHint && fromMessage && fromHint !== fromMessage) { + return strongFromMessage ? fromMessage : fromHint; + } if (fromHint) { return fromHint; } - const fromDomainGuard = inferP0NarrativeDomainFromDomainGuards(results); + if (fromDomainGuard && fromMessage && fromDomainGuard !== fromMessage) { + return strongFromMessage ? fromMessage : fromDomainGuard; + } if (fromDomainGuard) { return fromDomainGuard; } - const fromMessage = inferNarrativeDomainFromText(userMessage); + if (strongFromMessage) { + return fromMessage; + } if (fromMessage) { return fromMessage; } const semanticScopes = collectSemanticProfileScopes(results); - const messageAccounts = extractAccountNumbers([userMessage]); + const messageAccounts = extractAccountNumbersFromNarrativeText(userMessage); const hasExplicitP0AccountSignal = [...messageAccounts, ...semanticScopes.accounts].some((item) => isSettlementAccountToken(item) || isVatAccountToken(item) || isCloseCostsAccountToken(item)); // Domain lock is only applied when we have an explicit P0 signal from the query/profile. if (!hasExplicitP0AccountSignal) { @@ -1887,12 +2217,19 @@ function humanizeFactForDirectAnswer(value) { } function buildDirectAnswer(input) { const topFact = humanizeFactForDirectAnswer(firstMeaningfulFact(input.retrievalResults)); + const domainAnchor = domainNarrativeAnchor(input.focusDomain); + const topFactDomain = topFact ? inferNarrativeDomainFromText(topFact) : null; + const topFactAligned = Boolean(topFact) && (!input.focusDomain || topFactDomain === input.focusDomain); + const preferredFact = topFactAligned ? topFact : null; if (input.mode === "focused_grounded") { - return topFact ?? "Проблема подтверждена на текущей опоре и готова к точечной проверке."; + return preferredFact ?? domainAnchor ?? "Проблема подтверждена на текущей опоре и готова к точечной проверке."; } if (input.mode === "broad_partial") { - if (topFact) { - return `${topFact.replace(/[.!?]+$/u, "")}; подтверждение пока частичное.`; + if (preferredFact) { + return `${preferredFact.replace(/[.!?]+$/u, "")}; подтверждение пока частичное.`; + } + if (domainAnchor) { + return `${domainAnchor.replace(/[.!?]+$/u, "")}; подтверждение пока частичное.`; } return "Есть признаки проблемы, но опора частичная и вывод ограничен."; } @@ -1962,9 +2299,19 @@ function buildProblemCentricAnswerStructure(input) { .map((item) => item.source_ref?.canonical_ref) .filter((item) => typeof item === "string" && item.trim().length > 0), 6); const evidenceIds = uniqueStrings(input.evidenceItems.map((item) => item.evidence_id), 10); + const aggregateEvidenceConfidence = aggregateConfidence(input.retrievalResults, input.evidenceItems); + const hasCriticalEvidenceLimitation = input.limitationReasonCodes.includes("weak_source_mapping") || + input.limitationReasonCodes.includes("insufficient_detail"); + const confidenceLimited = input.mode !== "focused_grounded" || + weakUnits || + input.domainLockMiss || + input.limitationReasonCodes.includes("missing_mechanism") || + input.limitationReasonCodes.includes("heuristic_inference") || + hasCriticalEvidenceLimitation || + aggregateEvidenceConfidence === "low"; const mechanismStatus = unitMechanismNotes.length === 0 ? "unresolved" - : weakUnits || input.limitationReasonCodes.includes("missing_mechanism") + : confidenceLimited ? "limited" : "grounded"; const problemSpecificLimitations = []; @@ -2067,18 +2414,42 @@ function limitationReasonToUserText(code) { } function inferNarrativeDomainFromText(value) { const text = String(value ?? "").toLowerCase(); - const accountTokens = extractAccountNumbers([text]); - const hasSettlementLexicalSignal = /(оплат|долг|аванс|взаимозач|зачет|зачёт|поставщ|покупат|не\s+сход)/i.test(text); - if (accountTokens.some((token) => isSettlementAccountToken(token)) || hasSettlementLexicalSignal) { - return "settlements_60_62"; + const accountTokens = extractAccountNumbersFromNarrativeText(text); + let settlementScore = 0; + let vatScore = 0; + let monthCloseScore = 0; + if (accountTokens.some((token) => isSettlementAccountToken(token))) { + settlementScore += 3; } - if (accountTokens.some((token) => isVatAccountToken(token)) || /(ндс|счет[-\s]?фактур|регистр|книг)/i.test(text)) { + if (accountTokens.some((token) => isVatAccountToken(token))) { + vatScore += 3; + } + if (accountTokens.some((token) => isCloseCostsAccountToken(token))) { + monthCloseScore += 3; + } + if (/(долг|аванс|взаимозач|зачет|зачёт|62\.01|62\.02|60\.01|60\.02|не\s+сход)/i.test(text)) { + settlementScore += 2; + } + if (/(ндс|vat|счет[-\s]?фактур|сч[её]т[-\s]?фактур|книг[аи]|регистр)/i.test(text)) { + vatScore += 3; + } + if (/(закрыти[ея]\s+месяц|закрытие\s+счетов|регламентн|косвенн|затрат|распределени|рбп|амортиз|финансовых\s+результат|month\s*close|period\s*close|close\s+operation)/i.test(text)) { + monthCloseScore += 3; + } + const maxScore = Math.max(settlementScore, vatScore, monthCloseScore); + if (maxScore <= 0) { + return null; + } + // Tie-break prioritizes explicit VAT and month-close lexical markers over broad settlement wording. + if (vatScore === maxScore) { return "vat_document_register_book"; } - if (accountTokens.some((token) => isCloseCostsAccountToken(token)) || - /(закрыти[ея]\s+месяц|затрат|распределени|списан)/i.test(text)) { + if (monthCloseScore === maxScore) { return "month_close_costs_20_44"; } + if (settlementScore === maxScore) { + return "settlements_60_62"; + } return null; } function isIncompleteEvidence(structure) { @@ -2171,6 +2542,10 @@ function buildEvidenceSectionLines(structure) { const claimLinks = Array.isArray(structure.evidence_block.claim_evidence_links) ? structure.evidence_block.claim_evidence_links.length : 0; + const reliabilityLimited = structure.mechanism_block.status !== "grounded" || + structure.uncertainty_block.limitations.length > 0 || + structure.uncertainty_block.open_uncertainties.length > 0 || + structure.evidence_block.coverage_note === "coverage_partial_or_limited"; const lines = []; const coverageSplitLines = buildCoverageSplitLines(structure); if (evidenceCount > 0) { @@ -2186,7 +2561,7 @@ function buildEvidenceSectionLines(structure) { lines.push("Опора частичная: часть требований покрыта не полностью."); } else if (evidenceCount > 0) { - lines.push("Опора достаточна для первичного вывода."); + lines.push(reliabilityLimited ? "Опора есть, но достаточна только для предварительного вывода." : "Опора достаточна для первичного вывода."); } if (lines.length === 0) { lines.push("Использована доступная выборка документов и проводок в текущем snapshot."); @@ -2267,6 +2642,8 @@ function humanizeLimitationToken(value) { return "Не указан документ или объект для трассировки."; if (normalized === "missing_anchor:counterparty") return "Не указан контрагент или договор."; + if (normalized === "primary_domain_evidence_not_confirmed") + return "Целевой механизм активного домена подтвержден частично; вывод ограничен."; if (normalized === "settlement_primary_evidence_not_confirmed") return "Опора по расчетному контуру не подтверждена: в приоритете были сигналы из смежных доменов."; if (normalized.includes("snapshot")) @@ -2330,26 +2707,146 @@ function buildLimitationsSectionLines(structure) { } return ["Существенных ограничений в текущем срезе не выявлено."]; } -function renderPolicyReply(structure) { +function domainNameForQuestionType(domain) { + if (domain === "settlements_60_62") + return "\u0440\u0430\u0441\u0447\u0435\u0442\u043d\u043e\u0433\u043e \u043a\u043e\u043d\u0442\u0443\u0440\u0430"; + if (domain === "vat_document_register_book") + return "\u0446\u0435\u043f\u043e\u0447\u043a\u0438 \u041d\u0414\u0421"; + if (domain === "month_close_costs_20_44") + return "\u043a\u043e\u043d\u0442\u0443\u0440\u0430 \u0437\u0430\u043a\u0440\u044b\u0442\u0438\u044f \u043c\u0435\u0441\u044f\u0446\u0430"; + return "\u0432\u044b\u0431\u0440\u0430\u043d\u043d\u043e\u0433\u043e \u0443\u0447\u0430\u0441\u0442\u043a\u0430"; +} +function buildQuestionTypeShortLine(context) { + const domainName = domainNameForQuestionType(context.focusDomain); + if (context.questionType === "where_break_is") { + return `\u041f\u0440\u0438\u043e\u0440\u0438\u0442\u0435\u0442 \u043e\u0442\u0432\u0435\u0442\u0430: \u043b\u043e\u043a\u0430\u043b\u0438\u0437\u043e\u0432\u0430\u0442\u044c \u0440\u0430\u0437\u0440\u044b\u0432 \u0432\u043d\u0443\u0442\u0440\u0438 ${domainName}.`; + } + if (context.questionType === "prove_or_guess") { + return "\u041f\u0440\u0438\u043e\u0440\u0438\u0442\u0435\u0442 \u043e\u0442\u0432\u0435\u0442\u0430: \u0440\u0430\u0437\u0432\u0435\u0441\u0442\u0438 \u0434\u043e\u043a\u0430\u0437\u0430\u043d\u043e \u0438 \u0433\u0438\u043f\u043e\u0442\u0435\u0437\u0443."; + } + if (context.questionType === "what_is_it_grounded_on") { + return "\u041f\u0440\u0438\u043e\u0440\u0438\u0442\u0435\u0442 \u043e\u0442\u0432\u0435\u0442\u0430: \u043f\u043e\u043a\u0430\u0437\u0430\u0442\u044c \u043e\u0441\u043d\u043e\u0432\u0430\u043d\u0438\u0435 \u0432\u044b\u0432\u043e\u0434\u0430 \u043f\u043e \u0434\u0430\u043d\u043d\u044b\u043c."; + } + if (context.questionType === "which_chains_are_complete_vs_incomplete") { + return "\u041f\u0440\u0438\u043e\u0440\u0438\u0442\u0435\u0442 \u043e\u0442\u0432\u0435\u0442\u0430: \u0440\u0430\u0437\u0434\u0435\u043b\u0438\u0442\u044c \u0437\u0430\u0432\u0435\u0440\u0448\u0435\u043d\u043d\u044b\u0435 \u0438 \u043d\u0435\u0437\u0430\u0432\u0435\u0440\u0448\u0435\u043d\u043d\u044b\u0435 \u0446\u0435\u043f\u043e\u0447\u043a\u0438."; + } + if (context.questionType === "what_to_check_first") { + return "\u041f\u0440\u0438\u043e\u0440\u0438\u0442\u0435\u0442 \u043e\u0442\u0432\u0435\u0442\u0430: \u0434\u0430\u0442\u044c \u043f\u0435\u0440\u0432\u044b\u0439 \u043c\u0430\u0440\u0448\u0440\u0443\u0442 \u043f\u0440\u043e\u0432\u0435\u0440\u043a\u0438."; + } + return null; +} +function buildQuestionTypeBrokenLine(context) { + if (context.questionType !== "where_break_is") { + return null; + } + if (context.focusDomain === "settlements_60_62") { + return "\u0412\u0435\u0440\u043e\u044f\u0442\u043d\u044b\u0439 \u0443\u0437\u0435\u043b \u0440\u0430\u0437\u0440\u044b\u0432\u0430: \u043f\u0440\u0438\u0432\u044f\u0437\u043a\u0430 \u043e\u043f\u043b\u0430\u0442\u044b \u043a \u043e\u0431\u044a\u0435\u043a\u0442\u0443 \u0440\u0430\u0441\u0447\u0435\u0442\u043e\u0432 \u0438 \u0434\u043e\u043a\u0443\u043c\u0435\u043d\u0442\u0443 \u0437\u0430\u043a\u0440\u044b\u0442\u0438\u044f."; + } + if (context.focusDomain === "vat_document_register_book") { + return "\u0412\u0435\u0440\u043e\u044f\u0442\u043d\u044b\u0439 \u0443\u0437\u0435\u043b \u0440\u0430\u0437\u0440\u044b\u0432\u0430: \u0441\u0432\u044f\u0437\u043a\u0430 \u0438\u0441\u0445\u043e\u0434\u043d\u043e\u0433\u043e \u0434\u043e\u043a\u0443\u043c\u0435\u043d\u0442\u0430, \u0441\u0447\u0435\u0442\u0430-\u0444\u0430\u043a\u0442\u0443\u0440\u044b \u0438 \u0437\u0430\u043f\u0438\u0441\u0438 \u043a\u043d\u0438\u0433\u0438."; + } + if (context.focusDomain === "month_close_costs_20_44") { + return "\u0412\u0435\u0440\u043e\u044f\u0442\u043d\u044b\u0439 \u0443\u0437\u0435\u043b \u0440\u0430\u0437\u0440\u044b\u0432\u0430: \u043f\u0435\u0440\u0435\u0445\u043e\u0434 \u043e\u0442 \u043d\u0430\u043a\u043e\u043f\u043b\u0435\u043d\u0438\u044f \u0437\u0430\u0442\u0440\u0430\u0442 \u043a \u0440\u0430\u0441\u043f\u0440\u0435\u0434\u0435\u043b\u0435\u043d\u0438\u044e/\u0437\u0430\u043a\u0440\u044b\u0442\u0438\u044e."; + } + return "\u0412\u0435\u0440\u043e\u044f\u0442\u043d\u044b\u0439 \u0443\u0437\u0435\u043b \u0440\u0430\u0437\u0440\u044b\u0432\u0430 \u043b\u043e\u043a\u0430\u043b\u0438\u0437\u043e\u0432\u0430\u043d \u0447\u0430\u0441\u0442\u0438\u0447\u043d\u043e; \u043d\u0443\u0436\u043d\u0430 \u0442\u043e\u0447\u0435\u0447\u043d\u0430\u044f \u0441\u0432\u0435\u0440\u043a\u0430."; +} +function buildQuestionTypeWhyLine(context) { + if (context.questionType === "prove_or_guess") { + return "\u0417\u0434\u0435\u0441\u044c \u0447\u0435\u0441\u0442\u043d\u043e \u0440\u0430\u0437\u0432\u043e\u0434\u0438\u0442\u0441\u044f \u0447\u0442\u043e \u0443\u0436\u0435 \u043f\u043e\u0434\u0442\u0432\u0435\u0440\u0436\u0434\u0435\u043d\u043e \u0438 \u0447\u0442\u043e \u043f\u043e\u043a\u0430 \u043e\u0441\u0442\u0430\u0435\u0442\u0441\u044f \u0433\u0438\u043f\u043e\u0442\u0435\u0437\u043e\u0439."; + } + if (context.questionType === "which_chains_are_complete_vs_incomplete") { + return "\u0426\u0435\u043f\u043e\u0447\u043a\u0438 \u0440\u0430\u0437\u0434\u0435\u043b\u0435\u043d\u044b \u043d\u0430 \u0437\u0430\u0432\u0435\u0440\u0448\u0435\u043d\u043d\u044b\u0435 \u0438 \u043d\u0435\u0437\u0430\u0432\u0435\u0440\u0448\u0435\u043d\u043d\u044b\u0435 \u043f\u043e \u0442\u0435\u043a\u0443\u0449\u0435\u0439 \u043e\u043f\u043e\u0440\u0435."; + } + return null; +} +function buildQuestionTypeEvidenceLine(context) { + if (context.questionType === "what_is_it_grounded_on") { + return "\u0412 \u044d\u0442\u043e\u043c \u043e\u0442\u0432\u0435\u0442\u0435 \u0432 \u043f\u0440\u0438\u043e\u0440\u0438\u0442\u0435\u0442\u0435 \u043f\u043e\u043a\u0430\u0437\u0430\u043d\u044b \u0438\u043c\u0435\u043d\u043d\u043e \u043e\u0441\u043d\u043e\u0432\u0430\u043d\u0438\u044f \u0432\u044b\u0432\u043e\u0434\u0430."; + } + if (context.questionType === "prove_or_guess") { + return "\u0421\u0438\u043b\u0430 \u0432\u044b\u0432\u043e\u0434\u0430 \u043e\u0446\u0435\u043d\u0435\u043d\u0430 \u043f\u043e \u043f\u0440\u044f\u043c\u043e\u0439 \u043e\u043f\u043e\u0440\u0435, \u0430 \u043d\u0435 \u043f\u043e \u0434\u043e\u0433\u0430\u0434\u043a\u0430\u043c."; + } + return null; +} +function formatAnchorList(anchors, prefix) { + if (anchors.length === 0) { + return null; + } + return `${prefix}: ${anchors.join(", ")}.`; +} +function buildQuestionTypeCheckLine(context) { + if (context.questionType === "what_to_check_first") { + return "\u041d\u0430\u0447\u043d\u0438\u0442\u0435 \u0441 \u043f\u0435\u0440\u0432\u043e\u0433\u043e \u043f\u0443\u043d\u043a\u0442\u0430 \u0438 \u043f\u0440\u043e\u0439\u0434\u0438\u0442\u0435 \u043c\u0430\u0440\u0448\u0440\u0443\u0442 \u043f\u043e\u0441\u043b\u0435\u0434\u043e\u0432\u0430\u0442\u0435\u043b\u044c\u043d\u043e, \u0431\u0435\u0437 \u043f\u0435\u0440\u0435\u0441\u043a\u043e\u043a\u0430."; + } + return null; +} +function buildQuestionTypeLimitationLine(context) { + if (context.questionType === "prove_or_guess") { + return "\u0414\u043b\u044f \u0444\u043e\u0440\u043c\u0430\u0442\u0430 \u00ab\u0434\u043e\u043a\u0430\u0437\u0430\u043d\u043e \u0438\u043b\u0438 \u0433\u0438\u043f\u043e\u0442\u0435\u0437\u0430\u00bb \u0432\u0441\u0435 \u043d\u0435\u0434\u043e\u043a\u0430\u0437\u0430\u043d\u043d\u044b\u0435 \u0447\u0430\u0441\u0442\u0438 \u043e\u0442\u0434\u0435\u043b\u0435\u043d\u044b \u0432 \u043e\u0433\u0440\u0430\u043d\u0438\u0447\u0435\u043d\u0438\u044f."; + } + if (context.questionType === "which_chains_are_complete_vs_incomplete") { + return "\u0414\u0435\u043b\u0435\u043d\u0438\u0435 \u043d\u0430 \u00abcomplete/incomplete\u00bb \u0437\u0430\u0432\u0438\u0441\u0438\u0442 \u043e\u0442 \u043f\u043e\u043b\u043d\u043e\u0442\u044b \u0446\u0435\u043f\u043e\u0447\u043a\u0438 \u0432 \u0442\u0435\u043a\u0443\u0449\u0435\u043c \u0441\u0440\u0435\u0437\u0435."; + } + return null; +} +function applyQuestionTypeAndAnchorPolicy(input) { + const nextShort = buildQuestionTypeShortLine(input.context) ?? input.shortLine; + const nextBroken = dedupeNarrativeLines([buildQuestionTypeBrokenLine(input.context), ...input.brokenLines].filter((item) => Boolean(item)), 4); + const nextWhy = dedupeNarrativeLines([buildQuestionTypeWhyLine(input.context), ...input.whyLines].filter((item) => Boolean(item)), 4); + const anchorUsedLine = formatAnchorList(input.context.anchors.used, "\u0412 \u043e\u043f\u043e\u0440\u0435 \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u043d\u044b \u044f\u043a\u043e\u0440\u044f \u0432\u043e\u043f\u0440\u043e\u0441\u0430"); + const anchorUnusedLine = formatAnchorList(input.context.anchors.unused, "\u042f\u043a\u043e\u0440\u044f \u0438\u0437 \u0432\u043e\u043f\u0440\u043e\u0441\u0430 \u0431\u0435\u0437 \u043f\u0440\u044f\u043c\u043e\u0433\u043e \u043f\u043e\u0434\u0442\u0432\u0435\u0440\u0436\u0434\u0435\u043d\u0438\u044f"); + const nextEvidence = dedupeNarrativeLines([buildQuestionTypeEvidenceLine(input.context), ...input.evidenceLines, anchorUsedLine].filter((item) => Boolean(item)), 7); + const nextChecks = dedupeNarrativeLines([buildQuestionTypeCheckLine(input.context), ...input.checkLines].filter((item) => Boolean(item)), 5); + const nextLimitations = dedupeNarrativeLines([buildQuestionTypeLimitationLine(input.context), anchorUnusedLine, ...input.limitationLines].filter((item) => Boolean(item)), 6); + return { + shortLine: ensureSentence(nextShort), + brokenLines: nextBroken, + whyLines: nextWhy, + evidenceLines: nextEvidence, + checkLines: nextChecks, + limitationLines: nextLimitations + }; +} +function renderPolicyReply(structure, context) { const shortLine = ensureSentence(buildShortSectionLine(structure)); const brokenLines = buildBrokenSectionLines(structure); const whyLines = buildWhySectionLines(structure); const evidenceLines = buildEvidenceSectionLines(structure); const checkLines = buildChecksSectionLines(structure); const limitationLines = buildLimitationsSectionLines(structure); + const enriched = context + ? applyQuestionTypeAndAnchorPolicy({ + shortLine, + brokenLines, + whyLines, + evidenceLines, + checkLines, + limitationLines, + context + }) + : { + shortLine, + brokenLines, + whyLines, + evidenceLines, + checkLines, + limitationLines + }; return sanitizeUserFacingReply([ - `Коротко: ${shortLine}`, - `Что сломано:\n${formatList(brokenLines)}`, - `Почему это похоже на проблему:\n${formatList(whyLines)}`, - `На чем это основано:\n${formatList(evidenceLines)}`, - `Что проверить первым:\n${formatList(checkLines)}`, - `Ограничения:\n${formatList(limitationLines)}` + `Коротко: ${enriched.shortLine}`, + `Что сломано:\n${formatList(enriched.brokenLines)}`, + `Почему это похоже на проблему:\n${formatList(enriched.whyLines)}`, + `На чем это основано:\n${formatList(enriched.evidenceLines)}`, + `Что проверить первым:\n${formatList(enriched.checkLines)}`, + `Ограничения:\n${formatList(enriched.limitationLines)}` ] .filter(Boolean) .join("\n\n")); } function composeAssistantAnswerV11(input) { const fallbackType = fallbackFromSummary(input.routeSummary); + const questionType = input.questionTypeHint ?? "unknown"; + const anchorUsage = evaluateCompanyAnchorUsage(input.companyAnchors, input.retrievalResults); const okResults = input.retrievalResults.filter((item) => item.status === "ok"); const partialResults = input.retrievalResults.filter((item) => item.status === "partial"); const emptyResults = input.retrievalResults.filter((item) => item.status === "empty"); @@ -2368,15 +2865,8 @@ function composeAssistantAnswerV11(input) { const problemUnitSummary = selectProblemUnitSummary(input.retrievalResults); const problemHeavyUnits = problemUnits.filter((item) => PROBLEM_HEAVY_TYPES.has(item.problem_unit_type)); const focusNarrativeDomain = inferP0FocusNarrativeDomain(input.userMessage, input.retrievalResults, problemHeavyUnits, input.focusDomainHint); - const settlementGrounding = focusNarrativeDomain === "settlements_60_62" - ? evaluateSettlementEvidenceGrounding(input.retrievalResults) - : { - has_settlement_primary: false, - has_foreign_primary: false, - foreign_primary_domains: [], - blocked: false - }; - const settlementGroundingBlocked = focusNarrativeDomain === "settlements_60_62" && settlementGrounding.blocked; + const focusDomainGrounding = evaluateP0DomainEvidenceGrounding(input.retrievalResults, focusNarrativeDomain); + const focusDomainGroundingBlocked = Boolean(focusNarrativeDomain && focusDomainGrounding.blocked); const rankedProblemUnits = rankProblemUnitsForAnswer(problemHeavyUnits, lifecycleAnswerEnabled, focusNarrativeDomain); const domainAlignedProblemUnits = focusNarrativeDomain === null ? rankedProblemUnits @@ -2384,7 +2874,7 @@ function composeAssistantAnswerV11(input) { const domainLockMissBase = Boolean(focusNarrativeDomain && rankedProblemUnits.length > 0 && domainAlignedProblemUnits.length === 0); - const domainLockMiss = domainLockMissBase || settlementGroundingBlocked; + const domainLockMiss = domainLockMissBase || focusDomainGroundingBlocked; const selectedProblemUnits = (focusNarrativeDomain === null ? rankedProblemUnits : domainAlignedProblemUnits).slice(0, 4); const claimEvidenceLinks = buildClaimEvidenceLinks(input.retrievalResults); const aggregateEvidenceConfidence = aggregateConfidence(input.retrievalResults, evidenceItems); @@ -2422,7 +2912,7 @@ function composeAssistantAnswerV11(input) { focusedStrong, policySignals }); - const guardedDecision = settlementGroundingBlocked && + const guardedDecision = focusDomainGroundingBlocked && decision.mode !== "out_of_scope" && decision.mode !== "route_mismatch" && decision.mode !== "backend_error" @@ -2437,7 +2927,9 @@ function composeAssistantAnswerV11(input) { policySignals.minimum_evidence_failed || limitationReasonCodes.includes("missing_mechanism") || limitationReasonCodes.includes("weak_source_mapping") || + limitationReasonCodes.includes("insufficient_detail") || aggregateEvidenceConfidence === "low" || + domainLockMiss || lowConfidenceConcentration; const hardBlockedMode = guardedDecision.mode === "out_of_scope" || guardedDecision.mode === "route_mismatch" || @@ -2468,7 +2960,11 @@ function composeAssistantAnswerV11(input) { }); const lifecycleModeActive = lifecycleAnswerEnabled && selectedProblemUnits.length > 0 && hasLifecycleResolution(selectedProblemUnits); return { - assistant_reply: renderPolicyReply(problemCentricStructure), + assistant_reply: renderPolicyReply(problemCentricStructure, { + questionType, + focusDomain: focusNarrativeDomain, + anchors: anchorUsage + }), fallback_type: guardedDecision.fallback_type, reply_type: guardedDecision.reply_type, answer_structure_v11: problemCentricStructure, @@ -2495,9 +2991,12 @@ function composeAssistantAnswerV11(input) { ...limitationReasonCodes.map((code) => limitationReasonToText(code)), ...extractLimitations(input.retrievalResults), ...input.groundingCheck.reasons, - ...(settlementGroundingBlocked + ...(focusDomainGroundingBlocked + ? ["Целевой механизм активного домена подтвержден частично; часть первичной опоры пришла из смежного контура."] + : []), + ...(anchorUsage.unused.length > 0 ? [ - `Primary settlement evidence is not confirmed; foreign domains dominate: ${settlementGrounding.foreign_primary_domains.join(", ") || "unknown"}.` + `Часть якорей запроса пока не подтверждена в опоре: ${anchorUsage.unused.slice(0, 5).join(", ")}.` ] : []), ...(policySignals.minimum_evidence_failed ? ["Minimum evidence gate failed for current scope."] : []), @@ -2511,11 +3010,18 @@ function composeAssistantAnswerV11(input) { ...(guardedDecision.mode === "clarification_required" && missingAnchors.account ? ["missing_anchor:account"] : []), ...(guardedDecision.mode === "clarification_required" && missingAnchors.documentOrObject ? ["missing_anchor:document_or_object"] : []), ...(guardedDecision.mode === "clarification_required" && missingAnchors.counterparty ? ["missing_anchor:counterparty"] : []), - ...(settlementGroundingBlocked ? ["settlement_primary_evidence_not_confirmed"] : []) + ...(focusDomainGroundingBlocked ? ["primary_domain_evidence_not_confirmed"] : []) ], 8); + const confidenceLimited = guardedDecision.mode !== "focused_grounded" || + limitationReasonCodes.includes("missing_mechanism") || + limitationReasonCodes.includes("heuristic_inference") || + limitationReasonCodes.includes("weak_source_mapping") || + limitationReasonCodes.includes("insufficient_detail") || + aggregateEvidenceConfidence === "low" || + focusDomainGroundingBlocked; const mechanismStatus = mechanismNotes.length === 0 ? "unresolved" - : limitationReasonCodes.includes("missing_mechanism") || limitationReasonCodes.includes("heuristic_inference") + : confidenceLimited ? "limited" : "grounded"; const answerStructure = { @@ -2524,7 +3030,8 @@ function composeAssistantAnswerV11(input) { direct_answer: buildDirectAnswer({ mode: guardedDecision.mode, retrievalResults: input.retrievalResults, - policySignals + policySignals, + focusDomain: focusNarrativeDomain }), mechanism_block: { status: mechanismStatus, @@ -2557,7 +3064,11 @@ function composeAssistantAnswerV11(input) { } }; return { - assistant_reply: renderPolicyReply(answerStructure), + assistant_reply: renderPolicyReply(answerStructure, { + questionType, + focusDomain: focusNarrativeDomain, + anchors: anchorUsage + }), fallback_type: guardedDecision.fallback_type, reply_type: guardedDecision.reply_type, answer_structure_v11: answerStructure, diff --git a/llm_normalizer/backend/dist/services/assistantDataLayer.js b/llm_normalizer/backend/dist/services/assistantDataLayer.js index 93a4e20..91929c7 100644 --- a/llm_normalizer/backend/dist/services/assistantDataLayer.js +++ b/llm_normalizer/backend/dist/services/assistantDataLayer.js @@ -30,6 +30,9 @@ const ACCOUNT_SPECIFIC_MARKERS = /(?:\u0441\u0447\u0435\u0442(?:\u0430|\u0443|\u const PERIOD_MARKERS = /\b20\d{2}(?:[-./](?:0[1-9]|1[0-2]))?\b/; const ENTITY_SPECIFIC_MARKERS = /(?:\u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442|supplier|buyer|\u0434\u043e\u043a\u0443\u043c\u0435\u043d\u0442|invoice|posting|register|guid|id[:=\s])/iu; const EXACT_OBJECT_MARKERS = /(?:\u0434\u043e\u043a\u0443\u043c\u0435\u043d\u0442\s*(?:#|\u2116)|\bref\b|\bid\b|trx-\d+|inv-\d+)/iu; +const CONTRACT_MARKERS = /(?:\u0434\u043e\u0433\u043e\u0432\u043e\u0440(?:\u0430|\u0443|\u043e\u043c|\u0435)?\s*(?:№|#|n)\s*[a-z\u0430-\u044f0-9./_-]+)/iu; +const DOCUMENT_NUMBER_MARKERS = /(?:(?:\u0441\u0447(?:\u0435|\u0451)\u0442(?:-\u0444\u0430\u043a\u0442\u0443\u0440(?:\u0430|\u044b))?|\u0440\u0435\u0430\u043b\u0438\u0437\u0430\u0446(?:\u0438\u044f|\u0438\u0438)|\u0430\u043a\u0442)\s*(?:№|#|n)\s*[a-z\u0430-\u044f0-9./_-]+)/iu; +const AMOUNT_MARKERS = /\b(?:\d{1,3}(?:[ \u00A0]\d{3})+(?:[.,]\d{2})?|\d+[.,]\d{2})\b/u; const ROUTE_MIN_EVIDENCE_GATE = { hybrid_store_plus_live: { min_evidence_items: 3, @@ -101,6 +104,9 @@ function detectBroadQuery(fragmentText, route) { const hasEntityAnchor = ENTITY_SPECIFIC_MARKERS.test(lower); const hasExactObjectAnchor = EXACT_OBJECT_MARKERS.test(lower); const hasGuidAnchor = extractGuids(lower).length > 0; + const hasContractAnchor = CONTRACT_MARKERS.test(lower); + const hasDocumentNumberAnchor = DOCUMENT_NUMBER_MARKERS.test(lower); + const hasAmountAnchor = AMOUNT_MARKERS.test(lower); let anchorScore = 0; if (hasGuidAnchor) anchorScore += 3; @@ -112,8 +118,17 @@ function detectBroadQuery(fragmentText, route) { anchorScore += 1; if (hasExactObjectAnchor) anchorScore += 1; + if (hasContractAnchor) + anchorScore += 2; + if (hasDocumentNumberAnchor) + anchorScore += 2; + if (hasAmountAnchor) + anchorScore += 1; const weakAnchors = anchorScore <= 1; - const strongFocus = hasGuidAnchor || (hasAccountAnchor && hasPeriodAnchor) || anchorScore >= 4; + const strongFocus = hasGuidAnchor || + (hasAccountAnchor && hasPeriodAnchor) || + (hasContractAnchor && hasDocumentNumberAnchor) || + anchorScore >= 4; const routeSensitiveBroad = route === "batch_refresh_then_store" || route === "hybrid_store_plus_live"; let broadnessLevel = "low"; if (hasGenericMarker && !strongFocus && (weakAnchors || routeSensitiveBroad)) { @@ -233,9 +248,7 @@ const P0_DOMAIN_CARDS = [ /\u0441\u0447[её]т.?фактур/i, /\u043a\u043d\u0438\u0433[аи]\s+\u043f\u043e\u043a\u0443\u043f/i, /\u043a\u043d\u0438\u0433[аи]\s+\u043f\u0440\u043e\u0434\u0430\u0436/i, - /\u0432\u044b\u0447\u0435\u0442/i, - /\b19\b/, - /\b68\b/ + /\u0432\u044b\u0447\u0435\u0442/i ] }, { @@ -251,19 +264,20 @@ const P0_DOMAIN_CARDS = [ expected_edges: ["document_to_posting", "deferred_expense_to_writeoff", "contract_to_documents"], forbidden_cross_domain_leakage: ["vat", "taxes", "bank", "settlements", "suppliers", "customers", "fixed_assets"], symptom_markers: [ - /\b20\b/, - /\b21\b/, - /\b23\b/, - /\b25\b/, - /\b26\b/, - /\b28\b/, - /\b29\b/, - /\b44\b/, /period\s*close/i, - /\u0437\u0430\u043a\u0440\u044b\u0442/i, + /month\s*close/i, + /close\s+period/i, + /закрыт[а-яё]*\s+период/i, + /close\s+operation/i, + /allocation/i, + /закр/i, + /перио/i, + /\u0437\u0430\u043a\u0440\u044b\u0442(?:\u0438|\u0438\u0435|\u044b|)\s*(?:\u043c\u0435\u0441\u044f\u0446|\u0441\u0447\u0435\u0442)/i, + /\u0440\u0435\u0433\u043b\u0430\u043c\u0435\u043d\u0442/i, /\u0437\u0430\u0442\u0440\u0430\u0442/i, /\u0440\u0430\u0441\u043f\u0440\u0435\u0434\u0435\u043b/i, - /\u043e\u0441\u0442\u0430\u0442\u043a/i + /\u0440\u0431\u043f/i, + /\u0430\u043c\u043e\u0440\u0442\u0438\u0437/i ] } ]; @@ -883,6 +897,26 @@ function extractAccountScopeFromText(text) { pushAccount(account); } } + const closePairPattern = /\b(?:20|21|23|25|26|28|29|44)\s*[-/]\s*(?:20|21|23|25|26|28|29|44)\b/g; + let closePairMatch = null; + while ((closePairMatch = closePairPattern.exec(lower)) !== null) { + const pair = closePairMatch[0]; + const pairAccounts = pair.match(/\b\d{2}(?:\.\d{1,2})?\b/g) ?? []; + for (const account of pairAccounts) { + pushAccount(account); + } + } + const suffixAnchorPattern = /\b(?:51|60|62|68|76|97)(?:\.\d{1,2})?(?:-(?:му|й|го|м|х))?\b/giu; + let suffixAnchorMatch = null; + while ((suffixAnchorMatch = suffixAnchorPattern.exec(lower)) !== null) { + const token = suffixAnchorMatch[0]; + const start = suffixAnchorMatch.index; + const end = start + token.length; + if (intersectsSpan(start, end, dateSpans)) { + continue; + } + pushAccount(token); + } const explicitPattern = /\b\d{2}(?:\.\d{1,2})?\b/g; let explicitMatch = null; const settlementLexicalAnchor = /(оплат|расчет|расч[её]т|аванс|долг|постав|покуп|settlement|payment|supplier|customer)/i.test(lower); @@ -1037,31 +1071,41 @@ function buildSemanticRetrievalProfile(fragmentText) { pushMany(entityTypes, ["counterparty", "contract", "document", "posting"]); pushMany(relationPatterns, ["payment_to_settlement", "statement_to_document", "document_to_posting"]); } - if (/постав|постав|supplier|vendor|60\b/i.test(lower)) { + const hasSettlementAccountScope = accountScope.some((item) => item === "51" || item === "60" || item === "62" || item === "76"); + const hasVatAccountScope = accountScope.some((item) => item === "19" || item === "68"); + const hasFixedAssetAccountScope = accountScope.some((item) => item === "01" || item === "02" || item === "08"); + const hasDeferredExpenseAccountScope = accountScope.some((item) => item === "97"); + const hasMonthCloseCostsAccountScope = accountScope.some((item) => CLOSE_COST_ACCOUNTS.includes(item)); + const hasExplicitMonthCloseLexicalMarker = /(?:закрыти[ея]\s+месяц|закрыт[а-яё]*\s+период|закрытие\s+счетов|регламентн|косвенн|затрат|распределени|рбп|амортиз|финансовых\s+результат|month\s*close|period\s*close|close\s+period|close\s+operation)/i.test(lower) || + (/закр/i.test(lower) && /перио/i.test(lower)); + if (/постав|постав|supplier|vendor/i.test(lower) || hasSettlementAccountScope) { pushMany(domainScope, ["suppliers", "settlements"]); pushMany(documentTypes, ["supplier_receipt", "settlement_document"]); pushMany(entityTypes, ["counterparty", "contract", "document", "posting"]); pushMany(relationPatterns, ["payment_to_settlement", "contract_to_documents"]); } - if (/покупат|покупат|customer|buyer|62\b/i.test(lower)) { + if (/покупат|покупат|customer|buyer/i.test(lower) || hasSettlementAccountScope) { pushMany(domainScope, ["customers", "settlements"]); pushMany(documentTypes, ["sales_document", "settlement_document"]); pushMany(entityTypes, ["counterparty", "contract", "document", "posting"]); pushMany(relationPatterns, ["payment_to_settlement", "contract_to_documents"]); } - if (/РЅРґСЃ|ндс|vat|РєРЅРёРіР° РїРѕРєСѓРїРѕРє|РєРЅРёРіР° продаж|счет.?фактур|книг[аи]\s+покуп|книг[аи]\s+продаж|сч[её]т.?фактур|19\b|68\b/i.test(lower)) { + if (/РЅРґСЃ|ндс|vat|РєРЅРёРіР° РїРѕРєСѓРїРѕРє|РєРЅРёРіР° продаж|счет.?фактур|книг[аи]\s+покуп|книг[аи]\s+продаж|сч[её]т.?фактур/i.test(lower) || + hasVatAccountScope) { pushMany(domainScope, ["vat", "taxes"]); pushMany(documentTypes, ["invoice", "vat_document"]); pushMany(entityTypes, ["document", "tax_entry", "posting"]); pushMany(relationPatterns, ["invoice_to_vat", "document_to_posting"]); } - if (/РѕСЃ|РѕСЃРЅРѕРІРЅ(ые|ых)\s+сред|(?:^|[^a-zа-яё])ос(?:$|[^a-zа-яё])|основн(ые|ых|ым)?\s+средств|fixed asset|amort|амортиз|амортиз|01\b|02\b|08\b/i.test(lower)) { + if (/РѕСЃ|РѕСЃРЅРѕРІРЅ(ые|ых)\s+сред|(?:^|[^a-zа-яё])ос(?:$|[^a-zа-яё])|основн(ые|ых|ым)?\s+средств|fixed asset|amort|амортиз|амортиз/i.test(lower) || + hasFixedAssetAccountScope) { pushMany(domainScope, ["fixed_assets"]); pushMany(documentTypes, ["fixed_asset_card", "fixed_asset_acceptance", "depreciation_document"]); pushMany(entityTypes, ["fixed_asset", "document", "posting"]); pushMany(relationPatterns, ["asset_card_to_depreciation", "document_to_posting"]); } - if (/СЂР±Рї|расходы будущих периодов|рбп|расходы\s+будущих\s+периодов|deferred|writeoff|97\b/i.test(lower)) { + if (/СЂР±Рї|расходы будущих периодов|рбп|расходы\s+будущих\s+периодов|deferred|writeoff/i.test(lower) || + hasDeferredExpenseAccountScope) { pushMany(domainScope, ["deferred_expense", "period_close"]); pushMany(documentTypes, ["deferred_expense_document", "period_close_document"]); pushMany(entityTypes, ["document", "posting"]); @@ -1084,7 +1128,7 @@ function buildSemanticRetrievalProfile(fragmentText) { pushMany(anomalyPatterns, ["repeated_anomaly"]); pushMany(rankingBasis, ["repeatability"]); } - if (/закрыт|закрытие|период|закрыт|закрытие|период|month close|period close|closure/i.test(lower)) { + if (hasExplicitMonthCloseLexicalMarker || hasMonthCloseCostsAccountScope || hasDeferredExpenseAccountScope) { pushMany(domainScope, ["period_close"]); pushMany(anomalyPatterns, ["closure_risk", "broken_lifecycle"]); pushMany(documentTypes, ["period_close_document"]); diff --git a/llm_normalizer/backend/dist/services/assistantService.js b/llm_normalizer/backend/dist/services/assistantService.js index 1f1a2b4..293ce3a 100644 --- a/llm_normalizer/backend/dist/services/assistantService.js +++ b/llm_normalizer/backend/dist/services/assistantService.js @@ -46,6 +46,8 @@ const assistantDataLayer_1 = __importStar(require("./assistantDataLayer")); const assistantSessionLogger_1 = __importStar(require("./assistantSessionLogger")); const investigationState_1 = __importStar(require("./investigationState")); const retrievalResultNormalizer_1 = __importStar(require("./retrievalResultNormalizer")); +const questionTypeResolver_1 = __importStar(require("./questionTypeResolver")); +const companyAnchorResolver_1 = __importStar(require("./companyAnchorResolver")); function retrievalSummaryForRoute(route) { if (route === "store_canonical") return "Canonical accounting data path selected."; @@ -870,6 +872,26 @@ function extractFollowupAccountAnchorsLoose(text) { } return Array.from(new Set(anchors)); } +function inferP0DomainFromMessage(text) { + const lower = String(text ?? "").toLowerCase(); + const accountTokens = extractAccountTokens(lower); + const hasVatAccount = accountTokens.some((token) => /^(?:19|68)(?:\.|$)/.test(token)); + const hasSettlementAccount = accountTokens.some((token) => /^(?:51|60|62|76)(?:\.|$)/.test(token)); + const hasMonthCloseAccount = accountTokens.some((token) => /^(?:97|2\d|3\d|4[0-4])(?:\.|$)/.test(token)); + const vatLexical = /(?:ндс|vat|счет[\s-]?фактур|сч[её]т[\s-]?фактур|книг[аи]\s+(?:покуп|продаж)|налогов)/i.test(lower); + const settlementLexical = /(?:долг|аванс|зач[её]т|взаимозач|расч[её]т|оплат|платеж|платёж|постав|покупател)/i.test(lower); + const monthCloseLexical = /(?:закрыти[ея]\s+месяц|закрытие счетов|регламентн|косвенн|затрат|распределени|рбп|амортиз|финансовых результат)/i.test(lower); + if (hasVatAccount || vatLexical) { + return "vat_document_register_book"; + } + if (monthCloseLexical || hasMonthCloseAccount) { + return "month_close_costs_20_44"; + } + if (hasSettlementAccount || settlementLexical) { + return "settlements_60_62"; + } + return null; +} function hasStrongFollowupAnchors(userMessage, state) { const explicitPeriod = extractNormalizedPeriodLiteral(userMessage); if (explicitPeriod && state.focus.period && explicitPeriod !== state.focus.period) { @@ -878,6 +900,14 @@ function hasStrongFollowupAnchors(userMessage, state) { return true; } } + const inferredDomain = inferP0DomainFromMessage(userMessage); + const activeDomain = compactWhitespace(state.followup_context?.active_domain ?? state.focus.domain ?? ""); + if (inferredDomain && activeDomain && inferredDomain !== activeDomain) { + const domainLooksLikeFollowupRefinement = hasFollowupMarker(userMessage) && hasReferentialPointer(userMessage); + if (!domainLooksLikeFollowupRefinement) { + return true; + } + } const explicitAccounts = extractAccountTokens(userMessage); const followupAccounts = explicitAccounts.length > 0 ? explicitAccounts : extractFollowupAccountAnchorsLoose(userMessage); if (followupAccounts.length > 0) { @@ -1193,6 +1223,8 @@ class AssistantService { const focusDomainHint = followupBinding.usage?.applied ? session.investigation_state?.followup_context?.active_domain ?? session.investigation_state?.focus.domain ?? null : null; + const questionTypeClass = (0, questionTypeResolver_1.resolveQuestionType)(userMessage); + const companyAnchors = (0, companyAnchorResolver_1.resolveCompanyAnchors)(userMessage); const composition = (0, answerComposer_1.composeAssistantAnswer)({ userMessage, routeSummary: normalized.route_hint_summary, @@ -1201,6 +1233,8 @@ class AssistantService { coverageReport: coverageEvaluation.coverage, groundingCheck, focusDomainHint, + questionTypeHint: questionTypeClass, + companyAnchors, enableAnswerPolicyV11: config_1.FEATURE_ASSISTANT_ANSWER_POLICY_V11, enableProblemCentricAnswerV1: config_1.FEATURE_ASSISTANT_PROBLEM_CENTRIC_ANSWER_V1, enableLifecycleAnswerV1: config_1.FEATURE_ASSISTANT_LIFECYCLE_ANSWER_V1 @@ -1251,6 +1285,8 @@ class AssistantService { retrieval_results: retrievalResults, answer_grounding_check: groundingCheck, dropped_intent_segments: extractDiscardedIntentSegments(normalized.normalized), + question_type_class: questionTypeClass, + company_anchors: companyAnchors, ...(followupBinding.usage ? { followup_state_usage: followupBinding.usage } : {}), problem_centric_answer_applied: composition.problem_centric_answer_applied ?? false, problem_units_used_count: composition.problem_units_used_count ?? 0, @@ -1314,6 +1350,8 @@ class AssistantService { route_subject_match: groundingCheck.route_subject_match, clarification_target: coverageEvaluation.coverage.clarification_needed_for, dropped_intent_segments: extractDiscardedIntentSegments(normalized.normalized), + question_type_class: questionTypeClass, + company_anchors: companyAnchors, ...(followupBinding.usage ? { followup_state_usage: followupBinding.usage } : {}), problem_centric_answer_applied: composition.problem_centric_answer_applied ?? false, problem_units_used_count: composition.problem_units_used_count ?? 0, diff --git a/llm_normalizer/backend/dist/services/companyAnchorResolver.js b/llm_normalizer/backend/dist/services/companyAnchorResolver.js new file mode 100644 index 0000000..12fbf49 --- /dev/null +++ b/llm_normalizer/backend/dist/services/companyAnchorResolver.js @@ -0,0 +1,151 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.resolveCompanyAnchors = resolveCompanyAnchors; +const CONTRACT_PATTERN = /(?:\u0434\u043e\u0433\u043e\u0432\u043e\u0440(?:\u0430|\u0443|ом|е)?\s*(?:№|#|n)?\s*([a-zа-я0-9./_-]+))/giu; +const DOCUMENT_NUMBER_PATTERN = /(?:(?:\u0441\u0447(?:\u0435|\u0451)\u0442(?:-\u0444\u0430\u043a\u0442\u0443\u0440(?:а|ы))?|\u0440\u0435\u0430\u043b\u0438\u0437\u0430\u0446(?:ия|ии)|\u0430\u043a\u0442)\s*(?:№|#|n)\s*([a-zа-я0-9./_-]+))/giu; +const DATE_PATTERN = /\b(?:\d{1,2}[./]\d{1,2}[./]\d{2,4}|\d{1,2}\s+(?:\u044f\u043d\u0432\u0430\u0440\u044f|\u0444\u0435\u0432\u0440\u0430\u043b\u044f|\u043c\u0430\u0440\u0442\u0430|\u0430\u043f\u0440\u0435\u043b\u044f|\u043c\u0430\u044f|\u0438\u044e\u043d\u044f|\u0438\u044e\u043b\u044f|\u0430\u0432\u0433\u0443\u0441\u0442\u0430|\u0441\u0435\u043d\u0442\u044f\u0431\u0440\u044f|\u043e\u043a\u0442\u044f\u0431\u0440\u044f|\u043d\u043e\u044f\u0431\u0440\u044f|\u0434\u0435\u043a\u0430\u0431\u0440\u044f))\b/giu; +const AMOUNT_PATTERN = /\b(?:\d{1,3}(?:[ \u00A0]\d{3})+(?:[.,]\d{2})?|\d+[.,]\d{2})\b/gu; +const CONTEXTUAL_ACCOUNT_PATTERN = /(?:\b(?:\u0441\u0447(?:\u0435|\u0451)\u0442(?:а|у|ом|ов)?|account|schet)\b\s*(?:№|#|:)?\s*)(\d{2}(?:\.\d{2})?)/giu; +const ACCOUNT_PAIR_PATTERN = /\b(\d{2}\.\d{2})\s*\/\s*(\d{2}\.\d{2})\b/gu; +const PERIOD_PATTERN = /\b(?:20\d{2}(?:[-./](?:0?[1-9]|1[0-2]))?|(?:\u0438\u044e\u043b\u044c|\u0438\u044e\u043d\u044c|\u0430\u0432\u0433\u0443\u0441\u0442|\u0441\u0435\u043d\u0442\u044f\u0431\u0440\u044c|\u043e\u043a\u0442\u044f\u0431\u0440\u044c|\u043d\u043e\u044f\u0431\u0440\u044c|\u0434\u0435\u043a\u0430\u0431\u0440\u044c|\u044f\u043d\u0432\u0430\u0440\u044c|\u0444\u0435\u0432\u0440\u0430\u043b\u044c|\u043c\u0430\u0440\u0442|\u0430\u043f\u0440\u0435\u043b\u044c|\u043c\u0430\u0439)\s+20\d{2})\b/giu; +const DOCUMENT_TYPE_PATTERNS = [ + { name: "invoice", pattern: /\b(?:\u0441\u0447(?:\u0435|\u0451)\u0442-\u0444\u0430\u043a\u0442\u0443\u0440|invoice)\b/iu }, + { name: "realization", pattern: /\b(?:\u0440\u0435\u0430\u043b\u0438\u0437\u0430\u0446|realization)\b/iu }, + { name: "payment", pattern: /\b(?:\u043e\u043f\u043b\u0430\u0442|payment|\u043f\u043b\u0430\u0442\u0435\u0436)\b/iu }, + { name: "receipt", pattern: /\b(?:\u043f\u043e\u0441\u0442\u0443\u043f\u043b\u0435\u043d|receipt)\b/iu }, + { name: "close", pattern: /\b(?:\u0437\u0430\u043a\u0440\u044b\u0442\u0438|\u0440\u0435\u0433\u043b\u0430\u043c\u0435\u043d\u0442)\b/iu }, + { name: "rbp_writeoff", pattern: /\b(?:\u0440\u0431\u043f|\u0441\u043f\u0438\u0441\u0430\u043d\u0438\u0435)\b/iu }, + { name: "amortization", pattern: /\b(?:\u0430\u043c\u043e\u0440\u0442\u0438\u0437|amortization)\b/iu } +]; +const KNOWN_ACCOUNT_PREFIXES = new Set([ + "01", + "02", + "07", + "08", + "10", + "13", + "19", + "20", + "21", + "23", + "25", + "26", + "41", + "43", + "44", + "45", + "50", + "51", + "52", + "55", + "57", + "58", + "60", + "62", + "66", + "67", + "68", + "69", + "70", + "71", + "73", + "76", + "90", + "91", + "94", + "96", + "97" +]); +function uniqueStrings(values, limit = 48) { + return Array.from(new Set(values.map((item) => String(item ?? "").trim()).filter(Boolean))).slice(0, limit); +} +function normalizeAnchorToken(value) { + return String(value ?? "") + .replace(/\s+/g, " ") + .trim(); +} +function collectMatches(text, pattern, useCaptures = true) { + const values = []; + pattern.lastIndex = 0; + for (const match of text.matchAll(pattern)) { + if (!match) + continue; + if (useCaptures && match.length > 1) { + for (let i = 1; i < match.length; i += 1) { + const token = normalizeAnchorToken(match[i] ?? ""); + if (token) + values.push(token); + } + continue; + } + const token = normalizeAnchorToken(match[0] ?? ""); + if (token) + values.push(token); + } + return uniqueStrings(values); +} +function isKnownAccount(value) { + const token = String(value ?? "").trim(); + const match = token.match(/^(\d{2})/); + if (!match) { + return false; + } + return KNOWN_ACCOUNT_PREFIXES.has(match[1]); +} +function collectAccountAnchors(text) { + const tokens = new Set(); + for (const token of collectMatches(text, CONTEXTUAL_ACCOUNT_PATTERN, true)) { + if (isKnownAccount(token)) { + tokens.add(token); + } + } + ACCOUNT_PAIR_PATTERN.lastIndex = 0; + for (const match of text.matchAll(ACCOUNT_PAIR_PATTERN)) { + const left = normalizeAnchorToken(match[1] ?? ""); + const right = normalizeAnchorToken(match[2] ?? ""); + if (left && isKnownAccount(left)) { + tokens.add(left); + } + if (right && isKnownAccount(right)) { + tokens.add(right); + } + } + return Array.from(tokens).slice(0, 24); +} +function collectDocumentTypeAnchors(text) { + return uniqueStrings(DOCUMENT_TYPE_PATTERNS.filter((entry) => entry.pattern.test(text)).map((entry) => entry.name), 12); +} +function flattenAnchors(input) { + return uniqueStrings([ + ...input.contract_numbers, + ...input.document_numbers, + ...input.dates, + ...input.amounts, + ...input.accounts.map((item) => `account:${item}`), + ...input.periods.map((item) => `period:${item}`), + ...input.document_types.map((item) => `doc_type:${item}`) + ], 64); +} +function resolveCompanyAnchors(input) { + const text = String(input ?? ""); + const contractNumbers = collectMatches(text, CONTRACT_PATTERN, true).map((item) => `\u0434\u043e\u0433\u043e\u0432\u043e\u0440 № ${item}`); + const documentNumbers = collectMatches(text, DOCUMENT_NUMBER_PATTERN, true).map((item) => `\u0434\u043e\u043a\u0443\u043c\u0435\u043d\u0442 № ${item}`); + const dates = collectMatches(text, DATE_PATTERN, false); + const amounts = collectMatches(text, AMOUNT_PATTERN, false); + const accounts = collectAccountAnchors(text); + const periods = collectMatches(text, PERIOD_PATTERN, false); + const documentTypes = collectDocumentTypeAnchors(text); + const resultBase = { + contract_numbers: uniqueStrings(contractNumbers, 12), + document_numbers: uniqueStrings(documentNumbers, 16), + dates: uniqueStrings(dates, 16), + amounts: uniqueStrings(amounts, 16), + accounts: uniqueStrings(accounts, 24), + periods: uniqueStrings(periods, 12), + document_types: documentTypes + }; + return { + ...resultBase, + all: flattenAnchors(resultBase) + }; +} diff --git a/llm_normalizer/backend/dist/services/lifecycleRuntime.js b/llm_normalizer/backend/dist/services/lifecycleRuntime.js index 4547eed..360f80d 100644 --- a/llm_normalizer/backend/dist/services/lifecycleRuntime.js +++ b/llm_normalizer/backend/dist/services/lifecycleRuntime.js @@ -595,8 +595,13 @@ function inferLifecycleDomain(input) { ] .join(" ") .toLowerCase(); + const hasExplicitVatHint = includesAny(unitTokens, [/domain_hint:vat_flow/]); + const hasExplicitDeferredHint = includesAny(unitTokens, [/domain_hint:deferred_expense/]); + const hasExplicitFixedAssetHint = includesAny(unitTokens, [/domain_hint:fixed_asset/]); + const hasExplicitPeriodCloseHint = includesAny(unitTokens, [/domain_hint:period_close/]); + const hasCustomerSettlementHint = includesAny(unitTokens, [/domain_hint:customer_settlement/]); + const hasBankSettlementHint = includesAny(unitTokens, [/domain_hint:bank_settlement/]); const hasVatMarkers = includesAny(unitTokens, [ - /domain_hint:vat_flow/, /\binvoice_to_vat\b/, /\bvat_chain_conflict\b/, /(^|[^a-z0-9])nds([^a-z0-9]|$)/, @@ -605,7 +610,6 @@ function inferLifecycleDomain(input) { /\baccount[_:\s-]?(19|68)\b/ ]); const hasDeferredMarkers = includesAny(unitTokens, [ - /domain_hint:deferred_expense/, /\bdeferred(?:_expense)?\b/, /\bdeferred_expense_to_writeoff\b/, /\bwriteoff\b/, @@ -614,7 +618,6 @@ function inferLifecycleDomain(input) { /\baccount[_:\s-]?97\b/ ]); const hasFixedAssetMarkers = includesAny(unitTokens, [ - /domain_hint:fixed_asset/, /\bfixed[_\s-]?asset(?:s)?\b/, /\basset_card_to_depreciation\b/, /\bdepreciation(?:_active)?\b/, @@ -623,7 +626,6 @@ function inferLifecycleDomain(input) { /\baccount[_:\s-]?(01|02|08)\b/ ]); const hasPeriodCloseMarkers = includesAny(unitTokens, [ - /domain_hint:period_close/, /\bperiod[_\s-]?close\b/, /\bperiod_close_risk\b/, /\bclose[_\s-]?risk\b/, @@ -632,6 +634,24 @@ function inferLifecycleDomain(input) { /\bmonth[_\s-]?close\b/, /\bperiod_risk\b/ ]); + if (hasExplicitDeferredHint) { + return "deferred_expense"; + } + if (hasExplicitFixedAssetHint) { + return "fixed_asset"; + } + if (hasExplicitVatHint) { + return "vat_flow"; + } + if (hasExplicitPeriodCloseHint) { + return "period_close"; + } + if (hasCustomerSettlementHint) { + return "customer_settlement"; + } + if (hasBankSettlementHint) { + return "bank_settlement"; + } if (hasDeferredMarkers) { return "deferred_expense"; } diff --git a/llm_normalizer/backend/dist/services/problemUnitAssembler.js b/llm_normalizer/backend/dist/services/problemUnitAssembler.js index 7481bc1..2c3715a 100644 --- a/llm_normalizer/backend/dist/services/problemUnitAssembler.js +++ b/llm_normalizer/backend/dist/services/problemUnitAssembler.js @@ -67,11 +67,59 @@ function stringArrayFromUnknown(value) { function stringArrayFromPayload(item, key) { return stringArrayFromUnknown(item.payload[key]); } +function domainHintsFromSummary(summary) { + const hints = []; + const purityGuard = toObject(summary.domain_purity_guard); + const domainCardId = String(purityGuard?.domain_card_id ?? "").trim(); + if (domainCardId === "settlements_60_62") { + return ["bank_settlement", "customer_settlement"]; + } + if (domainCardId === "vat_document_register_book") { + return ["vat_flow"]; + } + if (domainCardId === "month_close_costs_20_44") { + return ["period_close"]; + } + const semanticProfile = toObject(summary.semantic_profile); + const domainScope = stringArrayFromUnknown(semanticProfile?.domain_scope); + for (const domain of domainScope) { + const normalized = domain.toLowerCase(); + if (normalized === "bank" || + normalized === "settlements" || + normalized === "suppliers" || + normalized === "supplier_payments" || + normalized === "other_settlements") { + hints.push("bank_settlement"); + continue; + } + if (normalized === "customers") { + hints.push("customer_settlement"); + continue; + } + if (normalized === "vat" || normalized === "taxes") { + hints.push("vat_flow"); + continue; + } + if (normalized === "period_close") { + hints.push("period_close"); + continue; + } + if (normalized === "deferred_expense") { + hints.push("deferred_expense"); + continue; + } + if (normalized === "fixed_assets") { + hints.push("fixed_asset"); + } + } + return uniqueStrings(hints); +} function extractSemanticProfile(summary) { const semanticProfile = toObject(summary.semantic_profile); + const domainHints = domainHintsFromSummary(summary).map((item) => `domain_hint:${item}`); return { - relation_patterns: stringArrayFromUnknown(semanticProfile?.relation_patterns), - anomaly_patterns: stringArrayFromUnknown(semanticProfile?.anomaly_patterns) + relation_patterns: uniqueStrings([...stringArrayFromUnknown(semanticProfile?.relation_patterns), ...domainHints]), + anomaly_patterns: uniqueStrings([...stringArrayFromUnknown(semanticProfile?.anomaly_patterns), ...domainHints]) }; } function resolveEntityOverlay(item, rawEntities) { diff --git a/llm_normalizer/backend/dist/services/questionTypeResolver.js b/llm_normalizer/backend/dist/services/questionTypeResolver.js new file mode 100644 index 0000000..5d4b335 --- /dev/null +++ b/llm_normalizer/backend/dist/services/questionTypeResolver.js @@ -0,0 +1,44 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.resolveQuestionType = resolveQuestionType; +const QUESTION_TYPE_RULES = [ + { + type: "what_to_check_first", + pattern: /(?:\bwhat\s+to\s+check\s+first\b|\bfirst\s+check\b|\bcheck\s+first\b|\u0441\s+\u0447\u0435\u0433\u043e\s+\u043d\u0430\u0447\u0430\u0442\u044c\s+\u043f\u0440\u043e\u0432\u0435\u0440\u043a|\u0447\u0442\u043e\s+\u043f\u0440\u043e\u0432\u0435\u0440\u0438\u0442\u044c\s+\u043f\u0435\u0440\u0432)/iu + }, + { + type: "what_is_it_grounded_on", + pattern: /(?:\bwhat\s+is\s+it\s+grounded\s+on\b|\bgrounded\s+on\b|\bbased\s+on\b|\bwhat\s+evidence\b|\u043d\u0430\s+\u0447(?:\u0435|\u0451)\u043c\s+\u044d\u0442\u043e\s+\u043e\u0441\u043d\u043e\u0432\u0430\u043d|\u0447\u0435\u043c\s+\u043f\u043e\u0434\u0442\u0432\u0435\u0440\u0436\u0434)/iu + }, + { + type: "prove_or_guess", + pattern: /(?:\bprove\b|\bguess\b|\bprove\s+or\s+guess\b|\bis\s+it\s+proven\b|\u044d\u0442\u043e\s+\u0434\u043e\u043a\u0430\u0437\u0430\u043d|\u0438\u043b\u0438\s+\u0442\u043e\u043b\u044c\u043a\u043e\s+\u0433\u0438\u043f\u043e\u0442\u0435\u0437|\u0434\u043e\u043a\u0430\u0437\u0430\u043d|\u0434\u043e\u0433\u0430\u0434|\u0435\u0441\u0442\u044c\s+\u043b\u0438|\u043c\u043e\u0436\u0435\u0442\s+\u043b\u0438|\u044d\u0442\u043e\s+\u0443\u0436\u0435.*\u0438\u043b\u0438)/iu + }, + { + type: "which_chains_are_complete_vs_incomplete", + pattern: /(?:\bcomplete(?:d)?\b.*\bincomplete\b|\bwhich\s+chains?\b|\bcomplete\s+vs\s+incomplete\b|\u043a\u0430\u043a\u0438\u0435\s+\u0446\u0435\u043f\u043e\u0447\u043a[аи]\s+.*\u0437\u0430\u0432\u0435\u0440\u0448|\u0447\u0442\u043e\s+\u0437\u0430\u043a\u0440\u044b\u0442\u043e.*\u0447\u0442\u043e\s+\u043d\u0435\u0442)/iu + }, + { + type: "where_break_is", + pattern: /(?:\bwhere\s+is\s+the\s+break\b|\bwhere\s+exactly\b|\blocate\b|\u0433\u0434\u0435\s+\u0438\u043c\u0435\u043d\u043d\u043e|\u0433\u0434\u0435\s+\u0440\u0430\u0437\u0440\u044b\u0432|\u0432\s+\u043a\u0430\u043a\u043e\u043c\s+\u043c\u0435\u0441\u0442\u0435)/iu + }, + { + type: "why_breaks", + pattern: /(?:\bwhy\b|\bwhy\s+does\s+it\s+break\b|\u043f\u043e\u0447\u0435\u043c\u0443|\u0432\s+\u0447(?:\u0435|\u0451)\u043c\s+\u043f\u0440\u0438\u0447\u0438\u043d\u0430|\u0438\u0437-\u0437\u0430\s+\u0447\u0435\u0433\u043e)/iu + } +]; +function resolveQuestionType(input) { + const text = String(input ?? "").trim(); + if (!text) { + return "unknown"; + } + for (const rule of QUESTION_TYPE_RULES) { + if (rule.pattern.test(text)) { + return rule.type; + } + } + if (/[??]/u.test(text)) { + return "why_breaks"; + } + return "unknown"; +} diff --git a/llm_normalizer/backend/scripts/analyzeWave13Chat20.js b/llm_normalizer/backend/scripts/analyzeWave13Chat20.js new file mode 100644 index 0000000..beb723d --- /dev/null +++ b/llm_normalizer/backend/scripts/analyzeWave13Chat20.js @@ -0,0 +1,614 @@ +#!/usr/bin/env node + +const fs = require("node:fs"); +const path = require("node:path"); + +const EXPECTED_QUESTION_TYPES = [ + "why_breaks", + "prove_or_guess", + "prove_or_guess", + "why_breaks", + "where_break_is", + "prove_or_guess", + "why_breaks", + "which_chains_are_complete_vs_incomplete", + "which_chains_are_complete_vs_incomplete", + "prove_or_guess", + "why_breaks", + "prove_or_guess", + "why_breaks", + "what_is_it_grounded_on", + "why_breaks", + "which_chains_are_complete_vs_incomplete", + "prove_or_guess", + "what_is_it_grounded_on", + "why_breaks", + "prove_or_guess" +]; + +function parseArgs(argv) { + const args = { + rawFile: "", + outputDir: "", + caseMatrixFile: "wave13_chat20_case_matrix_updated.md", + metricsFile: "wave13_chat20_metrics.json", + reportFile: "wave13_regression_report.md", + baselineMetricsFile: "" + }; + + for (let i = 0; i < argv.length; i += 1) { + const token = argv[i]; + if (token === "--raw-file") { + args.rawFile = String(argv[i + 1] ?? ""); + i += 1; + continue; + } + if (token === "--output-dir") { + args.outputDir = String(argv[i + 1] ?? ""); + i += 1; + continue; + } + if (token === "--case-matrix-file") { + args.caseMatrixFile = String(argv[i + 1] ?? args.caseMatrixFile); + i += 1; + continue; + } + if (token === "--metrics-file") { + args.metricsFile = String(argv[i + 1] ?? args.metricsFile); + i += 1; + continue; + } + if (token === "--report-file") { + args.reportFile = String(argv[i + 1] ?? args.reportFile); + i += 1; + continue; + } + if (token === "--baseline-metrics-file") { + args.baselineMetricsFile = String(argv[i + 1] ?? ""); + i += 1; + } + } + + return args; +} + +function ensureDir(dirPath) { + fs.mkdirSync(dirPath, { recursive: true }); +} + +function readJson(filePath) { + const raw = fs.readFileSync(filePath, "utf8").replace(/^\uFEFF/, ""); + return JSON.parse(raw); +} + +function writeUtf8Bom(filePath, content) { + ensureDir(path.dirname(filePath)); + fs.writeFileSync(filePath, `\uFEFF${content}`, "utf8"); +} + +function text(value) { + return value == null ? "" : String(value); +} + +function lower(value) { + return text(value).toLowerCase(); +} + +function expectedDomainByIndex(index) { + const caseNo = index + 1; + if (caseNo <= 8) { + return "settlements_60_62"; + } + if (caseNo <= 16) { + return "vat_document_register_book"; + } + return "month_close_costs_20_44"; +} + +function normalizeInternalDomain(domainName) { + const d = lower(domainName); + if (!d) { + return "unknown"; + } + if ( + d.includes("settlement") || + d.includes("supplier") || + d.includes("customer") || + d.includes("bank") + ) { + return "settlements_60_62"; + } + if (d.includes("vat") || d.includes("nds")) { + return "vat_document_register_book"; + } + if ( + d.includes("period_close") || + d.includes("month_close") || + d.includes("deferred_expense") || + d.includes("fixed_asset") || + d.includes("close") + ) { + return "month_close_costs_20_44"; + } + return "unknown"; +} + +function mergeCountMap(target, source) { + if (!source || typeof source !== "object") { + return; + } + for (const [key, value] of Object.entries(source)) { + const name = text(key); + if (!name) { + continue; + } + const count = Number(value) || 0; + if (!target[name]) { + target[name] = 0; + } + target[name] += count > 0 ? count : 1; + } +} + +function collectDomainScores(row) { + const scores = {}; + const retrieval = Array.isArray(row?.debug?.retrieval_results) ? row.debug.retrieval_results : []; + for (const item of retrieval) { + mergeCountMap(scores, item?.problem_unit_summary?.lifecycle_domain_distribution); + mergeCountMap(scores, item?.problem_unit_summary?.graph_summary?.domain_distribution); + const domainCard = text(item?.summary?.domain_purity_guard?.domain_card_id); + if (domainCard) { + if (!scores[domainCard]) { + scores[domainCard] = 0; + } + scores[domainCard] += 2; + } + const resultItems = Array.isArray(item?.items) ? item.items : []; + for (const resultItem of resultItems) { + const scopes = Array.isArray(resultItem?.graph_domain_scope) ? resultItem.graph_domain_scope : []; + for (const scope of scopes) { + const name = text(scope); + if (!name) { + continue; + } + if (!scores[name]) { + scores[name] = 0; + } + scores[name] += 1; + } + } + } + const activeDomain = text(row?.debug?.investigation_state_snapshot?.focus?.active_domain); + if (activeDomain) { + if (!scores[activeDomain]) { + scores[activeDomain] = 0; + } + scores[activeDomain] += 1; + } + return scores; +} + +function pickActualDomain(row) { + const scores = collectDomainScores(row); + const sorted = Object.entries(scores).sort((a, b) => { + if (b[1] !== a[1]) { + return b[1] - a[1]; + } + return String(a[0]).localeCompare(String(b[0])); + }); + if (!sorted.length) { + return "unknown"; + } + return normalizeInternalDomain(sorted[0][0]); +} + +function pickActualQuestionType(row) { + const qType = text(row?.debug?.question_type_class); + return qType || "unknown"; +} + +function extractCompanyAnchors(row) { + const all = row?.debug?.company_anchors?.all; + if (!Array.isArray(all)) { + return []; + } + return all.map((v) => text(v).trim()).filter(Boolean); +} + +function hasAnchorUsageInAnswer(row, anchors) { + if (!anchors.length) { + return false; + } + const reply = lower(row?.assistant_reply); + if (!reply) { + return false; + } + if (reply.includes("в опоре использованы якоря вопроса")) { + return true; + } + for (const anchor of anchors) { + const value = lower(anchor); + if (value.length < 3) { + continue; + } + if (reply.includes(value)) { + return true; + } + } + return false; +} + +function evaluateEvidenceStrength(row) { + const status = lower(row?.debug?.answer_grounding_check?.status); + if (status === "grounded") { + return "strong"; + } + if (status === "partial") { + return "weak"; + } + if (status === "no_grounded_answer") { + return "none"; + } + return "limited"; +} + +function evaluateConfidenceStyle(row) { + const reply = lower(row?.assistant_reply); + if (!reply) { + return "unknown"; + } + const hasLimitation = + reply.includes("ограничени") || + reply.includes("частично") || + reply.includes("низкая") || + reply.includes("не подтвержден"); + const hasConfident = + reply.includes("подтверждено") || + reply.includes("доказ") || + reply.includes("подтверждается"); + if (hasLimitation && hasConfident) { + return "mixed"; + } + if (hasLimitation) { + return "limited"; + } + if (hasConfident) { + return "confident"; + } + return "neutral"; +} + +function containsAny(textValue, needles) { + const body = lower(textValue); + return needles.some((needle) => body.includes(lower(needle))); +} + +function evaluateFirstCheckRelevance(row, expectedDomain) { + const reply = text(row?.assistant_reply); + if (!reply) { + return false; + } + if (expectedDomain === "settlements_60_62") { + return containsAny(reply, [ + "договор", + "объект расчет", + "регистр расчет", + "зачет аванс", + "взаимозачет", + "60/62/76" + ]); + } + if (expectedDomain === "vat_document_register_book") { + return containsAny(reply, [ + "ндс", + "счет-фактур", + "книга покуп", + "книга продаж", + "регистр", + "19" + ]); + } + if (expectedDomain === "month_close_costs_20_44") { + return containsAny(reply, [ + "закрыти", + "рбп", + "амортизац", + "косвен", + "20", + "25", + "26", + "44" + ]); + } + return false; +} + +function evaluateGenericAnswer(row) { + const reply = lower(row?.assistant_reply); + if (!reply) { + return true; + } + const genericPatterns = [ + "коротко: проблема с закрытием расчета подтверждается частично", + "сигнал проблемы есть, но механизм подтвержден не полностью", + "вывод сделан по snapshot", + "проверьте договор, объект расчетов, регистр расчетов", + "проверьте договор и объект расчетов" + ]; + const hits = genericPatterns.filter((pattern) => reply.includes(pattern)).length; + return hits >= 2; +} + +function shortQuestion(value, maxLength = 130) { + const q = text(value).replace(/\s+/g, " ").trim(); + if (q.length <= maxLength) { + return q; + } + return `${q.slice(0, maxLength - 3)}...`; +} + +function toPercent(value) { + return Number(value.toFixed(4)); +} + +function buildCaseRow(index, row) { + const expectedDomain = expectedDomainByIndex(index); + const actualDomain = pickActualDomain(row); + const expectedQuestionType = EXPECTED_QUESTION_TYPES[index] || "unknown"; + const actualQuestionType = pickActualQuestionType(row); + const anchors = extractCompanyAnchors(row); + const anchorsPresent = anchors.length > 0; + const anchorsUsed = hasAnchorUsageInAnswer(row, anchors); + const evidenceStrength = evaluateEvidenceStrength(row); + const confidenceStyle = evaluateConfidenceStyle(row); + const firstCheckRelevant = evaluateFirstCheckRelevance(row, expectedDomain); + const genericAnswer = evaluateGenericAnswer(row); + + const reasons = []; + if (actualDomain !== expectedDomain) { + reasons.push("wrong_domain"); + } + if (actualQuestionType !== expectedQuestionType) { + reasons.push("wrong_question_type"); + } + if (anchorsPresent && !anchorsUsed) { + reasons.push("weak_company_anchor_usage"); + } + if (!firstCheckRelevant) { + reasons.push("wrong_first_check"); + } + if (genericAnswer) { + reasons.push("generic_answer"); + } + + let verdict = "PASS"; + if (reasons.length > 0) { + const hardFail = reasons.includes("wrong_domain") || reasons.includes("wrong_first_check"); + verdict = hardFail || reasons.length >= 3 ? "FAIL" : "SOFT_PASS"; + } + + return { + case_id: text(row?.case_id) || `q${String(index + 1).padStart(2, "0")}`, + question_short: shortQuestion(row?.user_message), + expected_domain: expectedDomain, + actual_domain: actualDomain, + expected_question_type: expectedQuestionType, + actual_question_type: actualQuestionType, + company_anchors_present: anchorsPresent, + company_anchors_used_in_answer: anchorsUsed, + evidence_strength: evidenceStrength, + answer_confidence_style: confidenceStyle, + first_check_relevance: firstCheckRelevant, + verdict, + failure_reason_short: reasons.length ? reasons.join(", ") : "none", + is_generic_answer: genericAnswer, + failure_reasons: reasons + }; +} + +function markdownCell(value) { + return text(value).replace(/\|/g, "\\|"); +} + +function buildCaseMatrixMarkdown(rows) { + const lines = []; + lines.push("# Wave 13 Chat20 Case Matrix (Updated)"); + lines.push(""); + lines.push("| case_id | question_short | expected_domain | actual_domain | expected_question_type | actual_question_type | company_anchors_present | company_anchors_used_in_answer | evidence_strength | answer_confidence_style | first_check_relevance | verdict | failure_reason_short |"); + lines.push("|---|---|---|---|---|---|---|---|---|---|---|---|---|"); + for (const row of rows) { + lines.push( + `| ${markdownCell(row.case_id)} | ${markdownCell(row.question_short)} | ${markdownCell(row.expected_domain)} | ${markdownCell(row.actual_domain)} | ${markdownCell(row.expected_question_type)} | ${markdownCell(row.actual_question_type)} | ${markdownCell(row.company_anchors_present)} | ${markdownCell(row.company_anchors_used_in_answer)} | ${markdownCell(row.evidence_strength)} | ${markdownCell(row.answer_confidence_style)} | ${markdownCell(row.first_check_relevance)} | ${markdownCell(row.verdict)} | ${markdownCell(row.failure_reason_short)} |` + ); + } + lines.push(""); + return `${lines.join("\n")}\n`; +} + +function countBy(rows, selector) { + const result = {}; + for (const row of rows) { + const key = selector(row); + if (!result[key]) { + result[key] = 0; + } + result[key] += 1; + } + return result; +} + +function buildRegressionReport(rows, metrics, baselineMetrics) { + const lines = []; + lines.push("# Wave 13 Regression Report"); + lines.push(""); + lines.push(`- Cases: ${rows.length}`); + lines.push(`- PASS: ${metrics.totals.pass}`); + lines.push(`- SOFT_PASS: ${metrics.totals.soft_pass}`); + lines.push(`- FAIL: ${metrics.totals.fail}`); + lines.push(""); + lines.push("## Metric Snapshot"); + lines.push(`- domain_correctness_rate: ${metrics.domain_correctness_rate}`); + lines.push(`- question_type_fit_rate: ${metrics.question_type_fit_rate}`); + lines.push(`- company_anchor_usage_rate: ${metrics.company_anchor_usage_rate}`); + lines.push(`- generic_answer_rate: ${metrics.generic_answer_rate}`); + lines.push(`- first_check_relevance_rate: ${metrics.first_check_relevance_rate}`); + lines.push(""); + if (baselineMetrics) { + lines.push("## Delta vs Baseline"); + for (const key of [ + "domain_correctness_rate", + "question_type_fit_rate", + "company_anchor_usage_rate", + "generic_answer_rate", + "first_check_relevance_rate" + ]) { + const current = Number(metrics[key] ?? 0); + const baseline = Number(baselineMetrics[key] ?? 0); + const delta = Number((current - baseline).toFixed(4)); + lines.push(`- ${key}: ${baseline} -> ${current} (delta ${delta >= 0 ? "+" : ""}${delta})`); + } + lines.push(""); + } + + const failures = rows.filter((row) => row.verdict !== "PASS"); + const reasonCounts = {}; + for (const row of failures) { + for (const reason of row.failure_reasons) { + if (!reasonCounts[reason]) { + reasonCounts[reason] = 0; + } + reasonCounts[reason] += 1; + } + } + const topReasons = Object.entries(reasonCounts).sort((a, b) => b[1] - a[1]).slice(0, 5); + lines.push("## Top Defects"); + if (!topReasons.length) { + lines.push("- No defects detected."); + } else { + for (const [reason, count] of topReasons) { + lines.push(`- ${reason}: ${count}`); + } + } + lines.push(""); + + lines.push("## FAIL Cases"); + for (const row of rows.filter((item) => item.verdict === "FAIL")) { + lines.push(`- ${row.case_id}: ${row.failure_reason_short}`); + } + lines.push(""); + + return `${lines.join("\n")}\n`; +} + +async function main() { + const args = parseArgs(process.argv.slice(2)); + if (!args.rawFile) { + throw new Error("Missing required argument --raw-file"); + } + if (!args.outputDir) { + throw new Error("Missing required argument --output-dir"); + } + + const rawPath = path.resolve(args.rawFile); + const outputDir = path.resolve(args.outputDir); + + const raw = readJson(rawPath); + const rows = Array.isArray(raw?.rows) ? raw.rows : []; + if (rows.length === 0) { + throw new Error("Raw file contains no rows."); + } + + const caseRows = rows.map((row, index) => buildCaseRow(index, row)); + const totalsByVerdict = countBy(caseRows, (row) => row.verdict); + const domainCorrect = caseRows.filter((row) => row.expected_domain === row.actual_domain).length; + const qTypeFit = caseRows.filter((row) => row.expected_question_type === row.actual_question_type).length; + const anchorsPresentCount = caseRows.filter((row) => row.company_anchors_present).length; + const anchorsUsedCount = caseRows.filter( + (row) => row.company_anchors_present && row.company_anchors_used_in_answer + ).length; + const genericCount = caseRows.filter((row) => row.is_generic_answer).length; + const firstCheckRelevantCount = caseRows.filter((row) => row.first_check_relevance).length; + + const metrics = { + schema_version: "wave13_chat20_metrics_v2", + run_id: path.basename(outputDir), + source_session_id: text(raw?.session_id), + totals: { + cases: caseRows.length, + pass: totalsByVerdict.PASS || 0, + soft_pass: totalsByVerdict.SOFT_PASS || 0, + fail: totalsByVerdict.FAIL || 0 + }, + domain_correctness_rate: toPercent(domainCorrect / caseRows.length), + question_type_fit_rate: toPercent(qTypeFit / caseRows.length), + company_anchor_usage_rate: toPercent( + anchorsPresentCount > 0 ? anchorsUsedCount / anchorsPresentCount : 0 + ), + company_anchor_usage_rate_global: toPercent(anchorsUsedCount / caseRows.length), + generic_answer_rate: toPercent(genericCount / caseRows.length), + first_check_relevance_rate: toPercent(firstCheckRelevantCount / caseRows.length), + anchors_present_cases: anchorsPresentCount, + anchors_used_cases: anchorsUsedCount + }; + + let baselineMetrics = null; + if (args.baselineMetricsFile) { + const baselinePath = path.resolve(args.baselineMetricsFile); + if (fs.existsSync(baselinePath)) { + baselineMetrics = readJson(baselinePath); + metrics.baseline_reference = path.basename(baselinePath); + metrics.baseline_metrics = { + domain_correctness_rate: baselineMetrics.domain_correctness_rate, + question_type_fit_rate: baselineMetrics.question_type_fit_rate, + company_anchor_usage_rate: baselineMetrics.company_anchor_usage_rate, + generic_answer_rate: baselineMetrics.generic_answer_rate, + first_check_relevance_rate: baselineMetrics.first_check_relevance_rate + }; + metrics.delta_vs_baseline = { + domain_correctness_rate_delta: toPercent( + Number(metrics.domain_correctness_rate) - + Number(metrics.baseline_metrics.domain_correctness_rate || 0) + ), + question_type_fit_rate_delta: toPercent( + Number(metrics.question_type_fit_rate) - + Number(metrics.baseline_metrics.question_type_fit_rate || 0) + ), + company_anchor_usage_rate_delta: toPercent( + Number(metrics.company_anchor_usage_rate) - + Number(metrics.baseline_metrics.company_anchor_usage_rate || 0) + ), + generic_answer_rate_delta: toPercent( + Number(metrics.generic_answer_rate) - + Number(metrics.baseline_metrics.generic_answer_rate || 0) + ), + first_check_relevance_rate_delta: toPercent( + Number(metrics.first_check_relevance_rate) - + Number(metrics.baseline_metrics.first_check_relevance_rate || 0) + ) + }; + } + } + + const matrixPath = path.join(outputDir, args.caseMatrixFile); + const metricsPath = path.join(outputDir, args.metricsFile); + const reportPath = path.join(outputDir, args.reportFile); + + writeUtf8Bom(matrixPath, buildCaseMatrixMarkdown(caseRows)); + writeUtf8Bom(metricsPath, `${JSON.stringify(metrics, null, 2)}\n`); + writeUtf8Bom(reportPath, buildRegressionReport(caseRows, metrics, baselineMetrics)); + + process.stdout.write( + [ + `rows=${caseRows.length}`, + `matrix=${matrixPath}`, + `metrics=${metricsPath}`, + `report=${reportPath}` + ].join("\n") + ); +} + +main().catch((error) => { + process.stderr.write(`${error?.stack || error}\n`); + process.exitCode = 1; +}); + diff --git a/llm_normalizer/backend/scripts/runCompanyQuestionBatch.js b/llm_normalizer/backend/scripts/runCompanyQuestionBatch.js new file mode 100644 index 0000000..0635d78 --- /dev/null +++ b/llm_normalizer/backend/scripts/runCompanyQuestionBatch.js @@ -0,0 +1,233 @@ +#!/usr/bin/env node + +const fs = require("node:fs"); +const path = require("node:path"); +const request = require("supertest"); + +function parseArgs(argv) { + const args = { + questionsFile: "", + runDir: "", + rawFileName: "chat20_wave13_raw.json", + chatFileName: "Chat20.txt", + chatRuFileName: "Чат20.txt", + promptsFileName: path.join("prompt_dialogs", "chat20_prompts.md"), + useMock: true, + promptVersion: "normalizer_v2_0_2", + sessionId: "", + casePrefix: "q" + }; + + for (let i = 0; i < argv.length; i += 1) { + const token = argv[i]; + if (token === "--questions-file") { + args.questionsFile = String(argv[i + 1] ?? ""); + i += 1; + continue; + } + if (token === "--run-dir") { + args.runDir = String(argv[i + 1] ?? ""); + i += 1; + continue; + } + if (token === "--raw-file") { + args.rawFileName = String(argv[i + 1] ?? args.rawFileName); + i += 1; + continue; + } + if (token === "--chat-file") { + args.chatFileName = String(argv[i + 1] ?? args.chatFileName); + i += 1; + continue; + } + if (token === "--chat-ru-file") { + args.chatRuFileName = String(argv[i + 1] ?? args.chatRuFileName); + i += 1; + continue; + } + if (token === "--prompts-file") { + args.promptsFileName = String(argv[i + 1] ?? args.promptsFileName); + i += 1; + continue; + } + if (token === "--use-mock") { + const value = String(argv[i + 1] ?? "true").toLowerCase(); + args.useMock = value !== "0" && value !== "false" && value !== "no"; + i += 1; + continue; + } + if (token === "--prompt-version") { + args.promptVersion = String(argv[i + 1] ?? args.promptVersion); + i += 1; + continue; + } + if (token === "--session-id") { + args.sessionId = String(argv[i + 1] ?? ""); + i += 1; + continue; + } + if (token === "--case-prefix") { + args.casePrefix = String(argv[i + 1] ?? args.casePrefix); + i += 1; + } + } + + return args; +} + +function ensureDir(dirPath) { + fs.mkdirSync(dirPath, { recursive: true }); +} + +function readJson(filePath) { + const raw = fs.readFileSync(filePath, "utf8").replace(/^\uFEFF/, ""); + return JSON.parse(raw); +} + +function writeUtf8Bom(filePath, content) { + ensureDir(path.dirname(filePath)); + fs.writeFileSync(filePath, `\uFEFF${content}`, "utf8"); +} + +function asText(value) { + return value == null ? "" : String(value); +} + +function makeCaseId(prefix, index) { + return `${prefix}${String(index + 1).padStart(2, "0")}`; +} + +function buildPromptsMarkdown(questions) { + const lines = []; + for (let i = 0; i < questions.length; i += 1) { + lines.push(`${i + 1}. ${questions[i]}`); + lines.push(""); + } + return `${lines.join("\n").trim()}\n`; +} + +function buildChatTxt(sessionId, exportedAt, rows) { + const lines = []; + lines.push("# Assistant conversation export"); + lines.push(`session_id: ${sessionId}`); + lines.push(`exported_at: ${exportedAt}`); + lines.push(""); + + let messageCounter = 1; + for (const row of rows) { + lines.push(`## ${messageCounter}. user`); + lines.push("message_id: pending"); + lines.push("created_at: pending"); + lines.push("reply_type: n/a"); + lines.push(""); + lines.push(asText(row.user_message)); + lines.push(""); + messageCounter += 1; + + lines.push(`## ${messageCounter}. assistant`); + lines.push(`message_id: ${asText(row.message_id) || "n/a"}`); + lines.push(`created_at: ${asText(row.created_at) || "n/a"}`); + lines.push(`reply_type: ${asText(row.reply_type) || "n/a"}`); + if (row.trace_id) { + lines.push(`trace_id: ${asText(row.trace_id)}`); + } + lines.push(""); + lines.push(asText(row.assistant_reply)); + lines.push(""); + messageCounter += 1; + } + + return `${lines.join("\n").trim()}\n`; +} + +async function main() { + const args = parseArgs(process.argv.slice(2)); + if (!args.questionsFile) { + throw new Error("Missing required argument --questions-file"); + } + if (!args.runDir) { + throw new Error("Missing required argument --run-dir"); + } + + const questionsPath = path.resolve(args.questionsFile); + const runDir = path.resolve(args.runDir); + const backendRoot = path.resolve(__dirname, ".."); + + const questions = readJson(questionsPath); + if (!Array.isArray(questions) || questions.length === 0) { + throw new Error("Questions JSON must be a non-empty array of strings."); + } + + const { createApp } = require(path.join(backendRoot, "dist", "server.js")); + const app = createApp(); + + ensureDir(runDir); + ensureDir(path.join(runDir, "prompt_dialogs")); + + const rows = []; + let sessionId = args.sessionId || `wave13-chat20-${Date.now()}`; + + for (let i = 0; i < questions.length; i += 1) { + const userMessage = asText(questions[i]); + const response = await request(app).post("/api/assistant/message").send({ + useMock: args.useMock, + promptVersion: args.promptVersion, + session_id: sessionId, + user_message: userMessage + }); + + const body = response.body || {}; + sessionId = asText(body.session_id) || sessionId; + const item = body.conversation_item || {}; + rows.push({ + case_id: makeCaseId(args.casePrefix, i), + user_message: userMessage, + assistant_reply: asText(body.assistant_reply), + reply_type: asText(body.reply_type), + message_id: asText(item.message_id), + created_at: asText(item.created_at), + trace_id: asText(item.trace_id || body.debug?.trace_id), + http_status: response.status, + debug: body.debug || {} + }); + } + + const exportedAt = new Date().toISOString(); + const rawPayload = { + session_id: sessionId, + exported_at: exportedAt, + cases_total: rows.length, + rows + }; + + const rawPath = path.join(runDir, args.rawFileName); + const chatPath = path.join(runDir, args.chatFileName); + const chatRuPath = path.join(runDir, args.chatRuFileName); + const promptsPath = path.join(runDir, args.promptsFileName); + + writeUtf8Bom(rawPath, `${JSON.stringify(rawPayload, null, 2)}\n`); + const chatBody = buildChatTxt(sessionId, exportedAt, rows); + writeUtf8Bom(chatPath, chatBody); + if (args.chatRuFileName) { + writeUtf8Bom(chatRuPath, chatBody); + } + writeUtf8Bom(promptsPath, buildPromptsMarkdown(questions)); + + process.stdout.write( + [ + `run_dir=${runDir}`, + `session_id=${sessionId}`, + `cases_total=${rows.length}`, + `raw=${rawPath}`, + `chat=${chatPath}`, + `chat_ru=${chatRuPath}`, + `prompts=${promptsPath}` + ].join("\n") + ); +} + +main().catch((error) => { + process.stderr.write(`${error?.stack || error}\n`); + process.exitCode = 1; +}); + diff --git a/llm_normalizer/backend/src/services/answerComposer.ts b/llm_normalizer/backend/src/services/answerComposer.ts index 93e0eef..564e00a 100644 --- a/llm_normalizer/backend/src/services/answerComposer.ts +++ b/llm_normalizer/backend/src/services/answerComposer.ts @@ -9,6 +9,8 @@ import type { RouteHintSummary } from "../types/normalizer"; import type { AnswerStructureV11, EvidenceConfidence, EvidenceItem, EvidenceLimitationReasonCode } from "../types/stage1Contracts"; import type { ProblemUnit, ProblemUnitSummary, ProblemUnitType } from "../types/stage2ProblemUnits"; +import type { QuestionTypeClass } from "./questionTypeResolver"; +import type { CompanyAnchorSet } from "./companyAnchorResolver"; type ProblemAnswerMode = "stage1_policy_v11" | "stage2_problem_centric_v1" | "stage3_lifecycle_aware_v1"; @@ -20,6 +22,8 @@ interface ComposeAnswerInput { coverageReport: RequirementCoverageReport; groundingCheck: AnswerGroundingCheck; focusDomainHint?: string | null; + questionTypeHint?: QuestionTypeClass | null; + companyAnchors?: CompanyAnchorSet | null; enableAnswerPolicyV11?: boolean; enableProblemCentricAnswerV1?: boolean; enableLifecycleAnswerV1?: boolean; @@ -47,6 +51,123 @@ function uniqueStrings(values: string[], limit = 6): string[] { return Array.from(new Set(values.map((item) => item.trim()).filter(Boolean))).slice(0, limit); } +interface CompanyAnchorUsage { + present: string[]; + used: string[]; + unused: string[]; +} + +interface AnswerRenderContext { + questionType: QuestionTypeClass; + focusDomain: P0NarrativeDomain; + anchors: CompanyAnchorUsage; +} + +function withUniquePush(target: string[], value: string): void { + const normalized = String(value ?? "").trim(); + if (!normalized) { + return; + } + if (!target.includes(normalized)) { + target.push(normalized); + } +} + +function normalizeAnchorForMatch(value: string): string { + return String(value ?? "") + .toLowerCase() + .replace(/[^\p{L}\p{N}.:/-]+/gu, " ") + .replace(/\s+/g, " ") + .trim(); +} + +function collectCompanyAnchorTokens(anchors: CompanyAnchorSet | null | undefined): string[] { + if (!anchors) { + return []; + } + const tokens: string[] = []; + for (const item of anchors.contract_numbers ?? []) withUniquePush(tokens, item); + for (const item of anchors.document_numbers ?? []) withUniquePush(tokens, item); + for (const item of anchors.dates ?? []) withUniquePush(tokens, item); + for (const item of anchors.amounts ?? []) withUniquePush(tokens, item); + for (const item of anchors.accounts ?? []) withUniquePush(tokens, `\u0441\u0447\u0435\u0442 ${item}`); + for (const item of anchors.accounts ?? []) withUniquePush(tokens, item); + for (const item of anchors.periods ?? []) withUniquePush(tokens, item); + for (const item of anchors.document_types ?? []) withUniquePush(tokens, item); + for (const item of anchors.all ?? []) withUniquePush(tokens, item); + return uniqueStrings(tokens, 48); +} + +function collectRetrievalCorpus(results: UnifiedRetrievalResult[]): string { + const chunks: string[] = []; + for (const result of results) { + chunks.push(JSON.stringify(result.summary ?? {})); + for (const item of result.items.slice(0, 10)) { + chunks.push(JSON.stringify(item)); + } + for (const evidence of result.evidence.slice(0, 16)) { + chunks.push(JSON.stringify(evidence)); + } + chunks.push(...result.why_included.slice(0, 16)); + chunks.push(...result.selection_reason.slice(0, 16)); + chunks.push(...result.business_interpretation.slice(0, 16)); + } + return chunks.join(" ").toLowerCase(); +} + +function isAnchorMatchedInCorpus(anchor: string, corpus: string): boolean { + const normalized = normalizeAnchorForMatch(anchor); + if (!normalized) { + return false; + } + if (normalized.length < 3) { + return false; + } + if (corpus.includes(normalized)) { + return true; + } + const withoutPrefix = normalized + .replace(/^(?:\u0434\u043e\u0433\u043e\u0432\u043e\u0440|document|account|period|doc_type)\s*[:№#]?\s*/iu, "") + .trim(); + if (withoutPrefix.length >= 3 && corpus.includes(withoutPrefix)) { + return true; + } + if (/^\d+(?:[.,]\d{2})?$/.test(withoutPrefix)) { + const normalizedAmount = withoutPrefix.replace(",", "."); + return corpus.includes(withoutPrefix) || corpus.includes(normalizedAmount); + } + return false; +} + +function evaluateCompanyAnchorUsage( + anchors: CompanyAnchorSet | null | undefined, + retrievalResults: UnifiedRetrievalResult[] +): CompanyAnchorUsage { + const present = collectCompanyAnchorTokens(anchors); + if (present.length === 0) { + return { + present: [], + used: [], + unused: [] + }; + } + const corpus = normalizeAnchorForMatch(collectRetrievalCorpus(retrievalResults)); + const used: string[] = []; + const unused: string[] = []; + for (const anchor of present) { + if (isAnchorMatchedInCorpus(anchor, corpus)) { + withUniquePush(used, anchor); + } else { + withUniquePush(unused, anchor); + } + } + return { + present: uniqueStrings(present, 24), + used: uniqueStrings(used, 12), + unused: uniqueStrings(unused, 12) + }; +} + const UUID_PATTERN = /\b[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\b/gi; const LONG_HEX_PATTERN = /\b[0-9a-f]{24,}\b/gi; const RAW_REF_BLOB_PATTERN = /\bevidence_source_ref_v1\|[^\s,;]+/gi; @@ -1129,6 +1250,12 @@ function isProblemUnitAlignedWithNarrativeDomain(unit: ProblemUnit, domain: P0Na } if (domain === "vat_document_register_book") { + const foreignVatDomain = ["period_close", "deferred_expense", "fixed_asset", "bank_settlement", "customer_settlement"].includes( + String(unit.lifecycle_domain ?? "") + ); + if (foreignVatDomain && !hasControlledCrossDomainHandoff(unit)) { + return false; + } if (unit.lifecycle_domain === "vat_flow") { return true; } @@ -1139,6 +1266,12 @@ function isProblemUnitAlignedWithNarrativeDomain(unit: ProblemUnit, domain: P0Na } if (domain === "month_close_costs_20_44") { + const foreignMonthCloseDomain = ["vat_flow", "bank_settlement", "customer_settlement", "fixed_asset"].includes( + String(unit.lifecycle_domain ?? "") + ); + if (foreignMonthCloseDomain && !hasControlledCrossDomainHandoff(unit)) { + return false; + } if ( unit.lifecycle_domain === "period_close" || unit.lifecycle_domain === "deferred_expense" || @@ -1775,12 +1908,178 @@ function mapDefectTokenToNarrative(value: string): string | null { return null; } +const KNOWN_ACCOUNT_PREFIXES = new Set([ + "01", + "02", + "07", + "08", + "10", + "13", + "19", + "20", + "21", + "23", + "25", + "26", + "28", + "29", + "41", + "43", + "44", + "45", + "50", + "51", + "52", + "55", + "57", + "58", + "60", + "62", + "66", + "67", + "68", + "69", + "70", + "71", + "73", + "76", + "90", + "91", + "94", + "96", + "97" +]); + +function collectDateLikeSpansForNarrative(text: string): Array<{ start: number; end: number }> { + const spans: Array<{ start: number; end: number }> = []; + const patterns = [ + /\b20\d{2}[./-](?:0[1-9]|1[0-2])(?:[./-](?:0[1-9]|[12]\d|3[01]))?\b/g, + /\b(?:0?[1-9]|[12]\d|3[01])[./-](?:0?[1-9]|1[0-2])[./-](?:\d{2}|\d{4})\b/g, + /\b(?:0?[1-9]|[12]\d|3[01])\s+(?:января|февраля|марта|апреля|мая|июня|июля|августа|сентября|октября|ноября|декабря)\b/giu + ]; + for (const pattern of patterns) { + let match: RegExpExecArray | null = null; + while ((match = pattern.exec(text)) !== null) { + spans.push({ + start: match.index, + end: match.index + match[0].length + }); + } + } + return spans; +} + +function collectAmountLikeSpansForNarrative(text: string): Array<{ start: number; end: number }> { + const spans: Array<{ start: number; end: number }> = []; + const pattern = /\b\d{1,3}(?:[ \u00A0]\d{3})+(?:[.,]\d{2})?\b/g; + let match: RegExpExecArray | null = null; + while ((match = pattern.exec(text)) !== null) { + spans.push({ + start: match.index, + end: match.index + match[0].length + }); + } + return spans; +} + +function intersectsNarrativeSpan( + start: number, + end: number, + spans: Array<{ start: number; end: number }> +): boolean { + return spans.some((span) => start < span.end && end > span.start); +} + +function hasAccountContextMarker(text: string, start: number, end: number): boolean { + const left = text.slice(Math.max(0, start - 24), start); + const right = text.slice(end, Math.min(text.length, end + 24)); + return /(?:счет|сч\.?|account|schet|по\s+60|по\s+62|по\s+19|по\s+68|по\s+20|по\s+25|по\s+26|по\s+44|расчет|ндс|закрыти|рбп|амортиз|settlement|vat|close)/iu.test( + `${left} ${right}` + ); +} + +function toKnownAccountToken(value: string): string | null { + const token = String(value ?? "").trim(); + const prefix = token.match(/^(\d{2})/)?.[1]; + if (!prefix || !KNOWN_ACCOUNT_PREFIXES.has(prefix)) { + return null; + } + return token; +} + function extractAccountNumbers(values: string[]): string[] { - const numbers = values.flatMap((value) => { - const matches = String(value ?? "").match(/\b\d{2}(?:\.\d{1,2})?\b/g); - return matches ?? []; - }); - return uniqueStrings(numbers, 12); + const tokens: string[] = []; + for (const value of values) { + const raw = String(value ?? ""); + const matches = raw.match(/\b\d{2}(?:\.\d{1,2})?\b/g) ?? []; + for (const match of matches) { + const account = toKnownAccountToken(match); + if (account) { + tokens.push(account); + } + } + } + return uniqueStrings(tokens, 16); +} + +function extractAccountNumbersFromNarrativeText(value: string): string[] { + const text = String(value ?? "").toLowerCase(); + if (!text.trim()) { + return []; + } + + const result: string[] = []; + const dateSpans = collectDateLikeSpansForNarrative(text); + const amountSpans = collectAmountLikeSpansForNarrative(text); + const blockedSpans = [...dateSpans, ...amountSpans]; + + const contextualPattern = + /(?:\b(?:счет(?:а|у|ом|ов)?|сч\.?|account(?:s)?|schet(?:a|u|om|ov)?)\b)\s*(?:№|#|:)?\s*([0-9./,\sиand]{2,96})/giu; + let contextualMatch: RegExpExecArray | null = null; + while ((contextualMatch = contextualPattern.exec(text)) !== null) { + const chunk = String(contextualMatch[1] ?? ""); + const chunkTokens = chunk.match(/\b\d{2}(?:\.\d{1,2})?\b/g) ?? []; + for (const token of chunkTokens) { + const account = toKnownAccountToken(token); + if (account) { + result.push(account); + } + } + } + + const accountPairPattern = /\b(\d{2}(?:\.\d{1,2})?)\s*\/\s*(\d{2}(?:\.\d{1,2})?)\b/g; + let pairMatch: RegExpExecArray | null = null; + while ((pairMatch = accountPairPattern.exec(text)) !== null) { + const left = toKnownAccountToken(String(pairMatch[1] ?? "")); + const right = toKnownAccountToken(String(pairMatch[2] ?? "")); + if (left) { + result.push(left); + } + if (right) { + result.push(right); + } + } + + const explicitPattern = /\b\d{2}(?:\.\d{1,2})?\b/g; + let explicitMatch: RegExpExecArray | null = null; + while ((explicitMatch = explicitPattern.exec(text)) !== null) { + const token = String(explicitMatch[0] ?? ""); + const account = toKnownAccountToken(token); + if (!account) { + continue; + } + const start = explicitMatch.index; + const end = start + token.length; + if (intersectsNarrativeSpan(start, end, blockedSpans)) { + continue; + } + if (!hasAccountContextMarker(text, start, end)) { + continue; + } + result.push(account); + } + + return uniqueStrings(result, 16); } function inferP0NarrativeDomain(units: ProblemUnit[]): P0NarrativeDomain { @@ -1914,8 +2213,8 @@ function collectSemanticProfileScopes(results: UnifiedRetrievalResult[]): { acco }; } -interface SettlementEvidenceGrounding { - has_settlement_primary: boolean; +interface P0DomainEvidenceGrounding { + has_primary: boolean; has_foreign_primary: boolean; foreign_primary_domains: string[]; blocked: boolean; @@ -1935,10 +2234,28 @@ function isSettlementDomainToken(value: string): boolean { return /(?:bank_settlement|customer_settlement|settlements?|supplier_payments|suppliers?|customers?)/i.test(String(value ?? "")); } +function isVatDomainToken(value: string): boolean { + return /(?:vat_flow|vat|nds|taxes?|purchase_book|sales_book|invoice|book_entry|register)/i.test(String(value ?? "")); +} + +function isMonthCloseDomainToken(value: string): boolean { + return /(?:period_close|month_close|close_operation|cost_close|cost_allocation|deferred_expense)/i.test(String(value ?? "")); +} + function isForeignToSettlementDomainToken(value: string): boolean { return /(?:vat_flow|vat|deferred_expense|period_close|fixed_asset|fixed_assets|taxes?)/i.test(String(value ?? "")); } +function isForeignToVatDomainToken(value: string): boolean { + return /(?:bank_settlement|customer_settlement|settlements?|period_close|deferred_expense|fixed_asset|fixed_assets|month_close)/i.test( + String(value ?? "") + ); +} + +function isForeignToMonthCloseDomainToken(value: string): boolean { + return /(?:bank_settlement|customer_settlement|settlements?|vat_flow|vat|fixed_asset|fixed_assets)/i.test(String(value ?? "")); +} + function collectResultAccounts(result: UnifiedRetrievalResult): string[] { const accounts: string[] = []; const semanticProfile = summaryValue(result, "semantic_profile"); @@ -1985,46 +2302,111 @@ function isSubstantiveResult(result: UnifiedRetrievalResult): boolean { return result.items.length > 0 || result.evidence.length > 0; } -function evaluateSettlementEvidenceGrounding(results: UnifiedRetrievalResult[]): SettlementEvidenceGrounding { - const substantive = results.filter((item) => isSubstantiveResult(item)); - if (substantive.length === 0) { +function evaluateP0DomainEvidenceGrounding( + results: UnifiedRetrievalResult[], + focusDomain: P0NarrativeDomain +): P0DomainEvidenceGrounding { + if (!focusDomain) { return { - has_settlement_primary: false, + has_primary: false, has_foreign_primary: false, foreign_primary_domains: [], blocked: false }; } - const classify = (result: UnifiedRetrievalResult): { settlement: boolean; foreignDomains: string[] } => { + const substantive = results.filter((item) => isSubstantiveResult(item)); + if (substantive.length === 0) { + return { + has_primary: false, + has_foreign_primary: false, + foreign_primary_domains: [], + blocked: false + }; + } + + const classify = (result: UnifiedRetrievalResult): { inDomain: boolean; foreignDomains: string[] } => { const accounts = collectResultAccounts(result); const domains = collectResultDomains(result); const relations = collectResultRelations(result); - const settlement = - accounts.some((item) => isSettlementAccountToken(item) || /^(?:51|76)(?:\.|$)/.test(item)) || - domains.some((item) => isSettlementDomainToken(item)) || - relations.some((item) => /payment_to_settlement|statement_to_document|contract_to_documents/.test(item)); - const foreignDomains = domains.filter((item) => isForeignToSettlementDomainToken(item)); + let inDomain = false; + let foreignDomains: string[] = []; + + if (focusDomain === "settlements_60_62") { + inDomain = + accounts.some((item) => isSettlementAccountToken(item) || /^(?:51|76)(?:\.|$)/.test(item)) || + domains.some((item) => isSettlementDomainToken(item)) || + relations.some((item) => /payment_to_settlement|statement_to_document|contract_to_documents|linked_to_settlement|settlement_closed/.test(item)); + foreignDomains = domains.filter((item) => isForeignToSettlementDomainToken(item)); + } else if (focusDomain === "vat_document_register_book") { + inDomain = + accounts.some((item) => isVatAccountToken(item)) || + domains.some((item) => isVatDomainToken(item)) || + relations.some((item) => + /invoice_to_vat|source_doc_present|invoice_linked|book_entry_generated|deduction_posted|register_to_book|vat_/i.test(item) + ); + foreignDomains = domains.filter((item) => isForeignToVatDomainToken(item)); + } else if (focusDomain === "month_close_costs_20_44") { + inDomain = + accounts.some((item) => isCloseCostsAccountToken(item)) || + domains.some((item) => isMonthCloseDomainToken(item)) || + relations.some((item) => + /costs_accumulated|allocation_rules_resolved|close_operation_runs|residuals_zero|close_operation|period_close|allocation|writeoff/i.test( + item + ) + ); + foreignDomains = domains.filter((item) => isForeignToMonthCloseDomainToken(item)); + } + return { - settlement, + inDomain, foreignDomains: uniqueStrings(foreignDomains, 8) }; }; const top = substantive[0]; const topClass = classify(top); - const hasAnySettlement = substantive.some((item) => classify(item).settlement); - const hasForeignPrimary = topClass.foreignDomains.length > 0 && !topClass.settlement; - const blocked = hasForeignPrimary && !hasAnySettlement && !hasControlledCrossDomainHandoffInResult(top); + const hasAnyPrimary = substantive.some((item) => classify(item).inDomain); + const hasForeignPrimary = topClass.foreignDomains.length > 0 && !topClass.inDomain; + const blocked = hasForeignPrimary && !hasAnyPrimary && !hasControlledCrossDomainHandoffInResult(top); return { - has_settlement_primary: hasAnySettlement, + has_primary: hasAnyPrimary, has_foreign_primary: hasForeignPrimary, foreign_primary_domains: topClass.foreignDomains, blocked }; } +function hasStrongNarrativeDomainSignalInText(userMessage: string, domain: P0NarrativeDomain): boolean { + if (!domain) { + return false; + } + const text = String(userMessage ?? "").toLowerCase(); + const accountTokens = extractAccountNumbersFromNarrativeText(text); + if (domain === "settlements_60_62") { + return ( + accountTokens.some((item) => isSettlementAccountToken(item)) || + /(60\.0[12]|62\.0[12]|долг|аванс|зач[её]т|взаимозач|расч[её]т)/i.test(text) + ); + } + if (domain === "vat_document_register_book") { + return ( + accountTokens.some((item) => isVatAccountToken(item)) || + /(ндс|vat|счет[-\s]?фактур|сч[её]т[-\s]?фактур|книг[аи]|регистр)/i.test(text) + ); + } + if (domain === "month_close_costs_20_44") { + return ( + accountTokens.some((item) => isCloseCostsAccountToken(item)) || + /(закрыти[ея]\s+месяц|закрытие\s+счетов|регламентн|косвенн|затрат|распределени|рбп|амортиз|финансовых\s+результат|month\s*close|period\s*close|close\s+operation)/i.test( + text + ) + ); + } + return false; +} + function inferP0FocusNarrativeDomain( userMessage: string, results: UnifiedRetrievalResult[], @@ -2032,20 +2414,30 @@ function inferP0FocusNarrativeDomain( focusDomainHint?: string | null ): P0NarrativeDomain { const fromHint = p0NarrativeDomainFromHint(focusDomainHint); + const fromMessage = inferNarrativeDomainFromText(userMessage); + const strongFromMessage = Boolean(fromMessage && hasStrongNarrativeDomainSignalInText(userMessage, fromMessage)); + const fromDomainGuard = inferP0NarrativeDomainFromDomainGuards(results); + if (fromHint && fromMessage && fromHint !== fromMessage) { + return strongFromMessage ? fromMessage : fromHint; + } if (fromHint) { return fromHint; } - const fromDomainGuard = inferP0NarrativeDomainFromDomainGuards(results); + if (fromDomainGuard && fromMessage && fromDomainGuard !== fromMessage) { + return strongFromMessage ? fromMessage : fromDomainGuard; + } if (fromDomainGuard) { return fromDomainGuard; } - const fromMessage = inferNarrativeDomainFromText(userMessage); + if (strongFromMessage) { + return fromMessage; + } if (fromMessage) { return fromMessage; } const semanticScopes = collectSemanticProfileScopes(results); - const messageAccounts = extractAccountNumbers([userMessage]); + const messageAccounts = extractAccountNumbersFromNarrativeText(userMessage); const hasExplicitP0AccountSignal = [...messageAccounts, ...semanticScopes.accounts].some( (item) => isSettlementAccountToken(item) || isVatAccountToken(item) || isCloseCostsAccountToken(item) ); @@ -2224,14 +2616,22 @@ function buildDirectAnswer(input: { mode: PolicyMode; retrievalResults: UnifiedRetrievalResult[]; policySignals: PolicySignals; + focusDomain: P0NarrativeDomain; }): string { const topFact = humanizeFactForDirectAnswer(firstMeaningfulFact(input.retrievalResults)); + const domainAnchor = domainNarrativeAnchor(input.focusDomain); + const topFactDomain = topFact ? inferNarrativeDomainFromText(topFact) : null; + const topFactAligned = Boolean(topFact) && (!input.focusDomain || topFactDomain === input.focusDomain); + const preferredFact = topFactAligned ? topFact : null; if (input.mode === "focused_grounded") { - return topFact ?? "Проблема подтверждена на текущей опоре и готова к точечной проверке."; + return preferredFact ?? domainAnchor ?? "Проблема подтверждена на текущей опоре и готова к точечной проверке."; } if (input.mode === "broad_partial") { - if (topFact) { - return `${topFact.replace(/[.!?]+$/u, "")}; подтверждение пока частичное.`; + if (preferredFact) { + return `${preferredFact.replace(/[.!?]+$/u, "")}; подтверждение пока частичное.`; + } + if (domainAnchor) { + return `${domainAnchor.replace(/[.!?]+$/u, "")}; подтверждение пока частичное.`; } return "Есть признаки проблемы, но опора частичная и вывод ограничен."; } @@ -2338,11 +2738,23 @@ function buildProblemCentricAnswerStructure(input: { 6 ); const evidenceIds = uniqueStrings(input.evidenceItems.map((item) => item.evidence_id), 10); + const aggregateEvidenceConfidence = aggregateConfidence(input.retrievalResults, input.evidenceItems); + const hasCriticalEvidenceLimitation = + input.limitationReasonCodes.includes("weak_source_mapping") || + input.limitationReasonCodes.includes("insufficient_detail"); + const confidenceLimited = + input.mode !== "focused_grounded" || + weakUnits || + input.domainLockMiss || + input.limitationReasonCodes.includes("missing_mechanism") || + input.limitationReasonCodes.includes("heuristic_inference") || + hasCriticalEvidenceLimitation || + aggregateEvidenceConfidence === "low"; const mechanismStatus: AnswerStructureV11["mechanism_block"]["status"] = unitMechanismNotes.length === 0 ? "unresolved" - : weakUnits || input.limitationReasonCodes.includes("missing_mechanism") + : confidenceLimited ? "limited" : "grounded"; @@ -2453,21 +2865,50 @@ function limitationReasonToUserText(code: EvidenceLimitationReasonCode): string function inferNarrativeDomainFromText(value: string): P0NarrativeDomain { const text = String(value ?? "").toLowerCase(); - const accountTokens = extractAccountNumbers([text]); - const hasSettlementLexicalSignal = /(оплат|долг|аванс|взаимозач|зачет|зачёт|поставщ|покупат|не\s+сход)/i.test(text); + const accountTokens = extractAccountNumbersFromNarrativeText(text); - if (accountTokens.some((token) => isSettlementAccountToken(token)) || hasSettlementLexicalSignal) { - return "settlements_60_62"; + let settlementScore = 0; + let vatScore = 0; + let monthCloseScore = 0; + + if (accountTokens.some((token) => isSettlementAccountToken(token))) { + settlementScore += 3; } - if (accountTokens.some((token) => isVatAccountToken(token)) || /(ндс|счет[-\s]?фактур|регистр|книг)/i.test(text)) { - return "vat_document_register_book"; + if (accountTokens.some((token) => isVatAccountToken(token))) { + vatScore += 3; + } + if (accountTokens.some((token) => isCloseCostsAccountToken(token))) { + monthCloseScore += 3; + } + + if (/(долг|аванс|взаимозач|зачет|зачёт|62\.01|62\.02|60\.01|60\.02|не\s+сход)/i.test(text)) { + settlementScore += 2; + } + if (/(ндс|vat|счет[-\s]?фактур|сч[её]т[-\s]?фактур|книг[аи]|регистр)/i.test(text)) { + vatScore += 3; } if ( - accountTokens.some((token) => isCloseCostsAccountToken(token)) || - /(закрыти[ея]\s+месяц|затрат|распределени|списан)/i.test(text) + /(закрыти[ея]\s+месяц|закрытие\s+счетов|регламентн|косвенн|затрат|распределени|рбп|амортиз|финансовых\s+результат|month\s*close|period\s*close|close\s+operation)/i.test( + text + ) ) { + monthCloseScore += 3; + } + + const maxScore = Math.max(settlementScore, vatScore, monthCloseScore); + if (maxScore <= 0) { + return null; + } + // Tie-break prioritizes explicit VAT and month-close lexical markers over broad settlement wording. + if (vatScore === maxScore) { + return "vat_document_register_book"; + } + if (monthCloseScore === maxScore) { return "month_close_costs_20_44"; } + if (settlementScore === maxScore) { + return "settlements_60_62"; + } return null; } @@ -2578,6 +3019,11 @@ function buildEvidenceSectionLines(structure: AnswerStructureV11): string[] { const claimLinks = Array.isArray(structure.evidence_block.claim_evidence_links) ? structure.evidence_block.claim_evidence_links.length : 0; + const reliabilityLimited = + structure.mechanism_block.status !== "grounded" || + structure.uncertainty_block.limitations.length > 0 || + structure.uncertainty_block.open_uncertainties.length > 0 || + structure.evidence_block.coverage_note === "coverage_partial_or_limited"; const lines: string[] = []; const coverageSplitLines = buildCoverageSplitLines(structure); @@ -2593,7 +3039,7 @@ function buildEvidenceSectionLines(structure: AnswerStructureV11): string[] { if (structure.evidence_block.coverage_note === "coverage_partial_or_limited") { lines.push("Опора частичная: часть требований покрыта не полностью."); } else if (evidenceCount > 0) { - lines.push("Опора достаточна для первичного вывода."); + lines.push(reliabilityLimited ? "Опора есть, но достаточна только для предварительного вывода." : "Опора достаточна для первичного вывода."); } if (lines.length === 0) { @@ -2678,6 +3124,8 @@ function humanizeLimitationToken(value: string): string | null { if (normalized === "missing_anchor:account") return "Счет или группа счетов не указаны."; if (normalized === "missing_anchor:document_or_object") return "Не указан документ или объект для трассировки."; if (normalized === "missing_anchor:counterparty") return "Не указан контрагент или договор."; + if (normalized === "primary_domain_evidence_not_confirmed") + return "Целевой механизм активного домена подтвержден частично; вывод ограничен."; if (normalized === "settlement_primary_evidence_not_confirmed") return "Опора по расчетному контуру не подтверждена: в приоритете были сигналы из смежных доменов."; if (normalized.includes("snapshot")) return "Вывод сделан по snapshot и может не включать часть цепочки."; @@ -2733,22 +3181,188 @@ function buildLimitationsSectionLines(structure: AnswerStructureV11): string[] { return ["Существенных ограничений в текущем срезе не выявлено."]; } -function renderPolicyReply(structure: AnswerStructureV11): string { +function domainNameForQuestionType(domain: P0NarrativeDomain): string { + if (domain === "settlements_60_62") return "\u0440\u0430\u0441\u0447\u0435\u0442\u043d\u043e\u0433\u043e \u043a\u043e\u043d\u0442\u0443\u0440\u0430"; + if (domain === "vat_document_register_book") return "\u0446\u0435\u043f\u043e\u0447\u043a\u0438 \u041d\u0414\u0421"; + if (domain === "month_close_costs_20_44") + return "\u043a\u043e\u043d\u0442\u0443\u0440\u0430 \u0437\u0430\u043a\u0440\u044b\u0442\u0438\u044f \u043c\u0435\u0441\u044f\u0446\u0430"; + return "\u0432\u044b\u0431\u0440\u0430\u043d\u043d\u043e\u0433\u043e \u0443\u0447\u0430\u0441\u0442\u043a\u0430"; +} + +function buildQuestionTypeShortLine(context: AnswerRenderContext): string | null { + const domainName = domainNameForQuestionType(context.focusDomain); + if (context.questionType === "where_break_is") { + return `\u041f\u0440\u0438\u043e\u0440\u0438\u0442\u0435\u0442 \u043e\u0442\u0432\u0435\u0442\u0430: \u043b\u043e\u043a\u0430\u043b\u0438\u0437\u043e\u0432\u0430\u0442\u044c \u0440\u0430\u0437\u0440\u044b\u0432 \u0432\u043d\u0443\u0442\u0440\u0438 ${domainName}.`; + } + if (context.questionType === "prove_or_guess") { + return "\u041f\u0440\u0438\u043e\u0440\u0438\u0442\u0435\u0442 \u043e\u0442\u0432\u0435\u0442\u0430: \u0440\u0430\u0437\u0432\u0435\u0441\u0442\u0438 \u0434\u043e\u043a\u0430\u0437\u0430\u043d\u043e \u0438 \u0433\u0438\u043f\u043e\u0442\u0435\u0437\u0443."; + } + if (context.questionType === "what_is_it_grounded_on") { + return "\u041f\u0440\u0438\u043e\u0440\u0438\u0442\u0435\u0442 \u043e\u0442\u0432\u0435\u0442\u0430: \u043f\u043e\u043a\u0430\u0437\u0430\u0442\u044c \u043e\u0441\u043d\u043e\u0432\u0430\u043d\u0438\u0435 \u0432\u044b\u0432\u043e\u0434\u0430 \u043f\u043e \u0434\u0430\u043d\u043d\u044b\u043c."; + } + if (context.questionType === "which_chains_are_complete_vs_incomplete") { + return "\u041f\u0440\u0438\u043e\u0440\u0438\u0442\u0435\u0442 \u043e\u0442\u0432\u0435\u0442\u0430: \u0440\u0430\u0437\u0434\u0435\u043b\u0438\u0442\u044c \u0437\u0430\u0432\u0435\u0440\u0448\u0435\u043d\u043d\u044b\u0435 \u0438 \u043d\u0435\u0437\u0430\u0432\u0435\u0440\u0448\u0435\u043d\u043d\u044b\u0435 \u0446\u0435\u043f\u043e\u0447\u043a\u0438."; + } + if (context.questionType === "what_to_check_first") { + return "\u041f\u0440\u0438\u043e\u0440\u0438\u0442\u0435\u0442 \u043e\u0442\u0432\u0435\u0442\u0430: \u0434\u0430\u0442\u044c \u043f\u0435\u0440\u0432\u044b\u0439 \u043c\u0430\u0440\u0448\u0440\u0443\u0442 \u043f\u0440\u043e\u0432\u0435\u0440\u043a\u0438."; + } + return null; +} + +function buildQuestionTypeBrokenLine(context: AnswerRenderContext): string | null { + if (context.questionType !== "where_break_is") { + return null; + } + if (context.focusDomain === "settlements_60_62") { + return "\u0412\u0435\u0440\u043e\u044f\u0442\u043d\u044b\u0439 \u0443\u0437\u0435\u043b \u0440\u0430\u0437\u0440\u044b\u0432\u0430: \u043f\u0440\u0438\u0432\u044f\u0437\u043a\u0430 \u043e\u043f\u043b\u0430\u0442\u044b \u043a \u043e\u0431\u044a\u0435\u043a\u0442\u0443 \u0440\u0430\u0441\u0447\u0435\u0442\u043e\u0432 \u0438 \u0434\u043e\u043a\u0443\u043c\u0435\u043d\u0442\u0443 \u0437\u0430\u043a\u0440\u044b\u0442\u0438\u044f."; + } + if (context.focusDomain === "vat_document_register_book") { + return "\u0412\u0435\u0440\u043e\u044f\u0442\u043d\u044b\u0439 \u0443\u0437\u0435\u043b \u0440\u0430\u0437\u0440\u044b\u0432\u0430: \u0441\u0432\u044f\u0437\u043a\u0430 \u0438\u0441\u0445\u043e\u0434\u043d\u043e\u0433\u043e \u0434\u043e\u043a\u0443\u043c\u0435\u043d\u0442\u0430, \u0441\u0447\u0435\u0442\u0430-\u0444\u0430\u043a\u0442\u0443\u0440\u044b \u0438 \u0437\u0430\u043f\u0438\u0441\u0438 \u043a\u043d\u0438\u0433\u0438."; + } + if (context.focusDomain === "month_close_costs_20_44") { + return "\u0412\u0435\u0440\u043e\u044f\u0442\u043d\u044b\u0439 \u0443\u0437\u0435\u043b \u0440\u0430\u0437\u0440\u044b\u0432\u0430: \u043f\u0435\u0440\u0435\u0445\u043e\u0434 \u043e\u0442 \u043d\u0430\u043a\u043e\u043f\u043b\u0435\u043d\u0438\u044f \u0437\u0430\u0442\u0440\u0430\u0442 \u043a \u0440\u0430\u0441\u043f\u0440\u0435\u0434\u0435\u043b\u0435\u043d\u0438\u044e/\u0437\u0430\u043a\u0440\u044b\u0442\u0438\u044e."; + } + return "\u0412\u0435\u0440\u043e\u044f\u0442\u043d\u044b\u0439 \u0443\u0437\u0435\u043b \u0440\u0430\u0437\u0440\u044b\u0432\u0430 \u043b\u043e\u043a\u0430\u043b\u0438\u0437\u043e\u0432\u0430\u043d \u0447\u0430\u0441\u0442\u0438\u0447\u043d\u043e; \u043d\u0443\u0436\u043d\u0430 \u0442\u043e\u0447\u0435\u0447\u043d\u0430\u044f \u0441\u0432\u0435\u0440\u043a\u0430."; +} + +function buildQuestionTypeWhyLine(context: AnswerRenderContext): string | null { + if (context.questionType === "prove_or_guess") { + return "\u0417\u0434\u0435\u0441\u044c \u0447\u0435\u0441\u0442\u043d\u043e \u0440\u0430\u0437\u0432\u043e\u0434\u0438\u0442\u0441\u044f \u0447\u0442\u043e \u0443\u0436\u0435 \u043f\u043e\u0434\u0442\u0432\u0435\u0440\u0436\u0434\u0435\u043d\u043e \u0438 \u0447\u0442\u043e \u043f\u043e\u043a\u0430 \u043e\u0441\u0442\u0430\u0435\u0442\u0441\u044f \u0433\u0438\u043f\u043e\u0442\u0435\u0437\u043e\u0439."; + } + if (context.questionType === "which_chains_are_complete_vs_incomplete") { + return "\u0426\u0435\u043f\u043e\u0447\u043a\u0438 \u0440\u0430\u0437\u0434\u0435\u043b\u0435\u043d\u044b \u043d\u0430 \u0437\u0430\u0432\u0435\u0440\u0448\u0435\u043d\u043d\u044b\u0435 \u0438 \u043d\u0435\u0437\u0430\u0432\u0435\u0440\u0448\u0435\u043d\u043d\u044b\u0435 \u043f\u043e \u0442\u0435\u043a\u0443\u0449\u0435\u0439 \u043e\u043f\u043e\u0440\u0435."; + } + return null; +} + +function buildQuestionTypeEvidenceLine(context: AnswerRenderContext): string | null { + if (context.questionType === "what_is_it_grounded_on") { + return "\u0412 \u044d\u0442\u043e\u043c \u043e\u0442\u0432\u0435\u0442\u0435 \u0432 \u043f\u0440\u0438\u043e\u0440\u0438\u0442\u0435\u0442\u0435 \u043f\u043e\u043a\u0430\u0437\u0430\u043d\u044b \u0438\u043c\u0435\u043d\u043d\u043e \u043e\u0441\u043d\u043e\u0432\u0430\u043d\u0438\u044f \u0432\u044b\u0432\u043e\u0434\u0430."; + } + if (context.questionType === "prove_or_guess") { + return "\u0421\u0438\u043b\u0430 \u0432\u044b\u0432\u043e\u0434\u0430 \u043e\u0446\u0435\u043d\u0435\u043d\u0430 \u043f\u043e \u043f\u0440\u044f\u043c\u043e\u0439 \u043e\u043f\u043e\u0440\u0435, \u0430 \u043d\u0435 \u043f\u043e \u0434\u043e\u0433\u0430\u0434\u043a\u0430\u043c."; + } + return null; +} + +function formatAnchorList(anchors: string[], prefix: string): string | null { + if (anchors.length === 0) { + return null; + } + return `${prefix}: ${anchors.join(", ")}.`; +} + +function buildQuestionTypeCheckLine(context: AnswerRenderContext): string | null { + if (context.questionType === "what_to_check_first") { + return "\u041d\u0430\u0447\u043d\u0438\u0442\u0435 \u0441 \u043f\u0435\u0440\u0432\u043e\u0433\u043e \u043f\u0443\u043d\u043a\u0442\u0430 \u0438 \u043f\u0440\u043e\u0439\u0434\u0438\u0442\u0435 \u043c\u0430\u0440\u0448\u0440\u0443\u0442 \u043f\u043e\u0441\u043b\u0435\u0434\u043e\u0432\u0430\u0442\u0435\u043b\u044c\u043d\u043e, \u0431\u0435\u0437 \u043f\u0435\u0440\u0435\u0441\u043a\u043e\u043a\u0430."; + } + return null; +} + +function buildQuestionTypeLimitationLine(context: AnswerRenderContext): string | null { + if (context.questionType === "prove_or_guess") { + return "\u0414\u043b\u044f \u0444\u043e\u0440\u043c\u0430\u0442\u0430 \u00ab\u0434\u043e\u043a\u0430\u0437\u0430\u043d\u043e \u0438\u043b\u0438 \u0433\u0438\u043f\u043e\u0442\u0435\u0437\u0430\u00bb \u0432\u0441\u0435 \u043d\u0435\u0434\u043e\u043a\u0430\u0437\u0430\u043d\u043d\u044b\u0435 \u0447\u0430\u0441\u0442\u0438 \u043e\u0442\u0434\u0435\u043b\u0435\u043d\u044b \u0432 \u043e\u0433\u0440\u0430\u043d\u0438\u0447\u0435\u043d\u0438\u044f."; + } + if (context.questionType === "which_chains_are_complete_vs_incomplete") { + return "\u0414\u0435\u043b\u0435\u043d\u0438\u0435 \u043d\u0430 \u00abcomplete/incomplete\u00bb \u0437\u0430\u0432\u0438\u0441\u0438\u0442 \u043e\u0442 \u043f\u043e\u043b\u043d\u043e\u0442\u044b \u0446\u0435\u043f\u043e\u0447\u043a\u0438 \u0432 \u0442\u0435\u043a\u0443\u0449\u0435\u043c \u0441\u0440\u0435\u0437\u0435."; + } + return null; +} + +function applyQuestionTypeAndAnchorPolicy(input: { + shortLine: string; + brokenLines: string[]; + whyLines: string[]; + evidenceLines: string[]; + checkLines: string[]; + limitationLines: string[]; + context: AnswerRenderContext; +}): { + shortLine: string; + brokenLines: string[]; + whyLines: string[]; + evidenceLines: string[]; + checkLines: string[]; + limitationLines: string[]; +} { + const nextShort = buildQuestionTypeShortLine(input.context) ?? input.shortLine; + const nextBroken = dedupeNarrativeLines( + [buildQuestionTypeBrokenLine(input.context), ...input.brokenLines].filter((item): item is string => Boolean(item)), + 4 + ); + const nextWhy = dedupeNarrativeLines( + [buildQuestionTypeWhyLine(input.context), ...input.whyLines].filter((item): item is string => Boolean(item)), + 4 + ); + const anchorUsedLine = formatAnchorList( + input.context.anchors.used, + "\u0412 \u043e\u043f\u043e\u0440\u0435 \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u043d\u044b \u044f\u043a\u043e\u0440\u044f \u0432\u043e\u043f\u0440\u043e\u0441\u0430" + ); + const anchorUnusedLine = formatAnchorList( + input.context.anchors.unused, + "\u042f\u043a\u043e\u0440\u044f \u0438\u0437 \u0432\u043e\u043f\u0440\u043e\u0441\u0430 \u0431\u0435\u0437 \u043f\u0440\u044f\u043c\u043e\u0433\u043e \u043f\u043e\u0434\u0442\u0432\u0435\u0440\u0436\u0434\u0435\u043d\u0438\u044f" + ); + const nextEvidence = dedupeNarrativeLines( + [buildQuestionTypeEvidenceLine(input.context), ...input.evidenceLines, anchorUsedLine].filter( + (item): item is string => Boolean(item) + ), + 7 + ); + const nextChecks = dedupeNarrativeLines( + [buildQuestionTypeCheckLine(input.context), ...input.checkLines].filter((item): item is string => Boolean(item)), + 5 + ); + const nextLimitations = dedupeNarrativeLines( + [buildQuestionTypeLimitationLine(input.context), anchorUnusedLine, ...input.limitationLines].filter( + (item): item is string => Boolean(item) + ), + 6 + ); + + return { + shortLine: ensureSentence(nextShort), + brokenLines: nextBroken, + whyLines: nextWhy, + evidenceLines: nextEvidence, + checkLines: nextChecks, + limitationLines: nextLimitations + }; +} + +function renderPolicyReply(structure: AnswerStructureV11, context?: AnswerRenderContext): string { const shortLine = ensureSentence(buildShortSectionLine(structure)); const brokenLines = buildBrokenSectionLines(structure); const whyLines = buildWhySectionLines(structure); const evidenceLines = buildEvidenceSectionLines(structure); const checkLines = buildChecksSectionLines(structure); const limitationLines = buildLimitationsSectionLines(structure); + const enriched = context + ? applyQuestionTypeAndAnchorPolicy({ + shortLine, + brokenLines, + whyLines, + evidenceLines, + checkLines, + limitationLines, + context + }) + : { + shortLine, + brokenLines, + whyLines, + evidenceLines, + checkLines, + limitationLines + }; return sanitizeUserFacingReply( [ - `Коротко: ${shortLine}`, - `Что сломано:\n${formatList(brokenLines)}`, - `Почему это похоже на проблему:\n${formatList(whyLines)}`, - `На чем это основано:\n${formatList(evidenceLines)}`, - `Что проверить первым:\n${formatList(checkLines)}`, - `Ограничения:\n${formatList(limitationLines)}` + `Коротко: ${enriched.shortLine}`, + `Что сломано:\n${formatList(enriched.brokenLines)}`, + `Почему это похоже на проблему:\n${formatList(enriched.whyLines)}`, + `На чем это основано:\n${formatList(enriched.evidenceLines)}`, + `Что проверить первым:\n${formatList(enriched.checkLines)}`, + `Ограничения:\n${formatList(enriched.limitationLines)}` ] .filter(Boolean) .join("\n\n") @@ -2757,6 +3371,8 @@ function renderPolicyReply(structure: AnswerStructureV11): string { function composeAssistantAnswerV11(input: ComposeAnswerInput): ComposeAnswerOutput { const fallbackType = fallbackFromSummary(input.routeSummary); + const questionType: QuestionTypeClass = input.questionTypeHint ?? "unknown"; + const anchorUsage = evaluateCompanyAnchorUsage(input.companyAnchors, input.retrievalResults); const okResults = input.retrievalResults.filter((item) => item.status === "ok"); const partialResults = input.retrievalResults.filter((item) => item.status === "partial"); const emptyResults = input.retrievalResults.filter((item) => item.status === "empty"); @@ -2786,15 +3402,8 @@ function composeAssistantAnswerV11(input: ComposeAnswerInput): ComposeAnswerOutp problemHeavyUnits, input.focusDomainHint ); - const settlementGrounding = focusNarrativeDomain === "settlements_60_62" - ? evaluateSettlementEvidenceGrounding(input.retrievalResults) - : { - has_settlement_primary: false, - has_foreign_primary: false, - foreign_primary_domains: [], - blocked: false - }; - const settlementGroundingBlocked = focusNarrativeDomain === "settlements_60_62" && settlementGrounding.blocked; + const focusDomainGrounding = evaluateP0DomainEvidenceGrounding(input.retrievalResults, focusNarrativeDomain); + const focusDomainGroundingBlocked = Boolean(focusNarrativeDomain && focusDomainGrounding.blocked); const rankedProblemUnits = rankProblemUnitsForAnswer(problemHeavyUnits, lifecycleAnswerEnabled, focusNarrativeDomain); const domainAlignedProblemUnits = focusNarrativeDomain === null @@ -2805,7 +3414,7 @@ function composeAssistantAnswerV11(input: ComposeAnswerInput): ComposeAnswerOutp rankedProblemUnits.length > 0 && domainAlignedProblemUnits.length === 0 ); - const domainLockMiss = domainLockMissBase || settlementGroundingBlocked; + const domainLockMiss = domainLockMissBase || focusDomainGroundingBlocked; const selectedProblemUnits = ( focusNarrativeDomain === null ? rankedProblemUnits : domainAlignedProblemUnits ).slice(0, 4); @@ -2853,7 +3462,7 @@ function composeAssistantAnswerV11(input: ComposeAnswerInput): ComposeAnswerOutp policySignals }); const guardedDecision: PolicyDecision = - settlementGroundingBlocked && + focusDomainGroundingBlocked && decision.mode !== "out_of_scope" && decision.mode !== "route_mismatch" && decision.mode !== "backend_error" @@ -2870,7 +3479,9 @@ function composeAssistantAnswerV11(input: ComposeAnswerInput): ComposeAnswerOutp policySignals.minimum_evidence_failed || limitationReasonCodes.includes("missing_mechanism") || limitationReasonCodes.includes("weak_source_mapping") || + limitationReasonCodes.includes("insufficient_detail") || aggregateEvidenceConfidence === "low" || + domainLockMiss || lowConfidenceConcentration; const hardBlockedMode = guardedDecision.mode === "out_of_scope" || @@ -2907,7 +3518,11 @@ function composeAssistantAnswerV11(input: ComposeAnswerInput): ComposeAnswerOutp const lifecycleModeActive = lifecycleAnswerEnabled && selectedProblemUnits.length > 0 && hasLifecycleResolution(selectedProblemUnits); return { - assistant_reply: renderPolicyReply(problemCentricStructure), + assistant_reply: renderPolicyReply(problemCentricStructure, { + questionType, + focusDomain: focusNarrativeDomain, + anchors: anchorUsage + }), fallback_type: guardedDecision.fallback_type, reply_type: guardedDecision.reply_type, answer_structure_v11: problemCentricStructure, @@ -2937,11 +3552,12 @@ function composeAssistantAnswerV11(input: ComposeAnswerInput): ComposeAnswerOutp ...limitationReasonCodes.map((code) => limitationReasonToText(code)), ...extractLimitations(input.retrievalResults), ...input.groundingCheck.reasons, - ...(settlementGroundingBlocked + ...(focusDomainGroundingBlocked + ? ["Целевой механизм активного домена подтвержден частично; часть первичной опоры пришла из смежного контура."] + : []), + ...(anchorUsage.unused.length > 0 ? [ - `Primary settlement evidence is not confirmed; foreign domains dominate: ${ - settlementGrounding.foreign_primary_domains.join(", ") || "unknown" - }.` + `Часть якорей запроса пока не подтверждена в опоре: ${anchorUsage.unused.slice(0, 5).join(", ")}.` ] : []), ...(policySignals.minimum_evidence_failed ? ["Minimum evidence gate failed for current scope."] : []), @@ -2958,15 +3574,24 @@ function composeAssistantAnswerV11(input: ComposeAnswerInput): ComposeAnswerOutp ...(guardedDecision.mode === "clarification_required" && missingAnchors.account ? ["missing_anchor:account"] : []), ...(guardedDecision.mode === "clarification_required" && missingAnchors.documentOrObject ? ["missing_anchor:document_or_object"] : []), ...(guardedDecision.mode === "clarification_required" && missingAnchors.counterparty ? ["missing_anchor:counterparty"] : []), - ...(settlementGroundingBlocked ? ["settlement_primary_evidence_not_confirmed"] : []) + ...(focusDomainGroundingBlocked ? ["primary_domain_evidence_not_confirmed"] : []) ], 8 ); + const confidenceLimited = + guardedDecision.mode !== "focused_grounded" || + limitationReasonCodes.includes("missing_mechanism") || + limitationReasonCodes.includes("heuristic_inference") || + limitationReasonCodes.includes("weak_source_mapping") || + limitationReasonCodes.includes("insufficient_detail") || + aggregateEvidenceConfidence === "low" || + focusDomainGroundingBlocked; + const mechanismStatus: AnswerStructureV11["mechanism_block"]["status"] = mechanismNotes.length === 0 ? "unresolved" - : limitationReasonCodes.includes("missing_mechanism") || limitationReasonCodes.includes("heuristic_inference") + : confidenceLimited ? "limited" : "grounded"; @@ -2976,7 +3601,8 @@ function composeAssistantAnswerV11(input: ComposeAnswerInput): ComposeAnswerOutp direct_answer: buildDirectAnswer({ mode: guardedDecision.mode, retrievalResults: input.retrievalResults, - policySignals + policySignals, + focusDomain: focusNarrativeDomain }), mechanism_block: { status: mechanismStatus, @@ -3011,7 +3637,11 @@ function composeAssistantAnswerV11(input: ComposeAnswerInput): ComposeAnswerOutp }; return { - assistant_reply: renderPolicyReply(answerStructure), + assistant_reply: renderPolicyReply(answerStructure, { + questionType, + focusDomain: focusNarrativeDomain, + anchors: anchorUsage + }), fallback_type: guardedDecision.fallback_type, reply_type: guardedDecision.reply_type, answer_structure_v11: answerStructure, diff --git a/llm_normalizer/backend/src/services/assistantDataLayer.ts b/llm_normalizer/backend/src/services/assistantDataLayer.ts index 06f5fa2..efe65ad 100644 --- a/llm_normalizer/backend/src/services/assistantDataLayer.ts +++ b/llm_normalizer/backend/src/services/assistantDataLayer.ts @@ -108,6 +108,11 @@ const ENTITY_SPECIFIC_MARKERS = /(?:\u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442|supplier|buyer|\u0434\u043e\u043a\u0443\u043c\u0435\u043d\u0442|invoice|posting|register|guid|id[:=\s])/iu; const EXACT_OBJECT_MARKERS = /(?:\u0434\u043e\u043a\u0443\u043c\u0435\u043d\u0442\s*(?:#|\u2116)|\bref\b|\bid\b|trx-\d+|inv-\d+)/iu; +const CONTRACT_MARKERS = + /(?:\u0434\u043e\u0433\u043e\u0432\u043e\u0440(?:\u0430|\u0443|\u043e\u043c|\u0435)?\s*(?:№|#|n)\s*[a-z\u0430-\u044f0-9./_-]+)/iu; +const DOCUMENT_NUMBER_MARKERS = + /(?:(?:\u0441\u0447(?:\u0435|\u0451)\u0442(?:-\u0444\u0430\u043a\u0442\u0443\u0440(?:\u0430|\u044b))?|\u0440\u0435\u0430\u043b\u0438\u0437\u0430\u0446(?:\u0438\u044f|\u0438\u0438)|\u0430\u043a\u0442)\s*(?:№|#|n)\s*[a-z\u0430-\u044f0-9./_-]+)/iu; +const AMOUNT_MARKERS = /\b(?:\d{1,3}(?:[ \u00A0]\d{3})+(?:[.,]\d{2})?|\d+[.,]\d{2})\b/u; const ROUTE_MIN_EVIDENCE_GATE: Record = { hybrid_store_plus_live: { @@ -186,6 +191,9 @@ function detectBroadQuery(fragmentText: string, route: string): BroadQueryAssess const hasEntityAnchor = ENTITY_SPECIFIC_MARKERS.test(lower); const hasExactObjectAnchor = EXACT_OBJECT_MARKERS.test(lower); const hasGuidAnchor = extractGuids(lower).length > 0; + const hasContractAnchor = CONTRACT_MARKERS.test(lower); + const hasDocumentNumberAnchor = DOCUMENT_NUMBER_MARKERS.test(lower); + const hasAmountAnchor = AMOUNT_MARKERS.test(lower); let anchorScore = 0; if (hasGuidAnchor) anchorScore += 3; @@ -193,9 +201,16 @@ function detectBroadQuery(fragmentText: string, route: string): BroadQueryAssess if (hasPeriodAnchor) anchorScore += 1; if (hasEntityAnchor) anchorScore += 1; if (hasExactObjectAnchor) anchorScore += 1; + if (hasContractAnchor) anchorScore += 2; + if (hasDocumentNumberAnchor) anchorScore += 2; + if (hasAmountAnchor) anchorScore += 1; const weakAnchors = anchorScore <= 1; - const strongFocus = hasGuidAnchor || (hasAccountAnchor && hasPeriodAnchor) || anchorScore >= 4; + const strongFocus = + hasGuidAnchor || + (hasAccountAnchor && hasPeriodAnchor) || + (hasContractAnchor && hasDocumentNumberAnchor) || + anchorScore >= 4; const routeSensitiveBroad = route === "batch_refresh_then_store" || route === "hybrid_store_plus_live"; let broadnessLevel: BroadnessLevel = "low"; @@ -376,9 +391,7 @@ const P0_DOMAIN_CARDS: P0DomainCard[] = [ /\u0441\u0447[её]т.?фактур/i, /\u043a\u043d\u0438\u0433[аи]\s+\u043f\u043e\u043a\u0443\u043f/i, /\u043a\u043d\u0438\u0433[аи]\s+\u043f\u0440\u043e\u0434\u0430\u0436/i, - /\u0432\u044b\u0447\u0435\u0442/i, - /\b19\b/, - /\b68\b/ + /\u0432\u044b\u0447\u0435\u0442/i ] }, { @@ -394,19 +407,20 @@ const P0_DOMAIN_CARDS: P0DomainCard[] = [ expected_edges: ["document_to_posting", "deferred_expense_to_writeoff", "contract_to_documents"], forbidden_cross_domain_leakage: ["vat", "taxes", "bank", "settlements", "suppliers", "customers", "fixed_assets"], symptom_markers: [ - /\b20\b/, - /\b21\b/, - /\b23\b/, - /\b25\b/, - /\b26\b/, - /\b28\b/, - /\b29\b/, - /\b44\b/, /period\s*close/i, - /\u0437\u0430\u043a\u0440\u044b\u0442/i, + /month\s*close/i, + /close\s+period/i, + /закрыт[а-яё]*\s+период/i, + /close\s+operation/i, + /allocation/i, + /закр/i, + /перио/i, + /\u0437\u0430\u043a\u0440\u044b\u0442(?:\u0438|\u0438\u0435|\u044b|)\s*(?:\u043c\u0435\u0441\u044f\u0446|\u0441\u0447\u0435\u0442)/i, + /\u0440\u0435\u0433\u043b\u0430\u043c\u0435\u043d\u0442/i, /\u0437\u0430\u0442\u0440\u0430\u0442/i, /\u0440\u0430\u0441\u043f\u0440\u0435\u0434\u0435\u043b/i, - /\u043e\u0441\u0442\u0430\u0442\u043a/i + /\u0440\u0431\u043f/i, + /\u0430\u043c\u043e\u0440\u0442\u0438\u0437/i ] } ]; @@ -1241,6 +1255,28 @@ function extractAccountScopeFromText(text: string): string[] { } } + const closePairPattern = /\b(?:20|21|23|25|26|28|29|44)\s*[-/]\s*(?:20|21|23|25|26|28|29|44)\b/g; + let closePairMatch: RegExpExecArray | null = null; + while ((closePairMatch = closePairPattern.exec(lower)) !== null) { + const pair = closePairMatch[0]; + const pairAccounts = pair.match(/\b\d{2}(?:\.\d{1,2})?\b/g) ?? []; + for (const account of pairAccounts) { + pushAccount(account); + } + } + + const suffixAnchorPattern = /\b(?:51|60|62|68|76|97)(?:\.\d{1,2})?(?:-(?:му|й|го|м|х))?\b/giu; + let suffixAnchorMatch: RegExpExecArray | null = null; + while ((suffixAnchorMatch = suffixAnchorPattern.exec(lower)) !== null) { + const token = suffixAnchorMatch[0]; + const start = suffixAnchorMatch.index; + const end = start + token.length; + if (intersectsSpan(start, end, dateSpans)) { + continue; + } + pushAccount(token); + } + const explicitPattern = /\b\d{2}(?:\.\d{1,2})?\b/g; let explicitMatch: RegExpExecArray | null = null; const settlementLexicalAnchor = /(оплат|расчет|расч[её]т|аванс|долг|постав|покуп|settlement|payment|supplier|customer)/i.test( @@ -1405,31 +1441,55 @@ function buildSemanticRetrievalProfile(fragmentText: string): SemanticRetrievalP pushMany(entityTypes, ["counterparty", "contract", "document", "posting"]); pushMany(relationPatterns, ["payment_to_settlement", "statement_to_document", "document_to_posting"]); } - if (/постав|постав|supplier|vendor|60\b/i.test(lower)) { + const hasSettlementAccountScope = accountScope.some((item) => item === "51" || item === "60" || item === "62" || item === "76"); + const hasVatAccountScope = accountScope.some((item) => item === "19" || item === "68"); + const hasFixedAssetAccountScope = accountScope.some((item) => item === "01" || item === "02" || item === "08"); + const hasDeferredExpenseAccountScope = accountScope.some((item) => item === "97"); + const hasMonthCloseCostsAccountScope = accountScope.some((item) => CLOSE_COST_ACCOUNTS.includes(item)); + const hasExplicitMonthCloseLexicalMarker = + /(?:закрыти[ея]\s+месяц|закрыт[а-яё]*\s+период|закрытие\s+счетов|регламентн|косвенн|затрат|распределени|рбп|амортиз|финансовых\s+результат|month\s*close|period\s*close|close\s+period|close\s+operation)/i.test( + lower + ) || + (/закр/i.test(lower) && /перио/i.test(lower)); + + if (/постав|постав|supplier|vendor/i.test(lower) || hasSettlementAccountScope) { pushMany(domainScope, ["suppliers", "settlements"]); pushMany(documentTypes, ["supplier_receipt", "settlement_document"]); pushMany(entityTypes, ["counterparty", "contract", "document", "posting"]); pushMany(relationPatterns, ["payment_to_settlement", "contract_to_documents"]); } - if (/покупат|покупат|customer|buyer|62\b/i.test(lower)) { + if (/покупат|покупат|customer|buyer/i.test(lower) || hasSettlementAccountScope) { pushMany(domainScope, ["customers", "settlements"]); pushMany(documentTypes, ["sales_document", "settlement_document"]); pushMany(entityTypes, ["counterparty", "contract", "document", "posting"]); pushMany(relationPatterns, ["payment_to_settlement", "contract_to_documents"]); } - if (/РЅРґСЃ|ндс|vat|РєРЅРёРіР° РїРѕРєСѓРїРѕРє|РєРЅРёРіР° продаж|счет.?фактур|книг[аи]\s+покуп|книг[аи]\s+продаж|сч[её]т.?фактур|19\b|68\b/i.test(lower)) { + if ( + /РЅРґСЃ|ндс|vat|РєРЅРёРіР° РїРѕРєСѓРїРѕРє|РєРЅРёРіР° продаж|счет.?фактур|книг[аи]\s+покуп|книг[аи]\s+продаж|сч[её]т.?фактур/i.test( + lower + ) || + hasVatAccountScope + ) { pushMany(domainScope, ["vat", "taxes"]); pushMany(documentTypes, ["invoice", "vat_document"]); pushMany(entityTypes, ["document", "tax_entry", "posting"]); pushMany(relationPatterns, ["invoice_to_vat", "document_to_posting"]); } - if (/РѕСЃ|РѕСЃРЅРѕРІРЅ(ые|ых)\s+сред|(?:^|[^a-zа-яё])ос(?:$|[^a-zа-яё])|основн(ые|ых|ым)?\s+средств|fixed asset|amort|амортиз|амортиз|01\b|02\b|08\b/i.test(lower)) { + if ( + /РѕСЃ|РѕСЃРЅРѕРІРЅ(ые|ых)\s+сред|(?:^|[^a-zа-яё])ос(?:$|[^a-zа-яё])|основн(ые|ых|ым)?\s+средств|fixed asset|amort|амортиз|амортиз/i.test( + lower + ) || + hasFixedAssetAccountScope + ) { pushMany(domainScope, ["fixed_assets"]); pushMany(documentTypes, ["fixed_asset_card", "fixed_asset_acceptance", "depreciation_document"]); pushMany(entityTypes, ["fixed_asset", "document", "posting"]); pushMany(relationPatterns, ["asset_card_to_depreciation", "document_to_posting"]); } - if (/СЂР±Рї|расходы будущих периодов|рбп|расходы\s+будущих\s+периодов|deferred|writeoff|97\b/i.test(lower)) { + if ( + /СЂР±Рї|расходы будущих периодов|рбп|расходы\s+будущих\s+периодов|deferred|writeoff/i.test(lower) || + hasDeferredExpenseAccountScope + ) { pushMany(domainScope, ["deferred_expense", "period_close"]); pushMany(documentTypes, ["deferred_expense_document", "period_close_document"]); pushMany(entityTypes, ["document", "posting"]); @@ -1452,7 +1512,7 @@ function buildSemanticRetrievalProfile(fragmentText: string): SemanticRetrievalP pushMany(anomalyPatterns, ["repeated_anomaly"]); pushMany(rankingBasis, ["repeatability"]); } - if (/закрыт|закрытие|период|закрыт|закрытие|период|month close|period close|closure/i.test(lower)) { + if (hasExplicitMonthCloseLexicalMarker || hasMonthCloseCostsAccountScope || hasDeferredExpenseAccountScope) { pushMany(domainScope, ["period_close"]); pushMany(anomalyPatterns, ["closure_risk", "broken_lifecycle"]); pushMany(documentTypes, ["period_close_document"]); diff --git a/llm_normalizer/backend/src/services/assistantService.ts b/llm_normalizer/backend/src/services/assistantService.ts index 050a62e..c3c9a22 100644 --- a/llm_normalizer/backend/src/services/assistantService.ts +++ b/llm_normalizer/backend/src/services/assistantService.ts @@ -8,6 +8,8 @@ import * as assistantDataLayer_1 from "./assistantDataLayer"; import * as assistantSessionLogger_1 from "./assistantSessionLogger"; import * as investigationState_1 from "./investigationState"; import * as retrievalResultNormalizer_1 from "./retrievalResultNormalizer"; +import * as questionTypeResolver_1 from "./questionTypeResolver"; +import * as companyAnchorResolver_1 from "./companyAnchorResolver"; function retrievalSummaryForRoute(route) { if (route === "store_canonical") return "Canonical accounting data path selected."; @@ -832,6 +834,26 @@ function extractFollowupAccountAnchorsLoose(text) { } return Array.from(new Set(anchors)); } +function inferP0DomainFromMessage(text) { + const lower = String(text ?? "").toLowerCase(); + const accountTokens = extractAccountTokens(lower); + const hasVatAccount = accountTokens.some((token) => /^(?:19|68)(?:\.|$)/.test(token)); + const hasSettlementAccount = accountTokens.some((token) => /^(?:51|60|62|76)(?:\.|$)/.test(token)); + const hasMonthCloseAccount = accountTokens.some((token) => /^(?:97|2\d|3\d|4[0-4])(?:\.|$)/.test(token)); + const vatLexical = /(?:ндс|vat|счет[\s-]?фактур|сч[её]т[\s-]?фактур|книг[аи]\s+(?:покуп|продаж)|налогов)/i.test(lower); + const settlementLexical = /(?:долг|аванс|зач[её]т|взаимозач|расч[её]т|оплат|платеж|платёж|постав|покупател)/i.test(lower); + const monthCloseLexical = /(?:закрыти[ея]\s+месяц|закрытие счетов|регламентн|косвенн|затрат|распределени|рбп|амортиз|финансовых результат)/i.test(lower); + if (hasVatAccount || vatLexical) { + return "vat_document_register_book"; + } + if (monthCloseLexical || hasMonthCloseAccount) { + return "month_close_costs_20_44"; + } + if (hasSettlementAccount || settlementLexical) { + return "settlements_60_62"; + } + return null; +} function hasStrongFollowupAnchors(userMessage, state) { const explicitPeriod = extractNormalizedPeriodLiteral(userMessage); if (explicitPeriod && state.focus.period && explicitPeriod !== state.focus.period) { @@ -840,6 +862,14 @@ function hasStrongFollowupAnchors(userMessage, state) { return true; } } + const inferredDomain = inferP0DomainFromMessage(userMessage); + const activeDomain = compactWhitespace(state.followup_context?.active_domain ?? state.focus.domain ?? ""); + if (inferredDomain && activeDomain && inferredDomain !== activeDomain) { + const domainLooksLikeFollowupRefinement = hasFollowupMarker(userMessage) && hasReferentialPointer(userMessage); + if (!domainLooksLikeFollowupRefinement) { + return true; + } + } const explicitAccounts = extractAccountTokens(userMessage); const followupAccounts = explicitAccounts.length > 0 ? explicitAccounts : extractFollowupAccountAnchorsLoose(userMessage); if (followupAccounts.length > 0) { @@ -1155,6 +1185,8 @@ export class AssistantService { const focusDomainHint = followupBinding.usage?.applied ? session.investigation_state?.followup_context?.active_domain ?? session.investigation_state?.focus.domain ?? null : null; + const questionTypeClass = (0, questionTypeResolver_1.resolveQuestionType)(userMessage); + const companyAnchors = (0, companyAnchorResolver_1.resolveCompanyAnchors)(userMessage); const composition = (0, answerComposer_1.composeAssistantAnswer)({ userMessage, routeSummary: normalized.route_hint_summary, @@ -1163,6 +1195,8 @@ export class AssistantService { coverageReport: coverageEvaluation.coverage, groundingCheck, focusDomainHint, + questionTypeHint: questionTypeClass, + companyAnchors, enableAnswerPolicyV11: config_1.FEATURE_ASSISTANT_ANSWER_POLICY_V11, enableProblemCentricAnswerV1: config_1.FEATURE_ASSISTANT_PROBLEM_CENTRIC_ANSWER_V1, enableLifecycleAnswerV1: config_1.FEATURE_ASSISTANT_LIFECYCLE_ANSWER_V1 @@ -1213,6 +1247,8 @@ export class AssistantService { retrieval_results: retrievalResults, answer_grounding_check: groundingCheck, dropped_intent_segments: extractDiscardedIntentSegments(normalized.normalized), + question_type_class: questionTypeClass, + company_anchors: companyAnchors, ...(followupBinding.usage ? { followup_state_usage: followupBinding.usage } : {}), problem_centric_answer_applied: composition.problem_centric_answer_applied ?? false, problem_units_used_count: composition.problem_units_used_count ?? 0, @@ -1276,6 +1312,8 @@ export class AssistantService { route_subject_match: groundingCheck.route_subject_match, clarification_target: coverageEvaluation.coverage.clarification_needed_for, dropped_intent_segments: extractDiscardedIntentSegments(normalized.normalized), + question_type_class: questionTypeClass, + company_anchors: companyAnchors, ...(followupBinding.usage ? { followup_state_usage: followupBinding.usage } : {}), problem_centric_answer_applied: composition.problem_centric_answer_applied ?? false, problem_units_used_count: composition.problem_units_used_count ?? 0, diff --git a/llm_normalizer/backend/src/services/companyAnchorResolver.ts b/llm_normalizer/backend/src/services/companyAnchorResolver.ts new file mode 100644 index 0000000..cbe88d3 --- /dev/null +++ b/llm_normalizer/backend/src/services/companyAnchorResolver.ts @@ -0,0 +1,181 @@ +export interface CompanyAnchorSet { + contract_numbers: string[]; + document_numbers: string[]; + dates: string[]; + amounts: string[]; + accounts: string[]; + periods: string[]; + document_types: string[]; + all: string[]; +} + +const CONTRACT_PATTERN = + /(?:\u0434\u043e\u0433\u043e\u0432\u043e\u0440(?:\u0430|\u0443|ом|е)?\s*(?:№|#|n)?\s*([a-zа-я0-9./_-]+))/giu; +const DOCUMENT_NUMBER_PATTERN = + /(?:(?:\u0441\u0447(?:\u0435|\u0451)\u0442(?:-\u0444\u0430\u043a\u0442\u0443\u0440(?:а|ы))?|\u0440\u0435\u0430\u043b\u0438\u0437\u0430\u0446(?:ия|ии)|\u0430\u043a\u0442)\s*(?:№|#|n)\s*([a-zа-я0-9./_-]+))/giu; +const DATE_PATTERN = + /\b(?:\d{1,2}[./]\d{1,2}[./]\d{2,4}|\d{1,2}\s+(?:\u044f\u043d\u0432\u0430\u0440\u044f|\u0444\u0435\u0432\u0440\u0430\u043b\u044f|\u043c\u0430\u0440\u0442\u0430|\u0430\u043f\u0440\u0435\u043b\u044f|\u043c\u0430\u044f|\u0438\u044e\u043d\u044f|\u0438\u044e\u043b\u044f|\u0430\u0432\u0433\u0443\u0441\u0442\u0430|\u0441\u0435\u043d\u0442\u044f\u0431\u0440\u044f|\u043e\u043a\u0442\u044f\u0431\u0440\u044f|\u043d\u043e\u044f\u0431\u0440\u044f|\u0434\u0435\u043a\u0430\u0431\u0440\u044f))\b/giu; +const AMOUNT_PATTERN = + /\b(?:\d{1,3}(?:[ \u00A0]\d{3})+(?:[.,]\d{2})?|\d+[.,]\d{2})\b/gu; +const CONTEXTUAL_ACCOUNT_PATTERN = + /(?:\b(?:\u0441\u0447(?:\u0435|\u0451)\u0442(?:а|у|ом|ов)?|account|schet)\b\s*(?:№|#|:)?\s*)(\d{2}(?:\.\d{2})?)/giu; +const ACCOUNT_PAIR_PATTERN = /\b(\d{2}\.\d{2})\s*\/\s*(\d{2}\.\d{2})\b/gu; +const PERIOD_PATTERN = + /\b(?:20\d{2}(?:[-./](?:0?[1-9]|1[0-2]))?|(?:\u0438\u044e\u043b\u044c|\u0438\u044e\u043d\u044c|\u0430\u0432\u0433\u0443\u0441\u0442|\u0441\u0435\u043d\u0442\u044f\u0431\u0440\u044c|\u043e\u043a\u0442\u044f\u0431\u0440\u044c|\u043d\u043e\u044f\u0431\u0440\u044c|\u0434\u0435\u043a\u0430\u0431\u0440\u044c|\u044f\u043d\u0432\u0430\u0440\u044c|\u0444\u0435\u0432\u0440\u0430\u043b\u044c|\u043c\u0430\u0440\u0442|\u0430\u043f\u0440\u0435\u043b\u044c|\u043c\u0430\u0439)\s+20\d{2})\b/giu; + +const DOCUMENT_TYPE_PATTERNS: Array<{ name: string; pattern: RegExp }> = [ + { name: "invoice", pattern: /\b(?:\u0441\u0447(?:\u0435|\u0451)\u0442-\u0444\u0430\u043a\u0442\u0443\u0440|invoice)\b/iu }, + { name: "realization", pattern: /\b(?:\u0440\u0435\u0430\u043b\u0438\u0437\u0430\u0446|realization)\b/iu }, + { name: "payment", pattern: /\b(?:\u043e\u043f\u043b\u0430\u0442|payment|\u043f\u043b\u0430\u0442\u0435\u0436)\b/iu }, + { name: "receipt", pattern: /\b(?:\u043f\u043e\u0441\u0442\u0443\u043f\u043b\u0435\u043d|receipt)\b/iu }, + { name: "close", pattern: /\b(?:\u0437\u0430\u043a\u0440\u044b\u0442\u0438|\u0440\u0435\u0433\u043b\u0430\u043c\u0435\u043d\u0442)\b/iu }, + { name: "rbp_writeoff", pattern: /\b(?:\u0440\u0431\u043f|\u0441\u043f\u0438\u0441\u0430\u043d\u0438\u0435)\b/iu }, + { name: "amortization", pattern: /\b(?:\u0430\u043c\u043e\u0440\u0442\u0438\u0437|amortization)\b/iu } +]; + +const KNOWN_ACCOUNT_PREFIXES = new Set([ + "01", + "02", + "07", + "08", + "10", + "13", + "19", + "20", + "21", + "23", + "25", + "26", + "41", + "43", + "44", + "45", + "50", + "51", + "52", + "55", + "57", + "58", + "60", + "62", + "66", + "67", + "68", + "69", + "70", + "71", + "73", + "76", + "90", + "91", + "94", + "96", + "97" +]); + +function uniqueStrings(values: string[], limit = 48): string[] { + return Array.from(new Set(values.map((item) => String(item ?? "").trim()).filter(Boolean))).slice(0, limit); +} + +function normalizeAnchorToken(value: string): string { + return String(value ?? "") + .replace(/\s+/g, " ") + .trim(); +} + +function collectMatches(text: string, pattern: RegExp, useCaptures = true): string[] { + const values: string[] = []; + pattern.lastIndex = 0; + for (const match of text.matchAll(pattern)) { + if (!match) continue; + if (useCaptures && match.length > 1) { + for (let i = 1; i < match.length; i += 1) { + const token = normalizeAnchorToken(match[i] ?? ""); + if (token) values.push(token); + } + continue; + } + const token = normalizeAnchorToken(match[0] ?? ""); + if (token) values.push(token); + } + return uniqueStrings(values); +} + +function isKnownAccount(value: string): boolean { + const token = String(value ?? "").trim(); + const match = token.match(/^(\d{2})/); + if (!match) { + return false; + } + return KNOWN_ACCOUNT_PREFIXES.has(match[1]); +} + +function collectAccountAnchors(text: string): string[] { + const tokens = new Set(); + for (const token of collectMatches(text, CONTEXTUAL_ACCOUNT_PATTERN, true)) { + if (isKnownAccount(token)) { + tokens.add(token); + } + } + ACCOUNT_PAIR_PATTERN.lastIndex = 0; + for (const match of text.matchAll(ACCOUNT_PAIR_PATTERN)) { + const left = normalizeAnchorToken(match[1] ?? ""); + const right = normalizeAnchorToken(match[2] ?? ""); + if (left && isKnownAccount(left)) { + tokens.add(left); + } + if (right && isKnownAccount(right)) { + tokens.add(right); + } + } + return Array.from(tokens).slice(0, 24); +} + +function collectDocumentTypeAnchors(text: string): string[] { + return uniqueStrings( + DOCUMENT_TYPE_PATTERNS.filter((entry) => entry.pattern.test(text)).map((entry) => entry.name), + 12 + ); +} + +function flattenAnchors(input: Omit): string[] { + return uniqueStrings( + [ + ...input.contract_numbers, + ...input.document_numbers, + ...input.dates, + ...input.amounts, + ...input.accounts.map((item) => `account:${item}`), + ...input.periods.map((item) => `period:${item}`), + ...input.document_types.map((item) => `doc_type:${item}`) + ], + 64 + ); +} + +export function resolveCompanyAnchors(input: string): CompanyAnchorSet { + const text = String(input ?? ""); + + const contractNumbers = collectMatches(text, CONTRACT_PATTERN, true).map((item) => `\u0434\u043e\u0433\u043e\u0432\u043e\u0440 № ${item}`); + const documentNumbers = collectMatches(text, DOCUMENT_NUMBER_PATTERN, true).map((item) => `\u0434\u043e\u043a\u0443\u043c\u0435\u043d\u0442 № ${item}`); + const dates = collectMatches(text, DATE_PATTERN, false); + const amounts = collectMatches(text, AMOUNT_PATTERN, false); + const accounts = collectAccountAnchors(text); + const periods = collectMatches(text, PERIOD_PATTERN, false); + const documentTypes = collectDocumentTypeAnchors(text); + + const resultBase: Omit = { + contract_numbers: uniqueStrings(contractNumbers, 12), + document_numbers: uniqueStrings(documentNumbers, 16), + dates: uniqueStrings(dates, 16), + amounts: uniqueStrings(amounts, 16), + accounts: uniqueStrings(accounts, 24), + periods: uniqueStrings(periods, 12), + document_types: documentTypes + }; + + return { + ...resultBase, + all: flattenAnchors(resultBase) + }; +} diff --git a/llm_normalizer/backend/src/services/lifecycleRuntime.ts b/llm_normalizer/backend/src/services/lifecycleRuntime.ts index 01c56a2..f02a518 100644 --- a/llm_normalizer/backend/src/services/lifecycleRuntime.ts +++ b/llm_normalizer/backend/src/services/lifecycleRuntime.ts @@ -627,8 +627,14 @@ function inferLifecycleDomain(input: LifecycleResolverInput): LifecycleDomain { .join(" ") .toLowerCase(); + const hasExplicitVatHint = includesAny(unitTokens, [/domain_hint:vat_flow/]); + const hasExplicitDeferredHint = includesAny(unitTokens, [/domain_hint:deferred_expense/]); + const hasExplicitFixedAssetHint = includesAny(unitTokens, [/domain_hint:fixed_asset/]); + const hasExplicitPeriodCloseHint = includesAny(unitTokens, [/domain_hint:period_close/]); + const hasCustomerSettlementHint = includesAny(unitTokens, [/domain_hint:customer_settlement/]); + const hasBankSettlementHint = includesAny(unitTokens, [/domain_hint:bank_settlement/]); + const hasVatMarkers = includesAny(unitTokens, [ - /domain_hint:vat_flow/, /\binvoice_to_vat\b/, /\bvat_chain_conflict\b/, /(^|[^a-z0-9])nds([^a-z0-9]|$)/, @@ -637,7 +643,6 @@ function inferLifecycleDomain(input: LifecycleResolverInput): LifecycleDomain { /\baccount[_:\s-]?(19|68)\b/ ]); const hasDeferredMarkers = includesAny(unitTokens, [ - /domain_hint:deferred_expense/, /\bdeferred(?:_expense)?\b/, /\bdeferred_expense_to_writeoff\b/, /\bwriteoff\b/, @@ -646,7 +651,6 @@ function inferLifecycleDomain(input: LifecycleResolverInput): LifecycleDomain { /\baccount[_:\s-]?97\b/ ]); const hasFixedAssetMarkers = includesAny(unitTokens, [ - /domain_hint:fixed_asset/, /\bfixed[_\s-]?asset(?:s)?\b/, /\basset_card_to_depreciation\b/, /\bdepreciation(?:_active)?\b/, @@ -655,7 +659,6 @@ function inferLifecycleDomain(input: LifecycleResolverInput): LifecycleDomain { /\baccount[_:\s-]?(01|02|08)\b/ ]); const hasPeriodCloseMarkers = includesAny(unitTokens, [ - /domain_hint:period_close/, /\bperiod[_\s-]?close\b/, /\bperiod_close_risk\b/, /\bclose[_\s-]?risk\b/, @@ -665,6 +668,25 @@ function inferLifecycleDomain(input: LifecycleResolverInput): LifecycleDomain { /\bperiod_risk\b/ ]); + if (hasExplicitDeferredHint) { + return "deferred_expense"; + } + if (hasExplicitFixedAssetHint) { + return "fixed_asset"; + } + if (hasExplicitVatHint) { + return "vat_flow"; + } + if (hasExplicitPeriodCloseHint) { + return "period_close"; + } + if (hasCustomerSettlementHint) { + return "customer_settlement"; + } + if (hasBankSettlementHint) { + return "bank_settlement"; + } + if (hasDeferredMarkers) { return "deferred_expense"; } diff --git a/llm_normalizer/backend/src/services/problemUnitAssembler.ts b/llm_normalizer/backend/src/services/problemUnitAssembler.ts index b90b158..3e5c4cd 100644 --- a/llm_normalizer/backend/src/services/problemUnitAssembler.ts +++ b/llm_normalizer/backend/src/services/problemUnitAssembler.ts @@ -106,14 +106,67 @@ function stringArrayFromPayload(item: EvidenceItem, key: string): string[] { return stringArrayFromUnknown(item.payload[key]); } +function domainHintsFromSummary(summary: Record): string[] { + const hints: string[] = []; + const purityGuard = toObject(summary.domain_purity_guard); + const domainCardId = String(purityGuard?.domain_card_id ?? "").trim(); + if (domainCardId === "settlements_60_62") { + return ["bank_settlement", "customer_settlement"]; + } + if (domainCardId === "vat_document_register_book") { + return ["vat_flow"]; + } + if (domainCardId === "month_close_costs_20_44") { + return ["period_close"]; + } + + const semanticProfile = toObject(summary.semantic_profile); + const domainScope = stringArrayFromUnknown(semanticProfile?.domain_scope); + for (const domain of domainScope) { + const normalized = domain.toLowerCase(); + if ( + normalized === "bank" || + normalized === "settlements" || + normalized === "suppliers" || + normalized === "supplier_payments" || + normalized === "other_settlements" + ) { + hints.push("bank_settlement"); + continue; + } + if (normalized === "customers") { + hints.push("customer_settlement"); + continue; + } + if (normalized === "vat" || normalized === "taxes") { + hints.push("vat_flow"); + continue; + } + if (normalized === "period_close") { + hints.push("period_close"); + continue; + } + if (normalized === "deferred_expense") { + hints.push("deferred_expense"); + continue; + } + if (normalized === "fixed_assets") { + hints.push("fixed_asset"); + } + } + + return uniqueStrings(hints); +} + function extractSemanticProfile(summary: Record): { relation_patterns: string[]; anomaly_patterns: string[]; } { const semanticProfile = toObject(summary.semantic_profile); + const domainHints = domainHintsFromSummary(summary).map((item) => `domain_hint:${item}`); return { - relation_patterns: stringArrayFromUnknown(semanticProfile?.relation_patterns), - anomaly_patterns: stringArrayFromUnknown(semanticProfile?.anomaly_patterns) + relation_patterns: uniqueStrings([...stringArrayFromUnknown(semanticProfile?.relation_patterns), ...domainHints]), + anomaly_patterns: uniqueStrings([...stringArrayFromUnknown(semanticProfile?.anomaly_patterns), ...domainHints]) }; } diff --git a/llm_normalizer/backend/src/services/questionTypeResolver.ts b/llm_normalizer/backend/src/services/questionTypeResolver.ts new file mode 100644 index 0000000..dec3b58 --- /dev/null +++ b/llm_normalizer/backend/src/services/questionTypeResolver.ts @@ -0,0 +1,60 @@ +export type QuestionTypeClass = + | "why_breaks" + | "where_break_is" + | "prove_or_guess" + | "what_is_it_grounded_on" + | "which_chains_are_complete_vs_incomplete" + | "what_to_check_first" + | "unknown"; + +const QUESTION_TYPE_RULES: Array<{ type: QuestionTypeClass; pattern: RegExp }> = [ + { + type: "what_to_check_first", + pattern: + /(?:\bwhat\s+to\s+check\s+first\b|\bfirst\s+check\b|\bcheck\s+first\b|\u0441\s+\u0447\u0435\u0433\u043e\s+\u043d\u0430\u0447\u0430\u0442\u044c\s+\u043f\u0440\u043e\u0432\u0435\u0440\u043a|\u0447\u0442\u043e\s+\u043f\u0440\u043e\u0432\u0435\u0440\u0438\u0442\u044c\s+\u043f\u0435\u0440\u0432)/iu + }, + { + type: "what_is_it_grounded_on", + pattern: + /(?:\bwhat\s+is\s+it\s+grounded\s+on\b|\bgrounded\s+on\b|\bbased\s+on\b|\bwhat\s+evidence\b|\u043d\u0430\s+\u0447(?:\u0435|\u0451)\u043c\s+\u044d\u0442\u043e\s+\u043e\u0441\u043d\u043e\u0432\u0430\u043d|\u0447\u0435\u043c\s+\u043f\u043e\u0434\u0442\u0432\u0435\u0440\u0436\u0434)/iu + }, + { + type: "prove_or_guess", + pattern: + /(?:\bprove\b|\bguess\b|\bprove\s+or\s+guess\b|\bis\s+it\s+proven\b|\u044d\u0442\u043e\s+\u0434\u043e\u043a\u0430\u0437\u0430\u043d|\u0438\u043b\u0438\s+\u0442\u043e\u043b\u044c\u043a\u043e\s+\u0433\u0438\u043f\u043e\u0442\u0435\u0437|\u0434\u043e\u043a\u0430\u0437\u0430\u043d|\u0434\u043e\u0433\u0430\u0434|\u0435\u0441\u0442\u044c\s+\u043b\u0438|\u043c\u043e\u0436\u0435\u0442\s+\u043b\u0438|\u044d\u0442\u043e\s+\u0443\u0436\u0435.*\u0438\u043b\u0438)/iu + }, + { + type: "which_chains_are_complete_vs_incomplete", + pattern: + /(?:\bcomplete(?:d)?\b.*\bincomplete\b|\bwhich\s+chains?\b|\bcomplete\s+vs\s+incomplete\b|\u043a\u0430\u043a\u0438\u0435\s+\u0446\u0435\u043f\u043e\u0447\u043a[аи]\s+.*\u0437\u0430\u0432\u0435\u0440\u0448|\u0447\u0442\u043e\s+\u0437\u0430\u043a\u0440\u044b\u0442\u043e.*\u0447\u0442\u043e\s+\u043d\u0435\u0442)/iu + }, + { + type: "where_break_is", + pattern: + /(?:\bwhere\s+is\s+the\s+break\b|\bwhere\s+exactly\b|\blocate\b|\u0433\u0434\u0435\s+\u0438\u043c\u0435\u043d\u043d\u043e|\u0433\u0434\u0435\s+\u0440\u0430\u0437\u0440\u044b\u0432|\u0432\s+\u043a\u0430\u043a\u043e\u043c\s+\u043c\u0435\u0441\u0442\u0435)/iu + }, + { + type: "why_breaks", + pattern: + /(?:\bwhy\b|\bwhy\s+does\s+it\s+break\b|\u043f\u043e\u0447\u0435\u043c\u0443|\u0432\s+\u0447(?:\u0435|\u0451)\u043c\s+\u043f\u0440\u0438\u0447\u0438\u043d\u0430|\u0438\u0437-\u0437\u0430\s+\u0447\u0435\u0433\u043e)/iu + } +]; + +export function resolveQuestionType(input: string): QuestionTypeClass { + const text = String(input ?? "").trim(); + if (!text) { + return "unknown"; + } + + for (const rule of QUESTION_TYPE_RULES) { + if (rule.pattern.test(text)) { + return rule.type; + } + } + + if (/[??]/u.test(text)) { + return "why_breaks"; + } + + return "unknown"; +} diff --git a/llm_normalizer/backend/tests/assistantWave12VatMonthCloseConsistencyRegression.test.ts b/llm_normalizer/backend/tests/assistantWave12VatMonthCloseConsistencyRegression.test.ts new file mode 100644 index 0000000..db19104 --- /dev/null +++ b/llm_normalizer/backend/tests/assistantWave12VatMonthCloseConsistencyRegression.test.ts @@ -0,0 +1,397 @@ +import { describe, expect, it } from "vitest"; +import { composeAssistantAnswer } from "../src/services/answerComposer"; +import type { AnswerGroundingCheck, RequirementCoverageReport, UnifiedRetrievalResult } from "../src/types/assistant"; +import type { ProblemUnit } from "../src/types/stage2ProblemUnits"; + +function buildRouteSummary() { + return { + mode: "deterministic_v2" as const, + message_in_scope: true, + scope_confidence: "high" as const, + planner: { + total_fragments: 1, + in_scope_fragments: 1, + out_of_scope_fragments: 0, + discarded_fragments: 0, + contains_multiple_tasks: false + }, + decisions: [], + fallback: { + type: "none" as const, + message: null + } + }; +} + +function buildCoverage(input?: Partial): RequirementCoverageReport { + return { + requirements_total: 1, + requirements_covered: 1, + requirements_uncovered: [], + requirements_partially_covered: [], + clarification_needed_for: [], + out_of_scope_requirements: [], + ...input + }; +} + +function buildGrounding(input?: Partial): AnswerGroundingCheck { + return { + status: "grounded", + route_subject_match: true, + missing_requirements: [], + reasons: [], + why_included_summary: ["wave12-test"], + selection_reason_summary: ["wave12-test"], + ...input + }; +} + +function buildProblemUnit(input: { + id: string; + type: ProblemUnit["problem_unit_type"]; + account: string; + defect: string; + lifecycleDomain?: ProblemUnit["lifecycle_domain"]; +}): ProblemUnit { + return { + schema_version: "problem_unit_v0_1", + problem_unit_id: input.id, + problem_unit_type: input.type, + title: "Wave12 problem unit", + mechanism_summary: `Mechanism candidate: ${input.defect}.`, + business_defect_class: input.defect, + severity: { + score: 0.72, + grade: "high" + }, + confidence: { + score: 0.58, + grade: "medium" + }, + affected_entities: ["Document:DOC-1"], + affected_documents: ["Document:DOC-1"], + affected_postings: ["Posting:POST-1"], + affected_accounts: [input.account], + affected_counterparties: ["Counterparty:CP-1"], + affected_contracts: ["Contract:CTR-1"], + failed_expected_edge: input.defect, + period_impact: { + is_period_sensitive: true, + impact_class: "close_risk" + }, + evidence_pack: ["cand-1"], + entity_backlinks: [{ entity: "Document", id: "DOC-1" }], + snapshot_limitations: [], + ...(input.lifecycleDomain + ? { + lifecycle_domain: input.lifecycleDomain + } + : {}) + }; +} + +function buildRetrieval(input: { + requirementId: string; + status: UnifiedRetrievalResult["status"]; + units?: ProblemUnit[]; + accountScope?: string[]; + domainScope?: string[]; + relationPatterns?: string[]; + limitations?: string[]; + confidence?: UnifiedRetrievalResult["confidence"]; + withEvidence?: boolean; +}): UnifiedRetrievalResult { + const units = input.units ?? []; + const withEvidence = input.withEvidence ?? input.status !== "empty"; + return { + fragment_id: `F-${input.requirementId}`, + requirement_ids: [input.requirementId], + route: "hybrid_store_plus_live", + status: input.status, + result_type: "chain", + items: + input.status === "empty" + ? [] + : [ + { + source_entity: "Document", + source_id: "DOC-1", + account_context: input.accountScope ?? ["60"], + graph_domain_scope: input.domainScope ?? ["bank_settlement"], + relation_pattern_hits: input.relationPatterns ?? ["payment_to_settlement"] + } + ], + summary: { + broad_query_detected: false, + broad_result_flag: false, + minimum_evidence_failed: false, + degraded_to: null, + narrowing_strength: "strong", + semantic_profile: { + account_scope: input.accountScope ?? ["60", "62"], + domain_scope: input.domainScope ?? ["bank_settlement", "customer_settlement"], + relation_patterns: input.relationPatterns ?? ["payment_to_settlement"] + } + }, + evidence: + input.status === "empty" || !withEvidence + ? [] + : [ + { + evidence_id: `ev-${input.requirementId}`, + claim_ref: `requirement:${input.requirementId}`, + source_type: "retrieval_item", + source_ref: { + schema_version: "evidence_source_ref_v1", + namespace: "snapshot_2020", + entity: "document", + id: "DOC-1", + period: "2020-07", + canonical_ref: "evidence_source_ref_v1|snapshot_2020|document|doc-1|2020-07" + }, + pointer: { + fragment_id: `F-${input.requirementId}`, + route: "hybrid_store_plus_live", + source: { + namespace: "snapshot_2020", + entity: "document", + id: "DOC-1", + period: "2020-07" + }, + locator: { + field_path: "risk_score", + item_index: 0 + } + }, + evidence_kind: "mechanism_link", + mechanism_note: (input.relationPatterns ?? ["payment_to_settlement"])[0], + confidence: input.status === "ok" ? "medium" : "low", + limitation: null, + payload: { + risk_score: 4 + } + } + ], + candidate_evidence: [], + problem_units: units, + problem_unit_summary: + units.length > 0 + ? { + schema_version: "problem_unit_summary_v0_1", + units_total: units.length, + duplicate_collapses: 0, + unit_types: units.map((unit) => unit.problem_unit_type), + type_distribution: { + [units[0]?.problem_unit_type ?? "broken_chain_segment"]: units.length + }, + severity_distribution: { + low: 0, + medium: 0, + high: units.length + }, + confidence_distribution: { + low: 0, + medium: units.length, + high: 0 + }, + primary_unit_type: units[0]?.problem_unit_type ?? null + } + : null, + why_included: ["wave12-test"], + selection_reason: ["wave12-test"], + risk_factors: ["wave12"], + business_interpretation: ["wave12"], + confidence: input.confidence ?? (input.status === "ok" ? "medium" : "low"), + limitations: input.limitations ?? [], + errors: [] + }; +} + +function composeCase(input: { + userMessage: string; + focusDomainHint: string | null; + retrievalResults: UnifiedRetrievalResult[]; + coverage?: Partial; + grounding?: Partial; +}) { + return composeAssistantAnswer({ + userMessage: input.userMessage, + routeSummary: buildRouteSummary(), + retrievalResults: input.retrievalResults, + requirements: [ + { + requirement_id: "R1", + source_fragment_id: "F-R1", + requirement_text: "Wave 12 requirement", + subject_tokens: [], + status: "covered", + route: "hybrid_store_plus_live" + } + ], + coverageReport: buildCoverage(input.coverage), + groundingCheck: buildGrounding(input.grounding), + focusDomainHint: input.focusDomainHint, + enableAnswerPolicyV11: true, + enableProblemCentricAnswerV1: true, + enableLifecycleAnswerV1: true + }); +} + +describe("wave12 vat/month-close consistency + confidence reconciliation", () => { + it("vat_query_with_strong_signal_must_override_stale_settlement_focus_hint", () => { + const vatUnit = buildProblemUnit({ + id: "pu-vat-1", + type: "cross_branch_inconsistency_cluster", + account: "68", + defect: "invoice_to_vat", + lifecycleDomain: "vat_flow" + }); + const output = composeCase({ + userMessage: "VAT chain July: invoice exists, but purchase book is empty for accounts 19/68. Why?", + focusDomainHint: "settlements_60_62", + retrievalResults: [ + buildRetrieval({ + requirementId: "R1", + status: "ok", + units: [vatUnit], + accountScope: ["19", "68"], + domainScope: ["vat_flow"], + relationPatterns: ["invoice_to_vat"] + }) + ] + }); + + expect(output.assistant_reply).toMatch(/НДС|vat|книг|счет[-\s]?фактур/i); + expect(output.assistant_reply).not.toMatch(/60\/62|закрыти[ея]\s+расч[её]т/i); + }); + + it("month_close_query_with_strong_signal_must_override_stale_vat_focus_hint", () => { + const closeUnit = buildProblemUnit({ + id: "pu-close-1", + type: "period_risk_cluster", + account: "25", + defect: "close_operation_runs_missing", + lifecycleDomain: "period_close" + }); + const output = composeCase({ + userMessage: "Month close July: costs on accounts 20/44 were allocated partially. Where is the break?", + focusDomainHint: "vat_document_register_book", + retrievalResults: [ + buildRetrieval({ + requirementId: "R1", + status: "ok", + units: [closeUnit], + accountScope: ["25", "26"], + domainScope: ["period_close"], + relationPatterns: ["close_operation_runs"] + }) + ] + }); + + expect(output.assistant_reply).toMatch(/закрыти|месяц|затрат|распредел|month close|20\/44/i); + expect(output.assistant_reply).not.toMatch(/НДС|счет[-\s]?фактур|книг/i); + }); + + it("vat_domain_with_foreign_primary_evidence_must_degrade_to_clarification", () => { + const output = composeCase({ + userMessage: "Проверь НДС-цепочку по июлю: документ -> регистр -> книга.", + focusDomainHint: "vat_document_register_book", + retrievalResults: [ + buildRetrieval({ + requirementId: "R1", + status: "ok", + units: [], + accountScope: ["20"], + domainScope: ["period_close", "fixed_asset"], + relationPatterns: ["allocation_rules_resolved"] + }) + ] + }); + + expect(output.reply_type).toBe("clarification_required"); + expect(output.assistant_reply).toContain("Ограничения:"); + expect(output.assistant_reply).not.toContain("Опора достаточна для первичного вывода."); + }); + + it("month_close_domain_with_vat_primary_evidence_must_degrade_to_clarification", () => { + const output = composeCase({ + userMessage: "Проверь закрытие месяца и контур затрат 20-44 за июль.", + focusDomainHint: "month_close_costs_20_44", + retrievalResults: [ + buildRetrieval({ + requirementId: "R1", + status: "ok", + units: [], + accountScope: ["19", "68"], + domainScope: ["vat_flow"], + relationPatterns: ["invoice_to_vat"] + }) + ] + }); + + expect(output.reply_type).toBe("clarification_required"); + expect(output.assistant_reply).toContain("Ограничения:"); + expect(output.assistant_reply).not.toContain("Опора достаточна для первичного вывода."); + }); + + it("confidence_limitation_must_not_contradict_each_other", () => { + const vatUnit = buildProblemUnit({ + id: "pu-vat-2", + type: "lifecycle_anomaly_node", + account: "68", + defect: "invoice_to_vat", + lifecycleDomain: "vat_flow" + }); + const output = composeCase({ + userMessage: "Почему по НДС есть сигнал, но механизм выглядит неполным?", + focusDomainHint: "vat_document_register_book", + retrievalResults: [ + buildRetrieval({ + requirementId: "R1", + status: "ok", + units: [vatUnit], + accountScope: ["19", "68"], + domainScope: ["vat_flow"], + relationPatterns: ["invoice_to_vat"], + limitations: ["Source mapping is weak for part of the evidence."] + }) + ], + grounding: { + status: "partial", + reasons: ["Mechanism is unresolved for part of the evidence."] + } + }); + + expect(output.answer_structure_v11?.mechanism_block?.status).toBe("limited"); + expect(output.assistant_reply).toContain("Ограничения:"); + expect(output.assistant_reply).not.toContain("Опора достаточна для первичного вывода."); + }); + + it("settlement_regression_must_remain_pass", () => { + const settlementUnit = buildProblemUnit({ + id: "pu-settlement-1", + type: "broken_chain_segment", + account: "62", + defect: "failed_edge:payment_to_settlement", + lifecycleDomain: "customer_settlement" + }); + const output = composeCase({ + userMessage: "Оплата есть, но 62.01/62.02 не сходятся. Почему долг остался?", + focusDomainHint: "settlements_60_62", + retrievalResults: [ + buildRetrieval({ + requirementId: "R1", + status: "ok", + units: [settlementUnit], + accountScope: ["62.01", "62.02"], + domainScope: ["customer_settlement"], + relationPatterns: ["payment_to_settlement"] + }) + ] + }); + + expect(output.assistant_reply).toMatch(/62\.01|62\.02|расч[её]т|зач[её]т/i); + expect(output.assistant_reply).not.toMatch(/НДС|книг|закрыти[ея]\s+месяц/i); + }); +}); diff --git a/llm_normalizer/backend/tests/assistantWave13DomainFitQuestionTypeAnchorRegression.test.ts b/llm_normalizer/backend/tests/assistantWave13DomainFitQuestionTypeAnchorRegression.test.ts new file mode 100644 index 0000000..1476f6f --- /dev/null +++ b/llm_normalizer/backend/tests/assistantWave13DomainFitQuestionTypeAnchorRegression.test.ts @@ -0,0 +1,405 @@ +import { describe, expect, it } from "vitest"; +import { composeAssistantAnswer } from "../src/services/answerComposer"; +import type { AnswerGroundingCheck, RequirementCoverageReport, UnifiedRetrievalResult } from "../src/types/assistant"; +import type { ProblemUnit } from "../src/types/stage2ProblemUnits"; +import type { QuestionTypeClass } from "../src/services/questionTypeResolver"; + +function buildRouteSummary() { + return { + mode: "deterministic_v2" as const, + message_in_scope: true, + scope_confidence: "high" as const, + planner: { + total_fragments: 1, + in_scope_fragments: 1, + out_of_scope_fragments: 0, + discarded_fragments: 0, + contains_multiple_tasks: false + }, + decisions: [], + fallback: { + type: "none" as const, + message: null + } + }; +} + +function buildCoverage(input?: Partial): RequirementCoverageReport { + return { + requirements_total: 1, + requirements_covered: 1, + requirements_uncovered: [], + requirements_partially_covered: [], + clarification_needed_for: [], + out_of_scope_requirements: [], + ...input + }; +} + +function buildGrounding(input?: Partial): AnswerGroundingCheck { + return { + status: "grounded", + route_subject_match: true, + missing_requirements: [], + reasons: [], + why_included_summary: ["wave13-test"], + selection_reason_summary: ["wave13-test"], + ...input + }; +} + +function buildProblemUnit(input: { + id: string; + type: ProblemUnit["problem_unit_type"]; + account: string; + defect: string; + lifecycleDomain: ProblemUnit["lifecycle_domain"]; +}): ProblemUnit { + return { + schema_version: "problem_unit_v0_1", + problem_unit_id: input.id, + problem_unit_type: input.type, + title: "Wave13 problem unit", + mechanism_summary: `Mechanism candidate: ${input.defect}.`, + business_defect_class: input.defect, + severity: { + score: 0.72, + grade: "high" + }, + confidence: { + score: 0.63, + grade: "medium" + }, + lifecycle_domain: input.lifecycleDomain, + affected_entities: ["Document:DOC-1"], + affected_documents: ["Document:DOC-1"], + affected_postings: ["Posting:POST-1"], + affected_accounts: [input.account], + affected_counterparties: ["Counterparty:CP-1"], + affected_contracts: ["Contract:CTR-1"], + failed_expected_edge: input.defect, + period_impact: { + is_period_sensitive: true, + impact_class: "close_risk" + }, + evidence_pack: ["cand-1"], + entity_backlinks: [{ entity: "Document", id: "DOC-1" }], + snapshot_limitations: [] + }; +} + +function buildRetrieval(input: { + requirementId: string; + status: UnifiedRetrievalResult["status"]; + units?: ProblemUnit[]; + accountScope?: string[]; + domainScope?: string[]; + relationPatterns?: string[]; + withEvidence?: boolean; + notes?: string[]; +}): UnifiedRetrievalResult { + const units = input.units ?? []; + const withEvidence = input.withEvidence ?? input.status !== "empty"; + const accountScope = input.accountScope ?? ["60", "62"]; + const domainScope = input.domainScope ?? ["bank_settlement"]; + const relationPatterns = input.relationPatterns ?? ["payment_to_settlement"]; + return { + fragment_id: `F-${input.requirementId}`, + requirement_ids: [input.requirementId], + route: "hybrid_store_plus_live", + status: input.status, + result_type: "chain", + items: + input.status === "empty" + ? [] + : [ + { + source_entity: "Document", + source_id: "DOC-1", + display_name: "Счет № 4 от 07.07.20", + account_context: accountScope, + graph_domain_scope: domainScope, + relation_pattern_hits: relationPatterns, + period: "2020-07", + amount: "276 873,60" + } + ], + summary: { + broad_query_detected: false, + broad_result_flag: false, + minimum_evidence_failed: false, + degraded_to: null, + narrowing_strength: "strong", + semantic_profile: { + account_scope: accountScope, + domain_scope: domainScope, + relation_patterns: relationPatterns, + period_scope: { + from: "2020-07-01", + to: "2020-07-31", + granularity: "month" + } + } + }, + evidence: + input.status === "empty" || !withEvidence + ? [] + : [ + { + evidence_id: `ev-${input.requirementId}`, + claim_ref: `requirement:${input.requirementId}`, + source_type: "retrieval_item", + source_ref: { + schema_version: "evidence_source_ref_v1", + namespace: "snapshot_2020_07", + entity: "document", + id: "DOC-1", + period: "2020-07", + canonical_ref: "evidence_source_ref_v1|snapshot_2020_07|document|doc-1|2020-07" + }, + pointer: { + fragment_id: `F-${input.requirementId}`, + route: "hybrid_store_plus_live", + source: { + namespace: "snapshot_2020_07", + entity: "document", + id: "DOC-1", + period: "2020-07" + }, + locator: { + field_path: "risk_score", + item_index: 0 + } + }, + evidence_kind: "mechanism_link", + mechanism_note: relationPatterns[0], + confidence: input.status === "ok" ? "medium" : "low", + limitation: null, + payload: { + notes: input.notes ?? [], + contract: "договор № 01/19-ПТ", + amount: "276 873,60", + date: "07.07.20" + } + } + ], + candidate_evidence: [], + problem_units: units, + problem_unit_summary: + units.length > 0 + ? { + schema_version: "problem_unit_summary_v0_1", + units_total: units.length, + duplicate_collapses: 0, + unit_types: units.map((unit) => unit.problem_unit_type), + type_distribution: { + [units[0]?.problem_unit_type ?? "broken_chain_segment"]: units.length + }, + severity_distribution: { + low: 0, + medium: 0, + high: units.length + }, + confidence_distribution: { + low: 0, + medium: units.length, + high: 0 + }, + primary_unit_type: units[0]?.problem_unit_type ?? null + } + : null, + why_included: ["wave13-test"], + selection_reason: ["wave13-test"], + risk_factors: ["wave13"], + business_interpretation: ["wave13"], + confidence: input.status === "ok" ? "medium" : "low", + limitations: [], + errors: [] + }; +} + +function composeCase(input: { + userMessage: string; + questionType: QuestionTypeClass; + focusDomainHint: string | null; + retrievalResults: UnifiedRetrievalResult[]; + coverage?: Partial; + grounding?: Partial; +}) { + return composeAssistantAnswer({ + userMessage: input.userMessage, + routeSummary: buildRouteSummary(), + retrievalResults: input.retrievalResults, + requirements: [ + { + requirement_id: "R1", + source_fragment_id: "F-R1", + requirement_text: "Wave13 requirement", + subject_tokens: [], + status: "covered", + route: "hybrid_store_plus_live" + } + ], + coverageReport: buildCoverage(input.coverage), + groundingCheck: buildGrounding(input.grounding), + focusDomainHint: input.focusDomainHint, + questionTypeHint: input.questionType, + companyAnchors: { + contract_numbers: ["договор № 01/19-ПТ"], + document_numbers: ["документ № 4"], + dates: ["07.07.20", "13.07.20"], + amounts: ["276 873,60"], + accounts: ["62.02"], + periods: ["июль 2020"], + document_types: ["payment", "invoice"], + all: [ + "договор № 01/19-ПТ", + "документ № 4", + "07.07.20", + "13.07.20", + "276 873,60", + "account:62.02", + "period:июль 2020" + ] + }, + enableAnswerPolicyV11: true, + enableProblemCentricAnswerV1: true, + enableLifecycleAnswerV1: true + }); +} + +describe("wave13 domain fit + question-type fit + company-anchor grounding", () => { + it("settlement_query_must_keep_settlement_domain_even_if_retrieval_contains_vat_noise", () => { + const settlementUnit = buildProblemUnit({ + id: "pu-settlement-1", + type: "broken_chain_segment", + account: "62.02", + defect: "failed_edge:payment_to_settlement", + lifecycleDomain: "customer_settlement" + }); + const output = composeCase({ + userMessage: + "Почему по договору № 01/19-ПТ от 09.01.2019 оплата 276 873,60 есть, а 62.01/62.02 все равно не сходятся?", + questionType: "why_breaks", + focusDomainHint: "vat_document_register_book", + retrievalResults: [ + buildRetrieval({ + requirementId: "R1", + status: "ok", + units: [settlementUnit], + accountScope: ["62.01", "62.02", "19"], + domainScope: ["customer_settlement", "vat_flow"], + relationPatterns: ["payment_to_settlement", "invoice_to_vat"] + }) + ] + }); + + expect(output.assistant_reply).toMatch(/расчет|62\.01|62\.02|зачет|зачёт/i); + expect(output.assistant_reply).not.toMatch(/переход от документа к регистру и книге|цепочке ндс/i); + }); + + it("question_type_where_break_is_must_produce_localization_style_line", () => { + const settlementUnit = buildProblemUnit({ + id: "pu-settlement-2", + type: "broken_chain_segment", + account: "62.02", + defect: "failed_edge:payment_to_settlement", + lifecycleDomain: "customer_settlement" + }); + const output = composeCase({ + userMessage: + "Где именно разрыв по договору № 01/19-ПТ: в договоре, объекте расчетов или в связке документов?", + questionType: "where_break_is", + focusDomainHint: "settlements_60_62", + retrievalResults: [buildRetrieval({ requirementId: "R1", status: "ok", units: [settlementUnit] })] + }); + + expect(output.assistant_reply).toMatch(/локализ|узел разрыва|где именно/i); + }); + + it("question_type_prove_or_guess_must_explicitly_separate_proven_vs_hypothesis", () => { + const vatUnit = buildProblemUnit({ + id: "pu-vat-1", + type: "cross_branch_inconsistency_cluster", + account: "68", + defect: "invoice_to_vat", + lifecycleDomain: "vat_flow" + }); + const output = composeCase({ + userMessage: + "По НДС это доказано по данным или это только гипотеза? На чем основано утверждение?", + questionType: "prove_or_guess", + focusDomainHint: "vat_document_register_book", + retrievalResults: [ + buildRetrieval({ + requirementId: "R1", + status: "ok", + units: [vatUnit], + accountScope: ["19", "68"], + domainScope: ["vat_flow"], + relationPatterns: ["invoice_to_vat"] + }) + ], + grounding: { + status: "partial", + reasons: ["Mechanism is unresolved for part of the evidence."] + } + }); + + expect(output.assistant_reply).toMatch(/доказан|гипотез|ограничени/i); + }); + + it("anchor_usage_lines_must_be_present_when_company_anchors_are_used", () => { + const output = composeCase({ + userMessage: + "Оплата по счету № 4 от 07.07.20 на 276 873,60 пришла 13 июля. Что проверить первым по 62.02?", + questionType: "what_to_check_first", + focusDomainHint: "settlements_60_62", + retrievalResults: [buildRetrieval({ requirementId: "R1", status: "ok" })] + }); + + expect(output.assistant_reply).toMatch(/якоря вопроса/i); + expect(output.assistant_reply).toMatch(/договор|07\.07\.20|276 873,60|62\.02/i); + }); + + it("anchor_usage_must_be_honest_when_part_of_anchors_not_confirmed", () => { + const output = composeCase({ + userMessage: + "Почему по договору № 01/19-ПТ не сходится 62.02 в июле 2020, если была оплата 276 873,60?", + questionType: "what_is_it_grounded_on", + focusDomainHint: "settlements_60_62", + retrievalResults: [ + buildRetrieval({ + requirementId: "R1", + status: "ok", + accountScope: ["60"], + domainScope: ["bank_settlement"], + relationPatterns: ["payment_recorded"], + withEvidence: true + }) + ] + }); + + expect(output.assistant_reply).toMatch(/без прямого подтверждения|ограничени/i); + }); + + it("answers_for_different_question_types_must_not_collapse_to_same_generic_pattern", () => { + const baseRetrieval = [buildRetrieval({ requirementId: "R1", status: "ok" })]; + const whereOutput = composeCase({ + userMessage: "Где именно разрыв по 62.01/62.02?", + questionType: "where_break_is", + focusDomainHint: "settlements_60_62", + retrievalResults: baseRetrieval + }); + const checkFirstOutput = composeCase({ + userMessage: "Что проверить первым по 62.01/62.02?", + questionType: "what_to_check_first", + focusDomainHint: "settlements_60_62", + retrievalResults: baseRetrieval + }); + + expect(whereOutput.assistant_reply).not.toEqual(checkFirstOutput.assistant_reply); + expect(whereOutput.assistant_reply).toMatch(/локализ|разрыв/i); + expect(checkFirstOutput.assistant_reply).toMatch(/первый маршрут проверки|начните с первого пункта/i); + }); +}); diff --git a/llm_normalizer/data/eval_cases/eval-7Sx8faOQpm.report.json b/llm_normalizer/data/eval_cases/eval-7Sx8faOQpm.report.json new file mode 100644 index 0000000..e6ad0f7 --- /dev/null +++ b/llm_normalizer/data/eval_cases/eval-7Sx8faOQpm.report.json @@ -0,0 +1,111 @@ +{ + "run_id": "eval-7Sx8faOQpm", + "timestamp": "2026-03-28T09:54:19.332Z", + "mode": "single-pass-strict", + "use_mock": true, + "prompt_version": "normalizer_v2_0_2", + "schema_version": "v2_0_2", + "dataset": { + "source": "inline_raw_questions", + "file": null, + "raw_questions_count": 2 + }, + "cases_total": 2, + "metrics": { + "schema_validation_pass_rate": 100, + "scope_detection_accuracy": null, + "scope_in_scope_rate": 100, + "multi_intent_detected_rate": 0, + "clarification_required_rate": 0, + "avg_fragments_per_message": 1, + "out_of_scope_fragment_rate": 0, + "routed_fragment_rate": 100, + "no_route_fragment_rate": 0, + "route_resolution_accuracy": null, + "no_route_precision": null, + "false_no_route_rate": null, + "execution_state_consistency_rate": 100, + "executable_with_soft_assumptions_rate": 100, + "soft_assumption_used_fragment_rate": 100, + "clarification_precision": null, + "clarification_recall": null, + "false_clarification_rate": null + }, + "budget": { + "requests_total": 0, + "retries_used": 0 + }, + "clarification_eval": { + "labeled_cases": 0, + "true_positive": 0, + "false_positive": 0, + "false_negative": 0 + }, + "route_eval": { + "labeled_cases": 0, + "correct_cases": 0, + "expected_routed_cases": 0, + "no_route_true_positive": 0, + "no_route_false_positive": 0 + }, + "scope_eval": { + "labeled_cases": 0, + "correct_cases": 0 + }, + "execution_state_eval": { + "checks_total": 2, + "checks_passed": 2 + }, + "route_distribution": { + "hybrid_store_plus_live": 2 + }, + "fallback_distribution": { + "none": 2 + }, + "results": [ + { + "case_id": "BQ-001", + "raw_question": "Проверь счет 60 за июнь 2020", + "validation_passed": true, + "message_in_scope": true, + "scope_confidence": "high", + "contains_multiple_tasks": false, + "fragments_total": 1, + "in_scope_fragments": 1, + "out_of_scope_fragments": 0, + "unclear_fragments": 0, + "fallback_type": "none", + "predicted_route_status": "routed", + "expected_route_status": null, + "predicted_no_route_reason": null, + "expected_no_route_reason": null, + "predicted_clarification_required": false, + "expected_clarification_required": null, + "executable_with_soft_assumptions_fragments": 1, + "trace_id": "1wEfvoR_2DJrlV", + "request_count_for_case": 0 + }, + { + "case_id": "BQ-002", + "raw_question": "Покажи риски по НДС и по закрытию", + "validation_passed": true, + "message_in_scope": true, + "scope_confidence": "high", + "contains_multiple_tasks": false, + "fragments_total": 1, + "in_scope_fragments": 1, + "out_of_scope_fragments": 0, + "unclear_fragments": 0, + "fallback_type": "none", + "predicted_route_status": "routed", + "expected_route_status": null, + "predicted_no_route_reason": null, + "expected_no_route_reason": null, + "predicted_clarification_required": false, + "expected_clarification_required": null, + "executable_with_soft_assumptions_fragments": 1, + "trace_id": "wp-jtQq3mp7uMk", + "request_count_for_case": 0 + } + ] +} \ No newline at end of file diff --git a/llm_normalizer/data/eval_cases/eval-DI-aCpLWqK.report.json b/llm_normalizer/data/eval_cases/eval-DI-aCpLWqK.report.json new file mode 100644 index 0000000..6aea3cb --- /dev/null +++ b/llm_normalizer/data/eval_cases/eval-DI-aCpLWqK.report.json @@ -0,0 +1,137 @@ +{ + "run_id": "eval-DI-aCpLWqK", + "timestamp": "2026-03-28T09:54:17.753Z", + "mode": "single-pass-strict", + "use_mock": true, + "prompt_version": "normalizer_v2_0_2", + "schema_version": "v2_0_2", + "dataset": { + "source": "inline_raw_questions", + "file": null, + "raw_questions_count": 3 + }, + "cases_total": 3, + "metrics": { + "schema_validation_pass_rate": 100, + "scope_detection_accuracy": null, + "scope_in_scope_rate": 33.33, + "multi_intent_detected_rate": 0, + "clarification_required_rate": 0, + "avg_fragments_per_message": 1, + "out_of_scope_fragment_rate": 33.33, + "routed_fragment_rate": 66.67, + "no_route_fragment_rate": 33.33, + "route_resolution_accuracy": null, + "no_route_precision": null, + "false_no_route_rate": null, + "execution_state_consistency_rate": 66.67, + "executable_with_soft_assumptions_rate": 100, + "soft_assumption_used_fragment_rate": 100, + "clarification_precision": null, + "clarification_recall": null, + "false_clarification_rate": null + }, + "budget": { + "requests_total": 0, + "retries_used": 0 + }, + "clarification_eval": { + "labeled_cases": 0, + "true_positive": 0, + "false_positive": 0, + "false_negative": 0 + }, + "route_eval": { + "labeled_cases": 0, + "correct_cases": 0, + "expected_routed_cases": 0, + "no_route_true_positive": 0, + "no_route_false_positive": 0 + }, + "scope_eval": { + "labeled_cases": 0, + "correct_cases": 0 + }, + "execution_state_eval": { + "checks_total": 3, + "checks_passed": 2 + }, + "route_distribution": { + "hybrid_store_plus_live": 1, + "no_route": 1, + "batch_refresh_then_store": 1 + }, + "fallback_distribution": { + "none": 1, + "out_of_scope": 1, + "clarification": 1 + }, + "results": [ + { + "case_id": "BQ-001", + "raw_question": "Проверь хвосты по поставщикам и разложи цепочку", + "validation_passed": true, + "message_in_scope": true, + "scope_confidence": "high", + "contains_multiple_tasks": false, + "fragments_total": 1, + "in_scope_fragments": 1, + "out_of_scope_fragments": 0, + "unclear_fragments": 0, + "fallback_type": "none", + "predicted_route_status": "routed", + "expected_route_status": null, + "predicted_no_route_reason": null, + "expected_no_route_reason": null, + "predicted_clarification_required": false, + "expected_clarification_required": null, + "executable_with_soft_assumptions_fragments": 1, + "trace_id": "LQSTgr_jZDLKSE", + "request_count_for_case": 0 + }, + { + "case_id": "BQ-002", + "raw_question": "Как вообще по ФСБУ", + "validation_passed": true, + "message_in_scope": false, + "scope_confidence": "low", + "contains_multiple_tasks": false, + "fragments_total": 1, + "in_scope_fragments": 0, + "out_of_scope_fragments": 1, + "unclear_fragments": 0, + "fallback_type": "out_of_scope", + "predicted_route_status": "no_route", + "expected_route_status": null, + "predicted_no_route_reason": "out_of_scope", + "expected_no_route_reason": null, + "predicted_clarification_required": false, + "expected_clarification_required": null, + "executable_with_soft_assumptions_fragments": 0, + "trace_id": "C_DgPdm03zoNRm", + "request_count_for_case": 0 + }, + { + "case_id": "BQ-003", + "raw_question": "Покажи топ рисков за июнь 2020", + "validation_passed": true, + "message_in_scope": false, + "scope_confidence": "low", + "contains_multiple_tasks": false, + "fragments_total": 1, + "in_scope_fragments": 0, + "out_of_scope_fragments": 0, + "unclear_fragments": 1, + "fallback_type": "clarification", + "predicted_route_status": "routed", + "expected_route_status": null, + "predicted_no_route_reason": null, + "expected_no_route_reason": null, + "predicted_clarification_required": false, + "expected_clarification_required": null, + "executable_with_soft_assumptions_fragments": 0, + "trace_id": "ou478il0iFNsIQ", + "request_count_for_case": 0 + } + ] +} \ No newline at end of file diff --git a/llm_normalizer/data/eval_cases/eval-KEtQ8SYPKI.report.json b/llm_normalizer/data/eval_cases/eval-KEtQ8SYPKI.report.json new file mode 100644 index 0000000..0d65fdd --- /dev/null +++ b/llm_normalizer/data/eval_cases/eval-KEtQ8SYPKI.report.json @@ -0,0 +1,137 @@ +{ + "run_id": "eval-KEtQ8SYPKI", + "timestamp": "2026-03-28T09:23:28.807Z", + "mode": "single-pass-strict", + "use_mock": true, + "prompt_version": "normalizer_v2_0_2", + "schema_version": "v2_0_2", + "dataset": { + "source": "inline_raw_questions", + "file": null, + "raw_questions_count": 3 + }, + "cases_total": 3, + "metrics": { + "schema_validation_pass_rate": 100, + "scope_detection_accuracy": null, + "scope_in_scope_rate": 33.33, + "multi_intent_detected_rate": 0, + "clarification_required_rate": 0, + "avg_fragments_per_message": 1, + "out_of_scope_fragment_rate": 33.33, + "routed_fragment_rate": 66.67, + "no_route_fragment_rate": 33.33, + "route_resolution_accuracy": null, + "no_route_precision": null, + "false_no_route_rate": null, + "execution_state_consistency_rate": 66.67, + "executable_with_soft_assumptions_rate": 100, + "soft_assumption_used_fragment_rate": 100, + "clarification_precision": null, + "clarification_recall": null, + "false_clarification_rate": null + }, + "budget": { + "requests_total": 0, + "retries_used": 0 + }, + "clarification_eval": { + "labeled_cases": 0, + "true_positive": 0, + "false_positive": 0, + "false_negative": 0 + }, + "route_eval": { + "labeled_cases": 0, + "correct_cases": 0, + "expected_routed_cases": 0, + "no_route_true_positive": 0, + "no_route_false_positive": 0 + }, + "scope_eval": { + "labeled_cases": 0, + "correct_cases": 0 + }, + "execution_state_eval": { + "checks_total": 3, + "checks_passed": 2 + }, + "route_distribution": { + "hybrid_store_plus_live": 1, + "no_route": 1, + "batch_refresh_then_store": 1 + }, + "fallback_distribution": { + "none": 1, + "out_of_scope": 1, + "clarification": 1 + }, + "results": [ + { + "case_id": "BQ-001", + "raw_question": "Проверь хвосты по поставщикам и разложи цепочку", + "validation_passed": true, + "message_in_scope": true, + "scope_confidence": "high", + "contains_multiple_tasks": false, + "fragments_total": 1, + "in_scope_fragments": 1, + "out_of_scope_fragments": 0, + "unclear_fragments": 0, + "fallback_type": "none", + "predicted_route_status": "routed", + "expected_route_status": null, + "predicted_no_route_reason": null, + "expected_no_route_reason": null, + "predicted_clarification_required": false, + "expected_clarification_required": null, + "executable_with_soft_assumptions_fragments": 1, + "trace_id": "wKL8CXbPIJDi5V", + "request_count_for_case": 0 + }, + { + "case_id": "BQ-002", + "raw_question": "Как вообще по ФСБУ", + "validation_passed": true, + "message_in_scope": false, + "scope_confidence": "low", + "contains_multiple_tasks": false, + "fragments_total": 1, + "in_scope_fragments": 0, + "out_of_scope_fragments": 1, + "unclear_fragments": 0, + "fallback_type": "out_of_scope", + "predicted_route_status": "no_route", + "expected_route_status": null, + "predicted_no_route_reason": "out_of_scope", + "expected_no_route_reason": null, + "predicted_clarification_required": false, + "expected_clarification_required": null, + "executable_with_soft_assumptions_fragments": 0, + "trace_id": "VGu1HWqb9Ka5QF", + "request_count_for_case": 0 + }, + { + "case_id": "BQ-003", + "raw_question": "Покажи топ рисков за июнь 2020", + "validation_passed": true, + "message_in_scope": false, + "scope_confidence": "low", + "contains_multiple_tasks": false, + "fragments_total": 1, + "in_scope_fragments": 0, + "out_of_scope_fragments": 0, + "unclear_fragments": 1, + "fallback_type": "clarification", + "predicted_route_status": "routed", + "expected_route_status": null, + "predicted_no_route_reason": null, + "expected_no_route_reason": null, + "predicted_clarification_required": false, + "expected_clarification_required": null, + "executable_with_soft_assumptions_fragments": 0, + "trace_id": "rvXo7PioBUelzY", + "request_count_for_case": 0 + } + ] +} \ No newline at end of file diff --git a/llm_normalizer/data/eval_cases/eval-KSX4Dsn7Xg.report.json b/llm_normalizer/data/eval_cases/eval-KSX4Dsn7Xg.report.json new file mode 100644 index 0000000..31a4429 --- /dev/null +++ b/llm_normalizer/data/eval_cases/eval-KSX4Dsn7Xg.report.json @@ -0,0 +1,111 @@ +{ + "run_id": "eval-KSX4Dsn7Xg", + "timestamp": "2026-03-28T09:54:19.191Z", + "mode": "single-pass-strict", + "use_mock": true, + "prompt_version": "normalizer_v2_0_2", + "schema_version": "v2_0_2", + "dataset": { + "source": "inline_raw_questions", + "file": null, + "raw_questions_count": 2 + }, + "cases_total": 2, + "metrics": { + "schema_validation_pass_rate": 100, + "scope_detection_accuracy": null, + "scope_in_scope_rate": 100, + "multi_intent_detected_rate": 0, + "clarification_required_rate": 0, + "avg_fragments_per_message": 1, + "out_of_scope_fragment_rate": 0, + "routed_fragment_rate": 100, + "no_route_fragment_rate": 0, + "route_resolution_accuracy": null, + "no_route_precision": null, + "false_no_route_rate": null, + "execution_state_consistency_rate": 100, + "executable_with_soft_assumptions_rate": 100, + "soft_assumption_used_fragment_rate": 100, + "clarification_precision": null, + "clarification_recall": null, + "false_clarification_rate": null + }, + "budget": { + "requests_total": 0, + "retries_used": 0 + }, + "clarification_eval": { + "labeled_cases": 0, + "true_positive": 0, + "false_positive": 0, + "false_negative": 0 + }, + "route_eval": { + "labeled_cases": 0, + "correct_cases": 0, + "expected_routed_cases": 0, + "no_route_true_positive": 0, + "no_route_false_positive": 0 + }, + "scope_eval": { + "labeled_cases": 0, + "correct_cases": 0 + }, + "execution_state_eval": { + "checks_total": 2, + "checks_passed": 2 + }, + "route_distribution": { + "hybrid_store_plus_live": 2 + }, + "fallback_distribution": { + "none": 2 + }, + "results": [ + { + "case_id": "BQ-001", + "raw_question": "Проверь счет 60 за июнь 2020", + "validation_passed": true, + "message_in_scope": true, + "scope_confidence": "high", + "contains_multiple_tasks": false, + "fragments_total": 1, + "in_scope_fragments": 1, + "out_of_scope_fragments": 0, + "unclear_fragments": 0, + "fallback_type": "none", + "predicted_route_status": "routed", + "expected_route_status": null, + "predicted_no_route_reason": null, + "expected_no_route_reason": null, + "predicted_clarification_required": false, + "expected_clarification_required": null, + "executable_with_soft_assumptions_fragments": 1, + "trace_id": "airV6dR4a5sk0p", + "request_count_for_case": 0 + }, + { + "case_id": "BQ-002", + "raw_question": "Покажи риски по счету 97", + "validation_passed": true, + "message_in_scope": true, + "scope_confidence": "high", + "contains_multiple_tasks": false, + "fragments_total": 1, + "in_scope_fragments": 1, + "out_of_scope_fragments": 0, + "unclear_fragments": 0, + "fallback_type": "none", + "predicted_route_status": "routed", + "expected_route_status": null, + "predicted_no_route_reason": null, + "expected_no_route_reason": null, + "predicted_clarification_required": false, + "expected_clarification_required": null, + "executable_with_soft_assumptions_fragments": 1, + "trace_id": "FpNPo_qn_TakTC", + "request_count_for_case": 0 + } + ] +} \ No newline at end of file diff --git a/llm_normalizer/data/eval_cases/eval-O1pl1uP_Kc.report.json b/llm_normalizer/data/eval_cases/eval-O1pl1uP_Kc.report.json new file mode 100644 index 0000000..00f8c10 --- /dev/null +++ b/llm_normalizer/data/eval_cases/eval-O1pl1uP_Kc.report.json @@ -0,0 +1,111 @@ +{ + "run_id": "eval-O1pl1uP_Kc", + "timestamp": "2026-03-28T09:09:42.733Z", + "mode": "single-pass-strict", + "use_mock": true, + "prompt_version": "normalizer_v2_0_2", + "schema_version": "v2_0_2", + "dataset": { + "source": "inline_raw_questions", + "file": null, + "raw_questions_count": 2 + }, + "cases_total": 2, + "metrics": { + "schema_validation_pass_rate": 100, + "scope_detection_accuracy": null, + "scope_in_scope_rate": 100, + "multi_intent_detected_rate": 0, + "clarification_required_rate": 0, + "avg_fragments_per_message": 1, + "out_of_scope_fragment_rate": 0, + "routed_fragment_rate": 100, + "no_route_fragment_rate": 0, + "route_resolution_accuracy": null, + "no_route_precision": null, + "false_no_route_rate": null, + "execution_state_consistency_rate": 100, + "executable_with_soft_assumptions_rate": 100, + "soft_assumption_used_fragment_rate": 100, + "clarification_precision": null, + "clarification_recall": null, + "false_clarification_rate": null + }, + "budget": { + "requests_total": 0, + "retries_used": 0 + }, + "clarification_eval": { + "labeled_cases": 0, + "true_positive": 0, + "false_positive": 0, + "false_negative": 0 + }, + "route_eval": { + "labeled_cases": 0, + "correct_cases": 0, + "expected_routed_cases": 0, + "no_route_true_positive": 0, + "no_route_false_positive": 0 + }, + "scope_eval": { + "labeled_cases": 0, + "correct_cases": 0 + }, + "execution_state_eval": { + "checks_total": 2, + "checks_passed": 2 + }, + "route_distribution": { + "hybrid_store_plus_live": 2 + }, + "fallback_distribution": { + "none": 2 + }, + "results": [ + { + "case_id": "BQ-001", + "raw_question": "Проверь счет 60 за июнь 2020", + "validation_passed": true, + "message_in_scope": true, + "scope_confidence": "high", + "contains_multiple_tasks": false, + "fragments_total": 1, + "in_scope_fragments": 1, + "out_of_scope_fragments": 0, + "unclear_fragments": 0, + "fallback_type": "none", + "predicted_route_status": "routed", + "expected_route_status": null, + "predicted_no_route_reason": null, + "expected_no_route_reason": null, + "predicted_clarification_required": false, + "expected_clarification_required": null, + "executable_with_soft_assumptions_fragments": 1, + "trace_id": "DYfGFpealu2tnx", + "request_count_for_case": 0 + }, + { + "case_id": "BQ-002", + "raw_question": "Покажи риски по НДС и по закрытию", + "validation_passed": true, + "message_in_scope": true, + "scope_confidence": "high", + "contains_multiple_tasks": false, + "fragments_total": 1, + "in_scope_fragments": 1, + "out_of_scope_fragments": 0, + "unclear_fragments": 0, + "fallback_type": "none", + "predicted_route_status": "routed", + "expected_route_status": null, + "predicted_no_route_reason": null, + "expected_no_route_reason": null, + "predicted_clarification_required": false, + "expected_clarification_required": null, + "executable_with_soft_assumptions_fragments": 1, + "trace_id": "ZweuomcToD_MJn", + "request_count_for_case": 0 + } + ] +} \ No newline at end of file diff --git a/llm_normalizer/data/eval_cases/eval-WTP_zUxzSk.report.json b/llm_normalizer/data/eval_cases/eval-WTP_zUxzSk.report.json new file mode 100644 index 0000000..19df6c2 --- /dev/null +++ b/llm_normalizer/data/eval_cases/eval-WTP_zUxzSk.report.json @@ -0,0 +1,111 @@ +{ + "run_id": "eval-WTP_zUxzSk", + "timestamp": "2026-03-28T09:23:30.429Z", + "mode": "single-pass-strict", + "use_mock": true, + "prompt_version": "normalizer_v2_0_2", + "schema_version": "v2_0_2", + "dataset": { + "source": "inline_raw_questions", + "file": null, + "raw_questions_count": 2 + }, + "cases_total": 2, + "metrics": { + "schema_validation_pass_rate": 100, + "scope_detection_accuracy": null, + "scope_in_scope_rate": 100, + "multi_intent_detected_rate": 0, + "clarification_required_rate": 0, + "avg_fragments_per_message": 1, + "out_of_scope_fragment_rate": 0, + "routed_fragment_rate": 100, + "no_route_fragment_rate": 0, + "route_resolution_accuracy": null, + "no_route_precision": null, + "false_no_route_rate": null, + "execution_state_consistency_rate": 100, + "executable_with_soft_assumptions_rate": 100, + "soft_assumption_used_fragment_rate": 100, + "clarification_precision": null, + "clarification_recall": null, + "false_clarification_rate": null + }, + "budget": { + "requests_total": 0, + "retries_used": 0 + }, + "clarification_eval": { + "labeled_cases": 0, + "true_positive": 0, + "false_positive": 0, + "false_negative": 0 + }, + "route_eval": { + "labeled_cases": 0, + "correct_cases": 0, + "expected_routed_cases": 0, + "no_route_true_positive": 0, + "no_route_false_positive": 0 + }, + "scope_eval": { + "labeled_cases": 0, + "correct_cases": 0 + }, + "execution_state_eval": { + "checks_total": 2, + "checks_passed": 2 + }, + "route_distribution": { + "hybrid_store_plus_live": 2 + }, + "fallback_distribution": { + "none": 2 + }, + "results": [ + { + "case_id": "BQ-001", + "raw_question": "Проверь счет 60 за июнь 2020", + "validation_passed": true, + "message_in_scope": true, + "scope_confidence": "high", + "contains_multiple_tasks": false, + "fragments_total": 1, + "in_scope_fragments": 1, + "out_of_scope_fragments": 0, + "unclear_fragments": 0, + "fallback_type": "none", + "predicted_route_status": "routed", + "expected_route_status": null, + "predicted_no_route_reason": null, + "expected_no_route_reason": null, + "predicted_clarification_required": false, + "expected_clarification_required": null, + "executable_with_soft_assumptions_fragments": 1, + "trace_id": "pgJG_e5WDfRPGr", + "request_count_for_case": 0 + }, + { + "case_id": "BQ-002", + "raw_question": "Покажи риски по НДС и по закрытию", + "validation_passed": true, + "message_in_scope": true, + "scope_confidence": "high", + "contains_multiple_tasks": false, + "fragments_total": 1, + "in_scope_fragments": 1, + "out_of_scope_fragments": 0, + "unclear_fragments": 0, + "fallback_type": "none", + "predicted_route_status": "routed", + "expected_route_status": null, + "predicted_no_route_reason": null, + "expected_no_route_reason": null, + "predicted_clarification_required": false, + "expected_clarification_required": null, + "executable_with_soft_assumptions_fragments": 1, + "trace_id": "0iHAq9Sf3LQoGv", + "request_count_for_case": 0 + } + ] +} \ No newline at end of file diff --git a/llm_normalizer/data/eval_cases/eval-XunNzxsTLF.report.json b/llm_normalizer/data/eval_cases/eval-XunNzxsTLF.report.json new file mode 100644 index 0000000..9aeea47 --- /dev/null +++ b/llm_normalizer/data/eval_cases/eval-XunNzxsTLF.report.json @@ -0,0 +1,137 @@ +{ + "run_id": "eval-XunNzxsTLF", + "timestamp": "2026-03-28T09:43:10.148Z", + "mode": "single-pass-strict", + "use_mock": true, + "prompt_version": "normalizer_v2_0_2", + "schema_version": "v2_0_2", + "dataset": { + "source": "inline_raw_questions", + "file": null, + "raw_questions_count": 3 + }, + "cases_total": 3, + "metrics": { + "schema_validation_pass_rate": 100, + "scope_detection_accuracy": null, + "scope_in_scope_rate": 33.33, + "multi_intent_detected_rate": 0, + "clarification_required_rate": 0, + "avg_fragments_per_message": 1, + "out_of_scope_fragment_rate": 33.33, + "routed_fragment_rate": 66.67, + "no_route_fragment_rate": 33.33, + "route_resolution_accuracy": null, + "no_route_precision": null, + "false_no_route_rate": null, + "execution_state_consistency_rate": 66.67, + "executable_with_soft_assumptions_rate": 100, + "soft_assumption_used_fragment_rate": 100, + "clarification_precision": null, + "clarification_recall": null, + "false_clarification_rate": null + }, + "budget": { + "requests_total": 0, + "retries_used": 0 + }, + "clarification_eval": { + "labeled_cases": 0, + "true_positive": 0, + "false_positive": 0, + "false_negative": 0 + }, + "route_eval": { + "labeled_cases": 0, + "correct_cases": 0, + "expected_routed_cases": 0, + "no_route_true_positive": 0, + "no_route_false_positive": 0 + }, + "scope_eval": { + "labeled_cases": 0, + "correct_cases": 0 + }, + "execution_state_eval": { + "checks_total": 3, + "checks_passed": 2 + }, + "route_distribution": { + "hybrid_store_plus_live": 1, + "no_route": 1, + "batch_refresh_then_store": 1 + }, + "fallback_distribution": { + "none": 1, + "out_of_scope": 1, + "clarification": 1 + }, + "results": [ + { + "case_id": "BQ-001", + "raw_question": "Проверь хвосты по поставщикам и разложи цепочку", + "validation_passed": true, + "message_in_scope": true, + "scope_confidence": "high", + "contains_multiple_tasks": false, + "fragments_total": 1, + "in_scope_fragments": 1, + "out_of_scope_fragments": 0, + "unclear_fragments": 0, + "fallback_type": "none", + "predicted_route_status": "routed", + "expected_route_status": null, + "predicted_no_route_reason": null, + "expected_no_route_reason": null, + "predicted_clarification_required": false, + "expected_clarification_required": null, + "executable_with_soft_assumptions_fragments": 1, + "trace_id": "iwEKj8yXXL_94j", + "request_count_for_case": 0 + }, + { + "case_id": "BQ-002", + "raw_question": "Как вообще по ФСБУ", + "validation_passed": true, + "message_in_scope": false, + "scope_confidence": "low", + "contains_multiple_tasks": false, + "fragments_total": 1, + "in_scope_fragments": 0, + "out_of_scope_fragments": 1, + "unclear_fragments": 0, + "fallback_type": "out_of_scope", + "predicted_route_status": "no_route", + "expected_route_status": null, + "predicted_no_route_reason": "out_of_scope", + "expected_no_route_reason": null, + "predicted_clarification_required": false, + "expected_clarification_required": null, + "executable_with_soft_assumptions_fragments": 0, + "trace_id": "Ww5O8UQ8xVYdqX", + "request_count_for_case": 0 + }, + { + "case_id": "BQ-003", + "raw_question": "Покажи топ рисков за июнь 2020", + "validation_passed": true, + "message_in_scope": false, + "scope_confidence": "low", + "contains_multiple_tasks": false, + "fragments_total": 1, + "in_scope_fragments": 0, + "out_of_scope_fragments": 0, + "unclear_fragments": 1, + "fallback_type": "clarification", + "predicted_route_status": "routed", + "expected_route_status": null, + "predicted_no_route_reason": null, + "expected_no_route_reason": null, + "predicted_clarification_required": false, + "expected_clarification_required": null, + "executable_with_soft_assumptions_fragments": 0, + "trace_id": "U2ODfQtj3y9ieS", + "request_count_for_case": 0 + } + ] +} \ No newline at end of file diff --git a/llm_normalizer/data/eval_cases/eval-XyP2pAdsJB.report.json b/llm_normalizer/data/eval_cases/eval-XyP2pAdsJB.report.json new file mode 100644 index 0000000..cf05b44 --- /dev/null +++ b/llm_normalizer/data/eval_cases/eval-XyP2pAdsJB.report.json @@ -0,0 +1,111 @@ +{ + "run_id": "eval-XyP2pAdsJB", + "timestamp": "2026-03-28T09:58:48.945Z", + "mode": "single-pass-strict", + "use_mock": true, + "prompt_version": "normalizer_v2_0_2", + "schema_version": "v2_0_2", + "dataset": { + "source": "inline_raw_questions", + "file": null, + "raw_questions_count": 2 + }, + "cases_total": 2, + "metrics": { + "schema_validation_pass_rate": 100, + "scope_detection_accuracy": null, + "scope_in_scope_rate": 100, + "multi_intent_detected_rate": 0, + "clarification_required_rate": 0, + "avg_fragments_per_message": 1, + "out_of_scope_fragment_rate": 0, + "routed_fragment_rate": 100, + "no_route_fragment_rate": 0, + "route_resolution_accuracy": null, + "no_route_precision": null, + "false_no_route_rate": null, + "execution_state_consistency_rate": 100, + "executable_with_soft_assumptions_rate": 100, + "soft_assumption_used_fragment_rate": 100, + "clarification_precision": null, + "clarification_recall": null, + "false_clarification_rate": null + }, + "budget": { + "requests_total": 0, + "retries_used": 0 + }, + "clarification_eval": { + "labeled_cases": 0, + "true_positive": 0, + "false_positive": 0, + "false_negative": 0 + }, + "route_eval": { + "labeled_cases": 0, + "correct_cases": 0, + "expected_routed_cases": 0, + "no_route_true_positive": 0, + "no_route_false_positive": 0 + }, + "scope_eval": { + "labeled_cases": 0, + "correct_cases": 0 + }, + "execution_state_eval": { + "checks_total": 2, + "checks_passed": 2 + }, + "route_distribution": { + "hybrid_store_plus_live": 2 + }, + "fallback_distribution": { + "none": 2 + }, + "results": [ + { + "case_id": "BQ-001", + "raw_question": "Проверь счет 60 за июнь 2020", + "validation_passed": true, + "message_in_scope": true, + "scope_confidence": "high", + "contains_multiple_tasks": false, + "fragments_total": 1, + "in_scope_fragments": 1, + "out_of_scope_fragments": 0, + "unclear_fragments": 0, + "fallback_type": "none", + "predicted_route_status": "routed", + "expected_route_status": null, + "predicted_no_route_reason": null, + "expected_no_route_reason": null, + "predicted_clarification_required": false, + "expected_clarification_required": null, + "executable_with_soft_assumptions_fragments": 1, + "trace_id": "KH-f-MZ4cm--WR", + "request_count_for_case": 0 + }, + { + "case_id": "BQ-002", + "raw_question": "Покажи риски по счету 97", + "validation_passed": true, + "message_in_scope": true, + "scope_confidence": "high", + "contains_multiple_tasks": false, + "fragments_total": 1, + "in_scope_fragments": 1, + "out_of_scope_fragments": 0, + "unclear_fragments": 0, + "fallback_type": "none", + "predicted_route_status": "routed", + "expected_route_status": null, + "predicted_no_route_reason": null, + "expected_no_route_reason": null, + "predicted_clarification_required": false, + "expected_clarification_required": null, + "executable_with_soft_assumptions_fragments": 1, + "trace_id": "yiAsfJaSW5Qm3o", + "request_count_for_case": 0 + } + ] +} \ No newline at end of file diff --git a/llm_normalizer/data/eval_cases/eval-a3v4udOWPA.report.json b/llm_normalizer/data/eval_cases/eval-a3v4udOWPA.report.json new file mode 100644 index 0000000..cf568f3 --- /dev/null +++ b/llm_normalizer/data/eval_cases/eval-a3v4udOWPA.report.json @@ -0,0 +1,111 @@ +{ + "run_id": "eval-a3v4udOWPA", + "timestamp": "2026-03-28T09:43:11.528Z", + "mode": "single-pass-strict", + "use_mock": true, + "prompt_version": "normalizer_v2_0_2", + "schema_version": "v2_0_2", + "dataset": { + "source": "inline_raw_questions", + "file": null, + "raw_questions_count": 2 + }, + "cases_total": 2, + "metrics": { + "schema_validation_pass_rate": 100, + "scope_detection_accuracy": null, + "scope_in_scope_rate": 100, + "multi_intent_detected_rate": 0, + "clarification_required_rate": 0, + "avg_fragments_per_message": 1, + "out_of_scope_fragment_rate": 0, + "routed_fragment_rate": 100, + "no_route_fragment_rate": 0, + "route_resolution_accuracy": null, + "no_route_precision": null, + "false_no_route_rate": null, + "execution_state_consistency_rate": 100, + "executable_with_soft_assumptions_rate": 100, + "soft_assumption_used_fragment_rate": 100, + "clarification_precision": null, + "clarification_recall": null, + "false_clarification_rate": null + }, + "budget": { + "requests_total": 0, + "retries_used": 0 + }, + "clarification_eval": { + "labeled_cases": 0, + "true_positive": 0, + "false_positive": 0, + "false_negative": 0 + }, + "route_eval": { + "labeled_cases": 0, + "correct_cases": 0, + "expected_routed_cases": 0, + "no_route_true_positive": 0, + "no_route_false_positive": 0 + }, + "scope_eval": { + "labeled_cases": 0, + "correct_cases": 0 + }, + "execution_state_eval": { + "checks_total": 2, + "checks_passed": 2 + }, + "route_distribution": { + "hybrid_store_plus_live": 2 + }, + "fallback_distribution": { + "none": 2 + }, + "results": [ + { + "case_id": "BQ-001", + "raw_question": "Проверь счет 60 за июнь 2020", + "validation_passed": true, + "message_in_scope": true, + "scope_confidence": "high", + "contains_multiple_tasks": false, + "fragments_total": 1, + "in_scope_fragments": 1, + "out_of_scope_fragments": 0, + "unclear_fragments": 0, + "fallback_type": "none", + "predicted_route_status": "routed", + "expected_route_status": null, + "predicted_no_route_reason": null, + "expected_no_route_reason": null, + "predicted_clarification_required": false, + "expected_clarification_required": null, + "executable_with_soft_assumptions_fragments": 1, + "trace_id": "YGuB_vMi7qg803", + "request_count_for_case": 0 + }, + { + "case_id": "BQ-002", + "raw_question": "Покажи риски по счету 97", + "validation_passed": true, + "message_in_scope": true, + "scope_confidence": "high", + "contains_multiple_tasks": false, + "fragments_total": 1, + "in_scope_fragments": 1, + "out_of_scope_fragments": 0, + "unclear_fragments": 0, + "fallback_type": "none", + "predicted_route_status": "routed", + "expected_route_status": null, + "predicted_no_route_reason": null, + "expected_no_route_reason": null, + "predicted_clarification_required": false, + "expected_clarification_required": null, + "executable_with_soft_assumptions_fragments": 1, + "trace_id": "snXRfmWFgVHm3i", + "request_count_for_case": 0 + } + ] +} \ No newline at end of file diff --git a/llm_normalizer/data/eval_cases/eval-do0FoiH-Xe.report.json b/llm_normalizer/data/eval_cases/eval-do0FoiH-Xe.report.json new file mode 100644 index 0000000..cef8e2d --- /dev/null +++ b/llm_normalizer/data/eval_cases/eval-do0FoiH-Xe.report.json @@ -0,0 +1,111 @@ +{ + "run_id": "eval-do0FoiH-Xe", + "timestamp": "2026-03-28T09:43:11.767Z", + "mode": "single-pass-strict", + "use_mock": true, + "prompt_version": "normalizer_v2_0_2", + "schema_version": "v2_0_2", + "dataset": { + "source": "inline_raw_questions", + "file": null, + "raw_questions_count": 2 + }, + "cases_total": 2, + "metrics": { + "schema_validation_pass_rate": 100, + "scope_detection_accuracy": null, + "scope_in_scope_rate": 100, + "multi_intent_detected_rate": 0, + "clarification_required_rate": 0, + "avg_fragments_per_message": 1, + "out_of_scope_fragment_rate": 0, + "routed_fragment_rate": 100, + "no_route_fragment_rate": 0, + "route_resolution_accuracy": null, + "no_route_precision": null, + "false_no_route_rate": null, + "execution_state_consistency_rate": 100, + "executable_with_soft_assumptions_rate": 100, + "soft_assumption_used_fragment_rate": 100, + "clarification_precision": null, + "clarification_recall": null, + "false_clarification_rate": null + }, + "budget": { + "requests_total": 0, + "retries_used": 0 + }, + "clarification_eval": { + "labeled_cases": 0, + "true_positive": 0, + "false_positive": 0, + "false_negative": 0 + }, + "route_eval": { + "labeled_cases": 0, + "correct_cases": 0, + "expected_routed_cases": 0, + "no_route_true_positive": 0, + "no_route_false_positive": 0 + }, + "scope_eval": { + "labeled_cases": 0, + "correct_cases": 0 + }, + "execution_state_eval": { + "checks_total": 2, + "checks_passed": 2 + }, + "route_distribution": { + "hybrid_store_plus_live": 2 + }, + "fallback_distribution": { + "none": 2 + }, + "results": [ + { + "case_id": "BQ-001", + "raw_question": "Проверь счет 60 за июнь 2020", + "validation_passed": true, + "message_in_scope": true, + "scope_confidence": "high", + "contains_multiple_tasks": false, + "fragments_total": 1, + "in_scope_fragments": 1, + "out_of_scope_fragments": 0, + "unclear_fragments": 0, + "fallback_type": "none", + "predicted_route_status": "routed", + "expected_route_status": null, + "predicted_no_route_reason": null, + "expected_no_route_reason": null, + "predicted_clarification_required": false, + "expected_clarification_required": null, + "executable_with_soft_assumptions_fragments": 1, + "trace_id": "dOLnE76GCDaC7H", + "request_count_for_case": 0 + }, + { + "case_id": "BQ-002", + "raw_question": "Покажи риски по НДС и по закрытию", + "validation_passed": true, + "message_in_scope": true, + "scope_confidence": "high", + "contains_multiple_tasks": false, + "fragments_total": 1, + "in_scope_fragments": 1, + "out_of_scope_fragments": 0, + "unclear_fragments": 0, + "fallback_type": "none", + "predicted_route_status": "routed", + "expected_route_status": null, + "predicted_no_route_reason": null, + "expected_no_route_reason": null, + "predicted_clarification_required": false, + "expected_clarification_required": null, + "executable_with_soft_assumptions_fragments": 1, + "trace_id": "cTDkKra0xTEPWK", + "request_count_for_case": 0 + } + ] +} \ No newline at end of file diff --git a/llm_normalizer/data/eval_cases/eval-gt_MR9X37J.report.json b/llm_normalizer/data/eval_cases/eval-gt_MR9X37J.report.json new file mode 100644 index 0000000..fdfd18f --- /dev/null +++ b/llm_normalizer/data/eval_cases/eval-gt_MR9X37J.report.json @@ -0,0 +1,111 @@ +{ + "run_id": "eval-gt_MR9X37J", + "timestamp": "2026-03-28T09:09:42.504Z", + "mode": "single-pass-strict", + "use_mock": true, + "prompt_version": "normalizer_v2_0_2", + "schema_version": "v2_0_2", + "dataset": { + "source": "inline_raw_questions", + "file": null, + "raw_questions_count": 2 + }, + "cases_total": 2, + "metrics": { + "schema_validation_pass_rate": 100, + "scope_detection_accuracy": null, + "scope_in_scope_rate": 100, + "multi_intent_detected_rate": 0, + "clarification_required_rate": 0, + "avg_fragments_per_message": 1, + "out_of_scope_fragment_rate": 0, + "routed_fragment_rate": 100, + "no_route_fragment_rate": 0, + "route_resolution_accuracy": null, + "no_route_precision": null, + "false_no_route_rate": null, + "execution_state_consistency_rate": 100, + "executable_with_soft_assumptions_rate": 100, + "soft_assumption_used_fragment_rate": 100, + "clarification_precision": null, + "clarification_recall": null, + "false_clarification_rate": null + }, + "budget": { + "requests_total": 0, + "retries_used": 0 + }, + "clarification_eval": { + "labeled_cases": 0, + "true_positive": 0, + "false_positive": 0, + "false_negative": 0 + }, + "route_eval": { + "labeled_cases": 0, + "correct_cases": 0, + "expected_routed_cases": 0, + "no_route_true_positive": 0, + "no_route_false_positive": 0 + }, + "scope_eval": { + "labeled_cases": 0, + "correct_cases": 0 + }, + "execution_state_eval": { + "checks_total": 2, + "checks_passed": 2 + }, + "route_distribution": { + "hybrid_store_plus_live": 2 + }, + "fallback_distribution": { + "none": 2 + }, + "results": [ + { + "case_id": "BQ-001", + "raw_question": "Проверь счет 60 за июнь 2020", + "validation_passed": true, + "message_in_scope": true, + "scope_confidence": "high", + "contains_multiple_tasks": false, + "fragments_total": 1, + "in_scope_fragments": 1, + "out_of_scope_fragments": 0, + "unclear_fragments": 0, + "fallback_type": "none", + "predicted_route_status": "routed", + "expected_route_status": null, + "predicted_no_route_reason": null, + "expected_no_route_reason": null, + "predicted_clarification_required": false, + "expected_clarification_required": null, + "executable_with_soft_assumptions_fragments": 1, + "trace_id": "u0PbQMwXMrEkkc", + "request_count_for_case": 0 + }, + { + "case_id": "BQ-002", + "raw_question": "Покажи риски по счету 97", + "validation_passed": true, + "message_in_scope": true, + "scope_confidence": "high", + "contains_multiple_tasks": false, + "fragments_total": 1, + "in_scope_fragments": 1, + "out_of_scope_fragments": 0, + "unclear_fragments": 0, + "fallback_type": "none", + "predicted_route_status": "routed", + "expected_route_status": null, + "predicted_no_route_reason": null, + "expected_no_route_reason": null, + "predicted_clarification_required": false, + "expected_clarification_required": null, + "executable_with_soft_assumptions_fragments": 1, + "trace_id": "HzY3_UubbqgLW1", + "request_count_for_case": 0 + } + ] +} \ No newline at end of file diff --git a/llm_normalizer/data/eval_cases/eval-nBbn-LjIiA.report.json b/llm_normalizer/data/eval_cases/eval-nBbn-LjIiA.report.json new file mode 100644 index 0000000..d118b70 --- /dev/null +++ b/llm_normalizer/data/eval_cases/eval-nBbn-LjIiA.report.json @@ -0,0 +1,137 @@ +{ + "run_id": "eval-nBbn-LjIiA", + "timestamp": "2026-03-28T09:09:41.153Z", + "mode": "single-pass-strict", + "use_mock": true, + "prompt_version": "normalizer_v2_0_2", + "schema_version": "v2_0_2", + "dataset": { + "source": "inline_raw_questions", + "file": null, + "raw_questions_count": 3 + }, + "cases_total": 3, + "metrics": { + "schema_validation_pass_rate": 100, + "scope_detection_accuracy": null, + "scope_in_scope_rate": 33.33, + "multi_intent_detected_rate": 0, + "clarification_required_rate": 0, + "avg_fragments_per_message": 1, + "out_of_scope_fragment_rate": 33.33, + "routed_fragment_rate": 66.67, + "no_route_fragment_rate": 33.33, + "route_resolution_accuracy": null, + "no_route_precision": null, + "false_no_route_rate": null, + "execution_state_consistency_rate": 66.67, + "executable_with_soft_assumptions_rate": 100, + "soft_assumption_used_fragment_rate": 100, + "clarification_precision": null, + "clarification_recall": null, + "false_clarification_rate": null + }, + "budget": { + "requests_total": 0, + "retries_used": 0 + }, + "clarification_eval": { + "labeled_cases": 0, + "true_positive": 0, + "false_positive": 0, + "false_negative": 0 + }, + "route_eval": { + "labeled_cases": 0, + "correct_cases": 0, + "expected_routed_cases": 0, + "no_route_true_positive": 0, + "no_route_false_positive": 0 + }, + "scope_eval": { + "labeled_cases": 0, + "correct_cases": 0 + }, + "execution_state_eval": { + "checks_total": 3, + "checks_passed": 2 + }, + "route_distribution": { + "hybrid_store_plus_live": 1, + "no_route": 1, + "batch_refresh_then_store": 1 + }, + "fallback_distribution": { + "none": 1, + "out_of_scope": 1, + "clarification": 1 + }, + "results": [ + { + "case_id": "BQ-001", + "raw_question": "Проверь хвосты по поставщикам и разложи цепочку", + "validation_passed": true, + "message_in_scope": true, + "scope_confidence": "high", + "contains_multiple_tasks": false, + "fragments_total": 1, + "in_scope_fragments": 1, + "out_of_scope_fragments": 0, + "unclear_fragments": 0, + "fallback_type": "none", + "predicted_route_status": "routed", + "expected_route_status": null, + "predicted_no_route_reason": null, + "expected_no_route_reason": null, + "predicted_clarification_required": false, + "expected_clarification_required": null, + "executable_with_soft_assumptions_fragments": 1, + "trace_id": "-H_wQprzOS7_Id", + "request_count_for_case": 0 + }, + { + "case_id": "BQ-002", + "raw_question": "Как вообще по ФСБУ", + "validation_passed": true, + "message_in_scope": false, + "scope_confidence": "low", + "contains_multiple_tasks": false, + "fragments_total": 1, + "in_scope_fragments": 0, + "out_of_scope_fragments": 1, + "unclear_fragments": 0, + "fallback_type": "out_of_scope", + "predicted_route_status": "no_route", + "expected_route_status": null, + "predicted_no_route_reason": "out_of_scope", + "expected_no_route_reason": null, + "predicted_clarification_required": false, + "expected_clarification_required": null, + "executable_with_soft_assumptions_fragments": 0, + "trace_id": "fQOvPenqT3TF8w", + "request_count_for_case": 0 + }, + { + "case_id": "BQ-003", + "raw_question": "Покажи топ рисков за июнь 2020", + "validation_passed": true, + "message_in_scope": false, + "scope_confidence": "low", + "contains_multiple_tasks": false, + "fragments_total": 1, + "in_scope_fragments": 0, + "out_of_scope_fragments": 0, + "unclear_fragments": 1, + "fallback_type": "clarification", + "predicted_route_status": "routed", + "expected_route_status": null, + "predicted_no_route_reason": null, + "expected_no_route_reason": null, + "predicted_clarification_required": false, + "expected_clarification_required": null, + "executable_with_soft_assumptions_fragments": 0, + "trace_id": "Z3MSiUIB6-qCqJ", + "request_count_for_case": 0 + } + ] +} \ No newline at end of file diff --git a/llm_normalizer/data/eval_cases/eval-qsqmRe1REB.report.json b/llm_normalizer/data/eval_cases/eval-qsqmRe1REB.report.json new file mode 100644 index 0000000..f1566ef --- /dev/null +++ b/llm_normalizer/data/eval_cases/eval-qsqmRe1REB.report.json @@ -0,0 +1,111 @@ +{ + "run_id": "eval-qsqmRe1REB", + "timestamp": "2026-03-28T09:23:30.226Z", + "mode": "single-pass-strict", + "use_mock": true, + "prompt_version": "normalizer_v2_0_2", + "schema_version": "v2_0_2", + "dataset": { + "source": "inline_raw_questions", + "file": null, + "raw_questions_count": 2 + }, + "cases_total": 2, + "metrics": { + "schema_validation_pass_rate": 100, + "scope_detection_accuracy": null, + "scope_in_scope_rate": 100, + "multi_intent_detected_rate": 0, + "clarification_required_rate": 0, + "avg_fragments_per_message": 1, + "out_of_scope_fragment_rate": 0, + "routed_fragment_rate": 100, + "no_route_fragment_rate": 0, + "route_resolution_accuracy": null, + "no_route_precision": null, + "false_no_route_rate": null, + "execution_state_consistency_rate": 100, + "executable_with_soft_assumptions_rate": 100, + "soft_assumption_used_fragment_rate": 100, + "clarification_precision": null, + "clarification_recall": null, + "false_clarification_rate": null + }, + "budget": { + "requests_total": 0, + "retries_used": 0 + }, + "clarification_eval": { + "labeled_cases": 0, + "true_positive": 0, + "false_positive": 0, + "false_negative": 0 + }, + "route_eval": { + "labeled_cases": 0, + "correct_cases": 0, + "expected_routed_cases": 0, + "no_route_true_positive": 0, + "no_route_false_positive": 0 + }, + "scope_eval": { + "labeled_cases": 0, + "correct_cases": 0 + }, + "execution_state_eval": { + "checks_total": 2, + "checks_passed": 2 + }, + "route_distribution": { + "hybrid_store_plus_live": 2 + }, + "fallback_distribution": { + "none": 2 + }, + "results": [ + { + "case_id": "BQ-001", + "raw_question": "Проверь счет 60 за июнь 2020", + "validation_passed": true, + "message_in_scope": true, + "scope_confidence": "high", + "contains_multiple_tasks": false, + "fragments_total": 1, + "in_scope_fragments": 1, + "out_of_scope_fragments": 0, + "unclear_fragments": 0, + "fallback_type": "none", + "predicted_route_status": "routed", + "expected_route_status": null, + "predicted_no_route_reason": null, + "expected_no_route_reason": null, + "predicted_clarification_required": false, + "expected_clarification_required": null, + "executable_with_soft_assumptions_fragments": 1, + "trace_id": "t27mUt40DZkNxA", + "request_count_for_case": 0 + }, + { + "case_id": "BQ-002", + "raw_question": "Покажи риски по счету 97", + "validation_passed": true, + "message_in_scope": true, + "scope_confidence": "high", + "contains_multiple_tasks": false, + "fragments_total": 1, + "in_scope_fragments": 1, + "out_of_scope_fragments": 0, + "unclear_fragments": 0, + "fallback_type": "none", + "predicted_route_status": "routed", + "expected_route_status": null, + "predicted_no_route_reason": null, + "expected_no_route_reason": null, + "predicted_clarification_required": false, + "expected_clarification_required": null, + "executable_with_soft_assumptions_fragments": 1, + "trace_id": "SRUJifR4MwjGqq", + "request_count_for_case": 0 + } + ] +} \ No newline at end of file diff --git a/llm_normalizer/data/eval_cases/eval-vXCfmE5dFG.report.json b/llm_normalizer/data/eval_cases/eval-vXCfmE5dFG.report.json new file mode 100644 index 0000000..6defae4 --- /dev/null +++ b/llm_normalizer/data/eval_cases/eval-vXCfmE5dFG.report.json @@ -0,0 +1,111 @@ +{ + "run_id": "eval-vXCfmE5dFG", + "timestamp": "2026-03-28T09:58:49.158Z", + "mode": "single-pass-strict", + "use_mock": true, + "prompt_version": "normalizer_v2_0_2", + "schema_version": "v2_0_2", + "dataset": { + "source": "inline_raw_questions", + "file": null, + "raw_questions_count": 2 + }, + "cases_total": 2, + "metrics": { + "schema_validation_pass_rate": 100, + "scope_detection_accuracy": null, + "scope_in_scope_rate": 100, + "multi_intent_detected_rate": 0, + "clarification_required_rate": 0, + "avg_fragments_per_message": 1, + "out_of_scope_fragment_rate": 0, + "routed_fragment_rate": 100, + "no_route_fragment_rate": 0, + "route_resolution_accuracy": null, + "no_route_precision": null, + "false_no_route_rate": null, + "execution_state_consistency_rate": 100, + "executable_with_soft_assumptions_rate": 100, + "soft_assumption_used_fragment_rate": 100, + "clarification_precision": null, + "clarification_recall": null, + "false_clarification_rate": null + }, + "budget": { + "requests_total": 0, + "retries_used": 0 + }, + "clarification_eval": { + "labeled_cases": 0, + "true_positive": 0, + "false_positive": 0, + "false_negative": 0 + }, + "route_eval": { + "labeled_cases": 0, + "correct_cases": 0, + "expected_routed_cases": 0, + "no_route_true_positive": 0, + "no_route_false_positive": 0 + }, + "scope_eval": { + "labeled_cases": 0, + "correct_cases": 0 + }, + "execution_state_eval": { + "checks_total": 2, + "checks_passed": 2 + }, + "route_distribution": { + "hybrid_store_plus_live": 2 + }, + "fallback_distribution": { + "none": 2 + }, + "results": [ + { + "case_id": "BQ-001", + "raw_question": "Проверь счет 60 за июнь 2020", + "validation_passed": true, + "message_in_scope": true, + "scope_confidence": "high", + "contains_multiple_tasks": false, + "fragments_total": 1, + "in_scope_fragments": 1, + "out_of_scope_fragments": 0, + "unclear_fragments": 0, + "fallback_type": "none", + "predicted_route_status": "routed", + "expected_route_status": null, + "predicted_no_route_reason": null, + "expected_no_route_reason": null, + "predicted_clarification_required": false, + "expected_clarification_required": null, + "executable_with_soft_assumptions_fragments": 1, + "trace_id": "ZU34_DiRow0vZ5", + "request_count_for_case": 0 + }, + { + "case_id": "BQ-002", + "raw_question": "Покажи риски по НДС и по закрытию", + "validation_passed": true, + "message_in_scope": true, + "scope_confidence": "high", + "contains_multiple_tasks": false, + "fragments_total": 1, + "in_scope_fragments": 1, + "out_of_scope_fragments": 0, + "unclear_fragments": 0, + "fallback_type": "none", + "predicted_route_status": "routed", + "expected_route_status": null, + "predicted_no_route_reason": null, + "expected_no_route_reason": null, + "predicted_clarification_required": false, + "expected_clarification_required": null, + "executable_with_soft_assumptions_fragments": 1, + "trace_id": "Nojaq_kesX2XGI", + "request_count_for_case": 0 + } + ] +} \ No newline at end of file diff --git a/llm_normalizer/data/eval_cases/eval-xytqNgINxK.report.json b/llm_normalizer/data/eval_cases/eval-xytqNgINxK.report.json new file mode 100644 index 0000000..9b7899c --- /dev/null +++ b/llm_normalizer/data/eval_cases/eval-xytqNgINxK.report.json @@ -0,0 +1,137 @@ +{ + "run_id": "eval-xytqNgINxK", + "timestamp": "2026-03-28T09:58:47.522Z", + "mode": "single-pass-strict", + "use_mock": true, + "prompt_version": "normalizer_v2_0_2", + "schema_version": "v2_0_2", + "dataset": { + "source": "inline_raw_questions", + "file": null, + "raw_questions_count": 3 + }, + "cases_total": 3, + "metrics": { + "schema_validation_pass_rate": 100, + "scope_detection_accuracy": null, + "scope_in_scope_rate": 33.33, + "multi_intent_detected_rate": 0, + "clarification_required_rate": 0, + "avg_fragments_per_message": 1, + "out_of_scope_fragment_rate": 33.33, + "routed_fragment_rate": 66.67, + "no_route_fragment_rate": 33.33, + "route_resolution_accuracy": null, + "no_route_precision": null, + "false_no_route_rate": null, + "execution_state_consistency_rate": 66.67, + "executable_with_soft_assumptions_rate": 100, + "soft_assumption_used_fragment_rate": 100, + "clarification_precision": null, + "clarification_recall": null, + "false_clarification_rate": null + }, + "budget": { + "requests_total": 0, + "retries_used": 0 + }, + "clarification_eval": { + "labeled_cases": 0, + "true_positive": 0, + "false_positive": 0, + "false_negative": 0 + }, + "route_eval": { + "labeled_cases": 0, + "correct_cases": 0, + "expected_routed_cases": 0, + "no_route_true_positive": 0, + "no_route_false_positive": 0 + }, + "scope_eval": { + "labeled_cases": 0, + "correct_cases": 0 + }, + "execution_state_eval": { + "checks_total": 3, + "checks_passed": 2 + }, + "route_distribution": { + "hybrid_store_plus_live": 1, + "no_route": 1, + "batch_refresh_then_store": 1 + }, + "fallback_distribution": { + "none": 1, + "out_of_scope": 1, + "clarification": 1 + }, + "results": [ + { + "case_id": "BQ-001", + "raw_question": "Проверь хвосты по поставщикам и разложи цепочку", + "validation_passed": true, + "message_in_scope": true, + "scope_confidence": "high", + "contains_multiple_tasks": false, + "fragments_total": 1, + "in_scope_fragments": 1, + "out_of_scope_fragments": 0, + "unclear_fragments": 0, + "fallback_type": "none", + "predicted_route_status": "routed", + "expected_route_status": null, + "predicted_no_route_reason": null, + "expected_no_route_reason": null, + "predicted_clarification_required": false, + "expected_clarification_required": null, + "executable_with_soft_assumptions_fragments": 1, + "trace_id": "JUjGkgksG-QX_y", + "request_count_for_case": 0 + }, + { + "case_id": "BQ-002", + "raw_question": "Как вообще по ФСБУ", + "validation_passed": true, + "message_in_scope": false, + "scope_confidence": "low", + "contains_multiple_tasks": false, + "fragments_total": 1, + "in_scope_fragments": 0, + "out_of_scope_fragments": 1, + "unclear_fragments": 0, + "fallback_type": "out_of_scope", + "predicted_route_status": "no_route", + "expected_route_status": null, + "predicted_no_route_reason": "out_of_scope", + "expected_no_route_reason": null, + "predicted_clarification_required": false, + "expected_clarification_required": null, + "executable_with_soft_assumptions_fragments": 0, + "trace_id": "9Os3edsGypmVvr", + "request_count_for_case": 0 + }, + { + "case_id": "BQ-003", + "raw_question": "Покажи топ рисков за июнь 2020", + "validation_passed": true, + "message_in_scope": false, + "scope_confidence": "low", + "contains_multiple_tasks": false, + "fragments_total": 1, + "in_scope_fragments": 0, + "out_of_scope_fragments": 0, + "unclear_fragments": 1, + "fallback_type": "clarification", + "predicted_route_status": "routed", + "expected_route_status": null, + "predicted_no_route_reason": null, + "expected_no_route_reason": null, + "predicted_clarification_required": false, + "expected_clarification_required": null, + "executable_with_soft_assumptions_fragments": 0, + "trace_id": "FgsR4vO6BqpNvV", + "request_count_for_case": 0 + } + ] +} \ No newline at end of file diff --git a/llm_normalizer/docs/runs/2026-03-28_Stage_04_Wave_14_Domain_Regression_Rollback_Domain_Locked_Anchor_Usage.zip b/llm_normalizer/docs/runs/2026-03-28_Stage_04_Wave_14_Domain_Regression_Rollback_Domain_Locked_Anchor_Usage.zip new file mode 100644 index 0000000..30422e1 Binary files /dev/null and b/llm_normalizer/docs/runs/2026-03-28_Stage_04_Wave_14_Domain_Regression_Rollback_Domain_Locked_Anchor_Usage.zip differ