NODEDC_1C/llm_normalizer/backend/dist/services/normalizerService.js

1551 lines
66 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.NormalizerService = void 0;
const nanoid_1 = require("nanoid");
const config_1 = require("../config");
const promptBuilder_1 = require("./promptBuilder");
const routeHintAdapter_1 = require("./routeHintAdapter");
const schemaValidator_1 = require("./schemaValidator");
const traceLogger_1 = require("./traceLogger");
// Instruction strings, one per normalized_query schema version, telling the model to
// answer with schema-valid JSON and no markdown.
// NOTE(review): their call sites are outside this part of the file; presumably they are
// added to the prompt when a response has to be retried — confirm.
const RETRY_INSTRUCTION_V1 = "IMPORTANT: return valid JSON strictly matching schema normalized_query_v1. No markdown.";
const RETRY_INSTRUCTION_V2 = "IMPORTANT: return valid JSON strictly matching schema normalized_query_v2. No markdown.";
const RETRY_INSTRUCTION_V2_0_1 = "IMPORTANT: return valid JSON strictly matching schema normalized_query_v2_0_1. No markdown.";
const RETRY_INSTRUCTION_V2_0_2 = "IMPORTANT: return valid JSON strictly matching schema normalized_query_v2_0_2. No markdown.";
// Canonical confidence labels accepted by coerceConfidence.
const CONFIDENCE_LEVELS = ["high", "medium", "low"];
// Canonical domain-relevance labels accepted by coerceDomainRelevance.
const DOMAIN_RELEVANCE_VALUES = ["in_scope", "out_of_scope", "unclear"];
// Canonical business-scope labels accepted by coerceBusinessScope.
const BUSINESS_SCOPE_VALUES = [
"company_specific_accounting",
"generic_accounting",
"offtopic",
"unclear"
];
// Canonical candidate labels accepted by mapCandidateLabel.
const CANDIDATE_LABEL_VALUES = [
"heavy_analytical",
"cross_entity",
"drilldown_explain",
"rule_based_account_control",
"anomaly_probe",
"period_close_risk",
"ambiguous_human_query",
"simple_factual"
];
/**
 * Parses model output as JSON after stripping an optional markdown code fence.
 *
 * @param {string} text - Raw text, possibly wrapped in ``` or ```json fences.
 * @returns {*} The parsed JSON value.
 * @throws {SyntaxError} When the unfenced text is not valid JSON.
 */
function safeJsonParse(text) {
    const withoutJsonFence = text.trim().replace(/^```json\s*/i, "");
    const withoutOpeningFence = withoutJsonFence.replace(/^```\s*/i, "");
    const withoutClosingFence = withoutOpeningFence.replace(/```$/i, "");
    return JSON.parse(withoutClosingFence.trim());
}
/**
 * Determines which normalized_query schema version a request targets.
 *
 * An explicit `payload.schemaVersion` (short form such as "v2_0_1" or the full
 * "normalized_query_v2_0_1") wins. Otherwise the version is derived from
 * `payload.promptVersion`, falling back to the configured default prompt version.
 *
 * @param {object} payload - Request payload with optional schemaVersion / promptVersion.
 * @returns {"v1"|"v2"|"v2_0_1"|"v2_0_2"} Resolved schema version.
 */
function resolveSchemaVersion(payload) {
    const requested = String(payload.schemaVersion ?? "").toLowerCase().trim();
    for (const version of ["v2_0_2", "v2_0_1", "v2", "v1"]) {
        if (requested === version || requested === `normalized_query_${version}`) {
            return version;
        }
    }
    const prompt = String(payload.promptVersion ?? config_1.DEFAULT_PROMPT_VERSION).toLowerCase().trim();
    if (!prompt.startsWith("normalizer_v2")) {
        return "v1";
    }
    switch (prompt) {
        case "normalizer_v2_0_2":
            return "v2_0_2";
        case "normalizer_v2_0_1":
            return "v2_0_1";
        default:
            // Any other normalizer_v2* prompt maps to the base v2 schema.
            return "v2";
    }
}
/**
 * Tells whether a raw model response was cut short by the output-token limit.
 *
 * @param {*} rawModelResponse - Raw response object from the model API.
 * @returns {boolean} True when status is "incomplete" and
 *   incomplete_details.reason is "max_output_tokens" (case-insensitive).
 */
function shouldEscalateOutputBudget(rawModelResponse) {
    const isObject = Boolean(rawModelResponse) && typeof rawModelResponse === "object";
    if (!isObject) {
        return false;
    }
    const { status, incomplete_details: incompleteDetails } = rawModelResponse;
    const statusText = String(status ?? "").toLowerCase();
    const reasonText = String((incompleteDetails ?? {}).reason ?? "").toLowerCase();
    return statusText === "incomplete" && reasonText === "max_output_tokens";
}
/**
 * Computes the output-token budget for a retry.
 *
 * When the previous response was truncated by the output-token limit, the budget
 * grows by the larger of +400 tokens or x1.6, capped at 2400. If the current
 * budget is already above that cap it is kept as is: retrying a truncated
 * response with a smaller budget than before could only truncate it again.
 *
 * @param {number} current - Budget used for the previous attempt.
 * @param {*} rawModelResponse - Raw response of the previous attempt.
 * @returns {number} Budget to use for the retry (never lower than `current`).
 */
function computeRetryMaxOutputTokens(current, rawModelResponse) {
    if (!shouldEscalateOutputBudget(rawModelResponse)) {
        return current;
    }
    const escalationCap = 2400;
    const escalated = Math.max(current + 400, Math.ceil(current * 1.6));
    return Math.max(current, Math.min(escalated, escalationCap));
}
/**
 * Turns an arbitrary value into a comparison token: stringified (nullish becomes ""),
 * trimmed, lower-cased, with every run of whitespace or hyphens replaced by "_".
 *
 * @param {*} value - Value to normalize.
 * @returns {string} Normalized token.
 */
function normalizeToken(value) {
    const text = String(value ?? "");
    const lowered = text.trim().toLowerCase();
    return lowered.replace(/[\s-]+/g, "_");
}
/**
 * Returns the trimmed string, or null when the value is not a string or is blank.
 *
 * @param {*} value - Candidate value.
 * @returns {string|null} Non-empty trimmed string, otherwise null.
 */
function toOptionalString(value) {
    const text = typeof value === "string" ? value.trim() : "";
    return text.length === 0 ? null : text;
}
/**
 * Leniently converts a value to a boolean.
 *
 * Booleans pass through; the numbers 1 and 0 map to true/false; a small set of
 * English and Russian yes/no strings is recognised. Anything else yields `fallback`.
 *
 * @param {*} value - Value to interpret.
 * @param {boolean} [fallback=false] - Result for unrecognised input.
 * @returns {boolean} Interpreted value or the fallback.
 */
function coerceBoolean(value, fallback = false) {
    switch (typeof value) {
        case "boolean":
            return value;
        case "number":
            if (value === 1) {
                return true;
            }
            return value === 0 ? false : fallback;
        case "string": {
            const token = value.trim().toLowerCase();
            const truthy = ["true", "1", "yes", "y", "да", "ok"];
            const falsy = ["false", "0", "no", "n", "нет"];
            if (truthy.includes(token)) {
                return true;
            }
            return falsy.includes(token) ? false : fallback;
        }
        default:
            return fallback;
    }
}
/**
 * Converts a value into a de-duplicated list of non-empty trimmed strings.
 *
 * Arrays keep only their string items; a string is split on commas, semicolons
 * and newlines; any other input yields an empty list. First-seen order is kept.
 *
 * @param {*} value - Array, delimited string, or anything else.
 * @returns {string[]} Unique non-empty strings.
 */
function coerceStringArray(value) {
    let rawItems;
    if (Array.isArray(value)) {
        rawItems = value.map((item) => (typeof item === "string" ? item : ""));
    }
    else if (typeof value === "string") {
        rawItems = value.split(/[,\n;]+/);
    }
    else {
        return [];
    }
    const unique = new Set();
    for (const item of rawItems) {
        const trimmed = item.trim();
        if (trimmed.length > 0) {
            unique.add(trimmed);
        }
    }
    return [...unique];
}
/**
 * Maps a model-provided confidence onto one of the canonical confidence levels.
 *
 * Strings are normalized and accepted only if they are a canonical level.
 * Finite numbers are read as a ratio (values above 1 are treated as percentages):
 * >= 0.75 is "high", >= 0.45 is "medium", anything lower is "low".
 *
 * @param {*} value - Raw confidence.
 * @param {string} fallback - Level returned for unusable input.
 * @returns {string} Confidence level.
 */
function coerceConfidence(value, fallback) {
    if (typeof value === "string") {
        const label = normalizeToken(value);
        return CONFIDENCE_LEVELS.includes(label) ? label : fallback;
    }
    const isUsableNumber = typeof value === "number" && Number.isFinite(value);
    if (!isUsableNumber) {
        return fallback;
    }
    const ratio = value > 1 ? value / 100 : value;
    const thresholds = [
        [0.75, "high"],
        [0.45, "medium"]
    ];
    for (const [minimum, level] of thresholds) {
        if (ratio >= minimum) {
            return level;
        }
    }
    return "low";
}
/**
 * Maps a model-provided relevance value onto a canonical domain-relevance label.
 *
 * Booleans map to in_scope / out_of_scope. Other values are normalized to a
 * token and matched against the canonical labels, then against known aliases.
 *
 * @param {*} value - Raw relevance value.
 * @param {string} fallback - Label returned when nothing matches.
 * @returns {string} Canonical label or the fallback.
 */
function coerceDomainRelevance(value, fallback) {
    if (typeof value === "boolean") {
        return value ? "in_scope" : "out_of_scope";
    }
    const token = normalizeToken(value);
    if (DOMAIN_RELEVANCE_VALUES.includes(token)) {
        return token;
    }
    const aliasGroups = [
        ["in_scope", ["in_scope_true", "in_scope_yes", "in_scope_relevant", "relevant", "supported", "true"]],
        ["out_of_scope", ["out_scope", "outofscope", "offtopic", "off_topic", "irrelevant", "false"]],
        ["unclear", ["unknown", "ambiguous", "n_a", "na"]]
    ];
    const matched = aliasGroups.find(([, aliases]) => aliases.includes(token));
    return matched ? matched[0] : fallback;
}
/**
 * Maps a model-provided business scope onto a canonical business-scope label.
 *
 * Canonical labels win, then known aliases. If the value is unrecognised the
 * scope is derived from the fragment's domain relevance, and only then from
 * `fallback`.
 *
 * @param {*} value - Raw business scope.
 * @param {string} fallback - Label used when nothing else applies.
 * @param {string} domainRelevance - Already coerced domain relevance of the fragment.
 * @returns {string} Business-scope label.
 */
function coerceBusinessScope(value, fallback, domainRelevance) {
    const token = normalizeToken(value);
    if (BUSINESS_SCOPE_VALUES.includes(token)) {
        return token;
    }
    const aliasGroups = [
        ["company_specific_accounting", ["company_specific", "company_accounting", "document_review", "settlement", "bank_settlement"]],
        ["generic_accounting", ["generic", "general_accounting", "general"]],
        ["offtopic", ["out_of_scope", "off_topic", "outside"]],
        ["unclear", ["unknown"]]
    ];
    for (const [scope, aliases] of aliasGroups) {
        if (aliases.includes(token)) {
            return scope;
        }
    }
    switch (domainRelevance) {
        case "out_of_scope":
            return "offtopic";
        case "in_scope":
            return "company_specific_accounting";
        default:
            return fallback;
    }
}
/**
 * Produces a fragment identifier.
 *
 * A non-blank string is used as is (trimmed); a finite number n becomes "F<n>"
 * with n floored and clamped to at least 1; otherwise `fallback` is used, or
 * "F<index + 1>" when the fallback is empty.
 *
 * @param {*} value - Raw identifier from the model.
 * @param {number} index - Zero-based position of the fragment.
 * @param {string} fallback - Preferred identifier when `value` is unusable.
 * @returns {string} Fragment identifier.
 */
function coerceFragmentId(value, index, fallback) {
    if (typeof value === "string") {
        const trimmed = value.trim();
        if (trimmed.length > 0) {
            return trimmed;
        }
    }
    else if (typeof value === "number" && Number.isFinite(value)) {
        return `F${Math.max(1, Math.floor(value))}`;
    }
    return fallback || `F${index + 1}`;
}
/**
 * Extracts a calendar year from a number or a four-digit string.
 *
 * @param {*} value - Candidate year.
 * @returns {number|null} Integer year within 1900..2200, otherwise null.
 */
function parseYear(value) {
    const isPlausibleYear = (candidate) => Number.isInteger(candidate) && candidate >= 1900 && candidate <= 2200;
    if (typeof value === "number") {
        return isPlausibleYear(value) ? value : null;
    }
    if (typeof value !== "string") {
        return null;
    }
    const digits = value.trim();
    if (!/^\d{4}$/.test(digits)) {
        return null;
    }
    const year = Number.parseInt(digits, 10);
    return isPlausibleYear(year) ? year : null;
}
/**
 * Extracts a month number from a number or a one/two-digit string.
 *
 * @param {*} value - Candidate month.
 * @returns {number|null} Integer month within 1..12, otherwise null.
 */
function parseMonth(value) {
    const isMonthNumber = (candidate) => candidate >= 1 && candidate <= 12;
    if (typeof value === "number") {
        return Number.isInteger(value) && isMonthNumber(value) ? value : null;
    }
    if (typeof value !== "string") {
        return null;
    }
    const digits = value.trim();
    if (!/^\d{1,2}$/.test(digits)) {
        return null;
    }
    const month = Number.parseInt(digits, 10);
    return isMonthNumber(month) ? month : null;
}
/**
 * Coerces a model-provided time scope into the { type, value, confidence } shape.
 *
 * Accepted inputs, in order of precedence:
 *  1. An object with `type` of "explicit" / "inferred" / "missing".
 *  2. A structured object with `period_type`, `year` and/or `month`.
 *  3. Anything else: the scope inferred from `rawText`, or `fallback` when nothing
 *     can be inferred.
 *
 * For structured input, a year together with a month yields "YYYY-MM" unless
 * `period_type` explicitly asks for "year". Previously an empty `period_type`
 * with both fields present was caught by the year-only branch and the month was
 * silently dropped.
 *
 * @param {*} value - Raw time scope from the model.
 * @param {string} rawText - Fragment text used for inference.
 * @param {{type: string, value: (string|null), confidence: string}} fallback - Default scope.
 * @returns {{type: string, value: (string|null), confidence: string}} Coerced scope.
 */
function coerceTimeScope(value, rawText, fallback) {
    if (value && typeof value === "object") {
        const source = value;
        const rawType = normalizeToken(source.type);
        const confidence = coerceConfidence(source.confidence, fallback.confidence);
        if (rawType === "missing") {
            return { type: "missing", value: null, confidence };
        }
        if (rawType === "explicit" || rawType === "inferred") {
            return { type: rawType, value: toOptionalString(source.value), confidence };
        }
        const periodType = normalizeToken(source.period_type);
        const year = parseYear(source.year);
        const month = parseMonth(source.month);
        // A parsed calendar period is at least "medium" confidence.
        const structuredConfidence = confidence === "low" ? "medium" : confidence;
        // Month precision wins unless the model explicitly asked for a year period.
        if (year !== null && month !== null && periodType !== "year") {
            return {
                type: "explicit",
                value: `${year}-${String(month).padStart(2, "0")}`,
                confidence: structuredConfidence
            };
        }
        if (year !== null && (periodType === "year" || periodType.length === 0)) {
            return {
                type: "explicit",
                value: String(year),
                confidence: structuredConfidence
            };
        }
    }
    const inferred = inferTimeScope(rawText);
    if (inferred.type !== "missing") {
        return inferred;
    }
    return fallback;
}
/**
 * Coerces the model-provided fragment flags into the full boolean flag set.
 *
 * Each flag is read from its canonical key or, failing that, from the first
 * alias present on the object; missing or unrecognised values keep the value
 * from `fallback`.
 *
 * @param {*} value - Raw flags object from the model.
 * @param {Object<string, boolean>} fallback - Heuristically derived flags.
 * @returns {Object<string, boolean>} Complete flag set.
 */
function coerceFlags(value, fallback) {
    if (!value || typeof value !== "object") {
        return fallback;
    }
    const flagAliases = [
        ["has_multi_entity_scope", ["multi_entity_scope"]],
        ["asks_for_chain_explanation", ["asks_for_chain", "chain_explanation"]],
        ["asks_for_ranking_or_top", ["asks_for_ranking", "asks_for_top"]],
        ["asks_for_period_summary", ["period_summary"]],
        ["asks_for_rule_check", ["rule_check"]],
        ["asks_for_anomaly_scan", ["anomaly_scan"]],
        ["asks_for_exact_object_trace", ["exact_object_trace"]],
        ["asks_for_evidence", ["evidence"]],
        ["mentions_period_close_context", ["period_close_context"]]
    ];
    const flags = {};
    for (const [key, aliases] of flagAliases) {
        // The canonical key takes precedence over its aliases.
        const presentName = [key, ...aliases].find((name) => name in value);
        flags[key] = presentName === undefined
            ? fallback[key]
            : coerceBoolean(value[presentName], fallback[key]);
    }
    return flags;
}
/**
 * Builds default semantic hints for a fragment without any model input.
 *
 * Only two fields are actually derived: the date scope kind mirrors whether the
 * time scope is explicit, and the selected-object flag is set when the text
 * mentions a "selected object" (Russian or English wording).
 *
 * @param {*} rawText - Fragment text (nullish is treated as empty).
 * @param {{type: string}} timeScope - Time scope of the fragment.
 * @returns {object} Semantic hints with default scope target fields.
 */
function inferSemanticHints(rawText, timeScope) {
    const dateScopeKind = timeScope.type === "explicit" ? "explicit" : "missing";
    const text = String(rawText ?? "");
    const mentionsSelectedObject = /(?:по\s+выбранному\s+объекту|selected\s+object)/iu.test(text);
    return {
        scope_target_kind: "none",
        scope_target_text: null,
        date_scope_kind: dateScopeKind,
        self_scope_detected: false,
        selected_object_scope_detected: mentionsSelectedObject
    };
}
/**
 * Maps a model-provided scope target kind onto a canonical kind.
 *
 * @param {*} value - Raw scope target kind.
 * @returns {string} One of "none", "self_scope", "selected_object", "organization",
 *   "warehouse", "counterparty", "contract", "item"; "none" when unrecognised.
 */
function coerceSemanticScopeTargetKind(value) {
    const token = normalizeToken(value);
    const canonicalKinds = [
        "none",
        "self_scope",
        "selected_object",
        "organization",
        "warehouse",
        "counterparty",
        "contract",
        "item"
    ];
    if (canonicalKinds.includes(token)) {
        return token;
    }
    const aliasesByKind = {
        organization: ["organization_scope", "company_scope", "org_scope", "company", "organization_anchor"],
        warehouse: ["warehouse_scope", "stock_scope", "warehouse_anchor"],
        self_scope: ["own_company_scope", "implicit_self_scope", "our_scope"],
        selected_object: ["selected_object_scope", "selected_object_anchor"]
    };
    for (const [kind, aliases] of Object.entries(aliasesByKind)) {
        if (aliases.includes(token)) {
            return kind;
        }
    }
    return "none";
}
/**
 * Maps a model-provided date scope kind onto "explicit", "implicit_current" or "missing".
 *
 * @param {*} value - Raw date scope kind.
 * @returns {string} Canonical kind; "missing" when unrecognised.
 */
function coerceSemanticDateScopeKind(value) {
    const token = normalizeToken(value);
    const canonicalKinds = ["explicit", "implicit_current", "missing"];
    if (canonicalKinds.includes(token)) {
        return token;
    }
    const implicitCurrentAliases = ["implicit_current_snapshot", "current", "today", "default_current"];
    return implicitCurrentAliases.includes(token) ? "implicit_current" : "missing";
}
/**
 * Coerces model-provided semantic hints, filling gaps from locally inferred hints.
 *
 * When the model supplies no date scope kind at all (none of `date_scope_kind`,
 * `date_scope`, `time_scope_kind`), the inferred kind is used, in line with how
 * the text and boolean fields already fall back. Previously an omitted kind was
 * coerced to "missing" even when the fragment's time scope was explicit. A kind
 * that is present but unrecognised still becomes "missing".
 *
 * @param {*} value - Raw semantic hints object from the model.
 * @param {string} rawText - Fragment text used for inference.
 * @param {{type: string}} timeScope - Already coerced time scope of the fragment.
 * @returns {object} Semantic hints.
 */
function coerceSemanticHints(value, rawText, timeScope) {
    const fallback = inferSemanticHints(rawText, timeScope);
    if (!value || typeof value !== "object") {
        return fallback;
    }
    const source = value;
    const rawScopeKind = source.scope_target_kind ?? source.anchor_kind ?? source.scope_kind;
    const rawScopeText = source.scope_target_text ??
        source.anchor_value ??
        source.organization ??
        source.warehouse ??
        source.counterparty ??
        source.contract ??
        source.item;
    const rawDateScopeKind = source.date_scope_kind ?? source.date_scope ?? source.time_scope_kind;
    return {
        scope_target_kind: coerceSemanticScopeTargetKind(rawScopeKind),
        scope_target_text: toOptionalString(rawScopeText) ?? fallback.scope_target_text,
        date_scope_kind: rawDateScopeKind == null
            ? fallback.date_scope_kind
            : coerceSemanticDateScopeKind(rawDateScopeKind),
        self_scope_detected: coerceBoolean(source.self_scope_detected, fallback.self_scope_detected),
        selected_object_scope_detected: coerceBoolean(source.selected_object_scope_detected, fallback.selected_object_scope_detected)
    };
}
/**
 * Maps a model-provided label onto a canonical candidate label.
 *
 * @param {*} value - Raw label.
 * @returns {string|null} Canonical label, or null when the label is unknown.
 */
function mapCandidateLabel(value) {
    const token = normalizeToken(value);
    if (CANDIDATE_LABEL_VALUES.includes(token)) {
        return token;
    }
    const aliasGroups = [
        ["simple_factual", ["show_documents", "document_list", "show_docs", "point_answer", "lookup"]],
        ["heavy_analytical", ["ranking", "top", "summary", "analytical"]],
        ["cross_entity", ["chain", "cross", "cross_domain"]],
        ["rule_based_account_control", ["rule_check", "control", "rules"]],
        ["anomaly_probe", ["risk_scan", "anomaly", "risk"]],
        ["period_close_risk", ["period_close", "month_close"]],
        ["ambiguous_human_query", ["ambiguous", "unclear"]]
    ];
    const matched = aliasGroups.find(([, aliases]) => aliases.includes(token));
    return matched ? matched[0] : null;
}
/**
 * Resolves the candidate labels of a fragment.
 *
 * Preference order: recognised labels supplied by the model (de-duplicated),
 * then labels derived from the flags and domain relevance, then `fallback`.
 *
 * @param {*} value - Raw labels from the model (array or delimited string).
 * @param {object} flags - Coerced fragment flags.
 * @param {string} domainRelevance - Coerced domain relevance.
 * @param {string[]} fallback - Labels used when nothing else is available.
 * @returns {string[]} Candidate labels.
 */
function coerceCandidateLabels(value, flags, domainRelevance, fallback) {
    const recognised = new Set();
    for (const rawLabel of coerceStringArray(value)) {
        const label = mapCandidateLabel(rawLabel);
        if (label) {
            recognised.add(label);
        }
    }
    if (recognised.size > 0) {
        return [...recognised];
    }
    const derived = pickCandidateLabels(flags, domainRelevance);
    return derived.length > 0 ? derived : fallback;
}
/**
 * Coerces one model-provided fragment into the v2 fragment shape.
 *
 * A heuristic baseline is built from the fragment text (or, if that yields
 * nothing, from the whole user message); every model-provided field is then
 * coerced and laid over that baseline.
 *
 * @param {*} rawFragment - Raw fragment object from the model.
 * @param {number} index - Zero-based fragment position.
 * @param {string} userMessage - Full user message, used when the fragment has no text.
 * @returns {object|null} Coerced fragment, or null when no baseline can be built.
 */
function coerceFragmentV2(rawFragment, index, userMessage) {
    const source = rawFragment && typeof rawFragment === "object" ? rawFragment : {};
    // The first non-blank text field wins; the user message is the last resort.
    let rawText = null;
    for (const key of ["raw_fragment_text", "rawText", "fragment_text", "text"]) {
        rawText = toOptionalString(source[key]);
        if (rawText !== null) {
            break;
        }
    }
    if (rawText === null) {
        rawText = userMessage.trim();
    }
    const base = buildFragmentV2(rawText, index) ?? buildFragmentV2(userMessage, index);
    if (!base) {
        return null;
    }
    const preferOwn = (own, inherited) => (own.length > 0 ? own : inherited);
    const domainRelevance = coerceDomainRelevance(source.domain_relevance, base.domain_relevance);
    const businessScope = coerceBusinessScope(source.business_scope, base.business_scope, domainRelevance);
    const flags = coerceFlags(source.flags, base.flags);
    const entityHints = preferOwn(coerceStringArray(source.entity_hints), base.entity_hints);
    const accountHints = preferOwn(coerceStringArray(source.account_hints), base.account_hints);
    const documentHints = preferOwn(coerceStringArray(source.document_hints), base.document_hints);
    const registerHints = preferOwn(coerceStringArray(source.register_hints), base.register_hints);
    const timeScope = coerceTimeScope(source.time_scope, rawText, base.time_scope);
    const fragmentId = coerceFragmentId(source.fragment_id, index, base.fragment_id);
    const normalizedText = toOptionalString(source.normalized_fragment_text) ?? base.normalized_fragment_text;
    const semanticHints = coerceSemanticHints(source.semantic_hints, rawText, timeScope);
    const candidateLabels = coerceCandidateLabels(source.candidate_labels, flags, domainRelevance, base.candidate_labels);
    const confidence = coerceConfidence(source.confidence, base.confidence);
    return {
        fragment_id: fragmentId,
        raw_fragment_text: rawText,
        normalized_fragment_text: normalizedText,
        domain_relevance: domainRelevance,
        business_scope: businessScope,
        entity_hints: entityHints,
        account_hints: accountHints,
        document_hints: documentHints,
        register_hints: registerHints,
        time_scope: timeScope,
        flags,
        semantic_hints: semanticHints,
        candidate_labels: candidateLabels,
        confidence
    };
}
/**
 * Keeps only well-formed discarded-fragment entries.
 *
 * An entry is kept when it is an object with a non-blank `raw_fragment_text`
 * and a non-blank `reason`; both are trimmed.
 *
 * @param {*} value - Raw `discarded_fragments` value from the model.
 * @returns {Array<{raw_fragment_text: string, reason: string}>} Valid entries.
 */
function coerceDiscardedFragments(value) {
    if (!Array.isArray(value)) {
        return [];
    }
    return value.flatMap((entry) => {
        if (!entry || typeof entry !== "object") {
            return [];
        }
        const rawFragmentText = toOptionalString(entry.raw_fragment_text);
        const reason = toOptionalString(entry.reason);
        return rawFragmentText && reason
            ? [{ raw_fragment_text: rawFragmentText, reason }]
            : [];
    });
}
/**
 * Coerces the message-level scope confidence; delegates to coerceConfidence.
 *
 * @param {*} value - Raw scope confidence.
 * @param {string} fallback - Level used for unusable input.
 * @returns {string} Confidence level.
 */
function coerceScopeConfidence(value, fallback) {
    const level = coerceConfidence(value, fallback);
    return level;
}
/**
 * Coerces the message-level notes about whether clarification is needed.
 *
 * The reason is only reported when clarification is needed; a missing or blank
 * reason is replaced by "clarification_required".
 *
 * @param {*} value - Raw `global_notes` object from the model.
 * @param {boolean} fallbackNeedsClarification - Locally inferred need for clarification.
 * @returns {{needs_clarification: boolean, clarification_reason: (string|null)}} Notes.
 */
function coerceGlobalNotes(value, fallbackNeedsClarification) {
    const defaultReason = "clarification_required";
    const hasNotes = Boolean(value) && typeof value === "object";
    const needsClarification = hasNotes
        ? coerceBoolean(value.needs_clarification, fallbackNeedsClarification)
        : fallbackNeedsClarification;
    let clarificationReason = null;
    if (needsClarification) {
        const providedReason = hasNotes ? toOptionalString(value.clarification_reason) : null;
        clarificationReason = providedReason ?? defaultReason;
    }
    return {
        needs_clarification: needsClarification,
        clarification_reason: clarificationReason
    };
}
/**
 * Coerces a loosely shaped model response into a normalized_query_v2 document.
 *
 * Fragments come from `fragments` (array), else from a single `fragment` object,
 * else from splitting the user message. Message-level fields missing from the
 * response are derived from the coerced fragments.
 *
 * @param {*} candidate - Parsed model response.
 * @param {string} userMessage - Original user message.
 * @returns {object|null} Normalized document, or null when `candidate` is not an object.
 */
function coerceNormalizedCandidateV2(candidate, userMessage) {
    if (!candidate || typeof candidate !== "object") {
        return null;
    }
    const source = candidate;
    let sourceFragments;
    if (Array.isArray(source.fragments)) {
        sourceFragments = source.fragments;
    }
    else if (source.fragment && typeof source.fragment === "object") {
        sourceFragments = [source.fragment];
    }
    else {
        sourceFragments = splitIntoCandidateFragments(userMessage).map((text) => ({ raw_fragment_text: text }));
    }
    const fragments = [];
    sourceFragments.forEach((item, index) => {
        const coerced = coerceFragmentV2(item, index, userMessage);
        if (coerced !== null) {
            fragments.push(coerced);
        }
    });
    let inScopeCount = 0;
    let unclearCount = 0;
    for (const fragment of fragments) {
        if (fragment.domain_relevance === "in_scope") {
            inScopeCount += 1;
        }
        else if (fragment.domain_relevance === "unclear") {
            unclearCount += 1;
        }
    }
    const messageInScope = inScopeCount > 0;
    let inferredScopeConfidence = "low";
    if (messageInScope) {
        inferredScopeConfidence = unclearCount > 0 ? "medium" : "high";
    }
    // Clarification is suggested for in-scope messages with unclear parts or a missing period.
    const inferredNeedsClarification = messageInScope &&
        (unclearCount > 0 || fragments.some((fragment) => fragment.time_scope.type === "missing"));
    return {
        schema_version: "normalized_query_v2",
        user_message_raw: toOptionalString(source.user_message_raw) ?? userMessage,
        message_in_scope: coerceBoolean(source.message_in_scope, messageInScope),
        scope_confidence: coerceScopeConfidence(source.scope_confidence, inferredScopeConfidence),
        contains_multiple_tasks: coerceBoolean(source.contains_multiple_tasks, fragments.length > 1),
        fragments,
        discarded_fragments: coerceDiscardedFragments(source.discarded_fragments),
        global_notes: coerceGlobalNotes(source.global_notes, inferredNeedsClarification)
    };
}
/**
 * Finds the character spans of date-like substrings.
 *
 * Three shapes are recognised: "20YY-MM[-DD]", "DD.MM.YY[YY]" (with ".", "/" or
 * "-" separators) and "<day> <month name> [20YY]" with Russian or English month
 * names.
 *
 * The month-name pattern used to end in `\b`. JavaScript's `\b` only knows ASCII
 * word characters, so it never matches after a Cyrillic month name and
 * "10 января" without a year was not recognised. The end of the month name is
 * now checked with a Unicode-aware lookahead; a trailing year still ends on `\b`.
 *
 * @param {string} text - Text to scan.
 * @returns {Array<{start: number, end: number}>} Half-open spans, grouped by pattern.
 */
function collectDateSpans(text) {
    const haystack = String(text ?? "");
    const patterns = [
        /\b20\d{2}(?:[-/.](?:0?[1-9]|1[0-2]))(?:[-/.](?:0?[1-9]|[12]\d|3[01]))?\b/g,
        /\b(?:0?[1-9]|[12]\d|3[01])[./-](?:0?[1-9]|1[0-2])[./-](?:\d{2}|\d{4})\b/g,
        /\b(?:0?[1-9]|[12]\d|3[01])\s+(?:январ[ьяе]|феврал[ьяе]|март[ае]?|апрел[ьяе]|ма[йея]|июн[ьяе]?|июл[ьяе]?|август[ае]?|сентябр[ьяе]?|октябр[ьяе]?|ноябр[ьяе]?|декабр[ьяе]?|january|february|march|april|may|june|july|august|september|october|november|december)(?:\s+20\d{2}\b|(?![\p{L}\p{N}_]))/giu
    ];
    const spans = [];
    for (const pattern of patterns) {
        for (const match of haystack.matchAll(pattern)) {
            spans.push({
                start: match.index,
                end: match.index + match[0].length
            });
        }
    }
    return spans;
}
/**
 * Finds the character spans of money-like numbers: digit groups separated by a
 * space or non-breaking space (optionally with two decimals), and plain numbers
 * with exactly two decimals.
 *
 * @param {string} text - Text to scan.
 * @returns {Array<{start: number, end: number}>} Half-open spans, grouped by pattern.
 */
function collectAmountSpans(text) {
    const groupedThousands = /\b\d{1,3}(?:[ \u00A0]\d{3})+(?:[.,]\d{2})?\b/g;
    const twoDecimals = /\b\d+[.,]\d{2}\b/g;
    const haystack = String(text);
    const toSpan = (match) => ({
        start: match.index,
        end: match.index + match[0].length
    });
    return [
        ...Array.from(haystack.matchAll(groupedThousands), toSpan),
        ...Array.from(haystack.matchAll(twoDecimals), toSpan)
    ];
}
/**
 * Finds the character spans of percentages such as "15%", "12,5 %".
 *
 * @param {string} text - Text to scan.
 * @returns {Array<{start: number, end: number}>} Half-open spans in text order.
 */
function collectPercentSpans(text) {
    const percentPattern = /\b\d{1,3}(?:[.,]\d+)?\s*%/g;
    return Array.from(String(text).matchAll(percentPattern), (match) => ({
        start: match.index,
        end: match.index + match[0].length
    }));
}
/**
 * Checks whether the half-open range [start, end) overlaps any of the spans.
 *
 * @param {number} start - Range start (inclusive).
 * @param {number} end - Range end (exclusive).
 * @param {Array<{start: number, end: number}>} spans - Spans to test against.
 * @returns {boolean} True when at least one span overlaps the range.
 */
function intersectsAnySpan(start, end, spans) {
    for (const span of spans) {
        const overlaps = start < span.end && end > span.start;
        if (overlaps) {
            return true;
        }
    }
    return false;
}
/**
 * Extracts chart-of-accounts numbers ("60", "68.02", ...) mentioned in a text.
 *
 * Two passes:
 *  1. Contextual: a number that directly follows an account word ("счет", "сч.",
 *     "account", "schet", optionally with "№", "#" or ":"). If any are found,
 *     only those are returned.
 *  2. Generic: if the text contains accounting vocabulary, any two-digit (or
 *     NN.NN) number that is not part of a date, amount or percentage.
 * In both passes the first two digits must be a known account prefix.
 *
 * Fixes over the previous version:
 *  - The account words were delimited with `\b`, which in JavaScript only knows
 *    ASCII word characters. Around Cyrillic words it never matched, so texts
 *    such as "счет 60.01" produced no accounts. Word edges are now checked with
 *    Unicode-aware lookarounds, and "счёт" is accepted in the vocabulary check.
 *  - The contextual number must not be followed by another digit, so the "20"
 *    of "account 2020" is no longer reported as account 20.
 *
 * @param {*} text - Text to scan (nullish is treated as empty).
 * @returns {string[]} Unique account numbers in order of appearance.
 */
function extractAccounts(text) {
    const lower = String(text ?? "").toLowerCase();
    const knownPrefixes = new Set([
        "01", "02", "07", "08", "10", "13", "19", "20", "21", "23",
        "25", "26", "28", "29", "41", "43", "44", "45", "50", "51",
        "52", "55", "57", "58", "60", "62", "66", "67", "68", "69",
        "70", "71", "73", "76", "90", "91", "94", "96", "97"
    ]);
    // Every candidate starts with two digits, so the prefix is its first two characters.
    const hasKnownPrefix = (candidate) => knownPrefixes.has(candidate.slice(0, 2));
    const contextualPattern = /(?<![\p{L}\p{N}_])(?:сч(?:е|ё)т(?:а|у|ом|ов)?(?![\p{L}\p{N}_])|сч(?:\.|(?![\p{L}\p{N}_]))|accounts?(?![\p{L}\p{N}_])|schet(?:a|u|om|ov)?(?![\p{L}\p{N}_]))\s*(?:№|#|:)?\s*(\d{2}(?:\.\d{2})?)(?!\d)/giu;
    const explicitAccounts = new Set();
    for (const match of lower.matchAll(contextualPattern)) {
        const token = match[1].trim();
        if (hasKnownPrefix(token)) {
            explicitAccounts.add(token);
        }
    }
    if (explicitAccounts.size > 0) {
        return Array.from(explicitAccounts);
    }
    const hasAccountingLexeme = /(?<![\p{L}\p{N}_])(?:сч(?:е|ё)т(?:а|у|ом|ов)?(?![\p{L}\p{N}_])|сч(?:\.|(?![\p{L}\p{N}_]))|accounts?(?![\p{L}\p{N}_])|schet(?:a|u|om|ov)?(?![\p{L}\p{N}_]))|оплат|расчет|расч[её]т|аванс|долг|settlement|payment|supplier|customer|ндс|vat|амортиз|рбп|deferred/iu.test(lower);
    if (!hasAccountingLexeme) {
        return [];
    }
    // Numbers inside dates, amounts and percentages are not account references.
    const excludedSpans = [...collectDateSpans(lower), ...collectAmountSpans(lower), ...collectPercentSpans(lower)];
    const extracted = new Set();
    for (const match of lower.matchAll(/\b\d{2}(?:\.\d{2})?\b/g)) {
        const candidate = match[0];
        const start = match.index;
        const end = start + candidate.length;
        if (intersectsAnySpan(start, end, excludedSpans)) {
            continue;
        }
        if (hasKnownPrefix(candidate)) {
            extracted.add(candidate);
        }
    }
    return Array.from(extracted);
}
/**
 * Picks a route hint for a question using keyword heuristics.
 *
 * Checks run in priority order: exact object trace, cross-entity chain,
 * period-close risk or heavy overview, risk probe or rule control; otherwise the
 * canonical store route is returned.
 *
 * Fix: the rule-control pattern contained `\bос\b`. JavaScript's `\b` only knows
 * ASCII word characters, so that alternative could never match a standalone
 * "ос". It now uses Unicode-aware lookarounds and still does not fire inside
 * longer words such as "вопрос".
 *
 * @param {string} question - User question.
 * @returns {string} One of "live_mcp_drilldown", "hybrid_store_plus_live",
 *   "batch_refresh_then_store", "store_feature_risk", "store_canonical".
 */
function detectRouteByHeuristicsV1(question) {
    const q = question.toLowerCase();
    const hasExactTrace = /(документ\s*(№|#)|\bref\b|\bid\b|строк[аи].*проводк|конкретн(ый|ого|ая).*документ|точн(ый|ого).*источник|trx-\d+|inv-\d+)/i.test(q);
    const hasCrossChain = /(разлож|цепоч|чем подтверж|связк|документ.*оплат|закрывающ|взаиморасчет|хвост.*(документ|оплат|проводк))/i.test(q);
    const hasPeriodCloseRisk = /(предзакры|закрыти[ея].*период|перед сдачей отчетност|последн(ий|его).*(день|дня)|срыв.*закрыт|может взорвать)/i.test(q);
    const hasHeavyOverview = /(рейтинг|топ|в целом|обзор|приоритиз|company|самых|концентрац|срез)/i.test(q);
    const hasRiskProbe = /(аномал|подозр|зоны риска|ручной ошиб|подозрительн|риск|хвост)/i.test(q);
    const hasRuleControl = /(контрол|правил|ошибк.*дат|срок.*амортиз|настройк|\b97\b|(?<![\p{L}\p{N}_])ос(?![\p{L}\p{N}_])|68\.02|ндс)/iu.test(q);
    if (hasExactTrace) {
        return "live_mcp_drilldown";
    }
    if (hasCrossChain) {
        return "hybrid_store_plus_live";
    }
    if (hasPeriodCloseRisk || hasHeavyOverview) {
        return "batch_refresh_then_store";
    }
    if (hasRiskProbe || hasRuleControl) {
        return "store_feature_risk";
    }
    return "store_canonical";
}
/**
 * Builds a heuristic normalized_query_v1 document without calling a model.
 *
 * The route hint is either supplied by the caller or detected from keywords;
 * intent, output shape, requirements and confidence are derived from it and
 * from a few keyword checks. A mentioned period is always reported as the
 * fixed value "2020-06".
 *
 * @param {string} userQuestion - Raw user question.
 * @param {string} [expectedRoute] - Route hint to use instead of detection.
 * @returns {object} Mock normalized_query_v1 document.
 */
function buildMockNormalizedV1(userQuestion, expectedRoute) {
    const lowered = userQuestion.toLowerCase();
    const routeHint = expectedRoute ?? detectRouteByHeuristicsV1(userQuestion);
    const hasPeriod = /(январ|феврал|март|апрел|май|июн|июл|август|сентябр|октябр|ноябр|декабр|квартал|период|конец месяца|20\d{2})/i.test(userQuestion);
    const hasHeavyGoal = /(рейтинг|топ|обзор|приоритиз|срез|в целом|концентрац|самых)/i.test(lowered);
    const hasCloseRisk = /(предзакры|закрыти[ея].*период|срыв.*закрыт|последн.*день)/i.test(lowered);
    const hasRule = /(правил|контрол|ошибк.*дат|амортиз|настройк|\b97\b|ндс|\b01\b|\b02\b)/i.test(lowered);
    const hasAnomaly = /(аномал|подозр|риск|хвост|не сход|завис|крив)/i.test(lowered);
    const isDrilldownRoute = routeHint === "live_mcp_drilldown";
    const isHybridRoute = routeHint === "hybrid_store_plus_live";
    const isBatchRoute = routeHint === "batch_refresh_then_store";
    let intentClass;
    switch (routeHint) {
        case "live_mcp_drilldown":
            intentClass = "drilldown_explain";
            break;
        case "hybrid_store_plus_live":
            intentClass = "cross_entity";
            break;
        case "batch_refresh_then_store":
            intentClass = hasCloseRisk && !hasHeavyGoal ? "period_close_risk" : "heavy_analytical";
            break;
        case "store_feature_risk":
            if (hasRule) {
                intentClass = "rule_based_account_control";
            }
            else {
                intentClass = hasAnomaly ? "anomaly_probe" : "ambiguous_human_query";
            }
            break;
        default:
            intentClass = "simple_factual";
    }
    let expectedOutputShape;
    if (intentClass === "period_close_risk") {
        expectedOutputShape = "prioritized_review_list";
    }
    else if (isBatchRoute) {
        expectedOutputShape = "ranked_list";
    }
    else if (isHybridRoute) {
        expectedOutputShape = "reconciliation_report";
    }
    else if (isDrilldownRoute) {
        expectedOutputShape = "evidence_chain";
    }
    else {
        expectedOutputShape = hasAnomaly ? "anomaly_summary" : "point_answer";
    }
    const periodScope = hasPeriod
        ? { type: "inferred", value: "2020-06", confidence: "medium" }
        : { type: "missing", value: null, confidence: "low" };
    const ambiguities = [];
    if (!hasPeriod) {
        ambiguities.push({
            field: "period_scope",
            reason: "period is not explicitly provided",
            severity: "medium"
        });
    }
    const periodDependentConfidence = hasPeriod ? "medium" : "low";
    return {
        schema_version: "normalized_query_v1",
        user_question_raw: userQuestion,
        normalized_question: userQuestion.trim(),
        intent_class: intentClass,
        business_problem_type: "normalization_playground",
        domain_entities: isHybridRoute ? ["контрагент", "документ", "проводка"] : ["счет"],
        accounts_mentioned: extractAccounts(userQuestion),
        documents_mentioned: /документ|реализац|поступлен|выписк|платеж/i.test(userQuestion) ? ["документ"] : [],
        registers_mentioned: /регистр|движен/i.test(userQuestion) ? ["регистр"] : [],
        period_scope: periodScope,
        requires: {
            needs_cross_entity_join: isHybridRoute,
            needs_causal_chain: isHybridRoute || /почему|чем подтверж|где рвется/i.test(userQuestion),
            needs_exact_object_trace: isDrilldownRoute,
            needs_ranking: isBatchRoute && intentClass !== "period_close_risk",
            needs_anomaly_summary: hasAnomaly && !isHybridRoute,
            needs_runtime_truth: isDrilldownRoute,
            needs_period_cut: hasPeriod,
            needs_evidence: isHybridRoute || isDrilldownRoute
        },
        expected_output_shape: expectedOutputShape,
        route_hint: routeHint,
        ambiguities,
        confidence: {
            overall: periodDependentConfidence,
            intent_class: "medium",
            route_hint: periodDependentConfidence
        }
    };
}
/**
 * Caps "high" overall and route-hint confidence at "medium" for questionable items.
 *
 * An item is questionable when it has ambiguities, is 20 or more words long, has
 * a non-explicit period, or combines period-close wording with overview wording.
 * Other items are returned unchanged (same object).
 *
 * @param {object} item - normalized_query_v1 document.
 * @returns {object} The same item, or a copy with capped confidence.
 */
function applyConfidenceGuardV1(item) {
    const question = item.user_question_raw;
    const wordCount = question.trim().split(/\s+/).filter(Boolean).length;
    const mixesCloseAndOverview = /(предзакры|закрыти[ея].*период|перед сдачей отчетност|перед закрытием)/i.test(question) &&
        /(рейтинг|топ|обзор|summary|срез|концентрац|в целом|приоритиз|самых)/i.test(question);
    const warningSigns = [
        item.ambiguities.length > 0,
        wordCount >= 20,
        item.period_scope.type !== "explicit",
        mixesCloseAndOverview
    ];
    if (!warningSigns.some(Boolean)) {
        return item;
    }
    const capAtMedium = (level) => (level === "high" ? "medium" : level);
    return {
        ...item,
        confidence: {
            ...item.confidence,
            overall: capAtMedium(item.confidence.overall),
            route_hint: capAtMedium(item.confidence.route_hint)
        }
    };
}
/**
 * Splits a user message into candidate fragments.
 *
 * The message is cut at newlines, semicolons and sentence ends; a leading list
 * bullet ("-", "*", "•") is removed from each piece and blank pieces are dropped.
 * If nothing is left, the trimmed message itself is the only fragment.
 *
 * @param {string} message - User message.
 * @returns {string[]} Fragment texts (empty for a blank message).
 */
function splitIntoCandidateFragments(message) {
    const pieces = [];
    for (const chunk of message.split(/[\n;]+|(?<=[.!?])\s+/)) {
        const cleaned = chunk.replace(/^\s*[-*•]\s*/, "").trim();
        if (cleaned) {
            pieces.push(cleaned);
        }
    }
    if (pieces.length > 0) {
        return pieces;
    }
    const whole = message.trim();
    return whole ? [whole] : [];
}
/**
 * Infers a time scope from free text.
 *
 * A "20YY" year, optionally followed by a month ("2020-06"), is an explicit
 * scope. Otherwise a Russian month name or a generic period word gives an
 * inferred scope whose value is the matched word. Otherwise the scope is missing.
 *
 * @param {string} text - Text to scan.
 * @returns {{type: string, value: (string|null), confidence: string}} Inferred scope.
 */
function inferTimeScope(text) {
    const yearPattern = /\b(20\d{2}(?:[-/.](?:0[1-9]|1[0-2]))?)\b/;
    const periodWordPattern = /(январ[ья]|феврал[ья]|март[ае]?|апрел[ья]|ма[йя]|июн[ьяе]?|июл[ьяе]?|август[ае]?|сентябр[ьяе]?|октябр[ьяе]?|ноябр[ьяе]?|декабр[ьяе]?|квартал|конец месяца|период)/i;
    const yearMatch = text.match(yearPattern);
    if (yearMatch) {
        return { type: "explicit", value: yearMatch[1], confidence: "high" };
    }
    const wordMatch = text.match(periodWordPattern);
    if (wordMatch) {
        return { type: "inferred", value: wordMatch[1], confidence: "medium" };
    }
    return { type: "missing", value: null, confidence: "low" };
}
/**
 * Derives candidate labels from fragment flags.
 *
 * Only in-scope fragments get labels; an in-scope fragment with no matching
 * flag is labelled "simple_factual".
 *
 * @param {object} flags - Fragment flags.
 * @param {string} domainRelevance - Domain relevance of the fragment.
 * @returns {string[]} Candidate labels in a fixed priority order.
 */
function pickCandidateLabels(flags, domainRelevance) {
    if (domainRelevance !== "in_scope") {
        return [];
    }
    const rules = [
        ["drilldown_explain", flags.asks_for_exact_object_trace],
        ["cross_entity", flags.has_multi_entity_scope && flags.asks_for_chain_explanation],
        ["rule_based_account_control", flags.asks_for_rule_check],
        ["anomaly_probe", flags.asks_for_anomaly_scan],
        ["heavy_analytical", flags.asks_for_ranking_or_top || flags.asks_for_period_summary],
        ["period_close_risk", flags.mentions_period_close_context && !flags.asks_for_ranking_or_top]
    ];
    const picked = rules
        .filter(([, applies]) => applies)
        .map(([label]) => label);
    return picked.length > 0 ? picked : ["simple_factual"];
}
/**
 * Builds a v2 fragment from raw text using keyword heuristics only (no model call).
 *
 * @param {string} rawText - Fragment text.
 * @param {number} index - Zero-based fragment position; used for the "F<n>" id.
 * @returns {object|null} Heuristic fragment, or null when the trimmed text is
 *   shorter than 3 characters or is a single filler word.
 */
function buildFragmentV2(rawText, index) {
const text = rawText.trim();
if (text.length < 3) {
return null;
}
const lower = text.toLowerCase();
// A fragment that consists of exactly one filler word carries no request.
const noiseOnly = /^(ну|короче|типа|ладно|ага|ок(ей)?)$/i.test(lower);
if (noiseOnly) {
return null;
}
// Keyword families used to classify the fragment's relevance.
// NOTE(review): the bare `ос` alternative below is unanchored, so it also matches
// inside longer words (e.g. "вопрос") — confirm whether that is intended.
const inScopeTokens = /(проводк|документ|реализац|поступлен|взаиморасчет|сальдо|остатк|сч(?:е|ё)т|ндс|амортиз|расходы будущих периодов|рбп|ос|контрагент|оплат|банк|выписк|склад|товар|материал|списани|жизненн|цикл|переход|lifecycle|writeoff|deferred)/i.test(lower);
const translitInScopeTokens = /\b(?:schet|scheta|schetu|schetom|postavsh|kontragent|dokument|doc|oplata|oplati|platezh|vypisk|provodk|realiz|postuplen|nds|os|saldo|hvost|tail|anomali|risk|zakryt|lifecycle|state|transition|writeoff|deferred|periodclose)\b/i.test(lower);
const lifecycleInScopeTokens = /(lifecycle|жизненн(?:ого|ый)?\s+цикл|стади|переход|списани|writeoff|deferred|period\s*close)/i.test(lower);
const genericAccountingTokens = /(фсбу|налогов(ый|ого)|нк рф|закон|форма отчетности|как правильно в бухгалтерии)/i.test(lower);
const offTopicTokens = /(погода|анекдот|музык|фильм|игр[аы]|рецепт|курс валют в мире)/i.test(lower);
// Classification priority: off-topic first, then generic accounting without any
// in-scope keyword, then in-scope; anything else stays "unclear".
let domainRelevance = "unclear";
let businessScope = "unclear";
if (offTopicTokens) {
domainRelevance = "out_of_scope";
businessScope = "offtopic";
}
else if (genericAccountingTokens && !inScopeTokens && !translitInScopeTokens) {
domainRelevance = "out_of_scope";
businessScope = "generic_accounting";
}
else if (inScopeTokens || translitInScopeTokens || lifecycleInScopeTokens) {
domainRelevance = "in_scope";
businessScope = "company_specific_accounting";
}
// Count entity-like keyword occurrences (Cyrillic and transliterated); two or
// more set the multi-entity flag.
const entityTokenCount = (lower.match(/(документ|оплат|проводк|контрагент|договор|реализац|поступлен|выписк|закрыт|взаиморасчет|склад|товар|материал|поставщ|покупат|списани|жизненн|цикл)/g) ?? [])
.length;
const translitEntityTokenCount = (lower.match(/\b(?:dokument|oplata|platezh|provodk|kontragent|postavsh|pokupat|realiz|postuplen|vypisk|zakryt|schet|sklad|tovar|material)\b/g) ?? []).length;
const entityTokenCountTotal = entityTokenCount + translitEntityTokenCount;
const flags = {
has_multi_entity_scope: entityTokenCountTotal >= 2,
asks_for_chain_explanation: /(цепоч|разлож|почему|чем подтверж|где рвет|связк|логик.*операц)/i.test(lower),
asks_for_ranking_or_top: /(топ|рейтинг|сам(ые|ых)|максимальн|сильнее всего|приоритиз)/i.test(lower),
asks_for_period_summary: /(срез|обзор|в целом|картина периода|summary|по периоду)/i.test(lower),
asks_for_rule_check: /(правил|контрол|корректн|ошибк.*дат|срок списан|амортиз|настройк|проверь)/i.test(lower),
asks_for_anomaly_scan: /(аномал|подозр|риск|хвост|не сход|завис|крив|искажа)/i.test(lower),
asks_for_exact_object_trace: /(документ\s*(№|#)|\bref\b|\bid\b|строк[аи]\s+проводк|операц.*№|trx-\d+|inv-\d+|doc-\d+)/i.test(lower),
asks_for_evidence: /(чем подтверж|документ|проводк|движен|акт сверк|доказат|evidence)/i.test(lower),
mentions_period_close_context: /(закрыти[ея]\s+период|предзакры|конец месяца|сдач[аи]\s+отчетност)/i.test(lower)
};
// Transliterated / English wording can only switch flags on, never off.
const translitHints = {
chain: /\b(?:razlozh|pochemu|chem podtver|gde rv|svyaz|razryv|chain)\b/i.test(lower),
rule: /\b(?:prover|check|rule|control|korrekt)\b/i.test(lower),
anomaly: /\b(?:anomal|risk|hvost|tail|mismatch)\b/i.test(lower),
evidence: /\b(?:dokument|provodk|evidence|doc)\b/i.test(lower)
};
if (translitHints.chain)
flags.asks_for_chain_explanation = true;
if (translitHints.rule)
flags.asks_for_rule_check = true;
if (translitHints.anomaly)
flags.asks_for_anomaly_scan = true;
if (translitHints.evidence)
flags.asks_for_evidence = true;
const candidateLabels = pickCandidateLabels(flags, domainRelevance);
// Confidence: low unless in scope; high for exact-trace or ranking requests.
let confidence = "medium";
if (domainRelevance === "out_of_scope" || domainRelevance === "unclear") {
confidence = "low";
}
else if (flags.asks_for_exact_object_trace || flags.asks_for_ranking_or_top) {
confidence = "high";
}
const timeScope = inferTimeScope(text);
return {
fragment_id: `F${index + 1}`,
raw_fragment_text: text,
// Same text with the first character upper-cased.
normalized_fragment_text: text.charAt(0).toUpperCase() + text.slice(1),
domain_relevance: domainRelevance,
business_scope: businessScope,
// Hint lists hold the unique matched keyword stems, not full words.
entity_hints: Array.from(new Set(Array.from(lower.matchAll(/(поставщик|покупател|контрагент|договор|банк|склад|товар|материал|ос|взаиморасчет|реализац|поступлен)/g)).map((item) => item[0]))),
account_hints: extractAccounts(text),
document_hints: Array.from(new Set(Array.from(lower.matchAll(/(документ|реализац|поступлен|платеж|выписк|акт сверк)/g)).map((item) => item[0]))),
register_hints: Array.from(new Set(Array.from(lower.matchAll(/(регистр|движен|остатк|сальдо)/g)).map((item) => item[0]))),
time_scope: timeScope,
flags,
semantic_hints: inferSemanticHints(text, timeScope),
candidate_labels: candidateLabels,
confidence
};
}
/**
 * Builds a normalized_query_v2 payload from the raw user message without calling a model.
 *
 * The message is split with splitIntoCandidateFragments and every piece is handed to
 * buildFragmentV2; pieces for which it returns a falsy value are listed in
 * discarded_fragments with reason "noise_or_too_short".
 *
 * @param {string} userMessage Raw user message.
 * @returns {object} Payload whose schema_version is "normalized_query_v2".
 */
function buildMockNormalizedV2(userMessage) {
    const fragments = [];
    const discarded = [];
    for (const [position, rawText] of splitIntoCandidateFragments(userMessage).entries()) {
        const fragment = buildFragmentV2(rawText, position);
        if (fragment) {
            fragments.push(fragment);
        } else {
            discarded.push({ raw_fragment_text: rawText, reason: "noise_or_too_short" });
        }
    }

    let inScopeCount = 0;
    let unclearCount = 0;
    for (const { domain_relevance: relevance } of fragments) {
        if (relevance === "in_scope") {
            inScopeCount += 1;
        } else if (relevance === "unclear") {
            unclearCount += 1;
        }
    }

    // The message counts as in scope as soon as one fragment is in scope.
    const messageInScope = inScopeCount > 0;
    let scopeConfidence = "low";
    if (messageInScope) {
        scopeConfidence = unclearCount > 0 ? "medium" : "high";
    }
    // Clarification is requested only for in-scope messages that still carry an
    // unclear fragment or a fragment without a period.
    const needsClarification = messageInScope &&
        (unclearCount > 0 || fragments.some((item) => item.time_scope.type === "missing"));

    return {
        schema_version: "normalized_query_v2",
        user_message_raw: userMessage,
        message_in_scope: messageInScope,
        scope_confidence: scopeConfidence,
        contains_multiple_tasks: fragments.length > 1,
        fragments,
        discarded_fragments: discarded,
        global_notes: {
            needs_clarification: needsClarification,
            clarification_reason: needsClarification ? "Недостаточно периода/контекста по части фрагментов." : null
        }
    };
}
/**
 * Tells whether the session context carries any indication of a working period.
 *
 * @param {object|null|undefined} context Session context; period_hint and
 *   business_context are the only fields read.
 * @returns {boolean} true when period_hint is non-blank, or when the lower-cased
 *   business_context contains one of the known period markers.
 */
function hasSessionPeriodContext(context) {
    if (!context) {
        return false;
    }
    const hint = String(context.period_hint ?? "").trim();
    const business = String(context.business_context ?? "").toLowerCase();
    if (hint.length > 0) {
        return true;
    }
    const periodMarkers = [
        "current_analysis_period",
        "active_period",
        "рабочий месяц",
        "активный период"
    ];
    return periodMarkers.some((marker) => business.includes(marker));
}
/**
 * Tells whether an in-scope fragment carries at least one business signal:
 * a non-empty hint list, a candidate label, or a truthy flag.
 *
 * @param {object} fragment Normalized fragment.
 * @returns {boolean} false for fragments that are not "in_scope"; otherwise
 *   whether any signal is present.
 */
function hasBusinessNodeSignals(fragment) {
    if (fragment.domain_relevance !== "in_scope") {
        return false;
    }
    const signalLists = [
        "entity_hints",
        "account_hints",
        "document_hints",
        "register_hints",
        "candidate_labels"
    ];
    if (signalLists.some((key) => fragment[key].length > 0)) {
        return true;
    }
    return Object.values(fragment.flags).some(Boolean);
}
/**
 * Tells whether a route may be chosen for the fragment: it must be in scope,
 * have a business scope other than "unclear", and carry business signals
 * according to hasBusinessNodeSignals.
 *
 * @param {object} fragment Normalized fragment.
 * @returns {boolean} Whether routing is allowed for the fragment.
 */
function routeCanBeSelected(fragment) {
    const inScope = fragment.domain_relevance === "in_scope";
    return inScope && fragment.business_scope !== "unclear" && hasBusinessNodeSignals(fragment);
}
/**
 * Detects a reference to the July 2020 snapshot in the user message or in the
 * session context.
 *
 * The message matches on "2020-07" (also "2020/7", "2020.07"), on the standalone
 * Russian word "июл"/"июль"/"июля"/"июле", or on the standalone word "july".
 * The session context (period_hint and business_context) matches on the looser
 * substrings "2020-07"-like, "июл" or "july".
 *
 * @param {string|null|undefined} userMessage Raw user message.
 * @param {object|null|undefined} sessionContext Session context.
 * @returns {boolean} Whether a July 2020 signal was found.
 */
function hasJuly2020SnapshotSignal(userMessage, sessionContext) {
    const text = String(userMessage ?? "").toLowerCase();
    // `\b` only understands ASCII word characters, so it can never delimit a
    // Cyrillic word; explicit lookarounds give "июль" real word boundaries.
    // The optional trailing year of the previous pattern never influenced the
    // outcome of `.test()` and is therefore omitted.
    const julyInMessage = /\b2020[-/.]0?7\b|(?<![а-яё])июл[ьяе]?(?![а-яё])|\bjuly\b/i;
    if (julyInMessage.test(text)) {
        return true;
    }
    const contextPeriod = String(sessionContext?.period_hint ?? "").toLowerCase();
    const businessContext = String(sessionContext?.business_context ?? "").toLowerCase();
    return /2020[-/.]0?7|июл|july/.test(`${contextPeriod} ${businessContext}`);
}
/**
 * Checks the user message for a company-scope trigger: one of the listed
 * two-digit account numbers (optionally with a sub-account such as "60.01"),
 * or one of the accounting keywords embedded in the pattern below.
 *
 * @param {string|null|undefined} userMessage Raw user message.
 * @returns {boolean} Whether the lower-cased message matches the pattern.
 */
function hasP0SignalForCompanyScope(userMessage) {
    const companyScopePattern = /(?:\b(?:01|02|08|19|20|21|23|25|26|28|29|44|51|60|62|68|76|97)(?:\.\d{1,2})?\b|ндс|vat|supplier|customer|settlement|month\s*close|рбп|deferred|закрыти[ея]\s+месяц|амортиз|поставщ|покупат)/i;
    const normalizedMessage = String(userMessage ?? "").toLowerCase();
    return companyScopePattern.test(normalizedMessage);
}
/**
 * Upgrades in-scope fragments whose business_scope is "generic_accounting" or
 * "unclear" to "company_specific_accounting" when the request carries both a
 * July 2020 snapshot signal and a company-scope (P0) signal.
 *
 * The input is never mutated: either the same candidate is returned, or a
 * shallow copy with a new fragments array.
 *
 * @param {*} candidate Parsed normalized candidate (any shape).
 * @param {string} userMessage Raw user message.
 * @param {object|null|undefined} sessionContext Session context.
 * @returns {*} The original candidate, or a copy with upgraded fragments.
 */
function applyCompanyScopeResolutionV2(candidate, userMessage, sessionContext) {
    const isObject = Boolean(candidate) && typeof candidate === "object";
    if (!isObject || !Array.isArray(candidate.fragments)) {
        return candidate;
    }
    const mustForceCompanyScope = hasJuly2020SnapshotSignal(userMessage, sessionContext) &&
        hasP0SignalForCompanyScope(userMessage);
    if (!mustForceCompanyScope) {
        return candidate;
    }

    const upgradableScopes = new Set(["generic_accounting", "unclear"]);
    let upgradedAny = false;
    const fragments = candidate.fragments.map((fragment) => {
        const eligible = Boolean(fragment) &&
            typeof fragment === "object" &&
            fragment.domain_relevance === "in_scope" &&
            upgradableScopes.has(String(fragment.business_scope ?? "").trim());
        if (!eligible) {
            return fragment;
        }
        upgradedAny = true;
        return { ...fragment, business_scope: "company_specific_accounting" };
    });

    // Hand back the untouched candidate when nothing had to change.
    return upgradedAny ? { ...candidate, fragments } : candidate;
}
/**
 * Removes duplicate entries while keeping first-occurrence order.
 *
 * @param {Iterable<string>} input Soft-assumption identifiers.
 * @returns {string[]} New array without duplicates.
 */
function dedupeSoftAssumptions(input) {
    const unique = new Set(input);
    return [...unique];
}
/**
 * Decides whether a fragment can be executed as is, executed under soft
 * assumptions, or must be clarified first.
 *
 * Blocking checks run in a fixed order: out-of-scope domain, unclear domain,
 * no business signals, route not selectable, and a missing period that is
 * critical for the question while the session offers no period context.
 *
 * @param {object} fragment Normalized fragment (flags, time_scope, hint lists).
 * @param {object|null|undefined} sessionContext Session context.
 * @returns {{execution_readiness: string, clarification_reason: (string|null), soft_assumption_used: string[]}}
 */
function decideFragmentExecutionPolicy(fragment, sessionContext) {
    const hasPeriodContext = hasSessionPeriodContext(sessionContext);
    const { flags } = fragment;
    const periodIsCritical = flags.asks_for_period_summary ||
        flags.mentions_period_close_context ||
        flags.asks_for_ranking_or_top;
    const blocked = (reason) => ({
        execution_readiness: "needs_clarification",
        clarification_reason: reason,
        soft_assumption_used: []
    });

    if (fragment.domain_relevance === "out_of_scope") {
        return blocked("fragment_out_of_scope");
    }
    if (fragment.domain_relevance === "unclear") {
        return blocked("domain_or_scope_unclear");
    }
    if (!hasBusinessNodeSignals(fragment)) {
        return blocked("business_area_not_identified");
    }
    if (!routeCanBeSelected(fragment)) {
        return blocked("route_cannot_be_selected_reliably");
    }

    const periodMissing = fragment.time_scope.type === "missing";
    if (periodMissing && !hasPeriodContext && periodIsCritical) {
        return blocked("critical_period_missing");
    }

    const softAssumptions = [];
    if (periodMissing && hasPeriodContext) {
        // The period is borrowed from the session instead of asking the user.
        softAssumptions.push("period_from_session_context");
    }
    const wantsProblemScan = flags.asks_for_anomaly_scan ||
        flags.asks_for_rule_check ||
        flags.asks_for_ranking_or_top ||
        flags.asks_for_period_summary;
    if (wantsProblemScan) {
        softAssumptions.push("problem_scan_mode_enabled");
    }
    const companyScopeWithoutAnchors = fragment.business_scope === "company_specific_accounting" &&
        fragment.entity_hints.length === 0 &&
        fragment.account_hints.length === 0;
    if (companyScopeWithoutAnchors) {
        softAssumptions.push("company_scope_defaulted");
    }

    const assumptions = dedupeSoftAssumptions(softAssumptions);
    return {
        execution_readiness: assumptions.length > 0 ? "executable_with_soft_assumptions" : "executable",
        clarification_reason: null,
        soft_assumption_used: assumptions
    };
}
/**
 * Returns a copy of the fragment extended with the v2.0.1 execution-policy
 * fields computed by decideFragmentExecutionPolicy.
 *
 * @param {object} fragment Normalized v2 fragment.
 * @param {object|null|undefined} sessionContext Session context.
 * @returns {object} New fragment object; the input is not mutated.
 */
function toV201Fragment(fragment, sessionContext) {
    const {
        execution_readiness,
        clarification_reason,
        soft_assumption_used
    } = decideFragmentExecutionPolicy(fragment, sessionContext);
    return {
        ...fragment,
        execution_readiness,
        clarification_reason,
        soft_assumption_used
    };
}
/**
 * Converts a v2-shaped candidate into a normalized_query_v2_0_1 payload by
 * attaching the clarification policy to every usable fragment.
 *
 * Fragments that are not objects or lack a string fragment_id are dropped.
 * The message needs clarification only when it has in-scope fragments and
 * every one of them is blocked.
 *
 * @param {*} candidate Parsed candidate.
 * @param {string} userMessage Raw user message, used when the candidate has no user_message_raw.
 * @param {object|null|undefined} sessionContext Session context.
 * @returns {object|null} The v2.0.1 payload, or null when the candidate is not
 *   an object with a fragments array.
 */
function applyClarificationPolicyV201(candidate, userMessage, sessionContext) {
    if (candidate === null || typeof candidate !== "object" || !Array.isArray(candidate.fragments)) {
        return null;
    }

    const fragments = [];
    for (const item of candidate.fragments) {
        if (item && typeof item === "object" && typeof item.fragment_id === "string") {
            fragments.push(toV201Fragment(item, sessionContext));
        }
    }

    const inScope = fragments.filter(({ domain_relevance: relevance }) => relevance === "in_scope");
    const blocked = inScope.filter(({ execution_readiness: readiness }) => readiness === "needs_clarification");
    const hasInScope = inScope.length > 0;
    const needsClarification = hasInScope && blocked.length === inScope.length;

    let clarificationReason = null;
    if (needsClarification) {
        clarificationReason = blocked[0]?.clarification_reason ?? "clarification_required";
    }
    const multipleTasks = typeof candidate.contains_multiple_tasks === "boolean"
        ? candidate.contains_multiple_tasks
        : fragments.length > 1;

    return {
        schema_version: "normalized_query_v2_0_1",
        user_message_raw: String(candidate.user_message_raw ?? userMessage),
        message_in_scope: hasInScope,
        scope_confidence: candidate.scope_confidence ?? (hasInScope ? "medium" : "low"),
        contains_multiple_tasks: multipleTasks,
        fragments,
        discarded_fragments: Array.isArray(candidate.discarded_fragments) ? candidate.discarded_fragments : [],
        global_notes: {
            needs_clarification: needsClarification,
            clarification_reason: clarificationReason
        }
    };
}
/**
 * Resolves the v2.0.2 execution state of a fragment: the v2.0.1 policy result
 * plus route_status and no_route_reason.
 *
 * @param {object} fragment Normalized fragment.
 * @param {object|null|undefined} sessionContext Session context.
 * @returns {{execution_readiness: string, clarification_reason: (string|null), soft_assumption_used: string[], route_status: string, no_route_reason: (string|null)}}
 */
function resolveFragmentExecutionStateV202(fragment, sessionContext) {
    const basePolicy = decideFragmentExecutionPolicy(fragment, sessionContext);
    const unrouted = (readiness, clarification, reason) => ({
        execution_readiness: readiness,
        clarification_reason: clarification,
        soft_assumption_used: [],
        route_status: "no_route",
        no_route_reason: reason
    });

    if (fragment.domain_relevance === "out_of_scope") {
        return unrouted("no_route", "fragment_out_of_scope", "out_of_scope");
    }
    if (basePolicy.execution_readiness === "needs_clarification") {
        return unrouted(
            "needs_clarification",
            basePolicy.clarification_reason ?? "insufficient_specificity",
            "insufficient_specificity"
        );
    }
    // Safety net: decideFragmentExecutionPolicy, as written in this file,
    // already answers "needs_clarification" whenever routeCanBeSelected is
    // false, so this branch is not expected to fire today.
    if (!routeCanBeSelected(fragment)) {
        return unrouted("no_route", "route_mapping_missing", "missing_mapping");
    }

    // Deterministic no-route guard:
    // routable in-scope fragments cannot remain unresolved.
    return {
        execution_readiness: basePolicy.execution_readiness,
        clarification_reason: null,
        soft_assumption_used: basePolicy.soft_assumption_used,
        route_status: "routed",
        no_route_reason: null
    };
}
/**
 * Returns a copy of the fragment extended with the v2.0.2 execution-state
 * fields computed by resolveFragmentExecutionStateV202.
 *
 * @param {object} fragment Normalized v2 fragment.
 * @param {object|null|undefined} sessionContext Session context.
 * @returns {object} New fragment object; the input is not mutated.
 */
function toV202Fragment(fragment, sessionContext) {
    const {
        execution_readiness,
        clarification_reason,
        soft_assumption_used,
        route_status,
        no_route_reason
    } = resolveFragmentExecutionStateV202(fragment, sessionContext);
    return {
        ...fragment,
        execution_readiness,
        clarification_reason,
        soft_assumption_used,
        route_status,
        no_route_reason
    };
}
/**
 * Converts a v2-shaped candidate into a normalized_query_v2_0_2 payload by
 * attaching the execution state (readiness plus routing status) to every
 * usable fragment.
 *
 * Fragments that are not objects or lack a string fragment_id are dropped.
 * The message needs clarification only when it has in-scope fragments and
 * every one of them has execution_readiness "needs_clarification".
 *
 * @param {*} candidate Parsed candidate.
 * @param {string} userMessage Raw user message, used when the candidate has no user_message_raw.
 * @param {object|null|undefined} sessionContext Session context.
 * @returns {object|null} The v2.0.2 payload, or null when the candidate is not
 *   an object with a fragments array.
 */
function applyExecutionStatePolicyV202(candidate, userMessage, sessionContext) {
    if (candidate === null || typeof candidate !== "object" || !Array.isArray(candidate.fragments)) {
        return null;
    }

    const fragments = [];
    for (const item of candidate.fragments) {
        if (item && typeof item === "object" && typeof item.fragment_id === "string") {
            fragments.push(toV202Fragment(item, sessionContext));
        }
    }

    const inScope = fragments.filter(({ domain_relevance: relevance }) => relevance === "in_scope");
    const awaitingClarification = inScope.filter(({ execution_readiness: readiness }) => readiness === "needs_clarification");
    const hasInScope = inScope.length > 0;
    const needsClarification = hasInScope && awaitingClarification.length === inScope.length;

    let clarificationReason = null;
    if (needsClarification) {
        clarificationReason = awaitingClarification[0]?.clarification_reason ?? "clarification_required";
    }
    const multipleTasks = typeof candidate.contains_multiple_tasks === "boolean"
        ? candidate.contains_multiple_tasks
        : fragments.length > 1;

    return {
        schema_version: "normalized_query_v2_0_2",
        user_message_raw: String(candidate.user_message_raw ?? userMessage),
        message_in_scope: hasInScope,
        scope_confidence: candidate.scope_confidence ?? (hasInScope ? "medium" : "low"),
        contains_multiple_tasks: multipleTasks,
        fragments,
        discarded_fragments: Array.isArray(candidate.discarded_fragments) ? candidate.discarded_fragments : [],
        global_notes: {
            needs_clarification: needsClarification,
            clarification_reason: clarificationReason
        }
    };
}
/**
 * Builds a mock normalized_query_v2_0_1 payload: the heuristic v2 mock with
 * the clarification policy applied on top.
 *
 * @param {string} userMessage Raw user message.
 * @param {object|null|undefined} sessionContext Session context.
 * @returns {object} Payload with schema_version "normalized_query_v2_0_1".
 */
function buildMockNormalizedV2_0_1(userMessage, sessionContext) {
    const baseline = buildMockNormalizedV2(userMessage);
    const withPolicy = applyClarificationPolicyV201(baseline, userMessage, sessionContext);
    if (withPolicy) {
        return withPolicy;
    }

    // Defensive fallback for a null policy result: every fragment is marked
    // as needing clarification.
    const fallbackReason = "policy_fallback";
    const blockedFragments = baseline.fragments.map((fragment) => ({
        ...fragment,
        execution_readiness: "needs_clarification",
        clarification_reason: fallbackReason,
        soft_assumption_used: []
    }));
    return {
        schema_version: "normalized_query_v2_0_1",
        user_message_raw: userMessage,
        message_in_scope: baseline.message_in_scope,
        scope_confidence: baseline.scope_confidence,
        contains_multiple_tasks: baseline.contains_multiple_tasks,
        fragments: blockedFragments,
        discarded_fragments: baseline.discarded_fragments,
        global_notes: {
            needs_clarification: true,
            clarification_reason: fallbackReason
        }
    };
}
/**
 * Builds a mock normalized_query_v2_0_2 payload: the heuristic v2 mock with
 * the execution-state policy applied on top.
 *
 * @param {string} userMessage Raw user message.
 * @param {object|null|undefined} sessionContext Session context.
 * @returns {object} Payload with schema_version "normalized_query_v2_0_2".
 */
function buildMockNormalizedV2_0_2(userMessage, sessionContext) {
    const baseline = buildMockNormalizedV2(userMessage);
    const withPolicy = applyExecutionStatePolicyV202(baseline, userMessage, sessionContext);
    if (withPolicy) {
        return withPolicy;
    }

    // Defensive fallback for a null policy result: every fragment is marked
    // as unrouted and needing clarification.
    const fallbackReason = "policy_fallback";
    const blockedFragments = baseline.fragments.map((fragment) => ({
        ...fragment,
        execution_readiness: "needs_clarification",
        clarification_reason: fallbackReason,
        soft_assumption_used: [],
        route_status: "no_route",
        no_route_reason: "unsupported_fragment_type"
    }));
    return {
        schema_version: "normalized_query_v2_0_2",
        user_message_raw: userMessage,
        message_in_scope: baseline.message_in_scope,
        scope_confidence: baseline.scope_confidence,
        contains_multiple_tasks: baseline.contains_multiple_tasks,
        fragments: blockedFragments,
        discarded_fragments: baseline.discarded_fragments,
        global_notes: {
            needs_clarification: true,
            clarification_reason: fallbackReason
        }
    };
}
/**
 * Picks the single route value stored in the trace history.
 *
 * For v1 payloads this is the payload's own route_hint. For later schema
 * versions it is the route of the first decision in a "deterministic_v2"
 * summary whose route is not "no_route".
 *
 * @param {object|null} normalized Validated normalized payload.
 * @param {object|null} routeSummary Route hint summary for that payload.
 * @returns {*} The route, or null when either argument is missing, the summary
 *   is not in "deterministic_v2" mode, its decisions are not an array, or no
 *   decision is routed.
 */
function routeHintForHistory(normalized, routeSummary) {
    if (!normalized || !routeSummary) {
        return null;
    }
    if (normalized.schema_version === "normalized_query_v1") {
        return normalized.route_hint;
    }
    // A summary without a decisions array is tolerated (and reported) by
    // collectTraceCompletenessIssues, so it must not crash trace bookkeeping here.
    if (routeSummary.mode !== "deterministic_v2" || !Array.isArray(routeSummary.decisions)) {
        return null;
    }
    const routed = routeSummary.decisions.find((item) => item != null && item.route !== "no_route");
    return routed?.route ?? null;
}
/**
 * Picks the confidence value stored in the trace history: confidence.route_hint
 * for v1 payloads, scope_confidence for every other schema version.
 *
 * @param {object|null} normalized Validated normalized payload.
 * @param {object|null} routeSummary Route hint summary; only its presence matters.
 * @returns {*} The confidence value, or null when either argument is missing.
 */
function confidenceForHistory(normalized, routeSummary) {
    if (!(normalized && routeSummary)) {
        return null;
    }
    const isV1 = normalized.schema_version === "normalized_query_v1";
    return isV1 ? normalized.confidence.route_hint : normalized.scope_confidence;
}
/**
 * Lists the pieces missing from a trace record.
 *
 * v1 payloads are only checked for the raw model output. Later versions are
 * also checked for a fragments array, for the per-fragment fields their schema
 * version requires, and for a "deterministic_v2" route summary with one
 * decision per fragment.
 *
 * @param {object} input Trace ingredients; rawModelResponse, normalized and
 *   routeHintSummary are the fields read.
 * @returns {string[]} Issue codes; empty when nothing is missing.
 */
function collectTraceCompletenessIssues(input) {
    const { rawModelResponse, normalized, routeHintSummary } = input;
    const issues = [];
    if (!rawModelResponse) {
        issues.push("missing_raw_model_output");
    }
    if (!normalized) {
        issues.push("missing_parsed_normalized_json");
        return issues;
    }

    const version = normalized.schema_version;
    if (version === "normalized_query_v1") {
        return issues;
    }
    if (!Array.isArray(normalized.fragments)) {
        issues.push("missing_parsed_fragments");
        return issues;
    }

    // Fragment fields each schema version must carry; plain v2 requires none.
    const requiredFragmentKeys = [];
    if (version === "normalized_query_v2_0_1" || version === "normalized_query_v2_0_2") {
        requiredFragmentKeys.push("execution_readiness");
    }
    if (version === "normalized_query_v2_0_2") {
        requiredFragmentKeys.push("route_status", "no_route_reason");
    }
    for (const fragment of normalized.fragments) {
        for (const key of requiredFragmentKeys) {
            if (!(key in fragment)) {
                issues.push(`fragment_${fragment.fragment_id}_missing_${key}`);
            }
        }
    }

    const summaryUsable = Boolean(routeHintSummary) && routeHintSummary.mode === "deterministic_v2";
    if (!summaryUsable) {
        issues.push("missing_route_hint_summary_v2");
        return issues;
    }
    const { decisions } = routeHintSummary;
    const decisionCount = Array.isArray(decisions) ? decisions.length : 0;
    if (decisionCount !== normalized.fragments.length) {
        issues.push("route_decision_count_mismatch");
    }
    return issues;
}
/**
 * Turns a user question into a validated normalized query, either through the
 * injected model client or through the built-in mock builders, and records a
 * trace for every request.
 */
class NormalizerService {
    openaiClient;
    /**
     * @param {object} openaiClient Client exposing an async
     *   normalize(connectionOptions, promptParts) method.
     */
    constructor(openaiClient) {
        this.openaiClient = openaiClient;
    }
    /**
     * Runs one normalization request.
     *
     * Steps: obtain the output text (mock or model call), parse it and apply
     * the schema-specific post-processing, validate, retry once when validation
     * fails and the retry policy is "default" or "single-pass-strict", then
     * save the trace (and, on request, a v1 eval case).
     *
     * @param {object} payload Request payload (userQuestion, context, model
     *   settings, prompt overrides, useMock, retryPolicy, saveAsTestCase, ...).
     * @returns {Promise<object>} Response with trace_id, ok, normalized,
     *   route_hint_summary, raw_model_output, validation, usage, latency_ms,
     *   prompt_version, schema_version and request_count_for_case.
     */
    async normalize(payload) {
        const traceId = (0, nanoid_1.nanoid)(14);
        const startedAt = Date.now();
        const llmProvider = payload.llmProvider === "local" ? "local" : "openai";
        const model = payload.model ?? config_1.DEFAULT_MODEL;
        const baseUrl = payload.baseUrl ?? config_1.DEFAULT_OPENAI_BASE_URL;
        const temperature = payload.temperature ?? config_1.DEFAULT_TEMPERATURE;
        const maxOutputTokens = payload.maxOutputTokens ?? config_1.DEFAULT_MAX_OUTPUT_TOKENS;
        const retryPolicy = payload.retryPolicy ?? "default";
        const schemaVersion = resolveSchemaVersion(payload);
        const promptBundle = (0, promptBuilder_1.buildPromptBundle)({
            promptVersion: payload.promptVersion,
            systemPrompt: payload.systemPrompt,
            developerPrompt: payload.developerPrompt,
            domainPrompt: payload.domainPrompt,
            schemaNotes: undefined,
            fewShotExamples: payload.fewShotExamples
        });

        let rawModelResponse = null;
        let outputText = "";
        let usage = { input_tokens: 0, output_tokens: 0, total_tokens: 0 };
        let requestCountForCase = 0;

        // Sends one request to the model client; extraPromptFields is appended
        // to the prompt parts (used for the retry instruction).
        const requestModel = (tokenLimit, extraPromptFields = {}) => this.openaiClient.normalize({
            llmProvider,
            apiKey: String(payload.apiKey ?? process.env.OPENAI_API_KEY ?? ""),
            model,
            baseUrl,
            abortSignal: payload.abortSignal,
            temperature,
            maxOutputTokens: tokenLimit
        }, {
            systemPrompt: promptBundle.systemPrompt,
            developerPrompt: promptBundle.combinedDeveloperPrompt,
            domainPrompt: promptBundle.domainPrompt,
            userQuestion: payload.userQuestion,
            schemaVersion,
            ...extraPromptFields
        });
        // Stores the latest model reply; raw response, text and usage are
        // replaced, only the request counter accumulates.
        const recordModelReply = (reply) => {
            requestCountForCase += 1;
            rawModelResponse = reply.raw;
            outputText = reply.outputText;
            usage = reply.usage;
        };
        const buildMock = () => {
            switch (schemaVersion) {
                case "v2":
                    return buildMockNormalizedV2(payload.userQuestion);
                case "v2_0_2":
                    return buildMockNormalizedV2_0_2(payload.userQuestion, payload.context);
                case "v2_0_1":
                    return buildMockNormalizedV2_0_1(payload.userQuestion, payload.context);
                default:
                    return buildMockNormalizedV1(payload.userQuestion, payload.context?.expected_route);
            }
        };
        const pickRetryInstruction = () => {
            switch (schemaVersion) {
                case "v2":
                    return RETRY_INSTRUCTION_V2;
                case "v2_0_2":
                    return RETRY_INSTRUCTION_V2_0_2;
                case "v2_0_1":
                    return RETRY_INSTRUCTION_V2_0_1;
                default:
                    return RETRY_INSTRUCTION_V1;
            }
        };
        // Parses the current output text, applies the schema-specific
        // post-processing and validates. Any exception thrown along the way is
        // reported as a failed validation tagged with errorTag.
        const evaluateOutput = (errorTag) => {
            try {
                let candidate = safeJsonParse(outputText);
                if (schemaVersion !== "v1") {
                    candidate = coerceNormalizedCandidateV2(candidate, payload.userQuestion) ?? candidate;
                    candidate = applyCompanyScopeResolutionV2(candidate, payload.userQuestion, payload.context);
                }
                if (schemaVersion === "v2_0_2") {
                    candidate = applyExecutionStatePolicyV202(candidate, payload.userQuestion, payload.context);
                } else if (schemaVersion === "v2_0_1") {
                    candidate = applyClarificationPolicyV201(candidate, payload.userQuestion, payload.context);
                }
                return {
                    normalizedCandidate: candidate,
                    validation: (0, schemaValidator_1.validateNormalized)(candidate, schemaVersion)
                };
            } catch (error) {
                return {
                    normalizedCandidate: null,
                    validation: {
                        passed: false,
                        errors: [`${errorTag}: ${error instanceof Error ? error.message : String(error)}`]
                    }
                };
            }
        };

        if (payload.useMock) {
            const mock = buildMock();
            rawModelResponse = { mode: "mock", schema_version: schemaVersion };
            outputText = JSON.stringify(mock, null, 2);
        } else {
            recordModelReply(await requestModel(maxOutputTokens));
        }

        let normalizedCandidate;
        // Placeholder only; the first evaluation below always replaces it.
        let validation = { passed: false, errors: ["NO_VALIDATION"] };
        ({ normalizedCandidate, validation } = evaluateOutput("JSON_PARSE_ERROR"));

        const canRetry = ["default", "single-pass-strict"].includes(retryPolicy);
        if (!payload.useMock && !validation.passed && canRetry) {
            const retryMaxOutputTokens = computeRetryMaxOutputTokens(maxOutputTokens, rawModelResponse);
            recordModelReply(await requestModel(retryMaxOutputTokens, {
                controlledRetryInstruction: pickRetryInstruction()
            }));
            ({ normalizedCandidate, validation } = evaluateOutput("JSON_PARSE_ERROR_AFTER_RETRY"));
        }

        // Only v1 payloads pass through the confidence guard; every other
        // version is used as validated.
        let normalized = null;
        if (validation.passed) {
            normalized = schemaVersion === "v1"
                ? applyConfidenceGuardV1(normalizedCandidate)
                : normalizedCandidate;
        }

        const routeHintSummary = normalized ? (0, routeHintAdapter_1.toRouteHintSummary)(normalized) : null;
        const latency = Date.now() - startedAt;
        const rawOutputForTrace = rawModelResponse ?? outputText;
        const traceCompletenessIssues = collectTraceCompletenessIssues({
            traceId,
            schemaVersion,
            rawModelResponse: rawOutputForTrace,
            normalized,
            routeHintSummary
        });
        if (traceCompletenessIssues.length > 0) {
            console.error(`[trace-completeness] trace_id=${traceId} schema=${schemaVersion} issues=${traceCompletenessIssues.join(",")}`);
        }

        const response = {
            trace_id: traceId,
            ok: validation.passed,
            normalized,
            route_hint_summary: routeHintSummary,
            raw_model_output: rawOutputForTrace,
            validation,
            usage,
            latency_ms: latency,
            prompt_version: promptBundle.prompt_version,
            schema_version: schemaVersion,
            request_count_for_case: requestCountForCase
        };

        const traceRouteHint = routeHintForHistory(normalized, routeHintSummary);
        const traceConfidence = confidenceForHistory(normalized, routeHintSummary);
        const sessionContext = payload.context;
        (0, traceLogger_1.saveTrace)({
            trace_id: traceId,
            timestamp: new Date().toISOString(),
            model,
            prompt_version: promptBundle.prompt_version,
            schema_version: schemaVersion,
            case_id: sessionContext?.case_id,
            user_question_raw: payload.userQuestion,
            context: {
                period_hint: sessionContext?.period_hint ?? null,
                business_context: sessionContext?.business_context ?? null,
                expected_route: sessionContext?.expected_route ?? null,
                case_id: sessionContext?.case_id ?? null,
                eval_mode: sessionContext?.eval_mode ?? null,
                trace_completeness_issues: traceCompletenessIssues
            },
            // The API key is masked before the payload reaches the redactor.
            request_payload_redacted: (0, traceLogger_1.redactRequestPayload)({
                ...payload,
                apiKey: payload.apiKey ? "***REDACTED***" : undefined
            }),
            raw_model_response: rawOutputForTrace,
            parsed_normalized_json: normalized,
            validation_result: validation,
            route_hint_summary: routeHintSummary,
            route_hint: traceRouteHint,
            confidence: traceConfidence,
            usage,
            latency_ms: latency,
            expected_route: sessionContext?.expected_route,
            eval_label: sessionContext?.eval_label,
            eval_mode: sessionContext?.eval_mode,
            request_count_for_case: requestCountForCase
        });

        // Eval cases are only captured for validated v1 payloads.
        if (payload.saveAsTestCase && normalized?.schema_version === "normalized_query_v1") {
            const { intent_class, route_hint, requires, accounts_mentioned, expected_output_shape } = normalized;
            (0, traceLogger_1.saveEvalCase)({
                case_id: `NQ-${Date.now()}`,
                raw_question: payload.userQuestion,
                expected: {
                    intent_class,
                    route_hint,
                    requires: {
                        needs_cross_entity_join: requires.needs_cross_entity_join,
                        needs_causal_chain: requires.needs_causal_chain
                    },
                    accounts_mentioned,
                    expected_output_shape
                }
            });
        }
        return response;
    }
}
// Publish the class on the CommonJS exports object (the slot is pre-declared as `void 0` at the top of the file).
exports.NormalizerService = NormalizerService;