1480 lines
64 KiB
JavaScript
1480 lines
64 KiB
JavaScript
"use strict";
|
||
Object.defineProperty(exports, "__esModule", { value: true });
|
||
exports.NormalizerService = void 0;
|
||
const nanoid_1 = require("nanoid");
|
||
const config_1 = require("../config");
|
||
const promptBuilder_1 = require("./promptBuilder");
|
||
const routeHintAdapter_1 = require("./routeHintAdapter");
|
||
const schemaValidator_1 = require("./schemaValidator");
|
||
const traceLogger_1 = require("./traceLogger");
|
||
// Retry instructions, one per supported schema version. Each string names the
// schema the JSON answer must match and forbids markdown.
const RETRY_INSTRUCTION_V1 =
    "IMPORTANT: return valid JSON strictly matching schema normalized_query_v1. No markdown.";
const RETRY_INSTRUCTION_V2 =
    "IMPORTANT: return valid JSON strictly matching schema normalized_query_v2. No markdown.";
const RETRY_INSTRUCTION_V2_0_1 =
    "IMPORTANT: return valid JSON strictly matching schema normalized_query_v2_0_1. No markdown.";
const RETRY_INSTRUCTION_V2_0_2 =
    "IMPORTANT: return valid JSON strictly matching schema normalized_query_v2_0_2. No markdown.";

// Closed vocabularies used by the coercion helpers below to validate tokens.
const CONFIDENCE_LEVELS = [
    "high",
    "medium",
    "low"
];
const DOMAIN_RELEVANCE_VALUES = [
    "in_scope",
    "out_of_scope",
    "unclear"
];
const BUSINESS_SCOPE_VALUES = ["company_specific_accounting", "generic_accounting", "offtopic", "unclear"];
const CANDIDATE_LABEL_VALUES = [
    "heavy_analytical", "cross_entity", "drilldown_explain", "rule_based_account_control",
    "anomaly_probe", "period_close_risk", "ambiguous_human_query", "simple_factual"
];
|
||
/**
 * Parses a JSON document that may be wrapped in a Markdown code fence.
 *
 * A leading "```json" or "```" marker and a trailing "```" marker are removed
 * (after trimming) before the remainder is handed to JSON.parse.
 *
 * @param {string} text - Raw text to parse.
 * @returns {*} The parsed JSON value.
 * @throws {SyntaxError} When the unfenced text is not valid JSON.
 */
function safeJsonParse(text) {
    let body = text.trim();
    body = body.replace(/^```json\s*/i, "");
    body = body.replace(/^```\s*/i, "");
    body = body.replace(/```$/i, "");
    return JSON.parse(body.trim());
}
|
||
/**
 * Decides which normalized-query schema version a request targets.
 *
 * An explicit `payload.schemaVersion` (short form such as "v2_0_1" or long
 * form such as "normalized_query_v2_0_1") wins. Otherwise the version is
 * derived from `payload.promptVersion`, falling back to the configured
 * default prompt version when that field is nullish.
 *
 * @param {object} payload - Request payload with optional `schemaVersion` / `promptVersion`.
 * @returns {"v1"|"v2"|"v2_0_1"|"v2_0_2"} Resolved schema version.
 */
function resolveSchemaVersion(payload) {
    const requested = String(payload.schemaVersion ?? "").toLowerCase().trim();
    // A Map (not a plain object) so tokens like "constructor" cannot hit inherited keys.
    const explicitVersions = new Map([
        ["v2_0_2", "v2_0_2"],
        ["normalized_query_v2_0_2", "v2_0_2"],
        ["v2_0_1", "v2_0_1"],
        ["normalized_query_v2_0_1", "v2_0_1"],
        ["v2", "v2"],
        ["normalized_query_v2", "v2"],
        ["v1", "v1"],
        ["normalized_query_v1", "v1"]
    ]);
    const explicitHit = explicitVersions.get(requested);
    if (explicitHit !== undefined) {
        return explicitHit;
    }
    const prompt = String(payload.promptVersion ?? config_1.DEFAULT_PROMPT_VERSION).toLowerCase().trim();
    // Anything outside the "normalizer_v2*" family resolves to v1.
    if (!prompt.startsWith("normalizer_v2")) {
        return "v1";
    }
    switch (prompt) {
        case "normalizer_v2_0_2":
            return "v2_0_2";
        case "normalizer_v2_0_1":
            return "v2_0_1";
        default:
            return "v2";
    }
}
|
||
/**
 * Tells whether a raw model response reports being cut off by the output
 * token limit: `status` equal to "incomplete" and `incomplete_details.reason`
 * equal to "max_output_tokens" (both compared case-insensitively).
 *
 * @param {*} rawModelResponse - Raw response; non-objects yield false.
 * @returns {boolean} True when the response was truncated by the token limit.
 */
function shouldEscalateOutputBudget(rawModelResponse) {
    const isObjectLike = Boolean(rawModelResponse) && typeof rawModelResponse === "object";
    if (!isObjectLike) {
        return false;
    }
    const statusText = String(rawModelResponse.status ?? "").toLowerCase();
    const incompleteDetails = rawModelResponse.incomplete_details ?? {};
    const reasonText = String(incompleteDetails.reason ?? "").toLowerCase();
    if (statusText !== "incomplete") {
        return false;
    }
    return reasonText === "max_output_tokens";
}
|
||
/**
 * Computes the output-token budget for a retry.
 *
 * When the previous response was truncated by the token limit, the budget is
 * raised to the larger of `current + 400` and `ceil(current * 1.6)`, capped at
 * 2400. Otherwise the current budget is returned unchanged.
 *
 * NOTE(review): because of the cap, a `current` above 2400 comes back as 2400,
 * i.e. lower than before. Confirm callers never pass such a value.
 *
 * @param {number} current - Budget used for the previous attempt.
 * @param {*} rawModelResponse - Raw response of the previous attempt.
 * @returns {number} Budget for the next attempt.
 */
function computeRetryMaxOutputTokens(current, rawModelResponse) {
    if (shouldEscalateOutputBudget(rawModelResponse)) {
        const additive = current + 400;
        const multiplicative = Math.ceil(current * 1.6);
        const raised = Math.max(additive, multiplicative);
        return Math.min(raised, 2400);
    }
    return current;
}
|
||
/**
 * Canonicalizes a loose token: stringifies (nullish becomes ""), trims,
 * lower-cases, and collapses every run of whitespace/hyphens into one "_".
 *
 * @param {*} value - Value to canonicalize.
 * @returns {string} Canonical token.
 */
function normalizeToken(value) {
    const text = String(value ?? "");
    const lowered = text.trim().toLowerCase();
    return lowered.replace(/[\s-]+/g, "_");
}
|
||
/**
 * Returns the trimmed string when `value` is a non-blank string, else null.
 *
 * @param {*} value - Candidate value.
 * @returns {string|null} Trimmed text, or null for non-strings and blank strings.
 */
function toOptionalString(value) {
    if (typeof value === "string") {
        const text = value.trim();
        if (text.length > 0) {
            return text;
        }
    }
    return null;
}
|
||
/**
 * Interprets a loosely typed value as a boolean.
 *
 * Booleans pass through; the numbers 1 and 0 map to true and false; strings
 * are trimmed, lower-cased and matched against known yes/no spellings.
 * Everything else yields `fallback`.
 *
 * @param {*} value - Value to interpret.
 * @param {boolean} [fallback=false] - Result when the value is not recognised.
 * @returns {boolean} Interpreted boolean.
 */
function coerceBoolean(value, fallback = false) {
    switch (typeof value) {
        case "boolean":
            return value;
        case "number":
            if (value === 1) {
                return true;
            }
            return value === 0 ? false : fallback;
        case "string": {
            const word = value.trim().toLowerCase();
            if (["true", "1", "yes", "y", "да", "ok"].includes(word)) {
                return true;
            }
            if (["false", "0", "no", "n", "нет"].includes(word)) {
                return false;
            }
            return fallback;
        }
        default:
            return fallback;
    }
}
|
||
/**
 * Converts an array or a delimited string into unique, trimmed, non-empty
 * strings, keeping first-seen order.
 *
 * Array items that are not strings are dropped. A string input is split on
 * commas, semicolons and newlines. Any other input produces an empty array.
 *
 * @param {*} value - Array, delimited string, or anything else.
 * @returns {string[]} De-duplicated list of trimmed strings.
 */
function coerceStringArray(value) {
    let pieces;
    if (Array.isArray(value)) {
        pieces = value.map((entry) => (typeof entry === "string" ? entry.trim() : ""));
    }
    else if (typeof value === "string") {
        pieces = value.split(/[,\n;]+/).map((entry) => entry.trim());
    }
    else {
        return [];
    }
    const nonEmpty = pieces.filter((entry) => entry.length > 0);
    return [...new Set(nonEmpty)];
}
|
||
/**
 * Maps a loose confidence value onto "high" | "medium" | "low".
 *
 * Strings are accepted when their normalized token is a known level. Finite
 * numbers above 1 are treated as percentages (divided by 100) and then
 * bucketed: >= 0.75 high, >= 0.45 medium, otherwise low. Anything else
 * yields `fallback`.
 *
 * @param {*} value - Candidate confidence.
 * @param {string} fallback - Value returned when nothing can be derived.
 * @returns {string} Confidence level or the fallback.
 */
function coerceConfidence(value, fallback) {
    if (typeof value === "string") {
        const level = normalizeToken(value);
        return CONFIDENCE_LEVELS.includes(level) ? level : fallback;
    }
    if (typeof value !== "number" || !Number.isFinite(value)) {
        return fallback;
    }
    const ratio = value > 1 ? value / 100 : value;
    if (ratio >= 0.75) {
        return "high";
    }
    return ratio >= 0.45 ? "medium" : "low";
}
|
||
/**
 * Maps a loose value onto "in_scope" | "out_of_scope" | "unclear".
 *
 * Booleans map directly (true is in scope). Other values are normalized to a
 * token and matched first against the canonical values, then against a table
 * of known aliases. Unrecognised tokens yield `fallback`.
 *
 * @param {*} value - Candidate relevance.
 * @param {string} fallback - Value returned for unrecognised input.
 * @returns {string} Domain relevance or the fallback.
 */
function coerceDomainRelevance(value, fallback) {
    if (typeof value === "boolean") {
        return value ? "in_scope" : "out_of_scope";
    }
    const token = normalizeToken(value);
    if (DOMAIN_RELEVANCE_VALUES.includes(token)) {
        return token;
    }
    const aliasTable = new Map([
        ["in_scope_true", "in_scope"],
        ["in_scope_yes", "in_scope"],
        ["in_scope_relevant", "in_scope"],
        ["relevant", "in_scope"],
        ["supported", "in_scope"],
        ["out_scope", "out_of_scope"],
        ["outofscope", "out_of_scope"],
        ["offtopic", "out_of_scope"],
        ["off_topic", "out_of_scope"],
        ["irrelevant", "out_of_scope"],
        ["true", "in_scope"],
        ["false", "out_of_scope"],
        ["unknown", "unclear"],
        ["ambiguous", "unclear"],
        ["n_a", "unclear"],
        ["na", "unclear"]
    ]);
    return aliasTable.get(token) ?? fallback;
}
|
||
/**
 * Maps a loose value onto one of the business-scope values.
 *
 * The normalized token is matched against the canonical values, then against
 * known aliases. If neither matches, the already resolved domain relevance
 * decides: "out_of_scope" gives "offtopic", "in_scope" gives
 * "company_specific_accounting", anything else gives `fallback`.
 *
 * @param {*} value - Candidate business scope.
 * @param {string} fallback - Value used when nothing else applies.
 * @param {string} domainRelevance - Resolved domain relevance of the fragment.
 * @returns {string} Business scope.
 */
function coerceBusinessScope(value, fallback, domainRelevance) {
    const token = normalizeToken(value);
    if (BUSINESS_SCOPE_VALUES.includes(token)) {
        return token;
    }
    const aliasTable = new Map([
        ["company_specific", "company_specific_accounting"],
        ["company_accounting", "company_specific_accounting"],
        ["document_review", "company_specific_accounting"],
        ["settlement", "company_specific_accounting"],
        ["bank_settlement", "company_specific_accounting"],
        ["generic", "generic_accounting"],
        ["general_accounting", "generic_accounting"],
        ["general", "generic_accounting"],
        ["out_of_scope", "offtopic"],
        ["off_topic", "offtopic"],
        ["outside", "offtopic"],
        ["unknown", "unclear"]
    ]);
    const aliased = aliasTable.get(token);
    if (aliased !== undefined) {
        return aliased;
    }
    // No usable token: derive the scope from the domain relevance instead.
    switch (domainRelevance) {
        case "out_of_scope":
            return "offtopic";
        case "in_scope":
            return "company_specific_accounting";
        default:
            return fallback;
    }
}
|
||
/**
 * Produces a fragment identifier.
 *
 * A non-blank string is used trimmed; a finite number n becomes
 * `F<max(1, floor(n))>`; otherwise `fallback` is used when truthy, and
 * `F<index + 1>` as the last resort.
 *
 * @param {*} value - Identifier supplied by the source data.
 * @param {number} index - Zero-based position of the fragment.
 * @param {string} [fallback] - Preferred identifier when `value` is unusable.
 * @returns {string} Fragment identifier.
 */
function coerceFragmentId(value, index, fallback) {
    if (typeof value === "string") {
        const trimmed = value.trim();
        if (trimmed.length > 0) {
            return trimmed;
        }
    }
    else if (typeof value === "number" && Number.isFinite(value)) {
        const ordinal = Math.max(1, Math.floor(value));
        return `F${ordinal}`;
    }
    if (fallback) {
        return fallback;
    }
    return `F${index + 1}`;
}
|
||
/**
 * Extracts a calendar year in the inclusive range 1900..2200.
 *
 * Accepts an integer number or a string consisting of exactly four digits
 * (surrounding whitespace allowed).
 *
 * @param {*} value - Candidate year.
 * @returns {number|null} The year, or null when the value is not acceptable.
 */
function parseYear(value) {
    const isAcceptedYear = (n) => Number.isInteger(n) && n >= 1900 && n <= 2200;
    if (typeof value === "number") {
        return isAcceptedYear(value) ? value : null;
    }
    if (typeof value !== "string") {
        return null;
    }
    const digits = value.trim();
    if (!/^\d{4}$/.test(digits)) {
        return null;
    }
    const year = Number.parseInt(digits, 10);
    return isAcceptedYear(year) ? year : null;
}
|
||
/**
 * Extracts a month number in the inclusive range 1..12.
 *
 * Accepts an integer number or a string of one or two digits (surrounding
 * whitespace allowed).
 *
 * @param {*} value - Candidate month.
 * @returns {number|null} The month, or null when the value is not acceptable.
 */
function parseMonth(value) {
    if (typeof value === "number") {
        const isMonth = Number.isInteger(value) && value >= 1 && value <= 12;
        return isMonth ? value : null;
    }
    if (typeof value !== "string") {
        return null;
    }
    const digits = value.trim();
    if (!/^\d{1,2}$/.test(digits)) {
        return null;
    }
    const month = Number.parseInt(digits, 10);
    return month >= 1 && month <= 12 ? month : null;
}
|
||
/**
 * Builds a `{ type, value, confidence }` time scope from loose model output.
 *
 * Resolution order:
 *  1. An object whose normalized `type` is "explicit", "inferred" or "missing"
 *     is taken as-is ("missing" always carries a null value).
 *  2. An object with structured `period_type` / `year` / `month` fields is
 *     turned into an explicit "YYYY" or "YYYY-MM" value. A "low" confidence is
 *     lifted to "medium" for these structured values.
 *  3. Otherwise the scope is inferred from `rawText`.
 *  4. If inference finds nothing, `fallback` is returned.
 *
 * Fix: with no `period_type` and both `year` and `month` supplied, the result
 * used to be the bare year, silently dropping the month. Such input now
 * produces "YYYY-MM". An explicit `period_type` of "year" still yields the
 * bare year even if a month is present.
 *
 * @param {*} value - Time scope object produced by the model (may be anything).
 * @param {string} rawText - Fragment text used for inference.
 * @param {{type: string, value: (string|null), confidence: string}} fallback - Scope used as last resort.
 * @returns {{type: string, value: (string|null), confidence: string}} Resolved time scope.
 */
function coerceTimeScope(value, rawText, fallback) {
    if (value && typeof value === "object") {
        const source = value;
        const rawType = normalizeToken(source.type);
        const confidence = coerceConfidence(source.confidence, fallback.confidence);
        if (["explicit", "inferred", "missing"].includes(rawType)) {
            if (rawType === "missing") {
                return {
                    type: "missing",
                    value: null,
                    confidence
                };
            }
            return {
                type: rawType,
                value: toOptionalString(source.value),
                confidence
            };
        }
        const periodType = normalizeToken(source.period_type);
        const year = parseYear(source.year);
        const month = parseMonth(source.month);
        if (year !== null) {
            // Structured year/month data is at least "medium" confidence.
            const structuredConfidence = confidence === "low" ? "medium" : confidence;
            // Year-only result: requested explicitly, or no period type and no month.
            if (periodType === "year" || (periodType.length === 0 && month === null)) {
                return {
                    type: "explicit",
                    value: String(year),
                    confidence: structuredConfidence
                };
            }
            if (month !== null) {
                return {
                    type: "explicit",
                    value: `${year}-${String(month).padStart(2, "0")}`,
                    confidence: structuredConfidence
                };
            }
        }
    }
    const inferred = inferTimeScope(rawText);
    if (inferred.type !== "missing") {
        return inferred;
    }
    return fallback;
}
|
||
/**
 * Builds the fragment flag record from loose model output.
 *
 * For every flag the canonical key is looked up first, then its aliases in
 * order; the first key present in `value` is coerced to a boolean. Flags not
 * present at all, and unrecognised values, take the matching `fallback` flag.
 * A non-object `value` returns `fallback` itself.
 *
 * @param {*} value - Flag object produced by the model.
 * @param {object} fallback - Complete flag record used for defaults.
 * @returns {object} Flag record with the nine known flags.
 */
function coerceFlags(value, fallback) {
    if (!value || typeof value !== "object") {
        return fallback;
    }
    const source = value;
    // [canonical key, accepted aliases] in output order.
    const flagSpecs = [
        ["has_multi_entity_scope", ["multi_entity_scope"]],
        ["asks_for_chain_explanation", ["asks_for_chain", "chain_explanation"]],
        ["asks_for_ranking_or_top", ["asks_for_ranking", "asks_for_top"]],
        ["asks_for_period_summary", ["period_summary"]],
        ["asks_for_rule_check", ["rule_check"]],
        ["asks_for_anomaly_scan", ["anomaly_scan"]],
        ["asks_for_exact_object_trace", ["exact_object_trace"]],
        ["asks_for_evidence", ["evidence"]],
        ["mentions_period_close_context", ["period_close_context"]]
    ];
    const result = {};
    for (const [key, aliases] of flagSpecs) {
        const presentName = [key, ...aliases].find((name) => name in source);
        result[key] = presentName === undefined
            ? fallback[key]
            : coerceBoolean(source[presentName], fallback[key]);
    }
    return result;
}
|
||
/**
 * Maps a loose label onto one of the candidate label values.
 *
 * The normalized token is returned when it is already a canonical label;
 * otherwise a table of known aliases is consulted.
 *
 * @param {*} value - Label text produced by the model.
 * @returns {string|null} Canonical label, or null when unrecognised.
 */
function mapCandidateLabel(value) {
    const token = normalizeToken(value);
    if (CANDIDATE_LABEL_VALUES.includes(token)) {
        return token;
    }
    const aliasTable = new Map([
        ["show_documents", "simple_factual"],
        ["document_list", "simple_factual"],
        ["show_docs", "simple_factual"],
        ["point_answer", "simple_factual"],
        ["lookup", "simple_factual"],
        ["ranking", "heavy_analytical"],
        ["top", "heavy_analytical"],
        ["summary", "heavy_analytical"],
        ["analytical", "heavy_analytical"],
        ["chain", "cross_entity"],
        ["cross", "cross_entity"],
        ["cross_domain", "cross_entity"],
        ["rule_check", "rule_based_account_control"],
        ["control", "rule_based_account_control"],
        ["rules", "rule_based_account_control"],
        ["risk_scan", "anomaly_probe"],
        ["anomaly", "anomaly_probe"],
        ["risk", "anomaly_probe"],
        ["period_close", "period_close_risk"],
        ["month_close", "period_close_risk"],
        ["ambiguous", "ambiguous_human_query"],
        ["unclear", "ambiguous_human_query"]
    ]);
    return aliasTable.get(token) ?? null;
}
|
||
/**
 * Resolves the candidate labels of a fragment.
 *
 * Preference order: labels supplied by the model that map to canonical
 * labels (de-duplicated), then labels derived from the flags and domain
 * relevance, then `fallback`.
 *
 * @param {*} value - Labels produced by the model (array or delimited string).
 * @param {object} flags - Resolved fragment flags.
 * @param {string} domainRelevance - Resolved domain relevance.
 * @param {string[]} fallback - Labels used when nothing else is available.
 * @returns {string[]} Candidate labels.
 */
function coerceCandidateLabels(value, flags, domainRelevance, fallback) {
    const recognised = [];
    for (const rawLabel of coerceStringArray(value)) {
        const label = mapCandidateLabel(rawLabel);
        if (label) {
            recognised.push(label);
        }
    }
    if (recognised.length > 0) {
        return [...new Set(recognised)];
    }
    const derived = pickCandidateLabels(flags, domainRelevance);
    return derived.length > 0 ? derived : fallback;
}
|
||
/**
 * Repairs a single model-produced fragment into a complete v2 fragment.
 *
 * A heuristic baseline fragment is built from the fragment text (or, when
 * that yields nothing, from the whole user message). Each field of the model
 * output is then coerced and used when valid; otherwise the baseline value is
 * kept.
 *
 * @param {*} rawFragment - Fragment object produced by the model.
 * @param {number} index - Zero-based position of the fragment.
 * @param {string} userMessage - Full user message, used as text fallback.
 * @returns {object|null} Repaired fragment, or null when no baseline can be built.
 */
function coerceFragmentV2(rawFragment, index, userMessage) {
    const source = rawFragment && typeof rawFragment === "object" ? rawFragment : {};
    // First non-blank text field wins; the whole message is the last resort.
    let rawText = null;
    for (const key of ["raw_fragment_text", "rawText", "fragment_text", "text"]) {
        rawText = toOptionalString(source[key]);
        if (rawText !== null) {
            break;
        }
    }
    if (rawText === null) {
        rawText = userMessage.trim();
    }
    const base = buildFragmentV2(rawText, index) ?? buildFragmentV2(userMessage, index);
    if (!base) {
        return null;
    }
    const domainRelevance = coerceDomainRelevance(source.domain_relevance, base.domain_relevance);
    const businessScope = coerceBusinessScope(source.business_scope, base.business_scope, domainRelevance);
    const flags = coerceFlags(source.flags, base.flags);
    // Hint lists supplied by the model replace the baseline only when non-empty.
    const hintsOrBase = (candidate, baseline) => {
        const hints = coerceStringArray(candidate);
        return hints.length > 0 ? hints : baseline;
    };
    const entityHints = hintsOrBase(source.entity_hints, base.entity_hints);
    const accountHints = hintsOrBase(source.account_hints, base.account_hints);
    const documentHints = hintsOrBase(source.document_hints, base.document_hints);
    const registerHints = hintsOrBase(source.register_hints, base.register_hints);
    return {
        fragment_id: coerceFragmentId(source.fragment_id, index, base.fragment_id),
        raw_fragment_text: rawText,
        normalized_fragment_text: toOptionalString(source.normalized_fragment_text) ?? base.normalized_fragment_text,
        domain_relevance: domainRelevance,
        business_scope: businessScope,
        entity_hints: entityHints,
        account_hints: accountHints,
        document_hints: documentHints,
        register_hints: registerHints,
        time_scope: coerceTimeScope(source.time_scope, rawText, base.time_scope),
        flags,
        candidate_labels: coerceCandidateLabels(source.candidate_labels, flags, domainRelevance, base.candidate_labels),
        confidence: coerceConfidence(source.confidence, base.confidence)
    };
}
|
||
/**
 * Keeps only well-formed discarded-fragment records.
 *
 * An entry survives when it is an object whose `raw_fragment_text` and
 * `reason` are both non-blank strings; the output carries the trimmed values.
 *
 * @param {*} value - List produced by the model; non-arrays give an empty list.
 * @returns {{raw_fragment_text: string, reason: string}[]} Valid records.
 */
function coerceDiscardedFragments(value) {
    if (!Array.isArray(value)) {
        return [];
    }
    const kept = [];
    value.forEach((entry) => {
        if (!entry || typeof entry !== "object") {
            return;
        }
        const rawText = toOptionalString(entry.raw_fragment_text);
        const reason = toOptionalString(entry.reason);
        if (rawText && reason) {
            kept.push({ raw_fragment_text: rawText, reason });
        }
    });
    return kept;
}
|
||
/**
 * Resolves the message-level scope confidence; same rules as coerceConfidence.
 *
 * @param {*} value - Candidate confidence.
 * @param {string} fallback - Value used when nothing can be derived.
 * @returns {string} Confidence level or the fallback.
 */
function coerceScopeConfidence(value, fallback) {
    const level = coerceConfidence(value, fallback);
    return level;
}
|
||
/**
 * Builds the `global_notes` record.
 *
 * `needs_clarification` comes from the model output when it can be read as a
 * boolean, otherwise from `fallbackNeedsClarification`. The reason is null
 * unless clarification is needed, in which case the supplied reason is used
 * or "clarification_required" when none is given.
 *
 * @param {*} value - Notes object produced by the model.
 * @param {boolean} fallbackNeedsClarification - Default for `needs_clarification`.
 * @returns {{needs_clarification: boolean, clarification_reason: (string|null)}} Notes record.
 */
function coerceGlobalNotes(value, fallbackNeedsClarification) {
    const DEFAULT_REASON = "clarification_required";
    const isObjectLike = Boolean(value) && typeof value === "object";
    if (!isObjectLike) {
        return {
            needs_clarification: fallbackNeedsClarification,
            clarification_reason: fallbackNeedsClarification ? DEFAULT_REASON : null
        };
    }
    const needsClarification = coerceBoolean(value.needs_clarification, fallbackNeedsClarification);
    const suppliedReason = toOptionalString(value.clarification_reason);
    let reason = null;
    if (needsClarification) {
        reason = suppliedReason ?? DEFAULT_REASON;
    }
    return {
        needs_clarification: needsClarification,
        clarification_reason: reason
    };
}
|
||
/**
 * Repairs a model-produced candidate into a complete "normalized_query_v2"
 * document.
 *
 * Fragments are taken from `fragments` (array), else from a single `fragment`
 * object, else produced by splitting the user message. Message-level fields
 * are taken from the candidate when valid and otherwise derived from the
 * repaired fragments.
 *
 * @param {*} candidate - Parsed model output.
 * @param {string} userMessage - Original user message.
 * @returns {object|null} Repaired document, or null when `candidate` is not an object.
 */
function coerceNormalizedCandidateV2(candidate, userMessage) {
    if (!candidate || typeof candidate !== "object") {
        return null;
    }
    const source = candidate;
    let rawFragments;
    if (Array.isArray(source.fragments)) {
        rawFragments = source.fragments;
    }
    else if (source.fragment && typeof source.fragment === "object") {
        rawFragments = [source.fragment];
    }
    else {
        rawFragments = splitIntoCandidateFragments(userMessage).map((text) => ({ raw_fragment_text: text }));
    }
    const fragments = [];
    rawFragments.forEach((item, index) => {
        const repaired = coerceFragmentV2(item, index, userMessage);
        if (repaired !== null) {
            fragments.push(repaired);
        }
    });
    // Derive message-level defaults from the repaired fragments.
    let inScopeCount = 0;
    let unclearCount = 0;
    let hasMissingPeriod = false;
    for (const fragment of fragments) {
        if (fragment.domain_relevance === "in_scope") {
            inScopeCount += 1;
        }
        if (fragment.domain_relevance === "unclear") {
            unclearCount += 1;
        }
        if (fragment.time_scope.type === "missing") {
            hasMissingPeriod = true;
        }
    }
    const messageInScope = inScopeCount > 0;
    let inferredScopeConfidence = "low";
    if (messageInScope) {
        inferredScopeConfidence = unclearCount > 0 ? "medium" : "high";
    }
    const inferredNeedsClarification = messageInScope && (unclearCount > 0 || hasMissingPeriod);
    return {
        schema_version: "normalized_query_v2",
        user_message_raw: toOptionalString(source.user_message_raw) ?? userMessage,
        message_in_scope: coerceBoolean(source.message_in_scope, messageInScope),
        scope_confidence: coerceScopeConfidence(source.scope_confidence, inferredScopeConfidence),
        contains_multiple_tasks: coerceBoolean(source.contains_multiple_tasks, fragments.length > 1),
        fragments,
        discarded_fragments: coerceDiscardedFragments(source.discarded_fragments),
        global_notes: coerceGlobalNotes(source.global_notes, inferredNeedsClarification)
    };
}
|
||
/**
 * Locates date-like substrings and returns their `[start, end)` offsets.
 *
 * Three shapes are recognised, in this order of the output:
 *  1. "20YY-MM" / "20YY-MM-DD" (separators "-", "/" or ".");
 *  2. "DD.MM.YY" / "DD.MM.YYYY" (same separators);
 *  3. "<day> <month name>" with an optional "20YY" year, month names in
 *     Russian or English.
 *
 * Fix: the third pattern used to end in `\b`. JavaScript's `\b` only knows
 * ASCII word characters, so after a Cyrillic month name it never matched and
 * "10 января" (no year) was not recognised as a date. The end of a month name
 * is now checked with a Unicode-aware lookahead; the ASCII `\b` is kept after
 * the numeric year, where it works as intended.
 *
 * @param {string} text - Text to scan.
 * @returns {{start: number, end: number}[]} Spans grouped by pattern, in match order.
 */
function collectDateSpans(text) {
    const patterns = [
        /\b20\d{2}(?:[-/.](?:0?[1-9]|1[0-2]))(?:[-/.](?:0?[1-9]|[12]\d|3[01]))?\b/g,
        /\b(?:0?[1-9]|[12]\d|3[01])[./-](?:0?[1-9]|1[0-2])[./-](?:\d{2}|\d{4})\b/g,
        /\b(?:0?[1-9]|[12]\d|3[01])\s+(?:январ[ьяе]|феврал[ьяе]|март[ае]?|апрел[ьяе]|ма[йея]|июн[ьяе]?|июл[ьяе]?|август[ае]?|сентябр[ьяе]?|октябр[ьяе]?|ноябр[ьяе]?|декабр[ьяе]?|january|february|march|april|may|june|july|august|september|october|november|december)(?:\s+20\d{2}\b|(?![\p{L}\p{N}_]))/giu
    ];
    const haystack = String(text);
    const spans = [];
    for (const pattern of patterns) {
        for (const match of haystack.matchAll(pattern)) {
            spans.push({
                start: match.index,
                end: match.index + match[0].length
            });
        }
    }
    return spans;
}
|
||
/**
 * Locates money-like numbers and returns their `[start, end)` offsets.
 *
 * Two shapes are recognised: digit groups of three separated by a space or a
 * no-break space (optionally followed by two decimals), and plain numbers
 * with exactly two decimals. Spans of the first shape are listed first;
 * spans of the two shapes may overlap.
 *
 * @param {string} text - Text to scan.
 * @returns {{start: number, end: number}[]} Matched spans.
 */
function collectAmountSpans(text) {
    const groupedThousands = /\b\d{1,3}(?:[ \u00A0]\d{3})+(?:[.,]\d{2})?\b/g;
    const twoDecimals = /\b\d+[.,]\d{2}\b/g;
    const haystack = String(text);
    const toSpan = (match) => ({
        start: match.index,
        end: match.index + match[0].length
    });
    return [
        ...Array.from(haystack.matchAll(groupedThousands), toSpan),
        ...Array.from(haystack.matchAll(twoDecimals), toSpan)
    ];
}
|
||
/**
 * Locates percentages (1-3 digits, optional decimals, optional whitespace,
 * then "%") and returns their `[start, end)` offsets.
 *
 * @param {string} text - Text to scan.
 * @returns {{start: number, end: number}[]} Matched spans in text order.
 */
function collectPercentSpans(text) {
    const percentPattern = /\b\d{1,3}(?:[.,]\d+)?\s*%/g;
    return Array.from(String(text).matchAll(percentPattern), (match) => {
        const start = match.index;
        return { start, end: start + match[0].length };
    });
}
|
||
/**
 * Checks whether the half-open range `[start, end)` overlaps any given span.
 *
 * @param {number} start - Inclusive start offset.
 * @param {number} end - Exclusive end offset.
 * @param {{start: number, end: number}[]} spans - Spans to test against.
 * @returns {boolean} True when at least one span overlaps the range.
 */
function intersectsAnySpan(start, end, spans) {
    for (const candidate of spans) {
        const overlaps = start < candidate.end && end > candidate.start;
        if (overlaps) {
            return true;
        }
    }
    return false;
}
|
||
/**
 * Extracts chart-of-accounts codes ("NN" or "NN.NN") mentioned in a text.
 *
 * Two passes are made over the lower-cased text:
 *  1. Contextual: a number directly following an account keyword ("счет",
 *     "сч.", "account", "schet", ...) is accepted when its two-digit prefix
 *     is a known account. If this pass finds anything, its result is returned.
 *  2. Generic: only when the text contains accounting vocabulary, every
 *     standalone "NN" / "NN.NN" with a known prefix is accepted unless it
 *     lies inside a date, amount or percentage.
 *
 * Fix: the keyword patterns used `\b` around Cyrillic words. JavaScript's
 * `\b` treats only ASCII letters, digits and "_" as word characters, so
 * `\bсчет\b` could not match a Cyrillic word surrounded by spaces, and
 * "счет 60" was found neither by the contextual pass nor by the vocabulary
 * gate. Word edges are now checked with Unicode-aware lookarounds, "сч." may
 * be followed by a space before the number, and the vocabulary gate accepts
 * "счёт" as well as "счет".
 *
 * @param {*} text - Text to scan; nullish input is treated as an empty string.
 * @returns {string[]} Unique account codes in order of first appearance.
 */
function extractAccounts(text) {
    const lower = String(text ?? "").toLowerCase();
    const knownPrefixes = new Set([
        "01", "02", "07", "08", "10", "13", "19", "20", "21", "23",
        "25", "26", "28", "29", "41", "43", "44", "45", "50", "51",
        "52", "55", "57", "58", "60", "62", "66", "67", "68", "69",
        "70", "71", "73", "76", "90", "91", "94", "96", "97"
    ]);
    const hasKnownPrefix = (code) => knownPrefixes.has(code.slice(0, 2));
    // Pass 1: "<account keyword> [№|#|:] <code>".
    const explicitAccounts = new Set();
    const contextualPattern = /(?<![\p{L}\p{N}_])(?:(?:сч[её]т(?:а|у|ом|ов)?|accounts?|schet(?:a|u|om|ov)?)(?![\p{L}\p{N}_])|сч\.|сч(?![\p{L}\p{N}_]))\s*(?:№|#|:)?\s*(\d{2}(?:\.\d{2})?)/giu;
    for (const contextual of lower.matchAll(contextualPattern)) {
        const token = contextual[1];
        if (hasKnownPrefix(token)) {
            explicitAccounts.add(token);
        }
    }
    if (explicitAccounts.size > 0) {
        return Array.from(explicitAccounts);
    }
    // Pass 2 is allowed only when the text uses accounting vocabulary.
    const hasAccountingLexeme = /(?:(?<![\p{L}\p{N}_])(?:сч[её]т(?:а|у|ом|ов)?|accounts?|schet(?:a|u|om|ov)?)(?![\p{L}\p{N}_])|(?<![\p{L}\p{N}_])сч(?:\.|(?![\p{L}\p{N}_]))|оплат|расчет|расч[её]т|аванс|долг|settlement|payment|supplier|customer|ндс|vat|амортиз|рбп|deferred)/iu.test(lower);
    if (!hasAccountingLexeme) {
        return [];
    }
    // Numbers inside dates, amounts and percentages are not account codes.
    const spans = [...collectDateSpans(lower), ...collectAmountSpans(lower), ...collectPercentSpans(lower)];
    const extracted = new Set();
    for (const generic of lower.matchAll(/\b\d{2}(?:\.\d{2})?\b/g)) {
        const value = generic[0];
        const start = generic.index;
        if (intersectsAnySpan(start, start + value.length, spans)) {
            continue;
        }
        if (hasKnownPrefix(value)) {
            extracted.add(value);
        }
    }
    return Array.from(extracted);
}
|
||
/**
 * Picks a route hint for a question using keyword heuristics.
 *
 * Checks run in priority order: exact object trace, cross-entity chain,
 * period-close / overview, risk or rule control; "store_canonical" is the
 * default.
 *
 * Fix: the rule-control pattern contained `\bос\b`. JavaScript's `\b` only
 * recognises ASCII word characters, so that alternative could never match the
 * standalone Cyrillic abbreviation "ос". It is now delimited with
 * Unicode-aware lookarounds, which keeps the whole-word intent ("вопрос" does
 * not match) and makes the alternative reachable.
 *
 * @param {string} question - User question.
 * @returns {string} One of "live_mcp_drilldown", "hybrid_store_plus_live",
 *   "batch_refresh_then_store", "store_feature_risk", "store_canonical".
 */
function detectRouteByHeuristicsV1(question) {
    const q = question.toLowerCase();
    const hasExactTrace = /(документ\s*(№|#)|\bref\b|\bid\b|строк[аи].*проводк|конкретн(ый|ого|ая).*документ|точн(ый|ого).*источник|trx-\d+|inv-\d+)/i.test(q);
    if (hasExactTrace) {
        return "live_mcp_drilldown";
    }
    const hasCrossChain = /(разлож|цепоч|чем подтверж|связк|документ.*оплат|закрывающ|взаиморасчет|хвост.*(документ|оплат|проводк))/i.test(q);
    if (hasCrossChain) {
        return "hybrid_store_plus_live";
    }
    const hasPeriodCloseRisk = /(предзакры|закрыти[ея].*период|перед сдачей отчетност|последн(ий|его).*(день|дня)|срыв.*закрыт|может взорвать)/i.test(q);
    const hasHeavyOverview = /(рейтинг|топ|в целом|обзор|приоритиз|company|самых|концентрац|срез)/i.test(q);
    if (hasPeriodCloseRisk || hasHeavyOverview) {
        return "batch_refresh_then_store";
    }
    const hasRiskProbe = /(аномал|подозр|зоны риска|ручной ошиб|подозрительн|риск|хвост)/i.test(q);
    // "ос" needs Unicode-aware word edges; `\b` is ASCII-only (fine for the digits).
    const hasRuleControl = /(контрол|правил|ошибк.*дат|срок.*амортиз|настройк|\b97\b|(?<![\p{L}\p{N}_])ос(?![\p{L}\p{N}_])|68\.02|ндс)/iu.test(q);
    if (hasRiskProbe || hasRuleControl) {
        return "store_feature_risk";
    }
    return "store_canonical";
}
|
||
/**
 * Builds a heuristic (mock) "normalized_query_v1" document for a question
 * without calling a model.
 *
 * The route hint is `expectedRoute` when given, otherwise it is detected from
 * the question. Intent class, expected output shape and the `requires` flags
 * are derived from the route hint and from keyword checks.
 *
 * NOTE(review): when any period wording is found, the period value is the
 * fixed placeholder "2020-06", not a value parsed from the question.
 *
 * @param {string} userQuestion - Raw user question.
 * @param {string} [expectedRoute] - Route hint to use instead of detection.
 * @returns {object} Mock normalized_query_v1 document.
 */
function buildMockNormalizedV1(userQuestion, expectedRoute) {
    const lowered = userQuestion.toLowerCase();
    const routeHint = expectedRoute ?? detectRouteByHeuristicsV1(userQuestion);
    const isDrilldown = routeHint === "live_mcp_drilldown";
    const isHybrid = routeHint === "hybrid_store_plus_live";
    const isBatch = routeHint === "batch_refresh_then_store";
    // Keyword probes.
    const hasPeriod = /(январ|феврал|март|апрел|май|июн|июл|август|сентябр|октябр|ноябр|декабр|квартал|период|конец месяца|20\d{2})/i.test(userQuestion);
    const hasHeavyGoal = /(рейтинг|топ|обзор|приоритиз|срез|в целом|концентрац|самых)/i.test(lowered);
    const hasCloseRisk = /(предзакры|закрыти[ея].*период|срыв.*закрыт|последн.*день)/i.test(lowered);
    const hasRule = /(правил|контрол|ошибк.*дат|амортиз|настройк|\b97\b|ндс|\b01\b|\b02\b)/i.test(lowered);
    const hasAnomaly = /(аномал|подозр|риск|хвост|не сход|завис|крив)/i.test(lowered);
    const hasExactTrace = isDrilldown;
    // Intent class follows the route hint.
    let intentClass;
    if (isDrilldown) {
        intentClass = "drilldown_explain";
    }
    else if (isHybrid) {
        intentClass = "cross_entity";
    }
    else if (isBatch) {
        intentClass = hasCloseRisk && !hasHeavyGoal ? "period_close_risk" : "heavy_analytical";
    }
    else if (routeHint === "store_feature_risk") {
        if (hasRule) {
            intentClass = "rule_based_account_control";
        }
        else {
            intentClass = hasAnomaly ? "anomaly_probe" : "ambiguous_human_query";
        }
    }
    else {
        intentClass = "simple_factual";
    }
    // Output shape: first matching rule wins.
    let expectedOutputShape;
    if (intentClass === "period_close_risk") {
        expectedOutputShape = "prioritized_review_list";
    }
    else if (isBatch) {
        expectedOutputShape = "ranked_list";
    }
    else if (isHybrid) {
        expectedOutputShape = "reconciliation_report";
    }
    else if (isDrilldown) {
        expectedOutputShape = "evidence_chain";
    }
    else if (hasAnomaly) {
        expectedOutputShape = "anomaly_summary";
    }
    else {
        expectedOutputShape = "point_answer";
    }
    const periodScope = hasPeriod
        ? { type: "inferred", value: "2020-06", confidence: "medium" }
        : { type: "missing", value: null, confidence: "low" };
    const ambiguities = [];
    if (!hasPeriod) {
        ambiguities.push({
            field: "period_scope",
            reason: "period is not explicitly provided",
            severity: "medium"
        });
    }
    const periodDependentConfidence = hasPeriod ? "medium" : "low";
    return {
        schema_version: "normalized_query_v1",
        user_question_raw: userQuestion,
        normalized_question: userQuestion.trim(),
        intent_class: intentClass,
        business_problem_type: "normalization_playground",
        domain_entities: isHybrid ? ["контрагент", "документ", "проводка"] : ["счет"],
        accounts_mentioned: extractAccounts(userQuestion),
        documents_mentioned: /документ|реализац|поступлен|выписк|платеж/i.test(userQuestion) ? ["документ"] : [],
        registers_mentioned: /регистр|движен/i.test(userQuestion) ? ["регистр"] : [],
        period_scope: periodScope,
        requires: {
            needs_cross_entity_join: isHybrid,
            needs_causal_chain: isHybrid || /почему|чем подтверж|где рвется/i.test(userQuestion),
            needs_exact_object_trace: hasExactTrace,
            needs_ranking: isBatch && intentClass !== "period_close_risk",
            needs_anomaly_summary: hasAnomaly && !isHybrid,
            needs_runtime_truth: hasExactTrace,
            needs_period_cut: hasPeriod,
            needs_evidence: isHybrid || hasExactTrace
        },
        expected_output_shape: expectedOutputShape,
        route_hint: routeHint,
        ambiguities,
        confidence: {
            overall: periodDependentConfidence,
            intent_class: "medium",
            route_hint: periodDependentConfidence
        }
    };
}
|
||
/**
 * Caps overly optimistic confidence on a normalized v1 item.
 *
 * The item is considered suspicious when it lists ambiguities, has 20 or
 * more words, has a period scope that is not "explicit", or combines
 * period-close wording with overview/ranking wording. For a suspicious item a
 * copy is returned in which "high" `overall` and `route_hint` confidences are
 * lowered to "medium"; otherwise the same item object is returned.
 *
 * @param {object} item - Normalized v1 document.
 * @returns {object} The item itself, or a copy with capped confidence.
 */
function applyConfidenceGuardV1(item) {
    const question = item.user_question_raw;
    const wordCount = question.trim().split(/\s+/).filter(Boolean).length;
    const signals = [
        item.ambiguities.length > 0,
        wordCount >= 20,
        item.period_scope.type !== "explicit",
        /(предзакры|закрыти[ея].*период|перед сдачей отчетност|перед закрытием)/i.test(question) &&
            /(рейтинг|топ|обзор|summary|срез|концентрац|в целом|приоритиз|самых)/i.test(question)
    ];
    if (!signals.includes(true)) {
        return item;
    }
    const capHigh = (level) => (level === "high" ? "medium" : level);
    return {
        ...item,
        confidence: {
            ...item.confidence,
            overall: capHigh(item.confidence.overall),
            route_hint: capHigh(item.confidence.route_hint)
        }
    };
}
|
||
/**
 * Splits a message into candidate fragments.
 *
 * The text is cut at newlines, semicolons and at whitespace that follows
 * ".", "!" or "?". A leading list bullet ("-", "*", "•") is stripped from
 * each piece and empty pieces are dropped. If nothing remains, the trimmed
 * message itself is the single fragment (or none when it is blank).
 *
 * @param {string} message - User message.
 * @returns {string[]} Fragment texts in original order.
 */
function splitIntoCandidateFragments(message) {
    const pieces = [];
    for (const chunk of message.split(/[\n;]+|(?<=[.!?])\s+/)) {
        const cleaned = chunk.replace(/^\s*[-*•]\s*/, "").trim();
        if (cleaned) {
            pieces.push(cleaned);
        }
    }
    if (pieces.length > 0) {
        return pieces;
    }
    const whole = message.trim();
    return whole ? [whole] : [];
}
|
||
/**
 * Infers a time scope from free text.
 *
 * A "20YY" or "20YY-MM" token (separator "-", "/" or ".") gives an explicit
 * scope with high confidence. Otherwise a Russian month name or period wording
 * ("квартал", "конец месяца", "период") gives an inferred scope with medium
 * confidence, carrying the matched text as its value. With neither, the scope
 * is "missing".
 *
 * Fix: the month patterns for январь, февраль, апрель and май lacked the
 * prepositional forms, so "в январе", "в феврале", "в апреле" and "в мае"
 * were reported as missing, unlike the other months and unlike the date-span
 * patterns elsewhere in this file. Those forms are now accepted. "мае" is
 * matched only as a whole word, since it occurs inside common verbs such as
 * "принимаем". All previously matched forms still match.
 *
 * @param {string} text - Text to inspect.
 * @returns {{type: string, value: (string|null), confidence: string}} Time scope.
 */
function inferTimeScope(text) {
    const explicit = text.match(/\b(20\d{2}(?:[-/.](?:0[1-9]|1[0-2]))?)\b/);
    if (explicit) {
        return {
            type: "explicit",
            value: explicit[1],
            confidence: "high"
        };
    }
    const inferred = text.match(/(январ[ьяе]|феврал[ьяе]|март[ае]?|апрел[ьяе]|ма[йя]|(?<!\p{L})мае(?!\p{L})|июн[ьяе]?|июл[ьяе]?|август[ае]?|сентябр[ьяе]?|октябр[ьяе]?|ноябр[ьяе]?|декабр[ьяе]?|квартал|конец месяца|период)/iu);
    if (inferred) {
        return {
            type: "inferred",
            value: inferred[1],
            confidence: "medium"
        };
    }
    return {
        type: "missing",
        value: null,
        confidence: "low"
    };
}
|
||
/**
 * Derives candidate labels from fragment flags.
 *
 * Fragments that are not "in_scope" get no labels. Otherwise every rule whose
 * condition holds contributes its label, in a fixed order; when no rule
 * applies the single label "simple_factual" is returned.
 *
 * @param {object} flags - Fragment flags.
 * @param {string} domainRelevance - Resolved domain relevance.
 * @returns {string[]} Candidate labels.
 */
function pickCandidateLabels(flags, domainRelevance) {
    if (domainRelevance !== "in_scope") {
        return [];
    }
    // [condition, label] pairs, in output order.
    const rules = [
        [flags.asks_for_exact_object_trace, "drilldown_explain"],
        [flags.has_multi_entity_scope && flags.asks_for_chain_explanation, "cross_entity"],
        [flags.asks_for_rule_check, "rule_based_account_control"],
        [flags.asks_for_anomaly_scan, "anomaly_probe"],
        [flags.asks_for_ranking_or_top || flags.asks_for_period_summary, "heavy_analytical"],
        [flags.mentions_period_close_context && !flags.asks_for_ranking_or_top, "period_close_risk"]
    ];
    const labels = rules.filter(([applies]) => Boolean(applies)).map(([, label]) => label);
    return labels.length > 0 ? labels : ["simple_factual"];
}
|
||
/**
 * Builds a heuristic v2 fragment from a piece of text using keyword checks.
 *
 * Returns null for text shorter than three characters (after trimming) and
 * for text that is only a filler word. Otherwise the fragment gets a domain
 * relevance / business scope, intent flags, candidate labels, hint lists,
 * an inferred time scope and a confidence level.
 *
 * NOTE(review): the Cyrillic alternative "ос" in the in-scope and entity
 * patterns is unbounded, so it also matches inside longer words (for example
 * "вопрос" or "поставщик"). Confirm whether that is intended before
 * tightening it.
 * NOTE(review): the transliterated patterns wrap stems such as "postavsh" or
 * "chem podtver" in `\b...\b`, so they match only when the stem is the whole
 * word. Confirm intent.
 *
 * @param {string} rawText - Fragment text.
 * @param {number} index - Zero-based position, used for the `F<n>` identifier.
 * @returns {object|null} Fragment, or null when the text is noise or too short.
 */
function buildFragmentV2(rawText, index) {
    const text = rawText.trim();
    if (text.length < 3) {
        return null;
    }
    const lower = text.toLowerCase();
    if (/^(ну|короче|типа|ладно|ага|ок(ей)?)$/i.test(lower)) {
        return null;
    }
    const has = (pattern) => pattern.test(lower);
    const countMatches = (pattern) => (lower.match(pattern) ?? []).length;
    const uniqueMatches = (pattern) => [...new Set(lower.match(pattern) ?? [])];
    // --- Domain relevance -------------------------------------------------
    const inScopeTokens = has(/(проводк|документ|реализац|поступлен|взаиморасчет|сальдо|остатк|сч(?:е|ё)т|ндс|амортиз|расходы будущих периодов|рбп|ос|контрагент|оплат|банк|выписк|склад|товар|материал|списани|жизненн|цикл|переход|lifecycle|writeoff|deferred)/i);
    const translitInScopeTokens = has(/\b(?:schet|scheta|schetu|schetom|postavsh|kontragent|dokument|doc|oplata|oplati|platezh|vypisk|provodk|realiz|postuplen|nds|os|saldo|hvost|tail|anomali|risk|zakryt|lifecycle|state|transition|writeoff|deferred|periodclose)\b/i);
    const lifecycleInScopeTokens = has(/(lifecycle|жизненн(?:ого|ый)?\s+цикл|стади|переход|списани|writeoff|deferred|period\s*close)/i);
    const genericAccountingTokens = has(/(фсбу|налогов(ый|ого)|нк рф|закон|форма отчетности|как правильно в бухгалтерии)/i);
    const offTopicTokens = has(/(погода|анекдот|музык|фильм|игр[аы]|рецепт|курс валют в мире)/i);
    let domainRelevance = "unclear";
    let businessScope = "unclear";
    if (offTopicTokens) {
        domainRelevance = "out_of_scope";
        businessScope = "offtopic";
    }
    else if (genericAccountingTokens && !inScopeTokens && !translitInScopeTokens) {
        domainRelevance = "out_of_scope";
        businessScope = "generic_accounting";
    }
    else if (inScopeTokens || translitInScopeTokens || lifecycleInScopeTokens) {
        domainRelevance = "in_scope";
        businessScope = "company_specific_accounting";
    }
    // --- Flags ------------------------------------------------------------
    const entityMentions = countMatches(/(документ|оплат|проводк|контрагент|договор|реализац|поступлен|выписк|закрыт|взаиморасчет|склад|товар|материал|поставщ|покупат|списани|жизненн|цикл)/g) +
        countMatches(/\b(?:dokument|oplata|platezh|provodk|kontragent|postavsh|pokupat|realiz|postuplen|vypisk|zakryt|schet|sklad|tovar|material)\b/g);
    // Each of these four flags is also set by its transliterated counterpart.
    const flags = {
        has_multi_entity_scope: entityMentions >= 2,
        asks_for_chain_explanation: has(/(цепоч|разлож|почему|чем подтверж|где рвет|связк|логик.*операц)/i) ||
            has(/\b(?:razlozh|pochemu|chem podtver|gde rv|svyaz|razryv|chain)\b/i),
        asks_for_ranking_or_top: has(/(топ|рейтинг|сам(ые|ых)|максимальн|сильнее всего|приоритиз)/i),
        asks_for_period_summary: has(/(срез|обзор|в целом|картина периода|summary|по периоду)/i),
        asks_for_rule_check: has(/(правил|контрол|корректн|ошибк.*дат|срок списан|амортиз|настройк|проверь)/i) ||
            has(/\b(?:prover|check|rule|control|korrekt)\b/i),
        asks_for_anomaly_scan: has(/(аномал|подозр|риск|хвост|не сход|завис|крив|искажа)/i) ||
            has(/\b(?:anomal|risk|hvost|tail|mismatch)\b/i),
        asks_for_exact_object_trace: has(/(документ\s*(№|#)|\bref\b|\bid\b|строк[аи]\s+проводк|операц.*№|trx-\d+|inv-\d+|doc-\d+)/i),
        asks_for_evidence: has(/(чем подтверж|документ|проводк|движен|акт сверк|доказат|evidence)/i) ||
            has(/\b(?:dokument|provodk|evidence|doc)\b/i),
        mentions_period_close_context: has(/(закрыти[ея]\s+период|предзакры|конец месяца|сдач[аи]\s+отчетност)/i)
    };
    const candidateLabels = pickCandidateLabels(flags, domainRelevance);
    // --- Confidence -------------------------------------------------------
    let confidence;
    if (domainRelevance !== "in_scope") {
        confidence = "low";
    }
    else if (flags.asks_for_exact_object_trace || flags.asks_for_ranking_or_top) {
        confidence = "high";
    }
    else {
        confidence = "medium";
    }
    return {
        fragment_id: `F${index + 1}`,
        raw_fragment_text: text,
        normalized_fragment_text: text.charAt(0).toUpperCase() + text.slice(1),
        domain_relevance: domainRelevance,
        business_scope: businessScope,
        entity_hints: uniqueMatches(/(поставщик|покупател|контрагент|договор|банк|склад|товар|материал|ос|взаиморасчет|реализац|поступлен)/g),
        account_hints: extractAccounts(text),
        document_hints: uniqueMatches(/(документ|реализац|поступлен|платеж|выписк|акт сверк)/g),
        register_hints: uniqueMatches(/(регистр|движен|остатк|сальдо)/g),
        time_scope: inferTimeScope(text),
        flags,
        candidate_labels: candidateLabels,
        confidence
    };
}
|
||
/**
 * Builds a heuristic (no model call) `normalized_query_v2` payload for a user message.
 *
 * The message is split into candidate fragments; every candidate for which the
 * fragment builder returns a falsy value is reported in `discarded_fragments`
 * with the reason "noise_or_too_short".
 *
 * @param {string} userMessage - Raw user message.
 * @returns {object} Payload with `schema_version` "normalized_query_v2".
 */
function buildMockNormalizedV2(userMessage) {
    const fragments = [];
    const discarded = [];
    splitIntoCandidateFragments(userMessage).forEach((raw, index) => {
        // The position within the raw split is forwarded to the builder unchanged.
        const built = buildFragmentV2(raw, index);
        if (built) {
            fragments.push(built);
        }
        else {
            discarded.push({
                raw_fragment_text: raw,
                reason: "noise_or_too_short"
            });
        }
    });
    const countByRelevance = (relevance) => fragments.filter((item) => item.domain_relevance === relevance).length;
    const unclearCount = countByRelevance("unclear");
    const messageInScope = countByRelevance("in_scope") > 0;
    let scopeConfidence = "low";
    if (messageInScope) {
        scopeConfidence = unclearCount > 0 ? "medium" : "high";
    }
    // Clarification is only requested for messages that are in scope at all.
    const hasMissingPeriod = fragments.some((item) => item.time_scope.type === "missing");
    const needsClarification = messageInScope && (unclearCount > 0 || hasMissingPeriod);
    return {
        schema_version: "normalized_query_v2",
        user_message_raw: userMessage,
        message_in_scope: messageInScope,
        scope_confidence: scopeConfidence,
        contains_multiple_tasks: fragments.length > 1,
        fragments,
        discarded_fragments: discarded,
        global_notes: {
            needs_clarification: needsClarification,
            clarification_reason: needsClarification ? "Недостаточно периода/контекста по части фрагментов." : null
        }
    };
}
|
||
/**
 * Tells whether the session context carries any indication of an active period.
 *
 * A non-blank `period_hint` is sufficient; otherwise `business_context` is
 * searched (case-insensitively) for one of the known period markers.
 *
 * @param {object|null|undefined} context - Session context; may be absent.
 * @returns {boolean} True when a period can be taken from the session.
 */
function hasSessionPeriodContext(context) {
    if (!context) {
        return false;
    }
    if (String(context.period_hint ?? "").trim().length > 0) {
        return true;
    }
    const businessContext = String(context.business_context ?? "").toLowerCase();
    const periodMarkers = [
        "current_analysis_period",
        "active_period",
        "рабочий месяц",
        "активный период"
    ];
    return periodMarkers.some((marker) => businessContext.includes(marker));
}
|
||
/**
 * Tells whether an in-scope fragment carries at least one business signal:
 * a non-empty hint list (entity/account/document/register), a candidate label,
 * or any truthy flag.
 *
 * Missing or malformed hint lists and a missing `flags` object are treated as
 * "no signal" instead of throwing, because this check also runs on model output
 * that has not been schema-validated yet.
 *
 * @param {object} fragment - Normalized fragment.
 * @returns {boolean} True when the fragment is in scope and has a signal.
 */
function hasBusinessNodeSignals(fragment) {
    if (!fragment || fragment.domain_relevance !== "in_scope") {
        return false;
    }
    const signalLists = [
        fragment.entity_hints,
        fragment.account_hints,
        fragment.document_hints,
        fragment.register_hints,
        fragment.candidate_labels
    ];
    if (signalLists.some((list) => Array.isArray(list) && list.length > 0)) {
        return true;
    }
    const flags = fragment.flags;
    if (flags === null || typeof flags !== "object") {
        return false;
    }
    return Object.values(flags).some((value) => Boolean(value));
}
|
||
/**
 * Tells whether a route can be chosen for the fragment: it must be in scope,
 * its business scope must not be "unclear", and it must carry at least one
 * business signal.
 *
 * @param {object} fragment - Normalized fragment.
 * @returns {boolean} True when the fragment is routable.
 */
function routeCanBeSelected(fragment) {
    const scopeIsResolved = fragment.domain_relevance === "in_scope" && fragment.business_scope !== "unclear";
    if (!scopeIsResolved) {
        return false;
    }
    return hasBusinessNodeSignals(fragment);
}
|
||
/**
 * Detects a reference to the July snapshot period, either in the user message
 * or in the session context (`period_hint` / `business_context`).
 *
 * Message matching is whole-word: "2020-07" (also with "/" or "." and an
 * optional leading zero), the Russian forms "июль/июля/июле", or "july".
 * Context matching is a plain substring search.
 *
 * @param {string|null|undefined} userMessage - Raw user message.
 * @param {object|null|undefined} sessionContext - Session context; may be absent.
 * @returns {boolean} True when a July signal is present.
 */
function hasJuly2020SnapshotSignal(userMessage, sessionContext) {
    const text = String(userMessage ?? "").toLowerCase();
    // `\b` only recognizes ASCII word characters, so it can never delimit a
    // Cyrillic word; the Russian alternative uses Unicode-aware lookarounds instead.
    const messagePattern = /\b2020[-/.]0?7\b|(?<![\p{L}\p{N}_])июл[ьяе]?(?![\p{L}\p{N}_])|\bjuly\b/u;
    if (messagePattern.test(text)) {
        return true;
    }
    const contextPeriod = String(sessionContext?.period_hint ?? "").toLowerCase();
    const businessContext = String(sessionContext?.business_context ?? "").toLowerCase();
    return /2020[-/.]0?7|июл|july/.test(`${contextPeriod} ${businessContext}`);
}
|
||
/**
 * Detects signals in the user message that are used to force company-specific
 * scope: one of the listed account numbers as a standalone token (optionally
 * with a ".N"/".NN" sub-account), or one of the listed keyword stems.
 *
 * @param {string|null|undefined} userMessage - Raw user message.
 * @returns {boolean} True when an account number or keyword stem is found.
 */
function hasP0SignalForCompanyScope(userMessage) {
    const lower = String(userMessage ?? "").toLowerCase();
    const accountPattern = /\b(?:01|02|08|19|20|21|23|25|26|28|29|44|51|60|62|68|76|97)(?:\.\d{1,2})?\b/i;
    // Keyword stems are matched as substrings, not whole words.
    const keywordPattern = /ндс|vat|supplier|customer|settlement|month\s*close|рбп|deferred|закрыти[ея]\s+месяц|амортиз|поставщ|покупат/i;
    return accountPattern.test(lower) || keywordPattern.test(lower);
}
|
||
/**
 * Upgrades the business scope of in-scope fragments to
 * "company_specific_accounting" when the request carries both a July snapshot
 * signal and a company-scope signal.
 *
 * Only fragments whose current scope is "generic_accounting" or "unclear" are
 * rewritten. The input is never mutated: when nothing needs to change the very
 * same `candidate` reference is returned, otherwise a shallow copy with a new
 * `fragments` array.
 *
 * @param {*} candidate - Parsed normalized payload (possibly malformed).
 * @param {string} userMessage - Raw user message.
 * @param {object|null|undefined} sessionContext - Session context; may be absent.
 * @returns {*} The original candidate or an adjusted shallow copy.
 */
function applyCompanyScopeResolutionV2(candidate, userMessage, sessionContext) {
    if (!candidate || typeof candidate !== "object" || !Array.isArray(candidate.fragments)) {
        return candidate;
    }
    const forceCompanyScope = hasJuly2020SnapshotSignal(userMessage, sessionContext) && hasP0SignalForCompanyScope(userMessage);
    if (!forceCompanyScope) {
        return candidate;
    }
    const needsUpgrade = (fragment) => {
        if (!fragment || typeof fragment !== "object" || fragment.domain_relevance !== "in_scope") {
            return false;
        }
        const scopeValue = String(fragment.business_scope ?? "").trim();
        return scopeValue === "generic_accounting" || scopeValue === "unclear";
    };
    if (!candidate.fragments.some(needsUpgrade)) {
        return candidate;
    }
    return {
        ...candidate,
        fragments: candidate.fragments.map((fragment) => needsUpgrade(fragment)
            ? { ...fragment, business_scope: "company_specific_accounting" }
            : fragment)
    };
}
|
||
/**
 * Removes duplicate entries while keeping first-occurrence order.
 *
 * @param {Iterable<string>} input - Soft-assumption codes.
 * @returns {string[]} New array without duplicates.
 */
function dedupeSoftAssumptions(input) {
    return [...new Set(input)];
}
|
||
/**
 * Decides whether a fragment can be executed as is, executed with soft
 * assumptions, or needs clarification first.
 *
 * Checks run in this order; the first failing one determines the reason:
 *   1. out-of-scope domain          -> "fragment_out_of_scope"
 *   2. unclear domain               -> "domain_or_scope_unclear"
 *   3. no business signals          -> "business_area_not_identified"
 *   4. route cannot be selected     -> "route_cannot_be_selected_reliably"
 *   5. period missing, no session period, and the request depends on a period
 *                                   -> "critical_period_missing"
 *
 * Flags are read only after the domain checks, so out-of-scope and unclear
 * fragments are classified even when they carry no `flags` object.
 *
 * @param {object} fragment - Normalized fragment.
 * @param {object|null|undefined} sessionContext - Session context; may be absent.
 * @returns {{execution_readiness: string, clarification_reason: (string|null), soft_assumption_used: string[]}}
 */
function decideFragmentExecutionPolicy(fragment, sessionContext) {
    const clarify = (reason) => ({
        execution_readiness: "needs_clarification",
        clarification_reason: reason,
        soft_assumption_used: []
    });
    if (fragment.domain_relevance === "out_of_scope") {
        return clarify("fragment_out_of_scope");
    }
    if (fragment.domain_relevance === "unclear") {
        return clarify("domain_or_scope_unclear");
    }
    if (!hasBusinessNodeSignals(fragment)) {
        return clarify("business_area_not_identified");
    }
    if (!routeCanBeSelected(fragment)) {
        return clarify("route_cannot_be_selected_reliably");
    }
    const flags = fragment.flags;
    const softAssumptions = [];
    if (fragment.time_scope.type === "missing") {
        if (hasSessionPeriodContext(sessionContext)) {
            // The period can be borrowed from the session instead of asking the user.
            softAssumptions.push("period_from_session_context");
        }
        else if (flags.asks_for_period_summary || flags.mentions_period_close_context || flags.asks_for_ranking_or_top) {
            return clarify("critical_period_missing");
        }
    }
    if (flags.asks_for_anomaly_scan ||
        flags.asks_for_rule_check ||
        flags.asks_for_ranking_or_top ||
        flags.asks_for_period_summary) {
        softAssumptions.push("problem_scan_mode_enabled");
    }
    if (fragment.business_scope === "company_specific_accounting" &&
        fragment.entity_hints.length === 0 &&
        fragment.account_hints.length === 0) {
        softAssumptions.push("company_scope_defaulted");
    }
    const assumptions = dedupeSoftAssumptions(softAssumptions);
    return {
        execution_readiness: assumptions.length > 0 ? "executable_with_soft_assumptions" : "executable",
        clarification_reason: null,
        soft_assumption_used: assumptions
    };
}
|
||
/**
 * Extends a v2 fragment with the v2.0.1 execution-policy fields.
 *
 * @param {object} fragment - Normalized v2 fragment.
 * @param {object|null|undefined} sessionContext - Session context; may be absent.
 * @returns {object} Copy of the fragment with `execution_readiness`,
 *   `clarification_reason` and `soft_assumption_used` set from the policy.
 */
function toV201Fragment(fragment, sessionContext) {
    const { execution_readiness, clarification_reason, soft_assumption_used } = decideFragmentExecutionPolicy(fragment, sessionContext);
    return {
        ...fragment,
        execution_readiness,
        clarification_reason,
        soft_assumption_used
    };
}
|
||
/**
 * Converts a v2-shaped payload into `normalized_query_v2_0_1` by attaching the
 * execution policy to every fragment and recomputing the message-level notes.
 *
 * Entries of `fragments` that are not objects with a string `fragment_id` are
 * dropped. The message needs clarification only when there is at least one
 * in-scope fragment and every in-scope fragment needs clarification.
 *
 * @param {*} candidate - Parsed payload (possibly malformed).
 * @param {string} userMessage - Used when the payload has no `user_message_raw`.
 * @param {object|null|undefined} sessionContext - Session context; may be absent.
 * @returns {object|null} The v2.0.1 payload, or null when `candidate` is not an
 *   object or has no `fragments` array.
 */
function applyClarificationPolicyV201(candidate, userMessage, sessionContext) {
    if (!candidate || typeof candidate !== "object" || !Array.isArray(candidate.fragments)) {
        return null;
    }
    const isUsableFragment = (item) => Boolean(item) && typeof item === "object" && typeof item.fragment_id === "string";
    const fragments = candidate.fragments
        .filter(isUsableFragment)
        .map((fragment) => toV201Fragment(fragment, sessionContext));
    const inScopeFragments = fragments.filter((fragment) => fragment.domain_relevance === "in_scope");
    const blockingFragments = inScopeFragments.filter((fragment) => fragment.execution_readiness === "needs_clarification");
    const hasInScope = inScopeFragments.length > 0;
    const needsClarification = hasInScope && blockingFragments.length === inScopeFragments.length;
    return {
        schema_version: "normalized_query_v2_0_1",
        user_message_raw: String(candidate.user_message_raw ?? userMessage),
        message_in_scope: hasInScope,
        scope_confidence: candidate.scope_confidence ?? (hasInScope ? "medium" : "low"),
        contains_multiple_tasks: typeof candidate.contains_multiple_tasks === "boolean"
            ? candidate.contains_multiple_tasks
            : fragments.length > 1,
        fragments,
        discarded_fragments: Array.isArray(candidate.discarded_fragments) ? candidate.discarded_fragments : [],
        global_notes: {
            needs_clarification: needsClarification,
            // The first blocking fragment supplies the message-level reason.
            clarification_reason: needsClarification
                ? (blockingFragments[0]?.clarification_reason ?? "clarification_required")
                : null
        }
    };
}
|
||
/**
 * Resolves the v2.0.2 execution state of a fragment: the v2.0.1 execution
 * policy plus an explicit `route_status` / `no_route_reason` pair.
 *
 * Out-of-scope fragments are settled before the v2.0.1 policy is consulted, so
 * the policy is evaluated only for fragments that may actually be routed.
 *
 * @param {object} fragment - Normalized fragment.
 * @param {object|null|undefined} sessionContext - Session context; may be absent.
 * @returns {{execution_readiness: string, clarification_reason: (string|null),
 *   soft_assumption_used: string[], route_status: string, no_route_reason: (string|null)}}
 */
function resolveFragmentExecutionStateV202(fragment, sessionContext) {
    const noRoute = (executionReadiness, clarificationReason, noRouteReason) => ({
        execution_readiness: executionReadiness,
        clarification_reason: clarificationReason,
        soft_assumption_used: [],
        route_status: "no_route",
        no_route_reason: noRouteReason
    });
    if (fragment.domain_relevance === "out_of_scope") {
        return noRoute("no_route", "fragment_out_of_scope", "out_of_scope");
    }
    const v201 = decideFragmentExecutionPolicy(fragment, sessionContext);
    if (v201.execution_readiness === "needs_clarification") {
        return noRoute("needs_clarification", v201.clarification_reason ?? "insufficient_specificity", "insufficient_specificity");
    }
    // Deterministic no-route guard: routable in-scope fragments cannot remain
    // unresolved. The v2.0.1 policy visible in this file already asks for
    // clarification when no route can be selected, so this is a defensive check.
    if (!routeCanBeSelected(fragment)) {
        return noRoute("no_route", "route_mapping_missing", "missing_mapping");
    }
    return {
        execution_readiness: v201.execution_readiness,
        clarification_reason: null,
        soft_assumption_used: v201.soft_assumption_used,
        route_status: "routed",
        no_route_reason: null
    };
}
|
||
/**
 * Extends a v2 fragment with the v2.0.2 execution-state fields.
 *
 * @param {object} fragment - Normalized v2 fragment.
 * @param {object|null|undefined} sessionContext - Session context; may be absent.
 * @returns {object} Copy of the fragment with `execution_readiness`,
 *   `clarification_reason`, `soft_assumption_used`, `route_status` and
 *   `no_route_reason` set from the resolved state.
 */
function toV202Fragment(fragment, sessionContext) {
    const { execution_readiness, clarification_reason, soft_assumption_used, route_status, no_route_reason } = resolveFragmentExecutionStateV202(fragment, sessionContext);
    return {
        ...fragment,
        execution_readiness,
        clarification_reason,
        soft_assumption_used,
        route_status,
        no_route_reason
    };
}
|
||
/**
 * Converts a v2-shaped payload into `normalized_query_v2_0_2` by attaching the
 * resolved execution/route state to every fragment and recomputing the
 * message-level notes.
 *
 * Entries of `fragments` that are not objects with a string `fragment_id` are
 * dropped. The message needs clarification only when there is at least one
 * in-scope fragment and every in-scope fragment needs clarification.
 *
 * @param {*} candidate - Parsed payload (possibly malformed).
 * @param {string} userMessage - Used when the payload has no `user_message_raw`.
 * @param {object|null|undefined} sessionContext - Session context; may be absent.
 * @returns {object|null} The v2.0.2 payload, or null when `candidate` is not an
 *   object or has no `fragments` array.
 */
function applyExecutionStatePolicyV202(candidate, userMessage, sessionContext) {
    if (!candidate || typeof candidate !== "object" || !Array.isArray(candidate.fragments)) {
        return null;
    }
    const isUsableFragment = (item) => Boolean(item) && typeof item === "object" && typeof item.fragment_id === "string";
    const fragments = candidate.fragments
        .filter(isUsableFragment)
        .map((fragment) => toV202Fragment(fragment, sessionContext));
    const inScopeFragments = fragments.filter((fragment) => fragment.domain_relevance === "in_scope");
    const clarificationBlocks = inScopeFragments.filter((fragment) => fragment.execution_readiness === "needs_clarification");
    const hasInScope = inScopeFragments.length > 0;
    const needsClarification = hasInScope && clarificationBlocks.length === inScopeFragments.length;
    return {
        schema_version: "normalized_query_v2_0_2",
        user_message_raw: String(candidate.user_message_raw ?? userMessage),
        message_in_scope: hasInScope,
        scope_confidence: candidate.scope_confidence ?? (hasInScope ? "medium" : "low"),
        contains_multiple_tasks: typeof candidate.contains_multiple_tasks === "boolean"
            ? candidate.contains_multiple_tasks
            : fragments.length > 1,
        fragments,
        discarded_fragments: Array.isArray(candidate.discarded_fragments) ? candidate.discarded_fragments : [],
        global_notes: {
            needs_clarification: needsClarification,
            // The first blocking fragment supplies the message-level reason.
            clarification_reason: needsClarification
                ? (clarificationBlocks[0]?.clarification_reason ?? "clarification_required")
                : null
        }
    };
}
|
||
/**
 * Builds a heuristic (no model call) `normalized_query_v2_0_1` payload.
 *
 * The v2 mock is produced first and then passed through the v2.0.1
 * clarification policy. If the policy yields nothing, every fragment is marked
 * as needing clarification with the reason "policy_fallback".
 *
 * @param {string} userMessage - Raw user message.
 * @param {object|null|undefined} sessionContext - Session context; may be absent.
 * @returns {object} Payload with `schema_version` "normalized_query_v2_0_1".
 */
function buildMockNormalizedV2_0_1(userMessage, sessionContext) {
    const v2 = buildMockNormalizedV2(userMessage);
    const adjusted = applyClarificationPolicyV201(v2, userMessage, sessionContext);
    if (adjusted) {
        return adjusted;
    }
    const fallbackFragments = v2.fragments.map((fragment) => ({
        ...fragment,
        execution_readiness: "needs_clarification",
        clarification_reason: "policy_fallback",
        soft_assumption_used: []
    }));
    return {
        schema_version: "normalized_query_v2_0_1",
        user_message_raw: userMessage,
        message_in_scope: v2.message_in_scope,
        scope_confidence: v2.scope_confidence,
        contains_multiple_tasks: v2.contains_multiple_tasks,
        fragments: fallbackFragments,
        discarded_fragments: v2.discarded_fragments,
        global_notes: {
            needs_clarification: true,
            clarification_reason: "policy_fallback"
        }
    };
}
|
||
/**
 * Builds a heuristic (no model call) `normalized_query_v2_0_2` payload.
 *
 * The v2 mock is produced first and then passed through the v2.0.2
 * execution-state policy. If the policy yields nothing, every fragment is
 * marked as needing clarification ("policy_fallback") with no route.
 *
 * @param {string} userMessage - Raw user message.
 * @param {object|null|undefined} sessionContext - Session context; may be absent.
 * @returns {object} Payload with `schema_version` "normalized_query_v2_0_2".
 */
function buildMockNormalizedV2_0_2(userMessage, sessionContext) {
    const v2 = buildMockNormalizedV2(userMessage);
    const adjusted = applyExecutionStatePolicyV202(v2, userMessage, sessionContext);
    if (adjusted) {
        return adjusted;
    }
    const fallbackFragments = v2.fragments.map((fragment) => ({
        ...fragment,
        execution_readiness: "needs_clarification",
        clarification_reason: "policy_fallback",
        soft_assumption_used: [],
        route_status: "no_route",
        no_route_reason: "unsupported_fragment_type"
    }));
    return {
        schema_version: "normalized_query_v2_0_2",
        user_message_raw: userMessage,
        message_in_scope: v2.message_in_scope,
        scope_confidence: v2.scope_confidence,
        contains_multiple_tasks: v2.contains_multiple_tasks,
        fragments: fallbackFragments,
        discarded_fragments: v2.discarded_fragments,
        global_notes: {
            needs_clarification: true,
            clarification_reason: "policy_fallback"
        }
    };
}
|
||
/**
 * Picks the single route hint stored with the trace record.
 *
 * For v1 payloads this is the payload's own `route_hint`. For later schemas it
 * is the route of the first decision that is not "no_route", taken from a
 * "deterministic_v2" summary; any other summary mode yields null.
 *
 * A summary without a `decisions` array yields null instead of throwing.
 *
 * @param {object|null} normalized - Validated normalized payload, if any.
 * @param {object|null} routeSummary - Route hint summary, if any.
 * @returns {string|null} Route hint, or null when none can be derived.
 */
function routeHintForHistory(normalized, routeSummary) {
    if (!normalized || !routeSummary) {
        return null;
    }
    if (normalized.schema_version === "normalized_query_v1") {
        return normalized.route_hint;
    }
    if (routeSummary.mode !== "deterministic_v2" || !Array.isArray(routeSummary.decisions)) {
        return null;
    }
    const decision = routeSummary.decisions.find((item) => Boolean(item) && item.route !== "no_route");
    return decision?.route ?? null;
}
|
||
/**
 * Picks the single confidence value stored with the trace record:
 * `confidence.route_hint` for v1 payloads, `scope_confidence` otherwise.
 *
 * @param {object|null} normalized - Validated normalized payload, if any.
 * @param {object|null} routeSummary - Route hint summary; only its presence is checked.
 * @returns {string|null} Confidence value, or null when unavailable.
 */
function confidenceForHistory(normalized, routeSummary) {
    if (!normalized || !routeSummary) {
        return null;
    }
    const confidence = normalized.schema_version === "normalized_query_v1"
        ? normalized.confidence.route_hint
        : normalized.scope_confidence;
    // A missing value is reported as null, like every other "unavailable" exit.
    return confidence ?? null;
}
|
||
/**
 * Lists what is missing from a trace before it is stored.
 *
 * Checks, in order: raw model output present; normalized payload present;
 * (non-v1 only) `fragments` is an array; per-fragment state fields required by
 * the schema version; a "deterministic_v2" route summary is present; and the
 * number of route decisions equals the number of fragments.
 *
 * This is a diagnostics helper: fragments that are not objects are reported as
 * missing every required field instead of raising.
 *
 * @param {{rawModelResponse: *, normalized: (object|null), routeHintSummary: (object|null)}} input
 * @returns {string[]} Issue codes; empty when the trace is complete.
 */
function collectTraceCompletenessIssues(input) {
    const issues = [];
    const { normalized, routeHintSummary } = input;
    if (!input.rawModelResponse) {
        issues.push("missing_raw_model_output");
    }
    if (!normalized) {
        issues.push("missing_parsed_normalized_json");
        return issues;
    }
    const schema = normalized.schema_version;
    if (schema === "normalized_query_v1") {
        return issues;
    }
    if (!Array.isArray(normalized.fragments)) {
        issues.push("missing_parsed_fragments");
        return issues;
    }
    // The required fields depend only on the schema version, so they are
    // determined once rather than per fragment.
    const requiredKeys = [];
    if (schema === "normalized_query_v2_0_1" || schema === "normalized_query_v2_0_2") {
        requiredKeys.push("execution_readiness");
    }
    if (schema === "normalized_query_v2_0_2") {
        requiredKeys.push("route_status", "no_route_reason");
    }
    for (const fragment of normalized.fragments) {
        const record = fragment !== null && typeof fragment === "object" ? fragment : {};
        for (const key of requiredKeys) {
            if (!(key in record)) {
                issues.push(`fragment_${record.fragment_id}_missing_${key}`);
            }
        }
    }
    if (!routeHintSummary || routeHintSummary.mode !== "deterministic_v2") {
        issues.push("missing_route_hint_summary_v2");
        return issues;
    }
    const decisionCount = Array.isArray(routeHintSummary.decisions) ? routeHintSummary.decisions.length : 0;
    if (decisionCount !== normalized.fragments.length) {
        issues.push("route_decision_count_mismatch");
    }
    return issues;
}
|
||
/**
 * Turns a raw user question into a schema-validated normalized query, using
 * either a heuristic mock or a model call (with at most one controlled retry),
 * and records a trace of every request.
 */
class NormalizerService {
    openaiClient;

    /**
     * @param {object} openaiClient - Client exposing `normalize(connection, prompt)`
     *   that resolves to `{ raw, outputText, usage }`.
     */
    constructor(openaiClient) {
        this.openaiClient = openaiClient;
    }

    /** Returns the retry instruction matching the requested schema version. */
    static #retryInstructionFor(schemaVersion) {
        switch (schemaVersion) {
            case "v2":
                return RETRY_INSTRUCTION_V2;
            case "v2_0_2":
                return RETRY_INSTRUCTION_V2_0_2;
            case "v2_0_1":
                return RETRY_INSTRUCTION_V2_0_1;
            default:
                return RETRY_INSTRUCTION_V1;
        }
    }

    /** Builds the heuristic mock payload for the requested schema version. */
    static #buildMock(payload, schemaVersion) {
        switch (schemaVersion) {
            case "v2":
                return buildMockNormalizedV2(payload.userQuestion);
            case "v2_0_2":
                return buildMockNormalizedV2_0_2(payload.userQuestion, payload.context);
            case "v2_0_1":
                return buildMockNormalizedV2_0_1(payload.userQuestion, payload.context);
            default:
                return buildMockNormalizedV1(payload.userQuestion, payload.context?.expected_route);
        }
    }

    /**
     * Parses model output, applies the schema-specific post-processing and
     * validates the result. Any exception raised along the way is reported as a
     * failed validation whose message starts with `errorLabel`.
     */
    static #interpretOutput(outputText, schemaVersion, payload, errorLabel) {
        try {
            let candidate = safeJsonParse(outputText);
            if (schemaVersion !== "v1") {
                candidate = coerceNormalizedCandidateV2(candidate, payload.userQuestion) ?? candidate;
                candidate = applyCompanyScopeResolutionV2(candidate, payload.userQuestion, payload.context);
            }
            if (schemaVersion === "v2_0_2") {
                candidate = applyExecutionStatePolicyV202(candidate, payload.userQuestion, payload.context);
            }
            else if (schemaVersion === "v2_0_1") {
                candidate = applyClarificationPolicyV201(candidate, payload.userQuestion, payload.context);
            }
            return {
                normalizedCandidate: candidate,
                validation: (0, schemaValidator_1.validateNormalized)(candidate, schemaVersion)
            };
        }
        catch (error) {
            return {
                normalizedCandidate: null,
                validation: {
                    passed: false,
                    errors: [`${errorLabel}: ${error instanceof Error ? error.message : String(error)}`]
                }
            };
        }
    }

    /**
     * Normalizes one user question.
     *
     * With `payload.useMock` no model is called. Otherwise the model is called
     * once and, when validation fails and the retry policy is "default" or
     * "single-pass-strict", once more with a schema-specific retry instruction.
     * A trace record is always saved; a v1 result is additionally saved as an
     * eval case when `payload.saveAsTestCase` is set.
     *
     * @param {object} payload - Request options (question, prompts, model settings, context).
     * @returns {Promise<object>} Response with `ok`, `normalized`, `validation`,
     *   `usage`, `latency_ms` and related metadata.
     */
    async normalize(payload) {
        const traceId = (0, nanoid_1.nanoid)(14);
        const startedAt = Date.now();
        const llmProvider = payload.llmProvider === "local" ? "local" : "openai";
        const model = payload.model ?? config_1.DEFAULT_MODEL;
        const baseUrl = payload.baseUrl ?? config_1.DEFAULT_OPENAI_BASE_URL;
        const temperature = payload.temperature ?? config_1.DEFAULT_TEMPERATURE;
        const maxOutputTokens = payload.maxOutputTokens ?? config_1.DEFAULT_MAX_OUTPUT_TOKENS;
        const retryPolicy = payload.retryPolicy ?? "default";
        const schemaVersion = resolveSchemaVersion(payload);
        const promptBundle = (0, promptBuilder_1.buildPromptBundle)({
            promptVersion: payload.promptVersion,
            systemPrompt: payload.systemPrompt,
            developerPrompt: payload.developerPrompt,
            domainPrompt: payload.domainPrompt,
            schemaNotes: undefined,
            fewShotExamples: payload.fewShotExamples
        });
        // Both attempts share connection settings and prompt; only the token
        // limit and the optional retry instruction differ.
        const callModel = (tokenLimit, controlledRetryInstruction) => {
            const prompt = {
                systemPrompt: promptBundle.systemPrompt,
                developerPrompt: promptBundle.combinedDeveloperPrompt,
                domainPrompt: promptBundle.domainPrompt,
                userQuestion: payload.userQuestion,
                schemaVersion
            };
            if (controlledRetryInstruction !== undefined) {
                prompt.controlledRetryInstruction = controlledRetryInstruction;
            }
            return this.openaiClient.normalize({
                llmProvider,
                apiKey: String(payload.apiKey ?? process.env.OPENAI_API_KEY ?? ""),
                model,
                baseUrl,
                temperature,
                maxOutputTokens: tokenLimit
            }, prompt);
        };
        let rawModelResponse = null;
        let outputText = "";
        let usage = { input_tokens: 0, output_tokens: 0, total_tokens: 0 };
        let requestCountForCase = 0;
        if (payload.useMock) {
            const mock = NormalizerService.#buildMock(payload, schemaVersion);
            rawModelResponse = { mode: "mock", schema_version: schemaVersion };
            outputText = JSON.stringify(mock, null, 2);
        }
        else {
            const firstTry = await callModel(maxOutputTokens);
            requestCountForCase += 1;
            rawModelResponse = firstTry.raw;
            outputText = firstTry.outputText;
            usage = firstTry.usage;
        }
        let { normalizedCandidate, validation } = NormalizerService.#interpretOutput(outputText, schemaVersion, payload, "JSON_PARSE_ERROR");
        const canRetry = retryPolicy === "default" || retryPolicy === "single-pass-strict";
        if (!payload.useMock && !validation.passed && canRetry) {
            const retryMaxOutputTokens = computeRetryMaxOutputTokens(maxOutputTokens, rawModelResponse);
            const retry = await callModel(retryMaxOutputTokens, NormalizerService.#retryInstructionFor(schemaVersion));
            requestCountForCase += 1;
            rawModelResponse = retry.raw;
            outputText = retry.outputText;
            // NOTE(review): usage is replaced, not summed, so after a retry it
            // reflects only the second call — confirm this is intended.
            usage = retry.usage;
            ({ normalizedCandidate, validation } = NormalizerService.#interpretOutput(outputText, schemaVersion, payload, "JSON_PARSE_ERROR_AFTER_RETRY"));
        }
        let normalized = null;
        if (validation.passed) {
            // Only v1 results pass through the confidence guard.
            normalized = schemaVersion === "v1" ? applyConfidenceGuardV1(normalizedCandidate) : normalizedCandidate;
        }
        const routeHintSummary = normalized ? (0, routeHintAdapter_1.toRouteHintSummary)(normalized) : null;
        const latency = Date.now() - startedAt;
        const rawOutputForTrace = rawModelResponse ?? outputText;
        const traceCompletenessIssues = collectTraceCompletenessIssues({
            traceId,
            schemaVersion,
            rawModelResponse: rawOutputForTrace,
            normalized,
            routeHintSummary
        });
        if (traceCompletenessIssues.length > 0) {
            console.error(`[trace-completeness] trace_id=${traceId} schema=${schemaVersion} issues=${traceCompletenessIssues.join(",")}`);
        }
        const response = {
            trace_id: traceId,
            ok: validation.passed,
            normalized,
            route_hint_summary: routeHintSummary,
            raw_model_output: rawOutputForTrace,
            validation,
            usage,
            latency_ms: latency,
            prompt_version: promptBundle.prompt_version,
            schema_version: schemaVersion,
            request_count_for_case: requestCountForCase
        };
        const context = payload.context;
        const traceRecord = {
            trace_id: traceId,
            timestamp: new Date().toISOString(),
            model,
            prompt_version: promptBundle.prompt_version,
            schema_version: schemaVersion,
            case_id: context?.case_id,
            user_question_raw: payload.userQuestion,
            context: {
                period_hint: context?.period_hint ?? null,
                business_context: context?.business_context ?? null,
                expected_route: context?.expected_route ?? null,
                case_id: context?.case_id ?? null,
                eval_mode: context?.eval_mode ?? null,
                trace_completeness_issues: traceCompletenessIssues
            },
            // The API key is masked before the payload reaches the redaction helper.
            request_payload_redacted: (0, traceLogger_1.redactRequestPayload)({
                ...payload,
                apiKey: payload.apiKey ? "***REDACTED***" : undefined
            }),
            raw_model_response: rawOutputForTrace,
            parsed_normalized_json: normalized,
            validation_result: validation,
            route_hint_summary: routeHintSummary,
            route_hint: routeHintForHistory(normalized, routeHintSummary),
            confidence: confidenceForHistory(normalized, routeHintSummary),
            usage,
            latency_ms: latency,
            expected_route: context?.expected_route,
            eval_label: context?.eval_label,
            eval_mode: context?.eval_mode,
            request_count_for_case: requestCountForCase
        };
        (0, traceLogger_1.saveTrace)(traceRecord);
        if (payload.saveAsTestCase && normalized?.schema_version === "normalized_query_v1") {
            (0, traceLogger_1.saveEvalCase)({
                case_id: `NQ-${Date.now()}`,
                raw_question: payload.userQuestion,
                expected: {
                    intent_class: normalized.intent_class,
                    route_hint: normalized.route_hint,
                    requires: {
                        needs_cross_entity_join: normalized.requires.needs_cross_entity_join,
                        needs_causal_chain: normalized.requires.needs_causal_chain
                    },
                    accounts_mentioned: normalized.accounts_mentioned,
                    expected_output_shape: normalized.expected_output_shape
                }
            });
        }
        return response;
    }
}
|
||
// Expose the service class on the CommonJS exports object.
exports.NormalizerService = NormalizerService;
|