NODEDC_1C/llm_normalizer/backend/dist/services/normalizerService.js

946 lines
44 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.NormalizerService = void 0;
const nanoid_1 = require("nanoid");
const config_1 = require("../config");
const promptBuilder_1 = require("./promptBuilder");
const routeHintAdapter_1 = require("./routeHintAdapter");
const schemaValidator_1 = require("./schemaValidator");
const traceLogger_1 = require("./traceLogger");
// Retry instructions, one per supported schema version. Each string names the
// exact schema identifier the model output must match and forbids markdown
// wrapping. They are passed as `controlledRetryInstruction` on the second model
// call made by NormalizerService.normalize.
const RETRY_INSTRUCTION_V1 = "IMPORTANT: return valid JSON strictly matching schema normalized_query_v1. No markdown.";
const RETRY_INSTRUCTION_V2 = "IMPORTANT: return valid JSON strictly matching schema normalized_query_v2. No markdown.";
const RETRY_INSTRUCTION_V2_0_1 = "IMPORTANT: return valid JSON strictly matching schema normalized_query_v2_0_1. No markdown.";
const RETRY_INSTRUCTION_V2_0_2 = "IMPORTANT: return valid JSON strictly matching schema normalized_query_v2_0_2. No markdown.";
/**
 * Parses text as JSON after removing an optional surrounding markdown fence.
 *
 * A leading "```json" or "```" marker and a trailing "```" marker are stripped
 * (case-insensitively) before parsing.
 *
 * @param {string} text - Raw text to parse.
 * @returns {*} The parsed JSON value.
 * @throws {SyntaxError} When the remaining text is not valid JSON.
 */
function safeJsonParse(text) {
    const fenceMarkers = [/^```json\s*/i, /^```\s*/i, /```$/i];
    let body = text.trim();
    for (const marker of fenceMarkers) {
        body = body.replace(marker, "");
    }
    return JSON.parse(body.trim());
}
/**
 * Determines which normalized-query schema version a request targets.
 *
 * An explicit `payload.schemaVersion` wins when it is one of the known short
 * names ("v1", "v2", "v2_0_1", "v2_0_2") or their "normalized_query_" forms.
 * Otherwise the version is derived from the prompt version (falling back to
 * the configured default prompt version): any "normalizer_v2*" prompt maps to
 * a v2 family schema, everything else maps to "v1".
 *
 * @param {object} payload - Request payload; reads `schemaVersion` and `promptVersion`.
 * @returns {"v1"|"v2"|"v2_0_1"|"v2_0_2"} The resolved schema version.
 */
function resolveSchemaVersion(payload) {
    const requested = String(payload.schemaVersion ?? "").toLowerCase().trim();
    for (const version of ["v2_0_2", "v2_0_1", "v2", "v1"]) {
        if (requested === version || requested === `normalized_query_${version}`) {
            return version;
        }
    }
    const prompt = String(payload.promptVersion ?? config_1.DEFAULT_PROMPT_VERSION).toLowerCase().trim();
    if (!prompt.startsWith("normalizer_v2")) {
        return "v1";
    }
    switch (prompt) {
        case "normalizer_v2_0_2":
            return "v2_0_2";
        case "normalizer_v2_0_1":
            return "v2_0_1";
        default:
            return "v2";
    }
}
/**
 * Tells whether a raw model response was cut off by the output-token limit.
 *
 * @param {*} rawModelResponse - Raw response; only objects are inspected.
 * @returns {boolean} True when `status` is "incomplete" and
 *   `incomplete_details.reason` is "max_output_tokens" (case-insensitive).
 */
function shouldEscalateOutputBudget(rawModelResponse) {
    const isInspectable = Boolean(rawModelResponse) && typeof rawModelResponse === "object";
    if (!isInspectable) {
        return false;
    }
    const lowered = (value) => String(value ?? "").toLowerCase();
    const status = lowered(rawModelResponse.status);
    const reason = lowered((rawModelResponse.incomplete_details ?? {}).reason);
    return status === "incomplete" && reason === "max_output_tokens";
}
/**
 * Computes the output-token budget for the retry request.
 *
 * When the previous response was truncated by the token limit, the budget is
 * raised by the larger of +400 tokens or x1.6, with the growth capped at 2400.
 * Otherwise the current budget is returned unchanged.
 *
 * @param {number} current - Budget used for the previous attempt.
 * @param {*} rawModelResponse - Raw response of the previous attempt.
 * @returns {number} Budget for the retry; never lower than `current`.
 */
function computeRetryMaxOutputTokens(current, rawModelResponse) {
    if (!shouldEscalateOutputBudget(rawModelResponse)) {
        return current;
    }
    const escalationCap = 2400;
    const escalated = Math.max(current + 400, Math.ceil(current * 1.6));
    // The cap only limits growth. If the caller already configured a budget
    // above the cap, retrying a truncated answer with fewer tokens would make
    // the truncation worse, so the current budget is the floor.
    return Math.max(current, Math.min(escalated, escalationCap));
}
/**
 * Locates year-first date tokens (20YY-MM or 20YY-MM-DD, separated by "-",
 * "/" or ".") and reports their character ranges.
 *
 * @param {string} text - Text to scan.
 * @returns {Array<{start: number, end: number}>} Half-open [start, end) ranges
 *   in order of appearance.
 */
function collectDateSpans(text) {
    const yearFirstDate = /\b20\d{2}[-/.](?:0[1-9]|1[0-2])(?:[-/.](?:0[1-9]|[12]\d|3[01]))?\b/g;
    return Array.from(String(text).matchAll(yearFirstDate), (hit) => ({
        start: hit.index,
        end: hit.index + hit[0].length
    }));
}
/**
 * Checks whether the half-open range [start, end) overlaps any given span.
 *
 * @param {number} start - Inclusive start of the range.
 * @param {number} end - Exclusive end of the range.
 * @param {Array<{start: number, end: number}>} spans - Spans to test against.
 * @returns {boolean} True when at least one span overlaps the range.
 */
function intersectsAnySpan(start, end, spans) {
    for (const { start: spanStart, end: spanEnd } of spans) {
        if (start < spanEnd && end > spanStart) {
            return true;
        }
    }
    return false;
}
/**
 * Extracts account numbers ("NN" or "NN.NN") mentioned in free text.
 *
 * Numbers introduced by an account keyword (Russian "счет"/"сч.", English
 * "account", translit "schet") are preferred: when at least one is found,
 * only those are returned. Otherwise every standalone two-digit number (with
 * an optional ".NN" part) that does not overlap a year-first date is returned.
 *
 * @param {*} text - Text to scan; null/undefined are treated as empty.
 * @returns {string[]} Unique account numbers in order of first appearance.
 */
function extractAccounts(text) {
    const lower = String(text ?? "").toLowerCase();
    // JavaScript's \b only knows ASCII word characters, so "\bсчет\b" can never
    // match a Cyrillic keyword surrounded by spaces. Unicode-aware lookarounds
    // are used as word boundaries instead. "сч." may be glued to the number
    // ("сч.60"). The trailing (?!\d) stops the capture from truncating a longer
    // number, e.g. the year in "account 2020".
    const contextualPattern = /(?<![\p{L}\p{N}_])(?:(?:сч(?:е|ё)т(?:а|у|ом|ов)?|accounts?|schet(?:a|u|om|ov)?|сч)(?![\p{L}\p{N}_])|сч\.)\s*(?:№|#|:)?\s*(\d{2}(?:\.\d{2})?)(?!\d)/giu;
    const explicitAccounts = new Set();
    for (const contextual of lower.matchAll(contextualPattern)) {
        explicitAccounts.add(contextual[1]);
    }
    if (explicitAccounts.size > 0) {
        return Array.from(explicitAccounts);
    }
    // No keyword found: fall back to bare numbers, skipping parts of dates.
    const spans = collectDateSpans(lower);
    const extracted = new Set();
    for (const generic of lower.matchAll(/\b\d{2}(?:\.\d{2})?\b/g)) {
        const start = generic.index;
        const end = start + generic[0].length;
        if (!intersectsAnySpan(start, end, spans)) {
            extracted.add(generic[0]);
        }
    }
    return Array.from(extracted);
}
/**
 * Picks a v1 route hint from keyword patterns found in the question.
 *
 * Checks run in priority order: exact object trace, cross-entity chain,
 * period-close risk or heavy overview, risk probe or rule control. The first
 * matching group decides the route; with no match the canonical store route
 * is returned.
 *
 * @param {string} question - User question.
 * @returns {string} One of "live_mcp_drilldown", "hybrid_store_plus_live",
 *   "batch_refresh_then_store", "store_feature_risk", "store_canonical".
 */
function detectRouteByHeuristicsV1(question) {
    const q = question.toLowerCase();
    const hasExactTrace = /(документ\s*(№|#)|\bref\b|\bid\b|строк[аи].*проводк|конкретн(ый|ого|ая).*документ|точн(ый|ого).*источник|trx-\d+|inv-\d+)/i.test(q);
    const hasCrossChain = /(разлож|цепоч|чем подтверж|связк|документ.*оплат|закрывающ|взаиморасчет|хвост.*(документ|оплат|проводк))/i.test(q);
    const hasPeriodCloseRisk = /(предзакры|закрыти[ея].*период|перед сдачей отчетност|последн(ий|его).*(день|дня)|срыв.*закрыт|может взорвать)/i.test(q);
    const hasHeavyOverview = /(рейтинг|топ|в целом|обзор|приоритиз|company|самых|концентрац|срез)/i.test(q);
    const hasRiskProbe = /(аномал|подозр|зоны риска|ручной ошиб|подозрительн|риск|хвост)/i.test(q);
    // "ос" must be a standalone word. \b is ASCII-only in JavaScript and never
    // fires between a space and a Cyrillic letter, so the boundary is expressed
    // with Unicode-aware lookarounds.
    const hasRuleControl = /(контрол|правил|ошибк.*дат|срок.*амортиз|настройк|\b97\b|(?<![\p{L}\p{N}_])ос(?![\p{L}\p{N}_])|68\.02|ндс)/iu.test(q);
    if (hasExactTrace) {
        return "live_mcp_drilldown";
    }
    if (hasCrossChain) {
        return "hybrid_store_plus_live";
    }
    if (hasPeriodCloseRisk || hasHeavyOverview) {
        return "batch_refresh_then_store";
    }
    if (hasRiskProbe || hasRuleControl) {
        return "store_feature_risk";
    }
    return "store_canonical";
}
/**
 * Builds a deterministic `normalized_query_v1` object without calling a model.
 *
 * The route comes from `expectedRoute` when given, otherwise from the keyword
 * heuristics. Intent class, output shape, requirement flags, ambiguities and
 * confidence are then derived from the route plus keyword checks on the
 * question. When a period is mentioned, the period value is the fixed
 * placeholder "2020-06".
 *
 * @param {string} userQuestion - Raw user question.
 * @param {string} [expectedRoute] - Route to use instead of the heuristic one.
 * @returns {object} Mock normalized query in v1 shape.
 */
function buildMockNormalizedV1(userQuestion, expectedRoute) {
    const lowered = userQuestion.toLowerCase();
    const route = expectedRoute ?? detectRouteByHeuristicsV1(userQuestion);
    const periodMentioned = /(январ|феврал|март|апрел|май|июн|июл|август|сентябр|октябр|ноябр|декабр|квартал|период|конец месяца|20\d{2})/i.test(userQuestion);
    const heavyGoal = /(рейтинг|топ|обзор|приоритиз|срез|в целом|концентрац|самых)/i.test(lowered);
    const closeRisk = /(предзакры|закрыти[ея].*период|срыв.*закрыт|последн.*день)/i.test(lowered);
    const ruleTopic = /(правил|контрол|ошибк.*дат|амортиз|настройк|\b97\b|ндс|\b01\b|\b02\b)/i.test(lowered);
    const anomalyTopic = /(аномал|подозр|риск|хвост|не сход|завис|крив)/i.test(lowered);
    const isDrilldown = route === "live_mcp_drilldown";
    const isHybrid = route === "hybrid_store_plus_live";
    const isBatch = route === "batch_refresh_then_store";
    // Intent class follows the route; two routes refine it by keyword.
    let intent;
    switch (route) {
        case "live_mcp_drilldown":
            intent = "drilldown_explain";
            break;
        case "hybrid_store_plus_live":
            intent = "cross_entity";
            break;
        case "batch_refresh_then_store":
            intent = closeRisk && !heavyGoal ? "period_close_risk" : "heavy_analytical";
            break;
        case "store_feature_risk":
            if (ruleTopic) {
                intent = "rule_based_account_control";
            }
            else if (anomalyTopic) {
                intent = "anomaly_probe";
            }
            else {
                intent = "ambiguous_human_query";
            }
            break;
        default:
            intent = "simple_factual";
    }
    // Output shape: the first matching rule wins.
    let outputShape = "point_answer";
    if (intent === "period_close_risk") {
        outputShape = "prioritized_review_list";
    }
    else if (isBatch) {
        outputShape = "ranked_list";
    }
    else if (isHybrid) {
        outputShape = "reconciliation_report";
    }
    else if (isDrilldown) {
        outputShape = "evidence_chain";
    }
    else if (anomalyTopic) {
        outputShape = "anomaly_summary";
    }
    const periodDependentLevel = periodMentioned ? "medium" : "low";
    const ambiguities = [];
    if (!periodMentioned) {
        ambiguities.push({
            field: "period_scope",
            reason: "period is not explicitly provided",
            severity: "medium"
        });
    }
    return {
        schema_version: "normalized_query_v1",
        user_question_raw: userQuestion,
        normalized_question: userQuestion.trim(),
        intent_class: intent,
        business_problem_type: "normalization_playground",
        domain_entities: isHybrid ? ["контрагент", "документ", "проводка"] : ["счет"],
        accounts_mentioned: extractAccounts(userQuestion),
        documents_mentioned: /документ|реализац|поступлен|выписк|платеж/i.test(userQuestion) ? ["документ"] : [],
        registers_mentioned: /регистр|движен/i.test(userQuestion) ? ["регистр"] : [],
        period_scope: {
            type: periodMentioned ? "inferred" : "missing",
            value: periodMentioned ? "2020-06" : null,
            confidence: periodDependentLevel
        },
        requires: {
            needs_cross_entity_join: isHybrid,
            needs_causal_chain: isHybrid || /почему|чем подтверж|где рвется/i.test(userQuestion),
            needs_exact_object_trace: isDrilldown,
            needs_ranking: isBatch && intent !== "period_close_risk",
            needs_anomaly_summary: anomalyTopic && !isHybrid,
            needs_runtime_truth: isDrilldown,
            needs_period_cut: periodMentioned,
            needs_evidence: isHybrid || isDrilldown
        },
        expected_output_shape: outputShape,
        route_hint: route,
        ambiguities,
        confidence: {
            overall: periodDependentLevel,
            intent_class: "medium",
            route_hint: periodDependentLevel
        }
    };
}
/**
 * Caps over-confident v1 results that look uncertain.
 *
 * A result is treated as suspicious when it lists ambiguities, the question
 * has 20 or more words, the period is not explicit, or the question combines
 * period-close wording with overview/ranking wording. For suspicious results
 * a "high" `overall` or `route_hint` confidence is lowered to "medium"; other
 * levels are left as they are.
 *
 * @param {object} item - Normalized v1 object.
 * @returns {object} The same object when nothing is suspicious, otherwise a
 *   shallow copy with adjusted confidence.
 */
function applyConfidenceGuardV1(item) {
    const question = item.user_question_raw;
    const words = question.trim().split(/\s+/).filter(Boolean);
    const mixesCloseAndOverview = /(предзакры|закрыти[ея].*период|перед сдачей отчетност|перед закрытием)/i.test(question) &&
        /(рейтинг|топ|обзор|summary|срез|концентрац|в целом|приоритиз|самых)/i.test(question);
    const warningSigns = [
        item.ambiguities.length > 0,
        words.length >= 20,
        item.period_scope.type !== "explicit",
        mixesCloseAndOverview
    ];
    if (!warningSigns.includes(true)) {
        return item;
    }
    const capAtMedium = (level) => (level === "high" ? "medium" : level);
    return {
        ...item,
        confidence: {
            ...item.confidence,
            overall: capAtMedium(item.confidence.overall),
            route_hint: capAtMedium(item.confidence.route_hint)
        }
    };
}
function splitIntoCandidateFragments(message) {
const primary = message
.split(/[\n;]+|(?<=[.!?])\s+/)
.map((item) => item.replace(/^\s*[-*•]\s*/, "").trim())
.filter(Boolean);
if (primary.length > 0) {
return primary;
}
const fallback = message.trim();
return fallback ? [fallback] : [];
}
/**
 * Derives a time scope descriptor from text.
 *
 * A "20YY" year, optionally followed by a month, yields an explicit scope with
 * high confidence. Otherwise a month name or a generic period word (Russian)
 * yields an inferred scope with medium confidence. With neither, the scope is
 * missing with low confidence.
 *
 * @param {string} text - Text to inspect.
 * @returns {{type: string, value: (string|null), confidence: string}} Time scope.
 */
function inferTimeScope(text) {
    const scope = (type, value, confidence) => ({ type, value, confidence });
    const explicitHit = text.match(/\b(20\d{2}(?:[-/.](?:0[1-9]|1[0-2]))?)\b/);
    if (explicitHit) {
        return scope("explicit", explicitHit[1], "high");
    }
    const inferredHit = text.match(/(январ[ья]|феврал[ья]|март[ае]?|апрел[ья]|ма[йя]|июн[ьяе]?|июл[ьяе]?|август[ае]?|сентябр[ьяе]?|октябр[ьяе]?|ноябр[ьяе]?|декабр[ьяе]?|квартал|конец месяца|период)/i);
    return inferredHit
        ? scope("inferred", inferredHit[1], "medium")
        : scope("missing", null, "low");
}
/**
 * Maps fragment flags to candidate intent labels.
 *
 * Fragments that are not in scope get no labels. In-scope fragments get one
 * label per matching rule, in a fixed order, or "simple_factual" when no rule
 * matches.
 *
 * @param {object} flags - Boolean flags computed for the fragment.
 * @param {string} domainRelevance - Domain relevance of the fragment.
 * @returns {string[]} Candidate labels without duplicates.
 */
function pickCandidateLabels(flags, domainRelevance) {
    if (domainRelevance !== "in_scope") {
        return [];
    }
    const rules = [
        [flags.asks_for_exact_object_trace, "drilldown_explain"],
        [flags.has_multi_entity_scope && flags.asks_for_chain_explanation, "cross_entity"],
        [flags.asks_for_rule_check, "rule_based_account_control"],
        [flags.asks_for_anomaly_scan, "anomaly_probe"],
        [flags.asks_for_ranking_or_top || flags.asks_for_period_summary, "heavy_analytical"],
        [flags.mentions_period_close_context && !flags.asks_for_ranking_or_top, "period_close_risk"]
    ];
    const picked = rules.filter(([applies]) => applies).map(([, label]) => label);
    return picked.length > 0 ? [...new Set(picked)] : ["simple_factual"];
}
/**
 * Builds a v2 fragment descriptor from one candidate piece of the message.
 *
 * The fragment is classified as in scope, out of scope or unclear from keyword
 * groups (Russian and translit). Intent flags, entity/document/register hints,
 * account hints, time scope, candidate labels and a confidence level are
 * attached.
 *
 * @param {string} rawText - Candidate fragment text.
 * @param {number} index - Zero-based position of the fragment in the message;
 *   used to build the "F<n>" identifier.
 * @returns {object|null} Fragment descriptor, or null when the text is shorter
 *   than three characters or consists only of a filler word.
 */
function buildFragmentV2(rawText, index) {
    const text = rawText.trim();
    if (text.length < 3) {
        return null;
    }
    const lower = text.toLowerCase();
    const noiseOnly = /^(ну|короче|типа|ладно|ага|ок(ей)?)$/i.test(lower);
    if (noiseOnly) {
        return null;
    }
    // "ос" (fixed assets) is matched only as a standalone word. As a bare
    // substring it also hits ordinary words such as "просто", "вопрос" or
    // "остатки", which marks unrelated text as in scope.
    const inScopeTokens = /(проводк|документ|реализац|поступлен|взаиморасчет|сальдо|остатк|сч(?:е|ё)т|ндс|амортиз|расходы будущих периодов|рбп|(?<![\p{L}\p{N}_])ос(?![\p{L}\p{N}_])|контрагент|оплат|банк|выписк|склад|товар|материал|списани|жизненн|цикл|переход|lifecycle|writeoff|deferred)/iu.test(lower);
    const translitInScopeTokens = /\b(?:schet|scheta|schetu|schetom|postavsh|kontragent|dokument|doc|oplata|oplati|platezh|vypisk|provodk|realiz|postuplen|nds|os|saldo|hvost|tail|anomali|risk|zakryt|lifecycle|state|transition|writeoff|deferred|periodclose)\b/i.test(lower);
    const lifecycleInScopeTokens = /(lifecycle|жизненн(?:ого|ый)?\s+цикл|стади|переход|списани|writeoff|deferred|period\s*close)/i.test(lower);
    const genericAccountingTokens = /(фсбу|налогов(ый|ого)|нк рф|закон|форма отчетности|как правильно в бухгалтерии)/i.test(lower);
    const offTopicTokens = /(погода|анекдот|музык|фильм|игр[аы]|рецепт|курс валют в мире)/i.test(lower);
    let domainRelevance = "unclear";
    let businessScope = "unclear";
    if (offTopicTokens) {
        domainRelevance = "out_of_scope";
        businessScope = "offtopic";
    }
    else if (genericAccountingTokens && !inScopeTokens && !translitInScopeTokens) {
        domainRelevance = "out_of_scope";
        businessScope = "generic_accounting";
    }
    else if (inScopeTokens || translitInScopeTokens || lifecycleInScopeTokens) {
        domainRelevance = "in_scope";
        businessScope = "company_specific_accounting";
    }
    const entityTokenCount = (lower.match(/(документ|оплат|проводк|контрагент|договор|реализац|поступлен|выписк|закрыт|взаиморасчет|склад|товар|материал|поставщ|покупат|списани|жизненн|цикл)/g) ?? [])
        .length;
    const translitEntityTokenCount = (lower.match(/\b(?:dokument|oplata|platezh|provodk|kontragent|postavsh|pokupat|realiz|postuplen|vypisk|zakryt|schet|sklad|tovar|material)\b/g) ?? []).length;
    const entityTokenCountTotal = entityTokenCount + translitEntityTokenCount;
    const flags = {
        has_multi_entity_scope: entityTokenCountTotal >= 2,
        asks_for_chain_explanation: /(цепоч|разлож|почему|чем подтверж|где рвет|связк|логик.*операц)/i.test(lower),
        asks_for_ranking_or_top: /(топ|рейтинг|сам(ые|ых)|максимальн|сильнее всего|приоритиз)/i.test(lower),
        asks_for_period_summary: /(срез|обзор|в целом|картина периода|summary|по периоду)/i.test(lower),
        asks_for_rule_check: /(правил|контрол|корректн|ошибк.*дат|срок списан|амортиз|настройк|проверь)/i.test(lower),
        asks_for_anomaly_scan: /(аномал|подозр|риск|хвост|не сход|завис|крив|искажа)/i.test(lower),
        asks_for_exact_object_trace: /(документ\s*(№|#)|\bref\b|\bid\b|строк[аи]\s+проводк|операц.*№|trx-\d+|inv-\d+|doc-\d+)/i.test(lower),
        asks_for_evidence: /(чем подтверж|документ|проводк|движен|акт сверк|доказат|evidence)/i.test(lower),
        mentions_period_close_context: /(закрыти[ея]\s+период|предзакры|конец месяца|сдач[аи]\s+отчетност)/i.test(lower)
    };
    // Translit wording can only switch a flag on, never off.
    const translitHints = {
        chain: /\b(?:razlozh|pochemu|chem podtver|gde rv|svyaz|razryv|chain)\b/i.test(lower),
        rule: /\b(?:prover|check|rule|control|korrekt)\b/i.test(lower),
        anomaly: /\b(?:anomal|risk|hvost|tail|mismatch)\b/i.test(lower),
        evidence: /\b(?:dokument|provodk|evidence|doc)\b/i.test(lower)
    };
    if (translitHints.chain)
        flags.asks_for_chain_explanation = true;
    if (translitHints.rule)
        flags.asks_for_rule_check = true;
    if (translitHints.anomaly)
        flags.asks_for_anomaly_scan = true;
    if (translitHints.evidence)
        flags.asks_for_evidence = true;
    const candidateLabels = pickCandidateLabels(flags, domainRelevance);
    let confidence = "medium";
    if (domainRelevance === "out_of_scope" || domainRelevance === "unclear") {
        confidence = "low";
    }
    else if (flags.asks_for_exact_object_trace || flags.asks_for_ranking_or_top) {
        confidence = "high";
    }
    const uniqueMatches = (pattern) => Array.from(new Set(Array.from(lower.matchAll(pattern)).map((item) => item[0])));
    return {
        fragment_id: `F${index + 1}`,
        raw_fragment_text: text,
        normalized_fragment_text: text.charAt(0).toUpperCase() + text.slice(1),
        domain_relevance: domainRelevance,
        business_scope: businessScope,
        // Same standalone-word rule for "ос" as in the scope check above.
        entity_hints: uniqueMatches(/(поставщик|покупател|контрагент|договор|банк|склад|товар|материал|(?<![\p{L}\p{N}_])ос(?![\p{L}\p{N}_])|взаиморасчет|реализац|поступлен)/gu),
        account_hints: extractAccounts(text),
        document_hints: uniqueMatches(/(документ|реализац|поступлен|платеж|выписк|акт сверк)/g),
        register_hints: uniqueMatches(/(регистр|движен|остатк|сальдо)/g),
        time_scope: inferTimeScope(text),
        flags,
        candidate_labels: candidateLabels,
        confidence
    };
}
/**
 * Builds a deterministic `normalized_query_v2` object without calling a model.
 *
 * The message is split into candidate fragments; pieces rejected by the
 * fragment builder are listed as discarded. Message-level scope, scope
 * confidence and the clarification note are derived from the kept fragments.
 *
 * @param {string} userMessage - Raw user message.
 * @returns {object} Mock normalized query in v2 shape.
 */
function buildMockNormalizedV2(userMessage) {
    const fragments = [];
    const discarded = [];
    for (const [position, rawText] of splitIntoCandidateFragments(userMessage).entries()) {
        const fragment = buildFragmentV2(rawText, position);
        if (fragment) {
            fragments.push(fragment);
        }
        else {
            discarded.push({
                raw_fragment_text: rawText,
                reason: "noise_or_too_short"
            });
        }
    }
    const countWithRelevance = (relevance) => fragments.filter((item) => item.domain_relevance === relevance).length;
    const inScopeCount = countWithRelevance("in_scope");
    const unclearCount = countWithRelevance("unclear");
    const messageInScope = inScopeCount > 0;
    let scopeConfidence = "low";
    if (messageInScope) {
        scopeConfidence = unclearCount > 0 ? "medium" : "high";
    }
    const needsClarification = messageInScope &&
        (unclearCount > 0 || fragments.some((item) => item.time_scope.type === "missing"));
    return {
        schema_version: "normalized_query_v2",
        user_message_raw: userMessage,
        message_in_scope: messageInScope,
        scope_confidence: scopeConfidence,
        contains_multiple_tasks: fragments.length > 1,
        fragments,
        discarded_fragments: discarded,
        global_notes: {
            needs_clarification: needsClarification,
            clarification_reason: needsClarification ? "Недостаточно периода/контекста по части фрагментов." : null
        }
    };
}
/**
 * Tells whether the session context already implies an analysis period.
 *
 * @param {object} [context] - Session context; reads `period_hint` and
 *   `business_context`.
 * @returns {boolean} True when `period_hint` is non-blank or the business
 *   context text contains one of the known period markers.
 */
function hasSessionPeriodContext(context) {
    if (!context) {
        return false;
    }
    const hint = String(context.period_hint ?? "").trim();
    const business = String(context.business_context ?? "").toLowerCase();
    const periodMarkers = ["current_analysis_period", "active_period", "рабочий месяц", "активный период"];
    return hint.length > 0 || periodMarkers.some((marker) => business.includes(marker));
}
/**
 * Tells whether an in-scope fragment carries any concrete business signal.
 *
 * @param {object} fragment - Fragment descriptor.
 * @returns {boolean} False for fragments that are not in scope; otherwise true
 *   when any hint list or the candidate labels are non-empty, or any flag is
 *   truthy.
 */
function hasBusinessNodeSignals(fragment) {
    if (fragment.domain_relevance !== "in_scope") {
        return false;
    }
    const signalLists = ["entity_hints", "account_hints", "document_hints", "register_hints", "candidate_labels"];
    if (signalLists.some((key) => fragment[key].length > 0)) {
        return true;
    }
    return Object.values(fragment.flags).some(Boolean);
}
/**
 * Tells whether a route can be chosen for the fragment.
 *
 * @param {object} fragment - Fragment descriptor.
 * @returns {boolean} True only for in-scope fragments whose business scope is
 *   not "unclear" and that carry business signals.
 */
function routeCanBeSelected(fragment) {
    const scopeResolved = fragment.domain_relevance === "in_scope" && fragment.business_scope !== "unclear";
    return scopeResolved ? hasBusinessNodeSignals(fragment) : false;
}
/**
 * Removes duplicate entries while keeping first-occurrence order.
 *
 * @param {Iterable<string>} input - Soft assumption identifiers.
 * @returns {string[]} A new array without duplicates.
 */
function dedupeSoftAssumptions(input) {
    return [...new Set(input)];
}
/**
 * Decides whether a fragment can be executed as is, executed with soft
 * assumptions, or needs clarification first.
 *
 * Blocking checks run in order: out of scope, unclear domain, no business
 * signals, route not selectable, and a missing period that is critical for
 * the request and not supplied by the session context. Fragments that pass
 * collect soft assumptions (period taken from the session, problem-scan mode,
 * company scope defaulted).
 *
 * @param {object} fragment - Fragment descriptor.
 * @param {object} [sessionContext] - Session context used for the period check.
 * @returns {{execution_readiness: string, clarification_reason: (string|null),
 *   soft_assumption_used: string[]}} Execution policy.
 */
function decideFragmentExecutionPolicy(fragment, sessionContext) {
    const blocked = (reason) => ({
        execution_readiness: "needs_clarification",
        clarification_reason: reason,
        soft_assumption_used: []
    });
    const sessionHasPeriod = hasSessionPeriodContext(sessionContext);
    const { flags } = fragment;
    const periodIsCritical = flags.asks_for_period_summary || flags.mentions_period_close_context || flags.asks_for_ranking_or_top;
    switch (fragment.domain_relevance) {
        case "out_of_scope":
            return blocked("fragment_out_of_scope");
        case "unclear":
            return blocked("domain_or_scope_unclear");
        default:
            break;
    }
    if (!hasBusinessNodeSignals(fragment)) {
        return blocked("business_area_not_identified");
    }
    if (!routeCanBeSelected(fragment)) {
        return blocked("route_cannot_be_selected_reliably");
    }
    const collected = [];
    if (fragment.time_scope.type === "missing") {
        if (sessionHasPeriod) {
            collected.push("period_from_session_context");
        }
        else if (periodIsCritical) {
            return blocked("critical_period_missing");
        }
    }
    const scanRequested = flags.asks_for_anomaly_scan ||
        flags.asks_for_rule_check ||
        flags.asks_for_ranking_or_top ||
        flags.asks_for_period_summary;
    if (scanRequested) {
        collected.push("problem_scan_mode_enabled");
    }
    const lacksAnchors = fragment.entity_hints.length === 0 && fragment.account_hints.length === 0;
    if (fragment.business_scope === "company_specific_accounting" && lacksAnchors) {
        collected.push("company_scope_defaulted");
    }
    const assumptions = dedupeSoftAssumptions(collected);
    return {
        execution_readiness: assumptions.length > 0 ? "executable_with_soft_assumptions" : "executable",
        clarification_reason: null,
        soft_assumption_used: assumptions
    };
}
/**
 * Extends a v2 fragment with the v2_0_1 execution-policy fields.
 *
 * @param {object} fragment - v2 fragment descriptor.
 * @param {object} [sessionContext] - Session context forwarded to the policy.
 * @returns {object} Copy of the fragment with `execution_readiness`,
 *   `clarification_reason` and `soft_assumption_used` set from the policy.
 */
function toV201Fragment(fragment, sessionContext) {
    const {
        execution_readiness,
        clarification_reason,
        soft_assumption_used
    } = decideFragmentExecutionPolicy(fragment, sessionContext);
    return { ...fragment, execution_readiness, clarification_reason, soft_assumption_used };
}
/**
 * Converts a v2-shaped candidate into a `normalized_query_v2_0_1` object by
 * applying the execution policy to every fragment.
 *
 * Entries of `fragments` that are not objects with a string `fragment_id` are
 * dropped. Clarification is requested at message level only when there are
 * in-scope fragments and every one of them needs clarification.
 *
 * @param {*} candidate - Parsed candidate object.
 * @param {string} userMessage - Fallback for `user_message_raw`.
 * @param {object} [sessionContext] - Session context forwarded to the policy.
 * @returns {object|null} The v2_0_1 object, or null when the candidate is not
 *   an object with a `fragments` array.
 */
function applyClarificationPolicyV201(candidate, userMessage, sessionContext) {
    const usable = Boolean(candidate) && typeof candidate === "object" && Array.isArray(candidate.fragments);
    if (!usable) {
        return null;
    }
    const fragments = [];
    for (const entry of candidate.fragments) {
        if (entry && typeof entry === "object" && typeof entry.fragment_id === "string") {
            fragments.push(toV201Fragment(entry, sessionContext));
        }
    }
    const inScope = fragments.filter((fragment) => fragment.domain_relevance === "in_scope");
    const blocking = inScope.filter((fragment) => fragment.execution_readiness === "needs_clarification");
    const hasInScope = inScope.length > 0;
    const everyInScopeBlocked = hasInScope && blocking.length === inScope.length;
    let clarificationReason = null;
    if (everyInScopeBlocked) {
        clarificationReason = blocking[0]?.clarification_reason ?? "clarification_required";
    }
    const hasMultipleTasks = typeof candidate.contains_multiple_tasks === "boolean"
        ? candidate.contains_multiple_tasks
        : fragments.length > 1;
    return {
        schema_version: "normalized_query_v2_0_1",
        user_message_raw: String(candidate.user_message_raw ?? userMessage),
        message_in_scope: hasInScope,
        scope_confidence: candidate.scope_confidence ?? (hasInScope ? "medium" : "low"),
        contains_multiple_tasks: hasMultipleTasks,
        fragments,
        discarded_fragments: Array.isArray(candidate.discarded_fragments) ? candidate.discarded_fragments : [],
        global_notes: {
            needs_clarification: everyInScopeBlocked,
            clarification_reason: clarificationReason
        }
    };
}
/**
 * Resolves the v2_0_2 execution state of a fragment: the v2_0_1 policy result
 * plus a routing status.
 *
 * Out-of-scope fragments and fragments without a selectable route end as
 * "no_route"; fragments that need clarification keep that readiness with
 * route status "no_route"; everything else is "routed" with the policy's
 * readiness and soft assumptions.
 *
 * @param {object} fragment - Fragment descriptor.
 * @param {object} [sessionContext] - Session context forwarded to the policy.
 * @returns {{execution_readiness: string, clarification_reason: (string|null),
 *   soft_assumption_used: string[], route_status: string,
 *   no_route_reason: (string|null)}} Execution state.
 */
function resolveFragmentExecutionStateV202(fragment, sessionContext) {
    const basePolicy = decideFragmentExecutionPolicy(fragment, sessionContext);
    const unrouted = (readiness, clarificationReason, noRouteReason) => ({
        execution_readiness: readiness,
        clarification_reason: clarificationReason,
        soft_assumption_used: [],
        route_status: "no_route",
        no_route_reason: noRouteReason
    });
    if (fragment.domain_relevance === "out_of_scope") {
        return unrouted("no_route", "fragment_out_of_scope", "out_of_scope");
    }
    if (basePolicy.execution_readiness === "needs_clarification") {
        return unrouted("needs_clarification", basePolicy.clarification_reason ?? "insufficient_specificity", "insufficient_specificity");
    }
    // NOTE(review): the base policy appears to report needs_clarification
    // whenever the route cannot be selected, so this guard looks unreachable
    // in practice; it is kept as a defensive check.
    if (!routeCanBeSelected(fragment)) {
        return unrouted("no_route", "route_mapping_missing", "missing_mapping");
    }
    // Deterministic no-route guard: a routable in-scope fragment is always
    // reported as routed and never left unresolved.
    return {
        execution_readiness: basePolicy.execution_readiness,
        clarification_reason: null,
        soft_assumption_used: basePolicy.soft_assumption_used,
        route_status: "routed",
        no_route_reason: null
    };
}
/**
 * Extends a v2 fragment with the v2_0_2 execution-state fields.
 *
 * @param {object} fragment - v2 fragment descriptor.
 * @param {object} [sessionContext] - Session context forwarded to the resolver.
 * @returns {object} Copy of the fragment with `execution_readiness`,
 *   `clarification_reason`, `soft_assumption_used`, `route_status` and
 *   `no_route_reason` set from the resolved state.
 */
function toV202Fragment(fragment, sessionContext) {
    const {
        execution_readiness,
        clarification_reason,
        soft_assumption_used,
        route_status,
        no_route_reason
    } = resolveFragmentExecutionStateV202(fragment, sessionContext);
    return {
        ...fragment,
        execution_readiness,
        clarification_reason,
        soft_assumption_used,
        route_status,
        no_route_reason
    };
}
/**
 * Converts a v2-shaped candidate into a `normalized_query_v2_0_2` object by
 * resolving the execution state of every fragment.
 *
 * Entries of `fragments` that are not objects with a string `fragment_id` are
 * dropped. Clarification is requested at message level only when there are
 * in-scope fragments and every one of them needs clarification.
 *
 * @param {*} candidate - Parsed candidate object.
 * @param {string} userMessage - Fallback for `user_message_raw`.
 * @param {object} [sessionContext] - Session context forwarded to the resolver.
 * @returns {object|null} The v2_0_2 object, or null when the candidate is not
 *   an object with a `fragments` array.
 */
function applyExecutionStatePolicyV202(candidate, userMessage, sessionContext) {
    const usable = Boolean(candidate) && typeof candidate === "object" && Array.isArray(candidate.fragments);
    if (!usable) {
        return null;
    }
    const fragments = [];
    for (const entry of candidate.fragments) {
        if (entry && typeof entry === "object" && typeof entry.fragment_id === "string") {
            fragments.push(toV202Fragment(entry, sessionContext));
        }
    }
    const inScope = fragments.filter((fragment) => fragment.domain_relevance === "in_scope");
    const awaitingClarification = inScope.filter((fragment) => fragment.execution_readiness === "needs_clarification");
    const hasInScope = inScope.length > 0;
    const everyInScopeBlocked = hasInScope && awaitingClarification.length === inScope.length;
    let clarificationReason = null;
    if (everyInScopeBlocked) {
        clarificationReason = awaitingClarification[0]?.clarification_reason ?? "clarification_required";
    }
    const hasMultipleTasks = typeof candidate.contains_multiple_tasks === "boolean"
        ? candidate.contains_multiple_tasks
        : fragments.length > 1;
    return {
        schema_version: "normalized_query_v2_0_2",
        user_message_raw: String(candidate.user_message_raw ?? userMessage),
        message_in_scope: hasInScope,
        scope_confidence: candidate.scope_confidence ?? (hasInScope ? "medium" : "low"),
        contains_multiple_tasks: hasMultipleTasks,
        fragments,
        discarded_fragments: Array.isArray(candidate.discarded_fragments) ? candidate.discarded_fragments : [],
        global_notes: {
            needs_clarification: everyInScopeBlocked,
            clarification_reason: clarificationReason
        }
    };
}
/**
 * Builds a deterministic `normalized_query_v2_0_1` object without calling a
 * model: the v2 mock with the clarification policy applied.
 *
 * If the policy step yields nothing, every fragment is marked as needing
 * clarification with reason "policy_fallback".
 *
 * @param {string} userMessage - Raw user message.
 * @param {object} [sessionContext] - Session context forwarded to the policy.
 * @returns {object} Mock normalized query in v2_0_1 shape.
 */
function buildMockNormalizedV2_0_1(userMessage, sessionContext) {
    const base = buildMockNormalizedV2(userMessage);
    const withPolicy = applyClarificationPolicyV201(base, userMessage, sessionContext);
    if (withPolicy) {
        return withPolicy;
    }
    const fallbackReason = "policy_fallback";
    const blockedFragments = base.fragments.map((fragment) => ({
        ...fragment,
        execution_readiness: "needs_clarification",
        clarification_reason: fallbackReason,
        soft_assumption_used: []
    }));
    return {
        schema_version: "normalized_query_v2_0_1",
        user_message_raw: userMessage,
        message_in_scope: base.message_in_scope,
        scope_confidence: base.scope_confidence,
        contains_multiple_tasks: base.contains_multiple_tasks,
        fragments: blockedFragments,
        discarded_fragments: base.discarded_fragments,
        global_notes: {
            needs_clarification: true,
            clarification_reason: fallbackReason
        }
    };
}
/**
 * Builds a deterministic `normalized_query_v2_0_2` object without calling a
 * model: the v2 mock with the execution-state policy applied.
 *
 * If the policy step yields nothing, every fragment is marked as needing
 * clarification with reason "policy_fallback" and route status "no_route".
 *
 * @param {string} userMessage - Raw user message.
 * @param {object} [sessionContext] - Session context forwarded to the policy.
 * @returns {object} Mock normalized query in v2_0_2 shape.
 */
function buildMockNormalizedV2_0_2(userMessage, sessionContext) {
    const base = buildMockNormalizedV2(userMessage);
    const withPolicy = applyExecutionStatePolicyV202(base, userMessage, sessionContext);
    if (withPolicy) {
        return withPolicy;
    }
    const fallbackReason = "policy_fallback";
    const unroutedFragments = base.fragments.map((fragment) => ({
        ...fragment,
        execution_readiness: "needs_clarification",
        clarification_reason: fallbackReason,
        soft_assumption_used: [],
        route_status: "no_route",
        no_route_reason: "unsupported_fragment_type"
    }));
    return {
        schema_version: "normalized_query_v2_0_2",
        user_message_raw: userMessage,
        message_in_scope: base.message_in_scope,
        scope_confidence: base.scope_confidence,
        contains_multiple_tasks: base.contains_multiple_tasks,
        fragments: unroutedFragments,
        discarded_fragments: base.discarded_fragments,
        global_notes: {
            needs_clarification: true,
            clarification_reason: fallbackReason
        }
    };
}
/**
 * Picks the single route hint stored with a trace record.
 *
 * @param {object|null} normalized - Validated normalized object.
 * @param {object|null} routeSummary - Route hint summary.
 * @returns {string|null} For v1, the object's own `route_hint`. For other
 *   versions, the route of the first decision that is not "no_route" when the
 *   summary mode is "deterministic_v2". Null otherwise or when either
 *   argument is missing.
 */
function routeHintForHistory(normalized, routeSummary) {
    if (!normalized || !routeSummary) {
        return null;
    }
    if (normalized.schema_version === "normalized_query_v1") {
        return normalized.route_hint;
    }
    if (routeSummary.mode !== "deterministic_v2") {
        return null;
    }
    const firstRouted = routeSummary.decisions.find((item) => item.route !== "no_route");
    return firstRouted?.route ?? null;
}
/**
 * Picks the confidence value stored with a trace record.
 *
 * @param {object|null} normalized - Validated normalized object.
 * @param {object|null} routeSummary - Route hint summary; only its presence
 *   is checked.
 * @returns {string|null} For v1, `confidence.route_hint`; for other versions,
 *   `scope_confidence`; null when either argument is missing.
 */
function confidenceForHistory(normalized, routeSummary) {
    if (!(normalized && routeSummary)) {
        return null;
    }
    return normalized.schema_version === "normalized_query_v1"
        ? normalized.confidence.route_hint
        : normalized.scope_confidence;
}
/**
 * Lists what is missing from a trace before it is stored.
 *
 * Checks, in order: raw model output present, normalized object present,
 * fragments array present (non-v1 only), per-fragment execution fields
 * required by v2_0_1 / v2_0_2, a "deterministic_v2" route summary, and one
 * route decision per fragment.
 *
 * @param {object} input - Reads `rawModelResponse`, `normalized` and
 *   `routeHintSummary`.
 * @returns {string[]} Issue codes; empty when nothing is missing.
 */
function collectTraceCompletenessIssues(input) {
    const { rawModelResponse, normalized, routeHintSummary } = input;
    const issues = [];
    if (!rawModelResponse) {
        issues.push("missing_raw_model_output");
    }
    if (!normalized) {
        issues.push("missing_parsed_normalized_json");
        return issues;
    }
    const schema = normalized.schema_version;
    if (schema === "normalized_query_v1") {
        return issues;
    }
    if (!Array.isArray(normalized.fragments)) {
        issues.push("missing_parsed_fragments");
        return issues;
    }
    // Fields each fragment must carry depend on the schema version.
    const isV202 = schema === "normalized_query_v2_0_2";
    const expectsReadiness = isV202 || schema === "normalized_query_v2_0_1";
    const requiredFields = [
        ...(expectsReadiness ? ["execution_readiness"] : []),
        ...(isV202 ? ["route_status", "no_route_reason"] : [])
    ];
    for (const fragment of normalized.fragments) {
        for (const field of requiredFields) {
            if (!(field in fragment)) {
                issues.push(`fragment_${fragment.fragment_id}_missing_${field}`);
            }
        }
    }
    if (!routeHintSummary || routeHintSummary.mode !== "deterministic_v2") {
        issues.push("missing_route_hint_summary_v2");
        return issues;
    }
    const decisionCount = Array.isArray(routeHintSummary.decisions) ? routeHintSummary.decisions.length : 0;
    if (decisionCount !== normalized.fragments.length) {
        issues.push("route_decision_count_mismatch");
    }
    return issues;
}
/**
 * Runs one normalization request end to end: builds the prompt bundle, obtains
 * output from the model client (or a local mock), parses and validates it,
 * retries once when validation fails and the retry policy allows it, then
 * stores a trace record and returns the response object.
 */
class NormalizerService {
openaiClient;
/**
 * @param {object} openaiClient - Client whose `normalize(config, prompts)`
 *   result is read as `{ raw, outputText, usage }`.
 */
constructor(openaiClient) {
this.openaiClient = openaiClient;
}
/**
 * Normalizes a user question.
 *
 * @param {object} payload - Request payload. Reads `userQuestion`, `useMock`,
 *   `apiKey`, `model`, `baseUrl`, `temperature`, `maxOutputTokens`,
 *   `retryPolicy`, `schemaVersion`, `promptVersion`, the prompt override
 *   fields, `fewShotExamples`, `context` and `saveAsTestCase`.
 * @returns {Promise<object>} Response with `trace_id`, `ok`, `normalized`
 *   (null when validation failed), `route_hint_summary`, `raw_model_output`,
 *   `validation`, `usage`, `latency_ms`, `prompt_version`, `schema_version`
 *   and `request_count_for_case`.
 */
async normalize(payload) {
const traceId = (0, nanoid_1.nanoid)(14);
const startedAt = Date.now();
// Request settings fall back to configured defaults when not provided.
const model = payload.model ?? config_1.DEFAULT_MODEL;
const baseUrl = payload.baseUrl ?? config_1.DEFAULT_OPENAI_BASE_URL;
const temperature = payload.temperature ?? config_1.DEFAULT_TEMPERATURE;
const maxOutputTokens = payload.maxOutputTokens ?? config_1.DEFAULT_MAX_OUTPUT_TOKENS;
const retryPolicy = payload.retryPolicy ?? "default";
const schemaVersion = resolveSchemaVersion(payload);
const promptBundle = (0, promptBuilder_1.buildPromptBundle)({
promptVersion: payload.promptVersion,
systemPrompt: payload.systemPrompt,
developerPrompt: payload.developerPrompt,
domainPrompt: payload.domainPrompt,
schemaNotes: undefined,
fewShotExamples: payload.fewShotExamples
});
let rawModelResponse = null;
let outputText = "";
let usage = { input_tokens: 0, output_tokens: 0, total_tokens: 0 };
// Counts real model calls only; stays 0 on the mock path.
let requestCountForCase = 0;
if (payload.useMock) {
// Mock path: build the result locally for the resolved schema version.
const mock = schemaVersion === "v2"
? buildMockNormalizedV2(payload.userQuestion)
: schemaVersion === "v2_0_2"
? buildMockNormalizedV2_0_2(payload.userQuestion, payload.context)
: schemaVersion === "v2_0_1"
? buildMockNormalizedV2_0_1(payload.userQuestion, payload.context)
: buildMockNormalizedV1(payload.userQuestion, payload.context?.expected_route);
rawModelResponse = { mode: "mock", schema_version: schemaVersion };
outputText = JSON.stringify(mock, null, 2);
}
else {
// Live path: first model call. A missing API key is passed on as "".
const apiKey = payload.apiKey ?? process.env.OPENAI_API_KEY;
const firstTry = await this.openaiClient.normalize({
apiKey: String(apiKey ?? ""),
model,
baseUrl,
temperature,
maxOutputTokens
}, {
systemPrompt: promptBundle.systemPrompt,
developerPrompt: promptBundle.combinedDeveloperPrompt,
domainPrompt: promptBundle.domainPrompt,
userQuestion: payload.userQuestion,
schemaVersion
});
requestCountForCase += 1;
rawModelResponse = firstTry.raw;
outputText = firstTry.outputText;
usage = firstTry.usage;
}
let normalizedCandidate;
let validation = { passed: false, errors: ["NO_VALIDATION"] };
// Parse, apply the version-specific policy (v2_0_1 / v2_0_2 only), validate.
// NOTE(review): any exception thrown inside this block, including one from
// the policy step or the validator, is reported as JSON_PARSE_ERROR.
try {
normalizedCandidate = safeJsonParse(outputText);
if (schemaVersion === "v2_0_2") {
normalizedCandidate = applyExecutionStatePolicyV202(normalizedCandidate, payload.userQuestion, payload.context);
}
else if (schemaVersion === "v2_0_1") {
normalizedCandidate = applyClarificationPolicyV201(normalizedCandidate, payload.userQuestion, payload.context);
}
validation = (0, schemaValidator_1.validateNormalized)(normalizedCandidate, schemaVersion);
}
catch (error) {
normalizedCandidate = null;
validation = {
passed: false,
errors: [`JSON_PARSE_ERROR: ${error instanceof Error ? error.message : String(error)}`]
};
}
// NOTE(review): "single-pass-strict" also enables the retry here, which the
// name does not suggest; confirm this is intended.
const canRetry = retryPolicy === "default" || retryPolicy === "single-pass-strict";
if (!payload.useMock && !validation.passed && canRetry) {
// Single retry with an explicit schema reminder. The token budget changes
// only when computeRetryMaxOutputTokens decides to escalate it.
const retryMaxOutputTokens = computeRetryMaxOutputTokens(maxOutputTokens, rawModelResponse);
const retry = await this.openaiClient.normalize({
apiKey: String(payload.apiKey ?? process.env.OPENAI_API_KEY ?? ""),
model,
baseUrl,
temperature,
maxOutputTokens: retryMaxOutputTokens
}, {
systemPrompt: promptBundle.systemPrompt,
developerPrompt: promptBundle.combinedDeveloperPrompt,
domainPrompt: promptBundle.domainPrompt,
userQuestion: payload.userQuestion,
schemaVersion,
controlledRetryInstruction: schemaVersion === "v2"
? RETRY_INSTRUCTION_V2
: schemaVersion === "v2_0_2"
? RETRY_INSTRUCTION_V2_0_2
: schemaVersion === "v2_0_1"
? RETRY_INSTRUCTION_V2_0_1
: RETRY_INSTRUCTION_V1
});
requestCountForCase += 1;
// The retry result replaces the first attempt's raw response, text and usage.
rawModelResponse = retry.raw;
outputText = retry.outputText;
usage = retry.usage;
try {
normalizedCandidate = safeJsonParse(outputText);
if (schemaVersion === "v2_0_2") {
normalizedCandidate = applyExecutionStatePolicyV202(normalizedCandidate, payload.userQuestion, payload.context);
}
else if (schemaVersion === "v2_0_1") {
normalizedCandidate = applyClarificationPolicyV201(normalizedCandidate, payload.userQuestion, payload.context);
}
validation = (0, schemaValidator_1.validateNormalized)(normalizedCandidate, schemaVersion);
}
catch (error) {
normalizedCandidate = null;
validation = {
passed: false,
errors: [`JSON_PARSE_ERROR_AFTER_RETRY: ${error instanceof Error ? error.message : String(error)}`]
};
}
}
// Only a validated candidate is exposed. v1 additionally goes through the
// confidence guard; the other three branches all pass the candidate through.
let normalized = null;
if (validation.passed) {
if (schemaVersion === "v1") {
normalized = applyConfidenceGuardV1(normalizedCandidate);
}
else if (schemaVersion === "v2_0_2") {
normalized = normalizedCandidate;
}
else if (schemaVersion === "v2_0_1") {
normalized = normalizedCandidate;
}
else {
normalized = normalizedCandidate;
}
}
const routeHintSummary = normalized ? (0, routeHintAdapter_1.toRouteHintSummary)(normalized) : null;
const latency = Date.now() - startedAt;
// Completeness issues are logged and stored with the trace; they do not
// change the response.
const traceCompletenessIssues = collectTraceCompletenessIssues({
traceId,
schemaVersion,
rawModelResponse: rawModelResponse ?? outputText,
normalized,
routeHintSummary
});
if (traceCompletenessIssues.length > 0) {
console.error(`[trace-completeness] trace_id=${traceId} schema=${schemaVersion} issues=${traceCompletenessIssues.join(",")}`);
}
const response = {
trace_id: traceId,
ok: validation.passed,
normalized,
route_hint_summary: routeHintSummary,
raw_model_output: rawModelResponse ?? outputText,
validation,
usage,
latency_ms: latency,
prompt_version: promptBundle.prompt_version,
schema_version: schemaVersion,
request_count_for_case: requestCountForCase
};
const traceRouteHint = routeHintForHistory(normalized, routeHintSummary);
const traceConfidence = confidenceForHistory(normalized, routeHintSummary);
const traceRecord = {
trace_id: traceId,
timestamp: new Date().toISOString(),
model,
prompt_version: promptBundle.prompt_version,
schema_version: schemaVersion,
case_id: payload.context?.case_id,
user_question_raw: payload.userQuestion,
context: {
period_hint: payload.context?.period_hint ?? null,
business_context: payload.context?.business_context ?? null,
expected_route: payload.context?.expected_route ?? null,
case_id: payload.context?.case_id ?? null,
eval_mode: payload.context?.eval_mode ?? null,
trace_completeness_issues: traceCompletenessIssues
},
// The API key is masked before the payload is handed to the redaction helper.
request_payload_redacted: (0, traceLogger_1.redactRequestPayload)({
...payload,
apiKey: payload.apiKey ? "***REDACTED***" : undefined
}),
raw_model_response: rawModelResponse ?? outputText,
parsed_normalized_json: normalized,
validation_result: validation,
route_hint_summary: routeHintSummary,
route_hint: traceRouteHint,
confidence: traceConfidence,
usage,
latency_ms: latency,
expected_route: payload.context?.expected_route,
eval_label: payload.context?.eval_label,
eval_mode: payload.context?.eval_mode,
request_count_for_case: requestCountForCase
};
(0, traceLogger_1.saveTrace)(traceRecord);
// Eval cases are saved only on request and only for validated v1 results.
if (payload.saveAsTestCase && normalized?.schema_version === "normalized_query_v1") {
(0, traceLogger_1.saveEvalCase)({
case_id: `NQ-${Date.now()}`,
raw_question: payload.userQuestion,
expected: {
intent_class: normalized.intent_class,
route_hint: normalized.route_hint,
requires: {
needs_cross_entity_join: normalized.requires.needs_cross_entity_join,
needs_causal_chain: normalized.requires.needs_causal_chain
},
accounts_mentioned: normalized.accounts_mentioned,
expected_output_shape: normalized.expected_output_shape
}
});
}
return response;
}
}
exports.NormalizerService = NormalizerService;