NODEDC_1C/llm_normalizer/backend/dist/services/investigationState.js

471 lines
22 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.cloneInvestigationState = cloneInvestigationState;
exports.createEmptyInvestigationState = createEmptyInvestigationState;
exports.updateInvestigationState = updateInvestigationState;
const stage1Contracts_1 = require("../types/stage1Contracts");
const stage2ProblemUnits_1 = require("../types/stage2ProblemUnits");
/**
 * Trims every entry, drops empty ones and removes duplicates while keeping
 * first-seen order.
 *
 * Entries that are not strings are coerced with `String(...)`, matching the
 * `String(x ?? "")` convention used by callers in this module; `null` and
 * `undefined` entries are skipped instead of throwing on `.trim()`.
 *
 * @param {Iterable<unknown>|null|undefined} values Raw values; a missing list is treated as empty.
 * @returns {string[]} Unique, trimmed, non-empty strings.
 */
function uniqueStrings(values) {
    const seen = new Set();
    for (const item of values ?? []) {
        if (item == null) {
            continue;
        }
        const text = String(item).trim();
        if (text) {
            seen.add(text);
        }
    }
    return Array.from(seen);
}
/**
 * Normalizes a list through `uniqueStrings` and keeps only its leading part.
 *
 * @param {string[]} values Raw values to normalize.
 * @param {number} max End index handed to `Array.prototype.slice`.
 * @returns {string[]} The first `max` normalized entries.
 */
function capStrings(values, max) {
    const normalized = uniqueStrings(values);
    return normalized.slice(0, max);
}
/**
 * Extracts account-like tokens (`NN` or `NN.NN`) from free text.
 *
 * Calendar tokens are blanked out before scanning so that the month or day of
 * a date is not reported as an account: without this, "2024-01" yields the
 * account "01" and "15.03.2024" yields "15.03".
 *
 * @param {string|null|undefined} text Message text; nullish input yields no accounts.
 * @returns {string[]} Unique account tokens, capped at INVESTIGATION_MAX_PRIMARY_ACCOUNTS.
 */
function detectAccounts(text) {
    const withoutDates = String(text ?? "")
        // dd.mm.yyyy (also with "-" or "/"); day and month ranges are checked
        // so that account pairs such as "60/62" are never mistaken for a date.
        .replace(/\b(?:0[1-9]|[12]\d|3[01])[-/.](?:0[1-9]|1[0-2])[-/.](?:19|20)\d{2}\b/g, " ")
        // yyyy-mm with an optional day part.
        .replace(/\b20\d{2}[-/.](?:0[1-9]|1[0-2])(?:[-/.]\d{2})?\b/g, " ");
    const candidates = withoutDates.match(/\b\d{2}(?:\.\d{2})?\b/g) ?? [];
    return capStrings(candidates, stage1Contracts_1.INVESTIGATION_MAX_PRIMARY_ACCOUNTS);
}
/**
 * Finds the first period mentioned in the text.
 *
 * A year-month token (`20YY-MM`, also with `/` or `.`) wins over a bare year.
 *
 * @param {string} text Message text.
 * @returns {string|null} `"YYYY-MM"`, `"YYYY"`, or `null` when no period is found.
 */
function detectPeriod(text) {
    const monthHit = text.match(/\b(20\d{2})[-/.](0[1-9]|1[0-2])\b/);
    if (monthHit !== null) {
        const [, year, month] = monthHit;
        return `${year}-${month}`;
    }
    const yearHit = text.match(/\b(20\d{2})\b/);
    return yearHit === null ? null : yearHit[1];
}
/**
 * Looks for an explicit accounting-domain signal in a user message.
 *
 * Each domain is signalled either by a detected account belonging to it or by
 * a lexical marker. Domains are checked in a fixed order and the first match
 * wins: settlements, VAT, month close / costs, fixed assets.
 *
 * @param {string|null|undefined} text User message.
 * @returns {string|null} Domain identifier, or `null` when nothing matches.
 */
function detectExplicitDomainHint(text) {
    // Coerce once so the account scan and the lexical checks both tolerate
    // nullish input (previously only the lexical corpus was guarded).
    const rawText = String(text ?? "");
    const messageCorpus = rawText.toLowerCase();
    const accounts = detectAccounts(rawText);
    const hasSettlementSignal = accounts.some((item) => isSettlementAccount(item)) ||
        /(?:60(?:\.\d{2})?|62(?:\.\d{2})?|оплат|расч[её]т|зач[её]т|аванс|долг|поставщ|покупат|settlement|payment|supplier|customer)/i.test(messageCorpus);
    if (hasSettlementSignal) {
        return "settlements_60_62";
    }
    const hasVatSignal = accounts.some((item) => isVatAccount(item)) ||
        /(?:ндс|сч[её]т[\s-]?фактур|книг[аи]|vat|invoice|book|register)/i.test(messageCorpus);
    if (hasVatSignal) {
        return "vat_document_register_book";
    }
    const hasCloseSignal = accounts.some((item) => isCloseCostsAccount(item)) ||
        /(?:закрыти|месяц|затрат|распредел|списан|period\s*close|month\s*close|allocation|residual|cost|рбп)/i.test(messageCorpus);
    if (hasCloseSignal) {
        return "month_close_costs_20_44";
    }
    const hasFixedAssetSignal = accounts.some((item) => isFixedAssetAccount(item)) ||
        /(?:амортиз|основн(ые|ых|ым)?\s+средств|(?:^|[^a-zа-яё])ос(?:$|[^a-zа-яё])|объект[а-яё]*\s+ос|fixed\s*asset|depreciat)/i.test(messageCorpus);
    if (hasFixedAssetSignal) {
        return "fixed_asset_amortization";
    }
    return null;
}
/**
 * Builds a compact scope identifier out of domain, period, accounts and subject.
 *
 * Every non-empty component is emitted as a tagged segment (`d:`, `p:`, `a:`,
 * `s:`) and the segments are joined with `|`. At most four accounts are used
 * and the subject is cut to 96 characters and lower-cased.
 *
 * @param {{domain: *, period: *, accounts: Array, subject: *}} input Scope components.
 * @returns {string|null} The identifier, or `null` when every component is empty.
 */
function buildQuestionScopeId(input) {
    const clean = (value) => String(value ?? "").trim();
    const domain = clean(input.domain);
    const period = clean(input.period);
    const accountNames = input.accounts.map((item) => clean(item)).filter(Boolean);
    const accounts = capStrings(accountNames, 4).join(",");
    const subject = clean(input.subject).slice(0, 96).toLowerCase();
    const segments = [];
    if (domain) {
        segments.push(`d:${domain}`);
    }
    if (period) {
        segments.push(`p:${period}`);
    }
    if (accounts) {
        segments.push(`a:${accounts}`);
    }
    if (subject) {
        segments.push(`s:${subject}`);
    }
    return segments.length > 0 ? segments.join("|") : null;
}
/**
 * Classifies where the scope of the current question came from.
 *
 * Precedence: follow-up carry-over, then any explicit period / account /
 * domain signal in the message, then a concrete route, else "underspecified".
 *
 * @param {{followupApplied: boolean, userMessage: string, routeSummary: *}} input
 * @returns {string} One of "followup_state_carryover", "explicit_from_message",
 *   "route_derived" or "underspecified".
 */
function deriveScopeOrigin(input) {
    if (input.followupApplied) {
        return "followup_state_carryover";
    }
    const message = input.userMessage;
    // All three detectors are evaluated, in the same order as before.
    const explicitSignals = [
        Boolean(detectPeriod(message)),
        detectAccounts(message).length > 0,
        Boolean(detectExplicitDomainHint(message))
    ];
    if (explicitSignals.some(Boolean)) {
        return "explicit_from_message";
    }
    const routeDomain = deriveDomain(input.routeSummary);
    const hasConcreteRoute = Boolean(routeDomain) && routeDomain !== "no_route";
    return hasConcreteRoute ? "route_derived" : "underspecified";
}
/**
 * Derives a domain label from a route summary.
 *
 * @param {*} routeSummary Route summary, possibly missing.
 * @returns {string|null} `null` without a summary; `route_hint` for
 *   "legacy_v1" summaries; otherwise the distinct routes other than "no_route"
 *   joined with ",", or "no_route" when none remain.
 */
function deriveDomain(routeSummary) {
    if (!routeSummary) {
        return null;
    }
    if (routeSummary.mode === "legacy_v1") {
        return routeSummary.route_hint;
    }
    const namedRoutes = routeSummary.decisions
        .filter((decision) => decision.route !== "no_route")
        .map((decision) => decision.route);
    const distinctRoutes = uniqueStrings(namedRoutes);
    return distinctRoutes.length > 0 ? distinctRoutes.join(",") : "no_route";
}
/**
 * Maps the routing outcome and coverage report to a narrowing status.
 *
 * @param {*} routeSummary Route summary, possibly missing.
 * @param {{clarification_needed_for: Array}} coverageReport Coverage report.
 * @returns {string} "unknown", "not_needed", "needs_clarification",
 *   "broad_guarded" or "applied".
 */
function deriveNarrowingStatus(routeSummary, coverageReport) {
    if (!routeSummary) {
        return "unknown";
    }
    if (routeSummary.mode === "legacy_v1") {
        return "not_needed";
    }
    const clarificationRequested = routeSummary.fallback.type === "clarification" ||
        coverageReport.clarification_needed_for.length > 0;
    if (clarificationRequested) {
        return "needs_clarification";
    }
    const { decisions } = routeSummary;
    if (decisions.some(({ route }) => route === "no_route")) {
        return "broad_guarded";
    }
    // More than one routed decision means the question was split up.
    if (decisions.length > 1) {
        return "applied";
    }
    return "not_needed";
}
/**
 * Chooses the query-mode hint for the next turn.
 *
 * @param {*} routeSummary Route summary, possibly missing.
 * @returns {string} "direct_answer" for "legacy_v1" summaries and for
 *   summaries whose fallback type is "none"; "investigation_candidate" otherwise.
 */
function deriveQueryModeHint(routeSummary) {
    const answersDirectly = Boolean(routeSummary) &&
        (routeSummary.mode === "legacy_v1" || routeSummary.fallback.type === "none");
    if (answersDirectly) {
        return "direct_answer";
    }
    return "investigation_candidate";
}
/**
 * Gathers the evidence ids of all retrieval results.
 *
 * @param {Array<{evidence: Array<{evidence_id: string}>}>} retrievalResults
 * @returns {string[]} Unique ids, capped at INVESTIGATION_MAX_EVIDENCE_REFS.
 */
function collectEvidenceRefs(retrievalResults) {
    const idsOf = (result) => result.evidence.map(({ evidence_id }) => evidence_id);
    const allIds = retrievalResults.flatMap((result) => idsOf(result));
    return capStrings(allIds, stage1Contracts_1.INVESTIGATION_MAX_EVIDENCE_REFS);
}
/**
 * Lists what is still uncertain after this turn.
 *
 * Coverage gaps come first, each tagged by kind, followed by at most six
 * retrieval limitations. The combined list is de-duplicated and capped at
 * INVESTIGATION_MAX_UNCERTAINTIES.
 *
 * @param {*} coverageReport Coverage report with the four requirement lists.
 * @param {Array<{limitations: string[]}>} retrievalResults Retrieval results.
 * @returns {string[]} Uncertainty notes.
 */
function collectOpenUncertainties(coverageReport, retrievalResults) {
    const uncovered = coverageReport.requirements_uncovered.map((item) => `uncovered:${item}`);
    const partial = coverageReport.requirements_partially_covered.map((item) => `partial:${item}`);
    const clarify = coverageReport.clarification_needed_for.map((item) => `clarify:${item}`);
    const outOfScope = coverageReport.out_of_scope_requirements.map((item) => `out_of_scope:${item}`);
    const limitations = retrievalResults.flatMap(({ limitations: notes }) => notes).slice(0, 6);
    const combined = uncovered.concat(partial, clarify, outOfScope, limitations);
    return capStrings(combined, stage1Contracts_1.INVESTIGATION_MAX_UNCERTAINTIES);
}
/**
 * Returns the two leading digits of an account code.
 *
 * @param {*} value Account code; nullish values are treated as an empty string.
 * @returns {string|null} The two-digit prefix, or `null` when the trimmed
 *   value does not start with two digits.
 */
function normalizeAccountPrefix(value) {
    const trimmed = String(value ?? "").trim();
    const leadingDigits = /^(\d{2})/.exec(trimmed);
    return leadingDigits ? leadingDigits[1] : null;
}
/**
 * Tells whether an account code belongs to the settlements group, i.e. its
 * two-digit prefix is 60, 62, 51 or 76.
 *
 * @param {*} value Account code.
 * @returns {boolean}
 */
function isSettlementAccount(value) {
    return ["60", "62", "51", "76"].includes(normalizeAccountPrefix(value));
}
/**
 * Tells whether an account code belongs to the VAT group, i.e. its two-digit
 * prefix is 19 or 68.
 *
 * @param {*} value Account code.
 * @returns {boolean}
 */
function isVatAccount(value) {
    return ["19", "68"].includes(normalizeAccountPrefix(value));
}
/**
 * Tells whether an account code belongs to the fixed-asset group, i.e. its
 * two-digit prefix is 01, 02 or 08.
 *
 * @param {*} value Account code.
 * @returns {boolean}
 */
function isFixedAssetAccount(value) {
    return ["01", "02", "08"].includes(normalizeAccountPrefix(value));
}
/**
 * Tells whether an account code belongs to the month-close / costs group:
 * a two-digit prefix between 20 and 44 inclusive, or exactly 97.
 *
 * @param {*} value Account code.
 * @returns {boolean}
 */
function isCloseCostsAccount(value) {
    const prefix = normalizeAccountPrefix(value);
    if (!prefix) {
        return false;
    }
    if (prefix === "97") {
        return true;
    }
    const numericPrefix = Number(prefix);
    return numericPrefix >= 20 && numericPrefix <= 44;
}
/**
 * Infers the domain the current turn is about.
 *
 * Checks run in this order and the first hit wins:
 *   1. fixed assets (lexical marker, or a fixed-asset focus account that is
 *      also spelled out in the message),
 *   2. settlements, 3. VAT, 4. month close / costs
 *      (focus account of that group or a lexical marker),
 *   5. settlements carried over from the previous turn, when carry-over is
 *      allowed and the message plus previous subject mention a settlement marker,
 *   6. the route-derived domain, unless it is "no_route",
 *   7. the previous active/focus domain, when carry-over is allowed.
 *
 * The month-close markers include "рбп", in line with the explicit-hint
 * detector of this module, so a message classified as explicit there also
 * resolves to a domain here.
 *
 * @param {{userMessage: string, focusAccounts: string[], routeSummary: *,
 *   previous: *, allowStateCarryover: boolean}} input
 * @returns {string|null} Domain identifier, or `null` when nothing applies.
 */
function inferFollowupActiveDomain(input) {
    const messageCorpus = String(input.userMessage ?? "").toLowerCase();
    // The previous subject is only consulted when state carry-over is allowed.
    const contextualCorpus = input.allowStateCarryover
        ? `${messageCorpus} ${input.previous.focus.active_query_subject ?? ""}`.toLowerCase()
        : messageCorpus;
    const hasFixedAssetLexicalSignal = /(?:амортиз|основн(ые|ых|ым)?\s+средств|(?:^|[^a-zа-яё])ос(?:$|[^a-zа-яё])|объект[а-яё]*\s+ос|fixed\s*asset|depreciat)/i.test(messageCorpus);
    const hasFixedAssetAccountSignal = input.focusAccounts.some((item) => isFixedAssetAccount(item)) &&
        /(?:сч[её]т(?:а|у|ом|е)?\s*(?:01|02|08)|(?:01|02|08)(?:\.\d{2})?\s*\/\s*(?:01|02|08)(?:\.\d{2})?|\b0[128](?:\.\d{2})?\b)/i.test(messageCorpus);
    if (hasFixedAssetLexicalSignal || hasFixedAssetAccountSignal) {
        return "fixed_asset_amortization";
    }
    const hasSettlementSignal = input.focusAccounts.some((item) => isSettlementAccount(item)) ||
        /(?:60(?:\.\d{2})?|62(?:\.\d{2})?|оплат|расч[её]т|зач[её]т|аванс|долг|поставщ|покупат|settlement|payment|supplier|customer)/i.test(messageCorpus);
    if (hasSettlementSignal) {
        return "settlements_60_62";
    }
    const hasVatSignal = input.focusAccounts.some((item) => isVatAccount(item)) ||
        /(?:ндс|сч[её]т[\s-]?фактур|книг[аи]|vat|invoice|book|register)/i.test(messageCorpus);
    if (hasVatSignal) {
        return "vat_document_register_book";
    }
    const hasCloseSignal = input.focusAccounts.some((item) => isCloseCostsAccount(item)) ||
        /(?:закрыти|месяц|затрат|распредел|списан|period\s*close|month\s*close|allocation|residual|cost|рбп)/i.test(messageCorpus);
    if (hasCloseSignal) {
        return "month_close_costs_20_44";
    }
    if (input.allowStateCarryover &&
        /(?:60(?:\.\d{2})?|62(?:\.\d{2})?|оплат|расч[её]т|аванс|долг|settlement|payment)/i.test(contextualCorpus) &&
        (input.previous.followup_context?.active_domain === "settlements_60_62" ||
            input.previous.focus.domain === "settlements_60_62")) {
        return "settlements_60_62";
    }
    const routeDomain = deriveDomain(input.routeSummary);
    if (routeDomain && routeDomain !== "no_route") {
        return routeDomain;
    }
    if (input.allowStateCarryover) {
        return input.previous.followup_context?.active_domain ?? input.previous.focus.domain ?? null;
    }
    return null;
}
/**
 * Collects the ids of every requirement that is not fully covered: uncovered,
 * partially covered, needing clarification and out of scope, in that order.
 *
 * @param {*} coverageReport Coverage report with the four requirement lists.
 * @returns {string[]} Unique ids, capped at INVESTIGATION_MAX_REQUIREMENT_LINKS.
 */
function collectUncoveredRequirementIds(coverageReport) {
    const pendingIds = [].concat(
        coverageReport.requirements_uncovered,
        coverageReport.requirements_partially_covered,
        coverageReport.clarification_needed_for,
        coverageReport.out_of_scope_requirements
    );
    return capStrings(pendingIds, stage1Contracts_1.INVESTIGATION_MAX_REQUIREMENT_LINKS);
}
/**
 * Produces one `ref:status:route` line per retrieval result, where `ref` is
 * the first requirement id or, failing that, the fragment id.
 *
 * @param {Array<{requirement_ids: string[], fragment_id: string, status: string, route: string}>} retrievalResults
 * @returns {string[]} Up to six unique summary lines.
 */
function collectEvidenceSummary(retrievalResults) {
    const summaryLines = retrievalResults.map(({ requirement_ids, fragment_id, status, route }) => {
        const [firstRequirementId] = requirement_ids;
        const reference = firstRequirementId ?? fragment_id;
        return `${reference}:${status}:${route}`;
    });
    return capStrings(summaryLines, 6);
}
/**
 * Suggests next checks for the settlements domain.
 *
 * @param {string|null} activeDomain Domain inferred for the current turn.
 * @returns {string[]} Three user-facing suggestions for "settlements_60_62";
 *   an empty list for any other domain. A fresh array is returned on each call.
 */
function settlementFocusActions(activeDomain) {
    const isSettlementDomain = activeDomain === "settlements_60_62";
    return isSettlementDomain
        ? [
            "Проверьте договор и объект расчетов по платежу.",
            "Сверьте регистр расчетов и привязку платежа к закрывающему документу.",
            "Проверьте зачет аванса или взаимозачет по связке 60/62."
        ]
        : [];
}
/**
 * Cleans a list of entity backlinks.
 *
 * `entity` and `id` are stringified and trimmed; links missing either part are
 * dropped, and only the first occurrence of each entity/id pair is kept.
 * Returned items are new objects holding just `entity` and `id`.
 *
 * @param {Iterable<{entity: *, id: *}>} values Raw backlinks.
 * @returns {Array<{entity: string, id: string}>} Normalized backlinks in first-seen order.
 */
function normalizeEntityBacklinks(values) {
    // A Map keeps insertion order, so first-seen order is preserved.
    const byKey = new Map();
    for (const link of values) {
        const entity = String(link.entity ?? "").trim();
        const id = String(link.id ?? "").trim();
        const isComplete = Boolean(entity) && Boolean(id);
        const key = `${entity}::${id}`;
        if (isComplete && !byKey.has(key)) {
            byKey.set(key, { entity, id });
        }
    }
    return [...byKey.values()];
}
/**
 * Concatenates the problem units of all retrieval results, in order.
 *
 * @param {Array<{problem_units?: Array|null}>} retrievalResults Results; a
 *   missing or null `problem_units` contributes nothing.
 * @returns {Array} Flat list of problem units.
 */
function collectProblemUnits(retrievalResults) {
    return retrievalResults.reduce((collected, result) => collected.concat(result.problem_units ?? []), []);
}
/**
 * Returns a normalized, size-limited copy of a problem-unit state.
 *
 * Id and type lists are de-duplicated and capped; backlink entries get a
 * trimmed id and normalized links, entries without an id or without links are
 * dropped, and the remainder is capped as well.
 *
 * @param {{active_problem_units: string[], resolved_problem_units: string[],
 *   problem_unit_backlinks: Array, focus_problem_types: Array}} state
 * @returns {object} New state object with the same four fields.
 */
function capProblemUnitState(state) {
    const limits = stage2ProblemUnits_1;
    const activeIds = capStrings(state.active_problem_units, limits.INVESTIGATION_MAX_ACTIVE_PROBLEM_UNITS);
    const resolvedIds = capStrings(state.resolved_problem_units, limits.INVESTIGATION_MAX_RESOLVED_PROBLEM_UNITS);
    const usableBacklinks = [];
    state.problem_unit_backlinks.forEach((item) => {
        const problemUnitId = String(item.problem_unit_id ?? "").trim();
        const entityBacklinks = normalizeEntityBacklinks(item.entity_backlinks ?? []);
        if (Boolean(problemUnitId) && entityBacklinks.length > 0) {
            usableBacklinks.push({
                problem_unit_id: problemUnitId,
                entity_backlinks: entityBacklinks
            });
        }
    });
    const problemTypes = state.focus_problem_types.map((item) => String(item));
    return {
        active_problem_units: activeIds,
        resolved_problem_units: resolvedIds,
        problem_unit_backlinks: usableBacklinks.slice(0, limits.INVESTIGATION_MAX_PROBLEM_UNIT_BACKLINKS),
        focus_problem_types: capStrings(problemTypes, limits.INVESTIGATION_MAX_FOCUS_PROBLEM_TYPES)
    };
}
/**
 * Derives the next problem-unit tracking state from the previous investigation
 * state and the retrieval results of the current turn.
 *
 * When the current results carry problem units, their ids become the active
 * list and previously active ids that are no longer present are placed in
 * front of the previous resolved list. When the current results carry none,
 * the previous active ids, resolved ids and focus types are carried over.
 *
 * @param {object} previous Previous investigation state; its `problem_unit_state` may be absent.
 * @param {Array} retrievalResults Retrieval results of the current turn.
 * @returns {object|undefined} The capped state, or `undefined` when all four lists are empty.
 */
function updateProblemUnitState(previous, retrievalResults) {
const previousState = previous.problem_unit_state;
const currentProblemUnits = collectProblemUnits(retrievalResults);
const currentIds = capStrings(currentProblemUnits.map((item) => String(item.problem_unit_id ?? "")), stage2ProblemUnits_1.INVESTIGATION_MAX_ACTIVE_PROBLEM_UNITS);
const currentTypes = capStrings(currentProblemUnits.map((item) => String(item.problem_unit_type ?? "")), stage2ProblemUnits_1.INVESTIGATION_MAX_FOCUS_PROBLEM_TYPES);
// Backlinks are only kept for units whose id survived the cap above.
// NOTE(review): currentIds holds stringified, trimmed ids while this filter compares the raw
// `item.problem_unit_id`; ids with surrounding whitespace would not match — confirm ids are clean strings.
const currentBacklinksRaw = currentProblemUnits
.filter((item) => currentIds.includes(item.problem_unit_id))
.map((item) => ({
problem_unit_id: item.problem_unit_id,
entity_backlinks: normalizeEntityBacklinks(item.entity_backlinks ?? [])
}))
.filter((item) => item.entity_backlinks.length > 0);
const currentBacklinksById = new Map(currentBacklinksRaw.map((item) => [item.problem_unit_id, item.entity_backlinks]));
const previousBacklinksById = new Map((previousState?.problem_unit_backlinks ?? []).map((item) => [item.problem_unit_id, item.entity_backlinks]));
// Current units replace the active list; with no current units the previous list is kept.
const active_problem_units = currentIds.length > 0
? currentIds
: capStrings(previousState?.active_problem_units ?? [], stage2ProblemUnits_1.INVESTIGATION_MAX_ACTIVE_PROBLEM_UNITS);
// NOTE(review): the previous resolved ids are not filtered against currentIds, so an id that
// shows up again can be listed as both active and resolved — confirm this is intended.
const resolved_problem_units = currentIds.length > 0
? capStrings([
...(previousState?.active_problem_units ?? []).filter((item) => !currentIds.includes(item)),
...(previousState?.resolved_problem_units ?? [])
], stage2ProblemUnits_1.INVESTIGATION_MAX_RESOLVED_PROBLEM_UNITS)
: capStrings(previousState?.resolved_problem_units ?? [], stage2ProblemUnits_1.INVESTIGATION_MAX_RESOLVED_PROBLEM_UNITS);
// For every active unit prefer the backlinks seen this turn, falling back to the previous ones;
// units without any backlinks are left out.
const problem_unit_backlinks = active_problem_units
.map((problemUnitId) => {
const entity_backlinks = normalizeEntityBacklinks(currentBacklinksById.get(problemUnitId) ?? previousBacklinksById.get(problemUnitId) ?? []);
if (entity_backlinks.length === 0) {
return null;
}
return {
problem_unit_id: problemUnitId,
entity_backlinks
};
})
.filter((item) => item !== null)
.slice(0, stage2ProblemUnits_1.INVESTIGATION_MAX_PROBLEM_UNIT_BACKLINKS);
const focus_problem_types = currentTypes.length > 0
? currentTypes
: capStrings((previousState?.focus_problem_types ?? []).map((item) => String(item)), stage2ProblemUnits_1.INVESTIGATION_MAX_FOCUS_PROBLEM_TYPES);
const nextState = capProblemUnitState({
active_problem_units,
resolved_problem_units,
problem_unit_backlinks,
focus_problem_types
});
// An entirely empty state is reported as "no state" rather than as an object of empty lists.
if (nextState.active_problem_units.length === 0 &&
nextState.resolved_problem_units.length === 0 &&
nextState.problem_unit_backlinks.length === 0 &&
nextState.focus_problem_types.length === 0) {
return undefined;
}
return nextState;
}
/**
 * Copies an investigation state so that its list fields can be changed
 * without touching the source.
 *
 * Top-level fields are copied shallowly; `focus`, `evidence_refs`,
 * `open_uncertainties`, the list fields of `followup_context` and the
 * problem-unit state get fresh containers. The problem-unit state is
 * additionally passed through `capProblemUnitState`.
 *
 * @param {object|null|undefined} state State to copy.
 * @returns {object|null} The copy, or `null` when no state was given.
 */
function cloneInvestigationState(state) {
    if (!state) {
        return null;
    }
    let followupCopy = null;
    if (state.followup_context) {
        followupCopy = {
            ...state.followup_context,
            referenced_requirement_ids: [...state.followup_context.referenced_requirement_ids]
        };
        // These lists are optional: copy them only when they are set.
        const optionalLists = [
            "active_requirement_ids",
            "uncovered_requirement_ids",
            "settlement_next_actions",
            "evidence_summary"
        ];
        for (const field of optionalLists) {
            if (state.followup_context[field]) {
                followupCopy[field] = [...state.followup_context[field]];
            }
        }
    }
    const copy = {
        ...state,
        focus: {
            ...state.focus,
            primary_accounts: [...state.focus.primary_accounts]
        },
        evidence_refs: [...state.evidence_refs],
        open_uncertainties: [...state.open_uncertainties],
        followup_context: followupCopy
    };
    const unitState = state.problem_unit_state;
    if (unitState) {
        copy.problem_unit_state = capProblemUnitState({
            active_problem_units: [...unitState.active_problem_units],
            resolved_problem_units: [...unitState.resolved_problem_units],
            problem_unit_backlinks: unitState.problem_unit_backlinks.map(({ problem_unit_id, entity_backlinks }) => ({
                problem_unit_id,
                entity_backlinks: [...entity_backlinks]
            })),
            focus_problem_types: [...unitState.focus_problem_types]
        });
    }
    return copy;
}
/**
 * Creates the initial, idle investigation state for a session.
 *
 * @param {string} sessionId Session identifier stored on the state.
 * @param {string} [timestamp] Value for `updated_at`; defaults to the current
 *   time as an ISO string.
 * @returns {object} State with status "idle", turn index 0, empty focus and
 *   lists, and "direct_answer" as the query-mode hint.
 */
function createEmptyInvestigationState(sessionId, timestamp = new Date().toISOString()) {
    const emptyFocus = {
        domain: null,
        period: null,
        primary_accounts: [],
        active_query_subject: null
    };
    const initialState = {
        schema_version: stage1Contracts_1.INVESTIGATION_STATE_SCHEMA_VERSION,
        session_id: sessionId,
        status: "idle",
        turn_index: 0,
        updated_at: timestamp,
        question_id: null,
        question_scope_id: null,
        scope_origin: null,
        focus: emptyFocus,
        narrowing_status: "unknown",
        evidence_refs: [],
        open_uncertainties: [],
        last_answer_mode: null,
        followup_context: null,
        query_mode_hint: "direct_answer"
    };
    return initialState;
}
/**
 * Produces the investigation state that follows one answered turn.
 *
 * Focus (domain, period, accounts) is taken from the current message and
 * routing; values from the previous state are only reused when
 * `input.followupApplied` is exactly `true`. Evidence refs accumulate across
 * turns, open uncertainties are recomputed, and the follow-up context is
 * rebuilt from the current turn.
 *
 * @param {object} input Turn data: `previous`, `userMessage`, `requirements`,
 *   `retrievalResults`, `coverageReport`, `routeSummary`, `followupApplied`,
 *   `timestamp`, `questionId`, `replyType`.
 * @returns {object} The new state, with status "active" and the turn index
 *   increased by one. `problem_unit_state` is only present when problem-unit
 *   tracking produced a state.
 */
function updateInvestigationState(input) {
    const { previous, userMessage, requirements, retrievalResults, coverageReport, routeSummary } = input;
    const limits = stage1Contracts_1;
    const isFollowup = input.followupApplied === true;

    // Accounts: the message always contributes; the previous focus only on follow-ups.
    const messageAccounts = capStrings(detectAccounts(userMessage), limits.INVESTIGATION_MAX_PRIMARY_ACCOUNTS);
    const accountPool = isFollowup
        ? [...messageAccounts, ...previous.focus.primary_accounts]
        : messageAccounts;
    const focusAccounts = capStrings(accountPool, limits.INVESTIGATION_MAX_PRIMARY_ACCOUNTS);

    const requirementIds = capStrings(requirements.map(({ requirement_id }) => requirement_id), limits.INVESTIGATION_MAX_REQUIREMENT_LINKS);
    const subject = requirements[0]?.requirement_text ?? userMessage;
    const problemUnitState = updateProblemUnitState(previous, retrievalResults);
    const uncoveredRequirementIds = collectUncoveredRequirementIds(coverageReport);

    // Domain: inferred domain first, then the route, then (follow-ups only) the previous focus.
    const routeDomain = deriveDomain(routeSummary);
    const activeDomain = inferFollowupActiveDomain({
        userMessage,
        focusAccounts: messageAccounts,
        routeSummary,
        previous,
        allowStateCarryover: isFollowup
    });
    const carriedDomain = isFollowup ? previous.focus.domain : null;
    const focusDomain = activeDomain ?? routeDomain ?? carriedDomain;

    const messagePeriod = detectPeriod(userMessage);
    const carriedPeriod = isFollowup ? previous.focus.period : null;
    const focusPeriod = messagePeriod ?? carriedPeriod;

    const settlementNextActions = settlementFocusActions(activeDomain);
    const lastProblemUnitId = problemUnitState?.active_problem_units[0] ?? null;
    const evidenceSummary = collectEvidenceSummary(retrievalResults);
    const scopeOrigin = deriveScopeOrigin({
        followupApplied: isFollowup,
        userMessage,
        routeSummary
    });
    const questionScopeId = buildQuestionScopeId({
        domain: focusDomain,
        period: focusPeriod,
        accounts: focusAccounts,
        subject
    });

    const nextState = {
        schema_version: limits.INVESTIGATION_STATE_SCHEMA_VERSION,
        session_id: previous.session_id,
        status: "active",
        turn_index: previous.turn_index + 1,
        updated_at: input.timestamp,
        question_id: input.questionId,
        question_scope_id: questionScopeId,
        scope_origin: scopeOrigin,
        focus: {
            domain: focusDomain,
            period: focusPeriod,
            primary_accounts: focusAccounts,
            active_query_subject: subject.slice(0, 180)
        },
        narrowing_status: deriveNarrowingStatus(routeSummary, coverageReport),
        evidence_refs: capStrings([...collectEvidenceRefs(retrievalResults), ...previous.evidence_refs], limits.INVESTIGATION_MAX_EVIDENCE_REFS),
        open_uncertainties: collectOpenUncertainties(coverageReport, retrievalResults),
        last_answer_mode: input.replyType,
        followup_context: {
            previous_question_id: previous.question_id,
            last_user_message: userMessage.slice(0, 240),
            referenced_requirement_ids: requirementIds,
            active_domain: activeDomain,
            active_requirement_ids: requirementIds,
            uncovered_requirement_ids: uncoveredRequirementIds,
            last_problem_unit_id: lastProblemUnitId,
            settlement_next_actions: settlementNextActions,
            evidence_summary: evidenceSummary,
            question_scope_id: questionScopeId,
            scope_origin: scopeOrigin
        },
        query_mode_hint: deriveQueryModeHint(routeSummary)
    };
    // The field is omitted entirely when there is no problem-unit state.
    if (problemUnitState) {
        nextState.problem_unit_state = problemUnitState;
    }
    return nextState;
}