NODEDC_1C/llm_normalizer/backend/src/services/investigationState.ts

530 lines
19 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import type {
AssistantRequirement,
RequirementCoverageReport,
UnifiedRetrievalResult
} from "../types/assistant";
import type { RouteHintSummary } from "../types/normalizer";
import type {
InvestigationLastAnswerMode,
InvestigationNarrowingStatus,
InvestigationState
} from "../types/stage1Contracts";
import {
INVESTIGATION_MAX_EVIDENCE_REFS,
INVESTIGATION_MAX_PRIMARY_ACCOUNTS,
INVESTIGATION_MAX_REQUIREMENT_LINKS,
INVESTIGATION_MAX_UNCERTAINTIES,
INVESTIGATION_STATE_SCHEMA_VERSION
} from "../types/stage1Contracts";
import type {
InvestigationProblemUnitState,
InvestigationStateWithProblemUnits,
ProblemUnit,
ProblemUnitEntityBacklink
} from "../types/stage2ProblemUnits";
import {
INVESTIGATION_MAX_ACTIVE_PROBLEM_UNITS,
INVESTIGATION_MAX_FOCUS_PROBLEM_TYPES,
INVESTIGATION_MAX_PROBLEM_UNIT_BACKLINKS,
INVESTIGATION_MAX_RESOLVED_PROBLEM_UNITS
} from "../types/stage2ProblemUnits";
/**
 * Everything `updateInvestigationState` needs to derive the next investigation
 * state: the state from the previous turn plus the artifacts of the current turn.
 */
interface UpdateInvestigationStateInput {
// State carried over from the previous turn (session id, turn index, focus, ...).
previous: InvestigationStateWithProblemUnits;
// Written to `updated_at` of the new state as-is.
timestamp: string;
// Written to `question_id` of the new state.
questionId: string;
// Raw user text; scanned for account numbers and a period, and stored truncated.
userMessage: string;
// Routing summary for the turn; `null` is handled by every consumer in this file.
routeSummary: RouteHintSummary | null;
// Requirements of the turn; the first one's text becomes the active query subject.
requirements: AssistantRequirement[];
// Coverage buckets used for uncertainties and uncovered requirement ids.
coverageReport: RequirementCoverageReport;
// Retrieval output; source of evidence ids, limitations and problem units.
retrievalResults: UnifiedRetrievalResult[];
// Written to `last_answer_mode` of the new state.
replyType: InvestigationLastAnswerMode;
}
/**
 * Trims every entry, drops entries that are empty after trimming and removes
 * duplicates while keeping first-occurrence order.
 *
 * Entries that are `null` or `undefined` at runtime are treated as empty instead
 * of throwing on `.trim()`: several callers feed this helper with fields taken
 * straight from retrieval payloads, and the rest of the file already coerces
 * such values with `String(value ?? "")`.
 *
 * @param values Raw strings (nullish entries are tolerated and skipped).
 * @returns Distinct, trimmed, non-empty strings in first-seen order.
 */
function uniqueStrings(values: ReadonlyArray<string | null | undefined>): string[] {
  // A Set iterates in insertion order, which gives first-occurrence ordering.
  const seen = new Set<string>();
  for (const value of values) {
    const trimmed = String(value ?? "").trim();
    if (trimmed) {
      seen.add(trimmed);
    }
  }
  return Array.from(seen);
}
/**
 * Normalizes `values` through `uniqueStrings` and keeps at most the first
 * `max` entries of the result.
 */
function capStrings(values: string[], max: number): string[] {
  const distinct = uniqueStrings(values);
  return distinct.slice(0, max);
}
/**
 * Extracts account-like tokens (`NN` or `NN.NN`) from free text, de-duplicated
 * and capped at `INVESTIGATION_MAX_PRIMARY_ACCOUNTS`.
 *
 * Period tokens of the form `20YY-MM`, `20YY/MM` or `20YY.MM` (the same shape
 * `detectPeriod` recognizes) are blanked out before scanning. Without that, the
 * month of a period such as "2024-01" was reported as account "01".
 *
 * @param text Free-form user text.
 * @returns Distinct account tokens in order of appearance.
 */
function detectAccounts(text: string): string[] {
  // Replace with a space (not an empty string) so neighbouring tokens are not glued together.
  const withoutPeriods = text.replace(/\b20\d{2}[-/.](?:0[1-9]|1[0-2])\b/g, " ");
  const candidates = withoutPeriods.match(/\b\d{2}(?:\.\d{2})?\b/g) ?? [];
  return capStrings(candidates, INVESTIGATION_MAX_PRIMARY_ACCOUNTS);
}
/**
 * Looks for a period mention in free text.
 *
 * A `20YY` year followed by `-`, `/` or `.` and a month `01`..`12` wins and is
 * returned as `YYYY-MM`; otherwise the first standalone `20YY` year is returned;
 * otherwise `null`.
 */
function detectPeriod(text: string): string | null {
  const monthMatch = /\b(20\d{2})[-/.](0[1-9]|1[0-2])\b/.exec(text);
  if (monthMatch !== null) {
    const [, year, month] = monthMatch;
    return `${year}-${month}`;
  }
  const yearMatch = /\b(20\d{2})\b/.exec(text);
  return yearMatch !== null ? yearMatch[1] : null;
}
/**
 * Turns a route summary into a domain label.
 *
 * - no summary: `null`
 * - `legacy_v1` mode: the summary's `route_hint`
 * - otherwise: the distinct decision routes other than `"no_route"`, joined with
 *   `","`, or `"no_route"` when none remain.
 */
function deriveDomain(routeSummary: RouteHintSummary | null): string | null {
  if (!routeSummary) {
    return null;
  }
  if (routeSummary.mode === "legacy_v1") {
    return routeSummary.route_hint;
  }
  const routed: string[] = [];
  for (const decision of routeSummary.decisions) {
    if (decision.route !== "no_route") {
      routed.push(decision.route);
    }
  }
  const distinctRoutes = uniqueStrings(routed);
  return distinctRoutes.length > 0 ? distinctRoutes.join(",") : "no_route";
}
/**
 * Classifies how much the request was narrowed during routing.
 *
 * Checks run in this order: missing summary -> `"unknown"`; `legacy_v1` ->
 * `"not_needed"`; clarification fallback or any requirement awaiting
 * clarification -> `"needs_clarification"`; any `"no_route"` decision ->
 * `"broad_guarded"`; more than one decision -> `"applied"`; else `"not_needed"`.
 */
function deriveNarrowingStatus(
  routeSummary: RouteHintSummary | null,
  coverageReport: RequirementCoverageReport
): InvestigationNarrowingStatus {
  if (!routeSummary) return "unknown";
  if (routeSummary.mode === "legacy_v1") return "not_needed";

  const wantsClarification =
    routeSummary.fallback.type === "clarification" || coverageReport.clarification_needed_for.length > 0;
  if (wantsClarification) return "needs_clarification";

  for (const decision of routeSummary.decisions) {
    if (decision.route === "no_route") return "broad_guarded";
  }

  if (routeSummary.decisions.length > 1) {
    return "applied";
  }
  return "not_needed";
}
/**
 * Picks the query-mode hint for the next state: `"direct_answer"` for
 * `legacy_v1` summaries and for summaries whose fallback type is `"none"`,
 * `"investigation_candidate"` otherwise (including a missing summary).
 */
function deriveQueryModeHint(routeSummary: RouteHintSummary | null): InvestigationState["query_mode_hint"] {
  if (!routeSummary) {
    return "investigation_candidate";
  }
  const answersDirectly = routeSummary.mode === "legacy_v1" || routeSummary.fallback.type === "none";
  return answersDirectly ? "direct_answer" : "investigation_candidate";
}
/**
 * Gathers the `evidence_id` of every evidence item across all retrieval
 * results, de-duplicated and capped at `INVESTIGATION_MAX_EVIDENCE_REFS`.
 */
function collectEvidenceRefs(retrievalResults: UnifiedRetrievalResult[]): string[] {
  const evidenceIds: string[] = [];
  for (const result of retrievalResults) {
    for (const evidence of result.evidence) {
      evidenceIds.push(evidence.evidence_id);
    }
  }
  return capStrings(evidenceIds, INVESTIGATION_MAX_EVIDENCE_REFS);
}
/**
 * Builds the list of open uncertainties for the state.
 *
 * Requirement ids from the coverage report are tagged with their bucket
 * (`uncovered:`, `partial:`, `clarify:`, `out_of_scope:`) and come first; they
 * are followed by at most six retrieval limitations. The combined list is
 * de-duplicated and capped at `INVESTIGATION_MAX_UNCERTAINTIES`.
 */
function collectOpenUncertainties(
  coverageReport: RequirementCoverageReport,
  retrievalResults: UnifiedRetrievalResult[]
): string[] {
  const buckets: Array<[string, string[]]> = [
    ["uncovered", coverageReport.requirements_uncovered],
    ["partial", coverageReport.requirements_partially_covered],
    ["clarify", coverageReport.clarification_needed_for],
    ["out_of_scope", coverageReport.out_of_scope_requirements]
  ];
  const notes: string[] = [];
  for (const [label, requirementIds] of buckets) {
    for (const requirementId of requirementIds) {
      notes.push(`${label}:${requirementId}`);
    }
  }

  const limitations = retrievalResults.flatMap((result) => result.limitations).slice(0, 6);
  for (const limitation of limitations) {
    notes.push(limitation);
  }
  return capStrings(notes, INVESTIGATION_MAX_UNCERTAINTIES);
}
/**
 * Returns the first two characters of the trimmed account value when both are
 * digits, or `null` when the value is empty/nullish or does not start with two
 * digits.
 */
function normalizeAccountPrefix(value: string): string | null {
  const trimmed = String(value ?? "").trim();
  if (trimmed.length === 0) {
    return null;
  }
  const leadingDigits = /^(\d{2})/.exec(trimmed);
  return leadingDigits?.[1] ?? null;
}
/** True when the account's two-digit prefix is 60, 62, 51 or 76. */
function isSettlementAccount(value: string): boolean {
  const prefix = normalizeAccountPrefix(value);
  if (prefix === null) {
    return false;
  }
  return ["60", "62", "51", "76"].includes(prefix);
}
/** True when the account's two-digit prefix is 19 or 68. */
function isVatAccount(value: string): boolean {
  const prefix = normalizeAccountPrefix(value);
  if (prefix === null) {
    return false;
  }
  return ["19", "68"].includes(prefix);
}
/** True when the account's two-digit prefix is 01, 02 or 08. */
function isFixedAssetAccount(value: string): boolean {
  const prefix = normalizeAccountPrefix(value);
  if (prefix === null) {
    return false;
  }
  return ["01", "02", "08"].includes(prefix);
}
/**
 * True when the account's two-digit prefix is 97 or falls in the inclusive
 * range 20..44.
 */
function isCloseCostsAccount(value: string): boolean {
  const prefix = normalizeAccountPrefix(value);
  if (prefix === null) {
    return false;
  }
  if (prefix === "97") {
    return true;
  }
  const numericPrefix = Number(prefix);
  return numericPrefix >= 20 && numericPrefix <= 44;
}
/**
 * Heuristically picks the active domain for a follow-up turn.
 *
 * Signals are evaluated in a fixed priority order and the first hit wins:
 *   1. fixed assets: lexical cue in the message, or a fixed-asset account in
 *      `focusAccounts` together with an explicit 01/02/08 mention in the message
 *   2. settlements: settlement account in `focusAccounts` or a lexical cue
 *   3. VAT: VAT account in `focusAccounts` or a lexical cue
 *   4. month close / costs: cost account in `focusAccounts` or a lexical cue
 *   5. settlement cue in message + previous query subject, while the previous
 *      state was already in the settlements domain
 *   6. domain derived from the route summary (unless it is `"no_route"`)
 *   7. the previous follow-up domain, then the previous focus domain, then `null`
 *
 * NOTE(review): the settlement patterns match a bare "60"/"62" anywhere in the
 * text (no word boundary), so unrelated numbers can trigger them; confirm intent.
 */
function inferFollowupActiveDomain(input: {
  userMessage: string;
  focusAccounts: string[];
  routeSummary: RouteHintSummary | null;
  previous: InvestigationStateWithProblemUnits;
}): string | null {
  const { focusAccounts, previous, routeSummary } = input;
  const messageCorpus = String(input.userMessage ?? "").toLowerCase();
  // Current message plus the previous query subject; only used by the contextual settlement check.
  const contextualCorpus = `${messageCorpus} ${previous.focus.active_query_subject ?? ""}`.toLowerCase();

  const fixedAssetWords =
    /(?:амортиз|основн(ые|ых|ым)?\s+средств|(?:^|[^a-zа-яё])ос(?:$|[^a-zа-яё])|объект[а-яё]*\s+ос|fixed\s*asset|depreciat)/i;
  const fixedAssetAccountMention =
    /(?:сч[её]т(?:а|у|ом|е)?\s*(?:01|02|08)|(?:01|02|08)(?:\.\d{2})?\s*\/\s*(?:01|02|08)(?:\.\d{2})?|\b0[128](?:\.\d{2})?\b)/i;
  const settlementWords =
    /(?:60(?:\.\d{2})?|62(?:\.\d{2})?|оплат|расч[её]т|зач[её]т|аванс|долг|поставщ|покупат|settlement|payment|supplier|customer)/i;
  const vatWords = /(?:ндс|сч[её]т[\s-]?фактур|книг[аи]|vat|invoice|book|register)/i;
  const closeWords =
    /(?:закрыти|месяц|затрат|распредел|списан|period\s*close|month\s*close|allocation|residual|cost)/i;
  const contextualSettlementWords =
    /(?:60(?:\.\d{2})?|62(?:\.\d{2})?|оплат|расч[её]т|аванс|долг|settlement|payment)/i;

  // 1. Fixed assets. The account signal needs both a detected account and an explicit mention.
  const fixedAssetByWords = fixedAssetWords.test(messageCorpus);
  const fixedAssetByAccount =
    focusAccounts.some((account) => isFixedAssetAccount(account)) && fixedAssetAccountMention.test(messageCorpus);
  if (fixedAssetByWords || fixedAssetByAccount) {
    return "fixed_asset_amortization";
  }

  // 2. Settlements.
  if (focusAccounts.some((account) => isSettlementAccount(account)) || settlementWords.test(messageCorpus)) {
    return "settlements_60_62";
  }

  // 3. VAT.
  if (focusAccounts.some((account) => isVatAccount(account)) || vatWords.test(messageCorpus)) {
    return "vat_document_register_book";
  }

  // 4. Month close / cost allocation.
  if (focusAccounts.some((account) => isCloseCostsAccount(account)) || closeWords.test(messageCorpus)) {
    return "month_close_costs_20_44";
  }

  // 5. Stay in settlements when the previous turn was there and the combined context still hints at it.
  const previousWasSettlement =
    previous.followup_context?.active_domain === "settlements_60_62" ||
    previous.focus.domain === "settlements_60_62";
  if (contextualSettlementWords.test(contextualCorpus) && previousWasSettlement) {
    return "settlements_60_62";
  }

  // 6. Fall back to routing.
  const routeDomain = deriveDomain(routeSummary);
  if (routeDomain && routeDomain !== "no_route") {
    return routeDomain;
  }

  // 7. Inherit from the previous state.
  return previous.followup_context?.active_domain ?? previous.focus.domain ?? null;
}
/**
 * Collects the ids of all requirements that are not fully covered: uncovered,
 * partially covered, awaiting clarification and out of scope, in that order.
 * The result is de-duplicated and capped at `INVESTIGATION_MAX_REQUIREMENT_LINKS`.
 */
function collectUncoveredRequirementIds(coverageReport: RequirementCoverageReport): string[] {
  const pendingIds: string[] = [];
  const buckets = [
    coverageReport.requirements_uncovered,
    coverageReport.requirements_partially_covered,
    coverageReport.clarification_needed_for,
    coverageReport.out_of_scope_requirements
  ];
  for (const bucket of buckets) {
    pendingIds.push(...bucket);
  }
  return capStrings(pendingIds, INVESTIGATION_MAX_REQUIREMENT_LINKS);
}
/**
 * Produces one `ref:status:route` line per retrieval result, where `ref` is the
 * result's first requirement id or, when it has none, its fragment id. Lines
 * are de-duplicated and at most six are kept.
 */
function collectEvidenceSummary(retrievalResults: UnifiedRetrievalResult[]): string[] {
  const summaryLines: string[] = [];
  for (const result of retrievalResults) {
    const requirementRef = result.requirement_ids[0] ?? result.fragment_id;
    summaryLines.push(`${requirementRef}:${result.status}:${result.route}`);
  }
  return capStrings(summaryLines, 6);
}
/**
 * Returns the canned list of next-step suggestions for the settlements domain,
 * or an empty list for any other domain (including `null`).
 */
function settlementFocusActions(activeDomain: string | null): string[] {
  return activeDomain === "settlements_60_62"
    ? [
        "Проверьте договор и объект расчетов по платежу.",
        "Сверьте регистр расчетов и привязку платежа к закрывающему документу.",
        "Проверьте зачет аванса или взаимозачет по связке 60/62."
      ]
    : [];
}
/**
 * Cleans a list of entity backlinks: `entity` and `id` are coerced to trimmed
 * strings, entries missing either part are dropped, and duplicates (same
 * `entity` + `id`) are removed keeping the first occurrence. Returned objects
 * carry only the `entity` and `id` fields.
 */
function normalizeEntityBacklinks(values: ProblemUnitEntityBacklink[]): ProblemUnitEntityBacklink[] {
  // Map preserves insertion order, so values() yields first occurrences in input order.
  const byKey = new Map<string, ProblemUnitEntityBacklink>();
  for (const backlink of values) {
    const entity = String(backlink.entity ?? "").trim();
    const id = String(backlink.id ?? "").trim();
    const isComplete = Boolean(entity) && Boolean(id);
    if (isComplete) {
      const key = `${entity}::${id}`;
      if (!byKey.has(key)) {
        byKey.set(key, { entity, id });
      }
    }
  }
  return Array.from(byKey.values());
}
/**
 * Concatenates the `problem_units` of every retrieval result, in order.
 * Results without `problem_units` contribute nothing.
 */
function collectProblemUnits(retrievalResults: UnifiedRetrievalResult[]): ProblemUnit[] {
  const units: ProblemUnit[] = [];
  for (const result of retrievalResults) {
    for (const unit of result.problem_units ?? []) {
      units.push(unit);
    }
  }
  return units;
}
/**
 * Returns a normalized, size-bounded copy of a problem-unit state.
 *
 * Id and type lists are trimmed, de-duplicated and capped at their respective
 * `INVESTIGATION_MAX_*` limits. Backlink entries get a trimmed id and cleaned
 * entity backlinks; entries with an empty id or no remaining backlinks are
 * dropped before the list is capped.
 */
function capProblemUnitState(state: InvestigationProblemUnitState): InvestigationProblemUnitState {
  const cleanedBacklinks: InvestigationProblemUnitState["problem_unit_backlinks"] = [];
  for (const entry of state.problem_unit_backlinks) {
    const problem_unit_id = String(entry.problem_unit_id ?? "").trim();
    const entity_backlinks = normalizeEntityBacklinks(entry.entity_backlinks ?? []);
    if (Boolean(problem_unit_id) && entity_backlinks.length > 0) {
      cleanedBacklinks.push({ problem_unit_id, entity_backlinks });
    }
  }

  const focusTypes = state.focus_problem_types.map((type) => String(type));

  return {
    active_problem_units: capStrings(state.active_problem_units, INVESTIGATION_MAX_ACTIVE_PROBLEM_UNITS),
    resolved_problem_units: capStrings(state.resolved_problem_units, INVESTIGATION_MAX_RESOLVED_PROBLEM_UNITS),
    problem_unit_backlinks: cleanedBacklinks.slice(0, INVESTIGATION_MAX_PROBLEM_UNIT_BACKLINKS),
    focus_problem_types: capStrings(
      focusTypes,
      INVESTIGATION_MAX_FOCUS_PROBLEM_TYPES
    ) as InvestigationProblemUnitState["focus_problem_types"]
  };
}
/**
 * Derives the problem-unit part of the next investigation state.
 *
 * When the current turn produced problem units, they become the active set and
 * previously active units that are no longer present move to the resolved set.
 * When the turn produced none, the previous active/resolved sets and focus
 * types are carried over unchanged (after capping).
 *
 * Fixes over the earlier implementation:
 * - A unit that is active in the current turn is removed from the resolved
 *   list, so an id that was resolved earlier and shows up again is no longer
 *   reported as both active and resolved.
 * - Backlinks are keyed by the same trimmed id that is stored in the active
 *   list, so they are no longer lost when a raw `problem_unit_id` carries
 *   surrounding whitespace.
 *
 * @param previous State of the previous turn (its `problem_unit_state` may be absent).
 * @param retrievalResults Retrieval results of the current turn.
 * @returns The next problem-unit state, or `undefined` when every part of it is empty.
 */
function updateProblemUnitState(
  previous: InvestigationStateWithProblemUnits,
  retrievalResults: UnifiedRetrievalResult[]
): InvestigationProblemUnitState | undefined {
  const previousState = previous.problem_unit_state;
  const previousActive = previousState?.active_problem_units ?? [];
  const previousResolved = previousState?.resolved_problem_units ?? [];
  const normalizeId = (value: unknown): string => String(value ?? "").trim();

  const currentProblemUnits = collectProblemUnits(retrievalResults);
  const currentIds = capStrings(
    currentProblemUnits.map((item) => String(item.problem_unit_id ?? "")),
    INVESTIGATION_MAX_ACTIVE_PROBLEM_UNITS
  );
  const currentIdSet = new Set(currentIds);
  const hasCurrentUnits = currentIds.length > 0;
  const currentTypes = capStrings(
    currentProblemUnits.map((item) => String(item.problem_unit_type ?? "")),
    INVESTIGATION_MAX_FOCUS_PROBLEM_TYPES
  ) as InvestigationProblemUnitState["focus_problem_types"];

  // Key backlinks by the normalized id; for duplicate ids the last unit with backlinks wins.
  const currentBacklinksById = new Map<string, ProblemUnitEntityBacklink[]>();
  for (const unit of currentProblemUnits) {
    const unitId = normalizeId(unit.problem_unit_id);
    if (!currentIdSet.has(unitId)) {
      continue;
    }
    const entityBacklinks = normalizeEntityBacklinks(unit.entity_backlinks ?? []);
    if (entityBacklinks.length > 0) {
      currentBacklinksById.set(unitId, entityBacklinks);
    }
  }
  const previousBacklinksById = new Map<string, ProblemUnitEntityBacklink[]>(
    (previousState?.problem_unit_backlinks ?? []).map(
      (item) => [normalizeId(item.problem_unit_id), item.entity_backlinks] as const
    )
  );

  const active_problem_units = hasCurrentUnits
    ? currentIds
    : capStrings(previousActive, INVESTIGATION_MAX_ACTIVE_PROBLEM_UNITS);

  // Units that dropped out of the active set come first; anything active now cannot be resolved.
  const isNotCurrentlyActive = (id: string): boolean => !currentIdSet.has(normalizeId(id));
  const resolved_problem_units = hasCurrentUnits
    ? capStrings(
        [...previousActive.filter(isNotCurrentlyActive), ...previousResolved.filter(isNotCurrentlyActive)],
        INVESTIGATION_MAX_RESOLVED_PROBLEM_UNITS
      )
    : capStrings(previousResolved, INVESTIGATION_MAX_RESOLVED_PROBLEM_UNITS);

  // Prefer backlinks from the current turn; fall back to the ones remembered for the same unit.
  const problem_unit_backlinks = active_problem_units
    .map((problemUnitId) => {
      const entity_backlinks = normalizeEntityBacklinks(
        currentBacklinksById.get(problemUnitId) ?? previousBacklinksById.get(problemUnitId) ?? []
      );
      if (entity_backlinks.length === 0) {
        return null;
      }
      return {
        problem_unit_id: problemUnitId,
        entity_backlinks
      };
    })
    .filter((item): item is NonNullable<typeof item> => item !== null)
    .slice(0, INVESTIGATION_MAX_PROBLEM_UNIT_BACKLINKS);

  const focus_problem_types =
    currentTypes.length > 0
      ? currentTypes
      : (capStrings(
          (previousState?.focus_problem_types ?? []).map((item) => String(item)),
          INVESTIGATION_MAX_FOCUS_PROBLEM_TYPES
        ) as InvestigationProblemUnitState["focus_problem_types"]);

  const nextState = capProblemUnitState({
    active_problem_units,
    resolved_problem_units,
    problem_unit_backlinks,
    focus_problem_types
  });

  const isEmpty =
    nextState.active_problem_units.length === 0 &&
    nextState.resolved_problem_units.length === 0 &&
    nextState.problem_unit_backlinks.length === 0 &&
    nextState.focus_problem_types.length === 0;
  return isEmpty ? undefined : nextState;
}
/**
 * Creates a copy of an investigation state whose array fields are fresh arrays,
 * so mutating lists on the copy does not touch the source.
 *
 * `focus` and `followup_context` are copied one level deep with their arrays
 * duplicated; optional follow-up lists are duplicated only when present.
 * A present `problem_unit_state` is additionally passed through
 * `capProblemUnitState`, so the copy holds the normalized/capped form.
 *
 * @returns `null` for a missing state, otherwise the copy.
 */
export function cloneInvestigationState(state: InvestigationStateWithProblemUnits | null): InvestigationStateWithProblemUnits | null {
  if (!state) {
    return null;
  }

  let followupCopy: InvestigationStateWithProblemUnits["followup_context"] = null;
  const followupSource = state.followup_context;
  if (followupSource) {
    const context = {
      ...followupSource,
      referenced_requirement_ids: [...followupSource.referenced_requirement_ids]
    };
    // These keys already exist on `context` via the spread, so reassigning keeps key order.
    if (followupSource.active_requirement_ids) {
      context.active_requirement_ids = [...followupSource.active_requirement_ids];
    }
    if (followupSource.uncovered_requirement_ids) {
      context.uncovered_requirement_ids = [...followupSource.uncovered_requirement_ids];
    }
    if (followupSource.settlement_next_actions) {
      context.settlement_next_actions = [...followupSource.settlement_next_actions];
    }
    if (followupSource.evidence_summary) {
      context.evidence_summary = [...followupSource.evidence_summary];
    }
    followupCopy = context;
  }

  const copy: InvestigationStateWithProblemUnits = {
    ...state,
    focus: {
      ...state.focus,
      primary_accounts: [...state.focus.primary_accounts]
    },
    evidence_refs: [...state.evidence_refs],
    open_uncertainties: [...state.open_uncertainties],
    followup_context: followupCopy
  };

  const unitState = state.problem_unit_state;
  if (unitState) {
    copy.problem_unit_state = capProblemUnitState({
      active_problem_units: [...unitState.active_problem_units],
      resolved_problem_units: [...unitState.resolved_problem_units],
      problem_unit_backlinks: unitState.problem_unit_backlinks.map((entry) => ({
        problem_unit_id: entry.problem_unit_id,
        entity_backlinks: [...entry.entity_backlinks]
      })),
      focus_problem_types: [...unitState.focus_problem_types]
    });
  }
  return copy;
}
/**
 * Builds the initial, idle investigation state for a session: turn index 0, no
 * question, empty focus, no evidence or uncertainties, no follow-up context.
 *
 * @param sessionId Stored as `session_id`.
 * @param timestamp Stored as `updated_at`; defaults to the current time in ISO format.
 */
export function createEmptyInvestigationState(
  sessionId: string,
  timestamp = new Date().toISOString()
): InvestigationStateWithProblemUnits {
  const emptyFocus: InvestigationStateWithProblemUnits["focus"] = {
    domain: null,
    period: null,
    primary_accounts: [],
    active_query_subject: null
  };
  const initialState: InvestigationStateWithProblemUnits = {
    schema_version: INVESTIGATION_STATE_SCHEMA_VERSION,
    session_id: sessionId,
    status: "idle",
    turn_index: 0,
    updated_at: timestamp,
    question_id: null,
    focus: emptyFocus,
    narrowing_status: "unknown",
    evidence_refs: [],
    open_uncertainties: [],
    last_answer_mode: null,
    followup_context: null,
    query_mode_hint: "direct_answer"
  };
  return initialState;
}
/**
 * Produces the investigation state for the turn that just completed.
 *
 * The new state is marked active with the turn index advanced by one. Focus
 * accounts found in the current message are listed ahead of the previous ones;
 * the period falls back to the previous period when the message names none;
 * evidence refs of this turn are listed ahead of earlier ones. Open
 * uncertainties are rebuilt from this turn only. The follow-up context records
 * the inferred active domain, requirement ids, uncovered ids, the first active
 * problem unit and a short evidence summary. `problem_unit_state` is attached
 * only when `updateProblemUnitState` yields one.
 *
 * @param input Previous state plus the artifacts of the current turn.
 * @returns A newly built state object.
 */
export function updateInvestigationState(input: UpdateInvestigationStateInput): InvestigationStateWithProblemUnits {
  const { previous, userMessage, routeSummary, coverageReport, retrievalResults } = input;

  // Accounts: the current message takes priority over what was remembered.
  const messageAccounts = capStrings(detectAccounts(userMessage), INVESTIGATION_MAX_PRIMARY_ACCOUNTS);
  const primaryAccounts = capStrings(
    messageAccounts.concat(previous.focus.primary_accounts),
    INVESTIGATION_MAX_PRIMARY_ACCOUNTS
  );

  const requirementIds = capStrings(
    input.requirements.map((requirement) => requirement.requirement_id),
    INVESTIGATION_MAX_REQUIREMENT_LINKS
  );
  const querySubject = input.requirements[0]?.requirement_text ?? userMessage;

  const problemUnitState = updateProblemUnitState(previous, retrievalResults);
  const uncoveredRequirementIds = collectUncoveredRequirementIds(coverageReport);

  // Domain inference only looks at accounts from the current message, not the merged list.
  const activeDomain = inferFollowupActiveDomain({
    userMessage,
    focusAccounts: messageAccounts,
    routeSummary,
    previous
  });
  const focusDomain = activeDomain ?? deriveDomain(routeSummary) ?? previous.focus.domain;
  const settlementNextActions = settlementFocusActions(activeDomain);
  const lastProblemUnitId = problemUnitState?.active_problem_units[0] ?? null;
  const evidenceSummary = collectEvidenceSummary(retrievalResults);

  const focus: InvestigationStateWithProblemUnits["focus"] = {
    domain: focusDomain,
    period: detectPeriod(userMessage) ?? previous.focus.period,
    primary_accounts: primaryAccounts,
    active_query_subject: querySubject.slice(0, 180)
  };

  const followupContext: NonNullable<InvestigationStateWithProblemUnits["followup_context"]> = {
    previous_question_id: previous.question_id,
    last_user_message: userMessage.slice(0, 240),
    referenced_requirement_ids: requirementIds,
    active_domain: activeDomain,
    active_requirement_ids: requirementIds,
    uncovered_requirement_ids: uncoveredRequirementIds,
    last_problem_unit_id: lastProblemUnitId,
    settlement_next_actions: settlementNextActions,
    evidence_summary: evidenceSummary
  };

  const nextState: InvestigationStateWithProblemUnits = {
    schema_version: INVESTIGATION_STATE_SCHEMA_VERSION,
    session_id: previous.session_id,
    status: "active",
    turn_index: previous.turn_index + 1,
    updated_at: input.timestamp,
    question_id: input.questionId,
    focus,
    narrowing_status: deriveNarrowingStatus(routeSummary, coverageReport),
    evidence_refs: capStrings(
      collectEvidenceRefs(retrievalResults).concat(previous.evidence_refs),
      INVESTIGATION_MAX_EVIDENCE_REFS
    ),
    open_uncertainties: collectOpenUncertainties(coverageReport, retrievalResults),
    last_answer_mode: input.replyType,
    followup_context: followupContext,
    query_mode_hint: deriveQueryModeHint(routeSummary)
  };

  // The key is omitted entirely (not set to undefined) when there is nothing to record.
  if (problemUnitState) {
    nextState.problem_unit_state = problemUnitState;
  }
  return nextState;
}