530 lines
19 KiB
TypeScript
530 lines
19 KiB
TypeScript
import type {
|
||
AssistantRequirement,
|
||
RequirementCoverageReport,
|
||
UnifiedRetrievalResult
|
||
} from "../types/assistant";
|
||
import type { RouteHintSummary } from "../types/normalizer";
|
||
import type {
|
||
InvestigationLastAnswerMode,
|
||
InvestigationNarrowingStatus,
|
||
InvestigationState
|
||
} from "../types/stage1Contracts";
|
||
import {
|
||
INVESTIGATION_MAX_EVIDENCE_REFS,
|
||
INVESTIGATION_MAX_PRIMARY_ACCOUNTS,
|
||
INVESTIGATION_MAX_REQUIREMENT_LINKS,
|
||
INVESTIGATION_MAX_UNCERTAINTIES,
|
||
INVESTIGATION_STATE_SCHEMA_VERSION
|
||
} from "../types/stage1Contracts";
|
||
import type {
|
||
InvestigationProblemUnitState,
|
||
InvestigationStateWithProblemUnits,
|
||
ProblemUnit,
|
||
ProblemUnitEntityBacklink
|
||
} from "../types/stage2ProblemUnits";
|
||
import {
|
||
INVESTIGATION_MAX_ACTIVE_PROBLEM_UNITS,
|
||
INVESTIGATION_MAX_FOCUS_PROBLEM_TYPES,
|
||
INVESTIGATION_MAX_PROBLEM_UNIT_BACKLINKS,
|
||
INVESTIGATION_MAX_RESOLVED_PROBLEM_UNITS
|
||
} from "../types/stage2ProblemUnits";
|
||
|
||
/**
 * Everything `updateInvestigationState` needs to fold one assistant turn into
 * the running investigation state.
 */
interface UpdateInvestigationStateInput {
  /** State left by the previous turn; session id, turn index and focus are carried forward from it. */
  previous: InvestigationStateWithProblemUnits;
  /** Stored as `updated_at` on the new state. */
  timestamp: string;
  /** Stored as `question_id` on the new state. */
  questionId: string;
  /** Raw user message; scanned for account numbers, a period and domain keywords. */
  userMessage: string;
  /** Routing summary for the turn, or `null` when none is available. */
  routeSummary: RouteHintSummary | null;
  /** Requirements of the turn; the first one supplies the active query subject. */
  requirements: Array<AssistantRequirement>;
  /** Coverage buckets used for open uncertainties and uncovered requirement ids. */
  coverageReport: RequirementCoverageReport;
  /** Retrieval output supplying evidence ids, limitations and problem units. */
  retrievalResults: Array<UnifiedRetrievalResult>;
  /** Stored as `last_answer_mode` on the new state. */
  replyType: InvestigationLastAnswerMode;
}
|
||
|
||
/**
 * Trims every entry, drops the ones that end up empty and removes duplicates,
 * keeping the first occurrence of each value in its original position.
 */
function uniqueStrings(values: string[]): string[] {
  const seen = new Set<string>();
  for (const raw of values) {
    const trimmed = raw.trim();
    if (trimmed) {
      seen.add(trimmed);
    }
  }
  return [...seen];
}
|
||
|
||
/**
 * Normalizes `values` with `uniqueStrings` and keeps at most the first `max`
 * entries of the result.
 */
function capStrings(values: string[], max: number): string[] {
  const distinct = uniqueStrings(values);
  return distinct.slice(0, max);
}
|
||
|
||
/**
 * Extracts two-digit account tokens (optionally with a `.NN` sub-account,
 * e.g. `60` or `60.01`) from free text.
 *
 * Calendar tokens are blanked out first so that their two-digit parts are not
 * reported as accounts: `2024-01` used to yield `01` and `01.02.2024` used to
 * yield `01.02`. Handled forms are `DD.MM.YYYY` and `20YY-MM` / `20YY/MM` /
 * `20YY.MM` with an optional trailing day.
 *
 * @param text Free-form user text.
 * @returns Distinct account tokens in order of appearance, capped at
 *   `INVESTIGATION_MAX_PRIMARY_ACCOUNTS`.
 */
function detectAccounts(text: string): string[] {
  const withoutDates = text
    .replace(/\b\d{2}\.\d{2}\.\d{4}\b/g, " ")
    .replace(/\b20\d{2}[-/.](?:0[1-9]|1[0-2])(?:[-/.]\d{2})?\b/g, " ");
  const candidates = withoutDates.match(/\b\d{2}(?:\.\d{2})?\b/g) ?? [];
  return capStrings(candidates, INVESTIGATION_MAX_PRIMARY_ACCOUNTS);
}
|
||
|
||
/**
 * Finds the first period mentioned in `text`.
 *
 * A year-month token (`20YY-MM`, `20YY/MM` or `20YY.MM`) wins and is returned
 * as `20YY-MM`; otherwise the first standalone `20YY` year is returned.
 *
 * @returns The normalized period, or `null` when neither form is present.
 */
function detectPeriod(text: string): string | null {
  const monthMatch = /\b(20\d{2})[-/.](0[1-9]|1[0-2])\b/.exec(text);
  if (monthMatch !== null) {
    const [, year, month] = monthMatch;
    return `${year}-${month}`;
  }

  const yearMatch = /\b(20\d{2})\b/.exec(text);
  return yearMatch !== null ? yearMatch[1] : null;
}
|
||
|
||
/**
 * Turns a route summary into a domain label.
 *
 * - no summary: `null`
 * - `legacy_v1` summary: its `route_hint`
 * - otherwise: the distinct routes of all decisions except `no_route`, joined
 *   with commas, or `"no_route"` when nothing else is left.
 */
function deriveDomain(routeSummary: RouteHintSummary | null): string | null {
  if (!routeSummary) {
    return null;
  }
  if (routeSummary.mode === "legacy_v1") {
    return routeSummary.route_hint;
  }

  const concreteRoutes: string[] = [];
  for (const decision of routeSummary.decisions) {
    if (decision.route !== "no_route") {
      concreteRoutes.push(decision.route);
    }
  }

  const distinctRoutes = uniqueStrings(concreteRoutes);
  return distinctRoutes.length > 0 ? distinctRoutes.join(",") : "no_route";
}
|
||
|
||
/**
 * Classifies how far the turn's routing narrowed the question.
 *
 * Checks run in order and the first hit wins:
 * 1. no summary -> `unknown`
 * 2. `legacy_v1` summary -> `not_needed`
 * 3. clarification fallback, or any requirement awaiting clarification ->
 *    `needs_clarification`
 * 4. any decision routed to `no_route` -> `broad_guarded`
 * 5. more than one decision -> `applied`, otherwise `not_needed`
 */
function deriveNarrowingStatus(
  routeSummary: RouteHintSummary | null,
  coverageReport: RequirementCoverageReport
): InvestigationNarrowingStatus {
  if (!routeSummary) return "unknown";
  if (routeSummary.mode === "legacy_v1") return "not_needed";

  if (routeSummary.fallback.type === "clarification") return "needs_clarification";
  if (coverageReport.clarification_needed_for.length > 0) return "needs_clarification";

  for (const decision of routeSummary.decisions) {
    if (decision.route === "no_route") {
      return "broad_guarded";
    }
  }

  if (routeSummary.decisions.length > 1) {
    return "applied";
  }
  return "not_needed";
}
|
||
|
||
/**
 * Suggests how the next query should be handled.
 *
 * `direct_answer` is returned for a `legacy_v1` summary or when the summary's
 * fallback type is `none`; a missing summary or any other fallback yields
 * `investigation_candidate`.
 */
function deriveQueryModeHint(routeSummary: RouteHintSummary | null): InvestigationState["query_mode_hint"] {
  if (!routeSummary) {
    return "investigation_candidate";
  }
  const answersDirectly = routeSummary.mode === "legacy_v1" || routeSummary.fallback.type === "none";
  return answersDirectly ? "direct_answer" : "investigation_candidate";
}
|
||
|
||
/**
 * Gathers the `evidence_id` of every evidence item across all retrieval
 * results, deduplicated and capped at `INVESTIGATION_MAX_EVIDENCE_REFS`.
 */
function collectEvidenceRefs(retrievalResults: UnifiedRetrievalResult[]): string[] {
  const evidenceIds: string[] = [];
  for (const result of retrievalResults) {
    for (const item of result.evidence) {
      evidenceIds.push(item.evidence_id);
    }
  }
  return capStrings(evidenceIds, INVESTIGATION_MAX_EVIDENCE_REFS);
}
|
||
|
||
/**
 * Builds the list of open uncertainties for the turn.
 *
 * Requirement ids from the coverage report come first, each tagged with its
 * bucket (`uncovered:`, `partial:`, `clarify:`, `out_of_scope:`), followed by
 * at most six retrieval limitations. The combined list is deduplicated and
 * capped at `INVESTIGATION_MAX_UNCERTAINTIES`.
 */
function collectOpenUncertainties(
  coverageReport: RequirementCoverageReport,
  retrievalResults: UnifiedRetrievalResult[]
): string[] {
  const tagged = (prefix: string, ids: readonly string[]): string[] => ids.map((id) => `${prefix}:${id}`);

  const requirementNotes = [
    ...tagged("uncovered", coverageReport.requirements_uncovered),
    ...tagged("partial", coverageReport.requirements_partially_covered),
    ...tagged("clarify", coverageReport.clarification_needed_for),
    ...tagged("out_of_scope", coverageReport.out_of_scope_requirements)
  ];

  const limitations: string[] = [];
  for (const result of retrievalResults) {
    limitations.push(...result.limitations);
  }

  return capStrings(requirementNotes.concat(limitations.slice(0, 6)), INVESTIGATION_MAX_UNCERTAINTIES);
}
|
||
|
||
/**
 * Returns the two leading digits of an account token such as `60.01`.
 *
 * The value is stringified and trimmed first; `null` is returned when it is
 * empty or does not start with two digits.
 */
function normalizeAccountPrefix(value: string): string | null {
  const trimmed = String(value ?? "").trim();
  const leadingDigits = /^(\d{2})/.exec(trimmed);
  return leadingDigits ? leadingDigits[1] : null;
}
|
||
|
||
/** True when the account's two-digit prefix is 60, 62, 51 or 76. */
function isSettlementAccount(value: string): boolean {
  const prefix = normalizeAccountPrefix(value);
  return prefix !== null && ["60", "62", "51", "76"].includes(prefix);
}
|
||
|
||
/** True when the account's two-digit prefix is 19 or 68. */
function isVatAccount(value: string): boolean {
  switch (normalizeAccountPrefix(value)) {
    case "19":
    case "68":
      return true;
    default:
      return false;
  }
}
|
||
|
||
/** True when the account's two-digit prefix is 01, 02 or 08. */
function isFixedAssetAccount(value: string): boolean {
  const prefix = normalizeAccountPrefix(value);
  if (prefix === null) {
    return false;
  }
  return ["01", "02", "08"].includes(prefix);
}
|
||
|
||
/** True when the account's two-digit prefix is 97 or lies in 20..44 inclusive. */
function isCloseCostsAccount(value: string): boolean {
  const prefix = normalizeAccountPrefix(value);
  if (prefix === null) {
    return false;
  }
  if (prefix === "97") {
    return true;
  }
  const numericPrefix = Number(prefix);
  return numericPrefix >= 20 && numericPrefix <= 44;
}
|
||
|
||
/**
 * Picks the domain a follow-up turn is about.
 *
 * Signals are tried in a fixed order and the first hit wins:
 * 1. fixed assets: keywords in the message, or a fixed-asset focus account
 *    that is also spelled out in the message
 * 2. settlements: a settlement focus account, or settlement keywords / a 60 or
 *    62 account token in the message
 * 3. VAT: a VAT focus account or VAT keywords
 * 4. month close / costs: a close-costs focus account or closing keywords
 * 5. settlement terms in the message or the previous query subject while the
 *    previous turn was already in the settlement domain
 * 6. the routed domain, unless it is `no_route`
 * 7. the previous follow-up domain, then the previous focus domain
 *
 * The 60/62 account tokens must not be part of a longer digit run. Without
 * that guard any amount, year or identifier containing "60" or "62" (`1600`,
 * `2062`) selected the settlement domain ahead of the VAT and close checks.
 *
 * @param input.userMessage Raw message of the current turn.
 * @param input.focusAccounts Account tokens detected for the current turn.
 * @param input.routeSummary Routing summary, or `null`.
 * @param input.previous State left by the previous turn.
 * @returns The inferred domain label, or `null` when nothing applies.
 */
function inferFollowupActiveDomain(input: {
  userMessage: string;
  focusAccounts: string[];
  routeSummary: RouteHintSummary | null;
  previous: InvestigationStateWithProblemUnits;
}): string | null {
  const messageCorpus = String(input.userMessage ?? "").toLowerCase();
  const contextualCorpus = `${messageCorpus} ${input.previous.focus.active_query_subject ?? ""}`.toLowerCase();

  const hasFixedAssetLexicalSignal =
    /(?:амортиз|основн(ые|ых|ым)?\s+средств|(?:^|[^a-zа-яё])ос(?:$|[^a-zа-яё])|объект[а-яё]*\s+ос|fixed\s*asset|depreciat)/i.test(
      messageCorpus
    );
  // An account signal needs both a detected fixed-asset account and an explicit mention in the message.
  const hasFixedAssetAccountSignal =
    input.focusAccounts.some((item) => isFixedAssetAccount(item)) &&
    /(?:сч[её]т(?:а|у|ом|е)?\s*(?:01|02|08)|(?:01|02|08)(?:\.\d{2})?\s*\/\s*(?:01|02|08)(?:\.\d{2})?|\b0[128](?:\.\d{2})?\b)/i.test(
      messageCorpus
    );
  if (hasFixedAssetLexicalSignal || hasFixedAssetAccountSignal) {
    return "fixed_asset_amortization";
  }

  // Digit lookarounds keep "60"/"62" from matching inside longer numbers.
  const hasSettlementSignal =
    input.focusAccounts.some((item) => isSettlementAccount(item)) ||
    /(?:(?<!\d)6[02](?:\.\d{2})?(?!\d)|оплат|расч[её]т|зач[её]т|аванс|долг|поставщ|покупат|settlement|payment|supplier|customer)/i.test(
      messageCorpus
    );
  if (hasSettlementSignal) {
    return "settlements_60_62";
  }

  const hasVatSignal =
    input.focusAccounts.some((item) => isVatAccount(item)) ||
    /(?:ндс|сч[её]т[\s-]?фактур|книг[аи]|vat|invoice|book|register)/i.test(messageCorpus);
  if (hasVatSignal) {
    return "vat_document_register_book";
  }

  const hasCloseSignal =
    input.focusAccounts.some((item) => isCloseCostsAccount(item)) ||
    /(?:закрыти|месяц|затрат|распредел|списан|period\s*close|month\s*close|allocation|residual|cost)/i.test(messageCorpus);
  if (hasCloseSignal) {
    return "month_close_costs_20_44";
  }

  // Settlement terms carried only by the previous query subject count when the
  // conversation was already in the settlement domain.
  const previousWasSettlement =
    input.previous.followup_context?.active_domain === "settlements_60_62" ||
    input.previous.focus.domain === "settlements_60_62";
  if (
    /(?:(?<!\d)6[02](?:\.\d{2})?(?!\d)|оплат|расч[её]т|аванс|долг|settlement|payment)/i.test(contextualCorpus) &&
    previousWasSettlement
  ) {
    return "settlements_60_62";
  }

  const routeDomain = deriveDomain(input.routeSummary);
  if (routeDomain && routeDomain !== "no_route") {
    return routeDomain;
  }

  return input.previous.followup_context?.active_domain ?? input.previous.focus.domain ?? null;
}
|
||
|
||
/**
 * Lists the requirement ids that are not fully covered: uncovered, partially
 * covered, awaiting clarification and out of scope, in that order.
 * The list is deduplicated and capped at `INVESTIGATION_MAX_REQUIREMENT_LINKS`.
 */
function collectUncoveredRequirementIds(coverageReport: RequirementCoverageReport): string[] {
  const {
    requirements_uncovered: uncovered,
    requirements_partially_covered: partiallyCovered,
    clarification_needed_for: needsClarification,
    out_of_scope_requirements: outOfScope
  } = coverageReport;

  const pendingIds = [...uncovered, ...partiallyCovered, ...needsClarification, ...outOfScope];
  return capStrings(pendingIds, INVESTIGATION_MAX_REQUIREMENT_LINKS);
}
|
||
|
||
/**
 * Produces one `reference:status:route` line per retrieval result, where the
 * reference is the result's first requirement id or, failing that, its
 * fragment id. Lines are deduplicated and at most six are kept.
 */
function collectEvidenceSummary(retrievalResults: UnifiedRetrievalResult[]): string[] {
  const summaryLines: string[] = [];
  for (const result of retrievalResults) {
    const reference = result.requirement_ids[0] ?? result.fragment_id;
    summaryLines.push(`${reference}:${result.status}:${result.route}`);
  }
  return capStrings(summaryLines, 6);
}
|
||
|
||
/**
 * Returns the three canned next-step suggestions for the settlement domain
 * (`settlements_60_62`), or an empty list for any other domain.
 */
function settlementFocusActions(activeDomain: string | null): string[] {
  const isSettlementDomain = activeDomain === "settlements_60_62";
  return isSettlementDomain
    ? [
        "Проверьте договор и объект расчетов по платежу.",
        "Сверьте регистр расчетов и привязку платежа к закрывающему документу.",
        "Проверьте зачет аванса или взаимозачет по связке 60/62."
      ]
    : [];
}
|
||
|
||
/**
 * Cleans a list of entity backlinks: `entity` and `id` are stringified and
 * trimmed, entries missing either one are dropped, and duplicates (same entity
 * and id) keep only their first occurrence. Returned entries are new objects.
 */
function normalizeEntityBacklinks(values: ProblemUnitEntityBacklink[]): ProblemUnitEntityBacklink[] {
  const byKey = new Map<string, ProblemUnitEntityBacklink>();
  for (const item of values) {
    const entity = String(item.entity ?? "").trim();
    const id = String(item.id ?? "").trim();
    const key = `${entity}::${id}`;
    if (entity && id && !byKey.has(key)) {
      byKey.set(key, { entity, id });
    }
  }
  return Array.from(byKey.values());
}
|
||
|
||
/**
 * Concatenates the `problem_units` of all retrieval results in order; results
 * without problem units contribute nothing.
 */
function collectProblemUnits(retrievalResults: UnifiedRetrievalResult[]): ProblemUnit[] {
  const units: ProblemUnit[] = [];
  for (const result of retrievalResults) {
    units.push(...(result.problem_units ?? []));
  }
  return units;
}
|
||
|
||
/**
 * Returns a normalized copy of a problem-unit state with every list capped at
 * its configured maximum.
 *
 * Id and type lists are trimmed and deduplicated. Backlink entries get a
 * trimmed id and cleaned entity backlinks; entries left without an id or
 * without any backlink are dropped before the cap is applied.
 */
function capProblemUnitState(state: InvestigationProblemUnitState): InvestigationProblemUnitState {
  const activeIds = capStrings(state.active_problem_units, INVESTIGATION_MAX_ACTIVE_PROBLEM_UNITS);
  const resolvedIds = capStrings(state.resolved_problem_units, INVESTIGATION_MAX_RESOLVED_PROBLEM_UNITS);

  const cleanedBacklinks = state.problem_unit_backlinks.map((entry) => ({
    problem_unit_id: String(entry.problem_unit_id ?? "").trim(),
    entity_backlinks: normalizeEntityBacklinks(entry.entity_backlinks ?? [])
  }));
  const usableBacklinks = cleanedBacklinks.filter(
    (entry) => Boolean(entry.problem_unit_id) && entry.entity_backlinks.length > 0
  );

  const focusTypes = capStrings(
    state.focus_problem_types.map((type) => String(type)),
    INVESTIGATION_MAX_FOCUS_PROBLEM_TYPES
  ) as InvestigationProblemUnitState["focus_problem_types"];

  return {
    active_problem_units: activeIds,
    resolved_problem_units: resolvedIds,
    problem_unit_backlinks: usableBacklinks.slice(0, INVESTIGATION_MAX_PROBLEM_UNIT_BACKLINKS),
    focus_problem_types: focusTypes
  };
}
|
||
|
||
/**
 * Derives the problem-unit state for the new turn from the previous state and
 * the problem units found in this turn's retrieval results.
 *
 * - When the turn surfaced problem units, they become the active set;
 *   previously active units that are no longer present move to the resolved
 *   list. Otherwise the previous active and resolved lists are kept.
 * - Backlinks are kept for active units only, preferring this turn's backlinks
 *   over the previous ones.
 * - Focus problem types come from this turn's units, falling back to the
 *   previous types when the turn has none.
 *
 * Ids are trimmed once and that normalized form is used for list membership
 * and for backlink lookup alike, so an id with stray whitespace no longer
 * loses its backlinks. A unit that is active in this turn is never listed as
 * resolved, even if an earlier turn had already resolved it.
 *
 * @returns The capped state, or `undefined` when every list is empty.
 */
function updateProblemUnitState(
  previous: InvestigationStateWithProblemUnits,
  retrievalResults: UnifiedRetrievalResult[]
): InvestigationProblemUnitState | undefined {
  const previousState = previous.problem_unit_state;
  const currentProblemUnits = collectProblemUnits(retrievalResults);
  const normalizeId = (value: unknown): string => String(value ?? "").trim();

  const currentIds = capStrings(
    currentProblemUnits.map((item) => normalizeId(item.problem_unit_id)),
    INVESTIGATION_MAX_ACTIVE_PROBLEM_UNITS
  );
  const currentIdSet = new Set(currentIds);
  const currentTypes = capStrings(
    currentProblemUnits.map((item) => String(item.problem_unit_type ?? "")),
    INVESTIGATION_MAX_FOCUS_PROBLEM_TYPES
  ) as InvestigationProblemUnitState["focus_problem_types"];

  // Keyed by normalized id; a later unit with the same id overrides an earlier one.
  const currentBacklinksById = new Map<string, ProblemUnitEntityBacklink[]>();
  for (const unit of currentProblemUnits) {
    const unitId = normalizeId(unit.problem_unit_id);
    if (!currentIdSet.has(unitId)) {
      continue;
    }
    const entityBacklinks = normalizeEntityBacklinks(unit.entity_backlinks ?? []);
    if (entityBacklinks.length > 0) {
      currentBacklinksById.set(unitId, entityBacklinks);
    }
  }

  const previousBacklinksById = new Map<string, ProblemUnitEntityBacklink[]>();
  for (const entry of previousState?.problem_unit_backlinks ?? []) {
    previousBacklinksById.set(normalizeId(entry.problem_unit_id), entry.entity_backlinks);
  }

  const hasCurrentUnits = currentIds.length > 0;
  const isNotActiveNow = (id: string): boolean => !currentIdSet.has(normalizeId(id));

  const active_problem_units = hasCurrentUnits
    ? currentIds
    : capStrings(previousState?.active_problem_units ?? [], INVESTIGATION_MAX_ACTIVE_PROBLEM_UNITS);

  const resolved_problem_units = hasCurrentUnits
    ? capStrings(
        [
          ...(previousState?.active_problem_units ?? []).filter(isNotActiveNow),
          // A unit that shows up again is active, so it must leave the resolved list.
          ...(previousState?.resolved_problem_units ?? []).filter(isNotActiveNow)
        ],
        INVESTIGATION_MAX_RESOLVED_PROBLEM_UNITS
      )
    : capStrings(previousState?.resolved_problem_units ?? [], INVESTIGATION_MAX_RESOLVED_PROBLEM_UNITS);

  const problem_unit_backlinks = active_problem_units
    .map((problemUnitId) => {
      const entity_backlinks = normalizeEntityBacklinks(
        currentBacklinksById.get(problemUnitId) ?? previousBacklinksById.get(problemUnitId) ?? []
      );
      if (entity_backlinks.length === 0) {
        return null;
      }
      return {
        problem_unit_id: problemUnitId,
        entity_backlinks
      };
    })
    .filter((item): item is NonNullable<typeof item> => item !== null)
    .slice(0, INVESTIGATION_MAX_PROBLEM_UNIT_BACKLINKS);

  const focus_problem_types =
    currentTypes.length > 0
      ? currentTypes
      : (capStrings(
          (previousState?.focus_problem_types ?? []).map((item) => String(item)),
          INVESTIGATION_MAX_FOCUS_PROBLEM_TYPES
        ) as InvestigationProblemUnitState["focus_problem_types"]);

  const nextState = capProblemUnitState({
    active_problem_units,
    resolved_problem_units,
    problem_unit_backlinks,
    focus_problem_types
  });

  const isEmpty =
    nextState.active_problem_units.length === 0 &&
    nextState.resolved_problem_units.length === 0 &&
    nextState.problem_unit_backlinks.length === 0 &&
    nextState.focus_problem_types.length === 0;

  return isEmpty ? undefined : nextState;
}
|
||
|
||
/**
 * Copies an investigation state so that its lists can be changed without
 * touching the source object.
 *
 * Top-level fields are spread, then `focus.primary_accounts`, `evidence_refs`,
 * `open_uncertainties` and the follow-up context lists are replaced by fresh
 * arrays. Optional follow-up lists are copied only when present. A problem-unit
 * state, when present, is rebuilt through `capProblemUnitState`, so the copy is
 * also normalized and capped.
 *
 * @returns The copy, or `null` when `state` is `null`.
 */
export function cloneInvestigationState(state: InvestigationStateWithProblemUnits | null): InvestigationStateWithProblemUnits | null {
  if (!state) {
    return null;
  }

  const context = state.followup_context;
  const unitState = state.problem_unit_state;

  const snapshot: InvestigationStateWithProblemUnits = {
    ...state,
    focus: {
      ...state.focus,
      primary_accounts: [...state.focus.primary_accounts]
    },
    evidence_refs: [...state.evidence_refs],
    open_uncertainties: [...state.open_uncertainties],
    followup_context: context
      ? {
          ...context,
          referenced_requirement_ids: [...context.referenced_requirement_ids],
          ...(context.active_requirement_ids && {
            active_requirement_ids: [...context.active_requirement_ids]
          }),
          ...(context.uncovered_requirement_ids && {
            uncovered_requirement_ids: [...context.uncovered_requirement_ids]
          }),
          ...(context.settlement_next_actions && {
            settlement_next_actions: [...context.settlement_next_actions]
          }),
          ...(context.evidence_summary && {
            evidence_summary: [...context.evidence_summary]
          })
        }
      : null
  };

  if (!unitState) {
    return snapshot;
  }

  snapshot.problem_unit_state = capProblemUnitState({
    active_problem_units: [...unitState.active_problem_units],
    resolved_problem_units: [...unitState.resolved_problem_units],
    problem_unit_backlinks: unitState.problem_unit_backlinks.map((entry) => ({
      problem_unit_id: entry.problem_unit_id,
      entity_backlinks: [...entry.entity_backlinks]
    })),
    focus_problem_types: [...unitState.focus_problem_types]
  });
  return snapshot;
}
|
||
|
||
/**
 * Creates the initial, idle investigation state for a session: turn index 0,
 * no question, empty focus, no evidence or uncertainties, no follow-up context.
 *
 * @param sessionId Stored as `session_id`.
 * @param timestamp Stored as `updated_at`; defaults to the current time in ISO format.
 */
export function createEmptyInvestigationState(
  sessionId: string,
  timestamp = new Date().toISOString()
): InvestigationStateWithProblemUnits {
  const emptyFocus: InvestigationStateWithProblemUnits["focus"] = {
    domain: null,
    period: null,
    primary_accounts: [],
    active_query_subject: null
  };

  const initialState: InvestigationStateWithProblemUnits = {
    schema_version: INVESTIGATION_STATE_SCHEMA_VERSION,
    session_id: sessionId,
    status: "idle",
    turn_index: 0,
    updated_at: timestamp,
    question_id: null,
    focus: emptyFocus,
    narrowing_status: "unknown",
    evidence_refs: [],
    open_uncertainties: [],
    last_answer_mode: null,
    followup_context: null,
    query_mode_hint: "direct_answer"
  };
  return initialState;
}
|
||
|
||
/**
 * Folds one assistant turn into the investigation state and returns the new
 * state; `input.previous` is not modified.
 *
 * - The turn index is incremented and the status becomes `active`.
 * - Focus: the domain is the inferred follow-up domain, else the routed
 *   domain, else the previous one; the period is taken from the message or
 *   carried over; accounts from the message are placed ahead of the previous
 *   ones; the subject is the first requirement's text (or the message) cut to
 *   180 characters.
 * - Evidence refs from this turn are placed ahead of the previous ones.
 * - Open uncertainties are rebuilt from this turn only.
 * - The follow-up context records the previous question id, the message cut to
 *   240 characters, requirement ids, uncovered ids, the first active problem
 *   unit, settlement next actions and an evidence summary.
 * - `problem_unit_state` is set only when the turn produced a non-empty one.
 */
export function updateInvestigationState(input: UpdateInvestigationStateInput): InvestigationStateWithProblemUnits {
  const { previous, userMessage, routeSummary, requirements, coverageReport, retrievalResults } = input;

  // detectAccounts already returns a deduplicated, capped list.
  const accountsInMessage = detectAccounts(userMessage);
  const focusAccounts = capStrings(
    accountsInMessage.concat(previous.focus.primary_accounts),
    INVESTIGATION_MAX_PRIMARY_ACCOUNTS
  );

  const requirementIds = capStrings(
    requirements.map((requirement) => requirement.requirement_id),
    INVESTIGATION_MAX_REQUIREMENT_LINKS
  );
  const querySubject = requirements[0]?.requirement_text ?? userMessage;

  const activeDomain = inferFollowupActiveDomain({
    userMessage,
    focusAccounts: accountsInMessage,
    routeSummary,
    previous
  });
  const focusDomain = activeDomain ?? deriveDomain(routeSummary) ?? previous.focus.domain;

  const problemUnitState = updateProblemUnitState(previous, retrievalResults);
  const evidenceRefs = capStrings(
    collectEvidenceRefs(retrievalResults).concat(previous.evidence_refs),
    INVESTIGATION_MAX_EVIDENCE_REFS
  );

  const nextState: InvestigationStateWithProblemUnits = {
    schema_version: INVESTIGATION_STATE_SCHEMA_VERSION,
    session_id: previous.session_id,
    status: "active",
    turn_index: previous.turn_index + 1,
    updated_at: input.timestamp,
    question_id: input.questionId,
    focus: {
      domain: focusDomain,
      period: detectPeriod(userMessage) ?? previous.focus.period,
      primary_accounts: focusAccounts,
      active_query_subject: querySubject.slice(0, 180)
    },
    narrowing_status: deriveNarrowingStatus(routeSummary, coverageReport),
    evidence_refs: evidenceRefs,
    open_uncertainties: collectOpenUncertainties(coverageReport, retrievalResults),
    last_answer_mode: input.replyType,
    followup_context: {
      previous_question_id: previous.question_id,
      last_user_message: userMessage.slice(0, 240),
      referenced_requirement_ids: requirementIds,
      active_domain: activeDomain,
      active_requirement_ids: requirementIds,
      uncovered_requirement_ids: collectUncoveredRequirementIds(coverageReport),
      last_problem_unit_id: problemUnitState?.active_problem_units[0] ?? null,
      settlement_next_actions: settlementFocusActions(activeDomain),
      evidence_summary: collectEvidenceSummary(retrievalResults)
    },
    query_mode_hint: deriveQueryModeHint(routeSummary)
  };

  if (problemUnitState) {
    nextState.problem_unit_state = problemUnitState;
  }
  return nextState;
}
|
||
|