NODEDC_1C/llm_normalizer/backend/dist/services/investigationState.js

258 lines
12 KiB
JavaScript

"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.cloneInvestigationState = cloneInvestigationState;
exports.createEmptyInvestigationState = createEmptyInvestigationState;
exports.updateInvestigationState = updateInvestigationState;
const stage1Contracts_1 = require("../types/stage1Contracts");
const stage2ProblemUnits_1 = require("../types/stage2ProblemUnits");
/**
 * Normalises a list of strings: trims each entry, drops entries that are
 * empty after trimming, and removes duplicates while keeping the order in
 * which each distinct value first appears.
 *
 * @param {string[]} values - Entries to normalise; `trim()` is called on each one.
 * @returns {string[]} A new array of distinct, non-empty, trimmed strings.
 */
function uniqueStrings(values) {
    const distinct = new Set();
    values.forEach((raw) => {
        const trimmed = raw.trim();
        if (trimmed) {
            distinct.add(trimmed);
        }
    });
    return [...distinct];
}
/**
 * Deduplicates and trims `values` via `uniqueStrings`, then keeps at most the
 * first `max` entries.
 *
 * @param {string[]} values - Raw string entries.
 * @param {number} max - Upper bound handed to `Array.prototype.slice` as the end index.
 * @returns {string[]} The leading `max` distinct, trimmed, non-empty strings.
 */
function capStrings(values, max) {
    const distinct = uniqueStrings(values);
    return distinct.slice(0, max);
}
/**
 * Extracts account-like tokens from free text: a standalone two-digit number,
 * optionally followed by a dot and two more digits (e.g. "62" or "62.01").
 * The result is deduplicated and capped at
 * `INVESTIGATION_MAX_PRIMARY_ACCOUNTS`.
 *
 * NOTE(review): the pattern matches any standalone two-digit number, so the
 * month part of a date such as "2024-03" is also reported ("03"). Confirm
 * whether callers rely on that.
 *
 * @param {string} text - Text to scan.
 * @returns {string[]} Distinct matches in order of first appearance.
 */
function detectAccounts(text) {
    const accountPattern = /\b\d{2}(?:\.\d{2})?\b/g;
    const matches = text.match(accountPattern);
    const candidates = matches ?? [];
    return capStrings(candidates, stage1Contracts_1.INVESTIGATION_MAX_PRIMARY_ACCOUNTS);
}
/**
 * Finds the first period mentioned in the text.
 *
 * A year-month form (a 20xx year, one of `-`, `/` or `.`, then a month
 * 01-12) takes precedence and is returned as "YYYY-MM". Otherwise the first
 * standalone 20xx year is returned as "YYYY".
 *
 * @param {string} text - Text to scan.
 * @returns {string|null} "YYYY-MM", "YYYY", or null when neither form is present.
 */
function detectPeriod(text) {
    const monthMatch = text.match(/\b(20\d{2})[-/.](0[1-9]|1[0-2])\b/);
    if (monthMatch) {
        const [, year, month] = monthMatch;
        return `${year}-${month}`;
    }
    const yearMatch = text.match(/\b(20\d{2})\b/);
    return yearMatch ? yearMatch[1] : null;
}
/**
 * Derives the focus domain from a route summary.
 *
 * - No summary: null.
 * - `mode === "legacy_v1"`: the summary's `route_hint` as-is.
 * - Otherwise: the distinct `route` values of all decisions, ignoring
 *   "no_route", joined with ","; "no_route" when nothing remains.
 *
 * @param {object|null|undefined} routeSummary - Route summary for the current turn.
 * @returns {string|null} The derived domain, or null without a summary.
 */
function deriveDomain(routeSummary) {
    if (!routeSummary) {
        return null;
    }
    if (routeSummary.mode === "legacy_v1") {
        return routeSummary.route_hint;
    }
    const routed = [];
    routeSummary.decisions.forEach((decision) => {
        const candidate = decision.route;
        if (candidate !== "no_route") {
            routed.push(candidate);
        }
    });
    const distinctRoutes = uniqueStrings(routed);
    return distinctRoutes.length > 0 ? distinctRoutes.join(",") : "no_route";
}
/**
 * Classifies how the current turn was narrowed. Checks run in this order:
 *
 * 1. no route summary: "unknown"
 * 2. `mode === "legacy_v1"`: "not_needed"
 * 3. fallback type "clarification", or the coverage report lists anything
 *    under `clarification_needed_for`: "needs_clarification"
 * 4. any decision routed to "no_route": "broad_guarded"
 * 5. more than one decision: "applied"; otherwise "not_needed"
 *
 * @param {object|null|undefined} routeSummary - Route summary for the turn.
 * @param {object} coverageReport - Only read when the fallback type is not "clarification".
 * @returns {string} One of the status strings listed above.
 */
function deriveNarrowingStatus(routeSummary, coverageReport) {
    if (!routeSummary) {
        return "unknown";
    }
    if (routeSummary.mode === "legacy_v1") {
        return "not_needed";
    }
    const wantsClarification = routeSummary.fallback.type === "clarification" ||
        coverageReport.clarification_needed_for.length > 0;
    if (wantsClarification) {
        return "needs_clarification";
    }
    const { decisions } = routeSummary;
    const everyDecisionRouted = decisions.every((decision) => decision.route !== "no_route");
    if (!everyDecisionRouted) {
        return "broad_guarded";
    }
    if (decisions.length > 1) {
        return "applied";
    }
    return "not_needed";
}
/**
 * Suggests how the next query should be treated.
 *
 * Returns "direct_answer" for a legacy ("legacy_v1") summary or when the
 * summary's fallback type is "none". Returns "investigation_candidate" when
 * there is no summary or any other fallback type is set.
 *
 * @param {object|null|undefined} routeSummary - Route summary for the turn.
 * @returns {string} "direct_answer" or "investigation_candidate".
 */
function deriveQueryModeHint(routeSummary) {
    const directAnswer = "direct_answer";
    const investigationCandidate = "investigation_candidate";
    if (!routeSummary) {
        return investigationCandidate;
    }
    // The mode check comes first so `fallback` is never read on legacy summaries.
    const answersDirectly = routeSummary.mode === "legacy_v1" || routeSummary.fallback.type === "none";
    return answersDirectly ? directAnswer : investigationCandidate;
}
/**
 * Gathers the `evidence_id` of every evidence item across all retrieval
 * results, in result order, then deduplicates and caps the list at
 * `INVESTIGATION_MAX_EVIDENCE_REFS`.
 *
 * @param {Array<{evidence: Array<{evidence_id: string}>}>} retrievalResults - Results of the current turn.
 * @returns {string[]} Distinct evidence ids, capped.
 */
function collectEvidenceRefs(retrievalResults) {
    const evidenceIds = [];
    retrievalResults.forEach((result) => {
        result.evidence.forEach((entry) => {
            evidenceIds.push(entry.evidence_id);
        });
    });
    return capStrings(evidenceIds, stage1Contracts_1.INVESTIGATION_MAX_EVIDENCE_REFS);
}
/**
 * Builds the list of open uncertainties for the current turn.
 *
 * Requirement ids from the coverage report are tagged by category
 * ("uncovered:", "partial:", "clarify:", "out_of_scope:"), in that order.
 * They are followed by at most six limitation notes taken from the retrieval
 * results. The combined list is deduplicated and capped at
 * `INVESTIGATION_MAX_UNCERTAINTIES`.
 *
 * @param {object} coverageReport - Supplies the four requirement-id lists read below.
 * @param {Array<{limitations: string[]}>} retrievalResults - Results of the current turn.
 * @returns {string[]} Distinct uncertainty notes, capped.
 */
function collectOpenUncertainties(coverageReport, retrievalResults) {
    const taggedGroups = [
        ["uncovered", coverageReport.requirements_uncovered],
        ["partial", coverageReport.requirements_partially_covered],
        ["clarify", coverageReport.clarification_needed_for],
        ["out_of_scope", coverageReport.out_of_scope_requirements]
    ];
    const notes = [];
    for (const [tag, requirementIds] of taggedGroups) {
        notes.push(...requirementIds.map((requirementId) => `${tag}:${requirementId}`));
    }
    // Only the first six limitation notes across all results are considered.
    const allLimitations = retrievalResults.flatMap((result) => result.limitations);
    notes.push(...allLimitations.slice(0, 6));
    return capStrings(notes, stage1Contracts_1.INVESTIGATION_MAX_UNCERTAINTIES);
}
/**
 * Normalises a list of entity backlinks.
 *
 * Each entry's `entity` and `id` are stringified and trimmed. Entries that
 * are null/undefined, or whose entity or id is empty after trimming, are
 * skipped. Duplicate (entity, id) pairs are dropped, keeping the first
 * occurrence.
 *
 * Uniqueness is tracked per entity rather than through a concatenated
 * "entity::id" key, so pairs such as ("a::b", "c") and ("a", "b::c") are
 * correctly treated as distinct.
 *
 * @param {Array<{entity?: unknown, id?: unknown}|null|undefined>} values - Raw backlinks.
 * @returns {Array<{entity: string, id: string}>} New array of fresh, normalised backlink objects.
 */
function normalizeEntityBacklinks(values) {
    const result = [];
    const seenIdsByEntity = new Map();
    for (const item of values) {
        const entity = String(item?.entity ?? "").trim();
        const id = String(item?.id ?? "").trim();
        if (!entity || !id) {
            continue;
        }
        let seenIds = seenIdsByEntity.get(entity);
        if (seenIds === undefined) {
            seenIds = new Set();
            seenIdsByEntity.set(entity, seenIds);
        }
        if (seenIds.has(id)) {
            continue;
        }
        seenIds.add(id);
        result.push({
            entity,
            id
        });
    }
    return result;
}
/**
 * Flattens the `problem_units` of every retrieval result into one list, in
 * result order. Results without `problem_units` (null/undefined) contribute
 * nothing.
 *
 * @param {Array<{problem_units?: Array<object>|null}>} retrievalResults - Results of the current turn.
 * @returns {Array<object>} All problem units found.
 */
function collectProblemUnits(retrievalResults) {
    const unitsPerResult = retrievalResults.map((result) => result.problem_units ?? []);
    return unitsPerResult.flat();
}
/**
 * Returns a sanitised copy of a problem-unit state with every list
 * deduplicated/normalised and capped at its `stage2ProblemUnits` limit.
 *
 * - `active_problem_units` / `resolved_problem_units`: distinct trimmed ids.
 * - `problem_unit_backlinks`: ids are stringified and trimmed, entity
 *   backlinks are normalised; entries with an empty id or no remaining
 *   backlinks are dropped before the cap is applied.
 * - `focus_problem_types`: stringified, distinct, capped.
 *
 * @param {object} state - State carrying the four lists above.
 * @returns {object} A new state object; the input is not modified.
 */
function capProblemUnitState(state) {
    const activeIds = capStrings(state.active_problem_units, stage2ProblemUnits_1.INVESTIGATION_MAX_ACTIVE_PROBLEM_UNITS);
    const resolvedIds = capStrings(state.resolved_problem_units, stage2ProblemUnits_1.INVESTIGATION_MAX_RESOLVED_PROBLEM_UNITS);
    const usableBacklinks = [];
    state.problem_unit_backlinks.forEach((entry) => {
        const problemUnitId = String(entry.problem_unit_id ?? "").trim();
        const entityBacklinks = normalizeEntityBacklinks(entry.entity_backlinks ?? []);
        if (Boolean(problemUnitId) && entityBacklinks.length > 0) {
            usableBacklinks.push({
                problem_unit_id: problemUnitId,
                entity_backlinks: entityBacklinks
            });
        }
    });
    const focusTypes = capStrings(state.focus_problem_types.map((type) => String(type)), stage2ProblemUnits_1.INVESTIGATION_MAX_FOCUS_PROBLEM_TYPES);
    return {
        active_problem_units: activeIds,
        resolved_problem_units: resolvedIds,
        problem_unit_backlinks: usableBacklinks.slice(0, stage2ProblemUnits_1.INVESTIGATION_MAX_PROBLEM_UNIT_BACKLINKS),
        focus_problem_types: focusTypes
    };
}
/**
 * Computes the next problem-unit state from the previous investigation state
 * and the current turn's retrieval results.
 *
 * When the current results contain problem units:
 * - their normalised ids become the active set;
 * - previously active ids missing from the current set move to resolved;
 * - previously resolved ids that are active again are removed from resolved,
 *   so an id is never both active and resolved;
 * - their types become the focus types.
 * When the current results contain no problem units, the previous active,
 * resolved and focus lists are carried over, re-capped.
 *
 * Backlinks are kept only for active units. Backlinks from the current turn
 * win; otherwise the previous turn's backlinks for that id are reused.
 *
 * Ids are normalised (stringified and trimmed) once and that form is used
 * everywhere, so numeric or whitespace-padded `problem_unit_id` values still
 * match their backlinks.
 *
 * @param {{problem_unit_state?: object}} previous - Previous investigation state.
 * @param {Array<object>} retrievalResults - Results of the current turn.
 * @returns {object|undefined} The capped next state, or undefined when every list is empty.
 */
function updateProblemUnitState(previous, retrievalResults) {
    const previousState = previous.problem_unit_state;
    const normalizeId = (value) => String(value ?? "").trim();
    const currentProblemUnits = collectProblemUnits(retrievalResults);
    const currentIds = capStrings(currentProblemUnits.map((item) => normalizeId(item.problem_unit_id)), stage2ProblemUnits_1.INVESTIGATION_MAX_ACTIVE_PROBLEM_UNITS);
    const currentIdSet = new Set(currentIds);
    const currentTypes = capStrings(currentProblemUnits.map((item) => String(item.problem_unit_type ?? "")), stage2ProblemUnits_1.INVESTIGATION_MAX_FOCUS_PROBLEM_TYPES);
    // Key backlinks by the normalised id so lookups by active id succeed.
    // When the same id occurs more than once, the last non-empty backlink list wins.
    const currentBacklinksById = new Map();
    for (const item of currentProblemUnits) {
        const problemUnitId = normalizeId(item.problem_unit_id);
        if (!currentIdSet.has(problemUnitId)) {
            continue;
        }
        const entityBacklinks = normalizeEntityBacklinks(item.entity_backlinks ?? []);
        if (entityBacklinks.length > 0) {
            currentBacklinksById.set(problemUnitId, entityBacklinks);
        }
    }
    const previousBacklinksById = new Map((previousState?.problem_unit_backlinks ?? []).map((item) => [item.problem_unit_id, item.entity_backlinks]));
    const hasCurrentUnits = currentIds.length > 0;
    const isNotCurrent = (problemUnitId) => !currentIdSet.has(problemUnitId);
    const active_problem_units = hasCurrentUnits
        ? currentIds
        : capStrings(previousState?.active_problem_units ?? [], stage2ProblemUnits_1.INVESTIGATION_MAX_ACTIVE_PROBLEM_UNITS);
    const resolved_problem_units = hasCurrentUnits
        ? capStrings([
            ...(previousState?.active_problem_units ?? []).filter(isNotCurrent),
            // A unit that is active again must not remain listed as resolved.
            ...(previousState?.resolved_problem_units ?? []).filter(isNotCurrent)
        ], stage2ProblemUnits_1.INVESTIGATION_MAX_RESOLVED_PROBLEM_UNITS)
        : capStrings(previousState?.resolved_problem_units ?? [], stage2ProblemUnits_1.INVESTIGATION_MAX_RESOLVED_PROBLEM_UNITS);
    const problem_unit_backlinks = active_problem_units
        .map((problemUnitId) => {
            const entity_backlinks = normalizeEntityBacklinks(currentBacklinksById.get(problemUnitId) ?? previousBacklinksById.get(problemUnitId) ?? []);
            if (entity_backlinks.length === 0) {
                return null;
            }
            return {
                problem_unit_id: problemUnitId,
                entity_backlinks
            };
        })
        .filter((item) => item !== null)
        .slice(0, stage2ProblemUnits_1.INVESTIGATION_MAX_PROBLEM_UNIT_BACKLINKS);
    const focus_problem_types = currentTypes.length > 0
        ? currentTypes
        : capStrings((previousState?.focus_problem_types ?? []).map((item) => String(item)), stage2ProblemUnits_1.INVESTIGATION_MAX_FOCUS_PROBLEM_TYPES);
    const nextState = capProblemUnitState({
        active_problem_units,
        resolved_problem_units,
        problem_unit_backlinks,
        focus_problem_types
    });
    const isEmpty = nextState.active_problem_units.length === 0 &&
        nextState.resolved_problem_units.length === 0 &&
        nextState.problem_unit_backlinks.length === 0 &&
        nextState.focus_problem_types.length === 0;
    return isEmpty ? undefined : nextState;
}
/**
 * Creates a copy of an investigation state whose nested containers can be
 * modified without affecting the original.
 *
 * Top-level fields are copied shallowly. `focus`, `focus.primary_accounts`,
 * `evidence_refs`, `open_uncertainties` and `followup_context` (including
 * its `referenced_requirement_ids`) are copied into fresh containers. When
 * present, `problem_unit_state` is copied and passed through
 * `capProblemUnitState`.
 *
 * @param {object|null|undefined} state - State to clone.
 * @returns {object|null} The clone, or null when `state` is falsy.
 */
function cloneInvestigationState(state) {
    if (!state) {
        return null;
    }
    const focusCopy = {
        ...state.focus,
        primary_accounts: [...state.focus.primary_accounts]
    };
    const evidenceCopy = [...state.evidence_refs];
    const uncertaintiesCopy = [...state.open_uncertainties];
    let followupCopy = null;
    if (state.followup_context) {
        followupCopy = {
            ...state.followup_context,
            referenced_requirement_ids: [...state.followup_context.referenced_requirement_ids]
        };
    }
    const cloned = {
        ...state,
        focus: focusCopy,
        evidence_refs: evidenceCopy,
        open_uncertainties: uncertaintiesCopy,
        followup_context: followupCopy
    };
    const unitState = state.problem_unit_state;
    if (unitState) {
        const activeCopy = [...unitState.active_problem_units];
        const resolvedCopy = [...unitState.resolved_problem_units];
        const backlinkCopies = unitState.problem_unit_backlinks.map((link) => ({
            problem_unit_id: link.problem_unit_id,
            entity_backlinks: [...link.entity_backlinks]
        }));
        const focusTypesCopy = [...unitState.focus_problem_types];
        cloned.problem_unit_state = capProblemUnitState({
            active_problem_units: activeCopy,
            resolved_problem_units: resolvedCopy,
            problem_unit_backlinks: backlinkCopies,
            focus_problem_types: focusTypesCopy
        });
    }
    return cloned;
}
/**
 * Builds the initial investigation state for a session: status "idle", turn
 * index 0, empty focus, no evidence, no uncertainties, no follow-up context.
 *
 * @param {string} sessionId - Stored as `session_id`.
 * @param {string} [timestamp] - Stored as `updated_at`; defaults to the current time as an ISO-8601 string.
 * @returns {object} A fresh state object with freshly allocated arrays.
 */
function createEmptyInvestigationState(sessionId, timestamp = new Date().toISOString()) {
    const emptyFocus = {
        domain: null,
        period: null,
        primary_accounts: [],
        active_query_subject: null
    };
    const initialState = {
        schema_version: stage1Contracts_1.INVESTIGATION_STATE_SCHEMA_VERSION,
        session_id: sessionId,
        status: "idle",
        turn_index: 0,
        updated_at: timestamp,
        question_id: null,
        focus: emptyFocus,
        narrowing_status: "unknown",
        evidence_refs: [],
        open_uncertainties: [],
        last_answer_mode: null,
        followup_context: null,
        query_mode_hint: "direct_answer"
    };
    return initialState;
}
/**
 * Produces the investigation state for the next turn from the previous state
 * and the current turn's inputs.
 *
 * - The session id is carried over, the turn index is incremented and the
 *   status becomes "active".
 * - Focus: domain and period come from the current turn when they can be
 *   derived, otherwise from the previous state. Accounts detected in the
 *   message are placed before the previous accounts and the list is capped.
 *   The subject is the first requirement's text (or the user message),
 *   limited to 180 characters.
 * - Evidence refs: current refs come first, then the previous refs, capped.
 * - Open uncertainties are rebuilt from the current turn only.
 * - Follow-up context records the previous question id, the user message
 *   (limited to 240 characters) and the capped requirement ids.
 * - `problem_unit_state` is attached only when `updateProblemUnitState`
 *   returns a value.
 *
 * @param {object} input - Carries `previous`, `userMessage`, `requirements`,
 *   `retrievalResults`, `routeSummary`, `coverageReport`, `timestamp`,
 *   `questionId` and `replyType`.
 * @returns {object} The new investigation state; `input.previous` is not modified.
 */
function updateInvestigationState(input) {
    const previous = input.previous;
    const message = input.userMessage;
    const accountsInMessage = capStrings(detectAccounts(message), stage1Contracts_1.INVESTIGATION_MAX_PRIMARY_ACCOUNTS);
    const requirementIds = capStrings(input.requirements.map((requirement) => requirement.requirement_id), stage1Contracts_1.INVESTIGATION_MAX_REQUIREMENT_LINKS);
    const subject = input.requirements[0]?.requirement_text ?? message;
    const problemUnitState = updateProblemUnitState(previous, input.retrievalResults);
    const focus = {
        domain: deriveDomain(input.routeSummary) ?? previous.focus.domain,
        period: detectPeriod(message) ?? previous.focus.period,
        primary_accounts: capStrings([...accountsInMessage, ...previous.focus.primary_accounts], stage1Contracts_1.INVESTIGATION_MAX_PRIMARY_ACCOUNTS),
        active_query_subject: subject.slice(0, 180)
    };
    const nextState = {
        schema_version: stage1Contracts_1.INVESTIGATION_STATE_SCHEMA_VERSION,
        session_id: previous.session_id,
        status: "active",
        turn_index: previous.turn_index + 1,
        updated_at: input.timestamp,
        question_id: input.questionId,
        focus,
        narrowing_status: deriveNarrowingStatus(input.routeSummary, input.coverageReport),
        evidence_refs: capStrings([...collectEvidenceRefs(input.retrievalResults), ...previous.evidence_refs], stage1Contracts_1.INVESTIGATION_MAX_EVIDENCE_REFS),
        open_uncertainties: collectOpenUncertainties(input.coverageReport, input.retrievalResults),
        last_answer_mode: input.replyType,
        followup_context: {
            previous_question_id: previous.question_id,
            last_user_message: message.slice(0, 240),
            referenced_requirement_ids: requirementIds
        },
        query_mode_hint: deriveQueryModeHint(input.routeSummary)
    };
    // The key is omitted entirely (not set to undefined) when there is no problem-unit state.
    if (problemUnitState) {
        nextState.problem_unit_state = problemUnitState;
    }
    return nextState;
}