NODEDC_1C/llm_normalizer/backend/dist/services/problemUnitAssembler.js

620 lines
26 KiB
JavaScript

"use strict";
// Flags the exports object with `__esModule: true`.
Object.defineProperty(exports, "__esModule", { value: true });
// CommonJS export table. Each name assigned here is a function declaration
// found later in this file; function declarations are hoisted, so the
// bindings already exist when these assignments run.
exports.buildCandidateEvidence = buildCandidateEvidence;
exports.clusterCandidateEvidence = clusterCandidateEvidence;
exports.detectProblemUnitType = detectProblemUnitType;
exports.scoreProblemSeverity = scoreProblemSeverity;
exports.buildProblemUnit = buildProblemUnit;
exports.collapseDuplicates = collapseDuplicates;
exports.assembleProblemUnits = assembleProblemUnits;
const stage2ProblemUnits_1 = require("../types/stage2ProblemUnits");
const config_1 = require("../config");
const lifecycleRuntime_1 = require("./lifecycleRuntime");
/**
 * Narrows an arbitrary value to a non-array object.
 *
 * @param {unknown} value - Value to inspect.
 * @returns {object|null} The same reference when `value` is a truthy,
 *   non-array value whose `typeof` is "object"; otherwise `null`.
 */
function toObject(value) {
    const isObjectLike = Boolean(value) && typeof value === "object" && !Array.isArray(value);
    return isObjectLike ? value : null;
}
/**
 * Trims every entry, drops the ones that end up empty and removes duplicates.
 * First-seen order is kept.
 *
 * @param {string[]} values - Strings to normalise; each entry must expose `trim()`.
 * @returns {string[]} Distinct, trimmed, non-empty strings.
 */
function uniqueStrings(values) {
    const distinct = values.reduce((seen, raw) => {
        const trimmed = raw.trim();
        if (trimmed) {
            seen.add(trimmed);
        }
        return seen;
    }, new Set());
    return [...distinct];
}
/**
 * Clamps a score into the closed interval [0, 1] and rounds it to two decimals.
 *
 * @param {number} value - Raw score.
 * @returns {number} 0 for non-finite or non-positive input, 1 for input >= 1,
 *   otherwise the input rounded via `toFixed(2)`.
 */
function clampUnitScore(value) {
    if (!Number.isFinite(value) || value <= 0) {
        return 0;
    }
    return value >= 1 ? 1 : Number(value.toFixed(2));
}
/**
 * Maps a severity score to a grade label.
 *
 * @param {number} score - Severity score.
 * @returns {"high"|"medium"|"low"} "high" at >= 0.7, "medium" at >= 0.4, else "low".
 */
function gradeForScore(score) {
    return score >= 0.7 ? "high" : score >= 0.4 ? "medium" : "low";
}
/**
 * Maps a confidence score to a grade label.
 *
 * @param {number} score - Confidence score.
 * @returns {"high"|"medium"|"low"} "high" at >= 0.75, "medium" at >= 0.45, else "low".
 */
function confidenceGradeForScore(score) {
    // Bands are ordered from the highest floor down; the first floor reached wins.
    const bands = [
        [0.75, "high"],
        [0.45, "medium"]
    ];
    const band = bands.find(([floor]) => score >= floor);
    return band ? band[1] : "low";
}
/**
 * Converts a confidence label into a numeric weight.
 *
 * @param {unknown} value - Confidence label.
 * @returns {number} 1 for "high", 0.6 for "medium", 0.3 for anything else.
 */
function confidenceToScore(value) {
    switch (value) {
        case "high":
            return 1;
        case "medium":
            return 0.6;
        default:
            return 0.3;
    }
}
/**
 * Reads a string field from `item.payload`.
 *
 * @param {{payload: object}} item - Evidence item; `payload` must be present.
 * @param {string} key - Payload key to read.
 * @returns {string|null} The trimmed value, or `null` when the field is not a
 *   string or is blank after trimming.
 */
function valueFromPayload(item, key) {
    const raw = item.payload[key];
    if (typeof raw === "string") {
        const text = raw.trim();
        if (text.length > 0) {
            return text;
        }
    }
    return null;
}
/**
 * Converts an unknown value into a de-duplicated list of trimmed strings.
 *
 * Only primitive entries (string, number, boolean, bigint) are kept and
 * stringified. `null`, `undefined`, objects, nested arrays, functions and
 * symbols are skipped: stringifying them would yield junk tokens such as
 * "null", "undefined" or "[object Object]" that would then be treated as
 * real pattern names by the callers.
 *
 * @param {unknown} value - Candidate array.
 * @returns {string[]} Distinct non-empty strings, or `[]` when `value` is not an array.
 */
function stringArrayFromUnknown(value) {
    if (!Array.isArray(value)) {
        return [];
    }
    const printableKinds = new Set(["string", "number", "boolean", "bigint"]);
    const printable = value.filter((entry) => printableKinds.has(typeof entry));
    return uniqueStrings(printable.map((entry) => String(entry)));
}
/**
 * Reads `item.payload[key]` and normalises it with `stringArrayFromUnknown`.
 *
 * @param {{payload: object}} item - Evidence item; `payload` must be present.
 * @param {string} key - Payload key to read.
 * @returns {string[]} Whatever `stringArrayFromUnknown` yields for that field.
 */
function stringArrayFromPayload(item, key) {
    const { payload } = item;
    const rawField = payload[key];
    return stringArrayFromUnknown(rawField);
}
/**
 * Derives domain hint labels from a summary object.
 *
 * A recognised `domain_purity_guard.domain_card_id` takes precedence and
 * returns a fixed hint list. Otherwise each entry of
 * `semantic_profile.domain_scope` is lower-cased and mapped to a hint;
 * unrecognised scopes are ignored.
 *
 * @param {object} summary - Summary object; both sections are optional.
 * @returns {string[]} Hint labels (de-duplicated on the domain-scope path).
 */
function domainHintsFromSummary(summary) {
    const guard = toObject(summary.domain_purity_guard);
    const cardId = String(guard?.domain_card_id ?? "").trim();
    const hintsByCard = new Map([
        ["settlements_60_62", ["bank_settlement", "customer_settlement"]],
        ["vat_document_register_book", ["vat_flow"]],
        ["month_close_costs_20_44", ["period_close"]]
    ]);
    if (hintsByCard.has(cardId)) {
        return hintsByCard.get(cardId);
    }
    const hintByScope = new Map([
        ["bank", "bank_settlement"],
        ["settlements", "bank_settlement"],
        ["suppliers", "bank_settlement"],
        ["supplier_payments", "bank_settlement"],
        ["other_settlements", "bank_settlement"],
        ["customers", "customer_settlement"],
        ["vat", "vat_flow"],
        ["taxes", "vat_flow"],
        ["period_close", "period_close"],
        ["deferred_expense", "deferred_expense"],
        ["fixed_assets", "fixed_asset"]
    ]);
    const profile = toObject(summary.semantic_profile);
    const mapped = stringArrayFromUnknown(profile?.domain_scope)
        .map((scope) => hintByScope.get(scope.toLowerCase()))
        .filter((hint) => hint !== undefined);
    return uniqueStrings(mapped);
}
/**
 * Builds the relation and anomaly pattern lists for a summary.
 *
 * Both lists start from the matching `semantic_profile` array and are
 * extended with the same `domain_hint:<hint>` tags derived from
 * `domainHintsFromSummary`.
 *
 * @param {object} summary - Summary object.
 * @returns {{relation_patterns: string[], anomaly_patterns: string[]}}
 */
function extractSemanticProfile(summary) {
    const profile = toObject(summary.semantic_profile);
    const hintTags = domainHintsFromSummary(summary).map((hint) => `domain_hint:${hint}`);
    const withHintTags = (raw) => uniqueStrings(stringArrayFromUnknown(raw).concat(hintTags));
    const relationPatterns = withHintTags(profile?.relation_patterns);
    const anomalyPatterns = withHintTags(profile?.anomaly_patterns);
    return {
        relation_patterns: relationPatterns,
        anomaly_patterns: anomalyPatterns
    };
}
/**
 * Finds the raw entity that corresponds to an evidence item's source id.
 *
 * The comparison is case-insensitive and checks each entity's `source_id`,
 * `entity_id` and `id`, in that order.
 *
 * @param {object} item - Evidence item; reads `item.pointer.source.id`.
 * @param {object[]} rawEntities - Raw entities to search.
 * @returns {object|null} The first matching entity, or `null` when the item
 *   has no source id or nothing matches.
 */
function resolveEntityOverlay(item, rawEntities) {
    const wantedId = String(item.pointer.source.id ?? "").toLowerCase();
    if (wantedId.length === 0) {
        return null;
    }
    const hasWantedId = (entity) => [entity.source_id, entity.entity_id, entity.id]
        .some((raw) => String(raw ?? "").toLowerCase() === wantedId);
    const match = rawEntities.find(hasWantedId);
    return match ?? null;
}
/**
 * Collects relation pattern hits for one evidence item.
 *
 * Sources, in order: `failed_expected_edge` (as `failed_edge:<v>`),
 * `expected_next_step` (as `expected_step:<v>`), `relation_pattern`, the
 * payload arrays `relation_patterns` and `relation_pattern_hits`, the
 * overlay's `relation_pattern_hits` and `relation_types`, then
 * `context.summary_relation_patterns`. If nothing was collected and the
 * route is "hybrid_store_plus_live", the single hit "chain_scope" is used.
 *
 * @param {object} item - Evidence item.
 * @param {object|null} overlay - Matching raw entity, if any.
 * @param {object} context - Reads `route` and `summary_relation_patterns`.
 * @returns {string[]} De-duplicated hits in collection order.
 */
function detectRelationPatternHits(item, overlay, context) {
    const failedEdge = valueFromPayload(item, "failed_expected_edge");
    const expectedStep = valueFromPayload(item, "expected_next_step");
    const relationPattern = valueFromPayload(item, "relation_pattern");
    const overlayHits = overlay
        ? [
            ...stringArrayFromUnknown(overlay.relation_pattern_hits),
            ...stringArrayFromUnknown(overlay.relation_types)
        ]
        : [];
    const collected = [
        ...(failedEdge ? [`failed_edge:${failedEdge}`] : []),
        ...(expectedStep ? [`expected_step:${expectedStep}`] : []),
        ...(relationPattern ? [relationPattern] : []),
        ...stringArrayFromPayload(item, "relation_patterns"),
        ...stringArrayFromPayload(item, "relation_pattern_hits"),
        ...overlayHits,
        ...context.summary_relation_patterns
    ];
    if (collected.length === 0 && context.route === "hybrid_store_plus_live") {
        collected.push("chain_scope");
    }
    return uniqueStrings(collected);
}
/**
 * Collects anomaly pattern labels for one evidence item.
 *
 * Sources, in order: payload arrays (`anomaly_patterns`, `risk_factors`,
 * `lifecycle_gaps`, `lifecycle_markers`), payload `anomaly_pattern`, overlay
 * arrays (`risk_factors`, `lifecycle_gaps`, `anomaly_patterns`),
 * `context.summary_anomaly_patterns`, `context.risk_factors`, then marker
 * labels derived from `evidence_kind`, `limitation.reason_code`, payload
 * `business_defect_class` and the route.
 *
 * @param {object} item - Evidence item.
 * @param {object|null} overlay - Matching raw entity, if any.
 * @param {object} context - Reads `route`, `summary_anomaly_patterns`, `risk_factors`.
 * @returns {string[]} De-duplicated labels in collection order.
 */
function detectAnomalyPatterns(item, overlay, context) {
    const payloadKeys = ["anomaly_patterns", "risk_factors", "lifecycle_gaps", "lifecycle_markers"];
    const collected = payloadKeys.flatMap((key) => stringArrayFromPayload(item, key));
    const explicitPattern = valueFromPayload(item, "anomaly_pattern");
    if (explicitPattern) {
        collected.push(explicitPattern);
    }
    if (overlay) {
        for (const key of ["risk_factors", "lifecycle_gaps", "anomaly_patterns"]) {
            collected.push(...stringArrayFromUnknown(overlay[key]));
        }
    }
    collected.push(...context.summary_anomaly_patterns);
    collected.push(...context.risk_factors);
    if (item.evidence_kind === "anomaly_signal") {
        collected.push("anomaly_signal");
    }
    // Only these two reason codes are surfaced as anomaly labels.
    const reasonCode = item.limitation?.reason_code;
    if (reasonCode === "missing_mechanism" || reasonCode === "insufficient_detail") {
        collected.push(reasonCode);
    }
    const defectClass = valueFromPayload(item, "business_defect_class");
    if (defectClass) {
        collected.push(defectClass);
    }
    if (context.route === "store_feature_risk") {
        collected.push("risk_route");
    }
    return uniqueStrings(collected);
}
/**
 * Builds the list of `{ entity, id }` backlinks for an evidence item.
 *
 * Up to three pairs are considered, in order: the pointer source, the
 * payload (`source_entity` / `source_id`) and the overlay. A pair is used
 * only when both parts are non-empty. Pairs are de-duplicated
 * case-insensitively; a later duplicate replaces the earlier entry but keeps
 * its position.
 *
 * @param {object} item - Evidence item; reads `pointer.source` and `payload`.
 * @param {object|null} overlay - Matching raw entity, if any.
 * @returns {{entity: string, id: string}[]} Distinct backlinks.
 */
function buildEntityBacklinks(item, overlay) {
    const pointerSource = item.pointer.source;
    const trimmedOrEmpty = (raw) => String(raw ?? "").trim();
    const pairs = [
        [trimmedOrEmpty(pointerSource.entity), trimmedOrEmpty(pointerSource.id)],
        [valueFromPayload(item, "source_entity"), valueFromPayload(item, "source_id")],
        [
            overlay ? trimmedOrEmpty(overlay.source_entity) : "",
            overlay ? trimmedOrEmpty(overlay.source_id) : ""
        ]
    ];
    const distinct = new Map();
    for (const [entity, id] of pairs) {
        if (entity && id) {
            distinct.set(`${entity.toLowerCase()}|${id.toLowerCase()}`, { entity, id });
        }
    }
    return [...distinct.values()];
}
/**
 * Resolves the expected state of an evidence item.
 *
 * @param {object} item - Evidence item.
 * @returns {string|undefined} Payload `expected_state`, falling back to
 *   `expected_next_step`; `undefined` when neither is available.
 */
function inferExpectedState(item) {
    const declared = valueFromPayload(item, "expected_state");
    if (declared !== null && declared !== undefined) {
        return declared;
    }
    const fallback = valueFromPayload(item, "expected_next_step");
    return fallback ?? undefined;
}
/**
 * Resolves the actual state of an evidence item.
 *
 * @param {object} item - Evidence item.
 * @returns {string|undefined} Payload `actual_state`, falling back to
 *   `mechanism_of_failure`; `undefined` when neither is available.
 */
function inferActualState(item) {
    const observed = valueFromPayload(item, "actual_state");
    if (observed !== null && observed !== undefined) {
        return observed;
    }
    const fallback = valueFromPayload(item, "mechanism_of_failure");
    return fallback ?? undefined;
}
/**
 * Turns evidence items into candidate-evidence records.
 *
 * The working context always takes its `route` from the `route` argument;
 * `contextInput.route` is not read. Missing list fields of `contextInput`
 * default to empty arrays.
 *
 * @param {object[]} items - Evidence items.
 * @param {string} route - Route label stored on every candidate.
 * @param {object} [contextInput] - Optional `result_type`, `raw_entities`,
 *   `summary_relation_patterns`, `summary_anomaly_patterns`, `risk_factors`.
 * @returns {object[]} One candidate per item, in input order.
 */
function buildCandidateEvidence(items, route, contextInput) {
    const context = {
        route,
        result_type: contextInput?.result_type
    };
    const listFields = ["raw_entities", "summary_relation_patterns", "summary_anomaly_patterns", "risk_factors"];
    for (const field of listFields) {
        context[field] = contextInput?.[field] ?? [];
    }
    const toCandidate = (item, index) => {
        const overlay = resolveEntityOverlay(item, context.raw_entities);
        // Fall back to a positional id when the item carries no evidence id.
        const idSuffix = item.evidence_id || `${route}-${index + 1}`;
        return {
            schema_version: stage2ProblemUnits_1.CANDIDATE_EVIDENCE_SCHEMA_VERSION,
            candidate_id: `cand-${idSuffix}`,
            route,
            source_ref: item.source_ref,
            expected_state: inferExpectedState(item),
            actual_state: inferActualState(item),
            relation_pattern_hits: detectRelationPatternHits(item, overlay, context),
            anomaly_patterns: detectAnomalyPatterns(item, overlay, context),
            entity_backlinks: buildEntityBacklinks(item, overlay),
            confidence_hint: item.confidence
        };
    };
    return items.map(toCandidate);
}
/**
 * Builds the grouping key for a candidate.
 *
 * @param {object} candidate - Candidate evidence record.
 * @returns {string} `route|canonical_ref|firstRelation|firstAnomaly`, where a
 *   missing first relation or anomaly is written as "none".
 */
function clusterSignature(candidate) {
    const { relation_pattern_hits: relationHits, anomaly_patterns: anomalyPatterns } = candidate;
    const leadRelation = relationHits[0] ?? "none";
    const leadAnomaly = anomalyPatterns[0] ?? "none";
    const parts = [candidate.route, candidate.source_ref.canonical_ref];
    parts.push(leadRelation, leadAnomaly);
    return parts.join("|");
}
/**
 * Groups candidates that share the same `clusterSignature`.
 *
 * @param {Iterable<object>} candidates - Candidate evidence records.
 * @returns {{cluster_id: string, candidates: object[]}[]} Clusters in order of
 *   first appearance; `cluster_id` is the shared signature.
 */
function clusterCandidateEvidence(candidates) {
    const groups = new Map();
    for (const candidate of candidates) {
        const key = clusterSignature(candidate);
        if (groups.has(key)) {
            groups.get(key).push(candidate);
        }
        else {
            groups.set(key, [candidate]);
        }
    }
    return [...groups].map(([cluster_id, members]) => ({
        cluster_id,
        candidates: members
    }));
}
/**
 * Tells whether `pattern` matches anywhere in `value`.
 *
 * @param {string} value - Text to search.
 * @param {RegExp} pattern - Pattern to look for.
 * @returns {boolean} `true` when the pattern matches.
 */
function hasAny(value, pattern) {
    const match = pattern.exec(value);
    return match !== null;
}
/**
 * Classifies a cluster into a problem-unit type.
 *
 * The cluster's relation hits, anomaly patterns, source refs and routes are
 * each joined into one lower-cased text. The rules below are tried in order
 * and the first one that matches decides the type; when none matches the
 * type is "document_conflict".
 *
 * @param {{candidates: object[]}} cluster - Candidate cluster.
 * @returns {string} Problem-unit type label.
 */
function detectProblemUnitType(cluster) {
    const { candidates } = cluster;
    const joinLower = (parts) => parts.join(" ").toLowerCase();
    const relationText = joinLower(candidates.flatMap((entry) => entry.relation_pattern_hits));
    const anomalyText = joinLower(candidates.flatMap((entry) => entry.anomaly_patterns));
    const sourceText = joinLower(candidates.map((entry) => `${entry.source_ref.entity} ${entry.source_ref.id}`));
    const routeText = joinLower(candidates.map((entry) => entry.route));
    // Ordered rule table: [type, predicate]. Order is significant.
    const rules = [
        [
            "cross_branch_inconsistency_cluster",
            () => hasAny(`${anomalyText} ${sourceText}`, /cross[_\s-]?branch|vat|nds|tax|ндс/)
        ],
        [
            "period_risk_cluster",
            () => hasAny(`${anomalyText} ${relationText}`, /period|close[_\s-]?risk|reporting|закрыт|period_close/)
        ],
        [
            "unresolved_settlement_cluster",
            () => hasAny(anomalyText, /settlement|tail|unresolved|хвост|незакрыт/)
        ],
        [
            "lifecycle_anomaly_node",
            () => hasAny(anomalyText, /lifecycle|deferred|broken_lifecycle|списани|амортиз|рбп/)
        ],
        [
            "broken_chain_segment",
            () => hasAny(relationText, /failed_edge|statement_to_document|payment_to_settlement|chain|broken|цепоч|разрыв/)
                || (routeText.includes("hybrid_store_plus_live") && relationText.length > 0)
        ]
    ];
    const matched = rules.find(([, applies]) => applies());
    return matched ? matched[0] : "document_conflict";
}
/**
 * Scores a cluster's severity and confidence.
 *
 * Severity starts at 0.25, gains 0.08 per candidate (capped at five
 * candidates), then 0.2 for an edge-break relation hit, 0.15 when any
 * candidate has anomaly patterns and 0.1 for a period-related anomaly.
 * Confidence is the mean of the candidates' confidence weights, plus 0.05
 * when an edge break is present. Both are clamped with `clampUnitScore`.
 *
 * @param {{candidates: object[]}} cluster - Candidate cluster.
 * @returns {{severity: {score: number, grade: string}, confidence: {score: number, grade: string}}}
 */
function scoreProblemSeverity(cluster) {
    const members = cluster.candidates;
    const edgeBreakPattern = /failed_edge|chain|statement_to_document|payment_to_settlement/i;
    const periodPattern = /period|close|reporting|закрыт/i;
    let confidenceTotal = 0;
    for (const member of members) {
        confidenceTotal += confidenceToScore(member.confidence_hint);
    }
    const meanConfidence = members.length > 0 ? confidenceTotal / members.length : 0;
    const edgeBroken = members.some((member) => member.relation_pattern_hits.some((hit) => edgeBreakPattern.test(hit)));
    const anomalous = members.some((member) => member.anomaly_patterns.length > 0);
    const periodRisk = members.some((member) => member.anomaly_patterns.some((label) => periodPattern.test(label)));
    // Bonuses are applied in this fixed order so the floating-point sum is stable.
    const bonuses = [
        [edgeBroken, 0.2],
        [anomalous, 0.15],
        [periodRisk, 0.1]
    ];
    let rawSeverity = 0.25 + Math.min(members.length, 5) * 0.08;
    for (const [applies, bonus] of bonuses) {
        if (applies) {
            rawSeverity += bonus;
        }
    }
    const severityScore = clampUnitScore(rawSeverity);
    const confidenceScore = clampUnitScore(edgeBroken ? meanConfidence + 0.05 : meanConfidence);
    return {
        severity: {
            score: severityScore,
            grade: gradeForScore(severityScore)
        },
        confidence: {
            score: confidenceScore,
            grade: confidenceGradeForScore(confidenceScore)
        }
    };
}
/**
 * Returns the display title for a problem-unit type.
 *
 * @param {string} type - Problem-unit type label.
 * @returns {string} The matching title; any type not listed below gets the
 *   cross-branch title.
 */
function unitTitle(type) {
    const titles = new Map([
        ["document_conflict", "Document conflict detected"],
        ["broken_chain_segment", "Broken chain segment detected"],
        ["lifecycle_anomaly_node", "Lifecycle anomaly node detected"],
        ["unresolved_settlement_cluster", "Unresolved settlement cluster detected"],
        ["period_risk_cluster", "Period risk cluster detected"]
    ]);
    return titles.get(type) ?? "Cross-branch inconsistency cluster detected";
}
/**
 * Produces a one-sentence mechanism description for a cluster.
 *
 * The first distinct relation hit is preferred; failing that, the first
 * distinct anomaly pattern; failing both, a baseline sentence naming `type`.
 *
 * @param {{candidates: object[]}} cluster - Candidate cluster.
 * @param {string} type - Problem-unit type, used only in the baseline sentence.
 * @returns {string} Mechanism sentence.
 */
function mechanismSummary(cluster, type) {
    const members = cluster.candidates;
    const [leadRelation] = uniqueStrings(members.flatMap((member) => member.relation_pattern_hits));
    const [leadAnomaly] = uniqueStrings(members.flatMap((member) => member.anomaly_patterns));
    if (leadRelation) {
        return `Mechanism candidate: ${leadRelation}.`;
    }
    return leadAnomaly
        ? `Mechanism inferred from anomaly pattern: ${leadAnomaly}.`
        : `Mechanism is currently inferred at baseline level for ${type}.`;
}
/**
 * Picks the business defect class for a cluster.
 *
 * @param {{candidates: object[]}} cluster - Candidate cluster.
 * @param {string} type - Fallback value.
 * @returns {string} The first distinct label among all relation hits followed
 *   by all anomaly patterns, or `type` when there is none.
 */
function businessDefectClass(cluster, type) {
    const members = cluster.candidates;
    const relationHits = members.flatMap((member) => member.relation_pattern_hits);
    const anomalyLabels = members.flatMap((member) => member.anomaly_patterns);
    const [lead] = uniqueStrings(relationHits.concat(anomalyLabels));
    return lead ?? type;
}
/**
 * Lists the backlinks whose entity name matches `pattern`.
 *
 * @param {{entity: string, id: string}[]} backlinks - Backlinks to filter.
 * @param {RegExp} pattern - Pattern tested against each `entity`.
 * @returns {string[]} Distinct `entity:id` labels of the matching backlinks.
 */
function collectAffectedByEntity(backlinks, pattern) {
    const labels = [];
    for (const link of backlinks) {
        if (pattern.test(link.entity)) {
            labels.push(`${link.entity}:${link.id}`);
        }
    }
    return uniqueStrings(labels);
}
/**
 * Extracts the failed edge name from the first `failed_edge:` relation hit.
 *
 * @param {{candidates: object[]}} cluster - Candidate cluster.
 * @returns {string|undefined} The trimmed text after the `failed_edge:` prefix
 *   of the first such hit; `undefined` when there is no such hit or the text
 *   is empty.
 */
function parseFailedExpectedEdge(cluster) {
    const prefix = "failed_edge:";
    for (const member of cluster.candidates) {
        for (const hit of member.relation_pattern_hits) {
            if (hit.startsWith(prefix)) {
                // Only the first prefixed hit is considered, even if it is empty.
                const edge = hit.slice(prefix.length).trim();
                return edge || undefined;
            }
        }
    }
    return undefined;
}
/**
 * Merges the backlinks of all candidates, de-duplicating case-insensitively
 * on `entity|id`. A later duplicate replaces the earlier entry but keeps its
 * position.
 *
 * @param {object[]} candidates - Candidates carrying `entity_backlinks`.
 * @returns {{entity: string, id: string}[]} Distinct backlinks.
 */
function mergeBacklinks(candidates) {
    const distinct = new Map();
    for (const candidate of candidates) {
        for (const link of candidate.entity_backlinks) {
            const key = `${link.entity.toLowerCase()}|${link.id.toLowerCase()}`;
            distinct.set(key, link);
        }
    }
    return [...distinct.values()];
}
/**
 * Builds a problem unit from a candidate cluster.
 *
 * `expected_state`, `actual_state`, `failed_expected_edge` and
 * `period_impact` are only present on the result when a value was found for
 * them. `snapshot_limitations` contains "low_confidence_candidates_present"
 * when any candidate has a "low" confidence hint.
 *
 * @param {{candidates: object[]}} cluster - Candidate cluster.
 * @param {number} index - Zero-based cluster position; the id uses `index + 1`.
 * @returns {object} The assembled problem unit.
 */
function buildProblemUnit(cluster, index) {
    const members = cluster.candidates;
    const periodPattern = /period|close|reporting|закрыт/i;
    const unitType = detectProblemUnitType(cluster);
    const { severity, confidence } = scoreProblemSeverity(cluster);
    const backlinks = mergeBacklinks(members);
    // First candidate that carries the field as a string wins.
    const firstStringField = (field) => members.find((member) => typeof member[field] === "string")?.[field];
    const expectedState = firstStringField("expected_state");
    const actualState = firstStringField("actual_state");
    const failedExpectedEdge = parseFailedExpectedEdge(cluster);
    const periodSensitive = members.some((member) => member.anomaly_patterns.some((label) => periodPattern.test(label)));
    const lowConfidencePresent = members.some((member) => member.confidence_hint === "low");
    const affectedBy = (pattern) => collectAffectedByEntity(backlinks, pattern);
    const unit = {
        schema_version: stage2ProblemUnits_1.PROBLEM_UNIT_SCHEMA_VERSION,
        problem_unit_id: `pu-${unitType}-${index + 1}`,
        problem_unit_type: unitType,
        title: unitTitle(unitType),
        mechanism_summary: mechanismSummary(cluster, unitType),
        business_defect_class: businessDefectClass(cluster, unitType),
        severity,
        confidence,
        affected_entities: uniqueStrings(backlinks.map((link) => `${link.entity}:${link.id}`)),
        affected_documents: affectedBy(/doc|document|invoice|плат|реал|поступ/i),
        affected_postings: affectedBy(/posting|journal|провод/i),
        affected_accounts: affectedBy(/account|счет|сч/i),
        affected_counterparties: affectedBy(/counterparty|supplier|buyer|контраг|постав|покуп/i),
        affected_contracts: affectedBy(/contract|договор/i)
    };
    // Optional fields are added here so they sit before evidence_pack in key order.
    if (expectedState) {
        unit.expected_state = expectedState;
    }
    if (actualState) {
        unit.actual_state = actualState;
    }
    if (failedExpectedEdge) {
        unit.failed_expected_edge = failedExpectedEdge;
    }
    if (periodSensitive) {
        unit.period_impact = {
            is_period_sensitive: true,
            impact_class: "close_risk"
        };
    }
    unit.evidence_pack = uniqueStrings(members.map((member) => member.candidate_id));
    unit.entity_backlinks = backlinks;
    unit.snapshot_limitations = uniqueStrings(lowConfidencePresent ? ["low_confidence_candidates_present"] : []);
    return unit;
}
/**
 * Builds the key used to detect duplicate problem units.
 *
 * The key combines the unit type, the business defect class, the failed
 * expected edge (or "none") and the unit's first backlink (or "none").
 * The backlink part is lower-cased so that it follows the same
 * case-insensitive `entity|id` identity used elsewhere in this file when
 * backlinks are de-duplicated; otherwise two units pointing at the same
 * entity with different casing would never collapse.
 *
 * @param {object} unit - Problem unit.
 * @returns {string} `type|defectClass|failedEdge|entity|id` signature.
 */
function collapseSignature(unit) {
    const leadBacklink = unit.entity_backlinks[0];
    const backlinkKey = leadBacklink
        ? `${String(leadBacklink.entity).toLowerCase()}|${String(leadBacklink.id).toLowerCase()}`
        : "none";
    return [
        unit.problem_unit_type,
        unit.business_defect_class,
        unit.failed_expected_edge ?? "none",
        backlinkKey
    ].join("|");
}
/**
 * Chooses which of two units should supply lifecycle fields.
 *
 * The unit with the strictly higher `lifecycle_ranking_score` (missing scores
 * count as 0) wins. When neither outranks the other, `right` wins only if
 * its confidence score is strictly higher; otherwise `left` is returned.
 *
 * @param {object} left - First unit.
 * @param {object} right - Second unit.
 * @returns {object} Either `left` or `right`.
 */
function preferredLifecycleSource(left, right) {
    const leftRank = left.lifecycle_ranking_score ?? 0;
    const rightRank = right.lifecycle_ranking_score ?? 0;
    const rightOutranks = rightRank > leftRank;
    const leftOutranks = leftRank > rightRank;
    if (rightOutranks || leftOutranks) {
        return rightOutranks ? right : left;
    }
    const rightMoreConfident = right.confidence.score > left.confidence.score;
    return rightMoreConfident ? right : left;
}
/**
 * Merges problem units that share the same `collapseSignature`.
 *
 * The first unit seen for a signature is the base. Each later duplicate
 * contributes its evidence, backlinks, affected lists and limitations
 * (unioned), raises severity/confidence when its score is strictly higher,
 * and may overlay lifecycle fields when `preferredLifecycleSource` selects it.
 *
 * NOTE(review): lifecycle fields are overlaid one by one on top of the base
 * unit, so a field that the preferred unit lacks keeps the base unit's value.
 * The merged unit can therefore mix lifecycle fields from both units —
 * confirm this is intended.
 *
 * @param {object[]} units - Problem units.
 * @returns {{problem_units: object[], duplicate_collapses: number}} Merged
 *   units in order of first appearance and the number of merges performed.
 */
function collapseDuplicates(units) {
    const unionFields = [
        "affected_entities",
        "affected_documents",
        "affected_postings",
        "affected_accounts",
        "affected_counterparties",
        "affected_contracts",
        "snapshot_limitations"
    ];
    const lifecycleFields = [
        "lifecycle_domain",
        "lifecycle_object_id",
        "current_lifecycle_state",
        "expected_lifecycle_state",
        "missing_transition",
        "invalid_transition",
        "lifecycle_defect_type",
        "stale_duration",
        "lifecycle_confidence",
        "business_lifecycle_interpretation",
        "lifecycle_resolution",
        "lifecycle_ranking_score",
        "lifecycle_ranking_basis"
    ];
    const mergedBySignature = new Map();
    let duplicateCollapses = 0;
    for (const unit of units) {
        const signature = collapseSignature(unit);
        const kept = mergedBySignature.get(signature);
        if (!kept) {
            mergedBySignature.set(signature, unit);
            continue;
        }
        duplicateCollapses += 1;
        const lifecycleSource = preferredLifecycleSource(kept, unit);
        const merged = { ...kept };
        merged.evidence_pack = uniqueStrings([...kept.evidence_pack, ...unit.evidence_pack]);
        const backlinkIndex = new Map();
        for (const link of [...kept.entity_backlinks, ...unit.entity_backlinks]) {
            backlinkIndex.set(`${link.entity.toLowerCase()}|${link.id.toLowerCase()}`, link);
        }
        merged.entity_backlinks = Array.from(backlinkIndex.values());
        for (const field of unionFields) {
            merged[field] = uniqueStrings([...kept[field], ...unit[field]]);
        }
        merged.severity = unit.severity.score > kept.severity.score ? unit.severity : kept.severity;
        merged.confidence = unit.confidence.score > kept.confidence.score ? unit.confidence : kept.confidence;
        for (const field of lifecycleFields) {
            const value = lifecycleSource[field];
            // The ranking score is copied whenever it is defined (0 included);
            // every other lifecycle field is copied only when truthy.
            const shouldCopy = field === "lifecycle_ranking_score" ? value !== undefined : Boolean(value);
            if (shouldCopy) {
                merged[field] = value;
            }
        }
        mergedBySignature.set(signature, merged);
    }
    return {
        problem_units: Array.from(mergedBySignature.values()),
        duplicate_collapses: duplicateCollapses
    };
}
/**
 * Aggregates counters over a list of problem units.
 *
 * @param {object[]} units - Problem units, in final order.
 * @param {number} duplicateCollapses - Merge count to report as-is.
 * @returns {object} Summary with totals, the distinct unit types, per-type /
 *   per-grade / per-lifecycle distributions, and `primary_unit_type` taken
 *   from the first unit (`null` when the list is empty).
 */
function buildSummary(units, duplicateCollapses) {
    // Increments an open-ended tally, starting missing keys at zero.
    const bump = (tally, key) => {
        tally[key] = (tally[key] ?? 0) + 1;
    };
    const typeDistribution = {};
    const severityDistribution = { low: 0, medium: 0, high: 0 };
    const confidenceDistribution = { low: 0, medium: 0, high: 0 };
    const lifecycleDomainDistribution = {};
    const lifecycleDefectDistribution = {};
    let lifecycleEnrichedUnits = 0;
    const unitTypes = uniqueStrings(units.map((unit) => unit.problem_unit_type));
    for (const unit of units) {
        bump(typeDistribution, unit.problem_unit_type);
        // Grade tallies are pre-seeded, so they are incremented directly.
        severityDistribution[unit.severity.grade] += 1;
        confidenceDistribution[unit.confidence.grade] += 1;
        if (unit.lifecycle_domain) {
            lifecycleEnrichedUnits += 1;
            bump(lifecycleDomainDistribution, unit.lifecycle_domain);
        }
        if (unit.lifecycle_defect_type) {
            bump(lifecycleDefectDistribution, unit.lifecycle_defect_type);
        }
    }
    const [firstUnit] = units;
    return {
        schema_version: stage2ProblemUnits_1.PROBLEM_UNIT_SUMMARY_SCHEMA_VERSION,
        units_total: units.length,
        duplicate_collapses: duplicateCollapses,
        unit_types: unitTypes,
        type_distribution: typeDistribution,
        severity_distribution: severityDistribution,
        confidence_distribution: confidenceDistribution,
        primary_unit_type: firstUnit?.problem_unit_type ?? null,
        lifecycle_enriched_units: lifecycleEnrichedUnits,
        lifecycle_domain_distribution: lifecycleDomainDistribution,
        lifecycle_defect_distribution: lifecycleDefectDistribution
    };
}
/**
 * End-to-end assembly: evidence -> candidates -> clusters -> problem units ->
 * collapsed (and optionally lifecycle-ranked) units -> summary.
 *
 * When `FEATURE_ASSISTANT_LIFECYCLE_RUNTIME_V1` is truthy, every unit is passed
 * through `enrichProblemUnitLifecycle` before collapsing and the collapsed
 * list is passed through `rankLifecycleProblemUnits`.
 *
 * @param {object} input - Reads `evidence`, `route`, `result_type`, and the
 *   optional `summary`, `raw_entities`, `risk_factors`.
 * @returns {{candidate_evidence: object[], problem_units: object[], problem_unit_summary: object}}
 */
function assembleProblemUnits(input) {
    const { route } = input;
    const profile = extractSemanticProfile(input.summary ?? {});
    const candidateContext = {
        route,
        result_type: input.result_type,
        raw_entities: input.raw_entities ?? [],
        summary_relation_patterns: profile.relation_patterns,
        summary_anomaly_patterns: profile.anomaly_patterns,
        risk_factors: uniqueStrings(input.risk_factors ?? [])
    };
    const candidates = buildCandidateEvidence(input.evidence, route, candidateContext);
    const lifecycleEnabled = Boolean(config_1.FEATURE_ASSISTANT_LIFECYCLE_RUNTIME_V1);
    const units = [];
    clusterCandidateEvidence(candidates).forEach((cluster, index) => {
        const baseUnit = buildProblemUnit(cluster, index);
        units.push(lifecycleEnabled
            ? (0, lifecycleRuntime_1.enrichProblemUnitLifecycle)({
                unit: baseUnit,
                candidates: cluster.candidates
            })
            : baseUnit);
    });
    const { problem_units: collapsedUnits, duplicate_collapses: duplicateCollapses } = collapseDuplicates(units);
    let finalUnits = collapsedUnits;
    if (lifecycleEnabled) {
        finalUnits = (0, lifecycleRuntime_1.rankLifecycleProblemUnits)(collapsedUnits);
    }
    return {
        candidate_evidence: candidates,
        problem_units: finalUnits,
        problem_unit_summary: buildSummary(finalUnits, duplicateCollapses)
    };
}