NODEDC_1C/llm_normalizer/backend/dist/services/retrievalResultNormalizer.js

479 lines
17 KiB
JavaScript

"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.normalizeRetrievalResult = normalizeRetrievalResult;
const config_1 = require("../config");
const stage1Contracts_1 = require("../types/stage1Contracts");
const problemUnitAssembler_1 = require("./problemUnitAssembler");
const stage4GraphRuntime_1 = require("./stage4GraphRuntime");
/**
 * Narrows an arbitrary value to a non-array object.
 *
 * @param {unknown} value - Candidate value.
 * @returns {object|null} The same reference when `value` is a truthy,
 *   non-array value whose `typeof` is "object"; otherwise `null`.
 */
function toObject(value) {
    const isRecord = Boolean(value) && typeof value === "object" && !Array.isArray(value);
    return isRecord ? value : null;
}
/**
 * Returns the trimmed form of a string value, or `null` when there is nothing usable.
 *
 * @param {unknown} value - Candidate value.
 * @returns {string|null} The trimmed string when `value` is a string that is
 *   non-empty after trimming; `null` for non-strings and blank strings.
 */
function toStringOrNull(value) {
    if (typeof value === "string") {
        const text = value.trim();
        if (text !== "") {
            return text;
        }
    }
    return null;
}
/**
 * Accepts only finite numbers.
 *
 * @param {unknown} value - Candidate value.
 * @returns {number|null} `value` when it is a finite number; `null` for
 *   NaN, infinities and every non-number (numeric strings are not parsed).
 */
function toNumberOrNull(value) {
    const isFiniteNumber = typeof value === "number" && Number.isFinite(value);
    return isFiniteNumber ? value : null;
}
/**
 * Restricts a raw status to the set of recognised status literals.
 *
 * @param {unknown} value - Raw status value.
 * @returns {"ok"|"empty"|"partial"|"error"} `value` when it is one of the
 *   recognised literals; `"error"` for anything else.
 */
function normalizeStatus(value) {
    switch (value) {
        case "ok":
        case "empty":
        case "partial":
        case "error":
            return value;
        default:
            return "error";
    }
}
/**
 * Restricts a raw result type to the set of recognised literals.
 *
 * @param {unknown} value - Raw result type.
 * @returns {"list"|"summary"|"object"|"chain"|"ranking"} `value` when it is
 *   recognised; `"summary"` for anything else.
 */
function normalizeResultType(value) {
    const recognised = ["list", "summary", "object", "chain", "ranking"];
    return recognised.includes(value) ? value : "summary";
}
/**
 * Keeps only the object entries of an array.
 *
 * Note that the check is `typeof entry === "object"`, so nested arrays are
 * kept as well; primitives and `null` are dropped.
 *
 * @param {unknown} value - Candidate array.
 * @returns {object[]} A new array with the truthy object entries of `value`
 *   in their original order, or an empty array when `value` is not an array.
 */
function normalizeObjectArray(value) {
    const kept = [];
    if (Array.isArray(value)) {
        for (const entry of value) {
            if (entry && typeof entry === "object") {
                kept.push(entry);
            }
        }
    }
    return kept;
}
/**
 * Ensures the summary is a non-array object.
 *
 * @param {unknown} value - Raw summary.
 * @returns {object} The same reference when `value` is a truthy non-array
 *   object; otherwise a fresh empty object.
 */
function normalizeSummary(value) {
    const usable = Boolean(value) && typeof value === "object" && !Array.isArray(value);
    return usable ? value : {};
}
/**
 * Converts a raw error list into an array of strings.
 *
 * Each entry is stringified with `String(...)`. Entries for which that is
 * uninformative (the generic "[object Object]") or impossible (for example
 * objects without a usable `toString`) are serialized with `JSON.stringify`
 * instead, so structured error payloads keep their content. If JSON
 * serialization is not possible either (e.g. circular references), the
 * generic "[object Object]" marker is returned.
 *
 * @param {unknown} value - Raw errors value.
 * @returns {string[]} One string per entry, or an empty array when `value`
 *   is not an array.
 */
function normalizeErrors(value) {
    if (!Array.isArray(value)) {
        return [];
    }
    const GENERIC_OBJECT_TEXT = "[object Object]";
    const describe = (item) => {
        let text = null;
        try {
            text = String(item);
        }
        catch {
            // String() throws for values that cannot be converted to a primitive.
            text = null;
        }
        if (text !== null && text !== GENERIC_OBJECT_TEXT) {
            return text;
        }
        try {
            const json = JSON.stringify(item);
            if (typeof json === "string") {
                return json;
            }
        }
        catch {
            // Circular structures / BigInt members: fall through to the generic marker.
        }
        return GENERIC_OBJECT_TEXT;
    };
    return value.map((item) => describe(item));
}
/**
 * Converts every entry of an array to a string via `String(...)`.
 *
 * @param {unknown} value - Candidate array.
 * @returns {string[]} Stringified entries in their original order, or an
 *   empty array when `value` is not an array.
 */
function normalizeStringArray(value) {
    return Array.isArray(value) ? value.map((entry) => String(entry)) : [];
}
/**
 * Returns a copy of `summary` extended with problem-unit metadata fields.
 *
 * The metadata keys are written after the summary's own keys, so they
 * override any same-named entries already present in `summary`.
 *
 * @param {object} summary - Base summary; not mutated.
 * @param {object} input - Metadata source with the camelCase fields read below.
 * @returns {object} New object: summary fields plus the snake_case metadata.
 */
function mergeSummaryWithProblemUnitMeta(summary, input) {
    const {
        candidateEvidenceCount,
        problemUnitsCount,
        unitTypes,
        duplicateCollapses,
        severityDistribution,
        confidenceDistribution,
        lifecycleEnrichedUnits,
        lifecycleDomainDistribution,
        lifecycleDefectDistribution
    } = input;
    const problemUnitMeta = {
        problem_units_enabled: true,
        candidate_evidence_count: candidateEvidenceCount,
        problem_units_count: problemUnitsCount,
        problem_unit_types: unitTypes,
        problem_unit_duplicate_collapses: duplicateCollapses,
        problem_unit_severity_distribution: severityDistribution,
        problem_unit_confidence_distribution: confidenceDistribution,
        lifecycle_enriched_units: lifecycleEnrichedUnits,
        problem_unit_lifecycle_domain_distribution: lifecycleDomainDistribution,
        problem_unit_lifecycle_defect_distribution: lifecycleDefectDistribution
    };
    return { ...summary, ...problemUnitMeta };
}
/**
 * Returns a copy of `summary` extended with graph metadata fields.
 *
 * The `graph_*` keys are written after the summary's own keys, so they
 * override any same-named entries already present in `summary`.
 *
 * @param {object} summary - Base summary; not mutated.
 * @param {object} graphSummary - Graph summary providing the fields read below.
 * @returns {object} New object: summary fields plus the `graph_*` metadata.
 */
function mergeSummaryWithGraphMeta(summary, graphSummary) {
    const {
        total_units,
        bound_units,
        node_count,
        edge_count,
        missing_links_count,
        conflicting_links_count,
        graph_coverage_grade,
        domain_distribution,
        relation_distribution
    } = graphSummary;
    const graphMeta = {
        graph_runtime_enabled: true,
        graph_total_units: total_units,
        graph_bound_units: bound_units,
        graph_nodes_count: node_count,
        graph_edges_count: edge_count,
        graph_missing_links_count: missing_links_count,
        graph_conflicting_links_count: conflicting_links_count,
        graph_coverage_grade: graph_coverage_grade,
        graph_domain_distribution: domain_distribution,
        graph_relation_distribution: relation_distribution
    };
    return { ...summary, ...graphMeta };
}
/**
 * Restricts a raw confidence value to the recognised levels.
 *
 * @param {unknown} value - Raw confidence.
 * @returns {"high"|"medium"|"low"} `value` when recognised; `"medium"` otherwise.
 */
function normalizeConfidence(value) {
    switch (value) {
        case "high":
        case "medium":
        case "low":
            return value;
        default:
            return "medium";
    }
}
/**
 * Parses an explicitly supplied evidence confidence.
 *
 * Unlike `normalizeConfidence`, no default level is substituted: an
 * unrecognised value yields `null`.
 *
 * @param {unknown} value - Raw confidence.
 * @returns {"high"|"medium"|"low"|null} The recognised level, or `null`.
 */
function parseEvidenceConfidence(value) {
    const levels = ["high", "medium", "low"];
    return levels.includes(value) ? value : null;
}
/**
 * Maps a raw namespace value onto one of the canonical namespace labels.
 *
 * Matching is case-insensitive and ignores surrounding whitespace.
 *
 * @param {unknown} value - Raw namespace.
 * @returns {"snapshot_2020"|"assistant_derived"|"unknown"} Canonical label;
 *   `"unknown"` for blank, non-string or unrecognised input.
 */
function normalizeEvidenceNamespace(value) {
    const text = toStringOrNull(value);
    if (!text) {
        return "unknown";
    }
    switch (text.toLowerCase()) {
        case "snapshot_2020":
        case "snapshot":
            return "snapshot_2020";
        case "assistant_derived":
        case "derived":
            return "assistant_derived";
        default:
            return "unknown";
    }
}
/**
 * Classifies an evidence record by which groups of fields it carries.
 *
 * Field groups are checked in priority order and the first group with any
 * field that is not `undefined` wins (a field set to `null` still counts).
 *
 * @param {object} item - Evidence record.
 * @returns {"mechanism_link"|"anomaly_signal"|"aggregation"|"limitation_note"|"factual_anchor"}
 *   The inferred kind; `"factual_anchor"` when no group matches.
 */
function inferEvidenceKind(item) {
    const hasAny = (...fields) => fields.some((field) => item[field] !== undefined);
    if (hasAny("mechanism_of_failure", "failed_expected_edge", "expected_next_step")) {
        return "mechanism_link";
    }
    if (hasAny("risk_score", "zero_guid_values", "unknown_link_count")) {
        return "anomaly_signal";
    }
    if (hasAny("records_count", "operations_count", "document_refs_count")) {
        return "aggregation";
    }
    if (hasAny("limitation", "is_snapshot_limited")) {
        return "limitation_note";
    }
    return "factual_anchor";
}
/**
 * Legacy mechanism-note resolution: always produces a note string.
 *
 * An explicit non-blank `mechanism_note` on the record wins. Otherwise a
 * canned sentence is chosen by evidence kind; for `"mechanism_link"` a
 * non-blank `mechanism_of_failure` is preferred over the canned sentence.
 *
 * @param {string} kind - Evidence kind.
 * @param {object} item - Evidence record.
 * @returns {string} The resolved note.
 */
function inferMechanismNoteLegacy(kind, item) {
    const stated = toStringOrNull(item.mechanism_note);
    if (stated) {
        return stated;
    }
    switch (kind) {
        case "mechanism_link": {
            const failure = toStringOrNull(item.mechanism_of_failure);
            return failure ? failure : "Mechanism link inferred from retrieval evidence.";
        }
        case "anomaly_signal":
            return "Anomaly signal inferred from risk-oriented fields.";
        case "aggregation":
            return "Aggregated evidence item.";
        case "limitation_note":
            return "Evidence includes explicit limitation hints.";
        default:
            return "Factual evidence anchor.";
    }
}
/**
 * Resolves the mechanism note for an evidence record and reports whether the
 * note comes from the record itself.
 *
 * Sources marked reliable, in priority order:
 *   1. a non-blank `mechanism_note`;
 *   2. for `"mechanism_link"` evidence, a non-blank `mechanism_of_failure`;
 *   3. for `"mechanism_link"` evidence, a note composed from
 *      `failed_expected_edge` and/or `expected_next_step`.
 *
 * When none applies the result is unreliable: the legacy canned note if
 * `FEATURE_ASSISTANT_EVIDENCE_ENRICHMENT_V1` is off, otherwise no note.
 *
 * @param {string} kind - Evidence kind.
 * @param {object} item - Evidence record.
 * @returns {{note: string|null, reliable: boolean}} Resolved note and its reliability.
 */
function resolveMechanismNote(kind, item) {
    const trusted = (note) => ({ note, reliable: true });
    const stated = toStringOrNull(item.mechanism_note);
    if (stated) {
        return trusted(stated);
    }
    if (kind === "mechanism_link") {
        const failure = toStringOrNull(item.mechanism_of_failure);
        if (failure) {
            return trusted(failure);
        }
        const failedEdge = toStringOrNull(item.failed_expected_edge);
        const expectedNext = toStringOrNull(item.expected_next_step);
        const fragments = [];
        if (failedEdge) {
            fragments.push(`failed_edge=${failedEdge}`);
        }
        if (expectedNext) {
            fragments.push(`expected_next_step=${expectedNext}`);
        }
        if (fragments.length > 0) {
            return trusted(fragments.join("; "));
        }
    }
    const enrichmentEnabled = Boolean(config_1.FEATURE_ASSISTANT_EVIDENCE_ENRICHMENT_V1);
    return {
        note: enrichmentEnabled ? null : inferMechanismNoteLegacy(kind, item),
        reliable: false
    };
}
/**
 * Determines the source type of an evidence record.
 *
 * A recognised explicit value is returned as-is (after trimming). Otherwise
 * the type is inferred: records carrying any of the counter fields
 * (`records_count`, `operations_count`, `document_refs_count`) are treated
 * as summaries, everything else as single items.
 *
 * @param {unknown} value - Explicit source type, if any.
 * @param {object} record - Evidence record used for inference.
 * @returns {"retrieval_item"|"retrieval_summary"|"derived"} Resolved source type.
 */
function normalizeEvidenceSourceType(value, record) {
    const declared = toStringOrNull(value);
    const recognised = ["retrieval_item", "retrieval_summary", "derived"];
    if (recognised.includes(declared)) {
        return declared;
    }
    const counterFields = ["records_count", "operations_count", "document_refs_count"];
    const hasCounters = counterFields.some((field) => record[field] !== undefined);
    return hasCounters ? "retrieval_summary" : "retrieval_item";
}
/**
 * Reads the `pointer` sub-object of an evidence record.
 *
 * @param {object} record - Evidence record.
 * @returns {object} `record.pointer` when `toObject` accepts it; otherwise
 *   an empty object, so callers can read fields without null checks.
 */
function readPointer(record) {
    const candidate = toObject(record.pointer);
    if (candidate == null) {
        return {};
    }
    return candidate;
}
/**
 * Builds a fully populated evidence pointer for a record and reports which
 * source fields had to be filled with fallbacks.
 *
 * Each source field is taken from `record.pointer.source` first and from the
 * flat record field (`source_entity`, `source_id`, `period`,
 * `source_namespace`) second. Blank or non-string values count as missing
 * for all four fields, including the namespace, so a blank pointer namespace
 * no longer hides a usable `record.source_namespace`.
 *
 * @param {string} fragmentId - Fragment id used when the pointer has none.
 * @param {string} route - Route used when the pointer has none.
 * @param {object} record - Evidence record.
 * @param {number} index - Zero-based position of the record in its list.
 * @returns {{pointer: object, fallback_source_namespace: boolean,
 *   fallback_source_entity: boolean, fallback_source_id: boolean}}
 *   The normalized pointer plus one flag per source field that was not
 *   supplied by the record.
 */
function normalizeEvidencePointer(fragmentId, route, record, index) {
    const pointer = readPointer(record);
    const source = toObject(pointer.source);
    const locator = toObject(pointer.locator);
    const sourceEntityCandidate = toStringOrNull(source?.entity) ?? toStringOrNull(record.source_entity);
    const sourceEntity = sourceEntityCandidate ?? "unknown_entity";
    const sourceIdCandidate = toStringOrNull(source?.id) ?? toStringOrNull(record.source_id);
    // Synthetic id is 1-based so it lines up with generated evidence ids.
    const sourceId = sourceIdCandidate ?? `${route}:${fragmentId}:${index + 1}`;
    const period = toStringOrNull(source?.period) ?? toStringOrNull(record.period);
    // Same "pointer first, flat field second" rule as entity/id/period.
    const namespaceCandidate = toStringOrNull(source?.namespace) ?? toStringOrNull(record.source_namespace);
    const namespace = normalizeEvidenceNamespace(namespaceCandidate);
    return {
        pointer: {
            fragment_id: toStringOrNull(pointer.fragment_id) ?? fragmentId,
            route: toStringOrNull(pointer.route) ?? route,
            source: {
                namespace,
                entity: sourceEntity,
                id: sourceId,
                period
            },
            locator: {
                field_path: toStringOrNull(locator?.field_path) ?? toStringOrNull(record.field_path),
                item_index: toNumberOrNull(locator?.item_index) ?? index
            }
        },
        fallback_source_namespace: namespace === "unknown",
        fallback_source_entity: sourceEntityCandidate === null,
        fallback_source_id: sourceIdCandidate === null
    };
}
/**
 * Canonicalizes one component of a source reference.
 *
 * The value is trimmed, lower-cased and URI-component encoded; a nullish
 * value is represented by the literal "none".
 *
 * @param {string|null|undefined} value - Source reference component.
 * @returns {string} Canonical, percent-encoded component.
 */
function canonicalizeSourceRefPart(value) {
    const present = value ?? "none";
    const folded = present.trim().toLowerCase();
    return encodeURIComponent(folded);
}
/**
 * Builds the source reference for a normalized evidence pointer.
 *
 * `canonical_ref` is the schema version followed by the canonicalized
 * namespace, entity, id and period, joined with "|".
 *
 * @param {{source: {namespace: string, entity: string, id: string, period: string|null}}} pointer
 *   Normalized pointer whose `source` is read.
 * @returns {object} Source reference with `schema_version`, the four source
 *   fields copied verbatim, and `canonical_ref`.
 */
function buildSourceRef(pointer) {
    const { namespace, entity, id, period } = pointer.source;
    const schemaVersion = stage1Contracts_1.EVIDENCE_SOURCE_REF_SCHEMA_VERSION;
    const canonicalParts = [namespace, entity, id, period].map((part) => canonicalizeSourceRefPart(part));
    return {
        schema_version: schemaVersion,
        namespace,
        entity,
        id,
        period,
        canonical_ref: [schemaVersion, ...canonicalParts].join("|")
    };
}
/**
 * Coerces a loosely typed flag to a boolean.
 *
 * - booleans are returned unchanged;
 * - numbers are true unless they are zero or NaN (NaN carries no usable
 *   value, so it is not treated as an affirmative flag);
 * - strings are true only for "true", "1" or "yes" (case-insensitive,
 *   surrounding whitespace ignored);
 * - everything else is false.
 *
 * @param {unknown} value - Raw flag value.
 * @returns {boolean} The coerced flag.
 */
function toBoolean(value) {
    switch (typeof value) {
        case "boolean":
            return value;
        case "number":
            return value !== 0 && !Number.isNaN(value);
        case "string": {
            const lowered = value.trim().toLowerCase();
            return lowered === "true" || lowered === "1" || lowered === "yes";
        }
        default:
            return false;
    }
}
/**
 * Derives a limitation reason code from free-form limitation text.
 *
 * Keyword rules are tried in order and the first match wins, so text that
 * mentions several keywords gets the code of the earliest rule.
 *
 * @param {string} text - Limitation text.
 * @returns {"snapshot_only"|"heuristic_inference"|"missing_mechanism"|"insufficient_detail"|"unknown"}
 *   Reason code; `"unknown"` when no keyword matches.
 */
function limitationCodeFromText(text) {
    const haystack = text.toLowerCase();
    const rules = [
        [/(snapshot|read-only|read only)/i, "snapshot_only"],
        [/heuristic/i, "heuristic_inference"],
        [/mechanism/i, "missing_mechanism"],
        [/(guid|detail|specific)/i, "insufficient_detail"]
    ];
    const matched = rules.find(([pattern]) => pattern.test(haystack));
    return matched ? matched[1] : "unknown";
}
/**
 * Determines the limitation attached to an evidence item, if any.
 *
 * Resolution order:
 *   1. explicit non-blank `record.limitation` text (code derived from the text);
 *   2. a truthy `record.is_snapshot_limited` flag;
 *   3. only when `FEATURE_ASSISTANT_EVIDENCE_ENRICHMENT_V1` is on, inferred
 *      limitations: missing mechanism, weak source mapping, derived source,
 *      and finally limitation-note evidence without further detail.
 *
 * @param {object} input - Resolution inputs.
 * @param {object} input.record - Evidence record.
 * @param {string} input.sourceType - Normalized source type.
 * @param {string} input.evidenceKind - Inferred evidence kind.
 * @param {boolean} input.mechanismReliable - Whether the mechanism note is reliable.
 * @param {boolean} input.mechanismExpected - Whether a mechanism is expected for this kind.
 * @param {boolean} input.pointerWeak - Whether any pointer source field was a fallback.
 * @returns {{reason_code: string, note: string|null}|null} The limitation, or `null`.
 */
function resolveEvidenceLimitation(input) {
    const limited = (reasonCode, note = null) => ({ reason_code: reasonCode, note });
    const stated = toStringOrNull(input.record.limitation);
    if (stated) {
        return limited(limitationCodeFromText(stated), stated);
    }
    if (toBoolean(input.record.is_snapshot_limited)) {
        return limited("snapshot_only");
    }
    if (!config_1.FEATURE_ASSISTANT_EVIDENCE_ENRICHMENT_V1) {
        return null;
    }
    const mechanismMissing = input.mechanismExpected && !input.mechanismReliable;
    if (mechanismMissing) {
        return limited("missing_mechanism");
    }
    if (input.pointerWeak) {
        return limited("weak_source_mapping");
    }
    if (input.sourceType === "derived") {
        return limited("heuristic_inference");
    }
    if (input.evidenceKind === "limitation_note") {
        return limited("unknown");
    }
    return null;
}
/**
 * Lowers a confidence level by one step.
 *
 * @param {string} value - Current confidence level.
 * @returns {"medium"|"low"} `"medium"` for `"high"`; `"low"` for every other input.
 */
function downgradeConfidence(value) {
    return value === "high" ? "medium" : "low";
}
/**
 * Resolves the confidence level of an evidence item.
 *
 * With `FEATURE_ASSISTANT_EVIDENCE_ENRICHMENT_V1` off, the explicit
 * confidence is used, defaulting to `"medium"`.
 *
 * With the feature on, the result is forced to `"low"` when the source is
 * derived without an explicit confidence, when a mechanism is expected but
 * not reliable, or when the pointer is weak. Otherwise the baseline (explicit
 * confidence, else `"medium"` for retrieval items and `"low"` for other
 * source types) is lowered one step if the limitation is a missing mechanism
 * or a weak source mapping.
 *
 * @param {object} input - Resolution inputs.
 * @param {string|null} input.explicitConfidence - Parsed explicit confidence, if any.
 * @param {string} input.sourceType - Normalized source type.
 * @param {boolean} input.mechanismReliable - Whether the mechanism note is reliable.
 * @param {boolean} input.mechanismExpected - Whether a mechanism is expected.
 * @param {{reason_code: string}|null} input.limitation - Resolved limitation, if any.
 * @param {boolean} input.pointerWeak - Whether any pointer source field was a fallback.
 * @returns {string} The resolved confidence level.
 */
function resolveEvidenceConfidence(input) {
    const { explicitConfidence, sourceType } = input;
    if (!config_1.FEATURE_ASSISTANT_EVIDENCE_ENRICHMENT_V1) {
        return explicitConfidence ?? "medium";
    }
    const derivedWithoutExplicit = sourceType === "derived" && !explicitConfidence;
    const mechanismMissing = input.mechanismExpected && !input.mechanismReliable;
    // Any of these conditions overrides whatever baseline would apply.
    if (derivedWithoutExplicit || mechanismMissing || input.pointerWeak) {
        return "low";
    }
    const baseline = explicitConfidence ?? (sourceType === "retrieval_item" ? "medium" : "low");
    const reasonCode = input.limitation?.reason_code;
    const shouldDowngrade = reasonCode === "missing_mechanism" || reasonCode === "weak_source_mapping";
    return shouldDowngrade ? downgradeConfidence(baseline) : baseline;
}
/**
 * Normalizes a raw evidence list into structured evidence items.
 *
 * Non-object entries are dropped first; positions used for generated ids and
 * pointers refer to the filtered list. Missing ids and claim references are
 * synthesized from the fragment id, the record position and the first
 * requirement id.
 *
 * @param {string} fragmentId - Id of the fragment the evidence belongs to.
 * @param {string[]} requirementIds - Requirement ids; the first one is used
 *   for the default claim reference.
 * @param {string} route - Route the evidence was retrieved through.
 * @param {unknown} value - Raw evidence list.
 * @returns {object[]} One normalized evidence item per object entry; the
 *   original record is kept under `payload`.
 */
function normalizeEvidenceItems(fragmentId, requirementIds, route, value) {
    const kindsExpectingMechanism = ["mechanism_link", "anomaly_signal", "aggregation"];
    const toEvidenceItem = (record, index) => {
        const evidenceId = toStringOrNull(record.evidence_id) ?? `ev-${fragmentId}-${index + 1}`;
        let claimRef = toStringOrNull(record.claim_ref);
        if (claimRef == null) {
            // Resolved lazily: requirementIds is only read when the record has no claim_ref.
            const primaryRequirement = requirementIds[0];
            claimRef = primaryRequirement ? `requirement:${primaryRequirement}` : `fragment:${fragmentId}`;
        }
        const evidenceKind = inferEvidenceKind(record);
        const sourceType = normalizeEvidenceSourceType(record.source_type, record);
        const { pointer, fallback_source_namespace, fallback_source_entity, fallback_source_id } = normalizeEvidencePointer(fragmentId, route, record, index);
        const mechanism = resolveMechanismNote(evidenceKind, record);
        const mechanismExpected = kindsExpectingMechanism.includes(evidenceKind);
        // The pointer is weak as soon as any source field had to be defaulted.
        const pointerWeak = fallback_source_namespace || fallback_source_entity || fallback_source_id;
        const shared = {
            sourceType,
            mechanismReliable: mechanism.reliable,
            mechanismExpected,
            pointerWeak
        };
        const limitation = resolveEvidenceLimitation({ record, evidenceKind, ...shared });
        const confidence = resolveEvidenceConfidence({
            explicitConfidence: parseEvidenceConfidence(record.confidence),
            limitation,
            ...shared
        });
        return {
            evidence_id: evidenceId,
            claim_ref: claimRef,
            source_type: sourceType,
            source_ref: buildSourceRef(pointer),
            pointer,
            evidence_kind: evidenceKind,
            mechanism_note: mechanism.note,
            confidence,
            limitation,
            payload: record
        };
    };
    return normalizeObjectArray(value).map((record, index) => toEvidenceItem(record, index));
}
/**
 * Normalizes a raw retrieval payload into the result shape used downstream.
 *
 * Every field of the payload is normalized individually. A `null` or
 * `undefined` payload is treated as an empty one instead of throwing, which
 * yields a result with status "error" and empty collections.
 *
 * When `FEATURE_ASSISTANT_PROBLEM_UNITS_V1` is on, problem units are
 * assembled from the normalized data and reported alongside the base result;
 * when `FEATURE_ASSISTANT_GRAPH_RUNTIME_V1` is also on, an accounting graph is
 * built and its per-unit bindings and summary are attached as well.
 *
 * @param {string} fragmentId - Id of the fragment being answered.
 * @param {string[]} requirementIds - Requirement ids covered by the fragment.
 * @param {string} route - Retrieval route that produced the payload.
 * @param {object|null|undefined} raw - Raw retrieval payload.
 * @returns {object} The normalized result; with problem units enabled it also
 *   carries `raw_entities`, `candidate_evidence`, `problem_units`,
 *   `problem_unit_summary` and, when a graph was built, `accounting_graph`.
 */
function normalizeRetrievalResult(fragmentId, requirementIds, route, raw) {
    // Guard against a missing payload so the per-field normalizers can apply their defaults.
    const payload = raw ?? {};
    const items = normalizeObjectArray(payload.items);
    const summary = normalizeSummary(payload.summary);
    const evidence = normalizeEvidenceItems(fragmentId, requirementIds, route, payload.evidence);
    const baseResult = {
        fragment_id: fragmentId,
        requirement_ids: requirementIds,
        route,
        status: normalizeStatus(payload.status),
        result_type: normalizeResultType(payload.result_type),
        items,
        summary,
        evidence,
        why_included: normalizeStringArray(payload.why_included),
        selection_reason: normalizeStringArray(payload.selection_reason),
        risk_factors: normalizeStringArray(payload.risk_factors),
        business_interpretation: normalizeStringArray(payload.business_interpretation),
        confidence: normalizeConfidence(payload.confidence),
        limitations: normalizeStringArray(payload.limitations),
        errors: normalizeErrors(payload.errors)
    };
    if (!config_1.FEATURE_ASSISTANT_PROBLEM_UNITS_V1) {
        return baseResult;
    }
    const assembled = (0, problemUnitAssembler_1.assembleProblemUnits)({
        route,
        result_type: baseResult.result_type,
        evidence,
        raw_entities: items,
        summary,
        risk_factors: baseResult.risk_factors,
        selection_reason: baseResult.selection_reason,
        business_interpretation: baseResult.business_interpretation
    });
    // The graph is optional; everything below tolerates graphBuild === null.
    const graphBuild = config_1.FEATURE_ASSISTANT_GRAPH_RUNTIME_V1
        ? (0, stage4GraphRuntime_1.buildAccountingGraph)({
            route,
            candidateEvidence: assembled.candidate_evidence,
            problemUnits: assembled.problem_units
        })
        : null;
    // Index bindings by problem unit id for constant-time lookup while decorating units.
    const graphBindingByUnitId = new Map(graphBuild?.unit_bindings.map((item) => [item.problem_unit_id, item]) ?? []);
    const graphBoundProblemUnits = assembled.problem_units.map((unit) => {
        const binding = graphBindingByUnitId.get(unit.problem_unit_id);
        if (!binding) {
            return unit;
        }
        return {
            ...unit,
            graph_binding: binding
        };
    });
    const graphBoundSummary = graphBuild
        ? {
            ...assembled.problem_unit_summary,
            graph_summary: graphBuild.summary
        }
        : assembled.problem_unit_summary;
    let enrichedSummary = mergeSummaryWithProblemUnitMeta(summary, {
        candidateEvidenceCount: assembled.candidate_evidence.length,
        problemUnitsCount: graphBoundProblemUnits.length,
        unitTypes: graphBoundSummary.unit_types,
        duplicateCollapses: graphBoundSummary.duplicate_collapses,
        severityDistribution: graphBoundSummary.severity_distribution,
        confidenceDistribution: graphBoundSummary.confidence_distribution,
        lifecycleEnrichedUnits: graphBoundSummary.lifecycle_enriched_units ?? 0,
        lifecycleDomainDistribution: (graphBoundSummary.lifecycle_domain_distribution ?? {}),
        lifecycleDefectDistribution: (graphBoundSummary.lifecycle_defect_distribution ?? {})
    });
    if (graphBuild) {
        enrichedSummary = mergeSummaryWithGraphMeta(enrichedSummary, graphBuild.summary);
    }
    return {
        ...baseResult,
        summary: enrichedSummary,
        raw_entities: items,
        candidate_evidence: assembled.candidate_evidence,
        problem_units: graphBoundProblemUnits,
        problem_unit_summary: graphBoundSummary,
        ...(graphBuild
            ? {
                accounting_graph: graphBuild
            }
            : {})
    };
}