/**
 * Produces the string recorded as the temporal guard's input.
 *
 * @param {{from: *, to: *} | null | undefined} window - Resolved window; when truthy it wins.
 * @param {*} fallback - Value used when no window is available.
 * @returns {string | null} `"<from>..<to>"` for a window, otherwise the trimmed
 *   fallback text, or null when that text is empty.
 */
function toTemporalGuardInput(window, fallback) {
    if (!window) {
        // No window: fall back to the anchor text, treating blank as "nothing".
        const trimmedFallback = String(fallback ?? "").trim();
        return trimmedFallback.length > 0 ? trimmedFallback : null;
    }
    const { from, to } = window;
    return `${from}..${to}`;
}
/**
 * Selects the window that evidence periods are compared against.
 *
 * @param {object} temporal - Temporal guard audit object.
 * @returns {*} `temporal.effective_primary_period` unless it is null/undefined,
 *   in which case `temporal.primary_period_window` is returned as-is.
 */
function effectivePrimaryPeriodWindow(temporal) {
    const effectiveWindow = temporal.effective_primary_period;
    if (effectiveWindow !== null && effectiveWindow !== undefined) {
        return effectiveWindow;
    }
    return temporal.primary_period_window;
}
/**
 * Reports whether a window object starts and ends on a day of July 2020.
 *
 * @param {*} period - Candidate window; `from` and `to` are read when it is an object.
 * @returns {boolean} True when both trimmed bounds match `2020-07-DD`.
 */
function isWindowInsideJuly2020(period) {
    if (!period || typeof period !== "object") {
        return false;
    }
    const julyDay = /^2020-07-\d{2}$/;
    const from = String(period.from ?? "").trim();
    const to = String(period.to ?? "").trim();
    return julyDay.test(from) && julyDay.test(to);
}
/**
 * Reports whether the temporal guard resolved to July 2020.
 *
 * Checked in order: the resolved anchor (`2020-07` or `2020-07-DD`), then the
 * effective primary period, then the resolved primary period.
 *
 * @param {*} temporalGuard - Temporal guard audit object.
 * @returns {boolean} True when any of the three sources points at July 2020.
 */
function isJuly2020TemporalResolved(temporalGuard) {
    if (!temporalGuard || typeof temporalGuard !== "object") {
        return false;
    }
    const resolvedAnchor = String(temporalGuard.resolved_time_anchor ?? "").trim();
    if (/^2020-07(?:-\d{2})?$/.test(resolvedAnchor)) {
        return true;
    }
    return (isWindowInsideJuly2020(temporalGuard.effective_primary_period) ||
        isWindowInsideJuly2020(temporalGuard.resolved_primary_period));
}
/**
 * Reports whether the claim type or the focus domain marks a P0 claim.
 *
 * @param {*} claimType - Claim type; stringified and trimmed before matching.
 * @param {*} focusDomainHint - Focus domain; matched by strict equality.
 * @returns {boolean} True when either value is in its recognised list.
 */
function hasP0ClaimSignal(claimType, focusDomainHint) {
    const p0ClaimTypes = [
        "prove_settlement_closure_state",
        "prove_advance_offset_state",
        "prove_vat_chain_completeness",
        "prove_month_close_state",
        "prove_rbp_tail_state"
    ];
    const p0FocusDomains = [
        "settlements_60_62",
        "vat_document_register_book",
        "month_close_costs_20_44",
        "fixed_asset_amortization"
    ];
    const claim = String(claimType ?? "").trim();
    return p0ClaimTypes.includes(claim) || p0FocusDomains.includes(focusDomainHint);
}
/**
 * Upgrades a business-scope resolution to company-specific accounting when the
 * temporal guard resolved to July 2020 and a P0 claim signal is present.
 *
 * The input objects are not mutated; a new resolution object is returned when
 * the rule fires, otherwise `input.current` is returned unchanged.
 *
 * @param {{current: *, temporalGuard: *, claimType: *, focusDomainHint: *}} input
 * @returns {*} The original or the upgraded resolution.
 */
function resolveBusinessScopeFromLiveContext(input) {
    const current = input.current;
    // A missing resolution has nothing to upgrade. Previously this crashed with
    // a TypeError, but only when the July/P0 rule happened to fire.
    if (current === null || current === undefined) {
        return current;
    }
    const julyResolved = isJuly2020TemporalResolved(input.temporalGuard);
    const p0Signal = hasP0ClaimSignal(input.claimType, input.focusDomainHint);
    if (!julyResolved || !p0Signal) {
        return current;
    }
    const routeSummary = current.route_summary_resolved;
    // The reason code records that the rule fired, even if no scope value changes.
    const reasons = Array.isArray(current.scope_resolution_reason) ? [...current.scope_resolution_reason] : [];
    if (!reasons.includes("temporal_claim_bound_company_scope_recovery")) {
        reasons.push("temporal_claim_bound_company_scope_recovery");
    }
    const isRecoverableScope = (scope) => scope === "generic_accounting" || scope === "unclear";
    const currentScopes = Array.isArray(current.business_scope_resolved) ? current.business_scope_resolved : [];
    let changed = false;
    const normalizedScopes = currentScopes
        .map((item) => String(item ?? "").trim())
        .filter(Boolean)
        .map((item) => {
        if (isRecoverableScope(item)) {
            changed = true;
            return "company_specific_accounting";
        }
        return item;
    });
    if (!normalizedScopes.includes("company_specific_accounting")) {
        normalizedScopes.push("company_specific_accounting");
        changed = true;
    }
    let routeSummaryResolved = routeSummary;
    if (routeSummary && routeSummary.mode === "deterministic_v2" && Array.isArray(routeSummary.decisions)) {
        const decisions = routeSummary.decisions.map((decision) => {
            const scopeValue = String(decision.business_scope ?? "").trim();
            if (!isRecoverableScope(scopeValue)) {
                return decision;
            }
            changed = true;
            return {
                ...decision,
                business_scope: "company_specific_accounting"
            };
        });
        // Keep the caller's object when nothing was upgraded anywhere.
        routeSummaryResolved = changed
            ? {
                ...routeSummary,
                decisions
            }
            : routeSummary;
    }
    return {
        ...current,
        // Several raw scopes can collapse onto the same value, so de-duplicate.
        business_scope_resolved: Array.from(new Set(normalizedScopes)),
        company_grounding_applied: current.company_grounding_applied || changed,
        scope_resolution_reason: reasons,
        route_summary_resolved: routeSummaryResolved
    };
}
temporalGuard.primary_period_window }); + const businessScopeResolution = resolveBusinessScopeFromLiveContext({ + current: initialBusinessScopeResolution, + temporalGuard, + claimType: claimAnchorAudit.claim_type, + focusDomainHint: focusDomainForGuards + }); + const resolvedRouteSummary = businessScopeResolution.route_summary_resolved; const requirementExtraction = extractRequirements(resolvedRouteSummary, normalized.normalized, userMessage); let executionPlan = toExecutionPlan(resolvedRouteSummary, normalized.normalized, userMessage, requirementExtraction.byFragment); executionPlan = (0, assistantRuntimeGuards_1.applyTemporalHintToExecutionPlan)(executionPlan, temporalGuard); @@ -1538,10 +1644,13 @@ class AssistantService { business_scope_resolved: businessScopeResolution.business_scope_resolved, company_grounding_applied: businessScopeResolution.company_grounding_applied, scope_resolution_reason: businessScopeResolution.scope_resolution_reason, + company_scope_resolution_reason: businessScopeResolution.scope_resolution_reason, raw_time_anchor: temporalGuard.raw_time_anchor, raw_time_scope: temporalGuard.raw_time_scope, resolved_time_anchor: temporalGuard.resolved_time_anchor, resolved_primary_period: temporalGuard.resolved_primary_period, + effective_primary_period: temporalGuard.effective_primary_period, + temporal_guard_input: temporalGuard.temporal_guard_input, temporal_alignment_status: temporalGuard.temporal_alignment_status, temporal_resolution_source: temporalGuard.temporal_resolution_source, temporal_guard_basis: temporalGuard.temporal_guard_basis, @@ -1627,10 +1736,13 @@ class AssistantService { business_scope_resolved: businessScopeResolution.business_scope_resolved, company_grounding_applied: businessScopeResolution.company_grounding_applied, scope_resolution_reason: businessScopeResolution.scope_resolution_reason, + company_scope_resolution_reason: businessScopeResolution.scope_resolution_reason, raw_time_anchor: temporalGuard.raw_time_anchor, 
/** Creates `dirPath` and any missing parent directories. */
function ensureDir(dirPath) {
  fs.mkdirSync(dirPath, { recursive: true });
}

/** Writes `payload` as 2-space-indented JSON plus a trailing newline, creating the parent directory first. */
function writeJson(filePath, payload) {
  const parentDir = path.dirname(filePath);
  ensureDir(parentDir);
  const serialized = JSON.stringify(payload, null, 2);
  fs.writeFileSync(filePath, `${serialized}\n`, "utf8");
}

/** Writes `text` verbatim as UTF-8, creating the parent directory first. */
function writeText(filePath, text) {
  const parentDir = path.dirname(filePath);
  ensureDir(parentDir);
  fs.writeFileSync(filePath, text, "utf8");
}

/**
 * Divides two numbers and rounds to four decimals.
 *
 * @returns {number} 0 when either argument is not a finite number or the
 *   denominator is not positive.
 */
function ratio(numerator, denominator) {
  const usable = Number.isFinite(numerator) && Number.isFinite(denominator) && denominator > 0;
  return usable ? Number((numerator / denominator).toFixed(4)) : 0;
}

/**
 * Splits a conversation export into its `## <n>. user|assistant` sections.
 *
 * Lines directly under a heading are read as `key: value` metadata until the
 * first blank line; everything after that is body text. A
 * `### technical_debug_payload_json` line inside a body ends the section, and
 * the following lines are ignored until the next heading.
 *
 * @param {*} markdown - Export text; null/undefined is treated as empty.
 * @returns {Array<{role: string, index: number, metadata: object, body: string}>}
 */
function parseConversationSections(markdown) {
  const headingPattern = /^##\s+(\d+)\.\s+(user|assistant)\s*$/i;
  const metadataPattern = /^([a-zA-Z0-9_]+):\s*(.*)$/;
  const debugMarkerPattern = /^###\s+technical_debug_payload_json\s*$/i;
  const parsed = [];
  // Section currently being collected; null while between sections.
  let open = null;

  const closeOpen = () => {
    if (open !== null) {
      parsed.push({
        role: open.role,
        index: open.index,
        metadata: open.metadata,
        body: open.bodyLines.join("\n").trim()
      });
    }
    open = null;
  };

  String(markdown ?? "")
    .split(/\r?\n/)
    .forEach((line) => {
      const headingMatch = headingPattern.exec(line);
      if (headingMatch) {
        // A heading always starts a fresh section, whatever state we were in.
        closeOpen();
        open = {
          index: Number(headingMatch[1]),
          role: String(headingMatch[2]).toLowerCase(),
          metadata: {},
          bodyLines: [],
          inBody: false
        };
        return;
      }
      if (open === null) {
        return;
      }
      if (!open.inBody) {
        if (line.trim() === "") {
          open.inBody = true;
          return;
        }
        const metadataMatch = metadataPattern.exec(line);
        if (metadataMatch) {
          open.metadata[metadataMatch[1]] = metadataMatch[2];
        }
        return;
      }
      if (debugMarkerPattern.test(line)) {
        closeOpen();
        return;
      }
      open.bodyLines.push(line);
    });
  closeOpen();
  return parsed;
}

/** True when `period` is an object whose trimmed `from` and `to` both match `2020-07-DD`. */
function isJuly2020Period(period) {
  if (period === null || typeof period !== "object") {
    return false;
  }
  const julyDay = /^2020-07-\d{2}$/;
  const bounds = [period.from, period.to].map((bound) => String(bound ?? "").trim());
  return bounds.every((bound) => julyDay.test(bound));
}

/**
 * Collects one row per retrieval result that carries a `summary.live_mcp` object.
 *
 * @param {*} debug - Debug payload; `retrieval_results` is read when it is an array.
 * @returns {Array<object>} Normalised live-call rows (missing fields get defaults).
 */
function extractLiveCalls(debug) {
  const retrievalResults = Array.isArray(debug?.retrieval_results) ? debug.retrieval_results : [];
  return retrievalResults.flatMap((result) => {
    const summary = result?.summary ?? {};
    const live = summary.live_mcp;
    if (!live || typeof live !== "object") {
      return [];
    }
    const accountScope = Array.isArray(live.account_scope) ? live.account_scope : [];
    return [
      {
        fragment_id: result?.fragment_id ?? null,
        route: result?.route ?? null,
        method: String(live.method ?? "execute_query"),
        args_summary: live.args ?? null,
        query_subject: String(live.query_subject ?? summary.query_subject ?? ""),
        account_scope: accountScope,
        fetched_rows: Number(live.fetched_rows ?? 0),
        returned_rows: Number(live.returned_rows ?? 0),
        matched_rows: Number(live.matched_rows ?? 0),
        status: String(live.status ?? "unknown")
      }
    ];
  });
}

/**
 * Derives the temporal contradiction flags for one debug payload.
 *
 * A contradiction is flagged when the guard failed as out-of-window although
 * the effective period is inside July 2020, or when the guard basis differs
 * from the eligibility time basis.
 */
function contradictionFlags(debug) {
  const temporalGuard = debug?.temporal_guard ?? {};
  const eligibilityGuard = debug?.grounded_answer_eligibility_guard ?? {};
  const effectivePeriod = temporalGuard.effective_primary_period ?? debug?.effective_primary_period ?? null;
  const outcome = String(temporalGuard.temporal_guard_outcome ?? "");
  const julyEffective = isJuly2020Period(effectivePeriod);
  const guardBasis = String(temporalGuard.temporal_guard_basis ?? "");
  const eligibilityBasis = String(eligibilityGuard.eligibility_time_basis ?? "");
  const basisMismatch = guardBasis !== eligibilityBasis;
  const failedUnderJuly = julyEffective && outcome === "failed_out_of_snapshot_window";
  return {
    has_july_effective_primary_period: julyEffective,
    temporal_guard_outcome: outcome,
    temporal_basis_mismatch: basisMismatch,
    failed_under_july_effective_period: failedUnderJuly,
    contradiction: failedUnderJuly || basisMismatch
  };
}

/** True when the targeted-evidence hit rate is positive and at least one check status stringifies to "found". */
function claimPathCompleted(debug) {
  const targeted = debug?.targeted_evidence_acquisition ?? {};
  const hitRate = Number(targeted.targeted_evidence_hit_rate ?? 0);
  const rawStatus = targeted.check_status;
  const checkStatus = rawStatus && typeof rawStatus === "object" ? rawStatus : {};
  const statuses = Object.values(checkStatus).map((value) => String(value));
  return hitRate > 0 && statuses.includes("found");
}

/**
 * Renders replayed cases in the conversation-export text layout: a header,
 * then one `user` and one `assistant` section per case, numbered consecutively
 * from 1, with the assistant debug payload in a fenced JSON block.
 *
 * @param {{session_id: *}} session - Session whose id goes into the header.
 * @param {Iterable<{userItem: object, assistantItem: object}>} caseRows
 * @param {string} generatedAtIso - Timestamp written as `exported_at`.
 * @returns {string} The export text, lines joined with "\n".
 */
function composeLiveReplayExport(session, caseRows, generatedAtIso) {
  const header = [
    "# Assistant conversation export",
    `session_id: ${session.session_id}`,
    "export_mode: technical",
    `exported_at: ${generatedAtIso}`,
    ""
  ];
  const body = Array.from(caseRows).flatMap((row, position) => {
    const { userItem, assistantItem } = row;
    // Each case contributes two sections, so numbering advances by two.
    const userNumber = position * 2 + 1;
    return [
      `## ${userNumber}. user`,
      `message_id: ${userItem.message_id}`,
      `created_at: ${userItem.created_at}`,
      "reply_type: n/a",
      "",
      userItem.text,
      "",
      `## ${userNumber + 1}. assistant`,
      `message_id: ${assistantItem.message_id}`,
      `created_at: ${assistantItem.created_at}`,
      `reply_type: ${assistantItem.reply_type}`,
      `trace_id: ${assistantItem.trace_id}`,
      "",
      assistantItem.text,
      "",
      "### technical_debug_payload_json",
      "```json",
      JSON.stringify(assistantItem.debug ?? {}, null, 2),
      "```",
      ""
    ];
  });
  return [...header, ...body].join("\n");
}
/**
 * Replays the first three user turns of a conversation export against the
 * assistant API and writes the Wave 19.2 audit artifacts into a run directory.
 *
 * Command line: `node wave19_2LiveReplayPack.js <runDir> <sourceFile>`.
 *
 * Steps: parse the export, post each user message to `/api/assistant/message`
 * (with `FEATURE_ASSISTANT_MCP_RUNTIME_V1=1` and `useMock: true`), load the
 * resulting session, derive metrics and verdicts, then write the JSON and
 * markdown artifacts listed in the generated README.
 *
 * @throws {Error} When arguments are missing, the export has fewer than three
 *   non-empty user messages, or an API call does not return status 200.
 */
async function main() {
  const runDir = process.argv[2];
  const sourceFile = process.argv[3];
  if (!runDir || !sourceFile) {
    throw new Error("Usage: node wave19_2LiveReplayPack.js <runDir> <sourceFile>");
  }

  const sourceText = fs.readFileSync(sourceFile, "utf8");
  const sections = parseConversationSections(sourceText);
  const userSections = sections.filter((item) => item.role === "user");
  const assistantSections = sections.filter((item) => item.role === "assistant");
  const userMessages = userSections.map((item) => item.body).filter((item) => item.length > 0).slice(0, 3);
  if (userMessages.length < 3) {
    throw new Error(`Expected at least 3 user messages in source file, got ${userMessages.length}`);
  }

  // Baseline: share of the first three original assistant replies that were partial_coverage.
  const baselinePartialCoverage = assistantSections
    .slice(0, 3)
    .filter((item) => String(item.metadata.reply_type ?? "") === "partial_coverage").length;
  const baselinePartialCoverageRate = ratio(baselinePartialCoverage, Math.max(1, Math.min(3, assistantSections.length)));

  // The flag is set before the server module is loaded.
  process.env.FEATURE_ASSISTANT_MCP_RUNTIME_V1 = "1";
  const { createApp } = require("../dist/server.js");
  const app = createApp();
  const sessionId = `asst-wave19_2-${Date.now()}`;

  const replayRows = [];
  for (let i = 0; i < 3; i += 1) {
    const message = userMessages[i];
    const response = await request(app).post("/api/assistant/message").send({
      session_id: sessionId,
      useMock: true,
      promptVersion: "normalizer_v2_0_2",
      user_message: message
    });
    if (response.status !== 200) {
      throw new Error(`Replay case ${i + 1} failed with status=${response.status}`);
    }
    const debug = response.body?.debug ?? {};
    const eligibility = debug?.grounded_answer_eligibility_guard ?? {};
    const admissibility = debug?.evidence_admissibility_gate ?? {};
    const temporal = debug?.temporal_guard ?? {};
    const targeted = debug?.targeted_evidence_acquisition ?? {};
    const calls = extractLiveCalls(debug);
    const contradiction = contradictionFlags(debug);

    replayRows.push({
      ...CASE_LABELS[i],
      user_message: message,
      reply_type: String(response.body?.reply_type ?? ""),
      assistant_reply: String(response.body?.assistant_reply ?? ""),
      trace_id: String(response.body?.trace_id ?? debug?.trace_id ?? ""),
      business_scope_raw: Array.isArray(debug?.business_scope_raw) ? debug.business_scope_raw : [],
      business_scope_resolved: Array.isArray(debug?.business_scope_resolved) ? debug.business_scope_resolved : [],
      company_scope_resolution_reason: Array.isArray(debug?.company_scope_resolution_reason)
        ? debug.company_scope_resolution_reason
        : Array.isArray(debug?.scope_resolution_reason)
          ? debug.scope_resolution_reason
          : [],
      raw_time_scope: temporal?.raw_time_scope ?? null,
      resolved_time_anchor: temporal?.resolved_time_anchor ?? null,
      effective_primary_period: temporal?.effective_primary_period ?? null,
      temporal_guard_input: temporal?.temporal_guard_input ?? null,
      temporal_guard_outcome: temporal?.temporal_guard_outcome ?? null,
      eligibility_time_basis: eligibility?.eligibility_time_basis ?? null,
      temporal_guard_basis: temporal?.temporal_guard_basis ?? null,
      contradiction,
      claim_type: debug?.claim_anchor_audit?.claim_type ?? null,
      claim_anchor_resolution_rate: Number(debug?.claim_anchor_audit?.claim_anchor_resolution_rate ?? 0),
      targeted_evidence_hit_rate: Number(targeted?.targeted_evidence_hit_rate ?? 0),
      admissible_evidence_count: Number(admissibility?.admissible_evidence_count ?? 0),
      reject_breakdown: admissibility?.reject_breakdown ?? null,
      eligibility: {
        eligible: Boolean(eligibility?.eligible),
        grounding_mode: String(eligibility?.grounding_mode ?? ""),
        outcome: String(eligibility?.outcome ?? ""),
        reason_codes: Array.isArray(eligibility?.reason_codes) ? eligibility.reason_codes : []
      },
      live_calls: calls,
      debug
    });
  }

  const sessionResponse = await request(app).get(`/api/assistant/session/${sessionId}`);
  if (sessionResponse.status !== 200) {
    throw new Error(`Failed to load replay session: status=${sessionResponse.status}`);
  }
  const session = sessionResponse.body?.session;
  const sessionItems = Array.isArray(session?.items) ? session.items : [];

  // Pair each replayed case with its stored session items; when an item is
  // missing, synthesise one from the replay row so the export stays complete.
  const userItems = sessionItems.filter((item) => item?.role === "user");
  const assistantItems = sessionItems.filter((item) => item?.role === "assistant");
  const caseRowsWithItems = replayRows.map((row, index) => ({
    ...row,
    userItem: userItems[index] ?? {
      message_id: `user-${index + 1}`,
      created_at: new Date().toISOString(),
      text: row.user_message
    },
    assistantItem: assistantItems[index] ?? {
      message_id: `assistant-${index + 1}`,
      created_at: new Date().toISOString(),
      text: row.assistant_reply,
      reply_type: row.reply_type,
      trace_id: row.trace_id,
      debug: row.debug
    }
  }));

  const contradictionCount = replayRows.filter((row) => row.contradiction.contradiction).length;
  const scopeResolvedCount = replayRows.filter((row) => row.business_scope_resolved.includes("company_specific_accounting")).length;
  const admissibleNonZeroCount = replayRows.filter((row) => row.admissible_evidence_count > 0).length;
  const partialCoverageCount = replayRows.filter((row) => row.reply_type === "partial_coverage").length;
  const claimPathCompletedCount = replayRows.filter((row) => claimPathCompleted(row.debug)).length;
  // "False grounded": answer marked grounded_positive without any admissible evidence.
  const falseGroundedCount = replayRows.filter(
    (row) => row.eligibility.grounding_mode === "grounded_positive" && row.admissible_evidence_count <= 0
  ).length;

  const metrics = {
    case_count: replayRows.length,
    baseline_partial_coverage_default_rate: baselinePartialCoverageRate,
    live_temporal_contradiction_rate: ratio(contradictionCount, replayRows.length),
    live_company_scope_resolution_rate: ratio(scopeResolvedCount, replayRows.length),
    live_admissible_evidence_nonzero_rate: ratio(admissibleNonZeroCount, replayRows.length),
    live_partial_coverage_default_rate: ratio(partialCoverageCount, replayRows.length),
    live_claim_path_completion_rate: ratio(claimPathCompletedCount, replayRows.length),
    live_false_grounded_answer_rate: ratio(falseGroundedCount, replayRows.length)
  };

  const thresholds = {
    live_temporal_contradiction_rate: 0,
    live_company_scope_resolution_rate: 1,
    live_false_grounded_answer_rate: 0,
    live_admissible_evidence_nonzero_min_cases: 2
  };

  const temporalFixed = metrics.live_temporal_contradiction_rate <= thresholds.live_temporal_contradiction_rate;
  const companyScopeFixed = metrics.live_company_scope_resolution_rate >= thresholds.live_company_scope_resolution_rate;
  const evidencePathFixed =
    admissibleNonZeroCount >= thresholds.live_admissible_evidence_nonzero_min_cases &&
    metrics.live_false_grounded_answer_rate <= thresholds.live_false_grounded_answer_rate;
  const partialReduced = metrics.live_partial_coverage_default_rate < metrics.baseline_partial_coverage_default_rate;

  let overallStatus = "WAVE19_2_NOT_ACCEPTED";
  if (temporalFixed && companyScopeFixed && evidencePathFixed && partialReduced) {
    overallStatus = "WAVE19_2_ACCEPTED";
  } else if (temporalFixed && companyScopeFixed && metrics.live_false_grounded_answer_rate <= 0) {
    overallStatus = "WAVE19_2_ACCEPTED_WITH_LIMITATIONS";
  }

  // Derived once and reused by the run summary, the report and the README.
  const verdictLabels = {
    LIVE_TEMPORAL_ALIGNMENT_FIXED: temporalFixed ? "FIXED" : "NOT_FIXED",
    LIVE_COMPANY_SCOPE_FIXED: companyScopeFixed ? "FIXED" : "NOT_FIXED",
    LIVE_EVIDENCE_PATH_FIXED: evidencePathFixed ? "FIXED" : "NOT_FIXED",
    LIVE_PARTIAL_COVERAGE_DEFAULT_REDUCED: partialReduced ? "REDUCED" : "NOT_REDUCED"
  };
  const verdictLines = [
    ...Object.entries(verdictLabels).map(([name, label]) => `- ${name}: ${label}`),
    `- Overall: ${overallStatus}`
  ];

  const runSummary = {
    run_id: path.basename(runDir),
    stage: "Stage_04",
    wave: "Wave_19_2",
    scope: "live_runtime_fix_by_replay_1txt",
    source_of_truth: sourceFile,
    execution: {
      replay_mode: "exact_questions_from_1_txt",
      runtime_path: "assistant_message_with_mcp_runtime_on",
      normalizer_mode: "useMock=true",
      session_id: sessionId
    },
    thresholds,
    metrics,
    verdicts: {
      ...verdictLabels,
      overall_status: overallStatus
    }
  };
  writeJson(path.join(runDir, "run_summary.json"), runSummary);

  const temporalAudit = {
    generated_at: new Date().toISOString(),
    cases: replayRows.map((row) => ({
      case_id: row.case_id,
      label: row.label,
      raw_time_scope: row.raw_time_scope,
      resolved_time_anchor: row.resolved_time_anchor,
      effective_primary_period: row.effective_primary_period,
      temporal_guard_input: row.temporal_guard_input,
      temporal_guard_basis: row.temporal_guard_basis,
      eligibility_time_basis: row.eligibility_time_basis,
      temporal_guard_outcome: row.temporal_guard_outcome,
      contradiction: row.contradiction
    })),
    metric: {
      live_temporal_contradiction_rate: metrics.live_temporal_contradiction_rate
    }
  };
  writeJson(path.join(runDir, "temporal_contradiction_audit.json"), temporalAudit);

  const scopeAudit = {
    generated_at: new Date().toISOString(),
    cases: replayRows.map((row) => ({
      case_id: row.case_id,
      label: row.label,
      business_scope_raw: row.business_scope_raw,
      business_scope_resolved: row.business_scope_resolved,
      company_scope_resolution_reason: row.company_scope_resolution_reason
    })),
    metric: {
      live_company_scope_resolution_rate: metrics.live_company_scope_resolution_rate
    }
  };
  writeJson(path.join(runDir, "business_scope_resolution_audit.json"), scopeAudit);

  const mcpToEvidence = {
    generated_at: new Date().toISOString(),
    cases: replayRows.map((row) => ({
      case_id: row.case_id,
      label: row.label,
      claim_type: row.claim_type,
      admissible_evidence_count: row.admissible_evidence_count,
      live_calls: row.live_calls,
      claim_targeted_hit_rate: row.targeted_evidence_hit_rate,
      eligibility: row.eligibility
    }))
  };
  writeJson(path.join(runDir, "live_mcp_to_evidence_handoff.json"), mcpToEvidence);

  const rejectBreakdown = {
    generated_at: new Date().toISOString(),
    // Sums only the reason codes seeded below; other keys in a case breakdown are ignored.
    aggregate: replayRows.reduce(
      (acc, row) => {
        const breakdown = row.reject_breakdown && typeof row.reject_breakdown === "object" ? row.reject_breakdown : {};
        for (const key of Object.keys(acc)) {
          acc[key] += Number(breakdown[key] ?? 0);
        }
        return acc;
      },
      {
        wrong_period: 0,
        wrong_domain: 0,
        wrong_account_scope: 0,
        weak_source_mapping: 0,
        zero_live_match: 0,
        future_dated_or_out_of_window: 0
      }
    ),
    cases: replayRows.map((row) => ({
      case_id: row.case_id,
      label: row.label,
      reject_breakdown: row.reject_breakdown
    }))
  };
  writeJson(path.join(runDir, "admissibility_reject_breakdown_live.json"), rejectBreakdown);

  for (const row of replayRows) {
    writeJson(path.join(runDir, "debug_payloads", `${row.case_id}_${row.label}.json`), {
      case_id: row.case_id,
      label: row.label,
      trace_id: row.trace_id,
      reply_type: row.reply_type,
      debug: row.debug
    });
  }

  const caseMatrixLines = [];
  caseMatrixLines.push("# Live Case Matrix");
  caseMatrixLines.push("");
  caseMatrixLines.push("| Case | Label | Reply | Claim Type | Admissible Evidence | Grounding Mode | Scope | Temporal |");
  caseMatrixLines.push("| --- | --- | --- | --- | ---: | --- | --- | --- |");
  for (const row of replayRows) {
    caseMatrixLines.push(
      `| ${row.case_id} | ${row.label} | ${row.reply_type} | ${row.claim_type ?? "n/a"} | ${row.admissible_evidence_count} | ${row.eligibility.grounding_mode} | ${row.business_scope_resolved.join(", ") || "n/a"} | ${row.temporal_guard_outcome} |`
    );
  }
  writeText(path.join(runDir, "live_case_matrix.md"), `${caseMatrixLines.join("\n")}\n`);

  const replayReportLines = [];
  replayReportLines.push("# Live Replay Report (Wave 19.2)");
  replayReportLines.push("");
  replayReportLines.push("## Source");
  replayReportLines.push(`- Source of truth replayed from: \`${sourceFile}\``);
  replayReportLines.push("- Replayed exactly 3 user turns from the original export.");
  replayReportLines.push("- Runtime path: MCP ON, useMock=true.");
  replayReportLines.push("");
  replayReportLines.push("## Metrics");
  replayReportLines.push(`- live_temporal_contradiction_rate: ${metrics.live_temporal_contradiction_rate}`);
  replayReportLines.push(`- live_company_scope_resolution_rate: ${metrics.live_company_scope_resolution_rate}`);
  replayReportLines.push(`- live_admissible_evidence_nonzero_rate: ${metrics.live_admissible_evidence_nonzero_rate}`);
  replayReportLines.push(`- live_partial_coverage_default_rate: ${metrics.live_partial_coverage_default_rate}`);
  replayReportLines.push(`- baseline_partial_coverage_default_rate: ${metrics.baseline_partial_coverage_default_rate}`);
  replayReportLines.push(`- live_claim_path_completion_rate: ${metrics.live_claim_path_completion_rate}`);
  replayReportLines.push(`- live_false_grounded_answer_rate: ${metrics.live_false_grounded_answer_rate}`);
  replayReportLines.push("");
  replayReportLines.push("## Verdict");
  replayReportLines.push(...verdictLines);
  replayReportLines.push("");
  writeText(path.join(runDir, "live_replay_report.md"), `${replayReportLines.join("\n")}\n`);

  const chatExportLines = [];
  chatExportLines.push("# Chat Export Live Replay");
  chatExportLines.push("");
  for (const row of replayRows) {
    chatExportLines.push(`## ${row.case_id} | ${row.label}`);
    chatExportLines.push(`user: ${row.user_message}`);
    chatExportLines.push(`assistant(reply_type=${row.reply_type}, trace_id=${row.trace_id}): ${row.assistant_reply.replace(/\s+/g, " ").trim()}`);
    chatExportLines.push("");
  }
  writeText(path.join(runDir, "chat_export_live_replay.md"), `${chatExportLines.join("\n")}\n`);

  const generatedAtIso = new Date().toISOString();
  // The session body is read defensively above, so it may be absent here; fall
  // back to the id we generated rather than crash after the other artifacts
  // have been written.
  const exportSession = session && typeof session === "object" ? session : { session_id: sessionId };
  const liveReplayTxt = composeLiveReplayExport(exportSession, caseRowsWithItems, generatedAtIso);
  writeText(path.join(runDir, "1_live_replay.txt"), liveReplayTxt);

  const readme = [
    "# Stage 4 / Wave 19.2 - Live Runtime Fix by Replay 1.txt",
    "",
    "## What was run",
    "- Source-of-truth replay from original `1.txt` user turns.",
    "- MCP runtime ON (`FEATURE_ASSISTANT_MCP_RUNTIME_V1=1`).",
    "- Normalizer in `useMock=true` mode.",
    "",
    "## Produced artifacts",
    "- run_summary.json",
    "- live_replay_report.md",
    "- live_case_matrix.md",
    "- business_scope_resolution_audit.json",
    "- temporal_contradiction_audit.json",
    "- live_mcp_to_evidence_handoff.json",
    "- admissibility_reject_breakdown_live.json",
    "- chat_export_live_replay.md",
    "- debug_payloads/",
    "- 1_live_replay.txt",
    "",
    "## Final verdict",
    ...verdictLines
  ].join("\n");
  writeText(path.join(runDir, "README.md"), `${readme}\n`);
}

// Any failure is printed (stack when available) and turned into exit code 1.
main().catch((error) => {
  process.stderr.write(`${error instanceof Error ? error.stack || error.message : String(error)}\n`);
  process.exit(1);
});
"").trim(); + return value || null; +} + export function resolveTemporalGuard(input: { userMessage: string; normalized: NormalizedPayload | null | undefined; @@ -631,11 +641,14 @@ export function resolveTemporalGuard(input: { const reasonCodes: string[] = []; if (!julyAnchor.applyGuard) { const resolvedWindow = inferPrimaryWindowFromAnchor(normalizedAnchor.value); + const guardInput = toTemporalGuardInput(resolvedWindow, normalizedAnchor.value); return { raw_time_anchor: julyAnchor.raw, raw_time_scope: normalizedAnchor.value, resolved_time_anchor: normalizedAnchor.value, resolved_primary_period: resolvedWindow, + effective_primary_period: resolvedWindow, + temporal_guard_input: guardInput, temporal_alignment_status: normalizedAnchor.value ? "aligned" : "conflicting", temporal_resolution_source: normalizedAnchor.source, temporal_guard_basis: normalizedAnchor.value ? "raw_time_scope_unlocked" : "none", @@ -662,11 +675,16 @@ export function resolveTemporalGuard(input: { reasonCodes.push("missing_time_anchor_under_snapshot_lock"); } const allowedContextWindow = buildAllowedContextWindow(julyAnchor.window); + const resolvedPrimaryPeriod = julyAnchor.window; + const effectivePrimaryPeriod = resolvedPrimaryPeriod ?? inferPrimaryWindowFromAnchor(julyAnchor.resolved ?? normalizedAnchor.value); + const guardInput = toTemporalGuardInput(effectivePrimaryPeriod, julyAnchor.resolved ?? normalizedAnchor.value); return { raw_time_anchor: julyAnchor.raw, raw_time_scope: normalizedAnchor.value, resolved_time_anchor: julyAnchor.resolved ?? normalizedAnchor.value, - resolved_primary_period: julyAnchor.window, + resolved_primary_period: resolvedPrimaryPeriod, + effective_primary_period: effectivePrimaryPeriod, + temporal_guard_input: guardInput, temporal_alignment_status: temporalAlignmentStatus, temporal_resolution_source: julyAnchor.source, temporal_guard_basis: julyAnchor.window ? 
"resolved_primary_period" : "none", @@ -690,10 +708,11 @@ export function applyTemporalHintToExecutionPlan< if (!temporal.temporal_guard_applied) { return executionPlan; } + const primaryWindow = temporal.effective_primary_period ?? temporal.primary_period_window; const hint = - temporal.primary_period_window?.granularity === "day" && temporal.resolved_time_anchor + primaryWindow?.granularity === "day" && temporal.resolved_time_anchor ? `primary period ${temporal.resolved_time_anchor}; controlled temporal expansion only for linked entities` - : `primary period July 2020 (${JULY_WINDOW.from}..${JULY_WINDOW.to}); controlled temporal expansion only for linked entities`; + : `primary period July 2020 (${primaryWindow?.from ?? JULY_WINDOW.from}..${primaryWindow?.to ?? JULY_WINDOW.to}); controlled temporal expansion only for linked entities`; return executionPlan.map((item) => { if (!item.should_execute) { return item; @@ -1145,6 +1164,10 @@ function withinAllowedContextWindow(normalizedPeriod: string, temporal: Temporal return normalizedPeriod >= temporal.allowed_context_window.from && normalizedPeriod <= temporal.allowed_context_window.to; } +function effectivePrimaryPeriodWindow(temporal: TemporalGuardAudit): TemporalWindow | null { + return temporal.effective_primary_period ?? 
temporal.primary_period_window; +} + function evidenceAdmissibilityReasons(input: { evidence: EvidenceItem; temporal: TemporalGuardAudit; @@ -1160,14 +1183,15 @@ function evidenceAdmissibilityReasons(input: { reasons.add("zero_live_match"); } const period = extractEvidencePeriod(input.evidence); - if (period && input.temporal.primary_period_window) { + const primaryWindow = effectivePrimaryPeriodWindow(input.temporal); + if (period && primaryWindow) { const normalized = normalizeEvidenceDate(period); const expansionMeta = evidenceContextExpansionMeta(input.evidence); - if (normalized && !isPeriodWithinWindow(normalized, input.temporal.primary_period_window)) { + if (normalized && !isPeriodWithinWindow(normalized, primaryWindow)) { const insideAllowed = withinAllowedContextWindow(normalized, input.temporal); if (insideAllowed && expansionMeta.allowed && expansionMeta.reason) { // Allowed controlled temporal expansion: period is outside primary but linked and explained. - } else if (normalized > input.temporal.primary_period_window.to && !insideAllowed) { + } else if (normalized > primaryWindow.to && !insideAllowed) { reasons.add("future_dated_or_out_of_window"); } else { reasons.add("wrong_period"); @@ -1217,14 +1241,15 @@ function itemRejectReasons(input: { reasons.add("zero_live_match"); } const period = itemPeriod(input.item); - if (period && input.temporal.primary_period_window) { + const primaryWindow = effectivePrimaryPeriodWindow(input.temporal); + if (period && primaryWindow) { const normalized = normalizeEvidenceDate(period); const expansionMeta = itemContextExpansionMeta(input.item); - if (normalized && !isPeriodWithinWindow(normalized, input.temporal.primary_period_window)) { + if (normalized && !isPeriodWithinWindow(normalized, primaryWindow)) { const insideAllowed = withinAllowedContextWindow(normalized, input.temporal); if (insideAllowed && expansionMeta.allowed && expansionMeta.reason) { // Allowed controlled temporal expansion: period is outside 
primary but linked and explained. - } else if (normalized > input.temporal.primary_period_window.to && !insideAllowed) { + } else if (normalized > primaryWindow.to && !insideAllowed) { reasons.add("future_dated_or_out_of_window"); } else { reasons.add("wrong_period"); diff --git a/llm_normalizer/backend/src/services/assistantService.ts b/llm_normalizer/backend/src/services/assistantService.ts index bd2a3a3..057d3ca 100644 --- a/llm_normalizer/backend/src/services/assistantService.ts +++ b/llm_normalizer/backend/src/services/assistantService.ts @@ -159,6 +159,104 @@ function resolveBusinessScopeAlignment(input) { route_summary_resolved: resolvedSummary }; } +function isJuly2020TemporalResolved(temporalGuard) { + if (!temporalGuard || typeof temporalGuard !== "object") { + return false; + } + const resolvedAnchor = String(temporalGuard.resolved_time_anchor ?? "").trim(); + if (/^2020-07(?:-\d{2})?$/.test(resolvedAnchor)) { + return true; + } + const effective = temporalGuard.effective_primary_period && typeof temporalGuard.effective_primary_period === "object" + ? temporalGuard.effective_primary_period + : null; + if (effective) { + const from = String(effective.from ?? "").trim(); + const to = String(effective.to ?? "").trim(); + if (/^2020-07-\d{2}$/.test(from) && /^2020-07-\d{2}$/.test(to)) { + return true; + } + } + const resolvedPrimary = temporalGuard.resolved_primary_period && typeof temporalGuard.resolved_primary_period === "object" + ? temporalGuard.resolved_primary_period + : null; + if (!resolvedPrimary) { + return false; + } + const from = String(resolvedPrimary.from ?? "").trim(); + const to = String(resolvedPrimary.to ?? "").trim(); + return /^2020-07-\d{2}$/.test(from) && /^2020-07-\d{2}$/.test(to); +} +function hasP0ClaimSignal(claimType, focusDomainHint) { + const claim = String(claimType ?? 
"").trim(); + if (claim === "prove_settlement_closure_state" || + claim === "prove_advance_offset_state" || + claim === "prove_vat_chain_completeness" || + claim === "prove_month_close_state" || + claim === "prove_rbp_tail_state") { + return true; + } + return (focusDomainHint === "settlements_60_62" || + focusDomainHint === "vat_document_register_book" || + focusDomainHint === "month_close_costs_20_44" || + focusDomainHint === "fixed_asset_amortization"); +} +function resolveBusinessScopeFromLiveContext(input) { + const current = input.current; + const routeSummary = current?.route_summary_resolved; + const julyResolved = isJuly2020TemporalResolved(input.temporalGuard); + const p0Signal = hasP0ClaimSignal(input.claimType, input.focusDomainHint); + if (!julyResolved || !p0Signal) { + return current; + } + const reasons = Array.isArray(current.scope_resolution_reason) ? [...current.scope_resolution_reason] : []; + if (!reasons.includes("temporal_claim_bound_company_scope_recovery")) { + reasons.push("temporal_claim_bound_company_scope_recovery"); + } + const currentScopes = Array.isArray(current.business_scope_resolved) ? current.business_scope_resolved : []; + let changed = false; + const normalizedScopes = currentScopes + .map((item) => String(item ?? "").trim()) + .filter(Boolean) + .map((item) => { + if (item === "generic_accounting" || item === "unclear") { + changed = true; + return "company_specific_accounting"; + } + return item; + }); + if (!normalizedScopes.includes("company_specific_accounting")) { + normalizedScopes.push("company_specific_accounting"); + changed = true; + } + let routeSummaryResolved = routeSummary; + if (routeSummary && routeSummary.mode === "deterministic_v2" && Array.isArray(routeSummary.decisions)) { + const decisions = routeSummary.decisions.map((decision) => { + const scopeValue = String(decision.business_scope ?? 
"").trim(); + if (scopeValue !== "generic_accounting" && scopeValue !== "unclear") { + return decision; + } + changed = true; + return { + ...decision, + business_scope: "company_specific_accounting" + }; + }); + routeSummaryResolved = changed + ? { + ...routeSummary, + decisions + } + : routeSummary; + } + return { + ...current, + business_scope_resolved: Array.from(new Set(normalizedScopes)), + company_grounding_applied: current.company_grounding_applied || changed, + scope_resolution_reason: reasons, + route_summary_resolved: routeSummaryResolved + }; +} function escapeRegex(value) { return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); } @@ -1296,19 +1394,20 @@ export class AssistantService { }; const normalized = await this.normalizerService.normalize(normalizePayload); const companyAnchors = (0, companyAnchorResolver_1.resolveCompanyAnchors)(userMessage); - const businessScopeResolution = resolveBusinessScopeAlignment({ + const initialBusinessScopeResolution = resolveBusinessScopeAlignment({ userMessage, companyAnchors, normalized: normalized.normalized, routeSummary: normalized.route_hint_summary }); - const resolvedRouteSummary = businessScopeResolution.route_summary_resolved; const inferredDomainByMessage = inferP0DomainFromMessage(userMessage); - const focusDomainForGuards = inferredDomainByMessage === "settlements_60_62" || - inferredDomainByMessage === "vat_document_register_book" || - inferredDomainByMessage === "month_close_costs_20_44" - ? inferredDomainByMessage - : null; + const focusDomainForGuards = inferredDomainByMessage === "fixed_asset_amortization" + ? "month_close_costs_20_44" + : inferredDomainByMessage === "settlements_60_62" || + inferredDomainByMessage === "vat_document_register_book" || + inferredDomainByMessage === "month_close_costs_20_44" + ? 
inferredDomainByMessage + : null; const temporalGuard = (0, assistantRuntimeGuards_1.resolveTemporalGuard)({ userMessage, normalized: normalized.normalized, @@ -1323,8 +1422,15 @@ export class AssistantService { userMessage, companyAnchors, focusDomainHint: focusDomainForGuards, - primaryPeriod: temporalGuard.primary_period_window + primaryPeriod: temporalGuard.effective_primary_period ?? temporalGuard.primary_period_window }); + const businessScopeResolution = resolveBusinessScopeFromLiveContext({ + current: initialBusinessScopeResolution, + temporalGuard, + claimType: claimAnchorAudit.claim_type, + focusDomainHint: focusDomainForGuards + }); + const resolvedRouteSummary = businessScopeResolution.route_summary_resolved; const requirementExtraction = extractRequirements(resolvedRouteSummary, normalized.normalized, userMessage); let executionPlan = toExecutionPlan(resolvedRouteSummary, normalized.normalized, userMessage, requirementExtraction.byFragment); executionPlan = (0, assistantRuntimeGuards_1.applyTemporalHintToExecutionPlan)(executionPlan, temporalGuard); @@ -1500,10 +1606,13 @@ export class AssistantService { business_scope_resolved: businessScopeResolution.business_scope_resolved, company_grounding_applied: businessScopeResolution.company_grounding_applied, scope_resolution_reason: businessScopeResolution.scope_resolution_reason, + company_scope_resolution_reason: businessScopeResolution.scope_resolution_reason, raw_time_anchor: temporalGuard.raw_time_anchor, raw_time_scope: temporalGuard.raw_time_scope, resolved_time_anchor: temporalGuard.resolved_time_anchor, resolved_primary_period: temporalGuard.resolved_primary_period, + effective_primary_period: temporalGuard.effective_primary_period, + temporal_guard_input: temporalGuard.temporal_guard_input, temporal_alignment_status: temporalGuard.temporal_alignment_status, temporal_resolution_source: temporalGuard.temporal_resolution_source, temporal_guard_basis: temporalGuard.temporal_guard_basis, @@ -1589,10 
+1698,13 @@ export class AssistantService { business_scope_resolved: businessScopeResolution.business_scope_resolved, company_grounding_applied: businessScopeResolution.company_grounding_applied, scope_resolution_reason: businessScopeResolution.scope_resolution_reason, + company_scope_resolution_reason: businessScopeResolution.scope_resolution_reason, raw_time_anchor: temporalGuard.raw_time_anchor, raw_time_scope: temporalGuard.raw_time_scope, resolved_time_anchor: temporalGuard.resolved_time_anchor, resolved_primary_period: temporalGuard.resolved_primary_period, + effective_primary_period: temporalGuard.effective_primary_period, + temporal_guard_input: temporalGuard.temporal_guard_input, temporal_alignment_status: temporalGuard.temporal_alignment_status, temporal_resolution_source: temporalGuard.temporal_resolution_source, temporal_guard_basis: temporalGuard.temporal_guard_basis, diff --git a/llm_normalizer/backend/src/types/assistant.ts b/llm_normalizer/backend/src/types/assistant.ts index bcb685e..5abfdce 100644 --- a/llm_normalizer/backend/src/types/assistant.ts +++ b/llm_normalizer/backend/src/types/assistant.ts @@ -82,6 +82,12 @@ export interface TemporalGuardDebug { to: string; granularity: "day" | "month"; } | null; + effective_primary_period: { + from: string; + to: string; + granularity: "day" | "month"; + } | null; + temporal_guard_input: string | null; temporal_alignment_status: "aligned" | "corrected" | "conflicting"; temporal_resolution_source: string; temporal_guard_basis: "resolved_primary_period" | "raw_time_scope_unlocked" | "none"; @@ -267,6 +273,7 @@ export interface AssistantDebugPayload { business_scope_resolved?: string[]; company_grounding_applied?: boolean; scope_resolution_reason?: string[]; + company_scope_resolution_reason?: string[]; raw_time_anchor?: string | null; raw_time_scope?: string | null; resolved_time_anchor?: string | null; @@ -275,6 +282,12 @@ export interface AssistantDebugPayload { to: string; granularity: "day" | "month"; 
} | null; + effective_primary_period?: { + from: string; + to: string; + granularity: "day" | "month"; + } | null; + temporal_guard_input?: string | null; temporal_alignment_status?: TemporalGuardDebug["temporal_alignment_status"]; temporal_resolution_source?: string; temporal_guard_basis?: TemporalGuardDebug["temporal_guard_basis"]; diff --git a/llm_normalizer/backend/tests/assistantEndpoint.test.ts b/llm_normalizer/backend/tests/assistantEndpoint.test.ts index cbb46ab..552d13a 100644 --- a/llm_normalizer/backend/tests/assistantEndpoint.test.ts +++ b/llm_normalizer/backend/tests/assistantEndpoint.test.ts @@ -32,6 +32,8 @@ describe("assistant mode API", () => { expect(typeof response.body.debug?.temporal_guard_outcome).toBe("string"); expect(typeof response.body.debug?.temporal_alignment_status).toBe("string"); expect(typeof response.body.debug?.temporal_guard_basis).toBe("string"); + expect(response.body.debug).toHaveProperty("effective_primary_period"); + expect(response.body.debug).toHaveProperty("temporal_guard_input"); expect(response.body.debug?.domain_polarity_guard).toBeTruthy(); expect(Array.isArray(response.body.debug?.raw_numeric_tokens)).toBe(true); expect(Array.isArray(response.body.debug?.classified_numeric_tokens)).toBe(true); @@ -42,6 +44,7 @@ describe("assistant mode API", () => { expect(typeof response.body.debug?.eligibility_time_basis).toBe("string"); expect(typeof response.body.debug?.grounded_answer_eligibility_guard?.eligibility_time_basis).toBe("string"); expect(typeof response.body.debug?.grounded_answer_eligibility_guard?.business_scope_passed).toBe("boolean"); + expect(Array.isArray(response.body.debug?.company_scope_resolution_reason ?? 
[])).toBe(true); expect(Array.isArray(response.body.conversation)).toBe(true); expect(response.body.conversation.length).toBe(2); }); diff --git a/llm_normalizer/backend/tests/assistantRuntimeGuardsStage4Pack.test.ts b/llm_normalizer/backend/tests/assistantRuntimeGuardsStage4Pack.test.ts index cf7597d..5c922f0 100644 --- a/llm_normalizer/backend/tests/assistantRuntimeGuardsStage4Pack.test.ts +++ b/llm_normalizer/backend/tests/assistantRuntimeGuardsStage4Pack.test.ts @@ -151,6 +151,8 @@ describe("stage4 blocker-pack runtime guards", () => { expect(temporal.temporal_guard_outcome).toBe("passed"); expect(temporal.raw_time_scope).toBe("2023-07-06"); expect(temporal.resolved_time_anchor).toBe("2020-07-06"); + expect(temporal.effective_primary_period?.from).toBe("2020-07-06"); + expect(temporal.temporal_guard_input).toBe("2020-07-06..2020-07-06"); expect(temporal.temporal_alignment_status).toBe("corrected"); expect(temporal.temporal_guard_basis).toBe("resolved_primary_period"); expect(temporal.normalized_anchor_drift_detected).toBe(true); @@ -179,6 +181,7 @@ describe("stage4 blocker-pack runtime guards", () => { expect(temporal.temporal_guard_applied).toBe(true); expect(temporal.temporal_guard_outcome).toBe("passed"); expect(temporal.resolved_time_anchor).toBe("2020-07"); + expect(temporal.effective_primary_period?.from).toBe("2020-07-01"); expect(hintedPlan[0].fragment_text).toMatch(/июля 2020|2020-07-01/); }); @@ -330,6 +333,12 @@ describe("stage4 blocker-pack runtime guards", () => { to: "2020-07-06", granularity: "day" }, + effective_primary_period: { + from: "2020-07-06", + to: "2020-07-06", + granularity: "day" + }, + temporal_guard_input: "2020-07-06..2020-07-06", temporal_alignment_status: "corrected", temporal_resolution_source: "company_snapshot_july_day_lock", temporal_guard_basis: "resolved_primary_period", diff --git a/llm_normalizer/docs/runs/2026-03-29_Stage_04_Wave_19_2_Live_Runtime_Fix_Replay_1txt.zip 
b/llm_normalizer/docs/runs/2026-03-29_Stage_04_Wave_19_2_Live_Runtime_Fix_Replay_1txt.zip new file mode 100644 index 0000000..88e1f30 Binary files /dev/null and b/llm_normalizer/docs/runs/2026-03-29_Stage_04_Wave_19_2_Live_Runtime_Fix_Replay_1txt.zip differ