const fs = require("node:fs");
const path = require("node:path");

// Probe file location relative to the run directory; recorded as the
// provenance (`source` / `after_reference`) of the derived artifacts.
const PROBE_SOURCE = "artifacts/final_probe_core8.json";

// User prompt text per core-8 case id, used when rendering the chat export.
// NOTE(review): the trailing "????????" in Q8 looks like non-ASCII text that was
// lost to an encoding problem — left untouched; confirm against the original prompt.
const CORE8_USER_MAP = {
  Q1: "supplier account 60: payment 55200 on 2020-07-06 by contract 01/19-PT. why payable tail is still open in july 2020?",
  Q2: "supplier account 60: receipt 276873.60 in july 2020. was advance from 2020-07-15 offset correctly?",
  Q3: "customer account 62: payments 40860 and 20000 in july 2020. is this advance or receivable closure?",
  Q4: "VAT chain july 2020: communication services, VAT 233.33, invoice. is chain document -> invoice -> register -> book complete?",
  Q5: "VAT july 2020: show purchases with incomplete VAT contour.",
  Q6: "month close july 2020 on accounts 20 and 44: any residual tails after 2020-07-31 closure?",
  Q7: "RBP account 97 writeoff in july 2020: does part of deferred expense live longer than expected?",
  Q8: "after full month-end july 2020, what is real problem and what is normal ????????",
};

/**
 * Reads a UTF-8 file and parses it as JSON.
 * @param {string} filePath
 * @returns {*} The parsed value.
 * @throws If the file cannot be read or is not valid JSON.
 */
function readJson(filePath) {
  return JSON.parse(fs.readFileSync(filePath, "utf8"));
}

/**
 * Writes `payload` as 2-space-indented JSON followed by a newline, creating
 * the parent directory first if needed.
 * @param {string} filePath
 * @param {*} payload
 */
function writeJson(filePath, payload) {
  fs.mkdirSync(path.dirname(filePath), { recursive: true });
  fs.writeFileSync(filePath, `${JSON.stringify(payload, null, 2)}\n`, "utf8");
}

/**
 * Writes `text` verbatim as UTF-8, creating the parent directory first if needed.
 * @param {string} filePath
 * @param {string} text
 */
function writeText(filePath, text) {
  fs.mkdirSync(path.dirname(filePath), { recursive: true });
  fs.writeFileSync(filePath, text, "utf8");
}

/**
 * Returns `value` when it is an array, otherwise an empty array.
 * @param {*} value
 * @returns {Array}
 */
function asArray(value) {
  return Array.isArray(value) ? value : [];
}

/**
 * Renders one Markdown table row.
 *
 * Pipes are escaped and line breaks are collapsed to a space so that cell
 * content cannot be mistaken for a column separator or split the row.
 * @param {Array} values Cell values; null/undefined render as empty cells.
 * @returns {string}
 */
function toTableRow(values) {
  const cells = values.map((value) =>
    String(value ?? "")
      .replace(/\|/g, "\\|")
      .replace(/\r?\n/g, " ")
  );
  return `| ${cells.join(" | ")} |`;
}

/**
 * Flattens every `retrieval_results[].evidence` entry of a debug payload into
 * one list. Missing or non-array levels contribute nothing.
 * @param {object} [debugPayload]
 * @returns {Array}
 */
function collectEvidence(debugPayload) {
  return asArray(debugPayload?.retrieval_results).flatMap((result) => asArray(result?.evidence));
}

/**
 * Builds one summary row for each retrieval result that carries a truthy
 * `summary.live_mcp` object; results without it are skipped.
 * @param {object} [debugPayload]
 * @returns {Array<object>}
 */
function collectLiveSummaries(debugPayload) {
  const rows = [];
  for (const result of asArray(debugPayload?.retrieval_results)) {
    const live = result?.summary?.live_mcp ?? null;
    if (!live) continue;
    rows.push({
      fragment_id: result?.fragment_id ?? null,
      route: result?.route ?? null,
      matched_rows: Number(live?.matched_rows ?? 0),
      account_scope: asArray(live?.account_scope),
      method: live?.method ?? null,
      args_summary: live?.args ?? null,
    });
  }
  return rows;
}

/** Builds the per-case claim-anchor audit payload. */
function buildClaimAnchorAudit(caseSummary) {
  return {
    generated_at: new Date().toISOString(),
    source: PROBE_SOURCE,
    cases: caseSummary.map((row) => ({
      case_id: row.case_id,
      label: row.label,
      claim_type: row.claim_anchor_audit?.claim_type ?? null,
      required_anchors: row.claim_anchor_audit?.required_anchors ?? 0,
      missing_anchors: row.claim_anchor_audit?.missing_anchors ?? 0,
      claim_anchor_resolution_rate: row.claim_anchor_audit?.claim_anchor_resolution_rate ?? 0,
      claim_anchors_passed: Boolean(row.grounded_eligibility?.claim_anchors_passed),
    })),
  };
}

/** Builds the per-case targeted-evidence acquisition report payload. */
function buildTargetedEvidenceReport(caseSummary) {
  return {
    generated_at: new Date().toISOString(),
    source: PROBE_SOURCE,
    cases: caseSummary.map((row) => ({
      case_id: row.case_id,
      label: row.label,
      required_checks: row.targeted_evidence_audit?.required_checks ?? 0,
      targeted_item_hits: row.targeted_evidence_audit?.targeted_item_hits ?? 0,
      targeted_evidence_hits: row.targeted_evidence_audit?.targeted_evidence_hits ?? 0,
      targeted_evidence_hit_rate: row.targeted_evidence_audit?.targeted_evidence_hit_rate ?? 0,
    })),
  };
}

/**
 * Builds the temporal-expansion audit: for each case, counts evidence items
 * with a truthy `payload.context_expansion_reason` and lists the distinct reasons.
 */
function buildTemporalExpansionAudit(caseSummary, byCase) {
  return {
    generated_at: new Date().toISOString(),
    source: PROBE_SOURCE,
    cases: caseSummary.map((row) => {
      const full = byCase.get(String(row.case_id));
      const expanded = collectEvidence(full?.debug ?? {}).filter((item) =>
        Boolean(item?.payload?.context_expansion_reason)
      );
      const reasons = new Set(expanded.map((item) => String(item?.payload?.context_expansion_reason || "")));
      return {
        case_id: row.case_id,
        label: row.label,
        temporal_guard: row.temporal_guard,
        controlled_temporal_expansion_hits: expanded.length,
        expansion_reasons: Array.from(reasons).filter(Boolean),
      };
    }),
  };
}

/** Builds the per-case inventory of live call summaries found in the debug payloads. */
function buildLiveCallInventory(probe, caseSummary, byCase) {
  return {
    generated_at: new Date().toISOString(),
    mode: String(probe?.mode ?? ""),
    cases: caseSummary.map((row) => {
      const full = byCase.get(String(row.case_id));
      return {
        case_id: row.case_id,
        label: row.label,
        live_calls: collectLiveSummaries(full?.debug ?? {}),
      };
    }),
  };
}

/** Writes `debug_payloads/<name>.json` for each named case present in `byCase`. */
function writeDebugPayloads(runDir, byCase) {
  const debugMap = {
    supplier60_case: "Q1",
    customer62_case: "Q3",
    vat_case: "Q4",
    month_close_tail_case: "Q6",
    month_close_rbp_case: "Q7",
  };
  for (const [name, caseId] of Object.entries(debugMap)) {
    const full = byCase.get(caseId);
    if (!full) continue;
    writeJson(path.join(runDir, "debug_payloads", `${name}.json`), {
      case_id: caseId,
      label: full.label,
      reply_type: full.reply_type,
      assistant_reply: full.assistant_reply,
      debug: full.debug,
    });
  }
}

/** Writes the evidence-pack example files (at most 8 evidence items each). */
function writeEvidenceExamples(runDir, byCase) {
  const examplesDir = path.join(runDir, "evidence_pack_examples");
  const evidenceExamples = [
    { case_id: "Q1", target: path.join(examplesDir, "settlement", "Q1_supplier60_example.json") },
    { case_id: "Q4", target: path.join(examplesDir, "VAT", "Q4_vat_chain_example.json") },
    { case_id: "Q7", target: path.join(examplesDir, "month-close", "Q7_rbp_example.json") },
  ];
  for (const item of evidenceExamples) {
    const full = byCase.get(item.case_id);
    if (!full) continue;
    const evidence = collectEvidence(full.debug).slice(0, 8);
    writeJson(item.target, {
      case_id: item.case_id,
      label: full.label,
      reply_type: full.reply_type,
      evidence_count: evidence.length,
      evidence,
    });
  }
}

/** Renders the "grounded positive vs limited" Markdown matrix. */
function buildGroundedMatrix(caseSummary) {
  const header = [
    "# Grounded Positive vs Limited Matrix",
    "",
    "| Case | Label | Claim Type | Admissible Evidence | Grounding Mode | Reply Type |",
    "| --- | --- | --- | ---: | --- | --- |",
  ].join("\n");
  const rows = caseSummary.map((row) =>
    toTableRow([
      String(row.case_id),
      String(row.label),
      String(row.claim_anchor_audit?.claim_type ?? ""),
      String(row.admissibility_audit?.admissible_evidence_count ?? 0),
      String(row.grounded_eligibility?.grounding_mode ?? ""),
      String(row.reply_type ?? ""),
    ])
  );
  return `${header}\n${rows.join("\n")}\n`;
}

/** Renders the control-case Markdown matrix. */
function buildControlMatrix(caseSummary) {
  const header = [
    "# Control Case Matrix (Wave 19)",
    "",
    "Source: `artifacts/final_probe_core8.json` (`useMock=true`)",
    "",
    "| Case | Domain | Temporal | Claim Anchors | Targeted Hit Rate | Admissible Evidence | Eligibility |",
    "| --- | --- | --- | --- | ---: | ---: | --- |",
  ].join("\n");
  const rows = caseSummary.map((row) => {
    const guard = row.temporal_guard;
    const anchors = row.claim_anchor_audit;
    return toTableRow([
      String(row.case_id),
      String(row.domain_hint),
      `${guard?.applied ? "applied" : "off"}, ${guard?.outcome ?? "n/a"}, ${guard?.resolved_time_anchor ?? "n/a"}`,
      `${anchors?.claim_type ?? "n/a"} (${anchors?.claim_anchor_resolution_rate ?? 0})`,
      String(row.targeted_evidence_audit?.targeted_evidence_hit_rate ?? 0),
      String(row.admissibility_audit?.admissible_evidence_count ?? 0),
      `${row.grounded_eligibility?.grounding_mode ?? "n/a"}`,
    ]);
  });
  return `${header}\n${rows.join("\n")}\n`;
}

/** Renders the Markdown chat export: one user/assistant exchange per case. */
function buildChatExport(caseSummary, byCase) {
  const lines = ["# Core-8 Chat Export (Wave 19, useMock=true)", ""];
  for (const row of caseSummary) {
    const full = byCase.get(String(row.case_id));
    const reply = String(full?.assistant_reply ?? "").trim();
    lines.push(
      `## ${row.case_id} | ${row.label}`,
      `user: ${CORE8_USER_MAP[row.case_id] ?? ""}`,
      `assistant(reply_type=${row.reply_type}, trace_id=${row.trace_id}): ${reply}`,
      ""
    );
  }
  return lines.join("\n");
}

/**
 * Builds the before/after metrics payload. "Before" values are fixed
 * constants; "after" values come from the probe metrics, defaulting to 0.
 * @param {object} metrics The probe's `metrics` object (may be empty).
 */
function buildBeforeAfter(metrics) {
  return {
    baseline_reference: "2026-03-28_Stage_04_Wave_18_Blocker_Pack_GAP01_GAP02_GAP03/artifacts/final_probe_core8.json",
    after_reference: PROBE_SOURCE,
    after_note: "After values are from Wave 19 core-8 useMock probe.",
    metrics_before: {
      claim_anchor_resolution_rate: 0,
      targeted_evidence_hit_rate: 0,
      admissible_positive_evidence_rate: 0,
      grounded_positive_answer_rate: 0,
      limited_mode_correct_retention_rate: 1,
      controlled_temporal_expansion_correctness_rate: 0,
      false_grounded_answer_rate: 0,
    },
    metrics_after: {
      claim_anchor_resolution_rate: metrics.claim_anchor_resolution_rate ?? 0,
      targeted_evidence_hit_rate: metrics.targeted_evidence_hit_rate ?? 0,
      admissible_positive_evidence_rate: metrics.admissible_positive_evidence_rate ?? 0,
      grounded_positive_answer_rate: metrics.grounded_positive_answer_rate ?? 0,
      limited_mode_correct_retention_rate: metrics.limited_mode_correct_retention_rate ?? 0,
      // NOTE(review): read from `temporal_anchor_correctness_rate`, a differently
      // named probe metric — confirm this mapping is intentional.
      controlled_temporal_expansion_correctness_rate: metrics.temporal_anchor_correctness_rate ?? 0,
      false_grounded_answer_rate: metrics.false_grounded_answer_rate ?? 0,
    },
  };
}

/** Builds the run summary, including the index of artifact file names. */
function buildRunSummary(runDir, probe, metrics, metricsAfter) {
  return {
    run_id: path.basename(runDir),
    stage: "Stage_04",
    wave: "Wave_19",
    scope: "claim_bound_evidence_acquisition_p0_only",
    mode: String(probe?.mode ?? "useMock=true"),
    metrics_after: metricsAfter,
    verdicts: {
      CLAIM_BOUND_EVIDENCE_ACQUISITION_READY: "READY_WITH_LIMITATIONS",
      POSITIVE_GROUNDING_PATH_READY: "READY_WITH_LIMITATIONS",
      overall_status: "WAVE19_ACCEPTED_WITH_LIMITATIONS",
    },
    acceptance: {
      false_grounded_answer_rate: metrics.false_grounded_answer_rate ?? 0,
      grounded_positive_answer_rate: metrics.grounded_positive_answer_rate ?? 0,
      targeted_evidence_hit_rate: metrics.targeted_evidence_hit_rate ?? 0,
    },
    artifacts: {
      readme: "README.md",
      run_summary: "run_summary.json",
      before_after_metrics: "before_after_metrics.json",
      control_case_matrix: "control_case_matrix.md",
      claim_anchor_audit: "claim_anchor_audit.json",
      targeted_evidence_acquisition_report: "targeted_evidence_acquisition_report.json",
      grounded_positive_vs_limited_matrix: "grounded_positive_vs_limited_matrix.md",
      chat_export_core8: "chat_export_core8.md",
      debug_payloads: "debug_payloads/",
      live_call_inventory: "live_call_inventory.json",
      temporal_expansion_audit: "temporal_expansion_audit.json",
      evidence_pack_examples: "evidence_pack_examples/",
    },
  };
}

/** Renders README.md; the probe path is printed with forward slashes. */
function buildReadme(runDir) {
  const probePath = path.join(runDir, "artifacts", "final_probe_core8.json").replace(/\\/g, "/");
  return [
    "# Stage 4 Wave 19 - Claim-Bound Evidence Acquisition (P0)",
    "",
    "## Scope",
    "- P0 domains only: settlements_60_62, vat_document_register_book, month_close_costs_20_44",
    "- Added claim-bound anchors, targeted evidence acquisition, controlled temporal expansion handoff, positive grounding eligibility path.",
    "- No new orchestration layer, no new domains, no Stage 5 expansion.",
    "",
    "## Execution",
    "- Build: `npm.cmd --prefix llm_normalizer/backend run build`",
    "- Tests: `npm.cmd --prefix llm_normalizer/backend test`",
    `- Core-8 probe: \`node llm_normalizer/backend/scripts/wave19Core8Probe.js ${probePath}\``,
    "",
    "## Final verdict",
    "- CLAIM_BOUND_EVIDENCE_ACQUISITION_READY: READY_WITH_LIMITATIONS",
    "- POSITIVE_GROUNDING_PATH_READY: READY_WITH_LIMITATIONS",
    "- Overall: WAVE19_ACCEPTED_WITH_LIMITATIONS",
    "",
    "## Notes",
    "- Probe mode is `useMock=true`; live rerun is still required for final production acceptance.",
    "- Positive grounding appears on a subset of core cases; limited mode remains on hard/under-anchored cases.",
    "",
  ].join("\n");
}

/**
 * Entry point. Reads `<runDir>/artifacts/final_probe_core8.json` and writes
 * the derived JSON/Markdown artifacts into `<runDir>`.
 *
 * The run directory is taken from the first CLI argument.
 * @throws {Error} If no run directory argument was given, or if the probe
 *   file cannot be read or parsed.
 */
function main() {
  const runDir = process.argv[2];
  if (!runDir) {
    throw new Error("Usage: node wave19ExportArtifacts.js <runDir>");
  }

  const probe = readJson(path.join(runDir, "artifacts", "final_probe_core8.json"));
  const caseSummary = asArray(probe?.cases);
  const metrics = probe?.metrics ?? {};
  // Full payloads keyed by stringified case id for lookup from summary rows.
  const byCase = new Map(asArray(probe?.full_payloads).map((item) => [String(item.case_id), item]));

  writeJson(path.join(runDir, "claim_anchor_audit.json"), buildClaimAnchorAudit(caseSummary));
  writeJson(
    path.join(runDir, "targeted_evidence_acquisition_report.json"),
    buildTargetedEvidenceReport(caseSummary)
  );
  writeJson(path.join(runDir, "temporal_expansion_audit.json"), buildTemporalExpansionAudit(caseSummary, byCase));
  writeJson(path.join(runDir, "live_call_inventory.json"), buildLiveCallInventory(probe, caseSummary, byCase));

  writeDebugPayloads(runDir, byCase);
  writeEvidenceExamples(runDir, byCase);

  writeText(path.join(runDir, "grounded_positive_vs_limited_matrix.md"), buildGroundedMatrix(caseSummary));
  writeText(path.join(runDir, "control_case_matrix.md"), buildControlMatrix(caseSummary));
  writeText(path.join(runDir, "chat_export_core8.md"), buildChatExport(caseSummary, byCase));

  const beforeAfter = buildBeforeAfter(metrics);
  writeJson(path.join(runDir, "before_after_metrics.json"), beforeAfter);
  writeJson(
    path.join(runDir, "run_summary.json"),
    buildRunSummary(runDir, probe, metrics, beforeAfter.metrics_after)
  );
  writeText(path.join(runDir, "README.md"), buildReadme(runDir));
}

main();