// NODEDC_1C/llm_normalizer/backend/tmp/run-wave8-eval.ts
//
// 68 lines
// 2.2 KiB
// TypeScript

import path from "path";
import request from "supertest";
/**
 * Runs one evaluation through the in-process app and prints a compact summary.
 *
 * Steps:
 *  1. Silences `console.log` while the server module is loaded and the request
 *     is served, so that stdout ends up containing only the final JSON summary.
 *  2. Sets the `FEATURE_ASSISTANT_*` environment flags to "1" before the server
 *     module is imported.
 *  3. POSTs to `/api/eval/run` via supertest, passing a baseline report path
 *     resolved against `process.cwd()` (so the working directory matters).
 *  4. On a non-200 status, writes the status and response body to stderr and
 *     exits with code 1; otherwise prints selected fields of `body.report`.
 *
 * `console.log` is always restored, even when the import or request rejects.
 */
async function main(): Promise<void> {
  const originalLog = console.log;

  const runEval = async () => {
    console.log = () => {};
    try {
      // Flags are set before the server module is imported.
      // NOTE(review): presumably the server reads them at load time — confirm.
      const featureFlags = [
        "FEATURE_ASSISTANT_ANSWER_POLICY_V11",
        "FEATURE_ASSISTANT_STAGE2_EVAL_V1",
        "FEATURE_ASSISTANT_PROBLEM_UNITS_V1",
        "FEATURE_ASSISTANT_PROBLEM_CENTRIC_ANSWER_V1",
        "FEATURE_ASSISTANT_BROAD_GUARD_V1",
        "FEATURE_ASSISTANT_LIFECYCLE_RUNTIME_V1",
        "FEATURE_ASSISTANT_LIFECYCLE_ANSWER_V1",
        "FEATURE_ASSISTANT_GRAPH_RUNTIME_V1"
      ];
      for (const flag of featureFlags) {
        process.env[flag] = "1";
      }

      const { createApp } = await import("../src/server.ts");
      const app = createApp();

      // Baseline report the run is compared against; relative to the CWD.
      const baselinePath = path.resolve(
        process.cwd(),
        "../docs/runs/2026-03-27_Stage_04_Wave_07_P0_Eval_Harness_Formal_Product_Acceptance/artifacts/current_report.json"
      );

      // `await` inside the try block so that `finally` runs only after the
      // request has fully settled.
      return await request(app)
        .post("/api/eval/run")
        .send({
          eval_target: "assistant_p0",
          useMock: true,
          mode: "single-pass-strict",
          caseSetFile: "p0_eval_corpus_v0_1.json",
          compare_with_report_file: baselinePath,
          normalizeConfig: {
            promptVersion: "normalizer_v2_0_2"
          }
        });
    } finally {
      // Restore logging on every path, including rejections.
      console.log = originalLog;
    }
  };

  const response = await runEval();

  if (response.status !== 200) {
    console.error(`status=${response.status}`);
    console.error(JSON.stringify(response.body, null, 2));
    process.exit(1);
  }

  // Optional chaining: a missing report or sub-object yields `undefined`,
  // which JSON.stringify omits from the output.
  const report = response.body.report;
  const summary = {
    run_id: report?.run_id,
    verdict: report?.acceptance_gate?.verdict,
    metrics: report?.metrics?.raw,
    assertions: report?.assertions,
    run_report_json_path: report?.artifacts?.run_report_json_path,
    run_report_md_path: report?.artifacts?.run_report_md_path,
    comparison_json_path: report?.comparison?.artifacts?.comparison_report_json_path,
    comparison_md_path: report?.comparison?.artifacts?.comparison_report_md_path,
    comparison_verdict_delta: report?.comparison?.verdict_delta
  };
  originalLog(JSON.stringify(summary, null, 2));
}
// Script entry point: run main() and turn any rejection into a stderr
// message plus a non-zero exit code.
main().catch((failure: unknown) => {
  let details: string;
  if (failure instanceof Error) {
    // Prefer the stack trace; fall back to the message when no stack exists.
    details = failure.stack ?? failure.message;
  } else {
    details = String(failure);
  }
  console.error(details);
  process.exit(1);
});