// 70 lines, 2.3 KiB, TypeScript
import path from "path";
import request from "supertest";
/**
 * Runs `task` while `console.log` is replaced by a no-op.
 *
 * The previous `console.log` is put back in a `finally` clause, so it is
 * restored whether `task` resolves or rejects.
 *
 * @param task - Asynchronous work to execute with `console.log` silenced.
 * @returns Whatever `task` resolves to.
 */
async function withMutedConsoleLog<T>(task: () => Promise<T>): Promise<T> {
  const originalLog = console.log;
  console.log = () => {};
  try {
    return await task();
  } finally {
    console.log = originalLog;
  }
}

/**
 * Posts an `assistant_p0` evaluation request to `/api/eval/run` on an
 * in-process app instance and prints a JSON summary of the returned report.
 *
 * Steps:
 * 1. Sets the assistant feature-flag environment variables to "1".
 * 2. Dynamically imports `createApp` from `../src/server.ts` and builds the app.
 * 3. Sends the evaluation request (mock mode, compared against a baseline
 *    report file) with `console.log` silenced for the duration.
 * 4. On a non-200 status, writes the status and response body to stderr and
 *    terminates the process with exit code 1.
 * 5. Otherwise prints selected fields of `response.body.report` to stdout as
 *    pretty-printed JSON.
 */
async function main(): Promise<void> {
  const featureFlags = [
    "FEATURE_ASSISTANT_ANSWER_POLICY_V11",
    "FEATURE_ASSISTANT_STAGE2_EVAL_V1",
    "FEATURE_ASSISTANT_PROBLEM_UNITS_V1",
    "FEATURE_ASSISTANT_PROBLEM_CENTRIC_ANSWER_V1",
    "FEATURE_ASSISTANT_BROAD_GUARD_V1",
    "FEATURE_ASSISTANT_LIFECYCLE_RUNTIME_V1",
    "FEATURE_ASSISTANT_LIFECYCLE_ANSWER_V1",
    "FEATURE_ASSISTANT_GRAPH_RUNTIME_V1"
  ];
  for (const flag of featureFlags) {
    process.env[flag] = "1";
  }

  const response = await withMutedConsoleLog(async () => {
    // The server module is imported only after the flags above are set;
    // presumably it reads them while loading or in createApp() - confirm
    // against ../src/server.ts before reordering.
    const { createApp } = await import("../src/server.ts");
    const app = createApp();

    // Resolved relative to the current working directory, so the script must
    // be started from a directory whose parent contains `docs/`.
    const baselinePath = path.resolve(
      process.cwd(),
      "../docs/runs/2026-03-27_Stage_04_Wave_08_Route_Correctness_Recovery_Domain_Purity_Closure/artifacts/current_report.json"
    );

    return await request(app).post("/api/eval/run").send({
      eval_target: "assistant_p0",
      useMock: true,
      mode: "single-pass-strict",
      caseSetFile: "p0_eval_corpus_v0_2.json",
      compare_with_report_file: baselinePath,
      normalizeConfig: {
        promptVersion: "normalizer_v2_0_2"
      }
    });
  });

  if (response.status !== 200) {
    console.error(`status=${response.status}`);
    console.error(JSON.stringify(response.body, null, 2));
    process.exit(1);
  }

  // Optional chaining keeps the summary printable even if parts of the report
  // are absent; JSON.stringify omits keys whose value is undefined.
  const report = response.body.report;
  const summary = {
    run_id: report?.run_id,
    acceptance_verdict: report?.acceptance_gate?.verdict,
    baseline_stability_verdict: report?.baseline_stability_gate?.verdict,
    metrics: report?.metrics?.raw,
    quality_gap_metrics: report?.quality_gap_metrics?.raw,
    assertions: report?.assertions,
    run_report_json_path: report?.artifacts?.run_report_json_path,
    run_report_md_path: report?.artifacts?.run_report_md_path,
    comparison_json_path: report?.comparison?.artifacts?.comparison_report_json_path,
    comparison_md_path: report?.comparison?.artifacts?.comparison_report_md_path,
    comparison_verdict_delta: report?.comparison?.verdict_delta
  };
  console.log(JSON.stringify(summary, null, 2));
}
// Script entry point: run main() and, if it rejects, report the failure on
// stderr (stack trace when available, otherwise the message or the
// stringified value) and terminate with exit code 1.
main().catch((failure: unknown) => {
  let details: string;
  if (failure instanceof Error) {
    details = failure.stack ?? failure.message;
  } else {
    details = String(failure);
  }
  console.error(details);
  process.exit(1);
});