АРЧ АП11 - Добавить scenario acceptance gate для truth harness и закрыть Phase 7 агентным прогоном

This commit is contained in:
dctouch 2026-04-17 12:17:47 +03:00
parent f5ff844105
commit f7edf6aacb
9 changed files with 811 additions and 3 deletions

View File

@ -0,0 +1,126 @@
{
"schema_version": "domain_truth_harness_spec_v1",
"scenario_id": "address_truth_harness_phase7_acceptance_gate_mix",
"domain": "address_phase7_acceptance_gate_mix",
"title": "Phase 7 acceptance replay for inventory root, selected-object continuity, and human meta boundaries",
"description": "Primary acceptance scenario-tree for turnaround 11: root inventory snapshot, selected-object supplier, selected-object documents, same-date restore, plus human meta and historical capability follow-ups.",
"bindings": {},
"steps": [
{
"step_id": "step_01_inventory_march_2021",
"title": "Inventory root snapshot at March 2021",
"criticality": "critical",
"question": "какие остатки на складе на март 2021",
"allowed_reply_types": [
"factual"
],
"expected_intents": [
"inventory_on_hand_as_of_date"
],
"required_filters": {
"as_of_date": "2021-03-31",
"period_from": "2021-03-01",
"period_to": "2021-03-31"
},
"required_direct_answer_patterns_any": [
"31\\.03\\.2021",
"(?i)на складе",
"(?i)столешница 600\\*3050\\*26 альмандин"
]
},
{
"step_id": "step_02_selected_item_supplier",
"title": "Selected-object supplier provenance",
"criticality": "critical",
"question": "По выбранному объекту \"Столешница 600*3050*26 альмандин\": кто нам это поставил?",
"allowed_reply_types": [
"factual"
],
"expected_intents": [
"inventory_purchase_provenance_for_item"
],
"required_direct_answer_patterns_any": [
"(?i)столешница 600\\*3050\\*26 альмандин",
"(?i)поставщик|поставил|куплен",
"(?i)союз|торговый дом"
],
"forbidden_direct_answer_patterns": [
"(?i)^на 31\\.03\\.2021 на складе",
"(?i)^сейчас не дам прямой адресный ответ"
]
},
{
"step_id": "step_03_selected_item_documents",
"title": "Selected-object documents stay in the same contour",
"criticality": "critical",
"question": "По выбранному объекту \"Столешница 600*3050*26 альмандин\": покажи документы по этой позиции",
"allowed_reply_types": [
"factual"
],
"expected_intents": [
"inventory_purchase_documents_for_item"
],
"required_direct_answer_patterns_any": [
"(?i)столешница 600\\*3050\\*26 альмандин",
"(?i)документ",
"(?i)союз|торговый дом"
]
},
{
"step_id": "step_04_inventory_same_date_restore",
"title": "Same-date restore returns to the March root snapshot",
"criticality": "critical",
"question": "покажи еще раз остатки на эту же дату",
"allowed_reply_types": [
"factual"
],
"expected_intents": [
"inventory_on_hand_as_of_date"
],
"required_filters": {
"as_of_date": "2021-03-31",
"period_from": "2021-03-01",
"period_to": "2021-03-31"
},
"required_direct_answer_patterns_any": [
"31\\.03\\.2021",
"(?i)на складе"
],
"forbidden_direct_answer_patterns": [
"(?i)^сейчас не дам прямой адресный ответ",
"(?i)transition_not_supported_by_capability"
]
},
{
"step_id": "step_05_data_scope_meta_interrupt",
"title": "Data-scope meta question remains human and non-technical",
"criticality": "warning",
"question": "по какой компании мы сейчас работаем?",
"required_answer_patterns_any": [
"(?i)компан|организац|контур",
"(?i)работ"
],
"forbidden_answer_patterns": [
"(?i)tool_gate_reason",
"(?i)hard_meta_mode",
"(?i)living_reason"
]
},
{
"step_id": "step_06_historical_capability_followup",
"title": "Historical capability follow-up stays human",
"criticality": "warning",
"question": "а исторические остатки тоже можешь?",
"required_answer_patterns_any": [
"(?i)историческ|история",
"(?i)могу|умею"
],
"forbidden_answer_patterns": [
"(?i)^сейчас не дам прямой адресный ответ",
"(?i)^в текущем адресном контуре этот запрос лучше не закрывать в лоб",
"(?i)tool_gate_reason",
"(?i)hard_meta_mode"
]
}
]
}

View File

@ -1,4 +1,38 @@
[
{
"generation_id": "gen-ag04170911-ff51e1",
"created_at": "2026-04-17T09:11:27+00:00",
"mode": "saved_user_sessions",
"title": "AGENT | Phase 7 acceptance replay for inventory root, selected-object continuity, and human meta boundaries",
"count": 6,
"domain": "address_phase7_acceptance_gate_mix",
"questions": [
"какие остатки на складе на март 2021",
"По выбранному объекту \"Столешница 600*3050*26 альмандин\": кто нам это поставил?",
"По выбранному объекту \"Столешница 600*3050*26 альмандин\": покажи документы по этой позиции",
"покажи еще раз остатки на эту же дату",
"по какой компании мы сейчас работаем?",
"а исторические остатки тоже можешь?"
],
"generated_by": "codex_agent",
"saved_case_set_file": "assistant_autogen_saved_user_sessions_20260417091127_gen-ag04170911-ff51e1.json",
"context": {
"llm_provider": null,
"model": null,
"assistant_prompt_version": null,
"decomposition_prompt_version": null,
"prompt_fingerprint": null,
"autogen_personality_id": null,
"autogen_personality_prompt": null,
"source_session_id": null,
"saved_session_file": "assistant_saved_session_20260417091127_gen-ag04170911-ff51e1.json",
"saved_case_set_kind": "agent_semantic_scenario",
"agent_run": true,
"agent_focus": "scenario acceptance gate over root selected-object restore and human meta",
"architecture_phase": "turnaround_11_phase7",
"source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\address_truth_harness_phase7_acceptance_gate_mix.json"
}
},
{
"generation_id": "gen-ag04170855-d13dd3",
"created_at": "2026-04-17T08:55:50+00:00",

View File

@ -0,0 +1,93 @@
{
"saved_at": "2026-04-17T09:11:27+00:00",
"generation_id": "gen-ag04170911-ff51e1",
"mode": "saved_user_sessions",
"title": "AGENT | Phase 7 acceptance replay for inventory root, selected-object continuity, and human meta boundaries",
"agent_run": true,
"questions": [
"какие остатки на складе на март 2021",
"По выбранному объекту \"Столешница 600*3050*26 альмандин\": кто нам это поставил?",
"По выбранному объекту \"Столешница 600*3050*26 альмандин\": покажи документы по этой позиции",
"покажи еще раз остатки на эту же дату",
"по какой компании мы сейчас работаем?",
"а исторические остатки тоже можешь?"
],
"metadata": {
"assistant_prompt_version": null,
"decomposition_prompt_version": null,
"prompt_fingerprint": null,
"agent_focus": "scenario acceptance gate over root selected-object restore and human meta",
"architecture_phase": "turnaround_11_phase7",
"source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\address_truth_harness_phase7_acceptance_gate_mix.json"
},
"source_session_id": null,
"session": {
"session_id": null,
"mode": "agent_semantic_run",
"items": [
{
"message_id": "agent-user-001",
"role": "user",
"text": "какие остатки на складе на март 2021",
"created_at": "2026-04-17T09:11:27+00:00",
"reply_type": null,
"trace_id": null,
"debug": null
},
{
"message_id": "agent-user-002",
"role": "user",
"text": "По выбранному объекту \"Столешница 600*3050*26 альмандин\": кто нам это поставил?",
"created_at": "2026-04-17T09:11:27+00:00",
"reply_type": null,
"trace_id": null,
"debug": null
},
{
"message_id": "agent-user-003",
"role": "user",
"text": "По выбранному объекту \"Столешница 600*3050*26 альмандин\": покажи документы по этой позиции",
"created_at": "2026-04-17T09:11:27+00:00",
"reply_type": null,
"trace_id": null,
"debug": null
},
{
"message_id": "agent-user-004",
"role": "user",
"text": "покажи еще раз остатки на эту же дату",
"created_at": "2026-04-17T09:11:27+00:00",
"reply_type": null,
"trace_id": null,
"debug": null
},
{
"message_id": "agent-user-005",
"role": "user",
"text": "по какой компании мы сейчас работаем?",
"created_at": "2026-04-17T09:11:27+00:00",
"reply_type": null,
"trace_id": null,
"debug": null
},
{
"message_id": "agent-user-006",
"role": "user",
"text": "а исторические остатки тоже можешь?",
"created_at": "2026-04-17T09:11:27+00:00",
"reply_type": null,
"trace_id": null,
"debug": null
}
],
"agent_run": true,
"metadata": {
"assistant_prompt_version": null,
"decomposition_prompt_version": null,
"prompt_fingerprint": null,
"agent_focus": "scenario acceptance gate over root selected-object restore and human meta",
"architecture_phase": "turnaround_11_phase7",
"source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\address_truth_harness_phase7_acceptance_gate_mix.json"
}
}
}

View File

@ -0,0 +1,43 @@
{
"suite_id": "assistant_saved_session_gen-ag04170911-ff51e1",
"suite_version": "0.1.0",
"schema_version": "assistant_saved_session_suite_v0_1",
"generated_at": "2026-04-17T09:11:27+00:00",
"generation_id": "gen-ag04170911-ff51e1",
"mode": "saved_user_sessions",
"title": "AGENT | Phase 7 acceptance replay for inventory root, selected-object continuity, and human meta boundaries",
"domain": "address_phase7_acceptance_gate_mix",
"scenario_count": 1,
"case_ids": [
"SAVED-001"
],
"cases": [
{
"case_id": "SAVED-001",
"scenario_tag": "agent_saved_user_sessions",
"title": "AGENT | Phase 7 acceptance replay for inventory root, selected-object continuity, and human meta boundaries",
"question_type": "followup",
"broadness_level": "medium",
"turns": [
{
"user_message": "какие остатки на складе на март 2021"
},
{
"user_message": "По выбранному объекту \"Столешница 600*3050*26 альмандин\": кто нам это поставил?"
},
{
"user_message": "По выбранному объекту \"Столешница 600*3050*26 альмандин\": покажи документы по этой позиции"
},
{
"user_message": "покажи еще раз остатки на эту же дату"
},
{
"user_message": "по какой компании мы сейчас работаем?"
},
{
"user_message": "а исторические остатки тоже можешь?"
}
]
}
]
}

View File

@ -0,0 +1,39 @@
{
"suite_id": "assistant_saved_session_runtime_job-Z-vWMI8lw_",
"suite_version": "0.1.0",
"schema_version": "assistant_saved_session_runtime_v0_1",
"title": "AGENT | Phase 7 acceptance replay for inventory root, selected-object continuity, and human meta boundaries",
"scenario_count": 1,
"case_ids": [
"SAVED-001"
],
"cases": [
{
"case_id": "SAVED-001",
"scenario_tag": "saved_user_sessions_runtime",
"title": "AGENT | Phase 7 acceptance replay for inventory root, selected-object continuity, and human meta boundaries",
"question_type": "followup",
"broadness_level": "medium",
"turns": [
{
"user_message": "какие остатки на складе на март 2021"
},
{
"user_message": "По выбранному объекту \"Столешница 600*3050*26 альмандин\": кто нам это поставил?"
},
{
"user_message": "По выбранному объекту \"Столешница 600*3050*26 альмандин\": покажи документы по этой позиции"
},
{
"user_message": "покажи еще раз остатки на эту же дату"
},
{
"user_message": "по какой компании мы сейчас работаем?"
},
{
"user_message": "а исторические остатки тоже можешь?"
}
]
}
]
}

View File

@ -0,0 +1,36 @@
{
"suite_id": "assistant_saved_session_runtime_job-otA7X9BRT5",
"suite_version": "0.1.0",
"schema_version": "assistant_saved_session_runtime_v0_1",
"title": "AGENT | Phase 6 provider/runtime replay across chat, meta, and address boundaries",
"scenario_count": 1,
"case_ids": [
"SAVED-001"
],
"cases": [
{
"case_id": "SAVED-001",
"scenario_tag": "saved_user_sessions_runtime",
"title": "AGENT | Phase 6 provider/runtime replay across chat, meta, and address boundaries",
"question_type": "followup",
"broadness_level": "medium",
"turns": [
{
"user_message": "привет, как дела?"
},
{
"user_message": "по какой компании мы сейчас работаем?"
},
{
"user_message": "что ты можешь по 1С?"
},
{
"user_message": "какие остатки на складе на март 2021"
},
{
"user_message": "а исторические остатки тоже можешь?"
}
]
}
]
}

View File

@ -9,6 +9,7 @@ from types import SimpleNamespace
from typing import Any
import domain_case_loop as dcl
import scenario_acceptance_policy as sap
REPO_ROOT = Path(__file__).resolve().parent.parent
@ -696,6 +697,21 @@ def build_truth_review_markdown(spec: dict[str, Any], scenario_state: dict[str,
return "\n".join(lines).strip() + "\n"
def write_acceptance_artifacts(
    output_dir: Path,
    spec: dict[str, Any],
    scenario_state: dict[str, Any],
    review_summary: dict[str, Any],
) -> dict[str, Any]:
    """Derive the acceptance matrix and pack state and write them into ``output_dir``.

    Four files are written, in this order: ``scenario_acceptance_matrix.json``,
    ``scenario_acceptance_matrix.md``, ``pack_state.json`` and ``final_status.md``.

    Returns:
        A dict with the keys ``acceptance_matrix`` and ``pack_state``.
    """
    matrix = sap.build_scenario_acceptance_matrix(spec, scenario_state, review_summary)
    state = sap.derive_truth_harness_pack_state(spec, scenario_state, review_summary, matrix)

    # Matrix artifacts: machine-readable JSON first, then the markdown rendering.
    write_json(output_dir / "scenario_acceptance_matrix.json", matrix)
    matrix_markdown = sap.build_scenario_acceptance_matrix_markdown(matrix)
    write_text(output_dir / "scenario_acceptance_matrix.md", matrix_markdown)

    # Pack-state artifacts: JSON first, then the final-status markdown.
    write_json(output_dir / "pack_state.json", state)
    final_status_markdown = sap.build_truth_harness_final_status_markdown(state)
    write_text(output_dir / "final_status.md", final_status_markdown)

    return {"acceptance_matrix": matrix, "pack_state": state}
def save_step_bundle(
*,
step_dir: Path,
@ -845,7 +861,13 @@ def review_export(spec: dict[str, Any], export_path: Path, output_dir: Path) ->
write_json(output_dir / "scenario_state.json", scenario_state)
write_json(output_dir / "truth_review.json", {"summary": review_summary, "steps": scenario_state["step_outputs"]})
write_text(output_dir / "truth_review.md", review_markdown)
return {"scenario_state": scenario_state, "review_summary": review_summary}
acceptance_bundle = write_acceptance_artifacts(output_dir, spec, scenario_state, review_summary)
return {
"scenario_state": scenario_state,
"review_summary": review_summary,
"acceptance_matrix": acceptance_bundle["acceptance_matrix"],
"pack_state": acceptance_bundle["pack_state"],
}
def run_live(spec: dict[str, Any], output_dir: Path, args: argparse.Namespace) -> dict[str, Any]:
@ -932,10 +954,15 @@ def run_live(spec: dict[str, Any], output_dir: Path, args: argparse.Namespace) -
write_json(output_dir / "scenario_state.json", scenario_state)
write_json(output_dir / "truth_review.json", {"summary": review_summary, "steps": scenario_state["step_outputs"]})
write_text(output_dir / "truth_review.md", review_markdown)
write_text(output_dir / "final_status.md", f"# Final status\n\n- status: `{review_summary['overall_status']}`\n")
acceptance_bundle = write_acceptance_artifacts(output_dir, spec, scenario_state, review_summary)
print(f"[truth-harness] saved artifacts to {output_dir}")
print(f"[truth-harness] overall_status={review_summary['overall_status']}")
return {"scenario_state": scenario_state, "review_summary": review_summary}
return {
"scenario_state": scenario_state,
"review_summary": review_summary,
"acceptance_matrix": acceptance_bundle["acceptance_matrix"],
"pack_state": acceptance_bundle["pack_state"],
}
def build_bootstrap_spec(export_path: Path, scenario_id: str, domain: str, title: str | None) -> dict[str, Any]:
@ -994,6 +1021,7 @@ def handle_review_export(args: argparse.Namespace) -> int:
)
result = review_export(spec, export_path, output_dir)
print(f"[truth-harness] review-export overall_status={result['review_summary']['overall_status']}")
print(f"[truth-harness] review-export final_status={result['pack_state']['final_status']}")
print(f"[truth-harness] artifacts={output_dir}")
return 0
@ -1006,6 +1034,7 @@ def handle_run_live(args: argparse.Namespace) -> int:
)
result = run_live(spec, output_dir, args)
print(f"[truth-harness] run-live overall_status={result['review_summary']['overall_status']}")
print(f"[truth-harness] run-live final_status={result['pack_state']['final_status']}")
print(f"[truth-harness] artifacts={output_dir}")
return 0

View File

@ -0,0 +1,306 @@
from __future__ import annotations
from datetime import datetime, timezone
from typing import Any
# Values written into the "schema_version" field of the acceptance matrix and pack state.
SCENARIO_ACCEPTANCE_MATRIX_SCHEMA_VERSION = "scenario_acceptance_matrix_v1"
TRUTH_HARNESS_PACK_STATE_SCHEMA_VERSION = "truth_harness_pack_state_v1"
# Maps a review finding's "severity" to its priority label.
SEVERITY_TO_PRIORITY = {
    "critical": "P0",
    "warning": "P1",
    "info": "P2",
}
# Sort rank of each priority label; a lower rank is a higher priority.
PRIORITY_RANK = {"P0": 0, "P1": 1, "P2": 2, "none": 3}
# Expected intents that mark a spec step as a selected-object step.
SELECTED_OBJECT_INTENTS = {
    "inventory_purchase_provenance_for_item",
    "inventory_purchase_documents_for_item",
    "inventory_sale_trace_for_item",
    "inventory_profitability_for_item",
    "inventory_purchase_to_sale_chain",
}
def _now_iso() -> str:
return datetime.now(timezone.utc).replace(microsecond=0).isoformat()
def _normalize_step_outputs(scenario_state: dict[str, Any]) -> dict[str, dict[str, Any]]:
raw = scenario_state.get("step_outputs")
return raw if isinstance(raw, dict) else {}
def _normalize_findings(step_state: dict[str, Any]) -> list[dict[str, Any]]:
raw = step_state.get("review_findings")
return [item for item in raw if isinstance(item, dict)] if isinstance(raw, list) else []
def _priority_from_finding(finding: dict[str, Any]) -> str:
    """Translate a finding's ``severity`` into a priority label.

    The severity is stringified, stripped and lower-cased before the lookup in
    ``SEVERITY_TO_PRIORITY``; a missing or unrecognised severity yields ``"P2"``.
    """
    raw_severity = finding.get("severity") or ""
    normalized = str(raw_severity).strip().lower()
    return SEVERITY_TO_PRIORITY.get(normalized, "P2")
def _highest_priority(findings: list[dict[str, Any]]) -> str:
    """Return the best-ranked priority label among ``findings``, or ``"none"`` when empty.

    Labels are ranked through ``PRIORITY_RANK``; a label missing from that table ranks
    last (99).
    """
    if not findings:
        return "none"
    labels = (_priority_from_finding(entry) for entry in findings)
    # min() yields the first lowest-ranked label, the same element a stable sort puts first.
    return min(labels, key=lambda label: PRIORITY_RANK.get(label, 99))
def _has_selected_object_signal(step: dict[str, Any]) -> bool:
    """Report whether a spec step looks like a follow-up about a selected object.

    A step qualifies when any of its ``expected_intents`` is listed in
    ``SELECTED_OBJECT_INTENTS``, or when its lower-cased question text contains one of
    the markers below (Russian follow-up phrases or a double quote).

    The question text is taken from ``question_template`` and, when that key is absent
    or empty, from ``question``, so steps that store the text under either key are
    evaluated by the marker heuristic.

    Args:
        step: One step of a scenario spec.

    Returns:
        ``True`` when the step carries a selected-object signal, otherwise ``False``.
    """
    question = str(step.get("question_template") or step.get("question") or "").lower()
    expected_intents = {
        str(item).strip()
        for item in (step.get("expected_intents") or [])
        if str(item).strip()
    }
    # An explicit selected-object intent is decisive on its own.
    if expected_intents & SELECTED_OBJECT_INTENTS:
        return True
    return any(
        marker in question
        for marker in (
            "выбранному объекту",
            "по этой позиции",
            "по ней",
            "по нему",
            "\"",
        )
    )
def _is_direct_answer_code(code: str) -> bool:
return code.startswith("required_direct_answer_") or code.startswith("forbidden_direct_answer_")
def _is_temporal_code(code: str) -> bool:
return (
code.startswith("missing_filter:")
or code.startswith("wrong_filter:")
or code.startswith("forbidden_filter_key:")
or code.startswith("forbidden_filter_value:")
or code.startswith("period_carryover_")
or code.startswith("previous_step_missing:")
)
def _is_truth_gate_code(code: str) -> bool:
return code in {
"unexpected_reply_type",
"unexpected_limited_reason_category",
"wrong_result_mode",
}
def _is_route_code(code: str) -> bool:
return code in {"wrong_intent", "wrong_capability", "wrong_recipe", "question_sequence_mismatch"}
def _is_human_answer_quality_code(code: str) -> bool:
return code in {
"required_answer_patterns_any_missing",
"required_answer_patterns_all_missing",
"forbidden_answer_pattern_hit",
}
def _derive_step_invariant_failures(step: dict[str, Any], findings: list[dict[str, Any]]) -> dict[str, bool]:
    """Classify a step's finding codes into per-invariant failure flags.

    Each flag is True when at least one finding code matches the corresponding
    predicate. ``selected_object_continuity`` additionally requires the step itself to
    carry a selected-object signal.
    """
    finding_codes = [str(entry.get("code") or "").strip() for entry in findings]

    def _any_code(predicate) -> bool:
        # True when at least one finding code satisfies the predicate.
        return any(predicate(code) for code in finding_codes)

    is_selected_object_step = _has_selected_object_signal(step)
    failures: dict[str, bool] = {}
    failures["direct_answer"] = _any_code(_is_direct_answer_code)
    failures["temporal_honesty"] = _any_code(_is_temporal_code)
    failures["selected_object_continuity"] = is_selected_object_step and _any_code(_is_route_code)
    failures["truth_gate"] = _any_code(_is_truth_gate_code)
    failures["human_answer_quality"] = _any_code(_is_human_answer_quality_code)
    return failures
def build_scenario_acceptance_matrix(
    spec: dict[str, Any], scenario_state: dict[str, Any], review_summary: dict[str, Any]
) -> dict[str, Any]:
    """Build the per-step acceptance matrix for a reviewed scenario.

    Walks ``spec["steps"]`` in order, pairs every step with its entry in
    ``scenario_state["step_outputs"]`` (looked up by ``step_id``) and emits one row per
    step plus scenario-level summary counters.

    Args:
        spec: Scenario spec; ``steps``, ``scenario_id``, ``domain`` and ``title`` are read.
        scenario_state: Review state; ``step_outputs`` and ``session_id`` are read.
        review_summary: Review summary; only ``review_source`` is read here.

    Returns:
        A dict with ``schema_version``, identification fields, ``rows``, ``summary``
        (step totals, unresolved P0/P1/P2 counts, invariant failure counts and the
        derived ``*_ok`` invariants) and ``updated_at``.
    """
    step_outputs = _normalize_step_outputs(scenario_state)
    rows: list[dict[str, Any]] = []
    unresolved_priority_counts = {"P0": 0, "P1": 0, "P2": 0}
    invariant_failure_counts = {
        "direct_answer": 0,
        "temporal_honesty": 0,
        "selected_object_continuity": 0,
        "truth_gate": 0,
        "human_answer_quality": 0,
    }
    for index, step in enumerate(spec.get("steps") or [], start=1):
        step_id = str(step.get("step_id") or "").strip()
        step_state = step_outputs.get(step_id, {}) if step_id else {}
        findings = _normalize_findings(step_state)
        invariant_failures = _derive_step_invariant_failures(step, findings)
        for invariant_name, failed in invariant_failures.items():
            if failed:
                invariant_failure_counts[invariant_name] += 1
        highest_priority = _highest_priority(findings)
        # Count every finding in its own priority bucket. Counting only the findings at
        # the step's highest priority would hide, for example, a warning that shares a
        # step with a critical finding from unresolved_p1_count.
        for finding in findings:
            finding_priority = _priority_from_finding(finding)
            if finding_priority in unresolved_priority_counts:
                unresolved_priority_counts[finding_priority] += 1
        rows.append(
            {
                "index": index,
                "step_id": step_id,
                "title": step.get("title"),
                # Prefer "question_template"; fall back to "question" when it is absent.
                "question": step.get("question_template") or step.get("question"),
                "criticality": str(step.get("criticality") or "critical"),
                "review_status": str(step_state.get("review_status") or "unknown"),
                "reply_type": step_state.get("reply_type"),
                "detected_intent": step_state.get("detected_intent"),
                "capability_id": step_state.get("capability_id"),
                "selected_object_step": _has_selected_object_signal(step),
                "highest_unresolved_priority": highest_priority,
                "unresolved_findings_count": len(findings),
                "invariant_failures": [name for name, failed in invariant_failures.items() if failed],
                "findings": findings,
            }
        )
    # An invariant is green only when no step reported a failure for it.
    invariants = {
        "direct_answer_ok": invariant_failure_counts["direct_answer"] == 0,
        "temporal_honesty_ok": invariant_failure_counts["temporal_honesty"] == 0,
        "selected_object_continuity_ok": invariant_failure_counts["selected_object_continuity"] == 0,
        "truth_gate_ok": invariant_failure_counts["truth_gate"] == 0,
        "human_answer_quality_ok": invariant_failure_counts["human_answer_quality"] == 0,
    }
    critical_rows = [row for row in rows if row["criticality"] == "critical"]
    # With no critical steps at all the critical path is reported as not green.
    critical_path_green = bool(critical_rows) and all(row["review_status"] == "pass" for row in critical_rows)
    return {
        "schema_version": SCENARIO_ACCEPTANCE_MATRIX_SCHEMA_VERSION,
        "scenario_id": spec.get("scenario_id"),
        "domain": spec.get("domain"),
        "title": spec.get("title"),
        "review_source": review_summary.get("review_source"),
        "session_id": scenario_state.get("session_id"),
        "rows": rows,
        "summary": {
            "steps_total": len(rows),
            "critical_steps_total": len(critical_rows),
            "critical_steps_passed": sum(1 for row in critical_rows if row["review_status"] == "pass"),
            "critical_path_green": critical_path_green,
            "unresolved_p0_count": unresolved_priority_counts["P0"],
            "unresolved_p1_count": unresolved_priority_counts["P1"],
            "unresolved_p2_count": unresolved_priority_counts["P2"],
            "invariant_failure_counts": invariant_failure_counts,
            "invariants": invariants,
        },
        "updated_at": _now_iso(),
    }
def derive_truth_harness_pack_state(
    spec: dict[str, Any],
    scenario_state: dict[str, Any],
    review_summary: dict[str, Any],
    acceptance_matrix: dict[str, Any],
) -> dict[str, Any]:
    """Derive the final pack state of a scenario from its review and acceptance matrix.

    ``final_status`` is decided as follows:

    * ``"blocked"`` (reason ``no_step_outputs``) when the scenario state has no step outputs;
    * ``"accepted"`` when the review's overall status is ``"pass"``, there are no
      unresolved P0 findings, and the matrix carries a non-empty set of invariants
      that are all truthy;
    * ``"partial"`` otherwise, with a reason naming the first unmet condition.

    Args:
        spec: Scenario spec; ``scenario_id``, ``domain`` and ``title`` are read.
        scenario_state: Review state; ``step_outputs`` and ``session_id`` are read.
        review_summary: Review summary; ``overall_status``, ``review_source`` and the
            ``steps_*`` counters are read.
        acceptance_matrix: Matrix whose ``summary`` supplies counters and invariants.

    Returns:
        The pack-state dict, including ``final_status``, ``final_status_reason``,
        ``acceptance_gate_passed`` and the invariants copied from the matrix summary.
    """
    summary = acceptance_matrix.get("summary") if isinstance(acceptance_matrix.get("summary"), dict) else {}
    invariants = summary.get("invariants") if isinstance(summary.get("invariants"), dict) else {}
    unresolved_p0_count = int(summary.get("unresolved_p0_count") or 0)
    review_overall_status = str(review_summary.get("overall_status") or "unknown")
    step_outputs = _normalize_step_outputs(scenario_state)
    # all() over an empty mapping is True, so a matrix without invariants would pass the
    # gate with nothing verified; a non-empty mapping is required to keep it fail-closed.
    invariants_green = bool(invariants) and all(bool(value) for value in invariants.values())
    if not step_outputs:
        final_status = "blocked"
        final_status_reason = "no_step_outputs"
    elif review_overall_status == "pass" and unresolved_p0_count == 0 and invariants_green:
        final_status = "accepted"
        final_status_reason = "scenario_acceptance_gate_passed"
    else:
        final_status = "partial"
        if unresolved_p0_count > 0:
            final_status_reason = "unresolved_p0"
        elif review_overall_status == "warning":
            final_status_reason = "review_warning_remaining"
        elif review_overall_status == "fail":
            final_status_reason = "review_failures_remaining"
        else:
            final_status_reason = "acceptance_invariants_not_green"
    return {
        "schema_version": TRUTH_HARNESS_PACK_STATE_SCHEMA_VERSION,
        "pack_id": spec.get("scenario_id"),
        "scenario_id": spec.get("scenario_id"),
        "domain": spec.get("domain"),
        "title": spec.get("title"),
        "review_source": review_summary.get("review_source"),
        "session_id": scenario_state.get("session_id"),
        "steps_total": review_summary.get("steps_total"),
        "steps_passed": review_summary.get("steps_passed"),
        "steps_with_warning": review_summary.get("steps_with_warning"),
        "steps_failed": review_summary.get("steps_failed"),
        "review_overall_status": review_overall_status,
        "execution_status": "exact" if review_overall_status == "pass" else "partial",
        "final_status": final_status,
        "final_status_reason": final_status_reason,
        "acceptance_gate_passed": final_status == "accepted",
        "no_unresolved_p0": unresolved_p0_count == 0,
        "unresolved_p0_count": unresolved_p0_count,
        "unresolved_p1_count": int(summary.get("unresolved_p1_count") or 0),
        "unresolved_p2_count": int(summary.get("unresolved_p2_count") or 0),
        "critical_path_green": bool(summary.get("critical_path_green")),
        "invariants": invariants,
        "updated_at": _now_iso(),
    }
def build_scenario_acceptance_matrix_markdown(acceptance_matrix: dict[str, Any]) -> str:
    """Render an acceptance matrix as a markdown report ending in a single newline.

    The report has a header with identification fields and summary counters, an
    "Acceptance invariants" section and a "Steps" section with one entry per row.
    Missing identification fields are shown as ``n/a``.
    """
    summary = acceptance_matrix.get("summary")
    if not isinstance(summary, dict):
        summary = {}
    invariants = summary.get("invariants")
    if not isinstance(invariants, dict):
        invariants = {}

    out: list[str] = ["# Scenario acceptance matrix", ""]

    # Header: identification fields, then the summary counters.
    for key in ("scenario_id", "domain"):
        out.append(f"- {key}: `{acceptance_matrix.get(key) or 'n/a'}`")
    out.append(f"- title: {acceptance_matrix.get('title') or 'n/a'}")
    for key in ("review_source", "session_id"):
        out.append(f"- {key}: `{acceptance_matrix.get(key) or 'n/a'}`")
    for key in (
        "critical_path_green",
        "unresolved_p0_count",
        "unresolved_p1_count",
        "unresolved_p2_count",
    ):
        out.append(f"- {key}: `{summary.get(key)}`")

    out.extend(["", "## Acceptance invariants"])
    for key in (
        "direct_answer_ok",
        "temporal_honesty_ok",
        "selected_object_continuity_ok",
        "truth_gate_ok",
        "human_answer_quality_ok",
    ):
        out.append(f"- {key}: `{invariants.get(key)}`")

    out.extend(["", "## Steps"])
    for row in acceptance_matrix.get("rows") or []:
        out.append(f"- `{row.get('step_id')}`")
        for key in (
            "review_status",
            "criticality",
            "highest_unresolved_priority",
            "selected_object_step",
        ):
            out.append(f" {key}: `{row.get(key)}`")
        failed_names = ", ".join(row.get("invariant_failures") or []) or "none"
        out.append(f" invariant_failures: {failed_names}")

    return "\n".join(out).strip() + "\n"
def build_truth_harness_final_status_markdown(pack_state: dict[str, Any]) -> str:
    """Render the final-status markdown summary for a pack state.

    Lists the final status, its reason, the review's overall status, the
    ``no_unresolved_p0`` flag and every acceptance invariant. ``human_answer_quality_ok``
    is included so that each invariant stored in the pack state is visible in the
    report. Missing status fields are shown as ``n/a``.

    Args:
        pack_state: Pack-state dict; ``invariants`` is ignored unless it is a dict.

    Returns:
        The markdown text, terminated by a newline.
    """
    invariants = pack_state.get("invariants") if isinstance(pack_state.get("invariants"), dict) else {}
    return (
        "# Final status\n\n"
        f"- status: `{pack_state.get('final_status') or 'n/a'}`\n"
        f"- reason: `{pack_state.get('final_status_reason') or 'n/a'}`\n"
        f"- review_overall_status: `{pack_state.get('review_overall_status') or 'n/a'}`\n"
        f"- no_unresolved_p0: `{pack_state.get('no_unresolved_p0')}`\n"
        f"- direct_answer_ok: `{invariants.get('direct_answer_ok')}`\n"
        f"- temporal_honesty_ok: `{invariants.get('temporal_honesty_ok')}`\n"
        f"- selected_object_continuity_ok: `{invariants.get('selected_object_continuity_ok')}`\n"
        f"- truth_gate_ok: `{invariants.get('truth_gate_ok')}`\n"
        f"- human_answer_quality_ok: `{invariants.get('human_answer_quality_ok')}`\n"
    )

View File

@ -0,0 +1,102 @@
from __future__ import annotations
import unittest
import scenario_acceptance_policy as sap
class ScenarioAcceptancePolicyTests(unittest.TestCase):
    """Checks the pack state derived from a one-step scenario review."""

    @staticmethod
    def _derive_pack_state(spec, scenario_state, review_summary):
        # Run the two policy stages back to back and hand back the resulting pack state.
        matrix = sap.build_scenario_acceptance_matrix(spec, scenario_state, review_summary)
        return sap.derive_truth_harness_pack_state(spec, scenario_state, review_summary, matrix)

    def test_marks_partial_when_selected_object_and_temporal_p0_findings_exist(self) -> None:
        """A failed selected-object step with two critical findings yields ``partial``."""
        supplier_step = {
            "step_id": "step_01",
            "title": "Selected object supplier",
            "question_template": 'По выбранному объекту "Стол": кто поставил?',
            "criticality": "critical",
            "expected_intents": ["inventory_purchase_provenance_for_item"],
        }
        spec = {
            "scenario_id": "demo_phase7",
            "domain": "inventory_demo",
            "title": "Demo",
            "steps": [supplier_step],
        }
        failed_output = {
            "review_status": "fail",
            "reply_type": "factual",
            "detected_intent": "inventory_on_hand_as_of_date",
            "capability_id": "confirmed_inventory_on_hand_as_of_date",
            "review_findings": [
                {"code": "wrong_intent", "severity": "critical"},
                {"code": "wrong_filter:as_of_date", "severity": "critical"},
            ],
        }
        scenario_state = {
            "session_id": "asst-demo",
            "step_outputs": {"step_01": failed_output},
        }
        review_summary = {
            "review_source": "live_strict_replay",
            "overall_status": "fail",
            "steps_total": 1,
            "steps_passed": 0,
            "steps_with_warning": 0,
            "steps_failed": 1,
        }

        pack_state = self._derive_pack_state(spec, scenario_state, review_summary)

        self.assertEqual(pack_state["final_status"], "partial")
        self.assertFalse(pack_state["invariants"]["selected_object_continuity_ok"])
        self.assertFalse(pack_state["invariants"]["temporal_honesty_ok"])
        self.assertEqual(pack_state["unresolved_p0_count"], 2)

    def test_accepts_when_all_review_and_acceptance_invariants_are_green(self) -> None:
        """A passing critical step with no findings yields ``accepted``."""
        inventory_step = {
            "step_id": "step_01",
            "title": "Inventory root",
            "question_template": "какие остатки на складе на март 2021",
            "criticality": "critical",
            "expected_intents": ["inventory_on_hand_as_of_date"],
        }
        spec = {
            "scenario_id": "demo_phase7_green",
            "domain": "inventory_demo",
            "title": "Demo green",
            "steps": [inventory_step],
        }
        passing_output = {
            "review_status": "pass",
            "reply_type": "factual",
            "detected_intent": "inventory_on_hand_as_of_date",
            "capability_id": "confirmed_inventory_on_hand_as_of_date",
            "review_findings": [],
        }
        scenario_state = {
            "session_id": "asst-green",
            "step_outputs": {"step_01": passing_output},
        }
        review_summary = {
            "review_source": "live_strict_replay",
            "overall_status": "pass",
            "steps_total": 1,
            "steps_passed": 1,
            "steps_with_warning": 0,
            "steps_failed": 0,
        }

        pack_state = self._derive_pack_state(spec, scenario_state, review_summary)

        self.assertEqual(pack_state["final_status"], "accepted")
        self.assertTrue(pack_state["acceptance_gate_passed"])
        self.assertTrue(pack_state["critical_path_green"])
        self.assertTrue(all(pack_state["invariants"].values()))
# Run the test cases when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()