Сохранить route-candidate AGENT loop и валидированные автопрогоны
This commit is contained in:
parent
4fcf349894
commit
5f0c4f5ead
|
|
@ -0,0 +1,190 @@
|
|||
{
|
||||
"schema_version": "domain_truth_harness_spec_v1",
|
||||
"scenario_id": "address_truth_harness_phase90_route_candidate_handoff_open_world",
|
||||
"domain": "address_phase90_route_candidate_handoff",
|
||||
"title": "Phase 90 open-world route candidate handoff replay",
|
||||
"description": "Focused semantic replay for Open-World Route Candidate Promotion. The scenario checks that unfamiliar-but-meaningful 1C asks produce a structured internal route candidate, distinguish missing user scope from executable reviewed routes, preserve catalog alignment, and keep unreviewed proof families honest without leaking route/debug mechanics to the user.",
|
||||
"bindings": {},
|
||||
"steps": [
|
||||
{
|
||||
"step_id": "step_01_open_ranking_needs_organization",
|
||||
"title": "Open ranked money-flow ask needs organization scope before execution",
|
||||
"question": "какой контрагент принес больше всего денег за 2020 год?",
|
||||
"allowed_reply_types": [
|
||||
"clarification_required",
|
||||
"partial_coverage"
|
||||
],
|
||||
"expected_catalog_alignment_status": "selected_matches_top",
|
||||
"expected_catalog_chain_top_match": "value_flow_ranking",
|
||||
"expected_catalog_selected_matches_top": true,
|
||||
"expected_route_candidate_status": "needs_user_scope",
|
||||
"expected_route_candidate_executable_now": false,
|
||||
"expected_route_candidate_missing_axes": [
|
||||
"organization"
|
||||
],
|
||||
"required_answer_patterns_any": [
|
||||
"(?i)организац|компан|контур"
|
||||
],
|
||||
"forbidden_answer_patterns": [
|
||||
"(?i)runtime_",
|
||||
"(?i)route_candidate",
|
||||
"(?i)query_movements",
|
||||
"(?i)primitive",
|
||||
"(?i)planner_",
|
||||
"(?i)catalog_"
|
||||
],
|
||||
"criticality": "critical",
|
||||
"semantic_tags": [
|
||||
"route_candidate_handoff",
|
||||
"needs_user_scope",
|
||||
"value_flow_ranking",
|
||||
"open_organization_scope"
|
||||
]
|
||||
},
|
||||
{
|
||||
"step_id": "step_02_company_scope_promotes_ranking_execution",
|
||||
"title": "Natural company clarification promotes the ranking candidate to reviewed execution",
|
||||
"question": "ООО Альтернатива Плюс",
|
||||
"allowed_reply_types": [
|
||||
"factual",
|
||||
"factual_with_explanation",
|
||||
"partial_coverage"
|
||||
],
|
||||
"expected_catalog_alignment_status": "selected_matches_top",
|
||||
"expected_catalog_chain_top_match": "value_flow_ranking",
|
||||
"expected_catalog_selected_matches_top": true,
|
||||
"expected_route_candidate_status": "ready_for_reviewed_execution",
|
||||
"expected_route_candidate_executable_now": true,
|
||||
"required_answer_patterns_all": [
|
||||
"(?i)2020",
|
||||
"(?i)контрагент|клиент|покупател",
|
||||
"(?i)деньг|поступ|выруч|руб",
|
||||
"(?i)подтвержд|проверен|найден"
|
||||
],
|
||||
"forbidden_answer_patterns": [
|
||||
"(?i)runtime_",
|
||||
"(?i)route_candidate",
|
||||
"(?i)query_movements",
|
||||
"(?i)primitive",
|
||||
"(?i)planner_",
|
||||
"(?i)catalog_",
|
||||
"(?i)прибыль.*точно"
|
||||
],
|
||||
"criticality": "critical",
|
||||
"semantic_tags": [
|
||||
"route_candidate_handoff",
|
||||
"ready_for_reviewed_execution",
|
||||
"value_flow_ranking",
|
||||
"clarification_recovery"
|
||||
]
|
||||
},
|
||||
{
|
||||
"step_id": "step_03_counterparty_lifecycle_not_polluted_by_company_scope",
|
||||
"title": "Counterparty lifecycle pivot has its own executable route candidate",
|
||||
"question": "а теперь сколько лет мы работаем с Группа СВК?",
|
||||
"allowed_reply_types": [
|
||||
"factual",
|
||||
"factual_with_explanation",
|
||||
"partial_coverage"
|
||||
],
|
||||
"expected_catalog_alignment_status": "selected_matches_top",
|
||||
"expected_catalog_chain_top_match": "lifecycle",
|
||||
"expected_catalog_selected_matches_top": true,
|
||||
"expected_route_candidate_status": "ready_for_reviewed_execution",
|
||||
"expected_route_candidate_executable_now": true,
|
||||
"required_answer_patterns_all": [
|
||||
"(?i)СВК|Группа СВК",
|
||||
"(?i)активност|1С|подтвержд|проверен",
|
||||
"(?i)лет|год|месяц",
|
||||
"(?i)юридическ|регистрац|не подтвержд|не доказ"
|
||||
],
|
||||
"forbidden_answer_patterns": [
|
||||
"(?i)Альтернатива Плюс.*контрагент",
|
||||
"(?i)runtime_",
|
||||
"(?i)route_candidate",
|
||||
"(?i)query_documents",
|
||||
"(?i)primitive",
|
||||
"(?i)planner_",
|
||||
"(?i)catalog_"
|
||||
],
|
||||
"criticality": "critical",
|
||||
"semantic_tags": [
|
||||
"route_candidate_handoff",
|
||||
"counterparty_lifecycle",
|
||||
"stale_scope_guard"
|
||||
]
|
||||
},
|
||||
{
|
||||
"step_id": "step_04_vat_movements_are_metadata_scoped_not_stale_counterparty",
|
||||
"title": "VAT movement ask uses metadata-scoped movement candidate instead of stale counterparty focus",
|
||||
"question": "покажи движения по НДС за 2020 по ООО Альтернатива Плюс",
|
||||
"allowed_reply_types": [
|
||||
"factual",
|
||||
"factual_with_explanation",
|
||||
"partial_coverage",
|
||||
"checked_sources_only"
|
||||
],
|
||||
"expected_catalog_alignment_status": "selected_matches_top",
|
||||
"expected_catalog_chain_top_match": "movement_evidence",
|
||||
"expected_catalog_selected_matches_top": true,
|
||||
"expected_route_candidate_status": "ready_for_reviewed_execution",
|
||||
"expected_route_candidate_executable_now": true,
|
||||
"required_answer_patterns_all": [
|
||||
"(?i)НДС",
|
||||
"(?i)2020",
|
||||
"(?i)Альтернатива Плюс|организац",
|
||||
"(?i)движен|строк|проверен|подтвержд|источник"
|
||||
],
|
||||
"forbidden_answer_patterns": [
|
||||
"(?i)Группа СВК.*как контрагент",
|
||||
"(?i)runtime_",
|
||||
"(?i)route_candidate",
|
||||
"(?i)query_movements",
|
||||
"(?i)primitive",
|
||||
"(?i)planner_",
|
||||
"(?i)catalog_"
|
||||
],
|
||||
"criticality": "critical",
|
||||
"semantic_tags": [
|
||||
"route_candidate_handoff",
|
||||
"movement_evidence",
|
||||
"metadata_scope",
|
||||
"vat",
|
||||
"stale_scope_guard"
|
||||
]
|
||||
},
|
||||
{
|
||||
"step_id": "step_05_inventory_reserve_boundary_stays_honest",
|
||||
"title": "Unreviewed inventory reserve proof family remains bounded and does not overclaim",
|
||||
"question": "можно ли по этим данным точно подтвердить резерв под неликвиды на складе?",
|
||||
"allowed_reply_types": [
|
||||
"factual",
|
||||
"factual_with_explanation",
|
||||
"partial_coverage",
|
||||
"no_grounded_answer",
|
||||
"out_of_scope"
|
||||
],
|
||||
"required_answer_patterns_all": [
|
||||
"(?i)резерв|неликвид|склад|товар",
|
||||
"(?i)не подтвержд|не доказ|нельзя точно|нет точн",
|
||||
"(?i)провер|нужн|треб"
|
||||
],
|
||||
"forbidden_answer_patterns": [
|
||||
"(?i)точно подтверждаю",
|
||||
"(?i)резерв.*подтвержден",
|
||||
"(?i)runtime_",
|
||||
"(?i)route_candidate",
|
||||
"(?i)primitive",
|
||||
"(?i)planner_",
|
||||
"(?i)catalog_"
|
||||
],
|
||||
"criticality": "critical",
|
||||
"semantic_tags": [
|
||||
"route_candidate_handoff",
|
||||
"no_overclaim",
|
||||
"inventory_reserve_boundary",
|
||||
"unreviewed_proof_family"
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -0,0 +1,190 @@
|
|||
{
|
||||
"schema_version": "domain_truth_harness_spec_v1",
|
||||
"scenario_id": "address_truth_harness_phase91_route_candidate_driven_enablement_loop",
|
||||
"domain": "address_phase91_route_candidate_driven_enablement_loop",
|
||||
"title": "Phase 91 route-candidate-driven enablement loop canary",
|
||||
"description": "Focused semantic replay for the route-candidate-driven enablement loop. The scenario keeps the phase90 user-facing route-candidate handoff behavior as a canary while the development tooling starts grouping route candidates into repair targets and lead-coder handoff artifacts.",
|
||||
"bindings": {},
|
||||
"steps": [
|
||||
{
|
||||
"step_id": "step_01_open_ranking_needs_organization",
|
||||
"title": "Open ranked money-flow ask needs organization scope before execution",
|
||||
"question": "какой контрагент принес больше всего денег за 2020 год?",
|
||||
"allowed_reply_types": [
|
||||
"clarification_required",
|
||||
"partial_coverage"
|
||||
],
|
||||
"expected_catalog_alignment_status": "selected_matches_top",
|
||||
"expected_catalog_chain_top_match": "value_flow_ranking",
|
||||
"expected_catalog_selected_matches_top": true,
|
||||
"expected_route_candidate_status": "needs_user_scope",
|
||||
"expected_route_candidate_executable_now": false,
|
||||
"expected_route_candidate_missing_axes": [
|
||||
"organization"
|
||||
],
|
||||
"required_answer_patterns_any": [
|
||||
"(?i)организац|компан|контур"
|
||||
],
|
||||
"forbidden_answer_patterns": [
|
||||
"(?i)runtime_",
|
||||
"(?i)route_candidate",
|
||||
"(?i)query_movements",
|
||||
"(?i)primitive",
|
||||
"(?i)planner_",
|
||||
"(?i)catalog_"
|
||||
],
|
||||
"criticality": "critical",
|
||||
"semantic_tags": [
|
||||
"route_candidate_handoff",
|
||||
"needs_user_scope",
|
||||
"value_flow_ranking",
|
||||
"open_organization_scope"
|
||||
]
|
||||
},
|
||||
{
|
||||
"step_id": "step_02_company_scope_promotes_ranking_execution",
|
||||
"title": "Natural company clarification promotes the ranking candidate to reviewed execution",
|
||||
"question": "ООО Альтернатива Плюс",
|
||||
"allowed_reply_types": [
|
||||
"factual",
|
||||
"factual_with_explanation",
|
||||
"partial_coverage"
|
||||
],
|
||||
"expected_catalog_alignment_status": "selected_matches_top",
|
||||
"expected_catalog_chain_top_match": "value_flow_ranking",
|
||||
"expected_catalog_selected_matches_top": true,
|
||||
"expected_route_candidate_status": "ready_for_reviewed_execution",
|
||||
"expected_route_candidate_executable_now": true,
|
||||
"required_answer_patterns_all": [
|
||||
"(?i)2020",
|
||||
"(?i)контрагент|клиент|покупател",
|
||||
"(?i)деньг|поступ|выруч|руб",
|
||||
"(?i)подтвержд|проверен|найден"
|
||||
],
|
||||
"forbidden_answer_patterns": [
|
||||
"(?i)runtime_",
|
||||
"(?i)route_candidate",
|
||||
"(?i)query_movements",
|
||||
"(?i)primitive",
|
||||
"(?i)planner_",
|
||||
"(?i)catalog_",
|
||||
"(?i)прибыль.*точно"
|
||||
],
|
||||
"criticality": "critical",
|
||||
"semantic_tags": [
|
||||
"route_candidate_handoff",
|
||||
"ready_for_reviewed_execution",
|
||||
"value_flow_ranking",
|
||||
"clarification_recovery"
|
||||
]
|
||||
},
|
||||
{
|
||||
"step_id": "step_03_counterparty_lifecycle_stale_scope_reset",
|
||||
"title": "Counterparty lifecycle pivot does not inherit the previous company scope",
|
||||
"question": "а теперь сколько лет мы работаем с Группа СВК?",
|
||||
"allowed_reply_types": [
|
||||
"factual",
|
||||
"factual_with_explanation",
|
||||
"partial_coverage"
|
||||
],
|
||||
"expected_catalog_alignment_status": "selected_matches_top",
|
||||
"expected_catalog_chain_top_match": "lifecycle",
|
||||
"expected_catalog_selected_matches_top": true,
|
||||
"expected_route_candidate_status": "ready_for_reviewed_execution",
|
||||
"expected_route_candidate_executable_now": true,
|
||||
"required_answer_patterns_all": [
|
||||
"(?i)СВК|Группа СВК",
|
||||
"(?i)активност|1С|подтвержд|проверен",
|
||||
"(?i)лет|год|месяц",
|
||||
"(?i)юридическ|регистрац|не подтвержд|не доказ"
|
||||
],
|
||||
"forbidden_answer_patterns": [
|
||||
"(?i)Альтернатива Плюс.*контрагент",
|
||||
"(?i)runtime_",
|
||||
"(?i)route_candidate",
|
||||
"(?i)query_documents",
|
||||
"(?i)primitive",
|
||||
"(?i)planner_",
|
||||
"(?i)catalog_"
|
||||
],
|
||||
"criticality": "critical",
|
||||
"semantic_tags": [
|
||||
"route_candidate_handoff",
|
||||
"counterparty_lifecycle",
|
||||
"stale_scope_guard"
|
||||
]
|
||||
},
|
||||
{
|
||||
"step_id": "step_04_vat_movement_candidate_keeps_company_scope",
|
||||
"title": "VAT movement ask keeps metadata scope and company scope together",
|
||||
"question": "покажи движения по НДС за 2020 по ООО Альтернатива Плюс",
|
||||
"allowed_reply_types": [
|
||||
"factual",
|
||||
"factual_with_explanation",
|
||||
"partial_coverage",
|
||||
"checked_sources_only"
|
||||
],
|
||||
"expected_catalog_alignment_status": "selected_matches_top",
|
||||
"expected_catalog_chain_top_match": "movement_evidence",
|
||||
"expected_catalog_selected_matches_top": true,
|
||||
"expected_route_candidate_status": "ready_for_reviewed_execution",
|
||||
"expected_route_candidate_executable_now": true,
|
||||
"required_answer_patterns_all": [
|
||||
"(?i)НДС",
|
||||
"(?i)2020",
|
||||
"(?i)Альтернатива Плюс|организац",
|
||||
"(?i)движен|строк|проверен|подтвержд|источник"
|
||||
],
|
||||
"forbidden_answer_patterns": [
|
||||
"(?i)Группа СВК.*как контрагент",
|
||||
"(?i)runtime_",
|
||||
"(?i)route_candidate",
|
||||
"(?i)query_movements",
|
||||
"(?i)primitive",
|
||||
"(?i)planner_",
|
||||
"(?i)catalog_"
|
||||
],
|
||||
"criticality": "critical",
|
||||
"semantic_tags": [
|
||||
"route_candidate_handoff",
|
||||
"movement_evidence",
|
||||
"metadata_scope",
|
||||
"vat",
|
||||
"stale_scope_guard"
|
||||
]
|
||||
},
|
||||
{
|
||||
"step_id": "step_05_inventory_reserve_boundary_no_overclaim",
|
||||
"title": "Reserve proof request stays bounded instead of pretending enablement is proof",
|
||||
"question": "можно ли по этим данным точно подтвердить резерв под неликвиды на складе?",
|
||||
"allowed_reply_types": [
|
||||
"factual",
|
||||
"factual_with_explanation",
|
||||
"partial_coverage",
|
||||
"no_grounded_answer",
|
||||
"out_of_scope"
|
||||
],
|
||||
"required_answer_patterns_all": [
|
||||
"(?i)резерв|неликвид|склад|товар",
|
||||
"(?i)не подтвержд|не доказ|нельзя точно|нет точн",
|
||||
"(?i)провер|нужн|треб"
|
||||
],
|
||||
"forbidden_answer_patterns": [
|
||||
"(?i)точно подтверждаю",
|
||||
"(?i)резерв.*подтвержден",
|
||||
"(?i)runtime_",
|
||||
"(?i)route_candidate",
|
||||
"(?i)primitive",
|
||||
"(?i)planner_",
|
||||
"(?i)catalog_"
|
||||
],
|
||||
"criticality": "critical",
|
||||
"semantic_tags": [
|
||||
"route_candidate_handoff",
|
||||
"no_overclaim",
|
||||
"inventory_reserve_boundary",
|
||||
"unreviewed_proof_family"
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -0,0 +1,227 @@
|
|||
{
|
||||
"schema_version": "domain_truth_harness_spec_v1",
|
||||
"scenario_id": "address_truth_harness_phase92_proof_family_enablement_candidates",
|
||||
"domain": "address_phase92_proof_family_enablement_candidates",
|
||||
"title": "Phase 92 proof-family enablement candidates",
|
||||
"description": "Focused semantic replay for route-candidate autonomy: exact proof-family asks must stay user-safe while route_candidate marks missing reviewed proof routes as needs_route_enablement.",
|
||||
"bindings": {},
|
||||
"steps": [
|
||||
{
|
||||
"step_id": "step_01_open_ranking_still_needs_company",
|
||||
"title": "Open ranked money-flow ask still needs organization scope",
|
||||
"question": "какой контрагент принес больше всего денег за 2020 год?",
|
||||
"allowed_reply_types": [
|
||||
"clarification_required",
|
||||
"partial_coverage"
|
||||
],
|
||||
"expected_catalog_alignment_status": "selected_matches_top",
|
||||
"expected_catalog_chain_top_match": "value_flow_ranking",
|
||||
"expected_catalog_selected_matches_top": true,
|
||||
"expected_route_candidate_status": "needs_user_scope",
|
||||
"expected_route_candidate_executable_now": false,
|
||||
"expected_route_candidate_missing_axes": [
|
||||
"organization"
|
||||
],
|
||||
"required_answer_patterns_any": [
|
||||
"(?i)организац|компан|контур"
|
||||
],
|
||||
"forbidden_answer_patterns": [
|
||||
"(?i)runtime_",
|
||||
"(?i)route_candidate",
|
||||
"(?i)query_movements",
|
||||
"(?i)primitive",
|
||||
"(?i)planner_",
|
||||
"(?i)catalog_"
|
||||
],
|
||||
"criticality": "critical",
|
||||
"semantic_tags": [
|
||||
"route_candidate_handoff",
|
||||
"needs_user_scope",
|
||||
"value_flow_ranking"
|
||||
]
|
||||
},
|
||||
{
|
||||
"step_id": "step_02_company_clarification_keeps_reviewed_execution",
|
||||
"title": "Company clarification keeps the reviewed value-flow route executable",
|
||||
"question": "ООО Альтернатива Плюс",
|
||||
"allowed_reply_types": [
|
||||
"factual",
|
||||
"factual_with_explanation",
|
||||
"partial_coverage"
|
||||
],
|
||||
"expected_catalog_alignment_status": "selected_matches_top",
|
||||
"expected_catalog_chain_top_match": "value_flow_ranking",
|
||||
"expected_catalog_selected_matches_top": true,
|
||||
"expected_route_candidate_status": "ready_for_reviewed_execution",
|
||||
"expected_route_candidate_executable_now": true,
|
||||
"required_answer_patterns_all": [
|
||||
"(?i)2020",
|
||||
"(?i)контрагент|клиент|покупател",
|
||||
"(?i)деньг|поступ|выруч|руб",
|
||||
"(?i)подтвержд|проверен|найден"
|
||||
],
|
||||
"forbidden_answer_patterns": [
|
||||
"(?i)runtime_",
|
||||
"(?i)route_candidate",
|
||||
"(?i)query_movements",
|
||||
"(?i)primitive",
|
||||
"(?i)planner_",
|
||||
"(?i)catalog_"
|
||||
],
|
||||
"criticality": "critical",
|
||||
"semantic_tags": [
|
||||
"route_candidate_handoff",
|
||||
"ready_for_reviewed_execution",
|
||||
"value_flow_ranking"
|
||||
]
|
||||
},
|
||||
{
|
||||
"step_id": "step_03_profit_margin_needs_reviewed_pnl_route",
|
||||
"title": "Exact profit/margin ask becomes a proof-family enablement candidate",
|
||||
"question": "по ООО Альтернатива Плюс за 2020 можно точно сказать чистую прибыль и маржу?",
|
||||
"allowed_reply_types": [
|
||||
"factual",
|
||||
"factual_with_explanation",
|
||||
"partial_coverage",
|
||||
"no_grounded_answer"
|
||||
],
|
||||
"expected_catalog_alignment_status": "selected_matches_top",
|
||||
"expected_catalog_chain_top_match": "business_overview",
|
||||
"expected_catalog_selected_matches_top": true,
|
||||
"expected_route_candidate_status": "needs_route_enablement",
|
||||
"expected_route_candidate_executable_now": false,
|
||||
"required_answer_patterns_all": [
|
||||
"(?i)прибыл|марж|рентаб",
|
||||
"(?i)не прибыль|не точн|нельзя точно|не подтвержд|не доказ",
|
||||
"(?i)финансов|p&l|расход|себестоим|закрыт"
|
||||
],
|
||||
"forbidden_answer_patterns": [
|
||||
"(?i)точно подтверждаю",
|
||||
"(?i)чистая прибыль.*подтвержден",
|
||||
"(?i)runtime_",
|
||||
"(?i)route_candidate",
|
||||
"(?i)primitive",
|
||||
"(?i)planner_",
|
||||
"(?i)catalog_"
|
||||
],
|
||||
"criticality": "critical",
|
||||
"semantic_tags": [
|
||||
"business_overview",
|
||||
"profit_margin_boundary",
|
||||
"missing_proof_families",
|
||||
"needs_route_enablement"
|
||||
]
|
||||
},
|
||||
{
|
||||
"step_id": "step_04_debt_due_date_needs_payment_terms_route",
|
||||
"title": "Exact overdue/debt-aging ask becomes a proof-family enablement candidate",
|
||||
"question": "по ООО Альтернатива Плюс на конец 2020 можно точно понять, какая дебиторка просрочена?",
|
||||
"allowed_reply_types": [
|
||||
"factual",
|
||||
"factual_with_explanation",
|
||||
"partial_coverage",
|
||||
"no_grounded_answer"
|
||||
],
|
||||
"expected_catalog_alignment_status": "selected_matches_top",
|
||||
"expected_catalog_chain_top_match": "business_overview",
|
||||
"expected_catalog_selected_matches_top": true,
|
||||
"expected_route_candidate_status": "needs_route_enablement",
|
||||
"expected_route_candidate_executable_now": false,
|
||||
"required_answer_patterns_all": [
|
||||
"(?i)дебитор|долг|просроч|срок",
|
||||
"(?i)не due-date|не точн|нельзя точно|не подтвержд|не доказ|нет срок",
|
||||
"(?i)договор|срок оплат|расчет|провер"
|
||||
],
|
||||
"forbidden_answer_patterns": [
|
||||
"(?i)точно подтверждаю",
|
||||
"(?i)просроч.*подтвержден",
|
||||
"(?i)runtime_",
|
||||
"(?i)route_candidate",
|
||||
"(?i)primitive",
|
||||
"(?i)planner_",
|
||||
"(?i)catalog_"
|
||||
],
|
||||
"criticality": "critical",
|
||||
"semantic_tags": [
|
||||
"business_overview",
|
||||
"debt_due_date_boundary",
|
||||
"missing_proof_families",
|
||||
"needs_route_enablement"
|
||||
]
|
||||
},
|
||||
{
|
||||
"step_id": "step_05_vendor_risk_needs_procurement_quality_route",
|
||||
"title": "Exact vendor-risk ask becomes a proof-family enablement candidate",
|
||||
"question": "по ООО Альтернатива Плюс за 2020 есть ли риск, что мы зависим от одного поставщика?",
|
||||
"allowed_reply_types": [
|
||||
"factual",
|
||||
"factual_with_explanation",
|
||||
"partial_coverage",
|
||||
"no_grounded_answer"
|
||||
],
|
||||
"expected_catalog_alignment_status": "selected_matches_top",
|
||||
"expected_catalog_chain_top_match": "business_overview",
|
||||
"expected_catalog_selected_matches_top": true,
|
||||
"expected_route_candidate_status": "needs_route_enablement",
|
||||
"expected_route_candidate_executable_now": false,
|
||||
"required_answer_patterns_all": [
|
||||
"(?i)поставщик|закуп|исходящ",
|
||||
"(?i)риск|зависим|концентрац",
|
||||
"(?i)proxy|не полный|не точн|нельзя точно|не подтвержд|не доказ"
|
||||
],
|
||||
"forbidden_answer_patterns": [
|
||||
"(?i)точно подтверждаю",
|
||||
"(?i)vendor.*risk.*подтвержден",
|
||||
"(?i)runtime_",
|
||||
"(?i)route_candidate",
|
||||
"(?i)primitive",
|
||||
"(?i)planner_",
|
||||
"(?i)catalog_"
|
||||
],
|
||||
"criticality": "critical",
|
||||
"semantic_tags": [
|
||||
"business_overview",
|
||||
"vendor_risk_procurement_boundary",
|
||||
"missing_proof_families",
|
||||
"needs_route_enablement"
|
||||
]
|
||||
},
|
||||
{
|
||||
"step_id": "step_06_inventory_reserve_needs_quality_route",
|
||||
"title": "Exact reserve/liquidation ask becomes a proof-family enablement candidate",
|
||||
"question": "можно ли по ООО Альтернатива Плюс точно подтвердить резерв под неликвиды на складе?",
|
||||
"allowed_reply_types": [
|
||||
"factual",
|
||||
"factual_with_explanation",
|
||||
"partial_coverage",
|
||||
"no_grounded_answer"
|
||||
],
|
||||
"expected_catalog_alignment_status": "selected_matches_top",
|
||||
"expected_catalog_chain_top_match": "business_overview",
|
||||
"expected_catalog_selected_matches_top": true,
|
||||
"expected_route_candidate_status": "needs_route_enablement",
|
||||
"expected_route_candidate_executable_now": false,
|
||||
"required_answer_patterns_all": [
|
||||
"(?i)резерв|неликвид|склад|товар",
|
||||
"(?i)не подтвержд|не доказ|нельзя точно|нет точн",
|
||||
"(?i)списан|ликвидац|учетн|провер"
|
||||
],
|
||||
"forbidden_answer_patterns": [
|
||||
"(?i)точно подтверждаю",
|
||||
"(?i)резерв.*подтвержден",
|
||||
"(?i)runtime_",
|
||||
"(?i)route_candidate",
|
||||
"(?i)primitive",
|
||||
"(?i)planner_",
|
||||
"(?i)catalog_"
|
||||
],
|
||||
"criticality": "critical",
|
||||
"semantic_tags": [
|
||||
"business_overview",
|
||||
"inventory_reserve_boundary",
|
||||
"missing_proof_families",
|
||||
"needs_route_enablement"
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -0,0 +1,221 @@
|
|||
{
|
||||
"schema_version": "domain_truth_harness_spec_v1",
|
||||
"scenario_id": "address_truth_harness_phase93_accounting_profit_margin_reviewed_route",
|
||||
"domain": "address_phase93_accounting_profit_margin_reviewed_route",
|
||||
"title": "Phase 93 accounting profit-margin reviewed route",
|
||||
"description": "Focused semantic replay for promoting accounting_profit_margin from needs_route_enablement to a reviewed 90/91/99 accounting financial-result route while preserving neighbouring proof-family boundaries.",
|
||||
"bindings": {},
|
||||
"steps": [
|
||||
{
|
||||
"step_id": "step_01_profit_margin_uses_accounting_result",
|
||||
"title": "Exact profit and margin ask uses reviewed 90/91/99 evidence",
|
||||
"question": "по ООО Альтернатива Плюс за 2020 можно точно сказать чистую прибыль и маржу?",
|
||||
"allowed_reply_types": [
|
||||
"factual",
|
||||
"factual_with_explanation",
|
||||
"partial_coverage"
|
||||
],
|
||||
"expected_catalog_alignment_status": "selected_matches_top",
|
||||
"expected_catalog_chain_top_match": "business_overview",
|
||||
"expected_catalog_selected_matches_top": true,
|
||||
"expected_route_candidate_status": "ready_for_reviewed_execution",
|
||||
"expected_route_candidate_executable_now": true,
|
||||
"required_answer_patterns_all": [
|
||||
"(?i)90/91/99|90\\.01|99",
|
||||
"(?i)учетн|финрезульт|прибыл|убыт",
|
||||
"(?i)марж|рентаб",
|
||||
"(?i)2020",
|
||||
"(?i)1С|проверенн|найденн|подтвержд"
|
||||
],
|
||||
"required_answer_patterns_any": [
|
||||
"7[\\s\\u00a0]*136[\\s\\u00a0]*815",
|
||||
"(?i)минус|убыт"
|
||||
],
|
||||
"forbidden_answer_patterns": [
|
||||
"(?i)только bounded operating-flow/trading-margin proxy",
|
||||
"(?i)route_candidate",
|
||||
"(?i)query_movements",
|
||||
"(?i)primitive",
|
||||
"(?i)planner_",
|
||||
"(?i)catalog_"
|
||||
],
|
||||
"criticality": "critical",
|
||||
"semantic_tags": [
|
||||
"business_overview",
|
||||
"profit_margin_boundary",
|
||||
"accounting_profit_margin",
|
||||
"ready_for_reviewed_execution"
|
||||
]
|
||||
},
|
||||
{
|
||||
"step_id": "step_02_short_followup_keeps_accounting_context",
|
||||
"title": "Short follow-up keeps company, year, and accounting result context",
|
||||
"question": "а это прибыль или убыток, коротко?",
|
||||
"allowed_reply_types": [
|
||||
"factual",
|
||||
"factual_with_explanation",
|
||||
"partial_coverage"
|
||||
],
|
||||
"expected_catalog_alignment_status": "selected_matches_top",
|
||||
"expected_catalog_chain_top_match": "business_overview",
|
||||
"expected_catalog_selected_matches_top": true,
|
||||
"expected_route_candidate_status": "ready_for_reviewed_execution",
|
||||
"expected_route_candidate_executable_now": true,
|
||||
"required_answer_patterns_all": [
|
||||
"(?i)убыт|минус",
|
||||
"(?i)2020",
|
||||
"(?i)90/91/99|90\\.01|99|учетн"
|
||||
],
|
||||
"forbidden_answer_patterns": [
|
||||
"(?i)уточните организац",
|
||||
"(?i)какую компанию",
|
||||
"(?i)route_candidate",
|
||||
"(?i)primitive",
|
||||
"(?i)planner_",
|
||||
"(?i)catalog_"
|
||||
],
|
||||
"criticality": "critical",
|
||||
"semantic_tags": [
|
||||
"business_overview",
|
||||
"context_carryover",
|
||||
"profit_margin_boundary",
|
||||
"accounting_profit_margin"
|
||||
]
|
||||
},
|
||||
{
|
||||
"step_id": "step_03_debt_due_date_boundary_still_needs_route",
|
||||
"title": "Debt due-date boundary stays an enablement candidate",
|
||||
"question": "по этой же компании на конец 2020 можно точно понять, какая дебиторка просрочена?",
|
||||
"allowed_reply_types": [
|
||||
"factual",
|
||||
"factual_with_explanation",
|
||||
"partial_coverage",
|
||||
"no_grounded_answer"
|
||||
],
|
||||
"expected_catalog_alignment_status": "selected_matches_top",
|
||||
"expected_catalog_chain_top_match": "business_overview",
|
||||
"expected_catalog_selected_matches_top": true,
|
||||
"expected_route_candidate_status": "needs_route_enablement",
|
||||
"expected_route_candidate_executable_now": false,
|
||||
"required_answer_patterns_all": [
|
||||
"(?i)дебитор|долг|просроч|срок",
|
||||
"(?i)не due-date|не точн|нельзя точно|не подтвержд|не доказ|нет срок",
|
||||
"(?i)договор|срок оплат|расчет|провер"
|
||||
],
|
||||
"forbidden_answer_patterns": [
|
||||
"(?i)точно подтверждаю",
|
||||
"(?i)просроч.*подтвержден",
|
||||
"(?i)route_candidate",
|
||||
"(?i)primitive",
|
||||
"(?i)planner_",
|
||||
"(?i)catalog_"
|
||||
],
|
||||
"criticality": "critical",
|
||||
"semantic_tags": [
|
||||
"business_overview",
|
||||
"debt_due_date_boundary",
|
||||
"missing_proof_families",
|
||||
"needs_route_enablement"
|
||||
]
|
||||
},
|
||||
{
|
||||
"step_id": "step_04_vat_continuity_still_answers",
|
||||
"title": "VAT continuity still answers from the reviewed tax route",
|
||||
"question": "тогда НДС за 2020 по ООО Альтернатива Плюс какой?",
|
||||
"allowed_reply_types": [
|
||||
"factual",
|
||||
"factual_with_explanation",
|
||||
"partial_coverage"
|
||||
],
|
||||
"required_answer_patterns_all": [
|
||||
"(?i)НДС|VAT|налог",
|
||||
"(?i)2020",
|
||||
"(?i)продаж|покуп|к уплат|к возмещ|зачет",
|
||||
"(?i)подтвержд|проверенн|1С"
|
||||
],
|
||||
"forbidden_answer_patterns": [
|
||||
"(?i)route_candidate",
|
||||
"(?i)primitive",
|
||||
"(?i)planner_",
|
||||
"(?i)catalog_"
|
||||
],
|
||||
"criticality": "critical",
|
||||
"semantic_tags": [
|
||||
"business_overview",
|
||||
"vat_continuity",
|
||||
"ready_for_reviewed_execution"
|
||||
]
|
||||
},
|
||||
{
|
||||
"step_id": "step_05_value_flow_ranking_context_still_works",
|
||||
"title": "Value-flow ranking still uses carried organization context",
|
||||
"question": "а какой контрагент принес больше всего денег за 2020?",
|
||||
"allowed_reply_types": [
|
||||
"factual",
|
||||
"factual_with_explanation",
|
||||
"partial_coverage"
|
||||
],
|
||||
"expected_catalog_alignment_status": "selected_matches_top",
|
||||
"expected_catalog_chain_top_match": "value_flow_ranking",
|
||||
"expected_catalog_selected_matches_top": true,
|
||||
"expected_route_candidate_status": "ready_for_reviewed_execution",
|
||||
"expected_route_candidate_executable_now": true,
|
||||
"required_answer_patterns_all": [
|
||||
"(?i)2020",
|
||||
"(?i)контрагент|клиент|покупател",
|
||||
"(?i)деньг|поступ|выруч|руб",
|
||||
"(?i)подтвержд|проверенн|найден"
|
||||
],
|
||||
"forbidden_answer_patterns": [
|
||||
"(?i)уточните организац",
|
||||
"(?i)какую компанию",
|
||||
"(?i)route_candidate",
|
||||
"(?i)query_movements",
|
||||
"(?i)primitive",
|
||||
"(?i)planner_",
|
||||
"(?i)catalog_"
|
||||
],
|
||||
"criticality": "critical",
|
||||
"semantic_tags": [
|
||||
"value_flow_ranking",
|
||||
"context_carryover",
|
||||
"ready_for_reviewed_execution"
|
||||
]
|
||||
},
|
||||
{
|
||||
"step_id": "step_06_inventory_reserve_boundary_still_needs_route",
|
||||
"title": "Inventory reserve boundary remains honest and bounded",
|
||||
"question": "по ООО Альтернатива Плюс на конец 2020 можно точно подтвердить резерв под неликвиды на складе?",
|
||||
"allowed_reply_types": [
|
||||
"factual",
|
||||
"factual_with_explanation",
|
||||
"partial_coverage",
|
||||
"no_grounded_answer"
|
||||
],
|
||||
"expected_catalog_alignment_status": "selected_matches_top",
|
||||
"expected_catalog_chain_top_match": "business_overview",
|
||||
"expected_catalog_selected_matches_top": true,
|
||||
"expected_route_candidate_status": "needs_route_enablement",
|
||||
"expected_route_candidate_executable_now": false,
|
||||
"required_answer_patterns_all": [
|
||||
"(?i)резерв|неликвид|склад|товар",
|
||||
"(?i)не подтвержд|не доказ|нельзя точно|нет точн",
|
||||
"(?i)списан|ликвидац|учетн|провер"
|
||||
],
|
||||
"forbidden_answer_patterns": [
|
||||
"(?i)точно подтверждаю",
|
||||
"(?i)route_candidate",
|
||||
"(?i)primitive",
|
||||
"(?i)planner_",
|
||||
"(?i)catalog_"
|
||||
],
|
||||
"criticality": "critical",
|
||||
"semantic_tags": [
|
||||
"business_overview",
|
||||
"inventory_reserve_boundary",
|
||||
"missing_proof_families",
|
||||
"needs_route_enablement"
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -0,0 +1,254 @@
|
|||
{
|
||||
"schema_version": "domain_truth_harness_spec_v1",
|
||||
"scenario_id": "address_truth_harness_phase94_debt_due_date_aging_reviewed_route",
|
||||
"domain": "address_phase94_debt_due_date_aging_reviewed_route",
|
||||
"title": "Phase 94 debt due-date aging reviewed route",
|
||||
"description": "Focused semantic replay for promoting debt_due_date_aging_quality from proxy-only enablement to a reviewed payment-term/open-balance route while preserving profit, VAT, inventory, and vendor-risk boundaries.",
|
||||
"bindings": {},
|
||||
"steps": [
|
||||
{
|
||||
"step_id": "step_01_debt_due_date_checked_negative",
|
||||
"title": "Debt due-date ask uses reviewed payment-term evidence",
|
||||
"question": "по ООО Альтернатива Плюс на конец 2020 можно точно понять, какая дебиторка просрочена?",
|
||||
"allowed_reply_types": [
|
||||
"factual",
|
||||
"factual_with_explanation",
|
||||
"partial_coverage"
|
||||
],
|
||||
"expected_catalog_alignment_status": "selected_matches_top",
|
||||
"expected_catalog_chain_top_match": "business_overview",
|
||||
"expected_catalog_selected_matches_top": true,
|
||||
"expected_route_candidate_status": "ready_for_reviewed_execution",
|
||||
"expected_route_candidate_executable_now": true,
|
||||
"required_answer_patterns_all": [
|
||||
"(?i)дебитор|долг|открыт.*расчет|остат",
|
||||
"(?i)2020|2020-12-31|конец 2020",
|
||||
"(?i)срок.*оплат|due[- ]?date|просроч",
|
||||
"(?i)не установлен|не подтвержд|не доказ|нет подтвержденной просроч",
|
||||
"(?i)1С|проверенн|найденн|подтвержд"
|
||||
],
|
||||
"forbidden_answer_patterns": [
|
||||
"(?i)точно подтверждаю.*просроч",
|
||||
"(?i)просроч.*подтвержден.*без.*срок",
|
||||
"(?i)route_candidate",
|
||||
"(?i)query_movements",
|
||||
"(?i)primitive",
|
||||
"(?i)planner_",
|
||||
"(?i)catalog_"
|
||||
],
|
||||
"criticality": "critical",
|
||||
"semantic_tags": [
|
||||
"business_overview",
|
||||
"debt_due_date_boundary",
|
||||
"debt_due_date_aging_quality",
|
||||
"ready_for_reviewed_execution",
|
||||
"checked_negative"
|
||||
]
|
||||
},
|
||||
{
|
||||
"step_id": "step_02_short_followup_preserves_due_date_boundary",
|
||||
"title": "Short follow-up keeps debt due-date context",
|
||||
"question": "то есть просрочку доказать нельзя, коротко почему?",
|
||||
"allowed_reply_types": [
|
||||
"factual",
|
||||
"factual_with_explanation",
|
||||
"partial_coverage"
|
||||
],
|
||||
"expected_catalog_alignment_status": "selected_matches_top",
|
||||
"expected_catalog_chain_top_match": "business_overview",
|
||||
"expected_catalog_selected_matches_top": true,
|
||||
"expected_route_candidate_status": "ready_for_reviewed_execution",
|
||||
"expected_route_candidate_executable_now": true,
|
||||
"required_answer_patterns_all": [
|
||||
"(?i)срок.*оплат|due[- ]?date|договор",
|
||||
"(?i)не установлен|не хватает|не подтвержд|не доказ",
|
||||
"(?i)просроч",
|
||||
"(?i)2020|2020-12-31|конец 2020"
|
||||
],
|
||||
"forbidden_answer_patterns": [
|
||||
"(?i)уточните организац",
|
||||
"(?i)какую компанию",
|
||||
"(?i)route_candidate",
|
||||
"(?i)primitive",
|
||||
"(?i)planner_",
|
||||
"(?i)catalog_"
|
||||
],
|
||||
"criticality": "critical",
|
||||
"semantic_tags": [
|
||||
"business_overview",
|
||||
"context_carryover",
|
||||
"debt_due_date_boundary",
|
||||
"checked_negative"
|
||||
]
|
||||
},
|
||||
{
|
||||
"step_id": "step_03_profit_margin_canary_still_reviewed",
|
||||
"title": "Profit/margin canary still uses accounting result route",
|
||||
"question": "а чистая прибыль и маржа за 2020 по этой же компании какие?",
|
||||
"allowed_reply_types": [
|
||||
"factual",
|
||||
"factual_with_explanation",
|
||||
"partial_coverage"
|
||||
],
|
||||
"expected_catalog_alignment_status": "selected_matches_top",
|
||||
"expected_catalog_chain_top_match": "business_overview",
|
||||
"expected_catalog_selected_matches_top": true,
|
||||
"expected_route_candidate_status": "ready_for_reviewed_execution",
|
||||
"expected_route_candidate_executable_now": true,
|
||||
"required_answer_patterns_all": [
|
||||
"(?i)90/91/99|90\\.01|99",
|
||||
"(?i)учетн|финрезульт|прибыл|убыт",
|
||||
"(?i)марж|рентаб",
|
||||
"(?i)2020"
|
||||
],
|
||||
"forbidden_answer_patterns": [
|
||||
"(?i)только bounded operating-flow/trading-margin proxy",
|
||||
"(?i)route_candidate",
|
||||
"(?i)primitive",
|
||||
"(?i)planner_",
|
||||
"(?i)catalog_"
|
||||
],
|
||||
"criticality": "critical",
|
||||
"semantic_tags": [
|
||||
"business_overview",
|
||||
"profit_margin_boundary",
|
||||
"accounting_profit_margin",
|
||||
"canary"
|
||||
]
|
||||
},
|
||||
{
|
||||
"step_id": "step_04_vat_canary_still_answers",
|
||||
"title": "VAT continuity canary still answers from reviewed tax route",
|
||||
"question": "НДС за 2020 по ООО Альтернатива Плюс какой?",
|
||||
"allowed_reply_types": [
|
||||
"factual",
|
||||
"factual_with_explanation",
|
||||
"partial_coverage"
|
||||
],
|
||||
"required_answer_patterns_all": [
|
||||
"(?i)НДС|VAT|налог",
|
||||
"(?i)2020",
|
||||
"(?i)продаж|покуп|к уплат|к возмещ|зачет",
|
||||
"(?i)подтвержд|проверенн|1С"
|
||||
],
|
||||
"forbidden_answer_patterns": [
|
||||
"(?i)route_candidate",
|
||||
"(?i)primitive",
|
||||
"(?i)planner_",
|
||||
"(?i)catalog_"
|
||||
],
|
||||
"criticality": "critical",
|
||||
"semantic_tags": [
|
||||
"business_overview",
|
||||
"vat_continuity",
|
||||
"canary"
|
||||
]
|
||||
},
|
||||
{
|
||||
"step_id": "step_05_value_flow_ranking_context_still_works",
|
||||
"title": "Value-flow ranking still uses carried organization context",
|
||||
"question": "а кто принес больше всего денег за 2020?",
|
||||
"allowed_reply_types": [
|
||||
"factual",
|
||||
"factual_with_explanation",
|
||||
"partial_coverage"
|
||||
],
|
||||
"expected_catalog_alignment_status": "selected_matches_top",
|
||||
"expected_catalog_chain_top_match": "value_flow_ranking",
|
||||
"expected_catalog_selected_matches_top": true,
|
||||
"expected_route_candidate_status": "ready_for_reviewed_execution",
|
||||
"expected_route_candidate_executable_now": true,
|
||||
"required_answer_patterns_all": [
|
||||
"(?i)2020",
|
||||
"(?i)контрагент|клиент|покупател",
|
||||
"(?i)деньг|поступ|выруч|руб",
|
||||
"(?i)подтвержд|проверенн|найден"
|
||||
],
|
||||
"forbidden_answer_patterns": [
|
||||
"(?i)уточните организац",
|
||||
"(?i)какую компанию",
|
||||
"(?i)route_candidate",
|
||||
"(?i)query_movements",
|
||||
"(?i)primitive",
|
||||
"(?i)planner_",
|
||||
"(?i)catalog_"
|
||||
],
|
||||
"criticality": "critical",
|
||||
"semantic_tags": [
|
||||
"value_flow_ranking",
|
||||
"context_carryover",
|
||||
"canary"
|
||||
]
|
||||
},
|
||||
{
|
||||
"step_id": "step_06_inventory_reserve_boundary_still_needs_route",
|
||||
"title": "Inventory reserve boundary remains honest and bounded",
|
||||
"question": "по ООО Альтернатива Плюс на конец 2020 можно точно подтвердить резерв под неликвиды на складе?",
|
||||
"allowed_reply_types": [
|
||||
"factual",
|
||||
"factual_with_explanation",
|
||||
"partial_coverage",
|
||||
"no_grounded_answer"
|
||||
],
|
||||
"expected_catalog_alignment_status": "selected_matches_top",
|
||||
"expected_catalog_chain_top_match": "business_overview",
|
||||
"expected_catalog_selected_matches_top": true,
|
||||
"expected_route_candidate_status": "needs_route_enablement",
|
||||
"expected_route_candidate_executable_now": false,
|
||||
"required_answer_patterns_all": [
|
||||
"(?i)резерв|неликвид|склад|товар",
|
||||
"(?i)не подтвержд|не доказ|нельзя точно|нет точн",
|
||||
"(?i)списан|ликвидац|учетн|провер"
|
||||
],
|
||||
"forbidden_answer_patterns": [
|
||||
"(?i)точно подтверждаю",
|
||||
"(?i)route_candidate",
|
||||
"(?i)primitive",
|
||||
"(?i)planner_",
|
||||
"(?i)catalog_"
|
||||
],
|
||||
"criticality": "critical",
|
||||
"semantic_tags": [
|
||||
"business_overview",
|
||||
"inventory_reserve_boundary",
|
||||
"missing_proof_families",
|
||||
"canary"
|
||||
]
|
||||
},
|
||||
{
|
||||
"step_id": "step_07_vendor_risk_boundary_still_needs_route",
|
||||
"title": "Vendor-risk boundary stays bounded",
|
||||
"question": "а зависимость от одного поставщика за 2020 можно точно оценить?",
|
||||
"allowed_reply_types": [
|
||||
"factual",
|
||||
"factual_with_explanation",
|
||||
"partial_coverage",
|
||||
"no_grounded_answer"
|
||||
],
|
||||
"expected_catalog_alignment_status": "selected_matches_top",
|
||||
"expected_catalog_chain_top_match": "business_overview",
|
||||
"expected_catalog_selected_matches_top": true,
|
||||
"expected_route_candidate_status": "needs_route_enablement",
|
||||
"expected_route_candidate_executable_now": false,
|
||||
"required_answer_patterns_all": [
|
||||
"(?i)поставщик|vendor|supplier|закуп",
|
||||
"(?i)не подтвержд|не доказ|нельзя точно|proxy|сигнал",
|
||||
"(?i)концентрац|зависим"
|
||||
],
|
||||
"forbidden_answer_patterns": [
|
||||
"(?i)точно подтверждаю",
|
||||
"(?i)route_candidate",
|
||||
"(?i)primitive",
|
||||
"(?i)planner_",
|
||||
"(?i)catalog_"
|
||||
],
|
||||
"criticality": "critical",
|
||||
"semantic_tags": [
|
||||
"business_overview",
|
||||
"vendor_risk_procurement_boundary",
|
||||
"missing_proof_families",
|
||||
"canary"
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
File diff suppressed because one or more lines are too long
|
|
@ -1,83 +0,0 @@
|
|||
{
|
||||
"saved_at": "2026-04-17T08:08:08+00:00",
|
||||
"generation_id": "gen-ag04170808-1907fa",
|
||||
"mode": "saved_user_sessions",
|
||||
"title": "AGENT | Phase 4 coverage/evidence replay for counterparty fallback, inventory reset, and selected-object provenance",
|
||||
"agent_run": true,
|
||||
"questions": [
|
||||
"покажи все документы по чепурнову",
|
||||
"что нам отгружал чепурнов, какой товар или услугу?",
|
||||
"какие остатки на складе на март 2021",
|
||||
"По выбранному объекту \"Столешница 600*3050*26 альмандин\": кто нам это поставил?",
|
||||
"покажи еще раз остатки на эту же дату"
|
||||
],
|
||||
"metadata": {
|
||||
"assistant_prompt_version": null,
|
||||
"decomposition_prompt_version": null,
|
||||
"prompt_fingerprint": null,
|
||||
"agent_focus": "coverage/evidence contract on factual, fallback, and root-reset branches",
|
||||
"architecture_phase": "turnaround_11_phase4",
|
||||
"source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\address_truth_harness_phase4_coverage_evidence_mix.json"
|
||||
},
|
||||
"source_session_id": null,
|
||||
"session": {
|
||||
"session_id": null,
|
||||
"mode": "agent_semantic_run",
|
||||
"items": [
|
||||
{
|
||||
"message_id": "agent-user-001",
|
||||
"role": "user",
|
||||
"text": "покажи все документы по чепурнову",
|
||||
"created_at": "2026-04-17T08:08:08+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-002",
|
||||
"role": "user",
|
||||
"text": "что нам отгружал чепурнов, какой товар или услугу?",
|
||||
"created_at": "2026-04-17T08:08:08+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-003",
|
||||
"role": "user",
|
||||
"text": "какие остатки на складе на март 2021",
|
||||
"created_at": "2026-04-17T08:08:08+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-004",
|
||||
"role": "user",
|
||||
"text": "По выбранному объекту \"Столешница 600*3050*26 альмандин\": кто нам это поставил?",
|
||||
"created_at": "2026-04-17T08:08:08+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-005",
|
||||
"role": "user",
|
||||
"text": "покажи еще раз остатки на эту же дату",
|
||||
"created_at": "2026-04-17T08:08:08+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
}
|
||||
],
|
||||
"agent_run": true,
|
||||
"metadata": {
|
||||
"assistant_prompt_version": null,
|
||||
"decomposition_prompt_version": null,
|
||||
"prompt_fingerprint": null,
|
||||
"agent_focus": "coverage/evidence contract on factual, fallback, and root-reset branches",
|
||||
"architecture_phase": "turnaround_11_phase4",
|
||||
"source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\address_truth_harness_phase4_coverage_evidence_mix.json"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,93 +0,0 @@
|
|||
{
|
||||
"saved_at": "2026-04-17T08:30:44+00:00",
|
||||
"generation_id": "gen-ag04170830-5f771d",
|
||||
"mode": "saved_user_sessions",
|
||||
"title": "AGENT | Phase 5 meta and memory recap replay over interrupted address context",
|
||||
"agent_run": true,
|
||||
"questions": [
|
||||
"какие остатки на складе на март 2021",
|
||||
"а исторические остатки тоже можешь?",
|
||||
"по какой компании мы сейчас работаем?",
|
||||
"По выбранному объекту \"Столешница 600*3050*26 альмандин\": кто нам это поставил?",
|
||||
"что ты умеешь?",
|
||||
"а ты помнишь, что мы по этой позиции уже выяснили?"
|
||||
],
|
||||
"metadata": {
|
||||
"assistant_prompt_version": null,
|
||||
"decomposition_prompt_version": null,
|
||||
"prompt_fingerprint": null,
|
||||
"agent_focus": "meta and memory recap replay over interrupted address context",
|
||||
"architecture_phase": "turnaround_11_phase5",
|
||||
"source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\address_truth_harness_phase5_meta_memory_mix.json"
|
||||
},
|
||||
"source_session_id": null,
|
||||
"session": {
|
||||
"session_id": null,
|
||||
"mode": "agent_semantic_run",
|
||||
"items": [
|
||||
{
|
||||
"message_id": "agent-user-001",
|
||||
"role": "user",
|
||||
"text": "какие остатки на складе на март 2021",
|
||||
"created_at": "2026-04-17T08:30:44+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-002",
|
||||
"role": "user",
|
||||
"text": "а исторические остатки тоже можешь?",
|
||||
"created_at": "2026-04-17T08:30:44+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-003",
|
||||
"role": "user",
|
||||
"text": "по какой компании мы сейчас работаем?",
|
||||
"created_at": "2026-04-17T08:30:44+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-004",
|
||||
"role": "user",
|
||||
"text": "По выбранному объекту \"Столешница 600*3050*26 альмандин\": кто нам это поставил?",
|
||||
"created_at": "2026-04-17T08:30:44+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-005",
|
||||
"role": "user",
|
||||
"text": "что ты умеешь?",
|
||||
"created_at": "2026-04-17T08:30:44+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-006",
|
||||
"role": "user",
|
||||
"text": "а ты помнишь, что мы по этой позиции уже выяснили?",
|
||||
"created_at": "2026-04-17T08:30:44+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
}
|
||||
],
|
||||
"agent_run": true,
|
||||
"metadata": {
|
||||
"assistant_prompt_version": null,
|
||||
"decomposition_prompt_version": null,
|
||||
"prompt_fingerprint": null,
|
||||
"agent_focus": "meta and memory recap replay over interrupted address context",
|
||||
"architecture_phase": "turnaround_11_phase5",
|
||||
"source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\address_truth_harness_phase5_meta_memory_mix.json"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,83 +0,0 @@
|
|||
{
|
||||
"saved_at": "2026-04-17T08:55:50+00:00",
|
||||
"generation_id": "gen-ag04170855-d13dd3",
|
||||
"mode": "saved_user_sessions",
|
||||
"title": "AGENT | Phase 6 provider/runtime replay across chat, meta, and address boundaries",
|
||||
"agent_run": true,
|
||||
"questions": [
|
||||
"привет, как дела?",
|
||||
"по какой компании мы сейчас работаем?",
|
||||
"что ты можешь по 1С?",
|
||||
"какие остатки на складе на март 2021",
|
||||
"а исторические остатки тоже можешь?"
|
||||
],
|
||||
"metadata": {
|
||||
"assistant_prompt_version": null,
|
||||
"decomposition_prompt_version": null,
|
||||
"prompt_fingerprint": null,
|
||||
"agent_focus": "provider runtime axis hardening across chat meta and address boundaries",
|
||||
"architecture_phase": "turnaround_11_phase6",
|
||||
"source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\address_truth_harness_phase6_provider_axis_mix.json"
|
||||
},
|
||||
"source_session_id": null,
|
||||
"session": {
|
||||
"session_id": null,
|
||||
"mode": "agent_semantic_run",
|
||||
"items": [
|
||||
{
|
||||
"message_id": "agent-user-001",
|
||||
"role": "user",
|
||||
"text": "привет, как дела?",
|
||||
"created_at": "2026-04-17T08:55:50+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-002",
|
||||
"role": "user",
|
||||
"text": "по какой компании мы сейчас работаем?",
|
||||
"created_at": "2026-04-17T08:55:50+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-003",
|
||||
"role": "user",
|
||||
"text": "что ты можешь по 1С?",
|
||||
"created_at": "2026-04-17T08:55:50+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-004",
|
||||
"role": "user",
|
||||
"text": "какие остатки на складе на март 2021",
|
||||
"created_at": "2026-04-17T08:55:50+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-005",
|
||||
"role": "user",
|
||||
"text": "а исторические остатки тоже можешь?",
|
||||
"created_at": "2026-04-17T08:55:50+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
}
|
||||
],
|
||||
"agent_run": true,
|
||||
"metadata": {
|
||||
"assistant_prompt_version": null,
|
||||
"decomposition_prompt_version": null,
|
||||
"prompt_fingerprint": null,
|
||||
"agent_focus": "provider runtime axis hardening across chat meta and address boundaries",
|
||||
"architecture_phase": "turnaround_11_phase6",
|
||||
"source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\address_truth_harness_phase6_provider_axis_mix.json"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,93 +0,0 @@
|
|||
{
|
||||
"saved_at": "2026-04-17T09:11:27+00:00",
|
||||
"generation_id": "gen-ag04170911-ff51e1",
|
||||
"mode": "saved_user_sessions",
|
||||
"title": "AGENT | Phase 7 acceptance replay for inventory root, selected-object continuity, and human meta boundaries",
|
||||
"agent_run": true,
|
||||
"questions": [
|
||||
"какие остатки на складе на март 2021",
|
||||
"По выбранному объекту \"Столешница 600*3050*26 альмандин\": кто нам это поставил?",
|
||||
"По выбранному объекту \"Столешница 600*3050*26 альмандин\": покажи документы по этой позиции",
|
||||
"покажи еще раз остатки на эту же дату",
|
||||
"по какой компании мы сейчас работаем?",
|
||||
"а исторические остатки тоже можешь?"
|
||||
],
|
||||
"metadata": {
|
||||
"assistant_prompt_version": null,
|
||||
"decomposition_prompt_version": null,
|
||||
"prompt_fingerprint": null,
|
||||
"agent_focus": "scenario acceptance gate over root selected-object restore and human meta",
|
||||
"architecture_phase": "turnaround_11_phase7",
|
||||
"source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\address_truth_harness_phase7_acceptance_gate_mix.json"
|
||||
},
|
||||
"source_session_id": null,
|
||||
"session": {
|
||||
"session_id": null,
|
||||
"mode": "agent_semantic_run",
|
||||
"items": [
|
||||
{
|
||||
"message_id": "agent-user-001",
|
||||
"role": "user",
|
||||
"text": "какие остатки на складе на март 2021",
|
||||
"created_at": "2026-04-17T09:11:27+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-002",
|
||||
"role": "user",
|
||||
"text": "По выбранному объекту \"Столешница 600*3050*26 альмандин\": кто нам это поставил?",
|
||||
"created_at": "2026-04-17T09:11:27+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-003",
|
||||
"role": "user",
|
||||
"text": "По выбранному объекту \"Столешница 600*3050*26 альмандин\": покажи документы по этой позиции",
|
||||
"created_at": "2026-04-17T09:11:27+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-004",
|
||||
"role": "user",
|
||||
"text": "покажи еще раз остатки на эту же дату",
|
||||
"created_at": "2026-04-17T09:11:27+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-005",
|
||||
"role": "user",
|
||||
"text": "по какой компании мы сейчас работаем?",
|
||||
"created_at": "2026-04-17T09:11:27+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-006",
|
||||
"role": "user",
|
||||
"text": "а исторические остатки тоже можешь?",
|
||||
"created_at": "2026-04-17T09:11:27+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
}
|
||||
],
|
||||
"agent_run": true,
|
||||
"metadata": {
|
||||
"assistant_prompt_version": null,
|
||||
"decomposition_prompt_version": null,
|
||||
"prompt_fingerprint": null,
|
||||
"agent_focus": "scenario acceptance gate over root selected-object restore and human meta",
|
||||
"architecture_phase": "turnaround_11_phase7",
|
||||
"source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\address_truth_harness_phase7_acceptance_gate_mix.json"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,173 +0,0 @@
|
|||
{
|
||||
"saved_at": "2026-04-17T09:31:44+00:00",
|
||||
"generation_id": "gen-ag04170931-6bb7e5",
|
||||
"mode": "saved_user_sessions",
|
||||
"title": "AGENT | Phase 7 mixed replay for documents, selected-object continuity, meta context, and cross-domain pivots",
|
||||
"agent_run": true,
|
||||
"questions": [
|
||||
"привет, как дела?",
|
||||
"по какой компании мы сейчас работаем?",
|
||||
"покажи все документы по чепурнову",
|
||||
"что нам отгружал чепурнов, какой товар или услугу?",
|
||||
"какие остатки на складе на март 2021",
|
||||
"По выбранному объекту \"Столешница 600*3050*26 альмандин\": кто нам это поставил?",
|
||||
"что ты умеешь?",
|
||||
"По выбранному объекту \"Столешница 600*3050*26 альмандин\": покажи документы по этой позиции",
|
||||
"а ты помнишь, что мы по этой позиции уже выяснили?",
|
||||
"покажи еще раз остатки на эту же дату",
|
||||
"кто нам должен на март 2020",
|
||||
"остатки по складу на эту же дату",
|
||||
"а исторические остатки тоже можешь?",
|
||||
"хвосты покажи по счету 60 на август 2022"
|
||||
],
|
||||
"metadata": {
|
||||
"assistant_prompt_version": null,
|
||||
"decomposition_prompt_version": null,
|
||||
"prompt_fingerprint": null,
|
||||
"agent_focus": "mixed documents meta and cross-domain replay for turnaround 11",
|
||||
"architecture_phase": "turnaround_11_phase7",
|
||||
"source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\address_truth_harness_phase7_meta_domain_mix.json"
|
||||
},
|
||||
"source_session_id": null,
|
||||
"session": {
|
||||
"session_id": null,
|
||||
"mode": "agent_semantic_run",
|
||||
"items": [
|
||||
{
|
||||
"message_id": "agent-user-001",
|
||||
"role": "user",
|
||||
"text": "привет, как дела?",
|
||||
"created_at": "2026-04-17T09:31:44+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-002",
|
||||
"role": "user",
|
||||
"text": "по какой компании мы сейчас работаем?",
|
||||
"created_at": "2026-04-17T09:31:44+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-003",
|
||||
"role": "user",
|
||||
"text": "покажи все документы по чепурнову",
|
||||
"created_at": "2026-04-17T09:31:44+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-004",
|
||||
"role": "user",
|
||||
"text": "что нам отгружал чепурнов, какой товар или услугу?",
|
||||
"created_at": "2026-04-17T09:31:44+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-005",
|
||||
"role": "user",
|
||||
"text": "какие остатки на складе на март 2021",
|
||||
"created_at": "2026-04-17T09:31:44+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-006",
|
||||
"role": "user",
|
||||
"text": "По выбранному объекту \"Столешница 600*3050*26 альмандин\": кто нам это поставил?",
|
||||
"created_at": "2026-04-17T09:31:44+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-007",
|
||||
"role": "user",
|
||||
"text": "что ты умеешь?",
|
||||
"created_at": "2026-04-17T09:31:44+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-008",
|
||||
"role": "user",
|
||||
"text": "По выбранному объекту \"Столешница 600*3050*26 альмандин\": покажи документы по этой позиции",
|
||||
"created_at": "2026-04-17T09:31:44+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-009",
|
||||
"role": "user",
|
||||
"text": "а ты помнишь, что мы по этой позиции уже выяснили?",
|
||||
"created_at": "2026-04-17T09:31:44+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-010",
|
||||
"role": "user",
|
||||
"text": "покажи еще раз остатки на эту же дату",
|
||||
"created_at": "2026-04-17T09:31:44+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-011",
|
||||
"role": "user",
|
||||
"text": "кто нам должен на март 2020",
|
||||
"created_at": "2026-04-17T09:31:44+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-012",
|
||||
"role": "user",
|
||||
"text": "остатки по складу на эту же дату",
|
||||
"created_at": "2026-04-17T09:31:44+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-013",
|
||||
"role": "user",
|
||||
"text": "а исторические остатки тоже можешь?",
|
||||
"created_at": "2026-04-17T09:31:44+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-014",
|
||||
"role": "user",
|
||||
"text": "хвосты покажи по счету 60 на август 2022",
|
||||
"created_at": "2026-04-17T09:31:44+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
}
|
||||
],
|
||||
"agent_run": true,
|
||||
"metadata": {
|
||||
"assistant_prompt_version": null,
|
||||
"decomposition_prompt_version": null,
|
||||
"prompt_fingerprint": null,
|
||||
"agent_focus": "mixed documents meta and cross-domain replay for turnaround 11",
|
||||
"architecture_phase": "turnaround_11_phase7",
|
||||
"source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\address_truth_harness_phase7_meta_domain_mix.json"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,173 +0,0 @@
|
|||
{
|
||||
"saved_at": "2026-04-17T09:41:32+00:00",
|
||||
"generation_id": "gen-ag04170941-87680e",
|
||||
"mode": "saved_user_sessions",
|
||||
"title": "AGENT | Phase 7 mixed replay for documents, selected-object continuity, meta context, and cross-domain pivots",
|
||||
"agent_run": true,
|
||||
"questions": [
|
||||
"привет, как дела?",
|
||||
"по какой компании мы сейчас работаем?",
|
||||
"покажи все документы по чепурнову",
|
||||
"что нам отгружал чепурнов, какой товар или услугу?",
|
||||
"какие остатки на складе на март 2021",
|
||||
"По выбранному объекту \"Столешница 600*3050*26 альмандин\": кто нам это поставил?",
|
||||
"что ты умеешь?",
|
||||
"По выбранному объекту \"Столешница 600*3050*26 альмандин\": покажи документы по этой позиции",
|
||||
"а ты помнишь, что мы по этой позиции уже выяснили?",
|
||||
"покажи еще раз остатки на эту же дату",
|
||||
"кто нам должен на март 2020",
|
||||
"остатки по складу на эту же дату",
|
||||
"а исторические остатки тоже можешь?",
|
||||
"хвосты покажи по счету 60 на август 2022"
|
||||
],
|
||||
"metadata": {
|
||||
"assistant_prompt_version": null,
|
||||
"decomposition_prompt_version": null,
|
||||
"prompt_fingerprint": null,
|
||||
"agent_focus": "mixed documents meta and cross-domain replay for turnaround 11",
|
||||
"architecture_phase": "turnaround_11_phase7",
|
||||
"source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\address_truth_harness_phase7_meta_domain_mix.json"
|
||||
},
|
||||
"source_session_id": null,
|
||||
"session": {
|
||||
"session_id": null,
|
||||
"mode": "agent_semantic_run",
|
||||
"items": [
|
||||
{
|
||||
"message_id": "agent-user-001",
|
||||
"role": "user",
|
||||
"text": "привет, как дела?",
|
||||
"created_at": "2026-04-17T09:41:32+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-002",
|
||||
"role": "user",
|
||||
"text": "по какой компании мы сейчас работаем?",
|
||||
"created_at": "2026-04-17T09:41:32+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-003",
|
||||
"role": "user",
|
||||
"text": "покажи все документы по чепурнову",
|
||||
"created_at": "2026-04-17T09:41:32+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-004",
|
||||
"role": "user",
|
||||
"text": "что нам отгружал чепурнов, какой товар или услугу?",
|
||||
"created_at": "2026-04-17T09:41:32+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-005",
|
||||
"role": "user",
|
||||
"text": "какие остатки на складе на март 2021",
|
||||
"created_at": "2026-04-17T09:41:32+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-006",
|
||||
"role": "user",
|
||||
"text": "По выбранному объекту \"Столешница 600*3050*26 альмандин\": кто нам это поставил?",
|
||||
"created_at": "2026-04-17T09:41:32+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-007",
|
||||
"role": "user",
|
||||
"text": "что ты умеешь?",
|
||||
"created_at": "2026-04-17T09:41:32+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-008",
|
||||
"role": "user",
|
||||
"text": "По выбранному объекту \"Столешница 600*3050*26 альмандин\": покажи документы по этой позиции",
|
||||
"created_at": "2026-04-17T09:41:32+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-009",
|
||||
"role": "user",
|
||||
"text": "а ты помнишь, что мы по этой позиции уже выяснили?",
|
||||
"created_at": "2026-04-17T09:41:32+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-010",
|
||||
"role": "user",
|
||||
"text": "покажи еще раз остатки на эту же дату",
|
||||
"created_at": "2026-04-17T09:41:32+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-011",
|
||||
"role": "user",
|
||||
"text": "кто нам должен на март 2020",
|
||||
"created_at": "2026-04-17T09:41:32+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-012",
|
||||
"role": "user",
|
||||
"text": "остатки по складу на эту же дату",
|
||||
"created_at": "2026-04-17T09:41:32+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-013",
|
||||
"role": "user",
|
||||
"text": "а исторические остатки тоже можешь?",
|
||||
"created_at": "2026-04-17T09:41:32+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-014",
|
||||
"role": "user",
|
||||
"text": "хвосты покажи по счету 60 на август 2022",
|
||||
"created_at": "2026-04-17T09:41:32+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
}
|
||||
],
|
||||
"agent_run": true,
|
||||
"metadata": {
|
||||
"assistant_prompt_version": null,
|
||||
"decomposition_prompt_version": null,
|
||||
"prompt_fingerprint": null,
|
||||
"agent_focus": "mixed documents meta and cross-domain replay for turnaround 11",
|
||||
"architecture_phase": "turnaround_11_phase7",
|
||||
"source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\address_truth_harness_phase7_meta_domain_mix.json"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,199 +0,0 @@
|
|||
{
|
||||
"saved_at": "2026-04-17T13:26:00+00:00",
|
||||
"generation_id": "gen-ag04171326-15a132",
|
||||
"mode": "saved_user_sessions",
|
||||
"title": "AGENT replay for company selection continuity and organization activity age",
|
||||
"agent_run": true,
|
||||
"questions": [
|
||||
"привет, как дела?",
|
||||
"по какой компании мы сейчас работаем?",
|
||||
"какие остатки на складе на март 2021",
|
||||
"давай по Альтернативе Плюс",
|
||||
"тогда покажи остатки на март 2021",
|
||||
"По выбранному объекту \"Столешница 600*3050*26 альмандин\": кто нам это поставил?",
|
||||
"По выбранному объекту \"Столешница 600*3050*26 альмандин\": покажи документы по этой позиции",
|
||||
"покажи еще раз остатки на эту же дату",
|
||||
"а по Альтернативе Плюс сколько лет активности в базе 1С?",
|
||||
"что ты умеешь?",
|
||||
"а ты помнишь, что мы по этой позиции уже выяснили?",
|
||||
"кто нам должен на март 2020",
|
||||
"остатки по складу на эту же дату"
|
||||
],
|
||||
"metadata": {
|
||||
"assistant_prompt_version": null,
|
||||
"decomposition_prompt_version": null,
|
||||
"prompt_fingerprint": null,
|
||||
"agent_focus": "Targeted AGENT replay for the multi-company clarification flow: select the company in-session, continue the same business path, verify selected-object continuity, then probe whether organization age/activity can be answered from reachable 1C evidence without leaking technical garbage.",
|
||||
"architecture_phase": "turnaround_11",
|
||||
"source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\address_truth_harness_phase5_company_selection_and_activity_age.json",
|
||||
"scenario_id": "address_truth_harness_phase5_company_selection_and_activity_age",
|
||||
"semantic_tags": [
|
||||
"company_clarification",
|
||||
"company_selected",
|
||||
"company_selection",
|
||||
"inventory_root",
|
||||
"meta_capability",
|
||||
"meta_memory",
|
||||
"meta_scope",
|
||||
"meta_smalltalk",
|
||||
"organization_activity_age",
|
||||
"same_date_pivot",
|
||||
"same_date_restore",
|
||||
"selected_object",
|
||||
"selected_object_documents",
|
||||
"selected_object_supplier",
|
||||
"settlements_receivables"
|
||||
]
|
||||
},
|
||||
"source_session_id": null,
|
||||
"session": {
|
||||
"session_id": null,
|
||||
"mode": "agent_semantic_run",
|
||||
"items": [
|
||||
{
|
||||
"message_id": "agent-user-001",
|
||||
"role": "user",
|
||||
"text": "привет, как дела?",
|
||||
"created_at": "2026-04-17T13:26:00+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-002",
|
||||
"role": "user",
|
||||
"text": "по какой компании мы сейчас работаем?",
|
||||
"created_at": "2026-04-17T13:26:00+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-003",
|
||||
"role": "user",
|
||||
"text": "какие остатки на складе на март 2021",
|
||||
"created_at": "2026-04-17T13:26:00+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-004",
|
||||
"role": "user",
|
||||
"text": "давай по Альтернативе Плюс",
|
||||
"created_at": "2026-04-17T13:26:00+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-005",
|
||||
"role": "user",
|
||||
"text": "тогда покажи остатки на март 2021",
|
||||
"created_at": "2026-04-17T13:26:00+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-006",
|
||||
"role": "user",
|
||||
"text": "По выбранному объекту \"Столешница 600*3050*26 альмандин\": кто нам это поставил?",
|
||||
"created_at": "2026-04-17T13:26:00+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-007",
|
||||
"role": "user",
|
||||
"text": "По выбранному объекту \"Столешница 600*3050*26 альмандин\": покажи документы по этой позиции",
|
||||
"created_at": "2026-04-17T13:26:00+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-008",
|
||||
"role": "user",
|
||||
"text": "покажи еще раз остатки на эту же дату",
|
||||
"created_at": "2026-04-17T13:26:00+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-009",
|
||||
"role": "user",
|
||||
"text": "а по Альтернативе Плюс сколько лет активности в базе 1С?",
|
||||
"created_at": "2026-04-17T13:26:00+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-010",
|
||||
"role": "user",
|
||||
"text": "что ты умеешь?",
|
||||
"created_at": "2026-04-17T13:26:00+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-011",
|
||||
"role": "user",
|
||||
"text": "а ты помнишь, что мы по этой позиции уже выяснили?",
|
||||
"created_at": "2026-04-17T13:26:00+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-012",
|
||||
"role": "user",
|
||||
"text": "кто нам должен на март 2020",
|
||||
"created_at": "2026-04-17T13:26:00+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-013",
|
||||
"role": "user",
|
||||
"text": "остатки по складу на эту же дату",
|
||||
"created_at": "2026-04-17T13:26:00+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
}
|
||||
],
|
||||
"agent_run": true,
|
||||
"metadata": {
|
||||
"assistant_prompt_version": null,
|
||||
"decomposition_prompt_version": null,
|
||||
"prompt_fingerprint": null,
|
||||
"agent_focus": "Targeted AGENT replay for the multi-company clarification flow: select the company in-session, continue the same business path, verify selected-object continuity, then probe whether organization age/activity can be answered from reachable 1C evidence without leaking technical garbage.",
|
||||
"architecture_phase": "turnaround_11",
|
||||
"source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\address_truth_harness_phase5_company_selection_and_activity_age.json",
|
||||
"scenario_id": "address_truth_harness_phase5_company_selection_and_activity_age",
|
||||
"semantic_tags": [
|
||||
"company_clarification",
|
||||
"company_selected",
|
||||
"company_selection",
|
||||
"inventory_root",
|
||||
"meta_capability",
|
||||
"meta_memory",
|
||||
"meta_scope",
|
||||
"meta_smalltalk",
|
||||
"organization_activity_age",
|
||||
"same_date_pivot",
|
||||
"same_date_restore",
|
||||
"selected_object",
|
||||
"selected_object_documents",
|
||||
"selected_object_supplier",
|
||||
"settlements_receivables"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,129 +0,0 @@
|
|||
{
|
||||
"saved_at": "2026-04-17T15:08:06+00:00",
|
||||
"generation_id": "gen-ag04171508-760111",
|
||||
"mode": "saved_user_sessions",
|
||||
"title": "AGENT replay for inventory clarification continuity and answer-shape cleanliness",
|
||||
"agent_run": true,
|
||||
"questions": [
|
||||
"какие остатки на складе на март 2021",
|
||||
"давай по Альтернативе Плюс",
|
||||
"тогда покажи остатки на март 2021",
|
||||
"По выбранному объекту \"Столешница 600*3050*26 альмандин\": кто нам это поставил?",
|
||||
"а по этой позиции когда была закупка?",
|
||||
"покажи документы по этой позиции",
|
||||
"покажи еще раз остатки на эту же дату",
|
||||
"а что мы уже выяснили по этой позиции?"
|
||||
],
|
||||
"metadata": {
|
||||
"assistant_prompt_version": null,
|
||||
"decomposition_prompt_version": null,
|
||||
"prompt_fingerprint": null,
|
||||
"agent_focus": "Targeted AGENT replay for the recent compose/inventory fixes: company clarification, inventory root restore, selected-object provenance, purchase date/documents follow-ups, and protection against technical garbage in user-facing replies.",
|
||||
"architecture_phase": "turnaround_11",
|
||||
"source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\address_truth_harness_phase4_inventory_answer_shape_continuity.json",
|
||||
"scenario_id": "address_truth_harness_phase4_inventory_answer_shape_continuity",
|
||||
"semantic_tags": [
|
||||
"company_clarification",
|
||||
"company_selection",
|
||||
"inventory_root",
|
||||
"meta_memory",
|
||||
"meta_scope"
|
||||
]
|
||||
},
|
||||
"source_session_id": null,
|
||||
"session": {
|
||||
"session_id": null,
|
||||
"mode": "agent_semantic_run",
|
||||
"items": [
|
||||
{
|
||||
"message_id": "agent-user-001",
|
||||
"role": "user",
|
||||
"text": "какие остатки на складе на март 2021",
|
||||
"created_at": "2026-04-17T15:08:06+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-002",
|
||||
"role": "user",
|
||||
"text": "давай по Альтернативе Плюс",
|
||||
"created_at": "2026-04-17T15:08:06+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-003",
|
||||
"role": "user",
|
||||
"text": "тогда покажи остатки на март 2021",
|
||||
"created_at": "2026-04-17T15:08:06+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-004",
|
||||
"role": "user",
|
||||
"text": "По выбранному объекту \"Столешница 600*3050*26 альмандин\": кто нам это поставил?",
|
||||
"created_at": "2026-04-17T15:08:06+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-005",
|
||||
"role": "user",
|
||||
"text": "а по этой позиции когда была закупка?",
|
||||
"created_at": "2026-04-17T15:08:06+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-006",
|
||||
"role": "user",
|
||||
"text": "покажи документы по этой позиции",
|
||||
"created_at": "2026-04-17T15:08:06+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-007",
|
||||
"role": "user",
|
||||
"text": "покажи еще раз остатки на эту же дату",
|
||||
"created_at": "2026-04-17T15:08:06+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-008",
|
||||
"role": "user",
|
||||
"text": "а что мы уже выяснили по этой позиции?",
|
||||
"created_at": "2026-04-17T15:08:06+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
}
|
||||
],
|
||||
"agent_run": true,
|
||||
"metadata": {
|
||||
"assistant_prompt_version": null,
|
||||
"decomposition_prompt_version": null,
|
||||
"prompt_fingerprint": null,
|
||||
"agent_focus": "Targeted AGENT replay for the recent compose/inventory fixes: company clarification, inventory root restore, selected-object provenance, purchase date/documents follow-ups, and protection against technical garbage in user-facing replies.",
|
||||
"architecture_phase": "turnaround_11",
|
||||
"source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\address_truth_harness_phase4_inventory_answer_shape_continuity.json",
|
||||
"scenario_id": "address_truth_harness_phase4_inventory_answer_shape_continuity",
|
||||
"semantic_tags": [
|
||||
"company_clarification",
|
||||
"company_selection",
|
||||
"inventory_root",
|
||||
"meta_memory",
|
||||
"meta_scope"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
|
|
@ -1,125 +0,0 @@
|
|||
{
|
||||
"saved_at": "2026-04-23T13:36:22+00:00",
|
||||
"generation_id": "gen-ag04231336-3d4cc9",
|
||||
"mode": "saved_user_sessions",
|
||||
"title": "AGENT | Живой диалог по организации: денежный срез, сравнение и рейтинг",
|
||||
"agent_run": true,
|
||||
"questions": [
|
||||
"Хочу быстрый денежный срез по одной организации без привязки к контрагенту. Сколько вообще входящих денег было за 2020 год?",
|
||||
"По ООО Альтернатива Плюс.",
|
||||
"Понял, тогда за все время.",
|
||||
"Хорошо. А что по ООО Альтернатива Плюс больше в 2020 году: входящие или исходящие деньги?",
|
||||
"А что по ООО Альтернатива Плюс больше уже за 2021 год: входящие или исходящие деньги?",
|
||||
"И кто больше всего принес денег этой организации в 2020 году?",
|
||||
"А в 2021 году?"
|
||||
],
|
||||
"metadata": {
|
||||
"assistant_prompt_version": null,
|
||||
"decomposition_prompt_version": null,
|
||||
"prompt_fingerprint": null,
|
||||
"agent_focus": "Human organization open-scope dialog: org clarification, all-time incoming total, comparison, ranking",
|
||||
"architecture_phase": "turnaround_11",
|
||||
"source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\address_truth_harness_phase66_human_org_open_scope_dialog.json",
|
||||
"scenario_id": "address_truth_harness_phase66_human_org_open_scope_dialog",
|
||||
"semantic_tags": [
|
||||
"all_time_followup",
|
||||
"human_dialog",
|
||||
"open_scope_total",
|
||||
"organization_clarification",
|
||||
"organization_scope",
|
||||
"value_flow_comparison",
|
||||
"value_flow_ranking",
|
||||
"year_switch"
|
||||
]
|
||||
},
|
||||
"source_session_id": null,
|
||||
"session": {
|
||||
"session_id": null,
|
||||
"mode": "agent_semantic_run",
|
||||
"items": [
|
||||
{
|
||||
"message_id": "agent-user-001",
|
||||
"role": "user",
|
||||
"text": "Хочу быстрый денежный срез по одной организации без привязки к контрагенту. Сколько вообще входящих денег было за 2020 год?",
|
||||
"created_at": "2026-04-23T13:36:22+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-002",
|
||||
"role": "user",
|
||||
"text": "По ООО Альтернатива Плюс.",
|
||||
"created_at": "2026-04-23T13:36:22+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-003",
|
||||
"role": "user",
|
||||
"text": "Понял, тогда за все время.",
|
||||
"created_at": "2026-04-23T13:36:22+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-004",
|
||||
"role": "user",
|
||||
"text": "Хорошо. А что по ООО Альтернатива Плюс больше в 2020 году: входящие или исходящие деньги?",
|
||||
"created_at": "2026-04-23T13:36:22+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-005",
|
||||
"role": "user",
|
||||
"text": "А что по ООО Альтернатива Плюс больше уже за 2021 год: входящие или исходящие деньги?",
|
||||
"created_at": "2026-04-23T13:36:22+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-006",
|
||||
"role": "user",
|
||||
"text": "И кто больше всего принес денег этой организации в 2020 году?",
|
||||
"created_at": "2026-04-23T13:36:22+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-007",
|
||||
"role": "user",
|
||||
"text": "А в 2021 году?",
|
||||
"created_at": "2026-04-23T13:36:22+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
}
|
||||
],
|
||||
"agent_run": true,
|
||||
"metadata": {
|
||||
"assistant_prompt_version": null,
|
||||
"decomposition_prompt_version": null,
|
||||
"prompt_fingerprint": null,
|
||||
"agent_focus": "Human organization open-scope dialog: org clarification, all-time incoming total, comparison, ranking",
|
||||
"architecture_phase": "turnaround_11",
|
||||
"source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\address_truth_harness_phase66_human_org_open_scope_dialog.json",
|
||||
"scenario_id": "address_truth_harness_phase66_human_org_open_scope_dialog",
|
||||
"semantic_tags": [
|
||||
"all_time_followup",
|
||||
"human_dialog",
|
||||
"open_scope_total",
|
||||
"organization_clarification",
|
||||
"organization_scope",
|
||||
"value_flow_comparison",
|
||||
"value_flow_ranking",
|
||||
"year_switch"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,123 +0,0 @@
|
|||
{
|
||||
"saved_at": "2026-04-23T13:36:22+00:00",
|
||||
"generation_id": "gen-ag04231336-4fa660",
|
||||
"mode": "saved_user_sessions",
|
||||
"title": "AGENT | Живой диалог по НДС: от ориентации до документов",
|
||||
"agent_run": true,
|
||||
"questions": [
|
||||
"Мне нужно понять, где в 1С по НДС вообще лежат данные. Какие объекты стоит смотреть по НДС?",
|
||||
"Хорошо, тогда покажи движения по ООО Альтернатива Плюс.",
|
||||
"За 2020 год.",
|
||||
"А теперь по документам?",
|
||||
"А теперь за 2021 год?",
|
||||
"А теперь за все время?"
|
||||
],
|
||||
"metadata": {
|
||||
"assistant_prompt_version": null,
|
||||
"decomposition_prompt_version": null,
|
||||
"prompt_fingerprint": null,
|
||||
"agent_focus": "Human VAT dialog: metadata orientation, movement lane, document pivot, year switch, all-time continuity",
|
||||
"architecture_phase": "turnaround_11",
|
||||
"source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\address_truth_harness_phase64_human_vat_investigation_dialog.json",
|
||||
"scenario_id": "address_truth_harness_phase64_human_vat_investigation_dialog",
|
||||
"semantic_tags": [
|
||||
"all_time_followup",
|
||||
"bounded_retrieval",
|
||||
"document_lane_continuity",
|
||||
"document_pivot_after_movement_retrieval",
|
||||
"human_dialog",
|
||||
"inline_organization_clarification",
|
||||
"metadata_surface",
|
||||
"movement_lane_after_metadata",
|
||||
"movement_lane_execution",
|
||||
"scope_reuse",
|
||||
"vat_orientation",
|
||||
"year_switch_after_pivot"
|
||||
]
|
||||
},
|
||||
"source_session_id": null,
|
||||
"session": {
|
||||
"session_id": null,
|
||||
"mode": "agent_semantic_run",
|
||||
"items": [
|
||||
{
|
||||
"message_id": "agent-user-001",
|
||||
"role": "user",
|
||||
"text": "Мне нужно понять, где в 1С по НДС вообще лежат данные. Какие объекты стоит смотреть по НДС?",
|
||||
"created_at": "2026-04-23T13:36:22+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-002",
|
||||
"role": "user",
|
||||
"text": "Хорошо, тогда покажи движения по ООО Альтернатива Плюс.",
|
||||
"created_at": "2026-04-23T13:36:22+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-003",
|
||||
"role": "user",
|
||||
"text": "За 2020 год.",
|
||||
"created_at": "2026-04-23T13:36:22+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-004",
|
||||
"role": "user",
|
||||
"text": "А теперь по документам?",
|
||||
"created_at": "2026-04-23T13:36:22+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-005",
|
||||
"role": "user",
|
||||
"text": "А теперь за 2021 год?",
|
||||
"created_at": "2026-04-23T13:36:22+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-006",
|
||||
"role": "user",
|
||||
"text": "А теперь за все время?",
|
||||
"created_at": "2026-04-23T13:36:22+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
}
|
||||
],
|
||||
"agent_run": true,
|
||||
"metadata": {
|
||||
"assistant_prompt_version": null,
|
||||
"decomposition_prompt_version": null,
|
||||
"prompt_fingerprint": null,
|
||||
"agent_focus": "Human VAT dialog: metadata orientation, movement lane, document pivot, year switch, all-time continuity",
|
||||
"architecture_phase": "turnaround_11",
|
||||
"source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\address_truth_harness_phase64_human_vat_investigation_dialog.json",
|
||||
"scenario_id": "address_truth_harness_phase64_human_vat_investigation_dialog",
|
||||
"semantic_tags": [
|
||||
"all_time_followup",
|
||||
"bounded_retrieval",
|
||||
"document_lane_continuity",
|
||||
"document_pivot_after_movement_retrieval",
|
||||
"human_dialog",
|
||||
"inline_organization_clarification",
|
||||
"metadata_surface",
|
||||
"movement_lane_after_metadata",
|
||||
"movement_lane_execution",
|
||||
"scope_reuse",
|
||||
"vat_orientation",
|
||||
"year_switch_after_pivot"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,129 +0,0 @@
|
|||
{
|
||||
"saved_at": "2026-04-23T13:36:22+00:00",
|
||||
"generation_id": "gen-ag04231336-db78b3",
|
||||
"mode": "saved_user_sessions",
|
||||
"title": "AGENT | Живой диалог по СВК: деньги, нетто, документы и движения",
|
||||
"agent_run": true,
|
||||
"questions": [
|
||||
"Хочу проверить одного контрагента. Найди в 1С Группу СВК.",
|
||||
"Посмотри, сколько денег мы получили от него за 2020 год.",
|
||||
"А теперь сколько мы ему заплатили?",
|
||||
"А какое получилось нетто?",
|
||||
"А по документам?",
|
||||
"А по движениям?",
|
||||
"А теперь за 2021 год?"
|
||||
],
|
||||
"metadata": {
|
||||
"assistant_prompt_version": null,
|
||||
"decomposition_prompt_version": null,
|
||||
"prompt_fingerprint": null,
|
||||
"agent_focus": "Human SVK counterparty dialog: grounding, incoming, outgoing, net, documents, movements",
|
||||
"architecture_phase": "turnaround_11",
|
||||
"source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\address_truth_harness_phase65_human_svk_money_dialog.json",
|
||||
"scenario_id": "address_truth_harness_phase65_human_svk_money_dialog",
|
||||
"semantic_tags": [
|
||||
"counterparty_resolution",
|
||||
"document_pivot_after_value_flow",
|
||||
"entity_grounding",
|
||||
"grounded_counterparty_followup",
|
||||
"human_dialog",
|
||||
"incoming_value_flow",
|
||||
"movement_pivot_after_value_flow",
|
||||
"net_value_flow",
|
||||
"outgoing_value_flow",
|
||||
"year_switch"
|
||||
]
|
||||
},
|
||||
"source_session_id": null,
|
||||
"session": {
|
||||
"session_id": null,
|
||||
"mode": "agent_semantic_run",
|
||||
"items": [
|
||||
{
|
||||
"message_id": "agent-user-001",
|
||||
"role": "user",
|
||||
"text": "Хочу проверить одного контрагента. Найди в 1С Группу СВК.",
|
||||
"created_at": "2026-04-23T13:36:22+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-002",
|
||||
"role": "user",
|
||||
"text": "Посмотри, сколько денег мы получили от него за 2020 год.",
|
||||
"created_at": "2026-04-23T13:36:22+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-003",
|
||||
"role": "user",
|
||||
"text": "А теперь сколько мы ему заплатили?",
|
||||
"created_at": "2026-04-23T13:36:22+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-004",
|
||||
"role": "user",
|
||||
"text": "А какое получилось нетто?",
|
||||
"created_at": "2026-04-23T13:36:22+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-005",
|
||||
"role": "user",
|
||||
"text": "А по документам?",
|
||||
"created_at": "2026-04-23T13:36:22+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-006",
|
||||
"role": "user",
|
||||
"text": "А по движениям?",
|
||||
"created_at": "2026-04-23T13:36:22+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-007",
|
||||
"role": "user",
|
||||
"text": "А теперь за 2021 год?",
|
||||
"created_at": "2026-04-23T13:36:22+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
}
|
||||
],
|
||||
"agent_run": true,
|
||||
"metadata": {
|
||||
"assistant_prompt_version": null,
|
||||
"decomposition_prompt_version": null,
|
||||
"prompt_fingerprint": null,
|
||||
"agent_focus": "Human SVK counterparty dialog: grounding, incoming, outgoing, net, documents, movements",
|
||||
"architecture_phase": "turnaround_11",
|
||||
"source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\address_truth_harness_phase65_human_svk_money_dialog.json",
|
||||
"scenario_id": "address_truth_harness_phase65_human_svk_money_dialog",
|
||||
"semantic_tags": [
|
||||
"counterparty_resolution",
|
||||
"document_pivot_after_value_flow",
|
||||
"entity_grounding",
|
||||
"grounded_counterparty_followup",
|
||||
"human_dialog",
|
||||
"incoming_value_flow",
|
||||
"movement_pivot_after_value_flow",
|
||||
"net_value_flow",
|
||||
"outgoing_value_flow",
|
||||
"year_switch"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,281 +0,0 @@
|
|||
{
|
||||
"saved_at": "2026-04-23T18:44:25+00:00",
|
||||
"generation_id": "gen-ag04231844-8e552a",
|
||||
"mode": "saved_user_sessions",
|
||||
"title": "AGENT | ARCH: Post-F Semantic Integrity Hardening | Смешанный живой диалог: repeated pivots, орг-срез и СВК",
|
||||
"agent_run": true,
|
||||
"questions": [
|
||||
"Покажи документы по Жуковке 51.",
|
||||
"Хорошо, а теперь платежи по нему тоже покажи.",
|
||||
"А по нему договоры?",
|
||||
"А по нему документы?",
|
||||
"А по нему платежи?",
|
||||
"А за 2021?",
|
||||
"С Жуковкой закончили. Теперь нужна другая задача: быстрый денежный срез по одной организации. Если для ответа нужна организация, просто уточни ее. Сколько вообще входящих денег было за 2020 год?",
|
||||
"По ООО Альтернатива Плюс.",
|
||||
"Понял, тогда за все время.",
|
||||
"Хорошо. А что по ООО Альтернатива Плюс больше в 2020 году: входящие или исходящие деньги?",
|
||||
"А что по ООО Альтернатива Плюс больше уже за 2021 год: входящие или исходящие деньги?",
|
||||
"И кто больше всего принес денег этой организации в 2020 году?",
|
||||
"А в 2021 году?",
|
||||
"Теперь отдельная тема по конкретному контрагенту. Найди в 1С Группу СВК.",
|
||||
"Сколько получили по нему за 2020 год?",
|
||||
"А теперь сколько заплатили?",
|
||||
"А какое нетто?",
|
||||
"А по документам?",
|
||||
"А по движениям?"
|
||||
],
|
||||
"metadata": {
|
||||
"assistant_prompt_version": null,
|
||||
"decomposition_prompt_version": null,
|
||||
"prompt_fingerprint": null,
|
||||
"agent_focus": "Post-F repeated pivots + open-scope organization money + grounded SVK counterparty chain",
|
||||
"architecture_phase": "turnaround_11",
|
||||
"source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\address_truth_harness_phase82_human_mixed_integrity_status_dialog.json",
|
||||
"scenario_id": "address_truth_harness_phase82_human_mixed_integrity_status_dialog",
|
||||
"semantic_tags": [
|
||||
"all_time_followup",
|
||||
"contracts_followup",
|
||||
"counterparty_pronoun_resolution",
|
||||
"documents_by_counterparty",
|
||||
"documents_followup",
|
||||
"documents_pivot",
|
||||
"fourth_pivot",
|
||||
"grounded_counterparty",
|
||||
"human_dialog",
|
||||
"incoming_value_flow",
|
||||
"movements_pivot",
|
||||
"net_value_flow",
|
||||
"open_scope_total",
|
||||
"organization_clarification",
|
||||
"organization_scope",
|
||||
"payments_followup",
|
||||
"payout_value_flow",
|
||||
"pivot_seed",
|
||||
"post_f_integrity_hardening",
|
||||
"second_pivot",
|
||||
"third_pivot",
|
||||
"topic_reset",
|
||||
"value_flow_comparison",
|
||||
"value_flow_ranking",
|
||||
"year_switch",
|
||||
"year_switch_after_fourth_pivot"
|
||||
]
|
||||
},
|
||||
"source_session_id": null,
|
||||
"session": {
|
||||
"session_id": null,
|
||||
"mode": "agent_semantic_run",
|
||||
"items": [
|
||||
{
|
||||
"message_id": "agent-user-001",
|
||||
"role": "user",
|
||||
"text": "Покажи документы по Жуковке 51.",
|
||||
"created_at": "2026-04-23T18:44:25+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-002",
|
||||
"role": "user",
|
||||
"text": "Хорошо, а теперь платежи по нему тоже покажи.",
|
||||
"created_at": "2026-04-23T18:44:25+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-003",
|
||||
"role": "user",
|
||||
"text": "А по нему договоры?",
|
||||
"created_at": "2026-04-23T18:44:25+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-004",
|
||||
"role": "user",
|
||||
"text": "А по нему документы?",
|
||||
"created_at": "2026-04-23T18:44:25+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-005",
|
||||
"role": "user",
|
||||
"text": "А по нему платежи?",
|
||||
"created_at": "2026-04-23T18:44:25+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-006",
|
||||
"role": "user",
|
||||
"text": "А за 2021?",
|
||||
"created_at": "2026-04-23T18:44:25+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-007",
|
||||
"role": "user",
|
||||
"text": "С Жуковкой закончили. Теперь нужна другая задача: быстрый денежный срез по одной организации. Если для ответа нужна организация, просто уточни ее. Сколько вообще входящих денег было за 2020 год?",
|
||||
"created_at": "2026-04-23T18:44:25+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-008",
|
||||
"role": "user",
|
||||
"text": "По ООО Альтернатива Плюс.",
|
||||
"created_at": "2026-04-23T18:44:25+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-009",
|
||||
"role": "user",
|
||||
"text": "Понял, тогда за все время.",
|
||||
"created_at": "2026-04-23T18:44:25+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-010",
|
||||
"role": "user",
|
||||
"text": "Хорошо. А что по ООО Альтернатива Плюс больше в 2020 году: входящие или исходящие деньги?",
|
||||
"created_at": "2026-04-23T18:44:25+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-011",
|
||||
"role": "user",
|
||||
"text": "А что по ООО Альтернатива Плюс больше уже за 2021 год: входящие или исходящие деньги?",
|
||||
"created_at": "2026-04-23T18:44:25+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-012",
|
||||
"role": "user",
|
||||
"text": "И кто больше всего принес денег этой организации в 2020 году?",
|
||||
"created_at": "2026-04-23T18:44:25+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-013",
|
||||
"role": "user",
|
||||
"text": "А в 2021 году?",
|
||||
"created_at": "2026-04-23T18:44:25+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-014",
|
||||
"role": "user",
|
||||
"text": "Теперь отдельная тема по конкретному контрагенту. Найди в 1С Группу СВК.",
|
||||
"created_at": "2026-04-23T18:44:25+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-015",
|
||||
"role": "user",
|
||||
"text": "Сколько получили по нему за 2020 год?",
|
||||
"created_at": "2026-04-23T18:44:25+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-016",
|
||||
"role": "user",
|
||||
"text": "А теперь сколько заплатили?",
|
||||
"created_at": "2026-04-23T18:44:25+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-017",
|
||||
"role": "user",
|
||||
"text": "А какое нетто?",
|
||||
"created_at": "2026-04-23T18:44:25+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-018",
|
||||
"role": "user",
|
||||
"text": "А по документам?",
|
||||
"created_at": "2026-04-23T18:44:25+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-019",
|
||||
"role": "user",
|
||||
"text": "А по движениям?",
|
||||
"created_at": "2026-04-23T18:44:25+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
}
|
||||
],
|
||||
"agent_run": true,
|
||||
"metadata": {
|
||||
"assistant_prompt_version": null,
|
||||
"decomposition_prompt_version": null,
|
||||
"prompt_fingerprint": null,
|
||||
"agent_focus": "Post-F repeated pivots + open-scope organization money + grounded SVK counterparty chain",
|
||||
"architecture_phase": "turnaround_11",
|
||||
"source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\address_truth_harness_phase82_human_mixed_integrity_status_dialog.json",
|
||||
"scenario_id": "address_truth_harness_phase82_human_mixed_integrity_status_dialog",
|
||||
"semantic_tags": [
|
||||
"all_time_followup",
|
||||
"contracts_followup",
|
||||
"counterparty_pronoun_resolution",
|
||||
"documents_by_counterparty",
|
||||
"documents_followup",
|
||||
"documents_pivot",
|
||||
"fourth_pivot",
|
||||
"grounded_counterparty",
|
||||
"human_dialog",
|
||||
"incoming_value_flow",
|
||||
"movements_pivot",
|
||||
"net_value_flow",
|
||||
"open_scope_total",
|
||||
"organization_clarification",
|
||||
"organization_scope",
|
||||
"payments_followup",
|
||||
"payout_value_flow",
|
||||
"pivot_seed",
|
||||
"post_f_integrity_hardening",
|
||||
"second_pivot",
|
||||
"third_pivot",
|
||||
"topic_reset",
|
||||
"value_flow_comparison",
|
||||
"value_flow_ranking",
|
||||
"year_switch",
|
||||
"year_switch_after_fourth_pivot"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,153 @@
|
|||
{
|
||||
"saved_at": "2026-05-10T09:49:21+00:00",
|
||||
"generation_id": "gen-ag05100949-2a5ed8",
|
||||
"mode": "saved_user_sessions",
|
||||
"title": "AGENT | Phase 91 route-candidate-driven enablement loop canary",
|
||||
"agent_run": true,
|
||||
"questions": [
|
||||
"какой контрагент принес больше всего денег за 2020 год?",
|
||||
"ООО Альтернатива Плюс",
|
||||
"а теперь сколько лет мы работаем с Группа СВК?",
|
||||
"покажи движения по НДС за 2020 по ООО Альтернатива Плюс",
|
||||
"можно ли по этим данным точно подтвердить резерв под неликвиды на складе?"
|
||||
],
|
||||
"metadata": {
|
||||
"assistant_prompt_version": null,
|
||||
"decomposition_prompt_version": null,
|
||||
"prompt_fingerprint": null,
|
||||
"agent_focus": "route candidate repair-target grouping + handoff canary",
|
||||
"architecture_phase": "Route-Candidate-Driven Enablement Loop",
|
||||
"source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\address_truth_harness_phase91_route_candidate_driven_enablement_loop.json",
|
||||
"scenario_id": "address_truth_harness_phase91_route_candidate_driven_enablement_loop",
|
||||
"semantic_tags": [
|
||||
"clarification_recovery",
|
||||
"counterparty_lifecycle",
|
||||
"inventory_reserve_boundary",
|
||||
"metadata_scope",
|
||||
"movement_evidence",
|
||||
"needs_user_scope",
|
||||
"no_overclaim",
|
||||
"open_organization_scope",
|
||||
"ready_for_reviewed_execution",
|
||||
"route_candidate_handoff",
|
||||
"stale_scope_guard",
|
||||
"unreviewed_proof_family",
|
||||
"value_flow_ranking",
|
||||
"vat"
|
||||
],
|
||||
"validation_status": "accepted_live_replay",
|
||||
"validated_run_dir": "artifacts\\domain_runs\\address_truth_harness_phase91_route_candidate_driven_enablement_loop_live1_20260510",
|
||||
"saved_after_validated_replay": true,
|
||||
"save_gate": {
|
||||
"schema_version": "agent_semantic_save_gate_v1",
|
||||
"validation_status": "accepted_live_replay",
|
||||
"validated_run_dir": "artifacts\\domain_runs\\address_truth_harness_phase91_route_candidate_driven_enablement_loop_live1_20260510",
|
||||
"final_status": "accepted",
|
||||
"review_overall_status": "pass",
|
||||
"business_overall_status": "pass",
|
||||
"steps_total": 5,
|
||||
"steps_passed": 5,
|
||||
"steps_failed": 0,
|
||||
"steps_with_business_failures": 0,
|
||||
"steps_with_business_warnings": 0,
|
||||
"acceptance_gate_passed": true,
|
||||
"saved_after_validated_replay": true
|
||||
}
|
||||
},
|
||||
"source_session_id": null,
|
||||
"session": {
|
||||
"session_id": null,
|
||||
"mode": "agent_semantic_run",
|
||||
"items": [
|
||||
{
|
||||
"message_id": "agent-user-001",
|
||||
"role": "user",
|
||||
"text": "какой контрагент принес больше всего денег за 2020 год?",
|
||||
"created_at": "2026-05-10T09:49:21+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-002",
|
||||
"role": "user",
|
||||
"text": "ООО Альтернатива Плюс",
|
||||
"created_at": "2026-05-10T09:49:21+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-003",
|
||||
"role": "user",
|
||||
"text": "а теперь сколько лет мы работаем с Группа СВК?",
|
||||
"created_at": "2026-05-10T09:49:21+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-004",
|
||||
"role": "user",
|
||||
"text": "покажи движения по НДС за 2020 по ООО Альтернатива Плюс",
|
||||
"created_at": "2026-05-10T09:49:21+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-005",
|
||||
"role": "user",
|
||||
"text": "можно ли по этим данным точно подтвердить резерв под неликвиды на складе?",
|
||||
"created_at": "2026-05-10T09:49:21+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
}
|
||||
],
|
||||
"agent_run": true,
|
||||
"metadata": {
|
||||
"assistant_prompt_version": null,
|
||||
"decomposition_prompt_version": null,
|
||||
"prompt_fingerprint": null,
|
||||
"agent_focus": "route candidate repair-target grouping + handoff canary",
|
||||
"architecture_phase": "Route-Candidate-Driven Enablement Loop",
|
||||
"source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\address_truth_harness_phase91_route_candidate_driven_enablement_loop.json",
|
||||
"scenario_id": "address_truth_harness_phase91_route_candidate_driven_enablement_loop",
|
||||
"semantic_tags": [
|
||||
"clarification_recovery",
|
||||
"counterparty_lifecycle",
|
||||
"inventory_reserve_boundary",
|
||||
"metadata_scope",
|
||||
"movement_evidence",
|
||||
"needs_user_scope",
|
||||
"no_overclaim",
|
||||
"open_organization_scope",
|
||||
"ready_for_reviewed_execution",
|
||||
"route_candidate_handoff",
|
||||
"stale_scope_guard",
|
||||
"unreviewed_proof_family",
|
||||
"value_flow_ranking",
|
||||
"vat"
|
||||
],
|
||||
"validation_status": "accepted_live_replay",
|
||||
"validated_run_dir": "artifacts\\domain_runs\\address_truth_harness_phase91_route_candidate_driven_enablement_loop_live1_20260510",
|
||||
"saved_after_validated_replay": true,
|
||||
"save_gate": {
|
||||
"schema_version": "agent_semantic_save_gate_v1",
|
||||
"validation_status": "accepted_live_replay",
|
||||
"validated_run_dir": "artifacts\\domain_runs\\address_truth_harness_phase91_route_candidate_driven_enablement_loop_live1_20260510",
|
||||
"final_status": "accepted",
|
||||
"review_overall_status": "pass",
|
||||
"business_overall_status": "pass",
|
||||
"steps_total": 5,
|
||||
"steps_passed": 5,
|
||||
"steps_failed": 0,
|
||||
"steps_with_business_failures": 0,
|
||||
"steps_with_business_warnings": 0,
|
||||
"acceptance_gate_passed": true,
|
||||
"saved_after_validated_replay": true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,157 @@
|
|||
{
|
||||
"saved_at": "2026-05-10T12:13:26+00:00",
|
||||
"generation_id": "gen-ag05101213-596d99",
|
||||
"mode": "saved_user_sessions",
|
||||
"title": "AGENT | Phase 93 accounting profit-margin reviewed route",
|
||||
"agent_run": true,
|
||||
"questions": [
|
||||
"по ООО Альтернатива Плюс за 2020 можно точно сказать чистую прибыль и маржу?",
|
||||
"а это прибыль или убыток, коротко?",
|
||||
"по этой же компании на конец 2020 можно точно понять, какая дебиторка просрочена?",
|
||||
"тогда НДС за 2020 по ООО Альтернатива Плюс какой?",
|
||||
"а какой контрагент принес больше всего денег за 2020?",
|
||||
"по ООО Альтернатива Плюс на конец 2020 можно точно подтвердить резерв под неликвиды на складе?"
|
||||
],
|
||||
"metadata": {
|
||||
"assistant_prompt_version": null,
|
||||
"decomposition_prompt_version": null,
|
||||
"prompt_fingerprint": null,
|
||||
"agent_focus": "accounting profit-margin reviewed route",
|
||||
"architecture_phase": "Route-Candidate-Driven Enablement Loop",
|
||||
"source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\address_truth_harness_phase93_accounting_profit_margin_reviewed_route.json",
|
||||
"scenario_id": "address_truth_harness_phase93_accounting_profit_margin_reviewed_route",
|
||||
"semantic_tags": [
|
||||
"accounting_profit_margin",
|
||||
"business_overview",
|
||||
"context_carryover",
|
||||
"debt_due_date_boundary",
|
||||
"inventory_reserve_boundary",
|
||||
"missing_proof_families",
|
||||
"needs_route_enablement",
|
||||
"profit_margin_boundary",
|
||||
"ready_for_reviewed_execution",
|
||||
"value_flow_ranking",
|
||||
"vat_continuity"
|
||||
],
|
||||
"validation_status": "accepted_live_replay",
|
||||
"validated_run_dir": "artifacts\\domain_runs\\address_truth_harness_phase93_accounting_profit_margin_reviewed_route_live3_20260510",
|
||||
"saved_after_validated_replay": true,
|
||||
"save_gate": {
|
||||
"schema_version": "agent_semantic_save_gate_v1",
|
||||
"validation_status": "accepted_live_replay",
|
||||
"validated_run_dir": "artifacts\\domain_runs\\address_truth_harness_phase93_accounting_profit_margin_reviewed_route_live3_20260510",
|
||||
"final_status": "accepted",
|
||||
"review_overall_status": "pass",
|
||||
"business_overall_status": "pass",
|
||||
"steps_total": 6,
|
||||
"steps_passed": 6,
|
||||
"steps_failed": 0,
|
||||
"steps_with_business_failures": 0,
|
||||
"steps_with_business_warnings": 0,
|
||||
"acceptance_gate_passed": true,
|
||||
"saved_after_validated_replay": true
|
||||
}
|
||||
},
|
||||
"source_session_id": null,
|
||||
"session": {
|
||||
"session_id": null,
|
||||
"mode": "agent_semantic_run",
|
||||
"items": [
|
||||
{
|
||||
"message_id": "agent-user-001",
|
||||
"role": "user",
|
||||
"text": "по ООО Альтернатива Плюс за 2020 можно точно сказать чистую прибыль и маржу?",
|
||||
"created_at": "2026-05-10T12:13:26+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-002",
|
||||
"role": "user",
|
||||
"text": "а это прибыль или убыток, коротко?",
|
||||
"created_at": "2026-05-10T12:13:26+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-003",
|
||||
"role": "user",
|
||||
"text": "по этой же компании на конец 2020 можно точно понять, какая дебиторка просрочена?",
|
||||
"created_at": "2026-05-10T12:13:26+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-004",
|
||||
"role": "user",
|
||||
"text": "тогда НДС за 2020 по ООО Альтернатива Плюс какой?",
|
||||
"created_at": "2026-05-10T12:13:26+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-005",
|
||||
"role": "user",
|
||||
"text": "а какой контрагент принес больше всего денег за 2020?",
|
||||
"created_at": "2026-05-10T12:13:26+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-006",
|
||||
"role": "user",
|
||||
"text": "по ООО Альтернатива Плюс на конец 2020 можно точно подтвердить резерв под неликвиды на складе?",
|
||||
"created_at": "2026-05-10T12:13:26+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
}
|
||||
],
|
||||
"agent_run": true,
|
||||
"metadata": {
|
||||
"assistant_prompt_version": null,
|
||||
"decomposition_prompt_version": null,
|
||||
"prompt_fingerprint": null,
|
||||
"agent_focus": "accounting profit-margin reviewed route",
|
||||
"architecture_phase": "Route-Candidate-Driven Enablement Loop",
|
||||
"source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\address_truth_harness_phase93_accounting_profit_margin_reviewed_route.json",
|
||||
"scenario_id": "address_truth_harness_phase93_accounting_profit_margin_reviewed_route",
|
||||
"semantic_tags": [
|
||||
"accounting_profit_margin",
|
||||
"business_overview",
|
||||
"context_carryover",
|
||||
"debt_due_date_boundary",
|
||||
"inventory_reserve_boundary",
|
||||
"missing_proof_families",
|
||||
"needs_route_enablement",
|
||||
"profit_margin_boundary",
|
||||
"ready_for_reviewed_execution",
|
||||
"value_flow_ranking",
|
||||
"vat_continuity"
|
||||
],
|
||||
"validation_status": "accepted_live_replay",
|
||||
"validated_run_dir": "artifacts\\domain_runs\\address_truth_harness_phase93_accounting_profit_margin_reviewed_route_live3_20260510",
|
||||
"saved_after_validated_replay": true,
|
||||
"save_gate": {
|
||||
"schema_version": "agent_semantic_save_gate_v1",
|
||||
"validation_status": "accepted_live_replay",
|
||||
"validated_run_dir": "artifacts\\domain_runs\\address_truth_harness_phase93_accounting_profit_margin_reviewed_route_live3_20260510",
|
||||
"final_status": "accepted",
|
||||
"review_overall_status": "pass",
|
||||
"business_overall_status": "pass",
|
||||
"steps_total": 6,
|
||||
"steps_passed": 6,
|
||||
"steps_failed": 0,
|
||||
"steps_with_business_failures": 0,
|
||||
"steps_with_business_warnings": 0,
|
||||
"acceptance_gate_passed": true,
|
||||
"saved_after_validated_replay": true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,173 @@
|
|||
{
|
||||
"saved_at": "2026-05-10T13:19:22+00:00",
|
||||
"generation_id": "gen-ag05101319-c04f79",
|
||||
"mode": "saved_user_sessions",
|
||||
"title": "AGENT | Phase 94 debt due-date aging reviewed route",
|
||||
"agent_run": true,
|
||||
"questions": [
|
||||
"по ООО Альтернатива Плюс на конец 2020 можно точно понять, какая дебиторка просрочена?",
|
||||
"то есть просрочку доказать нельзя, коротко почему?",
|
||||
"а чистая прибыль и маржа за 2020 по этой же компании какие?",
|
||||
"НДС за 2020 по ООО Альтернатива Плюс какой?",
|
||||
"а кто принес больше всего денег за 2020?",
|
||||
"по ООО Альтернатива Плюс на конец 2020 можно точно подтвердить резерв под неликвиды на складе?",
|
||||
"а зависимость от одного поставщика за 2020 можно точно оценить?"
|
||||
],
|
||||
"metadata": {
|
||||
"assistant_prompt_version": null,
|
||||
"decomposition_prompt_version": null,
|
||||
"prompt_fingerprint": null,
|
||||
"agent_focus": "Focused semantic replay for promoting debt_due_date_aging_quality from proxy-only enablement to a reviewed payment-term/open-balance route while preserving profit, VAT, inventory, and vendor-risk boundaries.",
|
||||
"architecture_phase": "turnaround_11",
|
||||
"source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\address_truth_harness_phase94_debt_due_date_aging_reviewed_route.json",
|
||||
"scenario_id": "address_truth_harness_phase94_debt_due_date_aging_reviewed_route",
|
||||
"semantic_tags": [
|
||||
"accounting_profit_margin",
|
||||
"business_overview",
|
||||
"canary",
|
||||
"checked_negative",
|
||||
"context_carryover",
|
||||
"debt_due_date_aging_quality",
|
||||
"debt_due_date_boundary",
|
||||
"inventory_reserve_boundary",
|
||||
"missing_proof_families",
|
||||
"profit_margin_boundary",
|
||||
"ready_for_reviewed_execution",
|
||||
"value_flow_ranking",
|
||||
"vat_continuity",
|
||||
"vendor_risk_procurement_boundary"
|
||||
],
|
||||
"validation_status": "accepted_live_replay",
|
||||
"validated_run_dir": "artifacts\\domain_runs\\phase94_debt_due_date_aging_reviewed_route_live4",
|
||||
"saved_after_validated_replay": true,
|
||||
"save_gate": {
|
||||
"schema_version": "agent_semantic_save_gate_v1",
|
||||
"validation_status": "accepted_live_replay",
|
||||
"validated_run_dir": "artifacts\\domain_runs\\phase94_debt_due_date_aging_reviewed_route_live4",
|
||||
"final_status": "accepted",
|
||||
"review_overall_status": "pass",
|
||||
"business_overall_status": "pass",
|
||||
"steps_total": 7,
|
||||
"steps_passed": 7,
|
||||
"steps_failed": 0,
|
||||
"steps_with_business_failures": 0,
|
||||
"steps_with_business_warnings": 0,
|
||||
"acceptance_gate_passed": true,
|
||||
"saved_after_validated_replay": true
|
||||
}
|
||||
},
|
||||
"source_session_id": null,
|
||||
"session": {
|
||||
"session_id": null,
|
||||
"mode": "agent_semantic_run",
|
||||
"items": [
|
||||
{
|
||||
"message_id": "agent-user-001",
|
||||
"role": "user",
|
||||
"text": "по ООО Альтернатива Плюс на конец 2020 можно точно понять, какая дебиторка просрочена?",
|
||||
"created_at": "2026-05-10T13:19:22+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-002",
|
||||
"role": "user",
|
||||
"text": "то есть просрочку доказать нельзя, коротко почему?",
|
||||
"created_at": "2026-05-10T13:19:22+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-003",
|
||||
"role": "user",
|
||||
"text": "а чистая прибыль и маржа за 2020 по этой же компании какие?",
|
||||
"created_at": "2026-05-10T13:19:22+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-004",
|
||||
"role": "user",
|
||||
"text": "НДС за 2020 по ООО Альтернатива Плюс какой?",
|
||||
"created_at": "2026-05-10T13:19:22+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-005",
|
||||
"role": "user",
|
||||
"text": "а кто принес больше всего денег за 2020?",
|
||||
"created_at": "2026-05-10T13:19:22+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-006",
|
||||
"role": "user",
|
||||
"text": "по ООО Альтернатива Плюс на конец 2020 можно точно подтвердить резерв под неликвиды на складе?",
|
||||
"created_at": "2026-05-10T13:19:22+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
},
|
||||
{
|
||||
"message_id": "agent-user-007",
|
||||
"role": "user",
|
||||
"text": "а зависимость от одного поставщика за 2020 можно точно оценить?",
|
||||
"created_at": "2026-05-10T13:19:22+00:00",
|
||||
"reply_type": null,
|
||||
"trace_id": null,
|
||||
"debug": null
|
||||
}
|
||||
],
|
||||
"agent_run": true,
|
||||
"metadata": {
|
||||
"assistant_prompt_version": null,
|
||||
"decomposition_prompt_version": null,
|
||||
"prompt_fingerprint": null,
|
||||
"agent_focus": "Focused semantic replay for promoting debt_due_date_aging_quality from proxy-only enablement to a reviewed payment-term/open-balance route while preserving profit, VAT, inventory, and vendor-risk boundaries.",
|
||||
"architecture_phase": "turnaround_11",
|
||||
"source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\address_truth_harness_phase94_debt_due_date_aging_reviewed_route.json",
|
||||
"scenario_id": "address_truth_harness_phase94_debt_due_date_aging_reviewed_route",
|
||||
"semantic_tags": [
|
||||
"accounting_profit_margin",
|
||||
"business_overview",
|
||||
"canary",
|
||||
"checked_negative",
|
||||
"context_carryover",
|
||||
"debt_due_date_aging_quality",
|
||||
"debt_due_date_boundary",
|
||||
"inventory_reserve_boundary",
|
||||
"missing_proof_families",
|
||||
"profit_margin_boundary",
|
||||
"ready_for_reviewed_execution",
|
||||
"value_flow_ranking",
|
||||
"vat_continuity",
|
||||
"vendor_risk_procurement_boundary"
|
||||
],
|
||||
"validation_status": "accepted_live_replay",
|
||||
"validated_run_dir": "artifacts\\domain_runs\\phase94_debt_due_date_aging_reviewed_route_live4",
|
||||
"saved_after_validated_replay": true,
|
||||
"save_gate": {
|
||||
"schema_version": "agent_semantic_save_gate_v1",
|
||||
"validation_status": "accepted_live_replay",
|
||||
"validated_run_dir": "artifacts\\domain_runs\\phase94_debt_due_date_aging_reviewed_route_live4",
|
||||
"final_status": "accepted",
|
||||
"review_overall_status": "pass",
|
||||
"business_overall_status": "pass",
|
||||
"steps_total": 7,
|
||||
"steps_passed": 7,
|
||||
"steps_failed": 0,
|
||||
"steps_with_business_failures": 0,
|
||||
"steps_with_business_warnings": 0,
|
||||
"acceptance_gate_passed": true,
|
||||
"saved_after_validated_replay": true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,40 +0,0 @@
|
|||
{
|
||||
"suite_id": "assistant_saved_session_gen-ag04170808-1907fa",
|
||||
"suite_version": "0.1.0",
|
||||
"schema_version": "assistant_saved_session_suite_v0_1",
|
||||
"generated_at": "2026-04-17T08:08:08+00:00",
|
||||
"generation_id": "gen-ag04170808-1907fa",
|
||||
"mode": "saved_user_sessions",
|
||||
"title": "AGENT | Phase 4 coverage/evidence replay for counterparty fallback, inventory reset, and selected-object provenance",
|
||||
"domain": "address_phase4_coverage_evidence_mix",
|
||||
"scenario_count": 1,
|
||||
"case_ids": [
|
||||
"SAVED-001"
|
||||
],
|
||||
"cases": [
|
||||
{
|
||||
"case_id": "SAVED-001",
|
||||
"scenario_tag": "agent_saved_user_sessions",
|
||||
"title": "AGENT | Phase 4 coverage/evidence replay for counterparty fallback, inventory reset, and selected-object provenance",
|
||||
"question_type": "followup",
|
||||
"broadness_level": "medium",
|
||||
"turns": [
|
||||
{
|
||||
"user_message": "покажи все документы по чепурнову"
|
||||
},
|
||||
{
|
||||
"user_message": "что нам отгружал чепурнов, какой товар или услугу?"
|
||||
},
|
||||
{
|
||||
"user_message": "какие остатки на складе на март 2021"
|
||||
},
|
||||
{
|
||||
"user_message": "По выбранному объекту \"Столешница 600*3050*26 альмандин\": кто нам это поставил?"
|
||||
},
|
||||
{
|
||||
"user_message": "покажи еще раз остатки на эту же дату"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -1,43 +0,0 @@
|
|||
{
|
||||
"suite_id": "assistant_saved_session_gen-ag04170830-5f771d",
|
||||
"suite_version": "0.1.0",
|
||||
"schema_version": "assistant_saved_session_suite_v0_1",
|
||||
"generated_at": "2026-04-17T08:30:44+00:00",
|
||||
"generation_id": "gen-ag04170830-5f771d",
|
||||
"mode": "saved_user_sessions",
|
||||
"title": "AGENT | Phase 5 meta and memory recap replay over interrupted address context",
|
||||
"domain": "address_phase5_meta_memory_mix",
|
||||
"scenario_count": 1,
|
||||
"case_ids": [
|
||||
"SAVED-001"
|
||||
],
|
||||
"cases": [
|
||||
{
|
||||
"case_id": "SAVED-001",
|
||||
"scenario_tag": "agent_saved_user_sessions",
|
||||
"title": "AGENT | Phase 5 meta and memory recap replay over interrupted address context",
|
||||
"question_type": "followup",
|
||||
"broadness_level": "medium",
|
||||
"turns": [
|
||||
{
|
||||
"user_message": "какие остатки на складе на март 2021"
|
||||
},
|
||||
{
|
||||
"user_message": "а исторические остатки тоже можешь?"
|
||||
},
|
||||
{
|
||||
"user_message": "по какой компании мы сейчас работаем?"
|
||||
},
|
||||
{
|
||||
"user_message": "По выбранному объекту \"Столешница 600*3050*26 альмандин\": кто нам это поставил?"
|
||||
},
|
||||
{
|
||||
"user_message": "что ты умеешь?"
|
||||
},
|
||||
{
|
||||
"user_message": "а ты помнишь, что мы по этой позиции уже выяснили?"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -1,40 +0,0 @@
|
|||
{
|
||||
"suite_id": "assistant_saved_session_gen-ag04170855-d13dd3",
|
||||
"suite_version": "0.1.0",
|
||||
"schema_version": "assistant_saved_session_suite_v0_1",
|
||||
"generated_at": "2026-04-17T08:55:50+00:00",
|
||||
"generation_id": "gen-ag04170855-d13dd3",
|
||||
"mode": "saved_user_sessions",
|
||||
"title": "AGENT | Phase 6 provider/runtime replay across chat, meta, and address boundaries",
|
||||
"domain": "address_phase6_provider_axis_mix",
|
||||
"scenario_count": 1,
|
||||
"case_ids": [
|
||||
"SAVED-001"
|
||||
],
|
||||
"cases": [
|
||||
{
|
||||
"case_id": "SAVED-001",
|
||||
"scenario_tag": "agent_saved_user_sessions",
|
||||
"title": "AGENT | Phase 6 provider/runtime replay across chat, meta, and address boundaries",
|
||||
"question_type": "followup",
|
||||
"broadness_level": "medium",
|
||||
"turns": [
|
||||
{
|
||||
"user_message": "привет, как дела?"
|
||||
},
|
||||
{
|
||||
"user_message": "по какой компании мы сейчас работаем?"
|
||||
},
|
||||
{
|
||||
"user_message": "что ты можешь по 1С?"
|
||||
},
|
||||
{
|
||||
"user_message": "какие остатки на складе на март 2021"
|
||||
},
|
||||
{
|
||||
"user_message": "а исторические остатки тоже можешь?"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -1,43 +0,0 @@
|
|||
{
|
||||
"suite_id": "assistant_saved_session_gen-ag04170911-ff51e1",
|
||||
"suite_version": "0.1.0",
|
||||
"schema_version": "assistant_saved_session_suite_v0_1",
|
||||
"generated_at": "2026-04-17T09:11:27+00:00",
|
||||
"generation_id": "gen-ag04170911-ff51e1",
|
||||
"mode": "saved_user_sessions",
|
||||
"title": "AGENT | Phase 7 acceptance replay for inventory root, selected-object continuity, and human meta boundaries",
|
||||
"domain": "address_phase7_acceptance_gate_mix",
|
||||
"scenario_count": 1,
|
||||
"case_ids": [
|
||||
"SAVED-001"
|
||||
],
|
||||
"cases": [
|
||||
{
|
||||
"case_id": "SAVED-001",
|
||||
"scenario_tag": "agent_saved_user_sessions",
|
||||
"title": "AGENT | Phase 7 acceptance replay for inventory root, selected-object continuity, and human meta boundaries",
|
||||
"question_type": "followup",
|
||||
"broadness_level": "medium",
|
||||
"turns": [
|
||||
{
|
||||
"user_message": "какие остатки на складе на март 2021"
|
||||
},
|
||||
{
|
||||
"user_message": "По выбранному объекту \"Столешница 600*3050*26 альмандин\": кто нам это поставил?"
|
||||
},
|
||||
{
|
||||
"user_message": "По выбранному объекту \"Столешница 600*3050*26 альмандин\": покажи документы по этой позиции"
|
||||
},
|
||||
{
|
||||
"user_message": "покажи еще раз остатки на эту же дату"
|
||||
},
|
||||
{
|
||||
"user_message": "по какой компании мы сейчас работаем?"
|
||||
},
|
||||
{
|
||||
"user_message": "а исторические остатки тоже можешь?"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -1,67 +0,0 @@
|
|||
{
|
||||
"suite_id": "assistant_saved_session_gen-ag04170931-6bb7e5",
|
||||
"suite_version": "0.1.0",
|
||||
"schema_version": "assistant_saved_session_suite_v0_1",
|
||||
"generated_at": "2026-04-17T09:31:44+00:00",
|
||||
"generation_id": "gen-ag04170931-6bb7e5",
|
||||
"mode": "saved_user_sessions",
|
||||
"title": "AGENT | Phase 7 mixed replay for documents, selected-object continuity, meta context, and cross-domain pivots",
|
||||
"domain": "address_phase7_meta_domain_mix",
|
||||
"scenario_count": 1,
|
||||
"case_ids": [
|
||||
"SAVED-001"
|
||||
],
|
||||
"cases": [
|
||||
{
|
||||
"case_id": "SAVED-001",
|
||||
"scenario_tag": "agent_saved_user_sessions",
|
||||
"title": "AGENT | Phase 7 mixed replay for documents, selected-object continuity, meta context, and cross-domain pivots",
|
||||
"question_type": "followup",
|
||||
"broadness_level": "medium",
|
||||
"turns": [
|
||||
{
|
||||
"user_message": "привет, как дела?"
|
||||
},
|
||||
{
|
||||
"user_message": "по какой компании мы сейчас работаем?"
|
||||
},
|
||||
{
|
||||
"user_message": "покажи все документы по чепурнову"
|
||||
},
|
||||
{
|
||||
"user_message": "что нам отгружал чепурнов, какой товар или услугу?"
|
||||
},
|
||||
{
|
||||
"user_message": "какие остатки на складе на март 2021"
|
||||
},
|
||||
{
|
||||
"user_message": "По выбранному объекту \"Столешница 600*3050*26 альмандин\": кто нам это поставил?"
|
||||
},
|
||||
{
|
||||
"user_message": "что ты умеешь?"
|
||||
},
|
||||
{
|
||||
"user_message": "По выбранному объекту \"Столешница 600*3050*26 альмандин\": покажи документы по этой позиции"
|
||||
},
|
||||
{
|
||||
"user_message": "а ты помнишь, что мы по этой позиции уже выяснили?"
|
||||
},
|
||||
{
|
||||
"user_message": "покажи еще раз остатки на эту же дату"
|
||||
},
|
||||
{
|
||||
"user_message": "кто нам должен на март 2020"
|
||||
},
|
||||
{
|
||||
"user_message": "остатки по складу на эту же дату"
|
||||
},
|
||||
{
|
||||
"user_message": "а исторические остатки тоже можешь?"
|
||||
},
|
||||
{
|
||||
"user_message": "хвосты покажи по счету 60 на август 2022"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -1,67 +0,0 @@
|
|||
{
|
||||
"suite_id": "assistant_saved_session_gen-ag04170941-87680e",
|
||||
"suite_version": "0.1.0",
|
||||
"schema_version": "assistant_saved_session_suite_v0_1",
|
||||
"generated_at": "2026-04-17T09:41:32+00:00",
|
||||
"generation_id": "gen-ag04170941-87680e",
|
||||
"mode": "saved_user_sessions",
|
||||
"title": "AGENT | Phase 7 mixed replay for documents, selected-object continuity, meta context, and cross-domain pivots",
|
||||
"domain": "address_phase7_meta_domain_mix",
|
||||
"scenario_count": 1,
|
||||
"case_ids": [
|
||||
"SAVED-001"
|
||||
],
|
||||
"cases": [
|
||||
{
|
||||
"case_id": "SAVED-001",
|
||||
"scenario_tag": "agent_saved_user_sessions",
|
||||
"title": "AGENT | Phase 7 mixed replay for documents, selected-object continuity, meta context, and cross-domain pivots",
|
||||
"question_type": "followup",
|
||||
"broadness_level": "medium",
|
||||
"turns": [
|
||||
{
|
||||
"user_message": "привет, как дела?"
|
||||
},
|
||||
{
|
||||
"user_message": "по какой компании мы сейчас работаем?"
|
||||
},
|
||||
{
|
||||
"user_message": "покажи все документы по чепурнову"
|
||||
},
|
||||
{
|
||||
"user_message": "что нам отгружал чепурнов, какой товар или услугу?"
|
||||
},
|
||||
{
|
||||
"user_message": "какие остатки на складе на март 2021"
|
||||
},
|
||||
{
|
||||
"user_message": "По выбранному объекту \"Столешница 600*3050*26 альмандин\": кто нам это поставил?"
|
||||
},
|
||||
{
|
||||
"user_message": "что ты умеешь?"
|
||||
},
|
||||
{
|
||||
"user_message": "По выбранному объекту \"Столешница 600*3050*26 альмандин\": покажи документы по этой позиции"
|
||||
},
|
||||
{
|
||||
"user_message": "а ты помнишь, что мы по этой позиции уже выяснили?"
|
||||
},
|
||||
{
|
||||
"user_message": "покажи еще раз остатки на эту же дату"
|
||||
},
|
||||
{
|
||||
"user_message": "кто нам должен на март 2020"
|
||||
},
|
||||
{
|
||||
"user_message": "остатки по складу на эту же дату"
|
||||
},
|
||||
{
|
||||
"user_message": "а исторические остатки тоже можешь?"
|
||||
},
|
||||
{
|
||||
"user_message": "хвосты покажи по счету 60 на август 2022"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -1,64 +0,0 @@
|
|||
{
|
||||
"suite_id": "assistant_saved_session_gen-ag04171326-15a132",
|
||||
"suite_version": "0.1.0",
|
||||
"schema_version": "assistant_saved_session_suite_v0_1",
|
||||
"generated_at": "2026-04-17T13:26:00+00:00",
|
||||
"generation_id": "gen-ag04171326-15a132",
|
||||
"mode": "saved_user_sessions",
|
||||
"title": "AGENT replay for company selection continuity and organization activity age",
|
||||
"domain": "address_phase5_company_selection_and_activity_age",
|
||||
"scenario_count": 1,
|
||||
"case_ids": [
|
||||
"SAVED-001"
|
||||
],
|
||||
"cases": [
|
||||
{
|
||||
"case_id": "SAVED-001",
|
||||
"scenario_tag": "agent_saved_user_sessions",
|
||||
"title": "AGENT replay for company selection continuity and organization activity age",
|
||||
"question_type": "followup",
|
||||
"broadness_level": "medium",
|
||||
"turns": [
|
||||
{
|
||||
"user_message": "привет, как дела?"
|
||||
},
|
||||
{
|
||||
"user_message": "по какой компании мы сейчас работаем?"
|
||||
},
|
||||
{
|
||||
"user_message": "какие остатки на складе на март 2021"
|
||||
},
|
||||
{
|
||||
"user_message": "давай по Альтернативе Плюс"
|
||||
},
|
||||
{
|
||||
"user_message": "тогда покажи остатки на март 2021"
|
||||
},
|
||||
{
|
||||
"user_message": "По выбранному объекту \"Столешница 600*3050*26 альмандин\": кто нам это поставил?"
|
||||
},
|
||||
{
|
||||
"user_message": "По выбранному объекту \"Столешница 600*3050*26 альмандин\": покажи документы по этой позиции"
|
||||
},
|
||||
{
|
||||
"user_message": "покажи еще раз остатки на эту же дату"
|
||||
},
|
||||
{
|
||||
"user_message": "а по Альтернативе Плюс сколько лет активности в базе 1С?"
|
||||
},
|
||||
{
|
||||
"user_message": "что ты умеешь?"
|
||||
},
|
||||
{
|
||||
"user_message": "а ты помнишь, что мы по этой позиции уже выяснили?"
|
||||
},
|
||||
{
|
||||
"user_message": "кто нам должен на март 2020"
|
||||
},
|
||||
{
|
||||
"user_message": "остатки по складу на эту же дату"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -1,48 +0,0 @@
|
|||
{
|
||||
"suite_id": "assistant_saved_session_gen-ag04171508-760111",
|
||||
"suite_version": "0.1.0",
|
||||
"schema_version": "assistant_saved_session_suite_v0_1",
|
||||
"generated_at": "2026-04-18T07:12:37.854Z",
|
||||
"generation_id": "gen-ag04171508-760111",
|
||||
"mode": "saved_user_sessions",
|
||||
"title": "AGENT replay for inventory clarification continuity and answer-shape cleanliness",
|
||||
"scenario_count": 1,
|
||||
"case_ids": [
|
||||
"SAVED-001"
|
||||
],
|
||||
"cases": [
|
||||
{
|
||||
"case_id": "SAVED-001",
|
||||
"scenario_tag": "agent_saved_user_sessions",
|
||||
"title": "AGENT replay for inventory clarification continuity and answer-shape cleanliness",
|
||||
"question_type": "followup",
|
||||
"broadness_level": "medium",
|
||||
"turns": [
|
||||
{
|
||||
"user_message": "какие остатки на складе на март 2021"
|
||||
},
|
||||
{
|
||||
"user_message": "давай по Альтернативе Плюс"
|
||||
},
|
||||
{
|
||||
"user_message": "тогда покажи остатки на июль2017"
|
||||
},
|
||||
{
|
||||
"user_message": "По выбранному объекту \"Столешница 600*3050*26 альмандин\": кто нам это поставил?"
|
||||
},
|
||||
{
|
||||
"user_message": "а по этой позиции когда была закупка?"
|
||||
},
|
||||
{
|
||||
"user_message": "покажи документы по этой позиции"
|
||||
},
|
||||
{
|
||||
"user_message": "покажи еще раз остатки на эту же дату"
|
||||
},
|
||||
{
|
||||
"user_message": "а что мы уже выяснили по этой позиции?"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -1,51 +0,0 @@
|
|||
{
|
||||
"suite_id": "assistant_saved_session_gen-mo5zy5vo-z9klj34",
|
||||
"suite_version": "0.1.0",
|
||||
"schema_version": "assistant_saved_session_suite_v0_1",
|
||||
"generated_at": "2026-04-19T16:44:57.540Z",
|
||||
"generation_id": "gen-mo5zy5vo-z9klj34",
|
||||
"mode": "saved_user_sessions",
|
||||
"title": "Ручная сессия 19.04.2026, 18:58:20",
|
||||
"scenario_count": 1,
|
||||
"case_ids": [
|
||||
"SAVED-001"
|
||||
],
|
||||
"cases": [
|
||||
{
|
||||
"case_id": "SAVED-001",
|
||||
"scenario_tag": "saved_user_sessions",
|
||||
"title": "Ручная сессия 19.04.2026, 18:58:20",
|
||||
"question_type": "followup",
|
||||
"broadness_level": "medium",
|
||||
"turns": [
|
||||
{
|
||||
"user_message": "привет как дела"
|
||||
},
|
||||
{
|
||||
"user_message": "кто намс должен денег на сегодня"
|
||||
},
|
||||
{
|
||||
"user_message": "что ты можешь"
|
||||
},
|
||||
{
|
||||
"user_message": "кто нам должен денег на сегодня"
|
||||
},
|
||||
{
|
||||
"user_message": "а мы кому"
|
||||
},
|
||||
{
|
||||
"user_message": "какиек остатки на складе на сегодня"
|
||||
},
|
||||
{
|
||||
"user_message": "альтернатива"
|
||||
},
|
||||
{
|
||||
"user_message": "покажи документы по чепурнову"
|
||||
},
|
||||
{
|
||||
"user_message": "какой оборот был свк"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -1,45 +0,0 @@
|
|||
{
|
||||
"suite_id": "assistant_saved_session_gen-moa1y0lw-m30gdsz",
|
||||
"suite_version": "0.1.0",
|
||||
"schema_version": "assistant_saved_session_suite_v0_1",
|
||||
"generated_at": "2026-04-22T12:51:54.645Z",
|
||||
"generation_id": "gen-moa1y0lw-m30gdsz",
|
||||
"mode": "saved_user_sessions",
|
||||
"title": "СВК - Ручная сессия 22.04.2026, 15:50:39",
|
||||
"scenario_count": 1,
|
||||
"case_ids": [
|
||||
"SAVED-001"
|
||||
],
|
||||
"cases": [
|
||||
{
|
||||
"case_id": "SAVED-001",
|
||||
"scenario_tag": "saved_user_sessions",
|
||||
"title": "СВК - Ручная сессия 22.04.2026, 15:50:39",
|
||||
"question_type": "followup",
|
||||
"broadness_level": "medium",
|
||||
"turns": [
|
||||
{
|
||||
"user_message": "найди в 1С контрагента СВК"
|
||||
},
|
||||
{
|
||||
"user_message": "сколько получили по нему за 2020 год"
|
||||
},
|
||||
{
|
||||
"user_message": "а теперь сколько заплатили?"
|
||||
},
|
||||
{
|
||||
"user_message": "а за 2021?"
|
||||
},
|
||||
{
|
||||
"user_message": "а какое нетто?"
|
||||
},
|
||||
{
|
||||
"user_message": "а по документам?"
|
||||
},
|
||||
{
|
||||
"user_message": "а по движениям?"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -1,46 +0,0 @@
|
|||
{
|
||||
"suite_id": "assistant_saved_session_gen-ag04231336-3d4cc9",
|
||||
"suite_version": "0.1.0",
|
||||
"schema_version": "assistant_saved_session_suite_v0_1",
|
||||
"generated_at": "2026-04-23T13:36:22+00:00",
|
||||
"generation_id": "gen-ag04231336-3d4cc9",
|
||||
"mode": "saved_user_sessions",
|
||||
"title": "AGENT | Живой диалог по организации: денежный срез, сравнение и рейтинг",
|
||||
"domain": "address_phase66_human_org_open_scope_dialog",
|
||||
"scenario_count": 1,
|
||||
"case_ids": [
|
||||
"SAVED-001"
|
||||
],
|
||||
"cases": [
|
||||
{
|
||||
"case_id": "SAVED-001",
|
||||
"scenario_tag": "agent_saved_user_sessions",
|
||||
"title": "AGENT | Живой диалог по организации: денежный срез, сравнение и рейтинг",
|
||||
"question_type": "followup",
|
||||
"broadness_level": "medium",
|
||||
"turns": [
|
||||
{
|
||||
"user_message": "Хочу быстрый денежный срез по одной организации без привязки к контрагенту. Сколько вообще входящих денег было за 2020 год?"
|
||||
},
|
||||
{
|
||||
"user_message": "По ООО Альтернатива Плюс."
|
||||
},
|
||||
{
|
||||
"user_message": "Понял, тогда за все время."
|
||||
},
|
||||
{
|
||||
"user_message": "Хорошо. А что по ООО Альтернатива Плюс больше в 2020 году: входящие или исходящие деньги?"
|
||||
},
|
||||
{
|
||||
"user_message": "А что по ООО Альтернатива Плюс больше уже за 2021 год: входящие или исходящие деньги?"
|
||||
},
|
||||
{
|
||||
"user_message": "И кто больше всего принес денег этой организации в 2020 году?"
|
||||
},
|
||||
{
|
||||
"user_message": "А в 2021 году?"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -1,43 +0,0 @@
|
|||
{
|
||||
"suite_id": "assistant_saved_session_gen-ag04231336-4fa660",
|
||||
"suite_version": "0.1.0",
|
||||
"schema_version": "assistant_saved_session_suite_v0_1",
|
||||
"generated_at": "2026-04-23T13:36:22+00:00",
|
||||
"generation_id": "gen-ag04231336-4fa660",
|
||||
"mode": "saved_user_sessions",
|
||||
"title": "AGENT | Живой диалог по НДС: от ориентации до документов",
|
||||
"domain": "address_phase64_human_vat_investigation_dialog",
|
||||
"scenario_count": 1,
|
||||
"case_ids": [
|
||||
"SAVED-001"
|
||||
],
|
||||
"cases": [
|
||||
{
|
||||
"case_id": "SAVED-001",
|
||||
"scenario_tag": "agent_saved_user_sessions",
|
||||
"title": "AGENT | Живой диалог по НДС: от ориентации до документов",
|
||||
"question_type": "followup",
|
||||
"broadness_level": "medium",
|
||||
"turns": [
|
||||
{
|
||||
"user_message": "Мне нужно понять, где в 1С по НДС вообще лежат данные. Какие объекты стоит смотреть по НДС?"
|
||||
},
|
||||
{
|
||||
"user_message": "Хорошо, тогда покажи движения по ООО Альтернатива Плюс."
|
||||
},
|
||||
{
|
||||
"user_message": "За 2020 год."
|
||||
},
|
||||
{
|
||||
"user_message": "А теперь по документам?"
|
||||
},
|
||||
{
|
||||
"user_message": "А теперь за 2021 год?"
|
||||
},
|
||||
{
|
||||
"user_message": "А теперь за все время?"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -1,46 +0,0 @@
|
|||
{
|
||||
"suite_id": "assistant_saved_session_gen-ag04231336-db78b3",
|
||||
"suite_version": "0.1.0",
|
||||
"schema_version": "assistant_saved_session_suite_v0_1",
|
||||
"generated_at": "2026-04-23T13:36:22+00:00",
|
||||
"generation_id": "gen-ag04231336-db78b3",
|
||||
"mode": "saved_user_sessions",
|
||||
"title": "AGENT | Живой диалог по СВК: деньги, нетто, документы и движения",
|
||||
"domain": "address_phase65_human_svk_money_dialog",
|
||||
"scenario_count": 1,
|
||||
"case_ids": [
|
||||
"SAVED-001"
|
||||
],
|
||||
"cases": [
|
||||
{
|
||||
"case_id": "SAVED-001",
|
||||
"scenario_tag": "agent_saved_user_sessions",
|
||||
"title": "AGENT | Живой диалог по СВК: деньги, нетто, документы и движения",
|
||||
"question_type": "followup",
|
||||
"broadness_level": "medium",
|
||||
"turns": [
|
||||
{
|
||||
"user_message": "Хочу проверить одного контрагента. Найди в 1С Группу СВК."
|
||||
},
|
||||
{
|
||||
"user_message": "Посмотри, сколько денег мы получили от него за 2020 год."
|
||||
},
|
||||
{
|
||||
"user_message": "А теперь сколько мы ему заплатили?"
|
||||
},
|
||||
{
|
||||
"user_message": "А какое получилось нетто?"
|
||||
},
|
||||
{
|
||||
"user_message": "А по документам?"
|
||||
},
|
||||
{
|
||||
"user_message": "А по движениям?"
|
||||
},
|
||||
{
|
||||
"user_message": "А теперь за 2021 год?"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -1,82 +0,0 @@
|
|||
{
|
||||
"suite_id": "assistant_saved_session_gen-ag04231844-8e552a",
|
||||
"suite_version": "0.1.0",
|
||||
"schema_version": "assistant_saved_session_suite_v0_1",
|
||||
"generated_at": "2026-04-23T18:44:25+00:00",
|
||||
"generation_id": "gen-ag04231844-8e552a",
|
||||
"mode": "saved_user_sessions",
|
||||
"title": "AGENT | ARCH: Post-F Semantic Integrity Hardening | Смешанный живой диалог: repeated pivots, орг-срез и СВК",
|
||||
"domain": "address_phase82_human_mixed_integrity_status_dialog",
|
||||
"scenario_count": 1,
|
||||
"case_ids": [
|
||||
"SAVED-001"
|
||||
],
|
||||
"cases": [
|
||||
{
|
||||
"case_id": "SAVED-001",
|
||||
"scenario_tag": "agent_saved_user_sessions",
|
||||
"title": "AGENT | ARCH: Post-F Semantic Integrity Hardening | Смешанный живой диалог: repeated pivots, орг-срез и СВК",
|
||||
"question_type": "followup",
|
||||
"broadness_level": "medium",
|
||||
"turns": [
|
||||
{
|
||||
"user_message": "Покажи документы по Жуковке 51."
|
||||
},
|
||||
{
|
||||
"user_message": "Хорошо, а теперь платежи по нему тоже покажи."
|
||||
},
|
||||
{
|
||||
"user_message": "А по нему договоры?"
|
||||
},
|
||||
{
|
||||
"user_message": "А по нему документы?"
|
||||
},
|
||||
{
|
||||
"user_message": "А по нему платежи?"
|
||||
},
|
||||
{
|
||||
"user_message": "А за 2021?"
|
||||
},
|
||||
{
|
||||
"user_message": "С Жуковкой закончили. Теперь нужна другая задача: быстрый денежный срез по одной организации. Если для ответа нужна организация, просто уточни ее. Сколько вообще входящих денег было за 2020 год?"
|
||||
},
|
||||
{
|
||||
"user_message": "По ООО Альтернатива Плюс."
|
||||
},
|
||||
{
|
||||
"user_message": "Понял, тогда за все время."
|
||||
},
|
||||
{
|
||||
"user_message": "Хорошо. А что по ООО Альтернатива Плюс больше в 2020 году: входящие или исходящие деньги?"
|
||||
},
|
||||
{
|
||||
"user_message": "А что по ООО Альтернатива Плюс больше уже за 2021 год: входящие или исходящие деньги?"
|
||||
},
|
||||
{
|
||||
"user_message": "И кто больше всего принес денег этой организации в 2020 году?"
|
||||
},
|
||||
{
|
||||
"user_message": "А в 2021 году?"
|
||||
},
|
||||
{
|
||||
"user_message": "Теперь отдельная тема по конкретному контрагенту. Найди в 1С Группу СВК."
|
||||
},
|
||||
{
|
||||
"user_message": "Сколько получили по нему за 2020 год?"
|
||||
},
|
||||
{
|
||||
"user_message": "А теперь сколько заплатили?"
|
||||
},
|
||||
{
|
||||
"user_message": "А какое нетто?"
|
||||
},
|
||||
{
|
||||
"user_message": "А по документам?"
|
||||
},
|
||||
{
|
||||
"user_message": "А по движениям?"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -0,0 +1,40 @@
|
|||
{
|
||||
"suite_id": "assistant_saved_session_gen-ag05100949-2a5ed8",
|
||||
"suite_version": "0.1.0",
|
||||
"schema_version": "assistant_saved_session_suite_v0_1",
|
||||
"generated_at": "2026-05-10T09:49:21+00:00",
|
||||
"generation_id": "gen-ag05100949-2a5ed8",
|
||||
"mode": "saved_user_sessions",
|
||||
"title": "AGENT | Phase 91 route-candidate-driven enablement loop canary",
|
||||
"domain": "address_phase91_route_candidate_driven_enablement_loop",
|
||||
"scenario_count": 1,
|
||||
"case_ids": [
|
||||
"SAVED-001"
|
||||
],
|
||||
"cases": [
|
||||
{
|
||||
"case_id": "SAVED-001",
|
||||
"scenario_tag": "agent_saved_user_sessions",
|
||||
"title": "AGENT | Phase 91 route-candidate-driven enablement loop canary",
|
||||
"question_type": "followup",
|
||||
"broadness_level": "medium",
|
||||
"turns": [
|
||||
{
|
||||
"user_message": "какой контрагент принес больше всего денег за 2020 год?"
|
||||
},
|
||||
{
|
||||
"user_message": "ООО Альтернатива Плюс"
|
||||
},
|
||||
{
|
||||
"user_message": "а теперь сколько лет мы работаем с Группа СВК?"
|
||||
},
|
||||
{
|
||||
"user_message": "покажи движения по НДС за 2020 по ООО Альтернатива Плюс"
|
||||
},
|
||||
{
|
||||
"user_message": "можно ли по этим данным точно подтвердить резерв под неликвиды на складе?"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -0,0 +1,43 @@
|
|||
{
|
||||
"suite_id": "assistant_saved_session_gen-ag05101213-596d99",
|
||||
"suite_version": "0.1.0",
|
||||
"schema_version": "assistant_saved_session_suite_v0_1",
|
||||
"generated_at": "2026-05-10T12:13:26+00:00",
|
||||
"generation_id": "gen-ag05101213-596d99",
|
||||
"mode": "saved_user_sessions",
|
||||
"title": "AGENT | Phase 93 accounting profit-margin reviewed route",
|
||||
"domain": "address_phase93_accounting_profit_margin_reviewed_route",
|
||||
"scenario_count": 1,
|
||||
"case_ids": [
|
||||
"SAVED-001"
|
||||
],
|
||||
"cases": [
|
||||
{
|
||||
"case_id": "SAVED-001",
|
||||
"scenario_tag": "agent_saved_user_sessions",
|
||||
"title": "AGENT | Phase 93 accounting profit-margin reviewed route",
|
||||
"question_type": "followup",
|
||||
"broadness_level": "medium",
|
||||
"turns": [
|
||||
{
|
||||
"user_message": "по ООО Альтернатива Плюс за 2020 можно точно сказать чистую прибыль и маржу?"
|
||||
},
|
||||
{
|
||||
"user_message": "а это прибыль или убыток, коротко?"
|
||||
},
|
||||
{
|
||||
"user_message": "по этой же компании на конец 2020 можно точно понять, какая дебиторка просрочена?"
|
||||
},
|
||||
{
|
||||
"user_message": "тогда НДС за 2020 по ООО Альтернатива Плюс какой?"
|
||||
},
|
||||
{
|
||||
"user_message": "а какой контрагент принес больше всего денег за 2020?"
|
||||
},
|
||||
{
|
||||
"user_message": "по ООО Альтернатива Плюс на конец 2020 можно точно подтвердить резерв под неликвиды на складе?"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -0,0 +1,46 @@
|
|||
{
|
||||
"suite_id": "assistant_saved_session_gen-ag05101319-c04f79",
|
||||
"suite_version": "0.1.0",
|
||||
"schema_version": "assistant_saved_session_suite_v0_1",
|
||||
"generated_at": "2026-05-10T13:19:22+00:00",
|
||||
"generation_id": "gen-ag05101319-c04f79",
|
||||
"mode": "saved_user_sessions",
|
||||
"title": "AGENT | Phase 94 debt due-date aging reviewed route",
|
||||
"domain": "address_phase94_debt_due_date_aging_reviewed_route",
|
||||
"scenario_count": 1,
|
||||
"case_ids": [
|
||||
"SAVED-001"
|
||||
],
|
||||
"cases": [
|
||||
{
|
||||
"case_id": "SAVED-001",
|
||||
"scenario_tag": "agent_saved_user_sessions",
|
||||
"title": "AGENT | Phase 94 debt due-date aging reviewed route",
|
||||
"question_type": "followup",
|
||||
"broadness_level": "medium",
|
||||
"turns": [
|
||||
{
|
||||
"user_message": "по ООО Альтернатива Плюс на конец 2020 можно точно понять, какая дебиторка просрочена?"
|
||||
},
|
||||
{
|
||||
"user_message": "то есть просрочку доказать нельзя, коротко почему?"
|
||||
},
|
||||
{
|
||||
"user_message": "а чистая прибыль и маржа за 2020 по этой же компании какие?"
|
||||
},
|
||||
{
|
||||
"user_message": "НДС за 2020 по ООО Альтернатива Плюс какой?"
|
||||
},
|
||||
{
|
||||
"user_message": "а кто принес больше всего денег за 2020?"
|
||||
},
|
||||
{
|
||||
"user_message": "по ООО Альтернатива Плюс на конец 2020 можно точно подтвердить резерв под неликвиды на складе?"
|
||||
},
|
||||
{
|
||||
"user_message": "а зависимость от одного поставщика за 2020 можно точно оценить?"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -427,6 +427,13 @@ def classify_truth_harness_step(spec_path: Path, spec: dict[str, Any], step: dic
|
|||
expected_catalog_alignment_status = str(step.get("expected_catalog_alignment_status") or "").strip()
|
||||
expected_catalog_chain_top_match = str(step.get("expected_catalog_chain_top_match") or "").strip()
|
||||
expected_catalog_selected_matches_top = step.get("expected_catalog_selected_matches_top")
|
||||
expected_route_candidate_status = str(step.get("expected_route_candidate_status") or "").strip()
|
||||
expected_route_candidate_executable_now = step.get("expected_route_candidate_executable_now")
|
||||
expected_route_candidate_missing_axes = [
|
||||
str(item).strip()
|
||||
for item in (step.get("expected_route_candidate_missing_axes") or [])
|
||||
if str(item).strip()
|
||||
]
|
||||
semantic_tag_inputs = [*step.get("semantic_tags", []), *_base_step_tags(question, step_id, title, expected_intents)]
|
||||
if (
|
||||
expected_catalog_alignment_status
|
||||
|
|
@ -434,6 +441,12 @@ def classify_truth_harness_step(spec_path: Path, spec: dict[str, Any], step: dic
|
|||
or expected_catalog_selected_matches_top is not None
|
||||
):
|
||||
semantic_tag_inputs.append("planner_catalog_alignment")
|
||||
if (
|
||||
expected_route_candidate_status
|
||||
or expected_route_candidate_executable_now is not None
|
||||
or expected_route_candidate_missing_axes
|
||||
):
|
||||
semantic_tag_inputs.append("route_candidate_handoff")
|
||||
semantic_tags = _normalize_tags(semantic_tag_inputs)
|
||||
return {
|
||||
"entry_id": f"{spec_path.stem}:{step_id}",
|
||||
|
|
@ -451,6 +464,9 @@ def classify_truth_harness_step(spec_path: Path, spec: dict[str, Any], step: dic
|
|||
"expected_catalog_alignment_status": expected_catalog_alignment_status or None,
|
||||
"expected_catalog_chain_top_match": expected_catalog_chain_top_match or None,
|
||||
"expected_catalog_selected_matches_top": expected_catalog_selected_matches_top,
|
||||
"expected_route_candidate_status": expected_route_candidate_status or None,
|
||||
"expected_route_candidate_executable_now": expected_route_candidate_executable_now,
|
||||
"expected_route_candidate_missing_axes": expected_route_candidate_missing_axes,
|
||||
"semantic_tags": semantic_tags,
|
||||
"step_payload": step,
|
||||
}
|
||||
|
|
@ -543,8 +559,16 @@ def _catalog_markdown(catalog: dict[str, Any]) -> str:
|
|||
if entry.get("expected_catalog_selected_matches_top") is not None:
|
||||
catalog_bits.append(f"selected_matches_top={entry.get('expected_catalog_selected_matches_top')}")
|
||||
catalog_suffix = f" | catalog_alignment: {', '.join(catalog_bits)}" if catalog_bits else ""
|
||||
route_bits = []
|
||||
if entry.get("expected_route_candidate_status"):
|
||||
route_bits.append(f"status={entry.get('expected_route_candidate_status')}")
|
||||
if entry.get("expected_route_candidate_executable_now") is not None:
|
||||
route_bits.append(f"executable={entry.get('expected_route_candidate_executable_now')}")
|
||||
if entry.get("expected_route_candidate_missing_axes"):
|
||||
route_bits.append(f"missing={','.join(entry.get('expected_route_candidate_missing_axes') or [])}")
|
||||
route_suffix = f" | route_candidate: {', '.join(route_bits)}" if route_bits else ""
|
||||
lines.append(
|
||||
f"- `{entry.get('entry_id')}` | tags: {tags}{catalog_suffix} | question: {entry.get('question')}"
|
||||
f"- `{entry.get('entry_id')}` | tags: {tags}{catalog_suffix}{route_suffix} | question: {entry.get('question')}"
|
||||
)
|
||||
lines.extend(["", "## Saved session questions"])
|
||||
for entry in catalog.get("saved_session_entries") or []:
|
||||
|
|
|
|||
|
|
@ -122,11 +122,16 @@ BUSINESS_TOP_LINE_SCAFFOLD_MARKERS = (
|
|||
"\u0441\u0442\u0430\u0442\u0443\u0441",
|
||||
)
|
||||
BUSINESS_TECHNICAL_GARBAGE_MARKERS = (
|
||||
"mcp-срез",
|
||||
"лимит выборки mcp",
|
||||
"через mcp",
|
||||
"mcp-провер",
|
||||
"mcp_discovery",
|
||||
"runtime_",
|
||||
"capability_id",
|
||||
"selected_chain_id",
|
||||
"business_overview_route_template_v1",
|
||||
"probe ",
|
||||
"query_movements",
|
||||
"query_documents",
|
||||
)
|
||||
|
|
@ -191,6 +196,7 @@ REPAIR_TARGET_PROBLEM_ORDER = {
|
|||
"followup_action_resolution_gap": 2,
|
||||
"object_memory_gap": 3,
|
||||
"route_gap": 4,
|
||||
"route_candidate_enablement_gap": 4,
|
||||
"answer_shape_mismatch": 5,
|
||||
"field_mapping_gap": 5,
|
||||
"business_utility_gap": 6,
|
||||
|
|
@ -229,6 +235,13 @@ REPAIR_TARGET_FILE_HINTS: dict[str, list[str]] = {
|
|||
"llm_normalizer/backend/src/services/address_runtime/decomposeStage.ts",
|
||||
"llm_normalizer/backend/src/services/assistantService.ts",
|
||||
],
|
||||
"route_candidate_enablement_gap": [
|
||||
"llm_normalizer/backend/src/services/assistantMcpDiscoveryRuntimeBridge.ts",
|
||||
"llm_normalizer/backend/src/services/assistantMcpDiscoveryPlanner.ts",
|
||||
"llm_normalizer/backend/src/services/assistantMcpDiscoveryPilotExecutor.ts",
|
||||
"llm_normalizer/backend/src/services/assistantMcpDiscoveryTurnInputAdapter.ts",
|
||||
"llm_normalizer/backend/src/services/addressRecipeCatalog.ts",
|
||||
],
|
||||
"capability_gap": [
|
||||
"llm_normalizer/backend/src/services/addressCapabilityPolicy.ts",
|
||||
"llm_normalizer/backend/src/services/addressRecipeCatalog.ts",
|
||||
|
|
@ -2313,6 +2326,21 @@ def build_scenario_step_state(
|
|||
"mcp_discovery_catalog_chain_alignment_status": debug.get("mcp_discovery_catalog_chain_alignment_status"),
|
||||
"mcp_discovery_catalog_chain_top_match": debug.get("mcp_discovery_catalog_chain_top_match"),
|
||||
"mcp_discovery_catalog_chain_selected_matches_top": debug.get("mcp_discovery_catalog_chain_selected_matches_top"),
|
||||
"mcp_discovery_route_candidate_status": debug.get("mcp_discovery_route_candidate_status"),
|
||||
"mcp_discovery_route_candidate_fact_family": debug.get("mcp_discovery_route_candidate_fact_family"),
|
||||
"mcp_discovery_route_candidate_action_family": debug.get("mcp_discovery_route_candidate_action_family"),
|
||||
"mcp_discovery_route_candidate_proof_expectation": debug.get("mcp_discovery_route_candidate_proof_expectation"),
|
||||
"mcp_discovery_route_candidate_missing_axes": normalize_string_list(
|
||||
debug.get("mcp_discovery_route_candidate_missing_axes")
|
||||
),
|
||||
"mcp_discovery_route_candidate_provided_axes": normalize_string_list(
|
||||
debug.get("mcp_discovery_route_candidate_provided_axes")
|
||||
),
|
||||
"mcp_discovery_route_candidate_executable_now": debug.get("mcp_discovery_route_candidate_executable_now"),
|
||||
"mcp_discovery_route_candidate_enablement_reason": debug.get(
|
||||
"mcp_discovery_route_candidate_enablement_reason"
|
||||
),
|
||||
"mcp_discovery_route_candidate_next_action": debug.get("mcp_discovery_route_candidate_next_action"),
|
||||
"mcp_discovery_response_applied": debug.get("mcp_discovery_response_applied"),
|
||||
"mcp_discovery_selected_chain_id": debug.get("mcp_discovery_selected_chain_id"),
|
||||
"mcp_discovery_response_candidate_status": (
|
||||
|
|
@ -3338,6 +3366,17 @@ def compact_step_output_for_review(step_output: Any) -> dict[str, Any]:
|
|||
"mcp_discovery_response_applied": step_output.get("mcp_discovery_response_applied"),
|
||||
"mcp_discovery_selected_chain_id": step_output.get("mcp_discovery_selected_chain_id"),
|
||||
"mcp_discovery_response_candidate_status": step_output.get("mcp_discovery_response_candidate_status"),
|
||||
"mcp_discovery_route_candidate_status": step_output.get("mcp_discovery_route_candidate_status"),
|
||||
"mcp_discovery_route_candidate_fact_family": step_output.get("mcp_discovery_route_candidate_fact_family"),
|
||||
"mcp_discovery_route_candidate_action_family": step_output.get("mcp_discovery_route_candidate_action_family"),
|
||||
"mcp_discovery_route_candidate_missing_axes": step_output.get("mcp_discovery_route_candidate_missing_axes"),
|
||||
"mcp_discovery_route_candidate_executable_now": step_output.get(
|
||||
"mcp_discovery_route_candidate_executable_now"
|
||||
),
|
||||
"mcp_discovery_route_candidate_enablement_reason": step_output.get(
|
||||
"mcp_discovery_route_candidate_enablement_reason"
|
||||
),
|
||||
"mcp_discovery_route_candidate_next_action": step_output.get("mcp_discovery_route_candidate_next_action"),
|
||||
"mcp_discovery_effective_intents": step_output.get("mcp_discovery_effective_intents"),
|
||||
"failure_type": step_output.get("failure_type"),
|
||||
"error_message": step_output.get("error_message"),
|
||||
|
|
@ -3368,6 +3407,7 @@ def collect_pack_scenario_artifacts(pack_dir: Path) -> list[dict[str, Any]]:
|
|||
def derive_repair_target_severity(step_output: dict[str, Any]) -> str:
|
||||
if bool(step_output.get("hard_fail")):
|
||||
return "P0"
|
||||
route_candidate_status = str(step_output.get("mcp_discovery_route_candidate_status") or "").strip()
|
||||
violated_invariants = normalize_string_list(step_output.get("violated_invariants"))
|
||||
if any(derive_invariant_severity(step_output, code) == "P0" for code in violated_invariants):
|
||||
return "P0"
|
||||
|
|
@ -3376,6 +3416,8 @@ def derive_repair_target_severity(step_output: dict[str, Any]) -> str:
|
|||
reply_type = str(step_output.get("reply_type") or "").strip()
|
||||
if execution_status == "blocked":
|
||||
return "P0"
|
||||
if route_candidate_status == "needs_route_enablement":
|
||||
return "P1"
|
||||
if acceptance_status in {"rejected", "needs_exact_capability"}:
|
||||
return "P1"
|
||||
if execution_status in {"partial", "needs_exact_capability"} or reply_type == "partial_coverage":
|
||||
|
|
@ -3395,6 +3437,7 @@ def derive_repair_problem_type(step_output: dict[str, Any]) -> str:
|
|||
reply_type = str(step_output.get("reply_type") or "").strip()
|
||||
fallback_type = str(step_output.get("fallback_type") or "").strip()
|
||||
mcp_call_status = str(step_output.get("mcp_call_status") or "").strip()
|
||||
route_candidate_status = str(step_output.get("mcp_discovery_route_candidate_status") or "").strip()
|
||||
|
||||
if "wrong_followup_action" in violated:
|
||||
return "followup_action_resolution_gap"
|
||||
|
|
@ -3421,6 +3464,8 @@ def derive_repair_problem_type(step_output: dict[str, Any]) -> str:
|
|||
return "presentation_gap"
|
||||
if mcp_call_status == "materialized_but_not_anchor_matched":
|
||||
return "domain_anchor_gap"
|
||||
if route_candidate_status == "needs_route_enablement":
|
||||
return "route_candidate_enablement_gap"
|
||||
if acceptance_status == "needs_exact_capability" or execution_status == "needs_exact_capability":
|
||||
return "capability_gap"
|
||||
if reply_type in {"partial_coverage", "clarification_required", "route_mismatch_blocked"} or fallback_type == "partial":
|
||||
|
|
@ -3447,6 +3492,9 @@ def derive_repair_root_cause_layers(step_output: dict[str, Any], problem_type: s
|
|||
layers.append("edge_carryover_gap")
|
||||
elif problem_type == "route_gap":
|
||||
layers.append("semantic_understanding_gap")
|
||||
elif problem_type == "route_candidate_enablement_gap":
|
||||
layers.append("runtime_capability_gap")
|
||||
layers.append("route_candidate_enablement_gap")
|
||||
elif problem_type == "capability_gap":
|
||||
layers.append("runtime_capability_gap")
|
||||
elif problem_type == "presentation_gap":
|
||||
|
|
@ -3481,6 +3529,8 @@ def build_repair_fix_goal(step_output: dict[str, Any], problem_type: str) -> str
|
|||
return f"Keep `{question}` on the requested historical date/period and separate exact-window evidence from nearest available out-of-window evidence."
|
||||
if problem_type == "route_gap":
|
||||
return f"Keep `{question}` on the expected exact route/capability instead of letting wording drift into a different semantic lane."
|
||||
if problem_type == "route_candidate_enablement_gap":
|
||||
return f"Review and enable the route candidate for `{question}` instead of leaving the understood business ask as a non-executable handoff."
|
||||
if problem_type == "capability_gap":
|
||||
return f"Enable an exact route for `{question}` so the loop no longer falls back to partial or unsupported behavior."
|
||||
if problem_type == "presentation_gap":
|
||||
|
|
@ -3496,6 +3546,96 @@ def build_repair_fix_goal(step_output: dict[str, Any], problem_type: str) -> str
|
|||
return f"Improve `{question}` with the smallest patch that removes the current acceptance failure without architecture drift."
|
||||
|
||||
|
||||
def route_candidate_requires_enablement(step_output: dict[str, Any]) -> bool:
    """Tell whether the step's route candidate is waiting for route enablement.

    Reads ``mcp_discovery_route_candidate_status`` from ``step_output``. A missing
    or falsy value is treated as an empty string, and surrounding whitespace is
    ignored before comparing against ``"needs_route_enablement"``.
    """
    raw_status = step_output.get("mcp_discovery_route_candidate_status")
    candidate_status = str(raw_status or "").strip()
    return candidate_status == "needs_route_enablement"
|
||||
|
||||
|
||||
def compact_route_candidate_handoff(
|
||||
*,
|
||||
scenario_id: str,
|
||||
step_id: str,
|
||||
step_output: dict[str, Any],
|
||||
) -> dict[str, Any] | None:
|
||||
status = str(step_output.get("mcp_discovery_route_candidate_status") or "").strip()
|
||||
if not status:
|
||||
return None
|
||||
return {
|
||||
"scenario_id": scenario_id,
|
||||
"step_id": step_id,
|
||||
"target_id": f"{scenario_id}:{step_id}",
|
||||
"question_resolved": str(step_output.get("question_resolved") or "").strip() or None,
|
||||
"candidate_status": status,
|
||||
"selected_chain_id": str(step_output.get("mcp_discovery_selected_chain_id") or "").strip() or None,
|
||||
"fact_family": str(step_output.get("mcp_discovery_route_candidate_fact_family") or "").strip() or None,
|
||||
"action_family": str(step_output.get("mcp_discovery_route_candidate_action_family") or "").strip() or None,
|
||||
"missing_axes": normalize_string_list(step_output.get("mcp_discovery_route_candidate_missing_axes")),
|
||||
"executable_now": step_output.get("mcp_discovery_route_candidate_executable_now") is True,
|
||||
"enablement_reason": str(step_output.get("mcp_discovery_route_candidate_enablement_reason") or "").strip()
|
||||
or None,
|
||||
"next_action": str(step_output.get("mcp_discovery_route_candidate_next_action") or "").strip() or None,
|
||||
}
|
||||
|
||||
|
||||
def build_route_candidate_focus_signature(candidate: dict[str, Any]) -> str:
    """Compose the grouping signature ``status|chain|axes`` for a route candidate.

    Each segment falls back to a placeholder when the candidate has no usable
    value: ``unknown`` for the status, ``no_chain`` for the selected chain id and
    ``no_missing_axes`` when the normalized missing-axes list joins to an empty
    string.
    """
    status_part = str(candidate.get("candidate_status") or "unknown").strip()
    chain_part = str(candidate.get("selected_chain_id") or "no_chain").strip()
    axes_part = ",".join(normalize_string_list(candidate.get("missing_axes")))
    segments = (
        status_part or "unknown",
        chain_part or "no_chain",
        axes_part or "no_missing_axes",
    )
    return "|".join(segments)
|
||||
|
||||
|
||||
def build_route_candidate_handoff_groups(candidates: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Bucket route candidates by focus signature and return the ordered groups.

    Candidates that share the signature produced by
    ``build_route_candidate_focus_signature`` land in one group. The group's
    descriptive fields (status, chain, families, missing axes, executable flag)
    are taken from the first candidate seen for that signature. Each group
    collects every target id, the distinct scenario ids (sorted), up to three
    sample questions, and the distinct next actions and enablement reasons in
    first-seen order.

    Returns:
        Groups ordered with ``needs_route_enablement`` first, then by descending
        candidate count, then by group id.
    """

    def _text(record: dict[str, Any], key: str) -> str:
        return str(record.get(key) or "").strip()

    def _ordering(entry: dict[str, Any]) -> tuple[int, int, str]:
        needs_enablement = str(entry.get("candidate_status") or "") == "needs_route_enablement"
        return (
            0 if needs_enablement else 1,
            -int(entry.get("candidate_count") or 0),
            str(entry.get("group_id") or ""),
        )

    groups_by_signature: dict[str, dict[str, Any]] = {}
    # Scenario ids are gathered as sets on the side and attached, sorted, at the end.
    scenarios_by_signature: dict[str, set[str]] = {}

    for candidate in candidates:
        signature = build_route_candidate_focus_signature(candidate)
        if signature not in groups_by_signature:
            groups_by_signature[signature] = {
                "group_id": signature,
                "candidate_status": candidate.get("candidate_status"),
                "selected_chain_id": candidate.get("selected_chain_id"),
                "fact_family": candidate.get("fact_family"),
                "action_family": candidate.get("action_family"),
                "missing_axes": normalize_string_list(candidate.get("missing_axes")),
                "executable_now": candidate.get("executable_now") is True,
                "target_ids": [],
                "sample_questions": [],
                "next_actions": [],
                "enablement_reasons": [],
            }
            scenarios_by_signature[signature] = set()
        bucket = groups_by_signature[signature]

        target_id = _text(candidate, "target_id")
        if target_id:
            bucket["target_ids"].append(target_id)

        scenario_id = _text(candidate, "scenario_id")
        if scenario_id:
            scenarios_by_signature[signature].add(scenario_id)

        question = _text(candidate, "question_resolved")
        if question and len(bucket["sample_questions"]) < 3:
            bucket["sample_questions"].append(question)

        # Next actions and enablement reasons stay unique, in first-seen order.
        for source_key, bucket_key in (
            ("next_action", "next_actions"),
            ("enablement_reason", "enablement_reasons"),
        ):
            value = _text(candidate, source_key)
            if value and value not in bucket[bucket_key]:
                bucket[bucket_key].append(value)

    finalized: list[dict[str, Any]] = []
    for signature, bucket in groups_by_signature.items():
        bucket["scenario_ids"] = sorted(scenarios_by_signature[signature])
        bucket["candidate_count"] = len(bucket.get("target_ids") or [])
        finalized.append(bucket)

    return sorted(finalized, key=_ordering)
|
||||
|
||||
|
||||
def build_step_repair_target(
|
||||
*,
|
||||
scenario_id: str,
|
||||
|
|
@ -3508,7 +3648,12 @@ def build_step_repair_target(
|
|||
execution_status = str(step_output.get("execution_status") or "").strip() or "unknown"
|
||||
violated_invariants = normalize_string_list(step_output.get("violated_invariants"))
|
||||
warnings = normalize_string_list(step_output.get("warnings"))
|
||||
if acceptance_status in {"validated", "accepted"} and not violated_invariants and not warnings:
|
||||
if (
|
||||
acceptance_status in {"validated", "accepted"}
|
||||
and not violated_invariants
|
||||
and not warnings
|
||||
and not route_candidate_requires_enablement(step_output)
|
||||
):
|
||||
return None
|
||||
|
||||
problem_type = derive_repair_problem_type(step_output)
|
||||
|
|
@ -3524,8 +3669,17 @@ def build_step_repair_target(
|
|||
signals.append(f"violation={violation}")
|
||||
for warning in warnings[:3]:
|
||||
signals.append(f"warning={warning}")
|
||||
route_candidate_status = str(step_output.get("mcp_discovery_route_candidate_status") or "").strip()
|
||||
if route_candidate_status:
|
||||
signals.append(f"route_candidate_status={route_candidate_status}")
|
||||
selected_chain_id = str(step_output.get("mcp_discovery_selected_chain_id") or "").strip()
|
||||
if selected_chain_id:
|
||||
signals.append(f"selected_chain_id={selected_chain_id}")
|
||||
missing_axes = normalize_string_list(step_output.get("mcp_discovery_route_candidate_missing_axes"))
|
||||
if missing_axes:
|
||||
signals.append(f"route_candidate_missing_axes={','.join(missing_axes)}")
|
||||
|
||||
return {
|
||||
target = {
|
||||
"target_id": f"{scenario_id}:{step_id}",
|
||||
"scenario_id": scenario_id,
|
||||
"scenario_title": scenario_title,
|
||||
|
|
@ -3546,6 +3700,12 @@ def build_step_repair_target(
|
|||
"step_state_json": str(step_state_path),
|
||||
},
|
||||
}
|
||||
route_candidate = compact_route_candidate_handoff(scenario_id=scenario_id, step_id=step_id, step_output=step_output)
|
||||
if route_candidate:
|
||||
target["route_candidate"] = route_candidate
|
||||
if route_candidate_status == "needs_route_enablement":
|
||||
target["target_source"] = "route_candidate_enablement"
|
||||
return target
|
||||
|
||||
|
||||
def build_repair_focus_signature(target: dict[str, Any]) -> str:
|
||||
|
|
@ -3610,6 +3770,7 @@ def build_deterministic_repair_targets(
|
|||
scenario_artifacts: list[dict[str, Any]],
|
||||
) -> dict[str, Any]:
|
||||
targets: list[dict[str, Any]] = []
|
||||
route_candidates: list[dict[str, Any]] = []
|
||||
step_validation_index: dict[str, dict[str, Any]] = {}
|
||||
for scenario_artifact in scenario_artifacts:
|
||||
scenario_id = str(scenario_artifact.get("scenario_id") or "").strip()
|
||||
|
|
@ -3626,6 +3787,13 @@ def build_deterministic_repair_targets(
|
|||
continue
|
||||
step_key = f"{scenario_id}:{step_id}"
|
||||
step_validation_index[step_key] = compact_step_output_for_review(raw_step_output)
|
||||
route_candidate = compact_route_candidate_handoff(
|
||||
scenario_id=scenario_id,
|
||||
step_id=str(step_id),
|
||||
step_output=raw_step_output,
|
||||
)
|
||||
if route_candidate:
|
||||
route_candidates.append(route_candidate)
|
||||
target = build_step_repair_target(
|
||||
scenario_id=scenario_id,
|
||||
scenario_title=scenario_title,
|
||||
|
|
@ -3675,6 +3843,11 @@ def build_deterministic_repair_targets(
|
|||
severity = str(target.get("severity") or "P2")
|
||||
if severity in severity_counts:
|
||||
severity_counts[severity] += 1
|
||||
route_candidate_groups = build_route_candidate_handoff_groups(route_candidates)
|
||||
route_candidate_status_counts: dict[str, int] = {}
|
||||
for candidate in route_candidates:
|
||||
status = str(candidate.get("candidate_status") or "unknown").strip() or "unknown"
|
||||
route_candidate_status_counts[status] = route_candidate_status_counts.get(status, 0) + 1
|
||||
return {
|
||||
"schema_version": "domain_pack_repair_targets_v1",
|
||||
"pack_id": pack_state.get("pack_id"),
|
||||
|
|
@ -3685,6 +3858,9 @@ def build_deterministic_repair_targets(
|
|||
"priority_foci": priority_foci,
|
||||
"targets": targets,
|
||||
"step_validation_index": step_validation_index,
|
||||
"route_candidate_handoff_count": len(route_candidates),
|
||||
"route_candidate_status_counts": route_candidate_status_counts,
|
||||
"route_candidate_groups": route_candidate_groups,
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -3959,7 +4135,26 @@ def build_repair_targets_summary(repair_targets: dict[str, Any]) -> str:
|
|||
f"- analyst_priority_target_count: `{repair_targets.get('analyst_priority_target_count') if 'analyst_priority_target_count' in repair_targets else 'n/a'}`",
|
||||
f"- suppressed_analyst_priority_target_count: `{repair_targets.get('suppressed_analyst_priority_target_count') if 'suppressed_analyst_priority_target_count' in repair_targets else 0}`",
|
||||
f"- severity_counts: `{dump_json(repair_targets.get('severity_counts') or {})}`",
|
||||
f"- route_candidate_handoff_count: `{repair_targets.get('route_candidate_handoff_count') if 'route_candidate_handoff_count' in repair_targets else 0}`",
|
||||
f"- route_candidate_status_counts: `{dump_json(repair_targets.get('route_candidate_status_counts') or {})}`",
|
||||
]
|
||||
route_candidate_groups = repair_targets.get("route_candidate_groups") or []
|
||||
if isinstance(route_candidate_groups, list) and route_candidate_groups:
|
||||
lines.extend(["", "## Route Candidate Groups"])
|
||||
for group in route_candidate_groups:
|
||||
if not isinstance(group, dict):
|
||||
continue
|
||||
lines.extend(
|
||||
[
|
||||
f"- group_id: `{group.get('group_id') or 'n/a'}`",
|
||||
f" candidate_count: `{group.get('candidate_count') or 0}`",
|
||||
f" status: `{group.get('candidate_status') or 'n/a'}`",
|
||||
f" selected_chain_id: `{group.get('selected_chain_id') or 'n/a'}`",
|
||||
f" missing_axes: `{', '.join(normalize_string_list(group.get('missing_axes'))) or 'n/a'}`",
|
||||
f" executable_now: `{group.get('executable_now')}`",
|
||||
f" target_ids: `{', '.join(normalize_string_list(group.get('target_ids'))) or 'n/a'}`",
|
||||
]
|
||||
)
|
||||
priority_foci = repair_targets.get("priority_foci") or []
|
||||
if isinstance(priority_foci, list) and priority_foci:
|
||||
lines.extend(
|
||||
|
|
@ -4155,6 +4350,7 @@ def build_analyst_loop_prompt(
|
|||
- `accepted` is forbidden if the evidence bundle shows `pack_state.final_status != accepted` or the deterministic repair targets still contain any `P0` or `P1` items;
|
||||
- `accepted` also requires `direct_answer_ok = true`, `business_usefulness_ok = true`, `temporal_honesty_ok = true`, and `field_truth_ok = true`;
|
||||
- Treat validated bounded MCP discovery as the semantic route when `bounded_mcp_answer_validated = true`, `mcp_discovery_response_applied = true`, `mcp_discovery_response_candidate_status = ready_for_guarded_use`, and `mcp_discovery_effective_intents` matches the business question. The legacy address route may be only a seed lane; do not call that silent heuristic masking unless the selected discovery chain is wrong, not ready, not applied, or the user-facing answer/state is semantically wrong.
|
||||
- Use `mcp_discovery_route_candidate_status`, `mcp_discovery_route_candidate_missing_axes`, and `mcp_discovery_route_candidate_enablement_reason` to distinguish a valid user-scope clarification from a real missing reviewed route. Do not ask the coder to overfit the visible answer when the correct next action is route enablement or missing-axis clarification.
|
||||
- Treat `guarded_insufficiency_validated = true` as an acceptable limited answer only when the user-facing text explicitly says the exact fact is not confirmed and separates movement/candidate evidence from confirmed balances.
|
||||
- Before creating a priority target about missing validation, inspect the compact step state flags `bounded_mcp_answer_validated`, `memory_checkpoint_validated`, `runtime_factual_answer_validated`, and `guarded_insufficiency_validated`; do not repeat a stale target that contradicts those machine-readable flags.
|
||||
- Conversely, if a step is accepted only by first-line shape while `bounded_mcp_answer_validated`, `memory_checkpoint_validated`, `runtime_factual_answer_validated`, and `guarded_insufficiency_validated` are all false, treat that as potential silent heuristic or partial masking.
|
||||
|
|
@ -4414,6 +4610,15 @@ def build_lead_coder_handoff(
|
|||
assigned_focus = select_primary_repair_focus(repair_targets)
|
||||
priority_foci = _limited_dict_items(repair_targets.get("priority_foci") if isinstance(repair_targets, dict) else [])
|
||||
repair_items = _limited_dict_items(repair_targets.get("targets") if isinstance(repair_targets, dict) else [], limit=8)
|
||||
route_candidate_groups = _limited_dict_items(
|
||||
repair_targets.get("route_candidate_groups") if isinstance(repair_targets, dict) else [],
|
||||
limit=8,
|
||||
)
|
||||
route_candidate_enablement_targets = [
|
||||
item
|
||||
for item in repair_items
|
||||
if isinstance(item, dict) and str(item.get("target_source") or "") == "route_candidate_enablement"
|
||||
]
|
||||
candidate_files = [repo_relative(path) for path in build_coder_snapshot_paths(repair_targets)]
|
||||
return {
|
||||
"schema_version": "domain_loop_lead_coder_handoff_v1",
|
||||
|
|
@ -4453,10 +4658,13 @@ def build_lead_coder_handoff(
|
|||
"assigned_primary_focus": assigned_focus or None,
|
||||
"priority_foci": priority_foci,
|
||||
"top_repair_targets": repair_items,
|
||||
"route_candidate_groups": route_candidate_groups,
|
||||
"route_candidate_enablement_targets": route_candidate_enablement_targets,
|
||||
"candidate_files": candidate_files,
|
||||
"lead_instructions": [
|
||||
"Read business_audit.md first and judge the user-facing answer before debug metadata.",
|
||||
"Inspect analyst_verdict.json and repair_targets.json only after the semantic defect is clear.",
|
||||
"Use route_candidate_groups to distinguish missing user scope from a reviewed-route enablement gap before patching.",
|
||||
"Patch code manually in the main Codex context; do not launch a weak autonomous coder by default.",
|
||||
"Keep the patch narrow, preserve UTF-8 without BOM, run targeted tests/build, rebuild graphify after code edits, then rerun the same semantic pack.",
|
||||
],
|
||||
|
|
@ -4523,6 +4731,46 @@ def build_lead_coder_handoff_markdown(handoff: dict[str, Any]) -> str:
|
|||
f" candidate_files: `{', '.join(candidate_files) if candidate_files else 'n/a'}`",
|
||||
]
|
||||
)
|
||||
route_candidate_groups = _limited_dict_items(handoff.get("route_candidate_groups"), limit=8)
|
||||
lines.extend(["", "## Route Candidate Handoff Groups"])
|
||||
if not route_candidate_groups:
|
||||
lines.append("- no route candidates")
|
||||
for group in route_candidate_groups:
|
||||
target_ids = normalize_string_list(group.get("target_ids"))
|
||||
missing_axes = normalize_string_list(group.get("missing_axes"))
|
||||
sample_questions = normalize_string_list(group.get("sample_questions"))
|
||||
next_actions = normalize_string_list(group.get("next_actions"))
|
||||
lines.extend(
|
||||
[
|
||||
f"- group_id: `{group.get('group_id') or 'n/a'}`",
|
||||
f" status: `{group.get('candidate_status') or 'n/a'}`",
|
||||
f" selected_chain_id: `{group.get('selected_chain_id') or 'n/a'}`",
|
||||
f" fact/action: `{group.get('fact_family') or 'n/a'}` / `{group.get('action_family') or 'n/a'}`",
|
||||
f" missing_axes: `{', '.join(missing_axes) if missing_axes else 'n/a'}`",
|
||||
f" executable_now: `{group.get('executable_now')}`",
|
||||
f" target_ids: `{', '.join(target_ids) if target_ids else 'n/a'}`",
|
||||
f" sample_questions: `{'; '.join(sample_questions) if sample_questions else 'n/a'}`",
|
||||
f" next_actions: `{'; '.join(next_actions) if next_actions else 'n/a'}`",
|
||||
]
|
||||
)
|
||||
route_candidate_enablement_targets = _limited_dict_items(
|
||||
handoff.get("route_candidate_enablement_targets"),
|
||||
limit=8,
|
||||
)
|
||||
lines.extend(["", "## Route Candidate Enablement Targets"])
|
||||
if not route_candidate_enablement_targets:
|
||||
lines.append("- no route-candidate enablement targets")
|
||||
for target in route_candidate_enablement_targets:
|
||||
route_candidate = target.get("route_candidate") if isinstance(target.get("route_candidate"), dict) else {}
|
||||
lines.extend(
|
||||
[
|
||||
f"- `{target.get('target_id') or 'n/a'}`",
|
||||
f" severity: `{target.get('severity') or 'n/a'}`",
|
||||
f" selected_chain_id: `{route_candidate.get('selected_chain_id') or 'n/a'}`",
|
||||
f" missing_axes: `{', '.join(normalize_string_list(route_candidate.get('missing_axes'))) or 'n/a'}`",
|
||||
f" fix_goal: {target.get('fix_goal') or 'n/a'}",
|
||||
]
|
||||
)
|
||||
lines.extend(["", "## Candidate Files"])
|
||||
candidate_files = normalize_string_list(handoff.get("candidate_files"))
|
||||
lines.extend([f"- `{item}`" for item in candidate_files] or ["- no candidate files"])
|
||||
|
|
|
|||
|
|
@ -28,6 +28,9 @@ TECHNICAL_QUESTION_FIELDS = (
|
|||
"expected_catalog_alignment_status",
|
||||
"expected_catalog_chain_top_match",
|
||||
"expected_catalog_selected_matches_top",
|
||||
"expected_route_candidate_status",
|
||||
"expected_route_candidate_executable_now",
|
||||
"expected_route_candidate_missing_axes",
|
||||
"required_filters",
|
||||
"forbidden_capabilities",
|
||||
"forbidden_recipes",
|
||||
|
|
@ -100,6 +103,13 @@ def normalize_step_spec(index: int, raw_step: Any) -> dict[str, Any]:
|
|||
str(step.get("expected_catalog_chain_top_match") or "").strip() or None
|
||||
)
|
||||
normalized_step["expected_catalog_selected_matches_top"] = step.get("expected_catalog_selected_matches_top")
|
||||
normalized_step["expected_route_candidate_status"] = (
|
||||
str(step.get("expected_route_candidate_status") or "").strip() or None
|
||||
)
|
||||
normalized_step["expected_route_candidate_executable_now"] = step.get("expected_route_candidate_executable_now")
|
||||
normalized_step["expected_route_candidate_missing_axes"] = normalize_pattern_list(
|
||||
step.get("expected_route_candidate_missing_axes")
|
||||
)
|
||||
normalized_step["required_answer_patterns_any"] = normalize_pattern_list(step.get("required_answer_patterns_any"))
|
||||
normalized_step["required_answer_patterns_all"] = normalize_pattern_list(step.get("required_answer_patterns_all"))
|
||||
normalized_step["required_direct_answer_patterns_any"] = normalize_pattern_list(
|
||||
|
|
@ -395,6 +405,7 @@ def evaluate_truth_step(
|
|||
capability_id = str(step_state.get("capability_id") or "").strip()
|
||||
catalog_alignment_status = str(step_state.get("mcp_discovery_catalog_chain_alignment_status") or "").strip()
|
||||
catalog_chain_top_match = str(step_state.get("mcp_discovery_catalog_chain_top_match") or "").strip()
|
||||
route_candidate_status = str(step_state.get("mcp_discovery_route_candidate_status") or "").strip()
|
||||
limited_reason_category = str(step_state.get("limited_reason_category") or "").strip()
|
||||
extracted_filters = (
|
||||
step_state.get("extracted_filters") if isinstance(step_state.get("extracted_filters"), dict) else {}
|
||||
|
|
@ -477,6 +488,68 @@ def evaluate_truth_step(
|
|||
expected=expected_catalog_selected_matches_top,
|
||||
)
|
||||
|
||||
expected_route_candidate_status = str(
|
||||
resolve_nested_placeholders(
|
||||
step.get("expected_route_candidate_status"),
|
||||
step_results,
|
||||
bindings,
|
||||
runtime_bindings,
|
||||
)
|
||||
or ""
|
||||
).strip()
|
||||
if expected_route_candidate_status and route_candidate_status != expected_route_candidate_status:
|
||||
append_finding(
|
||||
findings,
|
||||
step,
|
||||
"wrong_route_candidate_status",
|
||||
"Runtime route candidate status does not match the expected autonomy handoff verdict for this step.",
|
||||
actual=route_candidate_status or None,
|
||||
expected=expected_route_candidate_status,
|
||||
)
|
||||
|
||||
expected_route_candidate_executable_now = normalize_optional_bool(
|
||||
resolve_nested_placeholders(
|
||||
step.get("expected_route_candidate_executable_now"),
|
||||
step_results,
|
||||
bindings,
|
||||
runtime_bindings,
|
||||
)
|
||||
)
|
||||
if expected_route_candidate_executable_now is not None:
|
||||
actual_route_candidate_executable_now = step_state.get("mcp_discovery_route_candidate_executable_now") is True
|
||||
if actual_route_candidate_executable_now != expected_route_candidate_executable_now:
|
||||
append_finding(
|
||||
findings,
|
||||
step,
|
||||
"wrong_route_candidate_executable_now",
|
||||
"Runtime route candidate executability does not match the expected autonomy handoff verdict.",
|
||||
actual=actual_route_candidate_executable_now,
|
||||
expected=expected_route_candidate_executable_now,
|
||||
)
|
||||
|
||||
expected_route_candidate_missing_axes = dcl.normalize_string_list(
|
||||
resolve_nested_placeholders(
|
||||
step.get("expected_route_candidate_missing_axes"),
|
||||
step_results,
|
||||
bindings,
|
||||
runtime_bindings,
|
||||
)
|
||||
)
|
||||
if expected_route_candidate_missing_axes:
|
||||
actual_missing_axes = dcl.normalize_string_list(step_state.get("mcp_discovery_route_candidate_missing_axes"))
|
||||
missing_expected_axes = [
|
||||
axis for axis in expected_route_candidate_missing_axes if axis not in actual_missing_axes
|
||||
]
|
||||
if missing_expected_axes:
|
||||
append_finding(
|
||||
findings,
|
||||
step,
|
||||
"missing_route_candidate_axes",
|
||||
"Runtime route candidate does not expose expected missing axes for the handoff.",
|
||||
actual=actual_missing_axes,
|
||||
expected=expected_route_candidate_missing_axes,
|
||||
)
|
||||
|
||||
if step_state.get("question_resolved") != step["question_template"]:
|
||||
append_finding(
|
||||
findings,
|
||||
|
|
@ -928,6 +1001,9 @@ def build_truth_review_markdown(spec: dict[str, Any], scenario_state: dict[str,
|
|||
f"catalog_alignment_status: `{step_state.get('mcp_discovery_catalog_chain_alignment_status') or 'n/a'}`",
|
||||
f"catalog_top_match: `{step_state.get('mcp_discovery_catalog_chain_top_match') or 'n/a'}`",
|
||||
f"catalog_selected_matches_top: `{step_state.get('mcp_discovery_catalog_chain_selected_matches_top')}`",
|
||||
f"route_candidate_status: `{step_state.get('mcp_discovery_route_candidate_status') or 'n/a'}`",
|
||||
f"route_candidate_missing_axes: `{', '.join(step_state.get('mcp_discovery_route_candidate_missing_axes') or []) or 'n/a'}`",
|
||||
f"route_candidate_executable_now: `{step_state.get('mcp_discovery_route_candidate_executable_now')}`",
|
||||
f"limited_reason_category: `{step_state.get('limited_reason_category') or 'n/a'}`",
|
||||
f"filters: `{dump_json(step_state.get('extracted_filters') or {})}`",
|
||||
f"direct_answer: {step_state.get('actual_direct_answer') or 'n/a'}",
|
||||
|
|
|
|||
|
|
@ -66,6 +66,51 @@ FALSE_CATASTROPHE_MARKERS = (
|
|||
"неправильно",
|
||||
)
|
||||
BUSINESS_NOUN_MARKERS = tuple(sorted({item for values in DOMAIN_MARKERS.values() for item in values}))
|
||||
# Substrings that signal internal MCP/probe mechanics leaking into the text shown
# to the user. "probe" is listed with a trailing and a leading space so that it is
# only matched next to a space rather than inside a longer word.
GUI_TECHNICAL_LEAK_MARKERS = (
    "mcp-срез",
    "лимит выборки mcp",
    "через mcp",
    "mcp-провер",
    "probe ",
    " probe",
)

# Substrings that identify a bank / financial institution in the answer text.
# NOTE(review): "банк " carries a trailing space, so a bare "банк" at the very end
# of the text or right before punctuation is not matched by it - confirm intended.
FINANCIAL_INSTITUTION_MARKERS = (
    "сбербанк",
    "втб",
    "альфа-банк",
    "альфа банк",
    "тинькофф",
    "т-банк",
    "газпромбанк",
    "росбанк",
    "райффайзен",
    "совкомбанк",
    "промсвязьбанк",
    "мкб",
    "ак барс",
    "уралсиб",
    "банк ",
)

# Substrings (mostly word stems) indicating the answer presents a counterparty in
# an ordinary commercial role: customer, supplier, revenue, procurement.
ORDINARY_COUNTERPARTY_ROLE_MARKERS = (
    "принёс",
    "принес",
    "клиент",
    "поставщик",
    "покупател",
    "выручк",
    "доход",
    "закуп",
    "procurement concentration proxy",
    "крупнейший подтвержденный поставщик",
)

# Substrings showing the answer already draws an explicit boundary around a
# financial institution (e.g. states it is not an ordinary counterparty).
SAFE_FINANCIAL_BOUNDARY_MARKERS = (
    "банк/финансовая",
    "финансовая организация",
    "не считаю",
    "не обычн",
    "без назначения платеж",
    "без договора",
)
|
||||
|
||||
|
||||
def now_iso() -> str:
|
||||
|
|
@ -289,10 +334,59 @@ def build_step_for_pair(pair: dict[str, Any]) -> dict[str, Any]:
|
|||
"invariant_severity": {
|
||||
"answer_layering_noise": "P1",
|
||||
"business_answer_too_verbose": "P1",
|
||||
"bank_counterparty_misclassified_as_business_partner": "P1",
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def has_any_marker(text: str, markers: tuple[str, ...]) -> bool:
    """Return True when *text* contains at least one of *markers*.

    Matching is a plain substring test performed on case-folded strings, so it
    is case-insensitive. A falsy *text* (``None``, ``""``) is treated as an
    empty string, and empty markers are ignored instead of matching everything.

    Args:
        text: Text to scan; coerced with ``str()``.
        markers: Candidate substrings.

    Returns:
        ``True`` if any non-empty marker occurs in the text, else ``False``.
    """
    lowered = str(text or "").casefold()
    # Skip empty markers: "" is a substring of every string.
    return any(marker.casefold() in lowered for marker in markers if marker)
|
||||
|
||||
|
||||
def marker_hits(text: str, markers: tuple[str, ...]) -> list[str]:
    """Return every marker from *markers* that occurs in *text*.

    Matching is a case-insensitive substring test (both sides are case-folded).
    A falsy *text* is treated as an empty string and empty markers are skipped.

    Args:
        text: Text to scan; coerced with ``str()``.
        markers: Candidate substrings.

    Returns:
        The matching markers, in their original spelling and in the order they
        appear in *markers*.
    """
    lowered = str(text or "").casefold()
    return [marker for marker in markers if marker and marker.casefold() in lowered]
|
||||
|
||||
|
||||
def augment_gui_business_review(step_state: dict[str, Any]) -> dict[str, Any]:
    """Extend ``step_state["business_first_review"]`` with GUI-specific checks.

    Two checks run against ``step_state["assistant_text"]``:

    * Technical leak: every ``GUI_TECHNICAL_LEAK_MARKERS`` hit is merged into
      ``technical_garbage_hits``. If any hit exists (new or pre-existing), the
      ``technical_garbage_in_answer`` issue code and the ``business_utility_gap``
      root-cause layer are added.
    * Bank misclassification: text that contains a financial-institution marker
      and an ordinary counterparty-role marker, but no safe-boundary marker,
      gets the ``bank_counterparty_misclassified_as_business_partner`` issue
      code and the ``business_semantic_role_gap`` root-cause layer.

    ``business_usefulness_ok`` is recomputed as "there are no issue codes",
    replacing whatever value the incoming review carried.

    Args:
        step_state: Step state dict. The existing review (if it is a dict) is
            shallow-copied before being modified.

    Returns:
        The same ``step_state`` object, mutated so that
        ``business_first_review`` holds the augmented review.
    """
    existing_review = step_state.get("business_first_review")
    review = dict(existing_review) if isinstance(existing_review, dict) else {}
    assistant_text = str(step_state.get("assistant_text") or "")

    def non_blank_strings(key: str) -> list[str]:
        # ``or ()`` also covers a key that is present but holds None;
        # ``review.get(key, [])`` would return None there and fail to iterate.
        return [str(item) for item in review.get(key) or () if str(item).strip()]

    issue_codes = non_blank_strings("issue_codes")
    root_layers = non_blank_strings("suggested_root_cause_layers")
    technical_hits = non_blank_strings("technical_garbage_hits")

    # Merge freshly detected leak markers without duplicating reported hits.
    for hit in marker_hits(assistant_text, GUI_TECHNICAL_LEAK_MARKERS):
        if hit not in technical_hits:
            technical_hits.append(hit)
    if technical_hits:
        if "technical_garbage_in_answer" not in issue_codes:
            issue_codes.append("technical_garbage_in_answer")
        if "business_utility_gap" not in root_layers:
            root_layers.append("business_utility_gap")

    has_financial_counterparty = has_any_marker(assistant_text, FINANCIAL_INSTITUTION_MARKERS)
    has_ordinary_role_claim = has_any_marker(assistant_text, ORDINARY_COUNTERPARTY_ROLE_MARKERS)
    has_safe_boundary = has_any_marker(assistant_text, SAFE_FINANCIAL_BOUNDARY_MARKERS)
    if has_financial_counterparty and has_ordinary_role_claim and not has_safe_boundary:
        issue_code = "bank_counterparty_misclassified_as_business_partner"
        if issue_code not in issue_codes:
            issue_codes.append(issue_code)
        if "business_semantic_role_gap" not in root_layers:
            root_layers.append("business_semantic_role_gap")

    review["technical_garbage_present"] = bool(technical_hits)
    review["technical_garbage_hits"] = technical_hits
    review["issue_codes"] = issue_codes
    # dict.fromkeys de-duplicates while keeping first-seen order.
    review["suggested_root_cause_layers"] = list(dict.fromkeys(root_layers))
    review["business_usefulness_ok"] = not issue_codes
    step_state["business_first_review"] = review
    return step_state
|
||||
|
||||
|
||||
def build_step_state_for_pair(
|
||||
*,
|
||||
run_id: str,
|
||||
|
|
@ -323,7 +417,7 @@ def build_step_state_for_pair(
|
|||
},
|
||||
}
|
||||
entries = dcl.extract_structured_entries(assistant_text)
|
||||
return dcl.build_scenario_step_state(
|
||||
step_state = dcl.build_scenario_step_state(
|
||||
scenario_id=run_id,
|
||||
domain="assistant_stage1_gui_run",
|
||||
step=build_step_for_pair(pair),
|
||||
|
|
@ -333,6 +427,7 @@ def build_step_state_for_pair(
|
|||
turn_artifact=turn_artifact,
|
||||
entries=entries,
|
||||
)
|
||||
return augment_gui_business_review(step_state)
|
||||
|
||||
|
||||
def severity_rank(severity: str) -> int:
|
||||
|
|
|
|||
|
|
@ -153,6 +153,14 @@ def _is_catalog_alignment_code(code: str) -> bool:
|
|||
}
|
||||
|
||||
|
||||
def _is_route_candidate_code(code: str) -> bool:
|
||||
return code in {
|
||||
"wrong_route_candidate_status",
|
||||
"wrong_route_candidate_executable_now",
|
||||
"missing_route_candidate_axes",
|
||||
}
|
||||
|
||||
|
||||
def _derive_step_invariant_failures(step: dict[str, Any], findings: list[dict[str, Any]]) -> dict[str, bool]:
|
||||
codes = [str(item.get("code") or "").strip() for item in findings]
|
||||
selected_object_step = _has_selected_object_signal(step)
|
||||
|
|
@ -165,6 +173,7 @@ def _derive_step_invariant_failures(step: dict[str, Any], findings: list[dict[st
|
|||
"human_answer_quality": any(_is_human_answer_quality_code(code) for code in codes),
|
||||
"meta_context_integrity": meta_context_step and any(_is_meta_context_code(code) for code in codes),
|
||||
"catalog_alignment": any(_is_catalog_alignment_code(code) for code in codes),
|
||||
"route_candidate_handoff": any(_is_route_candidate_code(code) for code in codes),
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -182,6 +191,7 @@ def build_scenario_acceptance_matrix(
|
|||
"human_answer_quality": 0,
|
||||
"meta_context_integrity": 0,
|
||||
"catalog_alignment": 0,
|
||||
"route_candidate_handoff": 0,
|
||||
}
|
||||
|
||||
for index, step in enumerate(spec.get("steps") or [], start=1):
|
||||
|
|
@ -215,6 +225,19 @@ def build_scenario_acceptance_matrix(
|
|||
"mcp_discovery_response_applied": step_state.get("mcp_discovery_response_applied"),
|
||||
"mcp_discovery_selected_chain_id": step_state.get("mcp_discovery_selected_chain_id"),
|
||||
"mcp_discovery_response_candidate_status": step_state.get("mcp_discovery_response_candidate_status"),
|
||||
"mcp_discovery_route_candidate_status": step_state.get("mcp_discovery_route_candidate_status"),
|
||||
"mcp_discovery_route_candidate_fact_family": step_state.get(
|
||||
"mcp_discovery_route_candidate_fact_family"
|
||||
),
|
||||
"mcp_discovery_route_candidate_action_family": step_state.get(
|
||||
"mcp_discovery_route_candidate_action_family"
|
||||
),
|
||||
"mcp_discovery_route_candidate_missing_axes": step_state.get(
|
||||
"mcp_discovery_route_candidate_missing_axes"
|
||||
),
|
||||
"mcp_discovery_route_candidate_executable_now": step_state.get(
|
||||
"mcp_discovery_route_candidate_executable_now"
|
||||
),
|
||||
"mcp_discovery_effective_intents": step_state.get("mcp_discovery_effective_intents"),
|
||||
"selected_object_step": _has_selected_object_signal(step),
|
||||
"meta_context_step": _has_meta_context_signal(step),
|
||||
|
|
@ -233,6 +256,7 @@ def build_scenario_acceptance_matrix(
|
|||
"human_answer_quality_ok": invariant_failure_counts["human_answer_quality"] == 0,
|
||||
"meta_context_integrity_ok": invariant_failure_counts["meta_context_integrity"] == 0,
|
||||
"catalog_alignment_ok": invariant_failure_counts["catalog_alignment"] == 0,
|
||||
"route_candidate_handoff_ok": invariant_failure_counts["route_candidate_handoff"] == 0,
|
||||
}
|
||||
critical_rows = [row for row in rows if row["criticality"] == "critical"]
|
||||
critical_path_green = bool(critical_rows) and all(row["review_status"] == "pass" for row in critical_rows)
|
||||
|
|
@ -340,6 +364,7 @@ def build_scenario_acceptance_matrix_markdown(acceptance_matrix: dict[str, Any])
|
|||
f"- human_answer_quality_ok: `{invariants.get('human_answer_quality_ok')}`",
|
||||
f"- meta_context_integrity_ok: `{invariants.get('meta_context_integrity_ok')}`",
|
||||
f"- catalog_alignment_ok: `{invariants.get('catalog_alignment_ok')}`",
|
||||
f"- route_candidate_handoff_ok: `{invariants.get('route_candidate_handoff_ok')}`",
|
||||
"",
|
||||
"## Steps",
|
||||
]
|
||||
|
|
@ -353,6 +378,9 @@ def build_scenario_acceptance_matrix_markdown(acceptance_matrix: dict[str, Any])
|
|||
f" catalog_alignment_status: `{row.get('mcp_discovery_catalog_chain_alignment_status') or 'n/a'}`",
|
||||
f" catalog_top_match: `{row.get('mcp_discovery_catalog_chain_top_match') or 'n/a'}`",
|
||||
f" catalog_selected_matches_top: `{row.get('mcp_discovery_catalog_chain_selected_matches_top')}`",
|
||||
f" route_candidate_status: `{row.get('mcp_discovery_route_candidate_status') or 'n/a'}`",
|
||||
f" route_candidate_missing_axes: `{', '.join(row.get('mcp_discovery_route_candidate_missing_axes') or []) or 'n/a'}`",
|
||||
f" route_candidate_executable_now: `{row.get('mcp_discovery_route_candidate_executable_now')}`",
|
||||
f" highest_unresolved_priority: `{row.get('highest_unresolved_priority')}`",
|
||||
f" selected_object_step: `{row.get('selected_object_step')}`",
|
||||
f" meta_context_step: `{row.get('meta_context_step')}`",
|
||||
|
|
|
|||
|
|
@ -573,6 +573,7 @@ def build_stage_context_capsule(
|
|||
"quality_rules": [
|
||||
"Review the human question and visible answer before internal route ids.",
|
||||
"Treat direct-answer-first, business usefulness, temporal honesty, field truth, and answer layering as acceptance gates.",
|
||||
"Use route-candidate status and missing axes to separate clarification, route enablement, and executable reviewed routes.",
|
||||
"Treat deterministic P0/P1 repair targets as blockers even if the analyst wording sounds optimistic.",
|
||||
"After code edits, run targeted tests/build and rebuild graphify before replay evidence is trusted.",
|
||||
],
|
||||
|
|
|
|||
|
|
@ -68,6 +68,25 @@ class AgentSemanticPackBuilderTests(unittest.TestCase):
|
|||
]
|
||||
self.assertEqual(len(catalog_checked_steps), 15)
|
||||
|
||||
def test_classify_truth_harness_step_preserves_route_candidate_expectations(self) -> None:
    """Route-candidate expectations on a raw step must survive classification.

    Checks that the classified entry keeps the expected status, the
    ``executable_now`` flag as the literal ``False`` (not merely falsy) and the
    missing axes, and that ``route_candidate_handoff`` is among its semantic
    tags.
    """
    entry = builder.classify_truth_harness_step(
        builder.ORCHESTRATION_DIR / "demo_route_candidate.json",
        {"title": "Route candidate demo", "scenario_id": "demo", "domain": "demo"},
        {
            "step_id": "step_01",
            "title": "Needs organization",
            "question": "show open-world route candidate",
            "expected_route_candidate_status": "needs_user_scope",
            "expected_route_candidate_executable_now": False,
            "expected_route_candidate_missing_axes": ["organization"],
        },
    )

    self.assertEqual(entry["expected_route_candidate_status"], "needs_user_scope")
    # assertIs: the flag must stay exactly False, not be dropped or turned into None.
    self.assertIs(entry["expected_route_candidate_executable_now"], False)
    self.assertEqual(entry["expected_route_candidate_missing_axes"], ["organization"])
    self.assertIn("route_candidate_handoff", entry["semantic_tags"])
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
|
|
|||
|
|
@ -40,6 +40,14 @@ class DomainCaseLoopStepStateTests(unittest.TestCase):
|
|||
"mcp_discovery_catalog_chain_alignment_status": "selected_matches_top",
|
||||
"mcp_discovery_catalog_chain_top_match": "value_flow",
|
||||
"mcp_discovery_catalog_chain_selected_matches_top": True,
|
||||
"mcp_discovery_route_candidate_status": "needs_user_scope",
|
||||
"mcp_discovery_route_candidate_fact_family": "value_flow",
|
||||
"mcp_discovery_route_candidate_action_family": "turnover",
|
||||
"mcp_discovery_route_candidate_missing_axes": ["organization"],
|
||||
"mcp_discovery_route_candidate_provided_axes": ["period"],
|
||||
"mcp_discovery_route_candidate_executable_now": False,
|
||||
"mcp_discovery_route_candidate_enablement_reason": "Missing scope axes: organization",
|
||||
"mcp_discovery_route_candidate_next_action": "Ask the user for the missing scope axes before MCP execution.",
|
||||
},
|
||||
"session_summary": {},
|
||||
},
|
||||
|
|
@ -49,6 +57,106 @@ class DomainCaseLoopStepStateTests(unittest.TestCase):
|
|||
self.assertEqual(step_state["mcp_discovery_catalog_chain_alignment_status"], "selected_matches_top")
|
||||
self.assertEqual(step_state["mcp_discovery_catalog_chain_top_match"], "value_flow")
|
||||
self.assertTrue(step_state["mcp_discovery_catalog_chain_selected_matches_top"])
|
||||
self.assertEqual(step_state["mcp_discovery_route_candidate_status"], "needs_user_scope")
|
||||
self.assertEqual(step_state["mcp_discovery_route_candidate_missing_axes"], ["organization"])
|
||||
self.assertEqual(step_state["mcp_discovery_route_candidate_provided_axes"], ["period"])
|
||||
self.assertFalse(step_state["mcp_discovery_route_candidate_executable_now"])
|
||||
|
||||
def test_repair_targets_promote_route_candidate_enablement_gaps(self) -> None:
    # A step that was accepted, but whose route candidate still reports
    # "needs_route_enablement", is expected to come back as exactly one P1
    # repair target attributed to route-candidate enablement.
    candidate_step = {
        "status": "accepted",
        "acceptance_status": "accepted",
        "execution_status": "validated",
        "question_resolved": "можно ли построить анализ качества поставщиков за 2020?",
        "reply_type": "partial_coverage",
        "mcp_discovery_selected_chain_id": "business_overview",
        "mcp_discovery_route_candidate_status": "needs_route_enablement",
        "mcp_discovery_route_candidate_fact_family": "supplier_quality",
        "mcp_discovery_route_candidate_action_family": "risk_or_quality_analysis",
        "mcp_discovery_route_candidate_missing_axes": [],
        "mcp_discovery_route_candidate_executable_now": False,
        "mcp_discovery_route_candidate_enablement_reason": "Reviewed supplier-quality route is not wired yet.",
        "mcp_discovery_route_candidate_next_action": "Enable a reviewed supplier-quality route before claiming this fact.",
    }
    scenario_run = {
        "scenario_id": "route_candidate_demo",
        "title": "Route candidate demo",
        "artifact_dir": "artifacts/domain_runs/route_candidate_demo",
        "scenario_state": {"step_outputs": {"step_01": candidate_step}},
    }
    pack_summary = {
        "pack_id": "route_candidate_pack",
        "domain": "open_world",
        "final_status": "accepted",
    }

    repair_targets = dcl.build_deterministic_repair_targets(pack_summary, [scenario_run])

    # Pack-level counters first, then the single target, then the grouping.
    self.assertEqual(repair_targets["target_count"], 1)
    self.assertEqual(repair_targets["severity_counts"]["P1"], 1)
    first_target = repair_targets["targets"][0]
    self.assertEqual(first_target["problem_type"], "route_candidate_enablement_gap")
    self.assertEqual(first_target["target_source"], "route_candidate_enablement")
    self.assertEqual(first_target["route_candidate"]["candidate_status"], "needs_route_enablement")
    self.assertEqual(
        repair_targets["route_candidate_status_counts"],
        {"needs_route_enablement": 1},
    )
    first_group = repair_targets["route_candidate_groups"][0]
    self.assertEqual(first_group["selected_chain_id"], "business_overview")
|
||||
|
||||
def test_lead_handoff_surfaces_route_candidate_groups(self) -> None:
    # Builds repair targets from one accepted step with a
    # "needs_route_enablement" route candidate, feeds them into the lead-coder
    # handoff, and checks that both the handoff dict and its markdown
    # rendering expose the route candidate group.
    candidate_step = {
        "status": "accepted",
        "acceptance_status": "accepted",
        "execution_status": "validated",
        "question_resolved": "можно ли построить анализ качества поставщиков за 2020?",
        "reply_type": "partial_coverage",
        "mcp_discovery_selected_chain_id": "business_overview",
        "mcp_discovery_route_candidate_status": "needs_route_enablement",
        "mcp_discovery_route_candidate_fact_family": "supplier_quality",
        "mcp_discovery_route_candidate_action_family": "risk_or_quality_analysis",
        "mcp_discovery_route_candidate_missing_axes": [],
        "mcp_discovery_route_candidate_executable_now": False,
        "mcp_discovery_route_candidate_enablement_reason": "Reviewed supplier-quality route is not wired yet.",
        "mcp_discovery_route_candidate_next_action": "Enable a reviewed supplier-quality route before claiming this fact.",
    }
    scenario_run = {
        "scenario_id": "route_candidate_demo",
        "title": "Route candidate demo",
        "artifact_dir": "artifacts/domain_runs/route_candidate_demo",
        "scenario_state": {"step_outputs": {"step_01": candidate_step}},
    }
    pack_summary = {
        "pack_id": "route_candidate_pack",
        "domain": "open_world",
        "final_status": "accepted",
    }
    repair_targets = dcl.build_deterministic_repair_targets(pack_summary, [scenario_run])

    # All artifact paths handed to the builder live under the same pack directory.
    pack_dir = Path("artifacts/domain_runs/route_candidate_pack")
    handoff = dcl.build_lead_coder_handoff(
        loop_state={"loop_id": "route_candidate_loop"},
        iteration_id="iteration_00",
        pack_dir=pack_dir,
        analyst_verdict_path=pack_dir / "analyst_verdict.json",
        repair_targets_path=pack_dir / "repair_targets.json",
        business_audit_path=pack_dir / "business_audit.md",
        analyst_verdict={"quality_score": 75},
        repair_targets=repair_targets,
        target_score=88,
        loop_decision="needs_exact_capability",
        analyst_accepted_gate=False,
        accepted_gate=False,
        deterministic_gate_ok=False,
        deterministic_gate_reason="repair_targets_remaining=P0:0,P1:1",
        requires_user_decision=False,
        user_decision_type="none",
        user_decision_prompt=None,
    )
    markdown = dcl.build_lead_coder_handoff_markdown(handoff)

    first_group = handoff["route_candidate_groups"][0]
    self.assertEqual(first_group["candidate_status"], "needs_route_enablement")
    first_enablement_target = handoff["route_candidate_enablement_targets"][0]
    self.assertEqual(first_enablement_target["problem_type"], "route_candidate_enablement_gap")
    self.assertIn("## Route Candidate Handoff Groups", markdown)
    self.assertIn("route_candidate_demo:step_01", markdown)
|
||||
|
||||
def test_analysis_context_date_is_not_implicit_business_filter(self) -> None:
|
||||
step_state = dcl.build_scenario_step_state(
|
||||
|
|
@ -688,6 +796,39 @@ class DomainCaseLoopStepStateTests(unittest.TestCase):
|
|||
self.assertEqual(reviewed["critical_findings_count"], 1)
|
||||
self.assertEqual(reviewed["review_findings"][0]["code"], "wrong_catalog_chain_top_match")
|
||||
|
||||
def test_truth_harness_checks_expected_route_candidate_fields(self) -> None:
    # The spec expects two missing axes (organization and period) while the
    # observed step state reports only "organization"; status and the
    # executable flag match. The review is expected to fail with a single
    # critical "missing_route_candidate_axes" finding.
    step_spec = {
        "step_id": "step_01",
        "question_template": "show route candidate",
        "criticality": "critical",
        "allowed_reply_types": [],
        "expected_route_candidate_status": "needs_user_scope",
        "expected_route_candidate_executable_now": False,
        "expected_route_candidate_missing_axes": ["organization", "period"],
    }
    observed_state = {
        "question_resolved": "show route candidate",
        "reply_type": "clarification_required",
        "assistant_text": "Please choose organization.",
        "actual_direct_answer": "Please choose organization.",
        "detected_intent": "counterparty_turnover",
        "selected_recipe": None,
        "capability_id": None,
        "mcp_discovery_route_candidate_status": "needs_user_scope",
        "mcp_discovery_route_candidate_missing_axes": ["organization"],
        "mcp_discovery_route_candidate_executable_now": False,
        "extracted_filters": {},
    }

    reviewed = dth.evaluate_truth_step(
        step=step_spec,
        step_state=observed_state,
        step_results={},
        bindings={},
        runtime_bindings={},
    )

    self.assertEqual(reviewed["review_status"], "fail")
    self.assertEqual(reviewed["critical_findings_count"], 1)
    first_finding = reviewed["review_findings"][0]
    self.assertEqual(first_finding["code"], "missing_route_candidate_axes")
|
||||
|
||||
def test_business_first_review_flags_dirty_direct_answer_surface(self) -> None:
|
||||
step_state = dcl.build_scenario_step_state(
|
||||
scenario_id="business_surface_demo",
|
||||
|
|
|
|||
|
|
@ -132,6 +132,44 @@ class AssistantStage1RunReviewTests(unittest.TestCase):
|
|||
self.assertIn("overall_business_status", markdown)
|
||||
self.assertIn("Question Quality", markdown)
|
||||
|
||||
def test_review_flags_mcp_leak_and_bank_role_misclassification(self) -> None:
    # Writes a one-exchange session in which the assistant names a bank as
    # the top money-bringing counterparty and mentions an MCP selection limit,
    # then expects the run review to fail and to count both issue codes.
    run_id = "assistant-stage1-bank-leak"
    user_turn = {"role": "user", "text": "кто принес больше всего денег за 2020"}
    assistant_turn = {
        "role": "assistant",
        "text": (
            "Коротко: больше всего денег принёс контрагент СБЕРБАНК, ПАО. "
            "Важно: исходящие уперлись в лимит выборки MCP."
        ),
        "reply_type": "partial_coverage",
        "message_id": "a-bank",
        "trace_id": "trace-bank",
        "debug": {},
    }

    with tempfile.TemporaryDirectory() as tmp:
        root = Path(tmp)
        sessions_dir = root / "sessions"
        reports_dir = root / "reports"
        session_file = sessions_dir / f"{run_id}-SAVED-001.json"
        report_file = reports_dir / f"{run_id}.md"

        write_json(session_file, session_payload([user_turn, assistant_turn]))
        # The reports directory is created here before the report is written.
        report_file.parent.mkdir(parents=True, exist_ok=True)
        report_file.write_text(f"# Assistant Stage 1 Eval Run\n\n- run_id: {run_id}\n", encoding="utf-8")

        review = reviewer.build_run_review(
            run_id=run_id,
            session_files=[session_file],
            report_path=report_file,
        )

        self.assertEqual(review["summary"]["overall_business_status"], "fail")
        self.assertIn("technical_garbage_in_answer", review["summary"]["issue_counts"])
        self.assertIn(
            "bank_counterparty_misclassified_as_business_partner",
            review["summary"]["issue_counts"],
        )
|
||||
|
||||
def test_question_quality_treats_short_natural_followups_as_contextual(self) -> None:
|
||||
pairs = [
|
||||
{"pair_index": 1, "user": {"text": "приветик - че как там дела"}},
|
||||
|
|
|
|||
|
|
@ -87,6 +87,11 @@ class ScenarioAcceptancePolicyTests(unittest.TestCase):
|
|||
"mcp_discovery_catalog_chain_alignment_status": "selected_matches_top",
|
||||
"mcp_discovery_catalog_chain_top_match": "inventory_stock_snapshot",
|
||||
"mcp_discovery_catalog_chain_selected_matches_top": True,
|
||||
"mcp_discovery_route_candidate_status": "ready_for_reviewed_execution",
|
||||
"mcp_discovery_route_candidate_fact_family": "inventory_stock_snapshot",
|
||||
"mcp_discovery_route_candidate_action_family": "stock_snapshot",
|
||||
"mcp_discovery_route_candidate_missing_axes": [],
|
||||
"mcp_discovery_route_candidate_executable_now": True,
|
||||
"review_findings": [],
|
||||
}
|
||||
},
|
||||
|
|
@ -116,6 +121,11 @@ class ScenarioAcceptancePolicyTests(unittest.TestCase):
|
|||
"inventory_stock_snapshot",
|
||||
)
|
||||
self.assertTrue(acceptance_matrix["rows"][0]["mcp_discovery_catalog_chain_selected_matches_top"])
|
||||
self.assertEqual(
|
||||
acceptance_matrix["rows"][0]["mcp_discovery_route_candidate_status"],
|
||||
"ready_for_reviewed_execution",
|
||||
)
|
||||
self.assertTrue(acceptance_matrix["rows"][0]["mcp_discovery_route_candidate_executable_now"])
|
||||
|
||||
def test_flags_meta_context_integrity_when_meta_step_leaks_technical_answer_shape(self) -> None:
|
||||
spec = {
|
||||
|
|
@ -212,6 +222,51 @@ class ScenarioAcceptancePolicyTests(unittest.TestCase):
|
|||
self.assertEqual(pack_state["unresolved_p1_count"], 1)
|
||||
self.assertIn("catalog_alignment", acceptance_matrix["rows"][0]["invariant_failures"])
|
||||
|
||||
def test_flags_route_candidate_handoff_invariant_when_expected_candidate_is_wrong(self) -> None:
    # A step tagged "route_candidate_handoff" that failed review with a
    # critical "wrong_route_candidate_status" finding is expected to break the
    # pack-level route_candidate_handoff_ok invariant and to be listed in the
    # row's invariant failures.
    tagged_step = {
        "step_id": "step_01",
        "title": "Candidate needs scope",
        "question_template": "show unfamiliar 1c route candidate",
        "criticality": "critical",
        "semantic_tags": ["route_candidate_handoff"],
    }
    spec = {
        "scenario_id": "demo_route_candidate",
        "domain": "open_world_autonomy",
        "title": "Route candidate",
        "steps": [tagged_step],
    }
    failed_output = {
        "review_status": "fail",
        "reply_type": "clarification_required",
        "mcp_discovery_route_candidate_status": "ready_for_reviewed_execution",
        "mcp_discovery_route_candidate_missing_axes": [],
        "mcp_discovery_route_candidate_executable_now": True,
        "review_findings": [
            {"code": "wrong_route_candidate_status", "severity": "critical"},
        ],
    }
    scenario_state = {
        "session_id": "asst-route",
        "step_outputs": {"step_01": failed_output},
    }
    review_summary = {
        "review_source": "live_strict_replay",
        "overall_status": "fail",
        "steps_total": 1,
        "steps_passed": 0,
        "steps_with_warning": 0,
        "steps_failed": 1,
    }

    acceptance_matrix = sap.build_scenario_acceptance_matrix(spec, scenario_state, review_summary)
    pack_state = sap.derive_truth_harness_pack_state(
        spec,
        scenario_state,
        review_summary,
        acceptance_matrix,
    )

    self.assertFalse(pack_state["invariants"]["route_candidate_handoff_ok"])
    first_row = acceptance_matrix["rows"][0]
    self.assertIn("route_candidate_handoff", first_row["invariant_failures"])
|
||||
|
||||
|
||||
# Script entry point: run this module's unittest cases when executed directly.
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
|
|
|||
Loading…
Reference in New Issue