{
  "schema_version": "domain_truth_harness_spec_v1",
  "scenario_id": "agent_cashflow_no_tops_20260523",
  "domain": "autonomy_business_answer_contract",
  "title": "AGENT | Cashflow no-tops display modifier",
  "description": "Targeted AGENT replay: after a 2020 business overview, a compact 'без топов' follow-up must preserve the 2020 company cashflow context and suppress rankings instead of excluding top counterparties.",
  "bindings": {},
  "steps": [
    {
      "step_id": "step_01_direct_money_2020",
      "title": "Direct cashflow baseline for 2020",
      "question": "Сколько денег Альтернатива заработала за 2020 год? Ответь коротко: получили, заплатили, денежное нетто, это прибыль или нет.",
      "allowed_reply_types": ["partial_coverage", "factual_with_explanation", "factual"],
      "required_answer_patterns_all": [
        "47[\\s.]*628[\\s.]*853",
        "43[\\s.]*763[\\s.]*351",
        "3[\\s.]*865[\\s.]*501"
      ],
      "forbidden_answer_patterns": [
        "Учтено строк",
        "Первая найденная дата",
        "runtime_",
        "planner_",
        "query_movements",
        "primitive",
        "7\\s*136\\s*815|7136815"
      ],
      "criticality": "critical",
      "semantic_tags": ["cashflow", "direct_answer", "baseline"]
    },
    {
      "step_id": "step_02_explicit_overview_with_tops",
      "title": "Explicit overview may include top counterparties",
      "question": "Теперь дай взрослый обзор за 2020 по компании: входящие, исходящие, нетто, топы, но банк в топах отдельно объясни как финансовый поток.",
      "allowed_reply_types": ["partial_coverage", "factual_with_explanation", "factual"],
      "required_answer_patterns_all": [
        "47[\\s.]*628[\\s.]*853",
        "43[\\s.]*763[\\s.]*351",
        "3[\\s.]*865[\\s.]*501",
        "12[\\s.]*792[\\s.]*194",
        "12[\\s.]*093[\\s.]*465",
        "9[\\s.]*612[\\s.]*904"
      ],
      "forbidden_answer_patterns": [
        "Учтено строк",
        "Первая найденная дата",
        "runtime_",
        "planner_",
        "query_movements",
        "primitive"
      ],
      "criticality": "high",
      "semantic_tags": ["business_overview", "tops_allowed", "bank_boundary"]
    },
    {
      "step_id": "step_03_compact_no_tops_followup",
      "title": "No-tops compact follow-up preserves 2020 cashflow",
      "question": "а если коротко, сколько заработали деньгами без топов?",
      "allowed_reply_types": ["partial_coverage", "factual_with_explanation", "factual"],
      "required_answer_patterns_all": [
        "2020",
        "47[\\s.]*628[\\s.]*853",
        "43[\\s.]*763[\\s.]*351",
        "3[\\s.]*865[\\s.]*501"
      ],
      "forbidden_answer_patterns": [
        "2026-05-23",
        "получили\\s+0\\s*руб",
        "заплатили(?:/списали)?\\s+0\\s*руб",
        "нетто\\s+0\\s*руб",
        "с исключением крупнейших",
        "исключением крупнейших",
        "Комитет государственных услуг",
        "Группа СВК",
        "СБЕРБАНК",
        "Что проверить дальше",
        "Учтено строк",
        "Первая найденная дата",
        "runtime_",
        "planner_",
        "query_movements",
        "primitive"
      ],
      "criticality": "critical",
      "semantic_tags": ["compact_after_overview", "no_tops_display_modifier", "temporal_carryover"]
    },
    {
      "step_id": "step_04_plain_money_in_out_net",
      "title": "Explicit plain money request remains compact",
      "question": "не обзор, просто деньги: пришло, ушло, нетто за 2020",
      "allowed_reply_types": ["partial_coverage", "factual_with_explanation", "factual"],
      "required_answer_patterns_all": [
        "47[\\s.]*628[\\s.]*853",
        "43[\\s.]*763[\\s.]*351",
        "3[\\s.]*865[\\s.]*501"
      ],
      "forbidden_answer_patterns": [
        "Комитет государственных услуг",
        "Группа СВК",
        "СБЕРБАНК",
        "Что проверить дальше",
        "Учтено строк",
        "Первая найденная дата",
        "runtime_",
        "planner_",
        "query_movements",
        "primitive"
      ],
      "criticality": "critical",
      "semantic_tags": ["direct_money_only", "ranking_suppression"]
    }
  ],
  "acceptance": {
    "min_score": 80,
    "max_unresolved_p0": 0,
    "require_all_critical_steps_pass": true
  }
}