Усилить семантический AGENT gate для 1С

This commit is contained in:
dctouch 2026-05-23 22:01:29 +03:00
parent 75e0e2c66e
commit a4dd9c7c66
9 changed files with 634 additions and 13 deletions

View File

@ -381,6 +381,9 @@ function hasValueFlowActionConflictWithDiscoveryTurnMeaning(input, entryPoint) {
if (askedDomain !== "counterparty_value") { if (askedDomain !== "counterparty_value") {
return false; return false;
} }
if (askedAction === "net_value_flow") {
return true;
}
if (hasExactBankOperationsAddressReply(input, entryPoint)) { if (hasExactBankOperationsAddressReply(input, entryPoint)) {
return false; return false;
} }
@ -388,9 +391,6 @@ function hasValueFlowActionConflictWithDiscoveryTurnMeaning(input, entryPoint) {
if (askedAction === "payout") { if (askedAction === "payout") {
return detectedIntent !== "supplier_payouts_profile"; return detectedIntent !== "supplier_payouts_profile";
} }
if (askedAction === "net_value_flow") {
return true;
}
return false; return false;
} }
function hasEvidenceLaneConflictWithDiscoveryTurnMeaning(input, entryPoint) { function hasEvidenceLaneConflictWithDiscoveryTurnMeaning(input, entryPoint) {
@ -534,6 +534,10 @@ function hasSemanticConflictWithDiscoveryTurnMeaning(input, entryPoint) {
return false; return false;
} }
if (hasExactBankOperationsAddressReply(input, entryPoint)) { if (hasExactBankOperationsAddressReply(input, entryPoint)) {
const askedAction = toNonEmptyString(readDiscoveryTurnMeaning(entryPoint)?.asked_action_family);
if (askedAction === "net_value_flow") {
return true;
}
return false; return false;
} }
const detectedIntent = toNonEmptyString(input.addressRuntimeMeta?.detected_intent); const detectedIntent = toNonEmptyString(input.addressRuntimeMeta?.detected_intent);
@ -644,6 +648,9 @@ function applyAssistantMcpDiscoveryResponsePolicy(input) {
const metadataDiscoveryPriority = hasMetadataDiscoveryPriority(input, entryPoint); const metadataDiscoveryPriority = hasMetadataDiscoveryPriority(input, entryPoint);
const valueFlowActionConflictWithDiscoveryTurnMeaning = hasValueFlowActionConflictWithDiscoveryTurnMeaning(input, entryPoint); const valueFlowActionConflictWithDiscoveryTurnMeaning = hasValueFlowActionConflictWithDiscoveryTurnMeaning(input, entryPoint);
const evidenceLaneConflictWithDiscoveryTurnMeaning = hasEvidenceLaneConflictWithDiscoveryTurnMeaning(input, entryPoint); const evidenceLaneConflictWithDiscoveryTurnMeaning = hasEvidenceLaneConflictWithDiscoveryTurnMeaning(input, entryPoint);
const exactBankOperationsProtectsCurrent = exactBankOperationsAddressReply &&
!semanticConflictWithDiscoveryTurnMeaning &&
!valueFlowActionConflictWithDiscoveryTurnMeaning;
if (!entryPoint) { if (!entryPoint) {
pushReason(reasonCodes, "mcp_discovery_response_policy_no_entry_point"); pushReason(reasonCodes, "mcp_discovery_response_policy_no_entry_point");
} }
@ -701,7 +708,7 @@ function applyAssistantMcpDiscoveryResponsePolicy(input) {
if (exactValueFlowReplyForBusinessOverviewDirectMoneyNeed) { if (exactValueFlowReplyForBusinessOverviewDirectMoneyNeed) {
pushReason(reasonCodes, "mcp_discovery_response_policy_keep_exact_value_flow_reply_over_business_overview_direct_money_clarification"); pushReason(reasonCodes, "mcp_discovery_response_policy_keep_exact_value_flow_reply_over_business_overview_direct_money_clarification");
} }
if (exactBankOperationsAddressReply) { if (exactBankOperationsProtectsCurrent) {
pushReason(reasonCodes, "mcp_discovery_response_policy_keep_exact_bank_operations_address_reply"); pushReason(reasonCodes, "mcp_discovery_response_policy_keep_exact_bank_operations_address_reply");
} }
if (inventoryMarginRankingAddressReply) { if (inventoryMarginRankingAddressReply) {
@ -736,7 +743,7 @@ function applyAssistantMcpDiscoveryResponsePolicy(input) {
!runtimeMatchedExactReply && !runtimeMatchedExactReply &&
!staleMetadataDiscoveryFallbackAgainstExactAddressReply && !staleMetadataDiscoveryFallbackAgainstExactAddressReply &&
!exactValueFlowReplyForBusinessOverviewDirectMoneyNeed && !exactValueFlowReplyForBusinessOverviewDirectMoneyNeed &&
!exactBankOperationsAddressReply && !exactBankOperationsProtectsCurrent &&
!inventoryMarginRankingAddressReply && !inventoryMarginRankingAddressReply &&
!(deterministicBroadBusinessEvaluationReply && candidate.candidate_status === "clarification_candidate") && !(deterministicBroadBusinessEvaluationReply && candidate.candidate_status === "clarification_candidate") &&
ALLOWED_CANDIDATE_STATUSES.has(candidate.candidate_status) && ALLOWED_CANDIDATE_STATUSES.has(candidate.candidate_status) &&

View File

@ -537,6 +537,9 @@ function hasValueFlowActionConflictWithDiscoveryTurnMeaning(
if (askedDomain !== "counterparty_value") { if (askedDomain !== "counterparty_value") {
return false; return false;
} }
if (askedAction === "net_value_flow") {
return true;
}
if (hasExactBankOperationsAddressReply(input, entryPoint)) { if (hasExactBankOperationsAddressReply(input, entryPoint)) {
return false; return false;
} }
@ -544,9 +547,6 @@ function hasValueFlowActionConflictWithDiscoveryTurnMeaning(
if (askedAction === "payout") { if (askedAction === "payout") {
return detectedIntent !== "supplier_payouts_profile"; return detectedIntent !== "supplier_payouts_profile";
} }
if (askedAction === "net_value_flow") {
return true;
}
return false; return false;
} }
@ -726,6 +726,10 @@ function hasSemanticConflictWithDiscoveryTurnMeaning(
return false; return false;
} }
if (hasExactBankOperationsAddressReply(input, entryPoint)) { if (hasExactBankOperationsAddressReply(input, entryPoint)) {
const askedAction = toNonEmptyString(readDiscoveryTurnMeaning(entryPoint)?.asked_action_family);
if (askedAction === "net_value_flow") {
return true;
}
return false; return false;
} }
const detectedIntent = toNonEmptyString(input.addressRuntimeMeta?.detected_intent); const detectedIntent = toNonEmptyString(input.addressRuntimeMeta?.detected_intent);
@ -870,6 +874,10 @@ export function applyAssistantMcpDiscoveryResponsePolicy(
input, input,
entryPoint entryPoint
); );
const exactBankOperationsProtectsCurrent =
exactBankOperationsAddressReply &&
!semanticConflictWithDiscoveryTurnMeaning &&
!valueFlowActionConflictWithDiscoveryTurnMeaning;
if (!entryPoint) { if (!entryPoint) {
pushReason(reasonCodes, "mcp_discovery_response_policy_no_entry_point"); pushReason(reasonCodes, "mcp_discovery_response_policy_no_entry_point");
@ -940,7 +948,7 @@ export function applyAssistantMcpDiscoveryResponsePolicy(
"mcp_discovery_response_policy_keep_exact_value_flow_reply_over_business_overview_direct_money_clarification" "mcp_discovery_response_policy_keep_exact_value_flow_reply_over_business_overview_direct_money_clarification"
); );
} }
if (exactBankOperationsAddressReply) { if (exactBankOperationsProtectsCurrent) {
pushReason(reasonCodes, "mcp_discovery_response_policy_keep_exact_bank_operations_address_reply"); pushReason(reasonCodes, "mcp_discovery_response_policy_keep_exact_bank_operations_address_reply");
} }
if (inventoryMarginRankingAddressReply) { if (inventoryMarginRankingAddressReply) {
@ -980,7 +988,7 @@ export function applyAssistantMcpDiscoveryResponsePolicy(
!runtimeMatchedExactReply && !runtimeMatchedExactReply &&
!staleMetadataDiscoveryFallbackAgainstExactAddressReply && !staleMetadataDiscoveryFallbackAgainstExactAddressReply &&
!exactValueFlowReplyForBusinessOverviewDirectMoneyNeed && !exactValueFlowReplyForBusinessOverviewDirectMoneyNeed &&
!exactBankOperationsAddressReply && !exactBankOperationsProtectsCurrent &&
!inventoryMarginRankingAddressReply && !inventoryMarginRankingAddressReply &&
!(deterministicBroadBusinessEvaluationReply && candidate.candidate_status === "clarification_candidate") && !(deterministicBroadBusinessEvaluationReply && candidate.candidate_status === "clarification_candidate") &&
ALLOWED_CANDIDATE_STATUSES.has(candidate.candidate_status) && ALLOWED_CANDIDATE_STATUSES.has(candidate.candidate_status) &&

View File

@ -700,6 +700,67 @@ describe("assistant MCP discovery response policy", () => {
expect(result.reason_codes).not.toContain("mcp_discovery_response_policy_semantic_conflict_allows_candidate_override"); expect(result.reason_codes).not.toContain("mcp_discovery_response_policy_semantic_conflict_allows_candidate_override");
}); });
// Regression case: the current reply is an exact, grounded bank-operations answer,
// but the discovery turn meaning asks for a counterparty net value flow
// (received / paid / net). The policy is expected to apply the discovery candidate
// instead of keeping the exact bank-operations reply.
it("overrides exact bank operation replies for explicit counterparty received-paid-net questions", () => {
const result = applyAssistantMcpDiscoveryResponsePolicy({
// Current reply: a row listing of bank operations, not a received/paid/net summary.
currentReply:
"Exact bank operation answer: found 13 bank operations for Group SVK, showing rows and payment purposes.",
currentReplySource: "address_query_runtime_v1",
currentReplyType: "factual",
// Runtime meta describes an exact-route, non-empty, grounded bank-operations match.
addressRuntimeMeta: {
detected_intent: "bank_operations_by_counterparty",
selected_recipe: "address_bank_operations_by_counterparty_v1",
mcp_call_status: "matched_non_empty",
capability_route_mode: "exact",
answer_grounding_check: {
status: "grounded"
},
assistant_mcp_discovery_entry_point_v1: entryPoint({
turn_input: {
adapter_status: "ready",
should_run_discovery: true,
data_need_graph: {
business_fact_family: "value_flow",
subject_candidates: ["Group SVK"],
reason_codes: ["data_need_graph_built", "data_need_graph_bidirectional_value_flow"]
},
// Asked domain/action pair under test: counterparty_value + net_value_flow.
turn_meaning_ref: {
asked_domain_family: "counterparty_value",
asked_action_family: "net_value_flow",
explicit_entity_candidates: ["Group SVK"],
explicit_date_scope: "2020",
unsupported_but_understood_family: "counterparty_bidirectional_value_flow_or_netting"
}
},
// Discovery bridge carries a ready answer draft with the received / paid / net figures.
bridge: {
bridge_status: "answer_draft_ready",
user_facing_response_allowed: true,
business_fact_answer_allowed: true,
requires_user_clarification: false,
answer_draft: {
answer_mode: "confirmed_with_bounded_inference",
headline: "Group SVK 2020: received 12 093 465 rub., paid 0 rub.; net 12 093 465 rub. toward us.",
confirmed_lines: ["received 12 093 465 rub.; paid 0 rub.; net 12 093 465 rub."],
inference_lines: [],
unknown_lines: [],
limitation_lines: [],
next_step_line: null
}
}
})
}
});
// The candidate is applied and the final text carries the draft's figures.
expect(result.applied).toBe(true);
expect(result.decision).toBe("apply_candidate");
expect(result.reply_source).toBe("mcp_discovery_response_candidate_guarded");
expect(result.reply_text).toContain("received 12 093 465");
expect(result.reply_text).toContain("paid 0");
expect(result.reply_text).toContain("net 12 093 465");
// Both conflict reasons are reported; the "keep exact bank operations" reason must be absent.
expect(result.reason_codes).toContain("mcp_discovery_response_policy_value_flow_action_conflict_allows_candidate_override");
expect(result.reason_codes).toContain("mcp_discovery_response_policy_semantic_conflict_allows_candidate_override");
expect(result.reason_codes).not.toContain("mcp_discovery_response_policy_keep_exact_bank_operations_address_reply");
});
it("overrides an exact ranking-shaped address reply when open-scope ranking still needs organization", () => { it("overrides an exact ranking-shaped address reply when open-scope ranking still needs organization", () => {
const result = applyAssistantMcpDiscoveryResponsePolicy({ const result = applyAssistantMcpDiscoveryResponsePolicy({
currentReply: currentReply:

View File

@ -1,4 +1,84 @@
[ [
{
"generation_id": "gen-ag05231858-323f86",
"created_at": "2026-05-23T18:58:48+00:00",
"mode": "saved_user_sessions",
"title": "AGENT | Autonomy business quality pack",
"count": 15,
"domain": "autonomy_business_quality",
"questions": [
"Сколько входящих денег за 2020 год по ООО Альтернатива Плюс без разреза по контрагентам?",
"Сколько исходящих денег за 2020 год по ООО Альтернатива Плюс без разреза по контрагентам?",
"А всего сколько денег пришло в ООО Альтернатива Плюс за 2020, без топов и без контрагентов?",
"Теперь дай взрослый обзор за 2020 по компании: входящие, исходящие, нетто, топы, но банк в топах отдельно объясни как финансовый поток.",
"скока денег альтернатива заработала за 20 год?",
"а это чистая прибыль?",
"Какая чистая прибыль по ООО Альтернатива Плюс за 2020?",
"А отдельно по СБЕРБАНКУ: он для нас клиент, поставщик или финансовый поток? Дай коротко по подтвержденным строкам.",
"какое нетто по деньгам с Группа СВК за 2020 год: сколько получили и сколько заплатили?",
"кому мы должны на конец 2020?",
"а нам кто должен на конец 2020?",
"сколько НДС надо заплатить в налоговую за декабрь 2019?",
"Как ты оценишь деятельность компании?",
"Какая номенклатура товара реализована с высокой прибылью какая с низкой?",
"май 2020"
],
"generated_by": "codex_agent",
"saved_case_set_file": "assistant_autogen_saved_user_sessions_20260523185848_gen-ag05231858-323f86.json",
"context": {
"llm_provider": null,
"model": null,
"assistant_prompt_version": null,
"decomposition_prompt_version": null,
"prompt_fingerprint": null,
"autogen_personality_id": null,
"autogen_personality_prompt": null,
"source_session_id": null,
"saved_session_file": "assistant_saved_session_20260523185848_gen-ag05231858-323f86.json",
"saved_case_set_kind": "agent_semantic_scenario",
"agent_run": true,
"agent_focus": "Expanded targeted AGENT replay for the autonomy milestone: value-flow, business overview, debts, VAT, profit/cashflow distinction, nomenclature margin boundary, and final answer quality must survive realistic business questions.",
"architecture_phase": "turnaround_11",
"source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\agent_autonomy_business_quality_20260523.json",
"scenario_id": "agent_autonomy_business_quality_20260523",
"semantic_tags": [
"autonomy_core",
"bank_boundary",
"bank_classification",
"business_answer_quality",
"business_evaluation",
"business_overview",
"cashflow_overview",
"cashflow_vs_profit",
"clarification",
"colloquial_money",
"colloquial_total",
"counterparty_value_flow",
"debt_answer_quality",
"direct_profit",
"domain_purity",
"followup_context",
"incoming_total",
"limit_honesty",
"limited_answer",
"net_flow",
"next_action",
"no_profit_substitution",
"no_top_guard",
"nomenclature_margin",
"outgoing_total",
"payables",
"profit_vs_cashflow",
"receivables",
"technical_garbage_guard",
"value_flow",
"vat"
],
"validation_status": "accepted_live_replay",
"validated_run_dir": "artifacts\\domain_runs\\agent_autonomy_business_quality_live_semantic_gate_accepted_20260523",
"saved_after_validated_replay": true
}
},
{ {
"generation_id": "gen-ag05231427-70915a", "generation_id": "gen-ag05231427-70915a",
"created_at": "2026-05-23T14:27:55+00:00", "created_at": "2026-05-23T14:27:55+00:00",

View File

@ -0,0 +1,287 @@
{
"saved_at": "2026-05-23T18:58:48+00:00",
"generation_id": "gen-ag05231858-323f86",
"mode": "saved_user_sessions",
"title": "AGENT | Autonomy business quality pack",
"agent_run": true,
"questions": [
"Сколько входящих денег за 2020 год по ООО Альтернатива Плюс без разреза по контрагентам?",
"Сколько исходящих денег за 2020 год по ООО Альтернатива Плюс без разреза по контрагентам?",
"А всего сколько денег пришло в ООО Альтернатива Плюс за 2020, без топов и без контрагентов?",
"Теперь дай взрослый обзор за 2020 по компании: входящие, исходящие, нетто, топы, но банк в топах отдельно объясни как финансовый поток.",
"скока денег альтернатива заработала за 20 год?",
"а это чистая прибыль?",
"Какая чистая прибыль по ООО Альтернатива Плюс за 2020?",
"А отдельно по СБЕРБАНКУ: он для нас клиент, поставщик или финансовый поток? Дай коротко по подтвержденным строкам.",
"какое нетто по деньгам с Группа СВК за 2020 год: сколько получили и сколько заплатили?",
"кому мы должны на конец 2020?",
"а нам кто должен на конец 2020?",
"сколько НДС надо заплатить в налоговую за декабрь 2019?",
"Как ты оценишь деятельность компании?",
"Какая номенклатура товара реализована с высокой прибылью какая с низкой?",
"май 2020"
],
"metadata": {
"assistant_prompt_version": null,
"decomposition_prompt_version": null,
"prompt_fingerprint": null,
"agent_focus": "Expanded targeted AGENT replay for the autonomy milestone: value-flow, business overview, debts, VAT, profit/cashflow distinction, nomenclature margin boundary, and final answer quality must survive realistic business questions.",
"architecture_phase": "turnaround_11",
"source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\agent_autonomy_business_quality_20260523.json",
"scenario_id": "agent_autonomy_business_quality_20260523",
"semantic_tags": [
"autonomy_core",
"bank_boundary",
"bank_classification",
"business_answer_quality",
"business_evaluation",
"business_overview",
"cashflow_overview",
"cashflow_vs_profit",
"clarification",
"colloquial_money",
"colloquial_total",
"counterparty_value_flow",
"debt_answer_quality",
"direct_profit",
"domain_purity",
"followup_context",
"incoming_total",
"limit_honesty",
"limited_answer",
"net_flow",
"next_action",
"no_profit_substitution",
"no_top_guard",
"nomenclature_margin",
"outgoing_total",
"payables",
"profit_vs_cashflow",
"receivables",
"technical_garbage_guard",
"value_flow",
"vat"
],
"validation_status": "accepted_live_replay",
"validated_run_dir": "artifacts\\domain_runs\\agent_autonomy_business_quality_live_semantic_gate_accepted_20260523",
"saved_after_validated_replay": true,
"save_gate": {
"schema_version": "agent_semantic_save_gate_v1",
"validation_status": "accepted_live_replay",
"validated_run_dir": "artifacts\\domain_runs\\agent_autonomy_business_quality_live_semantic_gate_accepted_20260523",
"final_status": "accepted",
"review_overall_status": "pass",
"business_overall_status": "pass",
"steps_total": 15,
"steps_passed": 15,
"steps_failed": 0,
"steps_with_business_failures": 0,
"steps_with_business_warnings": 0,
"acceptance_gate_passed": true,
"saved_after_validated_replay": true
}
},
"source_session_id": null,
"session": {
"session_id": null,
"mode": "agent_semantic_run",
"items": [
{
"message_id": "agent-user-001",
"role": "user",
"text": "Сколько входящих денег за 2020 год по ООО Альтернатива Плюс без разреза по контрагентам?",
"created_at": "2026-05-23T18:58:48+00:00",
"reply_type": null,
"trace_id": null,
"debug": null
},
{
"message_id": "agent-user-002",
"role": "user",
"text": "Сколько исходящих денег за 2020 год по ООО Альтернатива Плюс без разреза по контрагентам?",
"created_at": "2026-05-23T18:58:48+00:00",
"reply_type": null,
"trace_id": null,
"debug": null
},
{
"message_id": "agent-user-003",
"role": "user",
"text": "А всего сколько денег пришло в ООО Альтернатива Плюс за 2020, без топов и без контрагентов?",
"created_at": "2026-05-23T18:58:48+00:00",
"reply_type": null,
"trace_id": null,
"debug": null
},
{
"message_id": "agent-user-004",
"role": "user",
"text": "Теперь дай взрослый обзор за 2020 по компании: входящие, исходящие, нетто, топы, но банк в топах отдельно объясни как финансовый поток.",
"created_at": "2026-05-23T18:58:48+00:00",
"reply_type": null,
"trace_id": null,
"debug": null
},
{
"message_id": "agent-user-005",
"role": "user",
"text": "скока денег альтернатива заработала за 20 год?",
"created_at": "2026-05-23T18:58:48+00:00",
"reply_type": null,
"trace_id": null,
"debug": null
},
{
"message_id": "agent-user-006",
"role": "user",
"text": "а это чистая прибыль?",
"created_at": "2026-05-23T18:58:48+00:00",
"reply_type": null,
"trace_id": null,
"debug": null
},
{
"message_id": "agent-user-007",
"role": "user",
"text": "Какая чистая прибыль по ООО Альтернатива Плюс за 2020?",
"created_at": "2026-05-23T18:58:48+00:00",
"reply_type": null,
"trace_id": null,
"debug": null
},
{
"message_id": "agent-user-008",
"role": "user",
"text": "А отдельно по СБЕРБАНКУ: он для нас клиент, поставщик или финансовый поток? Дай коротко по подтвержденным строкам.",
"created_at": "2026-05-23T18:58:48+00:00",
"reply_type": null,
"trace_id": null,
"debug": null
},
{
"message_id": "agent-user-009",
"role": "user",
"text": "какое нетто по деньгам с Группа СВК за 2020 год: сколько получили и сколько заплатили?",
"created_at": "2026-05-23T18:58:48+00:00",
"reply_type": null,
"trace_id": null,
"debug": null
},
{
"message_id": "agent-user-010",
"role": "user",
"text": "кому мы должны на конец 2020?",
"created_at": "2026-05-23T18:58:48+00:00",
"reply_type": null,
"trace_id": null,
"debug": null
},
{
"message_id": "agent-user-011",
"role": "user",
"text": "а нам кто должен на конец 2020?",
"created_at": "2026-05-23T18:58:48+00:00",
"reply_type": null,
"trace_id": null,
"debug": null
},
{
"message_id": "agent-user-012",
"role": "user",
"text": "сколько НДС надо заплатить в налоговую за декабрь 2019?",
"created_at": "2026-05-23T18:58:48+00:00",
"reply_type": null,
"trace_id": null,
"debug": null
},
{
"message_id": "agent-user-013",
"role": "user",
"text": "Как ты оценишь деятельность компании?",
"created_at": "2026-05-23T18:58:48+00:00",
"reply_type": null,
"trace_id": null,
"debug": null
},
{
"message_id": "agent-user-014",
"role": "user",
"text": "Какая номенклатура товара реализована с высокой прибылью какая с низкой?",
"created_at": "2026-05-23T18:58:48+00:00",
"reply_type": null,
"trace_id": null,
"debug": null
},
{
"message_id": "agent-user-015",
"role": "user",
"text": "май 2020",
"created_at": "2026-05-23T18:58:48+00:00",
"reply_type": null,
"trace_id": null,
"debug": null
}
],
"agent_run": true,
"metadata": {
"assistant_prompt_version": null,
"decomposition_prompt_version": null,
"prompt_fingerprint": null,
"agent_focus": "Expanded targeted AGENT replay for the autonomy milestone: value-flow, business overview, debts, VAT, profit/cashflow distinction, nomenclature margin boundary, and final answer quality must survive realistic business questions.",
"architecture_phase": "turnaround_11",
"source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\agent_autonomy_business_quality_20260523.json",
"scenario_id": "agent_autonomy_business_quality_20260523",
"semantic_tags": [
"autonomy_core",
"bank_boundary",
"bank_classification",
"business_answer_quality",
"business_evaluation",
"business_overview",
"cashflow_overview",
"cashflow_vs_profit",
"clarification",
"colloquial_money",
"colloquial_total",
"counterparty_value_flow",
"debt_answer_quality",
"direct_profit",
"domain_purity",
"followup_context",
"incoming_total",
"limit_honesty",
"limited_answer",
"net_flow",
"next_action",
"no_profit_substitution",
"no_top_guard",
"nomenclature_margin",
"outgoing_total",
"payables",
"profit_vs_cashflow",
"receivables",
"technical_garbage_guard",
"value_flow",
"vat"
],
"validation_status": "accepted_live_replay",
"validated_run_dir": "artifacts\\domain_runs\\agent_autonomy_business_quality_live_semantic_gate_accepted_20260523",
"saved_after_validated_replay": true,
"save_gate": {
"schema_version": "agent_semantic_save_gate_v1",
"validation_status": "accepted_live_replay",
"validated_run_dir": "artifacts\\domain_runs\\agent_autonomy_business_quality_live_semantic_gate_accepted_20260523",
"final_status": "accepted",
"review_overall_status": "pass",
"business_overall_status": "pass",
"steps_total": 15,
"steps_passed": 15,
"steps_failed": 0,
"steps_with_business_failures": 0,
"steps_with_business_warnings": 0,
"acceptance_gate_passed": true,
"saved_after_validated_replay": true
}
}
}
}

View File

@ -0,0 +1,70 @@
{
"suite_id": "assistant_saved_session_gen-ag05231858-323f86",
"suite_version": "0.1.0",
"schema_version": "assistant_saved_session_suite_v0_1",
"generated_at": "2026-05-23T18:58:48+00:00",
"generation_id": "gen-ag05231858-323f86",
"mode": "saved_user_sessions",
"title": "AGENT | Autonomy business quality pack",
"domain": "autonomy_business_quality",
"scenario_count": 1,
"case_ids": [
"SAVED-001"
],
"cases": [
{
"case_id": "SAVED-001",
"scenario_tag": "agent_saved_user_sessions",
"title": "AGENT | Autonomy business quality pack",
"question_type": "followup",
"broadness_level": "medium",
"turns": [
{
"user_message": "Сколько входящих денег за 2020 год по ООО Альтернатива Плюс без разреза по контрагентам?"
},
{
"user_message": "Сколько исходящих денег за 2020 год по ООО Альтернатива Плюс без разреза по контрагентам?"
},
{
"user_message": "А всего сколько денег пришло в ООО Альтернатива Плюс за 2020, без топов и без контрагентов?"
},
{
"user_message": "Теперь дай взрослый обзор за 2020 по компании: входящие, исходящие, нетто, топы, но банк в топах отдельно объясни как финансовый поток."
},
{
"user_message": "скока денег альтернатива заработала за 20 год?"
},
{
"user_message": "а это чистая прибыль?"
},
{
"user_message": "Какая чистая прибыль по ООО Альтернатива Плюс за 2020?"
},
{
"user_message": "А отдельно по СБЕРБАНКУ: он для нас клиент, поставщик или финансовый поток? Дай коротко по подтвержденным строкам."
},
{
"user_message": "какое нетто по деньгам с Группа СВК за 2020 год: сколько получили и сколько заплатили?"
},
{
"user_message": "кому мы должны на конец 2020?"
},
{
"user_message": "а нам кто должен на конец 2020?"
},
{
"user_message": "сколько НДС надо заплатить в налоговую за декабрь 2019?"
},
{
"user_message": "Как ты оценишь деятельность компании?"
},
{
"user_message": "Какая номенклатура товара реализована с высокой прибылью какая с низкой?"
},
{
"user_message": "май 2020"
}
]
}
]
}

View File

@ -118,6 +118,9 @@ BUSINESS_TOP_LINE_SCAFFOLD_MARKERS = (
"\u043e\u0433\u0440\u0430\u043d\u0438\u0447\u0435\u043d\u043d\u044b\u0439 \u0431\u0438\u0437\u043d\u0435\u0441-\u043e\u0431\u0437\u043e\u0440", "\u043e\u0433\u0440\u0430\u043d\u0438\u0447\u0435\u043d\u043d\u044b\u0439 \u0431\u0438\u0437\u043d\u0435\u0441-\u043e\u0431\u0437\u043e\u0440",
"\u0447\u0442\u043e \u043f\u043e\u0434\u0442\u0432\u0435\u0440\u0436\u0434\u0435\u043d\u043e", "\u0447\u0442\u043e \u043f\u043e\u0434\u0442\u0432\u0435\u0440\u0436\u0434\u0435\u043d\u043e",
"\u043f\u0440\u043e\u0432\u0435\u0440\u0435\u043d\u043d\u044b\u0435 \u043a\u043e\u043d\u0442\u0443\u0440\u044b", "\u043f\u0440\u043e\u0432\u0435\u0440\u0435\u043d\u043d\u044b\u0435 \u043a\u043e\u043d\u0442\u0443\u0440\u044b",
"\u043f\u043e \u0434\u0430\u043d\u043d\u044b\u043c 1\u0441 \u043d\u0430\u0439\u0434\u0435\u043d\u044b",
"\u043d\u0430\u0439\u0434\u0435\u043d\u044b \u0441\u0442\u0440\u043e\u043a\u0438",
"\u0441\u0443\u043c\u043c\u0443 \u043c\u043e\u0436\u043d\u043e \u043d\u0430\u0437\u044b\u0432\u0430\u0442\u044c \u0442\u043e\u043b\u044c\u043a\u043e",
"\u0431\u043b\u043e\u043a 1", "\u0431\u043b\u043e\u043a 1",
"\u0441\u0442\u0430\u0442\u0443\u0441", "\u0441\u0442\u0430\u0442\u0443\u0441",
) )

View File

@ -1156,8 +1156,15 @@ def build_business_review_summary(spec: dict[str, Any], scenario_state: dict[str
step_outputs = scenario_state.get("step_outputs") if isinstance(scenario_state.get("step_outputs"), dict) else {} step_outputs = scenario_state.get("step_outputs") if isinstance(scenario_state.get("step_outputs"), dict) else {}
steps: list[dict[str, Any]] = [] steps: list[dict[str, Any]] = []
issue_counts: dict[str, int] = {} issue_counts: dict[str, int] = {}
review_failures = 0
review_warnings = 0
for index, step in enumerate(spec["steps"], start=1): for index, step in enumerate(spec["steps"], start=1):
step_state = step_outputs.get(step["step_id"], {}) step_state = step_outputs.get(step["step_id"], {})
review_status = step_state.get("review_status") if isinstance(step_state, dict) else None
if review_status == "fail":
review_failures += 1
elif review_status == "warning":
review_warnings += 1
business_review = ( business_review = (
step_state.get("business_first_review") step_state.get("business_first_review")
if isinstance(step_state, dict) and isinstance(step_state.get("business_first_review"), dict) if isinstance(step_state, dict) and isinstance(step_state.get("business_first_review"), dict)
@ -1171,7 +1178,7 @@ def build_business_review_summary(spec: dict[str, Any], scenario_state: dict[str
"index": index, "index": index,
"step_id": step["step_id"], "step_id": step["step_id"],
"question": step["question_template"], "question": step["question_template"],
"review_status": step_state.get("review_status") if isinstance(step_state, dict) else None, "review_status": review_status,
"direct_answer": business_review.get("actual_direct_answer"), "direct_answer": business_review.get("actual_direct_answer"),
"answer_length_chars": business_review.get("answer_length_chars"), "answer_length_chars": business_review.get("answer_length_chars"),
"direct_answer_required": business_review.get("direct_answer_required"), "direct_answer_required": business_review.get("direct_answer_required"),
@ -1193,6 +1200,7 @@ def build_business_review_summary(spec: dict[str, Any], scenario_state: dict[str
) )
) )
warnings = sum(1 for step in steps if "business_answer_too_verbose" in step["issue_codes"]) warnings = sum(1 for step in steps if "business_answer_too_verbose" in step["issue_codes"])
semantic_status = "fail" if failed or review_failures else ("warning" if warnings or review_warnings else "pass")
return { return {
"schema_version": "business_first_run_review_v1", "schema_version": "business_first_run_review_v1",
"scenario_id": spec["scenario_id"], "scenario_id": spec["scenario_id"],
@ -1202,24 +1210,32 @@ def build_business_review_summary(spec: dict[str, Any], scenario_state: dict[str
"steps_total": len(steps), "steps_total": len(steps),
"steps_with_business_failures": failed, "steps_with_business_failures": failed,
"steps_with_business_warnings": warnings, "steps_with_business_warnings": warnings,
"steps_with_review_failures": review_failures,
"steps_with_review_warnings": review_warnings,
"issue_counts": issue_counts, "issue_counts": issue_counts,
"overall_business_status": "fail" if failed else ("warning" if warnings else "pass"), "overall_business_status": "fail" if failed else ("warning" if warnings else "pass"),
"overall_semantic_status": semantic_status,
"semantic_gate_passed": semantic_status == "pass",
"steps": steps, "steps": steps,
} }
def build_business_review_markdown(business_review: dict[str, Any]) -> str: def build_business_review_markdown(business_review: dict[str, Any], *, title: str = "Business-first review") -> str:
lines = [ lines = [
"# Business-first review", f"# {title}",
"", "",
f"- scenario_id: `{business_review.get('scenario_id') or 'n/a'}`", f"- scenario_id: `{business_review.get('scenario_id') or 'n/a'}`",
f"- domain: `{business_review.get('domain') or 'n/a'}`", f"- domain: `{business_review.get('domain') or 'n/a'}`",
f"- title: {business_review.get('title') or 'n/a'}", f"- title: {business_review.get('title') or 'n/a'}",
f"- session_id: `{business_review.get('session_id') or 'n/a'}`", f"- session_id: `{business_review.get('session_id') or 'n/a'}`",
f"- overall_business_status: `{business_review.get('overall_business_status') or 'n/a'}`", f"- overall_business_status: `{business_review.get('overall_business_status') or 'n/a'}`",
f"- overall_semantic_status: `{business_review.get('overall_semantic_status') or 'n/a'}`",
f"- semantic_gate_passed: `{business_review.get('semantic_gate_passed') is True}`",
f"- steps_total: `{business_review.get('steps_total')}`", f"- steps_total: `{business_review.get('steps_total')}`",
f"- steps_with_business_failures: `{business_review.get('steps_with_business_failures')}`", f"- steps_with_business_failures: `{business_review.get('steps_with_business_failures')}`",
f"- steps_with_business_warnings: `{business_review.get('steps_with_business_warnings')}`", f"- steps_with_business_warnings: `{business_review.get('steps_with_business_warnings')}`",
f"- steps_with_review_failures: `{business_review.get('steps_with_review_failures')}`",
f"- steps_with_review_warnings: `{business_review.get('steps_with_review_warnings')}`",
f"- issue_counts: `{dump_json(business_review.get('issue_counts') or {})}`", f"- issue_counts: `{dump_json(business_review.get('issue_counts') or {})}`",
"", "",
"## Human Answer Surface", "## Human Answer Surface",
@ -1479,6 +1495,8 @@ def review_export(spec: dict[str, Any], export_path: Path, output_dir: Path) ->
write_text(output_dir / "truth_review.md", review_markdown) write_text(output_dir / "truth_review.md", review_markdown)
write_json(output_dir / "business_review.json", business_review) write_json(output_dir / "business_review.json", business_review)
write_text(output_dir / "business_review.md", build_business_review_markdown(business_review)) write_text(output_dir / "business_review.md", build_business_review_markdown(business_review))
write_json(output_dir / "semantic_audit.json", business_review)
write_text(output_dir / "semantic_audit.md", build_business_review_markdown(business_review, title="Semantic audit"))
acceptance_bundle = write_acceptance_artifacts(output_dir, spec, scenario_state, review_summary) acceptance_bundle = write_acceptance_artifacts(output_dir, spec, scenario_state, review_summary)
return { return {
"scenario_state": scenario_state, "scenario_state": scenario_state,
@ -1575,6 +1593,8 @@ def run_live(spec: dict[str, Any], output_dir: Path, args: argparse.Namespace) -
write_text(output_dir / "truth_review.md", review_markdown) write_text(output_dir / "truth_review.md", review_markdown)
write_json(output_dir / "business_review.json", business_review) write_json(output_dir / "business_review.json", business_review)
write_text(output_dir / "business_review.md", build_business_review_markdown(business_review)) write_text(output_dir / "business_review.md", build_business_review_markdown(business_review))
write_json(output_dir / "semantic_audit.json", business_review)
write_text(output_dir / "semantic_audit.md", build_business_review_markdown(business_review, title="Semantic audit"))
acceptance_bundle = write_acceptance_artifacts(output_dir, spec, scenario_state, review_summary) acceptance_bundle = write_acceptance_artifacts(output_dir, spec, scenario_state, review_summary)
print(f"[truth-harness] saved artifacts to {output_dir}") print(f"[truth-harness] saved artifacts to {output_dir}")
print(f"[truth-harness] overall_status={review_summary['overall_status']}") print(f"[truth-harness] overall_status={review_summary['overall_status']}")

View File

@ -927,6 +927,91 @@ class DomainCaseLoopStepStateTests(unittest.TestCase):
self.assertIn("technical_garbage_in_answer", review["issue_codes"]) self.assertIn("technical_garbage_in_answer", review["issue_codes"])
self.assertNotIn("business_direct_answer_missing", review["issue_codes"]) self.assertNotIn("business_direct_answer_missing", review["issue_codes"])
def test_business_first_review_rejects_found_rows_scaffold_as_direct_answer(self) -> None:
    """A reply that opens with a "rows were found" scaffold must fail the review.

    The question asks how much incoming money there was for 2020. The
    assistant text starts with a statement that rows were found and only
    gives the figure on a later line, so the business-first review is
    expected to flag both the missing direct answer and layering noise.
    """
    asked = "\u0441\u043a\u043e\u043b\u044c\u043a\u043e \u0432\u0445\u043e\u0434\u044f\u0449\u0438\u0445 \u0434\u0435\u043d\u0435\u0433 \u0437\u0430 2020"
    step_spec = {
        "step_id": "step_01",
        "title": "Direct money answer",
        "depends_on": [],
        "question_template": asked,
    }
    assistant_reply = {
        "reply_type": "factual",
        "text": "\u041a\u043e\u0440\u043e\u0442\u043a\u043e: \u041f\u043e \u0434\u0430\u043d\u043d\u044b\u043c 1\u0421 \u043d\u0430\u0439\u0434\u0435\u043d\u044b \u0441\u0442\u0440\u043e\u043a\u0438 \u0432\u0445\u043e\u0434\u044f\u0449\u0438\u0445 \u0434\u0435\u043d\u0435\u0436\u043d\u044b\u0445 \u043f\u043e\u0441\u0442\u0443\u043f\u043b\u0435\u043d\u0438\u0439; \u0441\u0443\u043c\u043c\u0443 \u043c\u043e\u0436\u043d\u043e \u043d\u0430\u0437\u044b\u0432\u0430\u0442\u044c \u0442\u043e\u043b\u044c\u043a\u043e \u0432 \u0440\u0430\u043c\u043a\u0430\u0445 \u043f\u0440\u043e\u0432\u0435\u0440\u043a\u0438.\n\u0427\u0442\u043e \u043f\u043e\u0434\u0442\u0432\u0435\u0440\u0436\u0434\u0435\u043d\u043e: 47 628 853,03 \u0440\u0443\u0431.",
        "message_id": "msg-1",
        "trace_id": "trace-1",
    }
    artifact = {
        "assistant_message": assistant_reply,
        "technical_debug_payload": {},
        "session_summary": {},
    }

    state = dcl.build_scenario_step_state(
        scenario_id="semantic_gate_demo",
        domain="value_flow",
        step=step_spec,
        step_index=1,
        question_resolved=asked,
        analysis_context={},
        turn_artifact=artifact,
        entries=[],
    )
    verdict = state["business_first_review"]

    # Both quality flags must be switched off for this reply.
    for flag in ("direct_answer_first_ok", "answer_layering_ok"):
        self.assertFalse(verdict[flag])

    # The issue list is read only after the flag checks, as in the original order.
    codes = verdict["issue_codes"]
    for expected_code in ("business_direct_answer_missing", "answer_layering_noise"):
        self.assertIn(expected_code, codes)
def test_business_first_review_allows_direct_answer_with_evidence_boundary(self) -> None:
    """A reply that states the figures up front must pass the review cleanly.

    The question asks for the net money flow with a counterparty for 2020.
    The assistant text gives received, paid and net amounts in its first
    sentence while noting they come from the rows found in 1C, and the
    business-first review is expected to report no issues.
    """
    asked = "\u043a\u0430\u043a\u043e\u0435 \u043d\u0435\u0442\u0442\u043e \u043f\u043e \u0434\u0435\u043d\u044c\u0433\u0430\u043c \u0441 \u0413\u0440\u0443\u043f\u043f\u0430 \u0421\u0412\u041a \u0437\u0430 2020"
    step_spec = {
        "step_id": "step_01",
        "title": "Counterparty net flow",
        "depends_on": [],
        "question_template": asked,
    }
    assistant_reply = {
        "reply_type": "factual_with_explanation",
        "text": "\u041a\u043e\u0440\u043e\u0442\u043a\u043e: \u043f\u043e \u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442\u0443 \u0413\u0440\u0443\u043f\u043f\u0430 \u0421\u0412\u041a \u0437\u0430 2020 \u043f\u043e \u043d\u0430\u0439\u0434\u0435\u043d\u043d\u044b\u043c \u0441\u0442\u0440\u043e\u043a\u0430\u043c 1\u0421 \u043f\u043e\u043b\u0443\u0447\u0438\u043b\u0438 12 093 465 \u0440\u0443\u0431., \u0437\u0430\u043f\u043b\u0430\u0442\u0438\u043b\u0438 0 \u0440\u0443\u0431.; \u043d\u0435\u0442\u0442\u043e \u0432 \u043d\u0430\u0448\u0443 \u0441\u0442\u043e\u0440\u043e\u043d\u0443 12 093 465 \u0440\u0443\u0431.",
        "message_id": "msg-1",
        "trace_id": "trace-1",
    }
    artifact = {
        "assistant_message": assistant_reply,
        "technical_debug_payload": {},
        "session_summary": {},
    }

    state = dcl.build_scenario_step_state(
        scenario_id="semantic_gate_demo",
        domain="value_flow",
        step=step_spec,
        step_index=1,
        question_resolved=asked,
        analysis_context={},
        turn_artifact=artifact,
        entries=[],
    )
    verdict = state["business_first_review"]

    # Both quality flags must be switched on for this reply.
    for flag in ("direct_answer_first_ok", "answer_layering_ok"):
        self.assertTrue(verdict[flag])

    # No issue codes at all may be attached to a clean direct answer.
    self.assertEqual(verdict["issue_codes"], [])
def test_semantic_audit_markdown_exposes_semantic_gate(self) -> None:
    """The semantic-audit markdown must carry its title and the gate line.

    The review payload deliberately omits a ``semantic_gate_passed`` key;
    the rendered report is still expected to print the gate as ``False``.
    """
    review_payload = {
        "scenario_id": "semantic_gate_demo",
        "domain": "value_flow",
        "title": "Semantic gate demo",
        "session_id": "session-1",
        "overall_business_status": "fail",
        "steps_total": 1,
        "steps_with_business_failures": 1,
        "steps_with_business_warnings": 0,
        "issue_counts": {"answer_layering_noise": 1},
        "steps": [],
    }

    rendered = dth.build_business_review_markdown(review_payload, title="Semantic audit")

    # Heading first, then the gate line, matching the original assertion order.
    for fragment in ("# Semantic audit", "semantic_gate_passed: `False`"):
        self.assertIn(fragment, rendered)
def test_truth_harness_promotes_business_review_issues_to_findings(self) -> None: def test_truth_harness_promotes_business_review_issues_to_findings(self) -> None:
step_state = dcl.build_scenario_step_state( step_state = dcl.build_scenario_step_state(
scenario_id="business_surface_demo", scenario_id="business_surface_demo",