Усилить семантический AGENT gate для 1С
This commit is contained in:
parent
75e0e2c66e
commit
a4dd9c7c66
|
|
@ -381,6 +381,9 @@ function hasValueFlowActionConflictWithDiscoveryTurnMeaning(input, entryPoint) {
|
||||||
if (askedDomain !== "counterparty_value") {
|
if (askedDomain !== "counterparty_value") {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
if (askedAction === "net_value_flow") {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
if (hasExactBankOperationsAddressReply(input, entryPoint)) {
|
if (hasExactBankOperationsAddressReply(input, entryPoint)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
@ -388,9 +391,6 @@ function hasValueFlowActionConflictWithDiscoveryTurnMeaning(input, entryPoint) {
|
||||||
if (askedAction === "payout") {
|
if (askedAction === "payout") {
|
||||||
return detectedIntent !== "supplier_payouts_profile";
|
return detectedIntent !== "supplier_payouts_profile";
|
||||||
}
|
}
|
||||||
if (askedAction === "net_value_flow") {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
function hasEvidenceLaneConflictWithDiscoveryTurnMeaning(input, entryPoint) {
|
function hasEvidenceLaneConflictWithDiscoveryTurnMeaning(input, entryPoint) {
|
||||||
|
|
@ -534,6 +534,10 @@ function hasSemanticConflictWithDiscoveryTurnMeaning(input, entryPoint) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (hasExactBankOperationsAddressReply(input, entryPoint)) {
|
if (hasExactBankOperationsAddressReply(input, entryPoint)) {
|
||||||
|
const askedAction = toNonEmptyString(readDiscoveryTurnMeaning(entryPoint)?.asked_action_family);
|
||||||
|
if (askedAction === "net_value_flow") {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
const detectedIntent = toNonEmptyString(input.addressRuntimeMeta?.detected_intent);
|
const detectedIntent = toNonEmptyString(input.addressRuntimeMeta?.detected_intent);
|
||||||
|
|
@ -644,6 +648,9 @@ function applyAssistantMcpDiscoveryResponsePolicy(input) {
|
||||||
const metadataDiscoveryPriority = hasMetadataDiscoveryPriority(input, entryPoint);
|
const metadataDiscoveryPriority = hasMetadataDiscoveryPriority(input, entryPoint);
|
||||||
const valueFlowActionConflictWithDiscoveryTurnMeaning = hasValueFlowActionConflictWithDiscoveryTurnMeaning(input, entryPoint);
|
const valueFlowActionConflictWithDiscoveryTurnMeaning = hasValueFlowActionConflictWithDiscoveryTurnMeaning(input, entryPoint);
|
||||||
const evidenceLaneConflictWithDiscoveryTurnMeaning = hasEvidenceLaneConflictWithDiscoveryTurnMeaning(input, entryPoint);
|
const evidenceLaneConflictWithDiscoveryTurnMeaning = hasEvidenceLaneConflictWithDiscoveryTurnMeaning(input, entryPoint);
|
||||||
|
const exactBankOperationsProtectsCurrent = exactBankOperationsAddressReply &&
|
||||||
|
!semanticConflictWithDiscoveryTurnMeaning &&
|
||||||
|
!valueFlowActionConflictWithDiscoveryTurnMeaning;
|
||||||
if (!entryPoint) {
|
if (!entryPoint) {
|
||||||
pushReason(reasonCodes, "mcp_discovery_response_policy_no_entry_point");
|
pushReason(reasonCodes, "mcp_discovery_response_policy_no_entry_point");
|
||||||
}
|
}
|
||||||
|
|
@ -701,7 +708,7 @@ function applyAssistantMcpDiscoveryResponsePolicy(input) {
|
||||||
if (exactValueFlowReplyForBusinessOverviewDirectMoneyNeed) {
|
if (exactValueFlowReplyForBusinessOverviewDirectMoneyNeed) {
|
||||||
pushReason(reasonCodes, "mcp_discovery_response_policy_keep_exact_value_flow_reply_over_business_overview_direct_money_clarification");
|
pushReason(reasonCodes, "mcp_discovery_response_policy_keep_exact_value_flow_reply_over_business_overview_direct_money_clarification");
|
||||||
}
|
}
|
||||||
if (exactBankOperationsAddressReply) {
|
if (exactBankOperationsProtectsCurrent) {
|
||||||
pushReason(reasonCodes, "mcp_discovery_response_policy_keep_exact_bank_operations_address_reply");
|
pushReason(reasonCodes, "mcp_discovery_response_policy_keep_exact_bank_operations_address_reply");
|
||||||
}
|
}
|
||||||
if (inventoryMarginRankingAddressReply) {
|
if (inventoryMarginRankingAddressReply) {
|
||||||
|
|
@ -736,7 +743,7 @@ function applyAssistantMcpDiscoveryResponsePolicy(input) {
|
||||||
!runtimeMatchedExactReply &&
|
!runtimeMatchedExactReply &&
|
||||||
!staleMetadataDiscoveryFallbackAgainstExactAddressReply &&
|
!staleMetadataDiscoveryFallbackAgainstExactAddressReply &&
|
||||||
!exactValueFlowReplyForBusinessOverviewDirectMoneyNeed &&
|
!exactValueFlowReplyForBusinessOverviewDirectMoneyNeed &&
|
||||||
!exactBankOperationsAddressReply &&
|
!exactBankOperationsProtectsCurrent &&
|
||||||
!inventoryMarginRankingAddressReply &&
|
!inventoryMarginRankingAddressReply &&
|
||||||
!(deterministicBroadBusinessEvaluationReply && candidate.candidate_status === "clarification_candidate") &&
|
!(deterministicBroadBusinessEvaluationReply && candidate.candidate_status === "clarification_candidate") &&
|
||||||
ALLOWED_CANDIDATE_STATUSES.has(candidate.candidate_status) &&
|
ALLOWED_CANDIDATE_STATUSES.has(candidate.candidate_status) &&
|
||||||
|
|
|
||||||
|
|
@ -537,6 +537,9 @@ function hasValueFlowActionConflictWithDiscoveryTurnMeaning(
|
||||||
if (askedDomain !== "counterparty_value") {
|
if (askedDomain !== "counterparty_value") {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
if (askedAction === "net_value_flow") {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
if (hasExactBankOperationsAddressReply(input, entryPoint)) {
|
if (hasExactBankOperationsAddressReply(input, entryPoint)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
@ -544,9 +547,6 @@ function hasValueFlowActionConflictWithDiscoveryTurnMeaning(
|
||||||
if (askedAction === "payout") {
|
if (askedAction === "payout") {
|
||||||
return detectedIntent !== "supplier_payouts_profile";
|
return detectedIntent !== "supplier_payouts_profile";
|
||||||
}
|
}
|
||||||
if (askedAction === "net_value_flow") {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -726,6 +726,10 @@ function hasSemanticConflictWithDiscoveryTurnMeaning(
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (hasExactBankOperationsAddressReply(input, entryPoint)) {
|
if (hasExactBankOperationsAddressReply(input, entryPoint)) {
|
||||||
|
const askedAction = toNonEmptyString(readDiscoveryTurnMeaning(entryPoint)?.asked_action_family);
|
||||||
|
if (askedAction === "net_value_flow") {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
const detectedIntent = toNonEmptyString(input.addressRuntimeMeta?.detected_intent);
|
const detectedIntent = toNonEmptyString(input.addressRuntimeMeta?.detected_intent);
|
||||||
|
|
@ -870,6 +874,10 @@ export function applyAssistantMcpDiscoveryResponsePolicy(
|
||||||
input,
|
input,
|
||||||
entryPoint
|
entryPoint
|
||||||
);
|
);
|
||||||
|
const exactBankOperationsProtectsCurrent =
|
||||||
|
exactBankOperationsAddressReply &&
|
||||||
|
!semanticConflictWithDiscoveryTurnMeaning &&
|
||||||
|
!valueFlowActionConflictWithDiscoveryTurnMeaning;
|
||||||
|
|
||||||
if (!entryPoint) {
|
if (!entryPoint) {
|
||||||
pushReason(reasonCodes, "mcp_discovery_response_policy_no_entry_point");
|
pushReason(reasonCodes, "mcp_discovery_response_policy_no_entry_point");
|
||||||
|
|
@ -940,7 +948,7 @@ export function applyAssistantMcpDiscoveryResponsePolicy(
|
||||||
"mcp_discovery_response_policy_keep_exact_value_flow_reply_over_business_overview_direct_money_clarification"
|
"mcp_discovery_response_policy_keep_exact_value_flow_reply_over_business_overview_direct_money_clarification"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
if (exactBankOperationsAddressReply) {
|
if (exactBankOperationsProtectsCurrent) {
|
||||||
pushReason(reasonCodes, "mcp_discovery_response_policy_keep_exact_bank_operations_address_reply");
|
pushReason(reasonCodes, "mcp_discovery_response_policy_keep_exact_bank_operations_address_reply");
|
||||||
}
|
}
|
||||||
if (inventoryMarginRankingAddressReply) {
|
if (inventoryMarginRankingAddressReply) {
|
||||||
|
|
@ -980,7 +988,7 @@ export function applyAssistantMcpDiscoveryResponsePolicy(
|
||||||
!runtimeMatchedExactReply &&
|
!runtimeMatchedExactReply &&
|
||||||
!staleMetadataDiscoveryFallbackAgainstExactAddressReply &&
|
!staleMetadataDiscoveryFallbackAgainstExactAddressReply &&
|
||||||
!exactValueFlowReplyForBusinessOverviewDirectMoneyNeed &&
|
!exactValueFlowReplyForBusinessOverviewDirectMoneyNeed &&
|
||||||
!exactBankOperationsAddressReply &&
|
!exactBankOperationsProtectsCurrent &&
|
||||||
!inventoryMarginRankingAddressReply &&
|
!inventoryMarginRankingAddressReply &&
|
||||||
!(deterministicBroadBusinessEvaluationReply && candidate.candidate_status === "clarification_candidate") &&
|
!(deterministicBroadBusinessEvaluationReply && candidate.candidate_status === "clarification_candidate") &&
|
||||||
ALLOWED_CANDIDATE_STATUSES.has(candidate.candidate_status) &&
|
ALLOWED_CANDIDATE_STATUSES.has(candidate.candidate_status) &&
|
||||||
|
|
|
||||||
|
|
@ -700,6 +700,67 @@ describe("assistant MCP discovery response policy", () => {
|
||||||
expect(result.reason_codes).not.toContain("mcp_discovery_response_policy_semantic_conflict_allows_candidate_override");
|
expect(result.reason_codes).not.toContain("mcp_discovery_response_policy_semantic_conflict_allows_candidate_override");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("overrides exact bank operation replies for explicit counterparty received-paid-net questions", () => {
|
||||||
|
const result = applyAssistantMcpDiscoveryResponsePolicy({
|
||||||
|
currentReply:
|
||||||
|
"Exact bank operation answer: found 13 bank operations for Group SVK, showing rows and payment purposes.",
|
||||||
|
currentReplySource: "address_query_runtime_v1",
|
||||||
|
currentReplyType: "factual",
|
||||||
|
addressRuntimeMeta: {
|
||||||
|
detected_intent: "bank_operations_by_counterparty",
|
||||||
|
selected_recipe: "address_bank_operations_by_counterparty_v1",
|
||||||
|
mcp_call_status: "matched_non_empty",
|
||||||
|
capability_route_mode: "exact",
|
||||||
|
answer_grounding_check: {
|
||||||
|
status: "grounded"
|
||||||
|
},
|
||||||
|
assistant_mcp_discovery_entry_point_v1: entryPoint({
|
||||||
|
turn_input: {
|
||||||
|
adapter_status: "ready",
|
||||||
|
should_run_discovery: true,
|
||||||
|
data_need_graph: {
|
||||||
|
business_fact_family: "value_flow",
|
||||||
|
subject_candidates: ["Group SVK"],
|
||||||
|
reason_codes: ["data_need_graph_built", "data_need_graph_bidirectional_value_flow"]
|
||||||
|
},
|
||||||
|
turn_meaning_ref: {
|
||||||
|
asked_domain_family: "counterparty_value",
|
||||||
|
asked_action_family: "net_value_flow",
|
||||||
|
explicit_entity_candidates: ["Group SVK"],
|
||||||
|
explicit_date_scope: "2020",
|
||||||
|
unsupported_but_understood_family: "counterparty_bidirectional_value_flow_or_netting"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
bridge: {
|
||||||
|
bridge_status: "answer_draft_ready",
|
||||||
|
user_facing_response_allowed: true,
|
||||||
|
business_fact_answer_allowed: true,
|
||||||
|
requires_user_clarification: false,
|
||||||
|
answer_draft: {
|
||||||
|
answer_mode: "confirmed_with_bounded_inference",
|
||||||
|
headline: "Group SVK 2020: received 12 093 465 rub., paid 0 rub.; net 12 093 465 rub. toward us.",
|
||||||
|
confirmed_lines: ["received 12 093 465 rub.; paid 0 rub.; net 12 093 465 rub."],
|
||||||
|
inference_lines: [],
|
||||||
|
unknown_lines: [],
|
||||||
|
limitation_lines: [],
|
||||||
|
next_step_line: null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(result.applied).toBe(true);
|
||||||
|
expect(result.decision).toBe("apply_candidate");
|
||||||
|
expect(result.reply_source).toBe("mcp_discovery_response_candidate_guarded");
|
||||||
|
expect(result.reply_text).toContain("received 12 093 465");
|
||||||
|
expect(result.reply_text).toContain("paid 0");
|
||||||
|
expect(result.reply_text).toContain("net 12 093 465");
|
||||||
|
expect(result.reason_codes).toContain("mcp_discovery_response_policy_value_flow_action_conflict_allows_candidate_override");
|
||||||
|
expect(result.reason_codes).toContain("mcp_discovery_response_policy_semantic_conflict_allows_candidate_override");
|
||||||
|
expect(result.reason_codes).not.toContain("mcp_discovery_response_policy_keep_exact_bank_operations_address_reply");
|
||||||
|
});
|
||||||
|
|
||||||
it("overrides an exact ranking-shaped address reply when open-scope ranking still needs organization", () => {
|
it("overrides an exact ranking-shaped address reply when open-scope ranking still needs organization", () => {
|
||||||
const result = applyAssistantMcpDiscoveryResponsePolicy({
|
const result = applyAssistantMcpDiscoveryResponsePolicy({
|
||||||
currentReply:
|
currentReply:
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,84 @@
|
||||||
[
|
[
|
||||||
|
{
|
||||||
|
"generation_id": "gen-ag05231858-323f86",
|
||||||
|
"created_at": "2026-05-23T18:58:48+00:00",
|
||||||
|
"mode": "saved_user_sessions",
|
||||||
|
"title": "AGENT | Autonomy business quality pack",
|
||||||
|
"count": 15,
|
||||||
|
"domain": "autonomy_business_quality",
|
||||||
|
"questions": [
|
||||||
|
"Сколько входящих денег за 2020 год по ООО Альтернатива Плюс без разреза по контрагентам?",
|
||||||
|
"Сколько исходящих денег за 2020 год по ООО Альтернатива Плюс без разреза по контрагентам?",
|
||||||
|
"А всего сколько денег пришло в ООО Альтернатива Плюс за 2020, без топов и без контрагентов?",
|
||||||
|
"Теперь дай взрослый обзор за 2020 по компании: входящие, исходящие, нетто, топы, но банк в топах отдельно объясни как финансовый поток.",
|
||||||
|
"скока денег альтернатива заработала за 20 год?",
|
||||||
|
"а это чистая прибыль?",
|
||||||
|
"Какая чистая прибыль по ООО Альтернатива Плюс за 2020?",
|
||||||
|
"А отдельно по СБЕРБАНКУ: он для нас клиент, поставщик или финансовый поток? Дай коротко по подтвержденным строкам.",
|
||||||
|
"какое нетто по деньгам с Группа СВК за 2020 год: сколько получили и сколько заплатили?",
|
||||||
|
"кому мы должны на конец 2020?",
|
||||||
|
"а нам кто должен на конец 2020?",
|
||||||
|
"сколько НДС надо заплатить в налоговую за декабрь 2019?",
|
||||||
|
"Как ты оценишь деятельность компании?",
|
||||||
|
"Какая номенклатура товара реализована с высокой прибылью какая с низкой?",
|
||||||
|
"май 2020"
|
||||||
|
],
|
||||||
|
"generated_by": "codex_agent",
|
||||||
|
"saved_case_set_file": "assistant_autogen_saved_user_sessions_20260523185848_gen-ag05231858-323f86.json",
|
||||||
|
"context": {
|
||||||
|
"llm_provider": null,
|
||||||
|
"model": null,
|
||||||
|
"assistant_prompt_version": null,
|
||||||
|
"decomposition_prompt_version": null,
|
||||||
|
"prompt_fingerprint": null,
|
||||||
|
"autogen_personality_id": null,
|
||||||
|
"autogen_personality_prompt": null,
|
||||||
|
"source_session_id": null,
|
||||||
|
"saved_session_file": "assistant_saved_session_20260523185848_gen-ag05231858-323f86.json",
|
||||||
|
"saved_case_set_kind": "agent_semantic_scenario",
|
||||||
|
"agent_run": true,
|
||||||
|
"agent_focus": "Expanded targeted AGENT replay for the autonomy milestone: value-flow, business overview, debts, VAT, profit/cashflow distinction, nomenclature margin boundary, and final answer quality must survive realistic business questions.",
|
||||||
|
"architecture_phase": "turnaround_11",
|
||||||
|
"source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\agent_autonomy_business_quality_20260523.json",
|
||||||
|
"scenario_id": "agent_autonomy_business_quality_20260523",
|
||||||
|
"semantic_tags": [
|
||||||
|
"autonomy_core",
|
||||||
|
"bank_boundary",
|
||||||
|
"bank_classification",
|
||||||
|
"business_answer_quality",
|
||||||
|
"business_evaluation",
|
||||||
|
"business_overview",
|
||||||
|
"cashflow_overview",
|
||||||
|
"cashflow_vs_profit",
|
||||||
|
"clarification",
|
||||||
|
"colloquial_money",
|
||||||
|
"colloquial_total",
|
||||||
|
"counterparty_value_flow",
|
||||||
|
"debt_answer_quality",
|
||||||
|
"direct_profit",
|
||||||
|
"domain_purity",
|
||||||
|
"followup_context",
|
||||||
|
"incoming_total",
|
||||||
|
"limit_honesty",
|
||||||
|
"limited_answer",
|
||||||
|
"net_flow",
|
||||||
|
"next_action",
|
||||||
|
"no_profit_substitution",
|
||||||
|
"no_top_guard",
|
||||||
|
"nomenclature_margin",
|
||||||
|
"outgoing_total",
|
||||||
|
"payables",
|
||||||
|
"profit_vs_cashflow",
|
||||||
|
"receivables",
|
||||||
|
"technical_garbage_guard",
|
||||||
|
"value_flow",
|
||||||
|
"vat"
|
||||||
|
],
|
||||||
|
"validation_status": "accepted_live_replay",
|
||||||
|
"validated_run_dir": "artifacts\\domain_runs\\agent_autonomy_business_quality_live_semantic_gate_accepted_20260523",
|
||||||
|
"saved_after_validated_replay": true
|
||||||
|
}
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"generation_id": "gen-ag05231427-70915a",
|
"generation_id": "gen-ag05231427-70915a",
|
||||||
"created_at": "2026-05-23T14:27:55+00:00",
|
"created_at": "2026-05-23T14:27:55+00:00",
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,287 @@
|
||||||
|
{
|
||||||
|
"saved_at": "2026-05-23T18:58:48+00:00",
|
||||||
|
"generation_id": "gen-ag05231858-323f86",
|
||||||
|
"mode": "saved_user_sessions",
|
||||||
|
"title": "AGENT | Autonomy business quality pack",
|
||||||
|
"agent_run": true,
|
||||||
|
"questions": [
|
||||||
|
"Сколько входящих денег за 2020 год по ООО Альтернатива Плюс без разреза по контрагентам?",
|
||||||
|
"Сколько исходящих денег за 2020 год по ООО Альтернатива Плюс без разреза по контрагентам?",
|
||||||
|
"А всего сколько денег пришло в ООО Альтернатива Плюс за 2020, без топов и без контрагентов?",
|
||||||
|
"Теперь дай взрослый обзор за 2020 по компании: входящие, исходящие, нетто, топы, но банк в топах отдельно объясни как финансовый поток.",
|
||||||
|
"скока денег альтернатива заработала за 20 год?",
|
||||||
|
"а это чистая прибыль?",
|
||||||
|
"Какая чистая прибыль по ООО Альтернатива Плюс за 2020?",
|
||||||
|
"А отдельно по СБЕРБАНКУ: он для нас клиент, поставщик или финансовый поток? Дай коротко по подтвержденным строкам.",
|
||||||
|
"какое нетто по деньгам с Группа СВК за 2020 год: сколько получили и сколько заплатили?",
|
||||||
|
"кому мы должны на конец 2020?",
|
||||||
|
"а нам кто должен на конец 2020?",
|
||||||
|
"сколько НДС надо заплатить в налоговую за декабрь 2019?",
|
||||||
|
"Как ты оценишь деятельность компании?",
|
||||||
|
"Какая номенклатура товара реализована с высокой прибылью какая с низкой?",
|
||||||
|
"май 2020"
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"assistant_prompt_version": null,
|
||||||
|
"decomposition_prompt_version": null,
|
||||||
|
"prompt_fingerprint": null,
|
||||||
|
"agent_focus": "Expanded targeted AGENT replay for the autonomy milestone: value-flow, business overview, debts, VAT, profit/cashflow distinction, nomenclature margin boundary, and final answer quality must survive realistic business questions.",
|
||||||
|
"architecture_phase": "turnaround_11",
|
||||||
|
"source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\agent_autonomy_business_quality_20260523.json",
|
||||||
|
"scenario_id": "agent_autonomy_business_quality_20260523",
|
||||||
|
"semantic_tags": [
|
||||||
|
"autonomy_core",
|
||||||
|
"bank_boundary",
|
||||||
|
"bank_classification",
|
||||||
|
"business_answer_quality",
|
||||||
|
"business_evaluation",
|
||||||
|
"business_overview",
|
||||||
|
"cashflow_overview",
|
||||||
|
"cashflow_vs_profit",
|
||||||
|
"clarification",
|
||||||
|
"colloquial_money",
|
||||||
|
"colloquial_total",
|
||||||
|
"counterparty_value_flow",
|
||||||
|
"debt_answer_quality",
|
||||||
|
"direct_profit",
|
||||||
|
"domain_purity",
|
||||||
|
"followup_context",
|
||||||
|
"incoming_total",
|
||||||
|
"limit_honesty",
|
||||||
|
"limited_answer",
|
||||||
|
"net_flow",
|
||||||
|
"next_action",
|
||||||
|
"no_profit_substitution",
|
||||||
|
"no_top_guard",
|
||||||
|
"nomenclature_margin",
|
||||||
|
"outgoing_total",
|
||||||
|
"payables",
|
||||||
|
"profit_vs_cashflow",
|
||||||
|
"receivables",
|
||||||
|
"technical_garbage_guard",
|
||||||
|
"value_flow",
|
||||||
|
"vat"
|
||||||
|
],
|
||||||
|
"validation_status": "accepted_live_replay",
|
||||||
|
"validated_run_dir": "artifacts\\domain_runs\\agent_autonomy_business_quality_live_semantic_gate_accepted_20260523",
|
||||||
|
"saved_after_validated_replay": true,
|
||||||
|
"save_gate": {
|
||||||
|
"schema_version": "agent_semantic_save_gate_v1",
|
||||||
|
"validation_status": "accepted_live_replay",
|
||||||
|
"validated_run_dir": "artifacts\\domain_runs\\agent_autonomy_business_quality_live_semantic_gate_accepted_20260523",
|
||||||
|
"final_status": "accepted",
|
||||||
|
"review_overall_status": "pass",
|
||||||
|
"business_overall_status": "pass",
|
||||||
|
"steps_total": 15,
|
||||||
|
"steps_passed": 15,
|
||||||
|
"steps_failed": 0,
|
||||||
|
"steps_with_business_failures": 0,
|
||||||
|
"steps_with_business_warnings": 0,
|
||||||
|
"acceptance_gate_passed": true,
|
||||||
|
"saved_after_validated_replay": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"source_session_id": null,
|
||||||
|
"session": {
|
||||||
|
"session_id": null,
|
||||||
|
"mode": "agent_semantic_run",
|
||||||
|
"items": [
|
||||||
|
{
|
||||||
|
"message_id": "agent-user-001",
|
||||||
|
"role": "user",
|
||||||
|
"text": "Сколько входящих денег за 2020 год по ООО Альтернатива Плюс без разреза по контрагентам?",
|
||||||
|
"created_at": "2026-05-23T18:58:48+00:00",
|
||||||
|
"reply_type": null,
|
||||||
|
"trace_id": null,
|
||||||
|
"debug": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"message_id": "agent-user-002",
|
||||||
|
"role": "user",
|
||||||
|
"text": "Сколько исходящих денег за 2020 год по ООО Альтернатива Плюс без разреза по контрагентам?",
|
||||||
|
"created_at": "2026-05-23T18:58:48+00:00",
|
||||||
|
"reply_type": null,
|
||||||
|
"trace_id": null,
|
||||||
|
"debug": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"message_id": "agent-user-003",
|
||||||
|
"role": "user",
|
||||||
|
"text": "А всего сколько денег пришло в ООО Альтернатива Плюс за 2020, без топов и без контрагентов?",
|
||||||
|
"created_at": "2026-05-23T18:58:48+00:00",
|
||||||
|
"reply_type": null,
|
||||||
|
"trace_id": null,
|
||||||
|
"debug": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"message_id": "agent-user-004",
|
||||||
|
"role": "user",
|
||||||
|
"text": "Теперь дай взрослый обзор за 2020 по компании: входящие, исходящие, нетто, топы, но банк в топах отдельно объясни как финансовый поток.",
|
||||||
|
"created_at": "2026-05-23T18:58:48+00:00",
|
||||||
|
"reply_type": null,
|
||||||
|
"trace_id": null,
|
||||||
|
"debug": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"message_id": "agent-user-005",
|
||||||
|
"role": "user",
|
||||||
|
"text": "скока денег альтернатива заработала за 20 год?",
|
||||||
|
"created_at": "2026-05-23T18:58:48+00:00",
|
||||||
|
"reply_type": null,
|
||||||
|
"trace_id": null,
|
||||||
|
"debug": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"message_id": "agent-user-006",
|
||||||
|
"role": "user",
|
||||||
|
"text": "а это чистая прибыль?",
|
||||||
|
"created_at": "2026-05-23T18:58:48+00:00",
|
||||||
|
"reply_type": null,
|
||||||
|
"trace_id": null,
|
||||||
|
"debug": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"message_id": "agent-user-007",
|
||||||
|
"role": "user",
|
||||||
|
"text": "Какая чистая прибыль по ООО Альтернатива Плюс за 2020?",
|
||||||
|
"created_at": "2026-05-23T18:58:48+00:00",
|
||||||
|
"reply_type": null,
|
||||||
|
"trace_id": null,
|
||||||
|
"debug": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"message_id": "agent-user-008",
|
||||||
|
"role": "user",
|
||||||
|
"text": "А отдельно по СБЕРБАНКУ: он для нас клиент, поставщик или финансовый поток? Дай коротко по подтвержденным строкам.",
|
||||||
|
"created_at": "2026-05-23T18:58:48+00:00",
|
||||||
|
"reply_type": null,
|
||||||
|
"trace_id": null,
|
||||||
|
"debug": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"message_id": "agent-user-009",
|
||||||
|
"role": "user",
|
||||||
|
"text": "какое нетто по деньгам с Группа СВК за 2020 год: сколько получили и сколько заплатили?",
|
||||||
|
"created_at": "2026-05-23T18:58:48+00:00",
|
||||||
|
"reply_type": null,
|
||||||
|
"trace_id": null,
|
||||||
|
"debug": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"message_id": "agent-user-010",
|
||||||
|
"role": "user",
|
||||||
|
"text": "кому мы должны на конец 2020?",
|
||||||
|
"created_at": "2026-05-23T18:58:48+00:00",
|
||||||
|
"reply_type": null,
|
||||||
|
"trace_id": null,
|
||||||
|
"debug": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"message_id": "agent-user-011",
|
||||||
|
"role": "user",
|
||||||
|
"text": "а нам кто должен на конец 2020?",
|
||||||
|
"created_at": "2026-05-23T18:58:48+00:00",
|
||||||
|
"reply_type": null,
|
||||||
|
"trace_id": null,
|
||||||
|
"debug": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"message_id": "agent-user-012",
|
||||||
|
"role": "user",
|
||||||
|
"text": "сколько НДС надо заплатить в налоговую за декабрь 2019?",
|
||||||
|
"created_at": "2026-05-23T18:58:48+00:00",
|
||||||
|
"reply_type": null,
|
||||||
|
"trace_id": null,
|
||||||
|
"debug": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"message_id": "agent-user-013",
|
||||||
|
"role": "user",
|
||||||
|
"text": "Как ты оценишь деятельность компании?",
|
||||||
|
"created_at": "2026-05-23T18:58:48+00:00",
|
||||||
|
"reply_type": null,
|
||||||
|
"trace_id": null,
|
||||||
|
"debug": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"message_id": "agent-user-014",
|
||||||
|
"role": "user",
|
||||||
|
"text": "Какая номенклатура товара реализована с высокой прибылью какая с низкой?",
|
||||||
|
"created_at": "2026-05-23T18:58:48+00:00",
|
||||||
|
"reply_type": null,
|
||||||
|
"trace_id": null,
|
||||||
|
"debug": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"message_id": "agent-user-015",
|
||||||
|
"role": "user",
|
||||||
|
"text": "май 2020",
|
||||||
|
"created_at": "2026-05-23T18:58:48+00:00",
|
||||||
|
"reply_type": null,
|
||||||
|
"trace_id": null,
|
||||||
|
"debug": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"agent_run": true,
|
||||||
|
"metadata": {
|
||||||
|
"assistant_prompt_version": null,
|
||||||
|
"decomposition_prompt_version": null,
|
||||||
|
"prompt_fingerprint": null,
|
||||||
|
"agent_focus": "Expanded targeted AGENT replay for the autonomy milestone: value-flow, business overview, debts, VAT, profit/cashflow distinction, nomenclature margin boundary, and final answer quality must survive realistic business questions.",
|
||||||
|
"architecture_phase": "turnaround_11",
|
||||||
|
"source_spec_file": "X:\\1C\\NDC_1C\\docs\\orchestration\\agent_autonomy_business_quality_20260523.json",
|
||||||
|
"scenario_id": "agent_autonomy_business_quality_20260523",
|
||||||
|
"semantic_tags": [
|
||||||
|
"autonomy_core",
|
||||||
|
"bank_boundary",
|
||||||
|
"bank_classification",
|
||||||
|
"business_answer_quality",
|
||||||
|
"business_evaluation",
|
||||||
|
"business_overview",
|
||||||
|
"cashflow_overview",
|
||||||
|
"cashflow_vs_profit",
|
||||||
|
"clarification",
|
||||||
|
"colloquial_money",
|
||||||
|
"colloquial_total",
|
||||||
|
"counterparty_value_flow",
|
||||||
|
"debt_answer_quality",
|
||||||
|
"direct_profit",
|
||||||
|
"domain_purity",
|
||||||
|
"followup_context",
|
||||||
|
"incoming_total",
|
||||||
|
"limit_honesty",
|
||||||
|
"limited_answer",
|
||||||
|
"net_flow",
|
||||||
|
"next_action",
|
||||||
|
"no_profit_substitution",
|
||||||
|
"no_top_guard",
|
||||||
|
"nomenclature_margin",
|
||||||
|
"outgoing_total",
|
||||||
|
"payables",
|
||||||
|
"profit_vs_cashflow",
|
||||||
|
"receivables",
|
||||||
|
"technical_garbage_guard",
|
||||||
|
"value_flow",
|
||||||
|
"vat"
|
||||||
|
],
|
||||||
|
"validation_status": "accepted_live_replay",
|
||||||
|
"validated_run_dir": "artifacts\\domain_runs\\agent_autonomy_business_quality_live_semantic_gate_accepted_20260523",
|
||||||
|
"saved_after_validated_replay": true,
|
||||||
|
"save_gate": {
|
||||||
|
"schema_version": "agent_semantic_save_gate_v1",
|
||||||
|
"validation_status": "accepted_live_replay",
|
||||||
|
"validated_run_dir": "artifacts\\domain_runs\\agent_autonomy_business_quality_live_semantic_gate_accepted_20260523",
|
||||||
|
"final_status": "accepted",
|
||||||
|
"review_overall_status": "pass",
|
||||||
|
"business_overall_status": "pass",
|
||||||
|
"steps_total": 15,
|
||||||
|
"steps_passed": 15,
|
||||||
|
"steps_failed": 0,
|
||||||
|
"steps_with_business_failures": 0,
|
||||||
|
"steps_with_business_warnings": 0,
|
||||||
|
"acceptance_gate_passed": true,
|
||||||
|
"saved_after_validated_replay": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,70 @@
|
||||||
|
{
|
||||||
|
"suite_id": "assistant_saved_session_gen-ag05231858-323f86",
|
||||||
|
"suite_version": "0.1.0",
|
||||||
|
"schema_version": "assistant_saved_session_suite_v0_1",
|
||||||
|
"generated_at": "2026-05-23T18:58:48+00:00",
|
||||||
|
"generation_id": "gen-ag05231858-323f86",
|
||||||
|
"mode": "saved_user_sessions",
|
||||||
|
"title": "AGENT | Autonomy business quality pack",
|
||||||
|
"domain": "autonomy_business_quality",
|
||||||
|
"scenario_count": 1,
|
||||||
|
"case_ids": [
|
||||||
|
"SAVED-001"
|
||||||
|
],
|
||||||
|
"cases": [
|
||||||
|
{
|
||||||
|
"case_id": "SAVED-001",
|
||||||
|
"scenario_tag": "agent_saved_user_sessions",
|
||||||
|
"title": "AGENT | Autonomy business quality pack",
|
||||||
|
"question_type": "followup",
|
||||||
|
"broadness_level": "medium",
|
||||||
|
"turns": [
|
||||||
|
{
|
||||||
|
"user_message": "Сколько входящих денег за 2020 год по ООО Альтернатива Плюс без разреза по контрагентам?"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"user_message": "Сколько исходящих денег за 2020 год по ООО Альтернатива Плюс без разреза по контрагентам?"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"user_message": "А всего сколько денег пришло в ООО Альтернатива Плюс за 2020, без топов и без контрагентов?"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"user_message": "Теперь дай взрослый обзор за 2020 по компании: входящие, исходящие, нетто, топы, но банк в топах отдельно объясни как финансовый поток."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"user_message": "скока денег альтернатива заработала за 20 год?"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"user_message": "а это чистая прибыль?"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"user_message": "Какая чистая прибыль по ООО Альтернатива Плюс за 2020?"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"user_message": "А отдельно по СБЕРБАНКУ: он для нас клиент, поставщик или финансовый поток? Дай коротко по подтвержденным строкам."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"user_message": "какое нетто по деньгам с Группа СВК за 2020 год: сколько получили и сколько заплатили?"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"user_message": "кому мы должны на конец 2020?"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"user_message": "а нам кто должен на конец 2020?"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"user_message": "сколько НДС надо заплатить в налоговую за декабрь 2019?"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"user_message": "Как ты оценишь деятельность компании?"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"user_message": "Какая номенклатура товара реализована с высокой прибылью какая с низкой?"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"user_message": "май 2020"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
@ -118,6 +118,9 @@ BUSINESS_TOP_LINE_SCAFFOLD_MARKERS = (
|
||||||
"\u043e\u0433\u0440\u0430\u043d\u0438\u0447\u0435\u043d\u043d\u044b\u0439 \u0431\u0438\u0437\u043d\u0435\u0441-\u043e\u0431\u0437\u043e\u0440",
|
"\u043e\u0433\u0440\u0430\u043d\u0438\u0447\u0435\u043d\u043d\u044b\u0439 \u0431\u0438\u0437\u043d\u0435\u0441-\u043e\u0431\u0437\u043e\u0440",
|
||||||
"\u0447\u0442\u043e \u043f\u043e\u0434\u0442\u0432\u0435\u0440\u0436\u0434\u0435\u043d\u043e",
|
"\u0447\u0442\u043e \u043f\u043e\u0434\u0442\u0432\u0435\u0440\u0436\u0434\u0435\u043d\u043e",
|
||||||
"\u043f\u0440\u043e\u0432\u0435\u0440\u0435\u043d\u043d\u044b\u0435 \u043a\u043e\u043d\u0442\u0443\u0440\u044b",
|
"\u043f\u0440\u043e\u0432\u0435\u0440\u0435\u043d\u043d\u044b\u0435 \u043a\u043e\u043d\u0442\u0443\u0440\u044b",
|
||||||
|
"\u043f\u043e \u0434\u0430\u043d\u043d\u044b\u043c 1\u0441 \u043d\u0430\u0439\u0434\u0435\u043d\u044b",
|
||||||
|
"\u043d\u0430\u0439\u0434\u0435\u043d\u044b \u0441\u0442\u0440\u043e\u043a\u0438",
|
||||||
|
"\u0441\u0443\u043c\u043c\u0443 \u043c\u043e\u0436\u043d\u043e \u043d\u0430\u0437\u044b\u0432\u0430\u0442\u044c \u0442\u043e\u043b\u044c\u043a\u043e",
|
||||||
"\u0431\u043b\u043e\u043a 1",
|
"\u0431\u043b\u043e\u043a 1",
|
||||||
"\u0441\u0442\u0430\u0442\u0443\u0441",
|
"\u0441\u0442\u0430\u0442\u0443\u0441",
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -1156,8 +1156,15 @@ def build_business_review_summary(spec: dict[str, Any], scenario_state: dict[str
|
||||||
step_outputs = scenario_state.get("step_outputs") if isinstance(scenario_state.get("step_outputs"), dict) else {}
|
step_outputs = scenario_state.get("step_outputs") if isinstance(scenario_state.get("step_outputs"), dict) else {}
|
||||||
steps: list[dict[str, Any]] = []
|
steps: list[dict[str, Any]] = []
|
||||||
issue_counts: dict[str, int] = {}
|
issue_counts: dict[str, int] = {}
|
||||||
|
review_failures = 0
|
||||||
|
review_warnings = 0
|
||||||
for index, step in enumerate(spec["steps"], start=1):
|
for index, step in enumerate(spec["steps"], start=1):
|
||||||
step_state = step_outputs.get(step["step_id"], {})
|
step_state = step_outputs.get(step["step_id"], {})
|
||||||
|
review_status = step_state.get("review_status") if isinstance(step_state, dict) else None
|
||||||
|
if review_status == "fail":
|
||||||
|
review_failures += 1
|
||||||
|
elif review_status == "warning":
|
||||||
|
review_warnings += 1
|
||||||
business_review = (
|
business_review = (
|
||||||
step_state.get("business_first_review")
|
step_state.get("business_first_review")
|
||||||
if isinstance(step_state, dict) and isinstance(step_state.get("business_first_review"), dict)
|
if isinstance(step_state, dict) and isinstance(step_state.get("business_first_review"), dict)
|
||||||
|
|
@ -1171,7 +1178,7 @@ def build_business_review_summary(spec: dict[str, Any], scenario_state: dict[str
|
||||||
"index": index,
|
"index": index,
|
||||||
"step_id": step["step_id"],
|
"step_id": step["step_id"],
|
||||||
"question": step["question_template"],
|
"question": step["question_template"],
|
||||||
"review_status": step_state.get("review_status") if isinstance(step_state, dict) else None,
|
"review_status": review_status,
|
||||||
"direct_answer": business_review.get("actual_direct_answer"),
|
"direct_answer": business_review.get("actual_direct_answer"),
|
||||||
"answer_length_chars": business_review.get("answer_length_chars"),
|
"answer_length_chars": business_review.get("answer_length_chars"),
|
||||||
"direct_answer_required": business_review.get("direct_answer_required"),
|
"direct_answer_required": business_review.get("direct_answer_required"),
|
||||||
|
|
@ -1193,6 +1200,7 @@ def build_business_review_summary(spec: dict[str, Any], scenario_state: dict[str
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
warnings = sum(1 for step in steps if "business_answer_too_verbose" in step["issue_codes"])
|
warnings = sum(1 for step in steps if "business_answer_too_verbose" in step["issue_codes"])
|
||||||
|
semantic_status = "fail" if failed or review_failures else ("warning" if warnings or review_warnings else "pass")
|
||||||
return {
|
return {
|
||||||
"schema_version": "business_first_run_review_v1",
|
"schema_version": "business_first_run_review_v1",
|
||||||
"scenario_id": spec["scenario_id"],
|
"scenario_id": spec["scenario_id"],
|
||||||
|
|
@ -1202,24 +1210,32 @@ def build_business_review_summary(spec: dict[str, Any], scenario_state: dict[str
|
||||||
"steps_total": len(steps),
|
"steps_total": len(steps),
|
||||||
"steps_with_business_failures": failed,
|
"steps_with_business_failures": failed,
|
||||||
"steps_with_business_warnings": warnings,
|
"steps_with_business_warnings": warnings,
|
||||||
|
"steps_with_review_failures": review_failures,
|
||||||
|
"steps_with_review_warnings": review_warnings,
|
||||||
"issue_counts": issue_counts,
|
"issue_counts": issue_counts,
|
||||||
"overall_business_status": "fail" if failed else ("warning" if warnings else "pass"),
|
"overall_business_status": "fail" if failed else ("warning" if warnings else "pass"),
|
||||||
|
"overall_semantic_status": semantic_status,
|
||||||
|
"semantic_gate_passed": semantic_status == "pass",
|
||||||
"steps": steps,
|
"steps": steps,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def build_business_review_markdown(business_review: dict[str, Any]) -> str:
|
def build_business_review_markdown(business_review: dict[str, Any], *, title: str = "Business-first review") -> str:
|
||||||
lines = [
|
lines = [
|
||||||
"# Business-first review",
|
f"# {title}",
|
||||||
"",
|
"",
|
||||||
f"- scenario_id: `{business_review.get('scenario_id') or 'n/a'}`",
|
f"- scenario_id: `{business_review.get('scenario_id') or 'n/a'}`",
|
||||||
f"- domain: `{business_review.get('domain') or 'n/a'}`",
|
f"- domain: `{business_review.get('domain') or 'n/a'}`",
|
||||||
f"- title: {business_review.get('title') or 'n/a'}",
|
f"- title: {business_review.get('title') or 'n/a'}",
|
||||||
f"- session_id: `{business_review.get('session_id') or 'n/a'}`",
|
f"- session_id: `{business_review.get('session_id') or 'n/a'}`",
|
||||||
f"- overall_business_status: `{business_review.get('overall_business_status') or 'n/a'}`",
|
f"- overall_business_status: `{business_review.get('overall_business_status') or 'n/a'}`",
|
||||||
|
f"- overall_semantic_status: `{business_review.get('overall_semantic_status') or 'n/a'}`",
|
||||||
|
f"- semantic_gate_passed: `{business_review.get('semantic_gate_passed') is True}`",
|
||||||
f"- steps_total: `{business_review.get('steps_total')}`",
|
f"- steps_total: `{business_review.get('steps_total')}`",
|
||||||
f"- steps_with_business_failures: `{business_review.get('steps_with_business_failures')}`",
|
f"- steps_with_business_failures: `{business_review.get('steps_with_business_failures')}`",
|
||||||
f"- steps_with_business_warnings: `{business_review.get('steps_with_business_warnings')}`",
|
f"- steps_with_business_warnings: `{business_review.get('steps_with_business_warnings')}`",
|
||||||
|
f"- steps_with_review_failures: `{business_review.get('steps_with_review_failures')}`",
|
||||||
|
f"- steps_with_review_warnings: `{business_review.get('steps_with_review_warnings')}`",
|
||||||
f"- issue_counts: `{dump_json(business_review.get('issue_counts') or {})}`",
|
f"- issue_counts: `{dump_json(business_review.get('issue_counts') or {})}`",
|
||||||
"",
|
"",
|
||||||
"## Human Answer Surface",
|
"## Human Answer Surface",
|
||||||
|
|
@ -1479,6 +1495,8 @@ def review_export(spec: dict[str, Any], export_path: Path, output_dir: Path) ->
|
||||||
write_text(output_dir / "truth_review.md", review_markdown)
|
write_text(output_dir / "truth_review.md", review_markdown)
|
||||||
write_json(output_dir / "business_review.json", business_review)
|
write_json(output_dir / "business_review.json", business_review)
|
||||||
write_text(output_dir / "business_review.md", build_business_review_markdown(business_review))
|
write_text(output_dir / "business_review.md", build_business_review_markdown(business_review))
|
||||||
|
write_json(output_dir / "semantic_audit.json", business_review)
|
||||||
|
write_text(output_dir / "semantic_audit.md", build_business_review_markdown(business_review, title="Semantic audit"))
|
||||||
acceptance_bundle = write_acceptance_artifacts(output_dir, spec, scenario_state, review_summary)
|
acceptance_bundle = write_acceptance_artifacts(output_dir, spec, scenario_state, review_summary)
|
||||||
return {
|
return {
|
||||||
"scenario_state": scenario_state,
|
"scenario_state": scenario_state,
|
||||||
|
|
@ -1575,6 +1593,8 @@ def run_live(spec: dict[str, Any], output_dir: Path, args: argparse.Namespace) -
|
||||||
write_text(output_dir / "truth_review.md", review_markdown)
|
write_text(output_dir / "truth_review.md", review_markdown)
|
||||||
write_json(output_dir / "business_review.json", business_review)
|
write_json(output_dir / "business_review.json", business_review)
|
||||||
write_text(output_dir / "business_review.md", build_business_review_markdown(business_review))
|
write_text(output_dir / "business_review.md", build_business_review_markdown(business_review))
|
||||||
|
write_json(output_dir / "semantic_audit.json", business_review)
|
||||||
|
write_text(output_dir / "semantic_audit.md", build_business_review_markdown(business_review, title="Semantic audit"))
|
||||||
acceptance_bundle = write_acceptance_artifacts(output_dir, spec, scenario_state, review_summary)
|
acceptance_bundle = write_acceptance_artifacts(output_dir, spec, scenario_state, review_summary)
|
||||||
print(f"[truth-harness] saved artifacts to {output_dir}")
|
print(f"[truth-harness] saved artifacts to {output_dir}")
|
||||||
print(f"[truth-harness] overall_status={review_summary['overall_status']}")
|
print(f"[truth-harness] overall_status={review_summary['overall_status']}")
|
||||||
|
|
|
||||||
|
|
@ -927,6 +927,91 @@ class DomainCaseLoopStepStateTests(unittest.TestCase):
|
||||||
self.assertIn("technical_garbage_in_answer", review["issue_codes"])
|
self.assertIn("technical_garbage_in_answer", review["issue_codes"])
|
||||||
self.assertNotIn("business_direct_answer_missing", review["issue_codes"])
|
self.assertNotIn("business_direct_answer_missing", review["issue_codes"])
|
||||||
|
|
||||||
|
def test_business_first_review_rejects_found_rows_scaffold_as_direct_answer(self) -> None:
|
||||||
|
question = "\u0441\u043a\u043e\u043b\u044c\u043a\u043e \u0432\u0445\u043e\u0434\u044f\u0449\u0438\u0445 \u0434\u0435\u043d\u0435\u0433 \u0437\u0430 2020"
|
||||||
|
step_state = dcl.build_scenario_step_state(
|
||||||
|
scenario_id="semantic_gate_demo",
|
||||||
|
domain="value_flow",
|
||||||
|
step={
|
||||||
|
"step_id": "step_01",
|
||||||
|
"title": "Direct money answer",
|
||||||
|
"depends_on": [],
|
||||||
|
"question_template": question,
|
||||||
|
},
|
||||||
|
step_index=1,
|
||||||
|
question_resolved=question,
|
||||||
|
analysis_context={},
|
||||||
|
turn_artifact={
|
||||||
|
"assistant_message": {
|
||||||
|
"reply_type": "factual",
|
||||||
|
"text": "\u041a\u043e\u0440\u043e\u0442\u043a\u043e: \u041f\u043e \u0434\u0430\u043d\u043d\u044b\u043c 1\u0421 \u043d\u0430\u0439\u0434\u0435\u043d\u044b \u0441\u0442\u0440\u043e\u043a\u0438 \u0432\u0445\u043e\u0434\u044f\u0449\u0438\u0445 \u0434\u0435\u043d\u0435\u0436\u043d\u044b\u0445 \u043f\u043e\u0441\u0442\u0443\u043f\u043b\u0435\u043d\u0438\u0439; \u0441\u0443\u043c\u043c\u0443 \u043c\u043e\u0436\u043d\u043e \u043d\u0430\u0437\u044b\u0432\u0430\u0442\u044c \u0442\u043e\u043b\u044c\u043a\u043e \u0432 \u0440\u0430\u043c\u043a\u0430\u0445 \u043f\u0440\u043e\u0432\u0435\u0440\u043a\u0438.\n\u0427\u0442\u043e \u043f\u043e\u0434\u0442\u0432\u0435\u0440\u0436\u0434\u0435\u043d\u043e: 47 628 853,03 \u0440\u0443\u0431.",
|
||||||
|
"message_id": "msg-1",
|
||||||
|
"trace_id": "trace-1",
|
||||||
|
},
|
||||||
|
"technical_debug_payload": {},
|
||||||
|
"session_summary": {},
|
||||||
|
},
|
||||||
|
entries=[],
|
||||||
|
)
|
||||||
|
|
||||||
|
review = step_state["business_first_review"]
|
||||||
|
self.assertFalse(review["direct_answer_first_ok"])
|
||||||
|
self.assertFalse(review["answer_layering_ok"])
|
||||||
|
self.assertIn("business_direct_answer_missing", review["issue_codes"])
|
||||||
|
self.assertIn("answer_layering_noise", review["issue_codes"])
|
||||||
|
|
||||||
|
def test_business_first_review_allows_direct_answer_with_evidence_boundary(self) -> None:
|
||||||
|
question = "\u043a\u0430\u043a\u043e\u0435 \u043d\u0435\u0442\u0442\u043e \u043f\u043e \u0434\u0435\u043d\u044c\u0433\u0430\u043c \u0441 \u0413\u0440\u0443\u043f\u043f\u0430 \u0421\u0412\u041a \u0437\u0430 2020"
|
||||||
|
step_state = dcl.build_scenario_step_state(
|
||||||
|
scenario_id="semantic_gate_demo",
|
||||||
|
domain="value_flow",
|
||||||
|
step={
|
||||||
|
"step_id": "step_01",
|
||||||
|
"title": "Counterparty net flow",
|
||||||
|
"depends_on": [],
|
||||||
|
"question_template": question,
|
||||||
|
},
|
||||||
|
step_index=1,
|
||||||
|
question_resolved=question,
|
||||||
|
analysis_context={},
|
||||||
|
turn_artifact={
|
||||||
|
"assistant_message": {
|
||||||
|
"reply_type": "factual_with_explanation",
|
||||||
|
"text": "\u041a\u043e\u0440\u043e\u0442\u043a\u043e: \u043f\u043e \u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442\u0443 \u0413\u0440\u0443\u043f\u043f\u0430 \u0421\u0412\u041a \u0437\u0430 2020 \u043f\u043e \u043d\u0430\u0439\u0434\u0435\u043d\u043d\u044b\u043c \u0441\u0442\u0440\u043e\u043a\u0430\u043c 1\u0421 \u043f\u043e\u043b\u0443\u0447\u0438\u043b\u0438 12 093 465 \u0440\u0443\u0431., \u0437\u0430\u043f\u043b\u0430\u0442\u0438\u043b\u0438 0 \u0440\u0443\u0431.; \u043d\u0435\u0442\u0442\u043e \u0432 \u043d\u0430\u0448\u0443 \u0441\u0442\u043e\u0440\u043e\u043d\u0443 12 093 465 \u0440\u0443\u0431.",
|
||||||
|
"message_id": "msg-1",
|
||||||
|
"trace_id": "trace-1",
|
||||||
|
},
|
||||||
|
"technical_debug_payload": {},
|
||||||
|
"session_summary": {},
|
||||||
|
},
|
||||||
|
entries=[],
|
||||||
|
)
|
||||||
|
|
||||||
|
review = step_state["business_first_review"]
|
||||||
|
self.assertTrue(review["direct_answer_first_ok"])
|
||||||
|
self.assertTrue(review["answer_layering_ok"])
|
||||||
|
self.assertEqual(review["issue_codes"], [])
|
||||||
|
|
||||||
|
def test_semantic_audit_markdown_exposes_semantic_gate(self) -> None:
|
||||||
|
markdown = dth.build_business_review_markdown(
|
||||||
|
{
|
||||||
|
"scenario_id": "semantic_gate_demo",
|
||||||
|
"domain": "value_flow",
|
||||||
|
"title": "Semantic gate demo",
|
||||||
|
"session_id": "session-1",
|
||||||
|
"overall_business_status": "fail",
|
||||||
|
"steps_total": 1,
|
||||||
|
"steps_with_business_failures": 1,
|
||||||
|
"steps_with_business_warnings": 0,
|
||||||
|
"issue_counts": {"answer_layering_noise": 1},
|
||||||
|
"steps": [],
|
||||||
|
},
|
||||||
|
title="Semantic audit",
|
||||||
|
)
|
||||||
|
|
||||||
|
self.assertIn("# Semantic audit", markdown)
|
||||||
|
self.assertIn("semantic_gate_passed: `False`", markdown)
|
||||||
|
|
||||||
def test_truth_harness_promotes_business_review_issues_to_findings(self) -> None:
|
def test_truth_harness_promotes_business_review_issues_to_findings(self) -> None:
|
||||||
step_state = dcl.build_scenario_step_state(
|
step_state = dcl.build_scenario_step_state(
|
||||||
scenario_id="business_surface_demo",
|
scenario_id="business_surface_demo",
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue