NODEDC_1C/llm_normalizer/backend/dist/services/addressQueryShapeClassifier.js

144 lines
4.6 KiB
JavaScript

"use strict";
// Flag this CommonJS module with `__esModule` so ES-module interop helpers
// treat its exports as named exports.
Object.defineProperty(exports, "__esModule", { value: true });
// The only public export. Assigning it here, ahead of the declaration further
// down, works because function declarations are hoisted.
exports.classifyAddressQueryShape = classifyAddressQueryShape;
/**
 * Cues that the user is asking for an explanation or a reason.
 * All patterns are case-insensitive substring matches; the Russian entries are
 * word stems so that inflected forms match as well.
 */
const EXPLAIN_PATTERNS = [
    // English cues.
    /why/iu,
    /because/iu,
    /root cause/iu,
    // Anchored at a word start so that "approve" / "improve" are not mistaken
    // for a request to prove something; "prove", "proved", "proven" still match.
    /\bprove/iu,
    /mechanism/iu,
    // Russian cues.
    /\u043f\u043e\u0447\u0435\u043c\u0443/iu, // "почему" (why)
    /\u043f\u0440\u0438\u0447\u0438\u043d/iu, // "причин" - stem of "причина" (cause)
    /\u043e\u0448\u0438\u0431\u043a/iu, // "ошибк" - stem of "ошибка" (error)
    /\u0434\u043e\u043a\u0430\u0436/iu, // "докаж" - stem of "докажи" (prove)
    /\u043c\u0435\u0445\u0430\u043d\u0438\u0437\u043c/iu, // "механизм" (mechanism)
];
/**
 * Cues that the user wants a fact checked ("is there ...?", "check ...").
 * Case-insensitive substring matches.
 */
const VERIFY_PATTERNS = [
    // English cues.
    /check/iu,
    /verify/iu,
    /is there/iu,
    /was there/iu,
    // Russian cues.
    /\u043f\u0440\u043e\u0432\u0435\u0440/iu, // "провер" - stem of "проверить" (to check)
    /\u0435\u0441\u0442\u044c\s+\u043b\u0438/iu, // "есть ли" (is there)
    /\u0431\u044b\u043b\u0438\s+\u043b\u0438/iu, // "были ли" (were there)
];
/**
 * Cues that the user wants a figure broken down into its components.
 * Case-insensitive substring matches.
 */
const DRILLDOWN_PATTERNS = [
    // English cues.
    /drilldown/iu,
    /breakdown/iu,
    /forming balance/iu,
    // Russian cues.
    /\u0444\u043e\u0440\u043c\u0438\u0440\u0443\u044e\u0442/iu, // "формируют" (they form / make up)
    /\u0440\u0430\u0441\u043a\u0440\u043e\u0439/iu, // "раскрой" (expand / break down)
    /\u0438\u0437\s+\u0447\u0435\u0433\u043e/iu, // "из чего" (out of what)
];
/**
 * Cues that the user is asking about documents (invoices, payments, bank
 * statements, ...). Case-insensitive substring matches; the Russian entries
 * are word stems so that inflected forms match as well.
 */
const DOCUMENT_PATTERNS = [
    // English cues.
    /document/iu,
    /invoice/iu,
    /payment/iu,
    /bank operation/iu,
    // Russian cues.
    /\u0434\u043e\u043a\u0443\u043c\u0435\u043d\u0442/iu, // "документ" (document)
    // "платеж" / "платёж" (payment): the word is written with either "е"
    // (U+0435) or "ё" (U+0451), so both spellings have to be accepted.
    /\u043f\u043b\u0430\u0442[\u0435\u0451]\u0436/iu,
    /\u043f\u043e\u0441\u0442\u0443\u043f\u043b\u0435\u043d/iu, // "поступлен" - stem of "поступление" (receipt)
    /\u0440\u0435\u0430\u043b\u0438\u0437\u0430\u0446/iu, // "реализац" - stem of "реализация" (sale)
    /\u0432\u044b\u043f\u0438\u0441\u043a/iu, // "выписк" - stem of "выписка" (bank statement)
];
/**
 * Cues that the user is asking for an aggregate figure (balances, totals,
 * debts). Case-insensitive substring matches.
 */
const AGGREGATE_PATTERNS = [
    // English cues.
    /who owes us/iu,
    /who we owe/iu,
    /balance/iu,
    /receivable/iu,
    /payable/iu,
    /total/iu,
    // Russian cues.
    /\u043a\u0442\u043e\s+\u0434\u043e\u043b\u0436\u0435\u043d/iu, // "кто должен" (who owes)
    /\u043a\u043e\u043c\u0443\s+\u0434\u043e\u043b\u0436\u043d\u044b/iu, // "кому должны" (whom do we owe)
    /\u043e\u0441\u0442\u0430\u0442\u043e\u043a/iu, // "остаток" (remaining balance)
    /\u0441\u0430\u043b\u044c\u0434\u043e/iu, // "сальдо" (balance)
    /\u043e\u0431\u043e\u0440\u043e\u0442/iu, // "оборот" (turnover)
    /\u0434\u043e\u043b\u0433/iu, // "долг" (debt)
    /\u0437\u0430\u0434\u043e\u043b\u0436/iu, // "задолж" - stem of "задолженность" (amount owed)
];
/**
 * Cues that the user is asking about a specific business object
 * (a counterparty or a contract). Case-insensitive substring matches.
 *
 * The "by ..." / "по ..." entries are subsumed by the bare stems listed after
 * them; they are kept so the list stays identical to the original.
 */
const OBJECT_PATTERNS = [
    // English cues.
    /by counterparty/iu,
    /by contract/iu,
    /counterparty/iu,
    /contract/iu,
    // Russian cues.
    /\u043f\u043e\s+\u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442/iu, // "по контрагент" (by counterparty)
    /\u043f\u043e\s+\u0434\u043e\u0433\u043e\u0432\u043e\u0440/iu, // "по договор" (by contract)
    /\u043a\u043e\u043d\u0442\u0440\u0430\u0433\u0435\u043d\u0442/iu, // "контрагент" (counterparty)
    /\u0434\u043e\u0433\u043e\u0432\u043e\u0440/iu, // "договор" (contract)
];
/**
 * Reports whether at least one of the given regular expressions matches.
 *
 * @param {string} text - Text to test.
 * @param {RegExp[]} patterns - Candidate patterns, tried in order.
 * @returns {boolean} True as soon as one pattern matches; false if none do
 *   (including when the list is empty).
 */
function hasAnyPattern(text, patterns) {
    for (const candidate of patterns) {
        if (candidate.test(text)) {
            return true;
        }
    }
    return false;
}
/**
 * Heuristically detects a message that bundles several requests into one.
 *
 * The signal fires in either of two cases:
 *  - the text contains a joiner (" и ", " and ", ";" or ",") together with an
 *    action/question cue ("кто", "кому", "покажи", "найди", "проверь", "who",
 *    "show", "find", "list", "check", "verify");
 *  - the text contains an explicit continuation marker ("отдельно",
 *    "и еще" / "и ещё", "и также", "and also", or the whole word "then").
 *
 * @param {string} text - Message text to inspect.
 * @returns {boolean} True when a compound-request signal is present.
 */
function hasCompoundSignal(text) {
    const joinerPattern = /(?:\s+\u0438\s+|\sand\s|;|,)/iu;
    const actionPattern = /(?:\u043a\u0442\u043e|\u043a\u043e\u043c\u0443|\u043f\u043e\u043a\u0430\u0436\u0438|\u043d\u0430\u0439\u0434\u0438|\u043f\u0440\u043e\u0432\u0435\u0440\u044c|who|show|find|list|check|verify)/iu;
    if (joinerPattern.test(text) && actionPattern.test(text)) {
        return true;
    }
    // Continuation markers. "ещё" is written with either "е" (U+0435) or "ё"
    // (U+0451), so both spellings are accepted. "then" must be a whole word so
    // that words merely containing it ("authentication", "strengthen") do not
    // count as a compound request.
    return /(?:\u043e\u0442\u0434\u0435\u043b\u044c\u043d\u043e|\u0438\s+\u0435\u0449[\u0435\u0451]|\u0438\s+\u0442\u0430\u043a\u0436\u0435|and also|\bthen\b)/iu.test(text);
}
/**
 * Classifies the "shape" of a user message using keyword heuristics.
 *
 * The message is stringified (null/undefined become ""), trimmed and
 * lower-cased, then run through an ordered list of detectors; the first one
 * that fires decides the result. Priority order: explain, compound, drilldown,
 * verify, document list, aggregate lookup, object lookup.
 *
 * @param {*} userMessage - Raw user message.
 * @returns {{shape: string, confidence: string, reasons: string[]}}
 *   The detected shape with a confidence label ("high", "medium" or "low") and
 *   a single-element list naming the rule that fired. An empty message and a
 *   message matching no detector both yield shape "UNKNOWN" with "low"
 *   confidence.
 */
function classifyAddressQueryShape(userMessage) {
    const normalized = String(userMessage ?? "").trim().toLowerCase();
    // Every result has the same layout; a fresh `reasons` array is built per call.
    const verdict = (shape, confidence, reason) => ({
        shape,
        confidence,
        reasons: [reason]
    });
    if (normalized.length === 0) {
        return verdict("UNKNOWN", "low", "empty_message");
    }
    // [detector, shape, confidence, reason], listed in priority order.
    const rules = [
        [(msg) => hasAnyPattern(msg, EXPLAIN_PATTERNS), "EXPLAIN_OR_REASON", "high", "explain_signal_detected"],
        [(msg) => hasCompoundSignal(msg), "COMPOUND_FACTUAL_QUERY", "medium", "compound_signal_detected"],
        [(msg) => hasAnyPattern(msg, DRILLDOWN_PATTERNS), "DRILLDOWN_REQUEST", "high", "drilldown_signal_detected"],
        [(msg) => hasAnyPattern(msg, VERIFY_PATTERNS), "VERIFY_FACTUAL", "medium", "verify_signal_detected"],
        [(msg) => hasAnyPattern(msg, DOCUMENT_PATTERNS), "DOCUMENT_LIST", "medium", "document_list_signal_detected"],
        [(msg) => hasAnyPattern(msg, AGGREGATE_PATTERNS), "AGGREGATE_LOOKUP", "high", "aggregate_signal_detected"],
        [(msg) => hasAnyPattern(msg, OBJECT_PATTERNS), "OBJECT_LOOKUP", "medium", "object_signal_detected"]
    ];
    for (const [detect, shape, confidence, reason] of rules) {
        if (detect(normalized)) {
            return verdict(shape, confidence, reason);
        }
    }
    return verdict("UNKNOWN", "low", "shape_not_detected");
}