NODEDC_1C/llm_normalizer/backend/dist/services/assistantOrganizationMatche...

210 lines
6.5 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"use strict";
// Set the `__esModule` flag on the exports object (the marker that transpilers
// conventionally emit for ES-module interop).
Object.defineProperty(exports, "__esModule", { value: true });
// Public API of this module. The assignments appear before the function
// definitions; this works because function declarations are hoisted.
exports.normalizeOrganizationScopeValue = normalizeOrganizationScopeValue;
exports.normalizeOrganizationScopeSearchText = normalizeOrganizationScopeSearchText;
exports.scoreOrganizationMentionInMessage = scoreOrganizationMentionInMessage;
exports.mergeKnownOrganizations = mergeKnownOrganizations;
exports.resolveOrganizationSelectionFromMessage = resolveOrganizationSelectionFromMessage;
/**
 * Lower-case words that are discarded when organization names and user
 * messages are split into matching tokens (membership is checked with
 * `.has()` in `tokenizeOrganizationScope`).
 */
const ORGANIZATION_SCOPE_STOPWORDS = new Set([
    // Legal-form abbreviations, Russian and English.
    "ооо", "зао", "оао", "пао", "ао", "ип",
    "llc", "inc", "ltd", "corp",
    // Generic English words.
    "group", "company", "co", "the", "and", "org", "organization",
    // Generic Russian nouns.
    "компания", "организация", "контора", "фирма", "база",
    // Russian prepositions.
    "по", "в", "во", "на", "для", "из", "у", "к", "от",
    // Russian demonstratives, time words and possessives.
    "это", "эта", "этой", "этот",
    "сегодня", "сейчас", "текущая", "текущей",
    "наш", "наша", "нашей", "нашу", "наши"
]);
/**
 * Produces a display-friendly label: the value is stringified (null and
 * undefined become an empty string), typographic quotes and guillemets are
 * replaced with a plain double quote, whitespace runs are collapsed to one
 * space and the ends are trimmed.
 *
 * @param {*} value - Raw label; any value is accepted.
 * @returns {string} The cleaned label, possibly empty.
 */
function normalizeScopeLabel(value) {
    const text = String(value ?? "");
    const withPlainQuotes = text.replace(/[“”«»]/g, '"');
    const singleSpaced = withPlainQuotes.replace(/\s+/g, " ");
    return singleSpaced.trim();
}
/**
 * Builds a comparison key from a label: cleans it with `normalizeScopeLabel`,
 * composes it to Unicode NFC, lower-cases it and folds "ё" into "е".
 *
 * @param {*} value - Raw label; any value is accepted.
 * @returns {string} The comparison key, possibly empty.
 */
function normalizeScopeKey(value) {
    return normalizeScopeLabel(value)
        // Compose decomposed sequences first. In NFD text "ё" is "е" + U+0308
        // and "й" is "и" + U+0306: the /ё/ replacement below would not match,
        // and the stray combining marks are not letters, so a later
        // letters-and-digits filter would split the word in two.
        .normalize("NFC")
        .toLowerCase()
        .replace(/ё/g, "е");
}
/**
 * Cleans a raw organization value for storage or display.
 *
 * The value is passed through `normalizeScopeLabel`, leading and trailing
 * backslashes are stripped, and one pair of matching surrounding quotes
 * (double or single) is removed.
 *
 * @param {*} value - Raw organization name.
 * @returns {string|null} The cleaned name, or null when nothing is left.
 */
function normalizeOrganizationScopeValue(value) {
    const label = normalizeScopeLabel(value);
    if (!label) {
        return null;
    }
    const bare = label.replace(/^\\+|\\+$/g, "").trim();
    const isWrappedIn = (quote) => bare.startsWith(quote) && bare.endsWith(quote);
    const core = ['"', "'"].some(isWrappedIn) ? bare.slice(1, -1).trim() : bare;
    return core.length > 0 ? core : null;
}
/**
 * Converts a value into search text: the key from `normalizeScopeKey` with
 * every run of characters that are neither letters nor digits replaced by a
 * single space, then trimmed.
 *
 * @param {*} value - Organization name or message text.
 * @returns {string} Space-separated words, or an empty string.
 */
function normalizeOrganizationScopeSearchText(value) {
    const key = normalizeScopeKey(value);
    const lettersAndDigits = key.replace(/[^\p{L}\p{N}]+/gu, " ");
    const singleSpaced = lettersAndDigits.replace(/\s+/g, " ");
    return singleSpaced.trim();
}
/**
 * Splits a value into the words that are used for matching.
 *
 * The value is normalized with `normalizeOrganizationScopeSearchText`; words
 * shorter than three characters and words listed in
 * `ORGANIZATION_SCOPE_STOPWORDS` are dropped.
 *
 * @param {*} value - Organization name or message text.
 * @returns {string[]} The remaining words in their original order.
 */
function tokenizeOrganizationScope(value) {
    const searchText = normalizeOrganizationScopeSearchText(value);
    if (!searchText) {
        return [];
    }
    const tokens = [];
    for (const piece of searchText.split(" ")) {
        const word = piece.trim();
        if (word.length < 3 || ORGANIZATION_SCOPE_STOPWORDS.has(word)) {
            continue;
        }
        tokens.push(word);
    }
    return tokens;
}
/**
 * Lists the spellings of a token that are tried against a message: the
 * trimmed, lower-cased token itself, the token with one of the listed
 * Russian endings removed, and the token with a single trailing Russian
 * vowel removed. A shortened form is kept only when at least four
 * characters remain; duplicates are collapsed.
 *
 * @param {*} token - A single word.
 * @returns {string[]} Unique variants, the token itself first; empty for an
 *     empty token.
 */
function organizationTokenVariants(token) {
    const base = String(token ?? "").trim().toLowerCase();
    if (!base) {
        return [];
    }
    const shortened = [
        base.replace(/(?:ами|ями|ого|ему|ому|ыми|ими|иях|ях|ах|ей|ой|ом|ем|ам|ям|ую|юю|ая|яя|ое|ее|ые|ие|ов|ев|ий|ый|ой)$/iu, ""),
        base.replace(/[аеёиоуыэюя]$/iu, "")
    ].filter((stem) => stem.length >= 4);
    return [...new Set([base, ...shortened])];
}
/**
 * Rates how strongly a message refers to an organization.
 *
 * Both arguments are normalized with `normalizeOrganizationScopeSearchText`.
 * - Returns 0 when either side is empty after normalization, when either
 *   side yields no tokens, or when no organization token is found.
 * - Returns 10_000 plus the normalized name length when the normalized name
 *   occurs verbatim inside the normalized message.
 * - Otherwise sums the best variant score of every matched organization
 *   token, then adds 400 when all tokens matched or 50 per matched token
 *   when only some did.
 *
 * @param {*} message - User message text.
 * @param {*} organization - Organization name.
 * @returns {number} A non-negative score; higher means a stronger mention.
 */
function scoreOrganizationMentionInMessage(message, organization) {
    const haystack = normalizeOrganizationScopeSearchText(message);
    const needle = normalizeOrganizationScopeSearchText(organization);
    if (!haystack || !needle) {
        return 0;
    }
    if (haystack.includes(needle)) {
        return 10_000 + needle.length;
    }
    const nameTokens = tokenizeOrganizationScope(needle);
    const messageWords = tokenizeOrganizationScope(haystack);
    if (nameTokens.length === 0 || messageWords.length === 0) {
        return 0;
    }
    // A message word and a variant are "close" when they are equal, or when
    // both have at least five characters and one is a prefix of the other.
    const isCloseTo = (word, variant) => word === variant ||
        (word.length >= 5 && variant.length >= 5 &&
            (word.startsWith(variant) || variant.startsWith(word)));
    // Score of one variant: a substring hit in the message is worth five
    // points per character, a close word at least 20; 0 means no match.
    const rateVariant = (variant) => {
        if (!variant) {
            return 0;
        }
        if (haystack.includes(variant)) {
            return variant.length * 5;
        }
        const hasCloseWord = messageWords.some((word) => isCloseTo(word, variant));
        return hasCloseWord ? Math.max(20, variant.length * 3) : 0;
    };
    let matchedCount = 0;
    let total = 0;
    for (const nameToken of nameTokens) {
        const bestVariantScore = organizationTokenVariants(nameToken)
            .reduce((top, variant) => Math.max(top, rateVariant(variant)), 0);
        if (bestVariantScore > 0) {
            matchedCount += 1;
            total += bestVariantScore;
        }
    }
    if (matchedCount === 0) {
        return 0;
    }
    const bonus = matchedCount === nameTokens.length ? 400 : matchedCount * 50;
    return total + bonus;
}
/**
 * Builds a de-duplicated list of organization names.
 *
 * Each entry is cleaned with `normalizeOrganizationScopeValue`; entries that
 * clean to nothing, or whose search text (from
 * `normalizeOrganizationScopeSearchText`) is empty or already seen, are
 * dropped. The first spelling of each organization wins.
 *
 * @param {*} values - Candidate names; anything but an array counts as empty.
 * @param {number} [limit=50] - End index passed to `slice` on the result.
 * @returns {string[]} Cleaned unique names in first-seen order.
 */
function mergeKnownOrganizations(values, limit = 50) {
    const candidates = Array.isArray(values) ? values : [];
    const firstSpellingByKey = new Map();
    for (const candidate of candidates) {
        const label = normalizeOrganizationScopeValue(candidate);
        if (label) {
            const key = normalizeOrganizationScopeSearchText(label);
            if (key && !firstSpellingByKey.has(key)) {
                firstSpellingByKey.set(key, label);
            }
        }
    }
    return [...firstSpellingByKey.values()].slice(0, limit);
}
/**
 * Picks the known organization that a user message refers to.
 *
 * The known list is de-duplicated with `mergeKnownOrganizations` and every
 * entry is scored with `scoreOrganizationMentionInMessage`. An organization
 * is returned only when it has the single highest score and that score is at
 * least 90; a tie for the top score is treated as ambiguous.
 *
 * @param {*} userMessage - Message text.
 * @param {*} knownOrganizations - Candidate names; anything but an array
 *     counts as empty.
 * @returns {string|null} The selected organization, or null when the message
 *     is empty, nothing scores high enough, or the best score is shared.
 */
function resolveOrganizationSelectionFromMessage(userMessage, knownOrganizations) {
    const known = mergeKnownOrganizations(Array.isArray(knownOrganizations) ? knownOrganizations : []);
    if (!userMessage || known.length === 0) {
        return null;
    }
    const messageNorm = normalizeOrganizationScopeSearchText(userMessage);
    if (!messageNorm) {
        return null;
    }
    // Single pass: remember the highest positive score and whether another
    // organization reached the same score.
    let leader = null;
    let leaderScore = 0;
    let isContested = false;
    for (const organization of known) {
        const score = scoreOrganizationMentionInMessage(messageNorm, organization);
        if (!(score > 0)) {
            continue;
        }
        if (score > leaderScore) {
            leader = organization;
            leaderScore = score;
            isContested = false;
        }
        else if (score === leaderScore) {
            isContested = true;
        }
    }
    if (leader === null || leaderScore < 90 || isContested) {
        return null;
    }
    return leader;
}