301 lines
9.7 KiB
JavaScript
301 lines
9.7 KiB
JavaScript
"use strict";
|
||
Object.defineProperty(exports, "__esModule", { value: true });
|
||
exports.normalizeOrganizationScopeValue = normalizeOrganizationScopeValue;
|
||
exports.normalizeOrganizationScopeSearchText = normalizeOrganizationScopeSearchText;
|
||
exports.scoreOrganizationMentionInMessage = scoreOrganizationMentionInMessage;
|
||
exports.organizationsLikelySameEntity = organizationsLikelySameEntity;
|
||
exports.mergeKnownOrganizations = mergeKnownOrganizations;
|
||
exports.resolveOrganizationSelectionFromMessage = resolveOrganizationSelectionFromMessage;
|
||
/**
 * Lower-cased words that are ignored when an organization name or a user
 * message is split into comparable tokens. Grouped by kind for readability;
 * the insertion order of the resulting Set is the order of the groups below.
 */
const ORGANIZATION_SCOPE_STOPWORDS = new Set([
    // Legal-form abbreviations (Russian, then English).
    ...["ооо", "зао", "оао", "пао", "ао", "ип"],
    ...["llc", "inc", "ltd", "corp"],
    // Generic English words that carry no identity.
    ...["group", "company", "co", "the", "and", "org", "organization"],
    // Generic Russian nouns for "company / organization / database".
    ...["компания", "организация", "контора", "фирма", "база"],
    // Russian prepositions.
    ...["по", "в", "во", "на", "для", "из", "у", "к", "от"],
    // Demonstratives and "current / now" style words.
    ...["это", "эта", "этой", "этот", "сегодня", "сейчас", "текущая", "текущей"],
    // Possessive "our" in its different forms.
    ...["наш", "наша", "нашей", "нашу", "наши"],
]);
|
||
/**
 * Cleans a raw value into a single-line label.
 *
 * Steps, in order: stringify (null/undefined become ""), turn backslashes
 * into spaces, convert curly and angle quotes to a plain double quote,
 * rewrite a double quote that sits directly between two letters, collapse
 * whitespace runs to one space, and trim.
 *
 * @param {*} value - Anything; it is passed through String().
 * @returns {string} The cleaned label (possibly empty).
 */
function normalizeScopeLabel(value) {
    const text = String(value ?? "");
    const withoutBackslashes = text.replace(/\\/g, " ");
    const plainQuotes = withoutBackslashes.replace(/[“”«»]/g, '"');
    // NOTE(review): a quote between two letters is replaced by the Cyrillic
    // letter "в". This looks like it could be a mis-encoded apostrophe —
    // confirm the intended replacement. Behaviour is kept unchanged here.
    const innerQuotesRewritten = plainQuotes.replace(/([\p{L}])"(?=[\p{L}])/gu, "$1в");
    const singleSpaced = innerQuotesRewritten.replace(/\s+/g, " ");
    return singleSpaced.trim();
}
|
||
/**
 * Builds a case-insensitive comparison key: the cleaned label, lower-cased,
 * with "ё" folded to "е".
 *
 * @param {*} value - Raw value, forwarded to normalizeScopeLabel.
 * @returns {string} The comparison key (possibly empty).
 */
function normalizeScopeKey(value) {
    const lowered = normalizeScopeLabel(value).toLowerCase();
    return lowered.replace(/ё/g, "е");
}
|
||
/**
 * Normalizes an organization name for storage/display.
 *
 * The value is cleaned with normalizeScopeLabel and, when the whole result is
 * wrapped in a matching pair of double or single quotes, that outer pair is
 * removed.
 *
 * @param {*} value - Raw organization name.
 * @returns {string|null} The cleaned name, or null when nothing is left.
 */
function normalizeOrganizationScopeValue(value) {
    const label = normalizeScopeLabel(value);
    if (!label) {
        return null;
    }

    const candidate = label.trim();
    const isWrappedIn = (quote) => candidate.startsWith(quote) && candidate.endsWith(quote);
    const inner = isWrappedIn('"') || isWrappedIn("'")
        ? candidate.slice(1, -1).trim()
        : candidate;

    return inner.length === 0 ? null : inner;
}
|
||
/**
 * Produces the text form used for matching: the comparison key with every run
 * of characters that are neither letters nor digits replaced by one space,
 * whitespace collapsed, and the ends trimmed.
 *
 * @param {*} value - Raw text (organization name or user message).
 * @returns {string} Space-separated search text (possibly empty).
 */
function normalizeOrganizationScopeSearchText(value) {
    const key = normalizeScopeKey(value);
    const lettersAndDigits = key.replace(/[^\p{L}\p{N}]+/gu, " ");
    const singleSpaced = lettersAndDigits.replace(/\s+/g, " ");
    return singleSpaced.trim();
}
|
||
/**
 * Splits text into the tokens that matter for organization matching.
 *
 * Tokens shorter than three characters and tokens listed in
 * ORGANIZATION_SCOPE_STOPWORDS are dropped.
 *
 * @param {*} value - Raw or already-normalized text.
 * @returns {string[]} Significant tokens in their original order.
 */
function tokenizeOrganizationScope(value) {
    const searchText = normalizeOrganizationScopeSearchText(value);
    if (!searchText) {
        return [];
    }

    const significant = [];
    for (const piece of searchText.split(" ")) {
        const word = piece.trim();
        if (word.length < 3 || ORGANIZATION_SCOPE_STOPWORDS.has(word)) {
            continue;
        }
        significant.push(word);
    }
    return significant;
}
|
||
/**
 * Lists the forms of a token to try when matching: the trimmed, lower-cased
 * token itself, then the token with one of the listed multi-letter endings
 * removed, then the token with a single trailing vowel removed. A stripped
 * form is kept only when at least four characters remain; duplicates are
 * dropped.
 *
 * @param {*} token - A single word; null/undefined are treated as "".
 * @returns {string[]} Unique variants, the unmodified token first.
 */
function organizationTokenVariants(token) {
    const base = String(token ?? "").trim().toLowerCase();
    if (!base) {
        return [];
    }

    // Order matters for the output: multi-letter ending first, single vowel second.
    const stems = [
        base.replace(/(?:ами|ями|ого|ему|ому|ыми|ими|иях|ях|ах|ей|ой|ом|ем|ам|ям|ую|юю|ая|яя|ое|ее|ые|ие|ов|ев|ий|ый|ой)$/iu, ""),
        base.replace(/[аеёиоуыэюя]$/iu, ""),
    ];

    const unique = new Set([base]);
    for (const stem of stems) {
        if (stem.length >= 4) {
            unique.add(stem);
        }
    }
    return [...unique];
}
|
||
/**
 * Tells whether one string can be turned into the other by inserting or
 * deleting exactly one character. Strings of equal length, or whose lengths
 * differ by more than one, never qualify.
 *
 * @param {string} left
 * @param {string} right
 * @returns {boolean}
 */
function isSingleInsertionOrDeletionAway(left, right) {
    const [shorter, longer] = left.length >= right.length ? [right, left] : [left, right];
    if (longer.length - shorter.length !== 1) {
        return false;
    }

    // Walk the shared prefix; the first difference marks the extra character.
    let prefix = 0;
    while (prefix < shorter.length && longer[prefix] === shorter[prefix]) {
        prefix += 1;
    }

    // After skipping that one character in the longer string the tails must agree.
    return longer.slice(prefix + 1) === shorter.slice(prefix);
}
|
||
/**
 * Decides whether two tokens should be treated as the same word.
 *
 * They match when they are identical, when both have at least five characters
 * and one is a prefix of the other, when they are identical once whitespace
 * is removed, or when both whitespace-free forms have at least six characters
 * and differ by a single inserted/deleted character.
 *
 * @param {string} left
 * @param {string} right
 * @returns {boolean}
 */
function organizationTokensLookEquivalent(left, right) {
    if (!left || !right) {
        return false;
    }
    if (left === right) {
        return true;
    }

    const prefixEligible = left.length >= 5 && right.length >= 5;
    if (prefixEligible && (left.startsWith(right) || right.startsWith(left))) {
        return true;
    }

    const [leftSquashed, rightSquashed] = [left, right].map((text) => text.replace(/\s+/g, ""));
    if (!leftSquashed || !rightSquashed) {
        return false;
    }

    return (
        leftSquashed === rightSquashed ||
        (leftSquashed.length >= 6 &&
            rightSquashed.length >= 6 &&
            isSingleInsertionOrDeletionAway(leftSquashed, rightSquashed))
    );
}
|
||
/**
 * Scores how strongly a message mentions an organization.
 *
 * - 0 when either side normalizes to nothing, when either side has no
 *   significant tokens, or when no organization token is found.
 * - 10 000 + length of the normalized name when the normalized message
 *   contains the whole normalized name as a substring.
 * - Otherwise the sum of the best per-token scores plus a bonus: 400 when
 *   every organization token was found, else 50 per found token.
 *
 * A token variant scores 5 per character when it occurs in the message text,
 * or max(20, 3 per character) when a message token equals it or the two (both
 * at least five characters long) are in a prefix relationship.
 *
 * @param {*} message - Message text (raw or already normalized).
 * @param {*} organization - Organization name.
 * @returns {number} Non-negative score; higher means a stronger mention.
 */
function scoreOrganizationMentionInMessage(message, organization) {
    const haystack = normalizeOrganizationScopeSearchText(message);
    const needle = normalizeOrganizationScopeSearchText(organization);
    if (!haystack || !needle) {
        return 0;
    }
    if (haystack.includes(needle)) {
        return 10_000 + needle.length;
    }

    const needleTokens = tokenizeOrganizationScope(needle);
    const haystackTokens = tokenizeOrganizationScope(haystack);
    if (needleTokens.length === 0 || haystackTokens.length === 0) {
        return 0;
    }

    // Token-level fallback used when the variant is not a substring of the message.
    const isLooselyEqual = (word, variant) =>
        word === variant ||
        (word.length >= 5 &&
            variant.length >= 5 &&
            (word.startsWith(variant) || variant.startsWith(word)));

    // Score of one variant, or null when the variant is not found at all.
    const scoreVariant = (variant) => {
        if (!variant) {
            return null;
        }
        if (haystack.includes(variant)) {
            return variant.length * 5;
        }
        if (haystackTokens.some((word) => isLooselyEqual(word, variant))) {
            return Math.max(20, variant.length * 3);
        }
        return null;
    };

    let hitCount = 0;
    let total = 0;
    for (const token of needleTokens) {
        const found = organizationTokenVariants(token)
            .map(scoreVariant)
            .filter((value) => value !== null);
        if (found.length === 0) {
            continue;
        }
        const best = Math.max(0, ...found);
        hitCount += 1;
        total += best > 0 ? best : 10;
    }

    if (hitCount === 0) {
        return 0;
    }
    const bonus = hitCount === needleTokens.length ? 400 : hitCount * 50;
    return total + bonus;
}
|
||
/**
 * Heuristically decides whether two organization names refer to the same
 * entity.
 *
 * True when the normalized names are equal; when their significant tokens,
 * concatenated, are equal or (both at least eight characters) differ by one
 * inserted/deleted character; or when every significant token on each side
 * has an equivalent token on the other side.
 *
 * @param {*} left - First organization name.
 * @param {*} right - Second organization name.
 * @returns {boolean}
 */
function organizationsLikelySameEntity(left, right) {
    const leftText = normalizeOrganizationScopeSearchText(left);
    const rightText = normalizeOrganizationScopeSearchText(right);
    if (!leftText || !rightText) {
        return false;
    }
    if (leftText === rightText) {
        return true;
    }

    const leftWords = tokenizeOrganizationScope(leftText);
    const rightWords = tokenizeOrganizationScope(rightText);
    if (leftWords.length === 0 || rightWords.length === 0) {
        return false;
    }

    // Compare the names with stopwords and spacing removed entirely.
    const leftJoined = leftWords.join("");
    const rightJoined = rightWords.join("");
    if (leftJoined && rightJoined) {
        const nearlyIdentical =
            leftJoined === rightJoined ||
            (leftJoined.length >= 8 &&
                rightJoined.length >= 8 &&
                isSingleInsertionOrDeletionAway(leftJoined, rightJoined));
        if (nearlyIdentical) {
            return true;
        }
    }

    // Otherwise require mutual coverage: each side's tokens all have a counterpart.
    const hasCounterpartOnRight = (leftWord) =>
        rightWords.some((rightWord) => organizationTokensLookEquivalent(leftWord, rightWord));
    const hasCounterpartOnLeft = (rightWord) =>
        leftWords.some((leftWord) => organizationTokensLookEquivalent(leftWord, rightWord));

    return leftWords.every(hasCounterpartOnRight) && rightWords.every(hasCounterpartOnLeft);
}
|
||
/**
 * Normalizes and de-duplicates a list of organization names.
 *
 * Entries that normalize to nothing are skipped. An entry judged to be the
 * same entity as an already kept one does not create a new slot; it replaces
 * the kept name only when its search key or its label is longer. The result
 * is cut with slice(0, limit).
 *
 * @param {*} values - Expected to be an array; anything else is treated as empty.
 * @param {number} [limit=50] - End index passed to slice on the merged list.
 * @returns {string[]} Merged organization names in first-seen order.
 */
function mergeKnownOrganizations(values, limit = 50) {
    const candidates = Array.isArray(values) ? values : [];
    const merged = [];

    for (const raw of candidates) {
        const name = normalizeOrganizationScopeValue(raw);
        if (!name) {
            continue;
        }
        const nameKey = normalizeOrganizationScopeSearchText(name);
        if (!nameKey) {
            continue;
        }

        const slot = merged.findIndex((kept) => organizationsLikelySameEntity(kept, name));
        if (slot < 0) {
            merged.push(name);
            continue;
        }

        // Same entity already present: keep whichever spelling carries more text.
        const kept = merged[slot];
        const keptKey = normalizeOrganizationScopeSearchText(kept);
        const isRicher = nameKey.length > keptKey.length || name.length > kept.length;
        if (isRicher) {
            merged[slot] = name;
        }
    }

    return merged.slice(0, limit);
}
|
||
/**
 * Picks the known organization that a user message most clearly refers to.
 *
 * The known names are merged/de-duplicated, each is scored against the
 * normalized message, and the top scorer is returned. Null is returned when
 * the message or the list is empty, when nothing scores above zero, when the
 * best score is below 90, or when the two best scores are tied.
 *
 * @param {*} userMessage - Message text from the user.
 * @param {*} knownOrganizations - Expected to be an array of names; anything else is treated as empty.
 * @returns {string|null} The selected organization name, or null.
 */
function resolveOrganizationSelectionFromMessage(userMessage, knownOrganizations) {
    const candidates = mergeKnownOrganizations(Array.isArray(knownOrganizations) ? knownOrganizations : []);
    if (!userMessage || candidates.length === 0) {
        return null;
    }

    const messageText = normalizeOrganizationScopeSearchText(userMessage);
    if (!messageText) {
        return null;
    }

    const ranked = [];
    for (const organization of candidates) {
        const score = scoreOrganizationMentionInMessage(messageText, organization);
        if (score > 0) {
            ranked.push({ organization, score });
        }
    }
    if (ranked.length === 0) {
        return null;
    }

    // Highest score first; among equal scores the shorter name comes first.
    ranked.sort((a, b) => b.score - a.score || a.organization.length - b.organization.length);

    const [winner, runnerUp] = ranked;
    const tooWeak = winner.score < 90;
    const ambiguous = Boolean(runnerUp) && runnerUp.score === winner.score;
    return tooWeak || ambiguous ? null : winner.organization;
}
|