"use strict";
// TypeScript-emitted interop helper: wraps a CommonJS export so it can be
// accessed through `.default` like an ES module default export.
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.repairAddressMojibakeText = repairAddressMojibakeText;
exports.normalizeRussianComparableText = normalizeRussianComparableText;
const iconv_lite_1 = __importDefault(require("iconv-lite"));

// Upper bound on repair passes; each pass can peel off one layer of
// mis-decoding, so this also caps how many stacked layers are undone.
const MAX_REPAIR_PASSES = 3;

// Character classes counted in favour of a string by the scorer.
const CYRILLIC_PATTERN = /[\u0400-\u04ff]/g;
const LATIN_PATTERN = /[A-Za-z]/g;

// Patterns counted against a string. They are shared by the scorer and the
// detector so the two heuristics cannot drift apart.
const REPLACEMENT_CHAR_PATTERN = /[�]/g;
const PAIR_MARKER_PATTERN = /(?:Р.|С.|Ð.|Ñ.)/g;
const DOUBLE_ENCODED_MARKER_PATTERN = /(?:Р“[Р-џ]|Р’[Р-џ]|Ã.|Â.)/gu;

// Re-decoding strategies tried, in this order, on every repair pass. Each one
// turns the current text back into bytes using a single-byte encoding and then
// reads those bytes as UTF-8.
const REPAIR_DECODERS = [
    (text) => iconv_lite_1.default.encode(text, "win1251").toString("utf8"),
    (text) => Buffer.from(text, "latin1").toString("utf8"),
];

/**
 * Counts the non-overlapping matches of a global pattern in a string.
 *
 * `String.prototype.match` resets `lastIndex` for global patterns, so the
 * shared module-level regexes can be reused safely here.
 *
 * @param {string} source - Text to scan.
 * @param {RegExp} pattern - Pattern carrying the `g` flag.
 * @returns {number} Number of matches (0 when there are none).
 */
function countMatches(source, pattern) {
    return (source.match(pattern) ?? []).length;
}

/**
 * Collapses every run of whitespace into a single space and trims both ends.
 *
 * @param {string} value - Text to compact.
 * @returns {string} The compacted text.
 */
function compactWhitespace(value) {
    return value.replace(/\s+/g, " ").trim();
}

/**
 * Scores how "clean" a string looks; the repair loop treats a higher score as
 * a better candidate.
 *
 * Cyrillic and Latin letters add one point each. Replacement characters cost
 * three points each, and pair markers and double-encoded markers cost two
 * points each.
 *
 * @param {*} value - Value to score; null/undefined are treated as "".
 * @returns {number} The heuristic score (may be negative).
 */
function textMojibakeScore(value) {
    const source = String(value ?? "");
    const letters = countMatches(source, CYRILLIC_PATTERN) + countMatches(source, LATIN_PATTERN);
    const replacementPenalty = countMatches(source, REPLACEMENT_CHAR_PATTERN) * 3;
    const pairPenalty = countMatches(source, PAIR_MARKER_PATTERN) * 2;
    const doubleEncodedPenalty = countMatches(source, DOUBLE_ENCODED_MARKER_PATTERN) * 2;
    return letters - replacementPenalty - pairPenalty - doubleEncodedPenalty;
}

/**
 * Cheap pre-check that decides whether a repair attempt is worth making.
 *
 * @param {*} value - Value to inspect; null/undefined are treated as "".
 * @returns {boolean} True when the text contains a replacement character, or
 *   at least two pair markers, or at least two double-encoded markers. Blank
 *   input is never flagged.
 */
function looksLikeAddressMojibake(value) {
    const source = String(value ?? "");
    if (!source.trim()) {
        return false;
    }
    return (
        countMatches(source, REPLACEMENT_CHAR_PATTERN) > 0 ||
        countMatches(source, PAIR_MARKER_PATTERN) >= 2 ||
        countMatches(source, DOUBLE_ENCODED_MARKER_PATTERN) >= 2
    );
}

/**
 * Attempts to undo mojibake in a piece of text.
 *
 * Text that does not pass `looksLikeAddressMojibake` is returned unchanged.
 * Otherwise up to `MAX_REPAIR_PASSES` passes are made; on each pass every
 * decoder in `REPAIR_DECODERS` is tried in order, and its output replaces the
 * current candidate only when it scores strictly higher. The loop stops early
 * once a full pass brings no improvement.
 *
 * @param {*} value - Value to repair; null/undefined are treated as "".
 * @returns {string} The best-scoring candidate found (possibly the input).
 */
function repairAddressMojibakeText(value) {
    const source = String(value ?? "");
    if (!looksLikeAddressMojibake(source)) {
        return source;
    }
    let candidate = source;
    for (let pass = 0; pass < MAX_REPAIR_PASSES; pass += 1) {
        let improved = false;
        for (const decode of REPAIR_DECODERS) {
            try {
                const decoded = decode(candidate);
                if (textMojibakeScore(decoded) > textMojibakeScore(candidate)) {
                    candidate = decoded;
                    improved = true;
                }
            }
            catch {
                // Deliberate best-effort: ignore re-decoding failures and keep
                // the current candidate.
            }
        }
        if (!improved) {
            break;
        }
    }
    return candidate;
}

/**
 * Produces a canonical form of text for comparisons: repairs mojibake,
 * lower-cases, compacts whitespace, and finally replaces "ё" with "е".
 *
 * @param {*} value - Value to normalize; null/undefined are treated as "".
 * @returns {string} The normalized, comparable text.
 */
function normalizeRussianComparableText(value) {
    const repaired = repairAddressMojibakeText(String(value ?? ""));
    return compactWhitespace(repaired.toLowerCase()).replace(/ё/g, "е");
}