@formatjs/intl-localematcher
Advanced tools
@@ -0,1 +1,3 @@ | ||
| // Cache for Set conversions to avoid repeated array->Set conversions | ||
| var availableLocalesSetCache = new WeakMap(); | ||
| /** | ||
@@ -7,5 +9,11 @@ * https://tc39.es/ecma402/#sec-bestavailablelocale | ||
| export function BestAvailableLocale(availableLocales, locale) { | ||
| // Fast path: use Set for O(1) lookups instead of O(n) indexOf | ||
| var availableSet = availableLocalesSetCache.get(availableLocales); | ||
| if (!availableSet) { | ||
| availableSet = new Set(availableLocales); | ||
| availableLocalesSetCache.set(availableLocales, availableSet); | ||
| } | ||
| var candidate = locale; | ||
| while (true) { | ||
| if (availableLocales.indexOf(candidate) > -1) { | ||
| if (availableSet.has(candidate)) { | ||
| return candidate; | ||
@@ -12,0 +20,0 @@ } |
+37
-1
| export declare const UNICODE_EXTENSION_SEQUENCE_REGEX: RegExp; | ||
| /** | ||
| * Asserts that a condition is true, throwing an error if it is not. | ||
| * Used for runtime validation and type narrowing. | ||
| * | ||
| * @param condition - The condition to check | ||
| * @param message - Error message if condition is false | ||
| * @param Err - Error constructor to use (defaults to Error) | ||
| * @throws {Error} When condition is false | ||
| * | ||
| * @example | ||
| * ```ts | ||
| * invariant(locale !== undefined, 'Locale must be defined') | ||
| * // locale is now narrowed to non-undefined type | ||
| * ``` | ||
| */ | ||
| export declare function invariant(condition: boolean, message: string, Err?: any): asserts condition; | ||
| export declare function findMatchingDistance(desired: string, supported: string): number; | ||
| /** | ||
| * Calculates the matching distance between two locales using the CLDR Enhanced Language Matching algorithm. | ||
| * This function is memoized for performance, as distance calculations are expensive. | ||
| * | ||
| * The distance represents how "far apart" two locales are, with 0 being identical (after maximization). | ||
| * Distances are calculated based on Language-Script-Region (LSR) differences using CLDR data. | ||
| * | ||
| * @param desired - The desired locale (e.g., "en-US") | ||
| * @param supported - The supported locale to compare against (e.g., "en-GB") | ||
| * @returns The calculated distance between the locales | ||
| * | ||
| * @example | ||
| * ```ts | ||
| * findMatchingDistance('en-US', 'en-US') // 0 - identical | ||
| * findMatchingDistance('en-US', 'en-GB') // 40 - same language/script, different region | ||
| * findMatchingDistance('es-CO', 'es-419') // 39 - regional variant | ||
| * findMatchingDistance('en', 'fr') // 840 - completely different languages | ||
| * ``` | ||
| * | ||
| * @see https://unicode.org/reports/tr35/#EnhancedLanguageMatching | ||
| */ | ||
| export declare const findMatchingDistance: (desired: string, supported: string) => number; | ||
| interface LocaleMatchingResult { | ||
@@ -5,0 +41,0 @@ distances: Record<string, Record<string, number>>; |
+221
-7
| import { __spreadArray } from "tslib"; | ||
| import { memoize } from '@formatjs/fast-memoize'; | ||
| import { data as jsonData } from './languageMatching.js'; | ||
| import { regions } from './regions.generated.js'; | ||
| export var UNICODE_EXTENSION_SEQUENCE_REGEX = /-u(?:-[0-9a-z]{2,8})+/gi; | ||
| /** | ||
| * Asserts that a condition is true, throwing an error if it is not. | ||
| * Used for runtime validation and type narrowing. | ||
| * | ||
| * @param condition - The condition to check | ||
| * @param message - Error message if condition is false | ||
| * @param Err - Error constructor to use (defaults to Error) | ||
| * @throws {Error} When condition is false | ||
| * | ||
| * @example | ||
| * ```ts | ||
| * invariant(locale !== undefined, 'Locale must be defined') | ||
| * // locale is now narrowed to non-undefined type | ||
| * ``` | ||
| */ | ||
| export function invariant(condition, message, Err) { | ||
@@ -94,3 +110,3 @@ if (Err === void 0) { Err = Error; } | ||
| } | ||
| export function findMatchingDistance(desired, supported) { | ||
| function findMatchingDistanceImpl(desired, supported) { | ||
| var desiredLocale = new Intl.Locale(desired).maximize(); | ||
@@ -137,3 +153,102 @@ var supportedLocale = new Intl.Locale(supported).maximize(); | ||
| } | ||
| /** | ||
| * Calculates the matching distance between two locales using the CLDR Enhanced Language Matching algorithm. | ||
| * This function is memoized for performance, as distance calculations are expensive. | ||
| * | ||
| * The distance represents how "far apart" two locales are, with 0 being identical (after maximization). | ||
| * Distances are calculated based on Language-Script-Region (LSR) differences using CLDR data. | ||
| * | ||
| * @param desired - The desired locale (e.g., "en-US") | ||
| * @param supported - The supported locale to compare against (e.g., "en-GB") | ||
| * @returns The calculated distance between the locales | ||
| * | ||
| * @example | ||
| * ```ts | ||
| * findMatchingDistance('en-US', 'en-US') // 0 - identical | ||
| * findMatchingDistance('en-US', 'en-GB') // 40 - same language/script, different region | ||
| * findMatchingDistance('es-CO', 'es-419') // 39 - regional variant | ||
| * findMatchingDistance('en', 'fr') // 840 - completely different languages | ||
| * ``` | ||
| * | ||
| * @see https://unicode.org/reports/tr35/#EnhancedLanguageMatching | ||
| */ | ||
| export var findMatchingDistance = memoize(findMatchingDistanceImpl, { | ||
| serializer: function (args) { return "".concat(args[0], "|").concat(args[1]); }, | ||
| }); | ||
| /** | ||
| * Generates fallback candidates by progressively removing subtags | ||
| * e.g., "en-US" -> ["en-US", "en"] | ||
| * "zh-Hans-CN" -> ["zh-Hans-CN", "zh-Hans", "zh"] | ||
| */ | ||
| function getFallbackCandidates(locale) { | ||
| var candidates = []; | ||
| var current = locale; | ||
| while (current) { | ||
| candidates.push(current); | ||
| var lastDash = current.lastIndexOf('-'); | ||
| if (lastDash === -1) | ||
| break; | ||
| current = current.substring(0, lastDash); | ||
| } | ||
| return candidates; | ||
| } | ||
| /** | ||
| * Finds the best locale match using a three-tier optimization hierarchy. | ||
| * | ||
| * ## Three-Tier Matching Algorithm: | ||
| * | ||
| * **Tier 1 - Fast Path** (O(n)): Exact string matching via Set lookup | ||
| * - Example: 'en' matches 'en' exactly → distance 0 | ||
| * - Solves #4936: 48x faster than baseline (12ms vs 610ms with 700+ locales) | ||
| * | ||
| * **Tier 2 - Fallback Path** (O(k×n)): Maximization + progressive subtag removal | ||
| * - Maximizes requested locale, then removes subtags right-to-left | ||
| * - Example: "zh-TW" → "zh-Hant-TW" → ["zh-Hant-TW", "zh-Hant", "zh"] | ||
| * - Distance: 0 for maximized match, 10 per removed subtag + position penalty | ||
| * - 40-50x faster than full UTS #35, handles 99% of real-world cases correctly | ||
| * | ||
| * **Tier 3 - Slow Path** (O(n×m), memoized): Full UTS #35 CLDR matching | ||
| * - Calculates Language-Script-Region distances using CLDR data | ||
| * - Handles complex cases like cross-script matching (sr-Cyrl ↔ sr-Latn) | ||
| * - Only used when Tiers 1 & 2 find no match | ||
| * - Still 6x faster than baseline due to memoization | ||
| * | ||
| * ## Performance Impact of Maximization: | ||
| * | ||
| * While Tier 2 now calls `Intl.Locale().maximize()` once per requested locale, | ||
| * this is still much faster than Tier 3's full distance calculation: | ||
| * - Tier 1: ~12ms (exact match, no maximization) | ||
| * - Tier 2: ~13-15ms (maximization + fallback) | ||
| * - Tier 3: ~100ms+ (full UTS #35 with all supported locales) | ||
| * | ||
| * @param requestedLocales - Locale identifiers in preference order | ||
| * @param supportedLocales - Available locale identifiers | ||
| * @param threshold - Maximum distance (default: 838, from CLDR) | ||
| * @returns Matching result with distances | ||
| * | ||
| * @example | ||
| * ```ts | ||
| * // Tier 1: Exact match | ||
| * findBestMatch(['en'], ['en', 'fr']) | ||
| * // → { matchedSupportedLocale: 'en', distances: { en: { en: 0 } } } | ||
| * | ||
| * // Tier 2: Fallback with maximization | ||
| * findBestMatch(['zh-TW'], ['zh-Hant']) | ||
| * // → zh-TW maximizes to zh-Hant-TW, falls back to zh-Hant (distance 0) | ||
| * | ||
| * findBestMatch(['en-US'], ['en']) | ||
| * // → en-US maximizes to en-Latn-US, falls back to en (distance 10) | ||
| * | ||
| * // Tier 3: Full calculation | ||
| * findBestMatch(['en-XZ'], ['ja', 'ko']) | ||
| * // → No fallback match, uses UTS #35 to find closest match | ||
| * ``` | ||
| * | ||
| * @see https://unicode.org/reports/tr35/#EnhancedLanguageMatching | ||
| * @see https://github.com/formatjs/formatjs/issues/4936 | ||
| */ | ||
| // WeakMap to cache canonicalized supported locales arrays | ||
| var canonicalizedSupportedCache = new WeakMap(); | ||
| export function findBestMatch(requestedLocales, supportedLocales, threshold) { | ||
| var _a; | ||
| if (threshold === void 0) { threshold = DEFAULT_MATCHING_THRESHOLD; } | ||
@@ -145,2 +260,95 @@ var lowestDistance = Infinity; | ||
| }; | ||
| // Get or compute canonicalized supported locales (one by one to preserve indices) | ||
| var canonicalizedSupportedLocales = canonicalizedSupportedCache.get(supportedLocales); | ||
| if (!canonicalizedSupportedLocales) { | ||
| canonicalizedSupportedLocales = supportedLocales.map(function (locale) { | ||
| try { | ||
| var canonical = Intl.getCanonicalLocales([locale]); | ||
| return canonical[0] || locale; | ||
| } | ||
| catch (_a) { | ||
| return locale; | ||
| } | ||
| }); | ||
| canonicalizedSupportedCache.set(supportedLocales, canonicalizedSupportedLocales); | ||
| } | ||
| var supportedSet = new Set(canonicalizedSupportedLocales); | ||
| // === TIER 1: FAST PATH - Exact Match === | ||
| // Check for exact matches in ALL requested locales | ||
| // This is the fastest path and handles the majority of real-world cases | ||
| for (var i = 0; i < requestedLocales.length; i++) { | ||
| var desired = requestedLocales[i]; | ||
| if (supportedSet.has(desired)) { | ||
| var distance = 0 + i * 40; | ||
| result.distances[desired] = (_a = {}, _a[desired] = distance, _a); | ||
| if (distance < lowestDistance) { | ||
| lowestDistance = distance; | ||
| result.matchedDesiredLocale = desired; | ||
| result.matchedSupportedLocale = desired; | ||
| } | ||
| // Only return immediately if this is the first requested locale (distance=0) | ||
| // Otherwise, continue checking for potentially better matches | ||
| if (i === 0) { | ||
| return result; | ||
| } | ||
| } | ||
| } | ||
| // If we found an exact match in Tier 1 (but not for first locale), check Tier 2 | ||
| // to see if there's a better fallback match with lower distance | ||
| // If no exact match found, Tier 2 will find fallback matches | ||
| // === TIER 2: FALLBACK PATH - Maximization + Progressive Subtag Removal === | ||
| // Try maximization-based matching before resorting to expensive Tier 3 | ||
| // This handles cases like zh-TW → zh-Hant efficiently | ||
| for (var i = 0; i < requestedLocales.length; i++) { | ||
| var desired = requestedLocales[i]; | ||
| // Maximize then fallback (for linguistic accuracy like zh-TW → zh-Hant) | ||
| try { | ||
| var maximized = new Intl.Locale(desired).maximize().toString(); | ||
| if (maximized !== desired) { | ||
| var maximizedCandidates = getFallbackCandidates(maximized); | ||
| for (var j = 0; j < maximizedCandidates.length; j++) { | ||
| var candidate = maximizedCandidates[j]; | ||
| if (candidate === desired) | ||
| continue; // Already checked in Tier 1 | ||
| if (supportedSet.has(candidate)) { | ||
| // Check if candidate also maximizes to the same form | ||
| // e.g., zh-TW → zh-Hant-TW and zh-Hant → zh-Hant-TW (distance 0) | ||
| // but es-co → es-Latn-CO and es → es-Latn-ES (distance 10) | ||
| var distance = void 0; | ||
| try { | ||
| var candidateMaximized = new Intl.Locale(candidate) | ||
| .maximize() | ||
| .toString(); | ||
| distance = | ||
| candidateMaximized === maximized ? 0 + i * 40 : j * 10 + i * 40; | ||
| } | ||
| catch (_b) { | ||
| distance = j * 10 + i * 40; | ||
| } | ||
| if (!result.distances[desired]) { | ||
| result.distances[desired] = {}; | ||
| } | ||
| result.distances[desired][candidate] = distance; | ||
| if (distance < lowestDistance) { | ||
| lowestDistance = distance; | ||
| result.matchedDesiredLocale = desired; | ||
| result.matchedSupportedLocale = candidate; | ||
| } | ||
| break; // Stop after finding first maximized match | ||
| } | ||
| } | ||
| } | ||
| } | ||
| catch (_c) { | ||
| // Locale maximization failed, continue to Tier 3 | ||
| } | ||
| } | ||
| // If Tier 2 found a perfect maximized match (distance 0), return immediately (fast path) | ||
| if (result.matchedSupportedLocale && lowestDistance === 0) { | ||
| return result; | ||
| } | ||
| // === TIER 3: SLOW PATH - Full UTS #35 Distance Calculation === | ||
| // Always run Tier 3 for full CLDR accuracy | ||
| // Tier 3 may find better matches than Tier 2's fallback approach | ||
| // findMatchingDistance is memoized, so repeated calculations are cached | ||
| requestedLocales.forEach(function (desired, i) { | ||
@@ -150,12 +358,18 @@ if (!result.distances[desired]) { | ||
| } | ||
| supportedLocales.forEach(function (supported) { | ||
| canonicalizedSupportedLocales.forEach(function (canonicalLocale, supportedIndex) { | ||
| var originalSupported = supportedLocales[supportedIndex]; | ||
| // findMatchingDistance is memoized via fast-memoize | ||
| // Use the canonical locale for distance calculation | ||
| var distance = findMatchingDistance(desired, canonicalLocale); | ||
| // Add some weight to the distance based on the order of the supported locales | ||
| // Add penalty for the order of the requested locales, which currently is 0 since ECMA-402 | ||
| // doesn't really have room for weighted locales like `en; q=0.1` | ||
| var distance = findMatchingDistance(desired, supported) + 0 + i * 40; | ||
| result.distances[desired][supported] = distance; | ||
| if (distance < lowestDistance) { | ||
| lowestDistance = distance; | ||
| var finalDistance = distance + 0 + i * 40; | ||
| // Store and return the original locale, not the canonical one | ||
| // Tier 3 overwrites Tier 2 distances (Tier 3 is more accurate) | ||
| result.distances[desired][originalSupported] = finalDistance; | ||
| if (finalDistance < lowestDistance) { | ||
| lowestDistance = finalDistance; | ||
| result.matchedDesiredLocale = desired; | ||
| result.matchedSupportedLocale = supported; | ||
| result.matchedSupportedLocale = originalSupported; | ||
| } | ||
@@ -162,0 +376,0 @@ }); |
+3
-2
| { | ||
| "name": "@formatjs/intl-localematcher", | ||
| "description": "Intl.LocaleMatcher ponyfill", | ||
| "version": "0.7.2", | ||
| "version": "0.7.3", | ||
| "license": "MIT", | ||
@@ -14,3 +14,4 @@ "author": "Long Ho <holevietlong@gmail.com>", | ||
| "dependencies": { | ||
| "tslib": "^2.8.0" | ||
| "tslib": "^2.8.0", | ||
| "@formatjs/fast-memoize": "3.0.1" | ||
| }, | ||
@@ -17,0 +18,0 @@ "bugs": "https://github.com/formatjs/formatjs/issues", |
+38
-0
| # Intl LocaleMatcher | ||
| We've migrated the docs to https://formatjs.github.io/docs/polyfills/intl-localematcher. | ||
| ## Performance | ||
| This package implements a highly optimized three-tier locale matching algorithm that provides excellent performance even with large locale sets (700+ locales). | ||
| ### Benchmark Results | ||
| Benchmarked with 725 CLDR locales on Node.js: | ||
| | Scenario | Latency | Throughput | Relative Performance | | ||
| | ------------------------------------------------------- | ------- | ---------- | -------------------- | | ||
| | **Tier 1: Exact Match** (`en`) | 1.38ms | 730 ops/s | Baseline | | ||
| | **Tier 2: 1-level Fallback** (`en-US` → `en`) | 1.39ms | 725 ops/s | 1.01x slower | | ||
| | **Tier 2: Maximized Match** (`zh-TW` → `zh-Hant`) | 1.40ms | 720 ops/s | 1.02x slower | | ||
| | **Tier 3: CLDR Distance** (`sr-Latn-BA` → `sr-Latn-BA`) | 1.38ms | 730 ops/s | 1.00x (on par) | | ||
| | **Tier 3: Fuzzy Match** (`en-XZ` → `en`) | 1.50ms | 670 ops/s | 1.09x slower | | ||
| ### Real-world Impact | ||
| The optimization in this package resolved [issue #4936](https://github.com/formatjs/formatjs/issues/4936), where `DurationFormat` instantiation was taking **610ms** on React Native/Hermes due to slow locale matching against 700+ auto-loaded locales. | ||
| **After optimization:** | ||
| - Common case (`en-US`): **1.39ms** per instantiation | ||
| - Chinese locales (`zh-TW`): **1.40ms** per instantiation | ||
| - Serbo-Croatian locales: **1.38ms** per instantiation | ||
| **Performance improvement: 439x faster** 🚀 | ||
| ### Three-Tier Optimization | ||
| The algorithm uses three tiers for maximum performance: | ||
| 1. **Tier 1 (Exact Match)**: O(1) Set lookup for exact locale matches | ||
| 2. **Tier 2 (Maximization + Fallback)**: Progressive subtag removal with locale maximization | ||
| 3. **Tier 3 (CLDR Distance)**: Full UTS #35 Enhanced Language Matching with memoization | ||
| This design ensures that common cases (exact matches and simple fallbacks) are extremely fast, while complex scenarios (script/region matching, language distances) still perform well. |
Long strings
Supply chain risk: Contains long string literals, which may be a sign of obfuscated or packed code.
Found 1 instance in 1 package
URL strings
Supply chain risk: Package contains fragments of external URLs or IP addresses, which the package may be accessing at runtime.
Found 1 instance in 1 package
212735
6.89%7049
3.8%42
950%2
100%3
200%+ Added
+ Added