{
		"name": "@formatjs/intl-segmenter",
		"description": "Polyfill for Intl.Segmenter",
		"version": "12.0.7",
		"version": "12.0.8",
		"license": "MIT",
		@@ -15,4 +15,4 @@ "author": "Matija Gaspar <matijagaspar@gmail.com>",
		"tslib": "^2.8.0",
		"@formatjs/ecma402-abstract": "3.0.7",
		"@formatjs/intl-localematcher": "0.7.4"
		"@formatjs/intl-localematcher": "0.7.5",
		"@formatjs/ecma402-abstract": "3.0.8"
		},
		@@ -19,0 +19,0 @@ "bugs": "https://github.com/formatjs/formatjs/issues",

+6

-6

polyfill-force.js

		@@ -1,7 +0,7 @@
		import { Segmenter } from './src/segmenter.js';
		Object.defineProperty(Intl, 'Segmenter', {
		value: Segmenter,
		enumerable: false,
		writable: true,
		configurable: true,
		import { Segmenter } from "./src/segmenter.js";
		Object.defineProperty(Intl, "Segmenter", {
		value: Segmenter,
		enumerable: false,
		writable: true,
		configurable: true
		});

+8

-8

polyfill.js

		@@ -1,10 +0,10 @@
		import { Segmenter } from './src/segmenter.js';
		import { shouldPolyfill } from './should-polyfill.js';
		import { Segmenter } from "./src/segmenter.js";
		import { shouldPolyfill } from "./should-polyfill.js";
		if (shouldPolyfill()) {
		Object.defineProperty(Intl, 'Segmenter', {
		value: Segmenter,
		enumerable: false,
		writable: true,
		configurable: true,
		});
		Object.defineProperty(Intl, "Segmenter", {
		value: Segmenter,
		enumerable: false,
		writable: true,
		configurable: true
		});
		}

+1

-1

should-polyfill.js

		export function shouldPolyfill() {
		return !Intl.Segmenter;
		return !Intl.Segmenter;
		}

+10

-13

src/segmentation-utils.js

		@@ -1,15 +0,12 @@
		export var replaceVariables = function (variables, input) {
		var findVarRegex = /\$[A-Za-z0-9_]+/gm;
		return input.replaceAll(findVarRegex, function (match) {
		if (!(match in variables)) {
		throw new Error("No such variable ".concat(match));
		}
		return variables[match];
		});
		export const replaceVariables = (variables, input) => {
		const findVarRegex = /\$[A-Za-z0-9_]+/gm;
		return input.replaceAll(findVarRegex, (match) => {
		if (!(match in variables)) {
		throw new Error(`No such variable ${match}`);
		}
		return variables[match];
		});
		};
		export var isSurrogate = function (str, pos) {
		return (0xd800 <= str.charCodeAt(pos - 1) &&
		str.charCodeAt(pos - 1) <= 0xdbff &&
		0xdc00 <= str.charCodeAt(pos) &&
		str.charCodeAt(pos) <= 0xdfff);
		/**
		 * Tells whether `pos` falls between the two code units of a surrogate pair,
		 * i.e. the unit before `pos` is a high surrogate and the unit at `pos` is a
		 * low surrogate.
		 *
		 * @param str text to inspect
		 * @param pos index of the candidate boundary
		 * @returns true when the boundary would split a surrogate pair
		 */
		export const isSurrogate = (str, pos) => {
			const leading = str.charCodeAt(pos - 1);
			const trailing = str.charCodeAt(pos);
			// Out-of-range indexes yield NaN, which fails both range checks.
			const leadingIsHigh = 0xd800 <= leading && leading <= 0xdbff;
			const trailingIsLow = 0xdc00 <= trailing && trailing <= 0xdfff;
			return leadingIsHigh && trailingIsLow;
		};
		@@ -16,0 +13,0 @@ // alternative surrogate check mimicking the java implementation

+42

-42

src/segmenter.d.ts

		type SegmentResult = {
		segment: string;
		breakingRule?: string;
		nonBreakingRules?: string[];
		segment: string;
		breakingRule?: string;
		nonBreakingRules?: string[];
		} \| undefined;
		export interface SegmenterOptions {
		localeMatcher?: 'lookup' \| 'best fit';
		granularity?: 'word' \| 'sentence' \| 'grapheme';
		localeMatcher?: "lookup" \| "best fit";
		granularity?: "word" \| "sentence" \| "grapheme";
		}
		export interface SegmenterResolvedOptions {
		locale: string;
		granularity: NonNullable<SegmenterOptions['granularity']>;
		locale: string;
		granularity: NonNullable<SegmenterOptions["granularity"]>;
		}
		declare const breaksAtResult: (breaks: boolean, matchingRule: string) => {
		breaks: boolean;
		matchingRule: string;
		breaks: boolean;
		matchingRule: string;
		};
		export declare class Segmenter {
		private readonly rules;
		private readonly ruleSortedKeys;
		private readonly mergedSegmentationTypeValue;
		constructor(locales: string \| string[] \| undefined, options: SegmenterOptions);
		breaksAt(position: number, input: string): ReturnType<typeof breaksAtResult>;
		segment(input: string): SegmentIterator;
		resolvedOptions(): SegmenterResolvedOptions;
		static availableLocales: Set<string>;
		static supportedLocalesOf(locales?: string \| string[], options?: Pick<SegmenterOptions, 'localeMatcher'>): string[];
		static readonly polyfilled = true;
		private readonly rules;
		private readonly ruleSortedKeys;
		private readonly mergedSegmentationTypeValue;
		constructor(locales: string \| string[] \| undefined, options: SegmenterOptions);
		breaksAt(position: number, input: string): ReturnType<typeof breaksAtResult>;
		segment(input: string): SegmentIterator;
		resolvedOptions(): SegmenterResolvedOptions;
		static availableLocales: Set<string>;
		static supportedLocalesOf(locales?: string \| string[], options?: Pick<SegmenterOptions, "localeMatcher">): string[];
		static readonly polyfilled: true;
		}
		declare class SegmentIterator implements Iterable<SegmentResult>, Iterator<SegmentResult> {
		private readonly segmenter;
		private lastSegmentIndex;
		private input;
		constructor(segmenter: Segmenter, input: string);
		[Symbol.iterator](): SegmentIterator;
		next(): {
		done: boolean;
		value: {
		segment: string;
		index: number;
		input: string;
		isWordLike?: boolean;
		};
		} \| {
		done: boolean;
		value: undefined;
		};
		containing(positionInput: number): {
		segment: string;
		index: number;
		input: string;
		isWordLike?: boolean;
		} \| undefined;
		private readonly segmenter;
		private lastSegmentIndex;
		private input;
		constructor(segmenter: Segmenter, input: string);
		[Symbol.iterator](): SegmentIterator;
		next(): {
		done: boolean;
		value: {
		segment: string;
		index: number;
		input: string;
		isWordLike?: boolean;
		};
		} \| {
		done: boolean;
		value: undefined;
		};
		containing(positionInput: number): {
		segment: string;
		index: number;
		input: string;
		isWordLike?: boolean;
		} \| undefined;
		}
		export type { SegmentIterator };

+332

-328

src/segmenter.js

		@@ -1,346 +0,350 @@
		import { __assign, __spreadArray } from "tslib";
		import { CanonicalizeLocaleList, GetOption, GetOptionsObject, SupportedLocales, getInternalSlot, getMultiInternalSlots, setInternalSlot, } from '@formatjs/ecma402-abstract';
		import { ResolveLocale } from '@formatjs/intl-localematcher';
		import { SegmentationRules } from './cldr-segmentation-rules.generated.js';
		import { isSurrogate, replaceVariables } from './segmentation-utils.js';
		import { CanonicalizeLocaleList, GetOption, GetOptionsObject, SupportedLocales, getInternalSlot, getMultiInternalSlots, setInternalSlot } from "@formatjs/ecma402-abstract";
		import { ResolveLocale } from "@formatjs/intl-localematcher";
		import { SegmentationRules } from "./cldr-segmentation-rules.generated.js";
		import { isSurrogate, replaceVariables } from "./segmentation-utils.js";
		// Cached regex patterns for word character detection
		// Note: Unicode property escape regex is created at runtime in try-catch
		// to avoid compile-time errors when targeting ES5
		var WORD_CHARACTERS_BASIC_REGEX = /\w/;
		const WORD_CHARACTERS_BASIC_REGEX = /\w/;
		// Lazy-initialized Unicode word character regex (null if not supported)
		var WORD_CHARACTERS_UNICODE_REGEX = undefined;
		let WORD_CHARACTERS_UNICODE_REGEX = undefined;
		/**
		* Adds $ to before rules and ^ to after rules for strictness
		* Replaces variables
		* Initializes the RegExp
		*
		* @param rule raw rule string from cldr-segmentation-rules.generated
		* @param variables
		* @param after appends ^ if true and $ if false
		* @returns
		*/
		var generateRuleRegex = function (rule, variables, after) {
		return new RegExp("".concat(after ? '^' : '').concat(replaceVariables(variables, rule)).concat(after ? '' : '$'));
		* Adds $ to before rules and ^ to after rules for strictness
		* Replaces variables
		* Initializes the RegExp
		*
		* @param rule raw rule string from cldr-segmentation-rules.generated
		* @param variables
		* @param after appends ^ if true and $ if false
		* @returns
		*/
		const generateRuleRegex = (rule, variables, after) => {
			// Expand the `$variable` references first, then anchor the pattern:
			// "after" rules must match at the start of the text following the break,
			// "before" rules at the end of the text preceding it.
			const expanded = replaceVariables(variables, rule);
			if (after) {
				return new RegExp(`^${expanded}`);
			}
			return new RegExp(`${expanded}$`);
		};
		var prepareLocaleSegmentationRules = function (segmentationTypeValue) {
		var preparedRules = {};
		for (var _i = 0, _a = Object.keys(segmentationTypeValue.segmentRules); _i < _a.length; _i++) {
		var ruleNr = _a[_i];
		var ruleValue = segmentationTypeValue.segmentRules[ruleNr];
		var preparedRule = {
		breaks: ruleValue.breaks,
		};
		if ('before' in ruleValue && ruleValue.before) {
		preparedRule.before = generateRuleRegex(ruleValue.before, segmentationTypeValue.variables, false);
		}
		if ('after' in ruleValue && ruleValue.after) {
		preparedRule.after = generateRuleRegex(ruleValue.after, segmentationTypeValue.variables, true);
		}
		preparedRules[ruleNr] = preparedRule;
		}
		return preparedRules;
		/**
		 * Compiles the raw rule table of one segmentation type into RegExp form.
		 *
		 * @param segmentationTypeValue object carrying `segmentRules` and `variables`
		 * @returns a map from rule number to `{ breaks, before?, after? }`
		 */
		const prepareLocaleSegmentationRules = (segmentationTypeValue) => {
			const compiledRules = {};
			Object.keys(segmentationTypeValue.segmentRules).forEach((ruleId) => {
				const rawRule = segmentationTypeValue.segmentRules[ruleId];
				const compiled = { breaks: rawRule.breaks };
				// Only non-empty sides are compiled; a missing side matches anything.
				const hasBefore = "before" in rawRule && rawRule.before;
				if (hasBefore) {
					compiled.before = generateRuleRegex(rawRule.before, segmentationTypeValue.variables, false);
				}
				const hasAfter = "after" in rawRule && rawRule.after;
				if (hasAfter) {
					compiled.after = generateRuleRegex(rawRule.after, segmentationTypeValue.variables, true);
				}
				compiledRules[ruleId] = compiled;
			});
			return compiledRules;
		};
		var breaksAtResult = function (breaks, matchingRule) { return ({
		breaks: breaks,
		matchingRule: matchingRule,
		}); };
		var Segmenter = /** @class */ (function () {
		function Segmenter(locales, options) {
		var _newTarget = this.constructor;
		if (_newTarget === undefined) {
		throw TypeError("Constructor Intl.Segmenter requires 'new'");
		}
		var requestedLocales = CanonicalizeLocaleList(locales);
		options = GetOptionsObject(options);
		var opt = Object.create(null);
		var matcher = GetOption(options, 'localeMatcher', 'string', ['lookup', 'best fit'], 'best fit');
		opt.localeMatcher = matcher;
		var granularity = GetOption(options, 'granularity', 'string', ['word', 'sentence', 'grapheme'], 'grapheme');
		setSlot(this, 'granularity', granularity);
		//TODO: figure out correct availible locales
		var r = ResolveLocale(Segmenter.availableLocales, //availible locales
		requestedLocales, opt, [], // there is no relevantExtensionKeys
		{}, function () { return ''; } //use only root rules
		);
		setSlot(this, 'locale', r.locale);
		//root rules based on granularity
		this.mergedSegmentationTypeValue = SegmentationRules.root[granularity];
		//merge root rules with locale ones if locale is specified
		if (r.locale.length) {
		var localeOverrides = SegmentationRules[r.locale];
		if (granularity in localeOverrides) {
		var localeSegmentationTypeValue = localeOverrides[granularity];
		this.mergedSegmentationTypeValue.variables = __assign(__assign({}, this.mergedSegmentationTypeValue.variables), localeSegmentationTypeValue.variables);
		this.mergedSegmentationTypeValue.segmentRules = __assign(__assign({}, this.mergedSegmentationTypeValue.segmentRules), localeSegmentationTypeValue.segmentRules);
		this.mergedSegmentationTypeValue.suppressions = __spreadArray(__spreadArray([], this.mergedSegmentationTypeValue.suppressions, true), localeSegmentationTypeValue.suppressions, true);
		}
		}
		//prepare rules
		this.rules = prepareLocaleSegmentationRules(this.mergedSegmentationTypeValue);
		//order rule keys
		this.ruleSortedKeys = Object.keys(this.rules).sort(function (a, b) { return Number(a) - Number(b); });
		}
		Segmenter.prototype.breaksAt = function (position, input) {
		var ruleSortedKeys = this.ruleSortedKeys;
		var rules = this.rules;
		var mergedSegmentationTypeValue = this.mergedSegmentationTypeValue;
		//artificial rule 0.2
		if (position === 0) {
		return breaksAtResult(true, '0.2');
		}
		if (position === input.length) {
		//rule 0.3
		return breaksAtResult(true, '0.3');
		}
		//artificial rule 0.1: js specific, due to es5 regex not being unicode aware
		//number 0.1 chosen to mimic java implementation, but needs to execute after 0.2 and 0.3 to be inside the string bounds
		if (isSurrogate(input, position)) {
		return breaksAtResult(false, '0.1');
		}
		var stringBeforeBreak = input.substring(0, position);
		var stringAfterBreak = input.substring(position);
		//artificial rule 0.4: handle suppressions
		if ('suppressions' in mergedSegmentationTypeValue) {
		for (var _i = 0, _a = mergedSegmentationTypeValue.suppressions; _i < _a.length; _i++) {
		var suppressions = _a[_i];
		if (stringBeforeBreak.trim().endsWith(suppressions)) {
		return breaksAtResult(false, '0.4');
		}
		}
		}
		// loop through rules and find a match
		for (var _b = 0, ruleSortedKeys_1 = ruleSortedKeys; _b < ruleSortedKeys_1.length; _b++) {
		var ruleKey = ruleSortedKeys_1[_b];
		var _c = rules[ruleKey], before = _c.before, after = _c.after, breaks = _c.breaks;
		// for debugging
		// if (ruleKey === '16' && position === 4) {
		// console.log({before, after, stringBeforeBreak, stringAfterBreak})
		// }
		if (before) {
		if (!before.test(stringBeforeBreak)) {
		//didn't match the before part, therfore skipping
		continue;
		}
		}
		if (after) {
		if (!after.test(stringAfterBreak)) {
		//didn't match the after part, therfore skipping
		continue;
		}
		}
		return breaksAtResult(breaks, ruleKey);
		}
		//artificial rule 999: if no rule matched is Any ÷ Any so return true
		return breaksAtResult(true, '999');
		};
		Segmenter.prototype.segment = function (input) {
		checkReceiver(this, 'segment');
		return new SegmentIterator(this, input);
		};
		Segmenter.prototype.resolvedOptions = function () {
		checkReceiver(this, 'resolvedOptions');
		return __assign({}, getMultiInternalSlots(__INTERNAL_SLOT_MAP__, this, 'locale', 'granularity'));
		};
		Segmenter.supportedLocalesOf = function (locales, options) {
		return SupportedLocales(Segmenter.availableLocales, CanonicalizeLocaleList(locales), options);
		};
		Segmenter.availableLocales = new Set(Object.keys(SegmentationRules).filter(function (key) { return key !== 'root'; }));
		Segmenter.polyfilled = true;
		return Segmenter;
		}());
		export { Segmenter };
		// Builds the `{ breaks, matchingRule }` record returned by `breaksAt`.
		const breaksAtResult = (breaks, matchingRule) => {
			return { breaks: breaks, matchingRule: matchingRule };
		};
		/**
		 * Polyfill implementation of `Intl.Segmenter`, driven by the rule tables in
		 * `SegmentationRules`.
		 */
		export class Segmenter {
			rules;
			ruleSortedKeys;
			mergedSegmentationTypeValue;
			/**
			 * @param locales requested locale(s); canonicalized with `CanonicalizeLocaleList`
			 * @param options may carry `localeMatcher` ("lookup" or "best fit") and
			 * `granularity` ("word", "sentence" or "grapheme"); "best fit" and
			 * "grapheme" are the fallbacks handed to `GetOption`
			 */
			constructor(locales, options) {
				if (new.target === undefined) {
					throw TypeError(`Constructor Intl.Segmenter requires 'new'`);
				}
				const requestedLocales = CanonicalizeLocaleList(locales);
				options = GetOptionsObject(options);
				const opt = Object.create(null);
				const matcher = GetOption(options, "localeMatcher", "string", ["lookup", "best fit"], "best fit");
				opt.localeMatcher = matcher;
				const granularity = GetOption(options, "granularity", "string", [
					"word",
					"sentence",
					"grapheme"
				], "grapheme");
				setSlot(this, "granularity", granularity);
				//TODO: figure out correct available locales
				const r = ResolveLocale(Segmenter.availableLocales, requestedLocales, opt, [], {}, () => "");
				setSlot(this, "locale", r.locale);
				//root rules based on granularity
				// Work on a shallow copy: writing the merged values straight onto
				// `SegmentationRules.root[granularity]` would leak one locale's overrides
				// into the shared root table and therefore into every later instance.
				this.mergedSegmentationTypeValue = { ...SegmentationRules.root[granularity] };
				//merge root rules with locale ones if locale is specified
				if (r.locale.length) {
					const localeOverrides = SegmentationRules[r.locale];
					if (granularity in localeOverrides) {
						const localeSegmentationTypeValue = localeOverrides[granularity];
						this.mergedSegmentationTypeValue.variables = {
							...this.mergedSegmentationTypeValue.variables,
							...localeSegmentationTypeValue.variables
						};
						this.mergedSegmentationTypeValue.segmentRules = {
							...this.mergedSegmentationTypeValue.segmentRules,
							...localeSegmentationTypeValue.segmentRules
						};
						// Either side may lack suppressions; spreading `undefined` into an
						// array would throw, so fall back to an empty list.
						this.mergedSegmentationTypeValue.suppressions = [
							...(this.mergedSegmentationTypeValue.suppressions ?? []),
							...(localeSegmentationTypeValue.suppressions ?? [])
						];
					}
				}
				//prepare rules
				this.rules = prepareLocaleSegmentationRules(this.mergedSegmentationTypeValue);
				//order rule keys
				this.ruleSortedKeys = Object.keys(this.rules).sort((a, b) => Number(a) - Number(b));
			}
			/**
			 * Decides whether a segment boundary exists at `position` in `input`.
			 *
			 * @param position code-unit index of the candidate boundary
			 * @param input text being segmented
			 * @returns `{ breaks, matchingRule }`, where `matchingRule` names the rule
			 * (real or artificial) that produced the decision
			 */
			breaksAt(position, input) {
				const ruleSortedKeys = this.ruleSortedKeys;
				const rules = this.rules;
				const mergedSegmentationTypeValue = this.mergedSegmentationTypeValue;
				//artificial rule 0.2
				if (position === 0) {
					return breaksAtResult(true, "0.2");
				}
				if (position === input.length) {
					//rule 0.3
					return breaksAtResult(true, "0.3");
				}
				//artificial rule 0.1: js specific, due to es5 regex not being unicode aware
				//number 0.1 chosen to mimic java implementation, but needs to execute after 0.2 and 0.3 to be inside the string bounds
				if (isSurrogate(input, position)) {
					return breaksAtResult(false, "0.1");
				}
				const stringBeforeBreak = input.substring(0, position);
				const stringAfterBreak = input.substring(position);
				//artificial rule 0.4: handle suppressions
				if ("suppressions" in mergedSegmentationTypeValue) {
					// The trimmed prefix is the same for every suppression; compute it once.
					const trimmedBeforeBreak = stringBeforeBreak.trim();
					for (const suppression of mergedSegmentationTypeValue.suppressions) {
						if (trimmedBeforeBreak.endsWith(suppression)) {
							return breaksAtResult(false, "0.4");
						}
					}
				}
				// loop through rules and find a match
				for (const ruleKey of ruleSortedKeys) {
					const { before, after, breaks } = rules[ruleKey];
					if (before) {
						if (!before.test(stringBeforeBreak)) {
							//didn't match the before part, therefore skipping
							continue;
						}
					}
					if (after) {
						if (!after.test(stringAfterBreak)) {
							//didn't match the after part, therefore skipping
							continue;
						}
					}
					return breaksAtResult(breaks, ruleKey);
				}
				//artificial rule 999: if no rule matched is Any ÷ Any so return true
				return breaksAtResult(true, "999");
			}
			/**
			 * @param input value to segment
			 * @returns a `SegmentIterator` over `input`
			 * @throws TypeError when called on a receiver that is not a `Segmenter`
			 */
			segment(input) {
				checkReceiver(this, "segment");
				return new SegmentIterator(this, input);
			}
			/**
			 * @returns a fresh object holding the `locale` and `granularity` slots
			 * @throws TypeError when called on a receiver that is not a `Segmenter`
			 */
			resolvedOptions() {
				checkReceiver(this, "resolvedOptions");
				return { ...getMultiInternalSlots(__INTERNAL_SLOT_MAP__, this, "locale", "granularity") };
			}
			// Every key of the rule table except the shared "root" entry.
			static availableLocales = new Set(Object.keys(SegmentationRules).filter((key) => key !== "root"));
			/**
			 * @param locales requested locale(s)
			 * @param options forwarded unchanged to `SupportedLocales`
			 * @returns the result of `SupportedLocales` for `availableLocales`
			 */
			static supportedLocalesOf(locales, options) {
				return SupportedLocales(Segmenter.availableLocales, CanonicalizeLocaleList(locales), options);
			}
			static polyfilled = true;
		}
		/**
		* Determines if a segment is word-like according to Unicode Word Break rules.
		*
		* A segment is considered word-like if it contains alphabetic characters,
		* numbers, or ideographs. Segments containing only whitespace, punctuation,
		* or symbols are not word-like.
		*
		* Per Unicode Word Break (UAX #29) and native Intl.Segmenter implementations,
		* this matches segments that contain characters from word character classes:
		* ALetter, Hebrew_Letter, Numeric, Katakana, Hiragana, and Ideographic.
		*
		* @param segment - The text segment to check
		* @param matchingRule - The word break rule that created this segment
		* @returns true if the segment is word-like
		*/
		* Determines if a segment is word-like according to Unicode Word Break rules.
		*
		* A segment is considered word-like if it contains alphabetic characters,
		* numbers, or ideographs. Segments containing only whitespace, punctuation,
		* or symbols are not word-like.
		*
		* Per Unicode Word Break (UAX #29) and native Intl.Segmenter implementations,
		* this matches segments that contain characters from word character classes:
		* ALetter, Hebrew_Letter, Numeric, Katakana, Hiragana, and Ideographic.
		*
		* @param segment - The text segment to check
		* @param matchingRule - The word break rule that created this segment
		* @returns true if the segment is word-like
		*/
		function isSegmentWordLike(segment, matchingRule) {
		// Primary check: Does the segment contain word characters?
		// Word-like segments contain letters (including ideographs), numbers,
		// or connecting characters like apostrophes within words
		//
		// Regex matches:
		// - Letters: \p{L} (all Unicode letters)
		// - Numbers: \p{N} (all Unicode numbers)
		// - Marks: \p{M} (combining marks, typically part of letters)
		//
		// Note: Using Unicode property escapes which work in modern JS engines
		// and are necessary for proper internationalization
		// Lazy-initialize Unicode regex on first use
		if (WORD_CHARACTERS_UNICODE_REGEX === undefined) {
		try {
		// Create Unicode property escape regex at runtime to avoid compile-time TS1501 error
		WORD_CHARACTERS_UNICODE_REGEX = new RegExp('[\\p{L}\\p{N}\\p{M}]', 'u');
		}
		catch (_a) {
		// Environment doesn't support Unicode property escapes
		WORD_CHARACTERS_UNICODE_REGEX = null;
		}
		}
		var hasWordCharacters;
		if (WORD_CHARACTERS_UNICODE_REGEX) {
		// Check if segment contains word characters using Unicode property escapes
		// This matches the behavior of native Intl.Segmenter in Chrome/Firefox
		hasWordCharacters = WORD_CHARACTERS_UNICODE_REGEX.test(segment);
		}
		else {
		// Fallback for environments without Unicode property escapes
		// Match basic word characters: letters, numbers, underscores
		hasWordCharacters = WORD_CHARACTERS_BASIC_REGEX.test(segment);
		}
		// If segment contains word characters, it's word-like
		if (hasWordCharacters) {
		return true;
		}
		// If no word characters, check if it's definitely not word-like via rules
		// Non-word-like rules per Unicode Word Break specification (UAX #29):
		// https://unicode.org/reports/tr29/#Word_Boundaries
		//
		// WB3a (3.1): Break before newlines (sot ÷ (Newline \| CR \| LF))
		// WB3b (3.2): Break after newlines ((Newline \| CR \| LF) ÷ eot)
		// WB3d (3.4): Keep horizontal whitespace together (WSegSpace × WSegSpace)
		//
		// These rules specifically identify non-word segments like line breaks and whitespace
		var definitelyNotWordLikeRules = ['3.1', '3.2', '3.4'];
		if (definitelyNotWordLikeRules.includes(matchingRule)) {
		return false;
		}
		// For segments without word characters and not matching specific non-word rules,
		// return false (e.g., punctuation, symbols, whitespace via rule 999)
		return false;
		// Primary check: Does the segment contain word characters?
		// Word-like segments contain letters (including ideographs), numbers,
		// or connecting characters like apostrophes within words
		//
		// Regex matches:
		// - Letters: \p{L} (all Unicode letters)
		// - Numbers: \p{N} (all Unicode numbers)
		// - Marks: \p{M} (combining marks, typically part of letters)
		//
		// Note: Using Unicode property escapes which work in modern JS engines
		// and are necessary for proper internationalization
		// Lazy-initialize Unicode regex on first use
		if (WORD_CHARACTERS_UNICODE_REGEX === undefined) {
		try {
		// Create Unicode property escape regex at runtime to avoid compile-time TS1501 error
		WORD_CHARACTERS_UNICODE_REGEX = new RegExp("[\\p{L}\\p{N}\\p{M}]", "u");
		} catch {
		// Environment doesn't support Unicode property escapes
		WORD_CHARACTERS_UNICODE_REGEX = null;
		}
		}
		let hasWordCharacters;
		if (WORD_CHARACTERS_UNICODE_REGEX) {
		// Check if segment contains word characters using Unicode property escapes
		// This matches the behavior of native Intl.Segmenter in Chrome/Firefox
		hasWordCharacters = WORD_CHARACTERS_UNICODE_REGEX.test(segment);
		} else {
		// Fallback for environments without Unicode property escapes
		// Match basic word characters: letters, numbers, underscores
		hasWordCharacters = WORD_CHARACTERS_BASIC_REGEX.test(segment);
		}
		// If segment contains word characters, it's word-like
		if (hasWordCharacters) {
		return true;
		}
		// If no word characters, check if it's definitely not word-like via rules
		// Non-word-like rules per Unicode Word Break specification (UAX #29):
		// https://unicode.org/reports/tr29/#Word_Boundaries
		//
		// WB3a (3.1): Break before newlines (sot ÷ (Newline \| CR \| LF))
		// WB3b (3.2): Break after newlines ((Newline \| CR \| LF) ÷ eot)
		// WB3d (3.4): Keep horizontal whitespace together (WSegSpace × WSegSpace)
		//
		// These rules specifically identify non-word segments like line breaks and whitespace
		const definitelyNotWordLikeRules = [
		"3.1",
		"3.2",
		"3.4"
		];
		if (definitelyNotWordLikeRules.includes(matchingRule)) {
		return false;
		}
		// For segments without word characters and not matching specific non-word rules,
		// return false (e.g., punctuation, symbols, whitespace via rule 999)
		return false;
		}
		var createSegmentDataObject = function (segmenter, segment, index, input, matchingRule) {
		var returnValue = {
		segment: segment,
		index: index,
		input: input,
		};
		if (getSlot(segmenter, 'granularity') === 'word') {
		returnValue.isWordLike = isSegmentWordLike(segment, matchingRule);
		}
		return returnValue;
		// Builds the record handed back for one segment; `isWordLike` is attached
		// only when the segmenter's granularity slot is "word".
		const createSegmentDataObject = (segmenter, segment, index, input, matchingRule) => {
			const isWordGranularity = getSlot(segmenter, "granularity") === "word";
			if (!isWordGranularity) {
				return { segment: segment, index: index, input: input };
			}
			return {
				segment: segment,
				index: index,
				input: input,
				isWordLike: isSegmentWordLike(segment, matchingRule)
			};
		};
		var SegmentIterator = /** @class */ (function () {
		function SegmentIterator(segmenter, input) {
		this.segmenter = segmenter;
		this.lastSegmentIndex = 0;
		if (typeof input == 'symbol') {
		throw TypeError("Input must not be a symbol");
		}
		this.input = String(input);
		}
		SegmentIterator.prototype[Symbol.iterator] = function () {
		return new SegmentIterator(this.segmenter, this.input);
		};
		SegmentIterator.prototype.next = function () {
		//using only the relevant bit of the string
		var checkString = this.input.substring(this.lastSegmentIndex);
		//loop from the start of the checkString, until exactly length (breaksAt returns break at pos=== lenght)
		for (var position = 1; position <= checkString.length; position++) {
		var _a = this.segmenter.breaksAt(position, checkString), breaks = _a.breaks, matchingRule = _a.matchingRule;
		if (breaks) {
		var segment = checkString.substring(0, position);
		var index = this.lastSegmentIndex;
		this.lastSegmentIndex += position;
		return {
		done: false,
		value: createSegmentDataObject(this.segmenter, segment, index, this.input, matchingRule),
		};
		}
		}
		//no segment was found by the loop, therefore the segmentation is done
		return { done: true, value: undefined };
		};
		SegmentIterator.prototype.containing = function (positionInput) {
		if (typeof positionInput === 'bigint') {
		throw TypeError('Index must not be a BigInt');
		}
		var position = Number(positionInput);
		//https://tc39.es/ecma262/#sec-tointegerorinfinity
		// 2. If number is NaN, +0𝔽, or -0𝔽, return 0.
		if (isNaN(position) \|\| !position) {
		position = 0;
		}
		// 5. Let integer be floor(abs(ℝ(number))).
		// 6. If number < -0𝔽, set integer to -integer.
		position = Math.floor(Math.abs(position)) * (position < 0 ? -1 : 1);
		if (position < 0 \|\| position >= this.input.length) {
		return undefined;
		}
		//find previous break point
		var previousBreakPoint = 0;
		if (position === 0) {
		previousBreakPoint = 0;
		}
		else {
		var checkString_1 = this.input;
		for (var cursor = position; cursor >= 0; cursor--) {
		var breaks = this.segmenter.breaksAt(cursor, checkString_1).breaks;
		if (breaks) {
		previousBreakPoint = cursor;
		break;
		}
		}
		}
		var checkString = this.input.substring(previousBreakPoint);
		//find next break point
		for (var cursor = 1; cursor <= checkString.length; cursor++) {
		var _a = this.segmenter.breaksAt(cursor, checkString), breaks = _a.breaks, matchingRule = _a.matchingRule;
		if (breaks) {
		var segment = checkString.substring(0, cursor);
		return createSegmentDataObject(this.segmenter, segment, previousBreakPoint, this.input, matchingRule);
		}
		}
		};
		return SegmentIterator;
		}());
		var __INTERNAL_SLOT_MAP__ = new WeakMap();
		/**
		 * Walks a string segment by segment, asking the owning segmenter's
		 * `breaksAt` where each boundary lies.
		 */
		class SegmentIterator {
			segmenter;
			lastSegmentIndex;
			input;
			/**
			 * @param segmenter object whose `breaksAt` decides the boundaries
			 * @param input value to segment; converted with `String()`
			 * @throws TypeError when `input` is a symbol
			 */
			constructor(segmenter, input) {
				this.segmenter = segmenter;
				this.lastSegmentIndex = 0;
				if (typeof input == "symbol") {
					throw TypeError(`Input must not be a symbol`);
				}
				this.input = String(input);
			}
			// Each iteration request starts over from the beginning of the input.
			[Symbol.iterator]() {
				return new SegmentIterator(this.segmenter, this.input);
			}
			/**
			 * @returns `{ done: false, value }` for the next segment, or
			 * `{ done: true, value: undefined }` once the input is exhausted
			 */
			next() {
				// Only the part of the input that has not been consumed yet matters.
				const remaining = this.input.substring(this.lastSegmentIndex);
				// Scan up to and including the end (breaksAt reports a break at position === length).
				let end = 1;
				while (end <= remaining.length) {
					const { breaks, matchingRule } = this.segmenter.breaksAt(end, remaining);
					if (breaks) {
						const start = this.lastSegmentIndex;
						this.lastSegmentIndex = start + end;
						const value = createSegmentDataObject(this.segmenter, remaining.substring(0, end), start, this.input, matchingRule);
						return { done: false, value: value };
					}
					end++;
				}
				// The scan found no boundary, so segmentation is finished.
				return { done: true, value: undefined };
			}
			/**
			 * @param positionInput index to look up; coerced like ToIntegerOrInfinity
			 * @returns the segment record covering that index, or undefined when the
			 * index is negative or not below the input length
			 * @throws TypeError when `positionInput` is a BigInt
			 */
			containing(positionInput) {
				if (typeof positionInput === "bigint") {
					throw TypeError("Index must not be a BigInt");
				}
				//https://tc39.es/ecma262/#sec-tointegerorinfinity
				// 2. If number is NaN, +0𝔽, or -0𝔽, return 0.
				const numeric = Number(positionInput);
				const normalized = isNaN(numeric) || !numeric ? 0 : numeric;
				// 5. Let integer be floor(abs(ℝ(number))).
				// 6. If number < -0𝔽, set integer to -integer.
				const sign = normalized < 0 ? -1 : 1;
				const position = Math.floor(Math.abs(normalized)) * sign;
				if (position < 0 || position >= this.input.length) {
					return undefined;
				}
				// Walk backwards to the closest boundary at or before the position.
				let segmentStart = 0;
				if (position !== 0) {
					const whole = this.input;
					let cursor = position;
					while (cursor >= 0) {
						const { breaks } = this.segmenter.breaksAt(cursor, whole);
						if (breaks) {
							segmentStart = cursor;
							break;
						}
						cursor--;
					}
				}
				// Walk forwards from that boundary to the next one.
				const tail = this.input.substring(segmentStart);
				for (let length = 1; length <= tail.length; length++) {
					const { breaks, matchingRule } = this.segmenter.breaksAt(length, tail);
					if (!breaks) {
						continue;
					}
					return createSegmentDataObject(this.segmenter, tail.substring(0, length), segmentStart, this.input, matchingRule);
				}
				return undefined;
			}
		}
		const __INTERNAL_SLOT_MAP__ = new WeakMap();
		function getSlot(instance, key) {
		return getInternalSlot(__INTERNAL_SLOT_MAP__, instance, key);
		return getInternalSlot(__INTERNAL_SLOT_MAP__, instance, key);
		}
		function setSlot(instance, key, value) {
		setInternalSlot(__INTERNAL_SLOT_MAP__, instance, key, value);
		setInternalSlot(__INTERNAL_SLOT_MAP__, instance, key, value);
		}
		function checkReceiver(receiver, methodName) {
		if (!(receiver instanceof Segmenter)) {
		throw TypeError("Method Intl.Segmenter.prototype.".concat(methodName, " called on incompatible receiver"));
		}
		if (!(receiver instanceof Segmenter)) {
		throw TypeError(`Method Intl.Segmenter.prototype.${methodName} called on incompatible receiver`);
		}
		}
		try {
		// IE11 does not have Symbol
		if (typeof Symbol !== 'undefined') {
		Object.defineProperty(Segmenter.prototype, Symbol.toStringTag, {
		value: 'Intl.Segmenter',
		writable: false,
		enumerable: false,
		configurable: true,
		});
		}
		//github.com/tc39/test262/blob/main/test/intl402/Segmenter/constructor/length.js
		Object.defineProperty(Segmenter.prototype.constructor, 'length', {
		value: 0,
		writable: false,
		enumerable: false,
		configurable: true,
		});
		// https://github.com/tc39/test262/blob/main/test/intl402/Segmenter/constructor/supportedLocalesOf/length.js
		Object.defineProperty(Segmenter.supportedLocalesOf, 'length', {
		value: 1,
		writable: false,
		enumerable: false,
		configurable: true,
		});
		}
		catch (_a) {
		// Meta fix so we're test262-compliant, not important
		}
		// IE11 does not have Symbol
		if (typeof Symbol !== "undefined") {
		Object.defineProperty(Segmenter.prototype, Symbol.toStringTag, {
		value: "Intl.Segmenter",
		writable: false,
		enumerable: false,
		configurable: true
		});
		}
		//github.com/tc39/test262/blob/main/test/intl402/Segmenter/constructor/length.js
		Object.defineProperty(Segmenter.prototype.constructor, "length", {
		value: 0,
		writable: false,
		enumerable: false,
		configurable: true
		});
		// https://github.com/tc39/test262/blob/main/test/intl402/Segmenter/constructor/supportedLocalesOf/length.js
		Object.defineProperty(Segmenter.supportedLocalesOf, "length", {
		value: 1,
		writable: false,
		enumerable: false,
		configurable: true
		});
		} catch {}

+4

-1

test262-main.d.ts

		@@ -1,1 +0,4 @@
		import './polyfill-force.js';
		// @generated
		// @ts-nocheck
		import "./polyfill-force.js";
		export {};

+1

-1

test262-main.js

		// @generated
		// @ts-nocheck
		import './polyfill-force.js';
		import "./polyfill-force.js";

polyfill.iife.js

Sorry, the diff of this file is too big to display

src/cldr-segmentation-rules.generated.d.ts

Sorry, the diff of this file is too big to display

src/cldr-segmentation-rules.generated.js

Sorry, the diff of this file is too big to display

@formatjs/intl-segmenter - npm Package Compare versions

New alerts

Fixed alerts

Worsened metrics

Dependency changes