import { type List, type Thunk } from '../../types/list/module.f.ts';
		import type { Array1, Array2, Array3 } from '../../types/array/module.f.ts';
		/**
		* An unsigned 8-bit integer representing a single byte.
		*
		* This is a plain alias of `number`; the alias documents intent and does not
		* by itself restrict values to the 0..255 range.
		*/
		export type U8 = number;
		/**
		* A signed 32-bit integer.
		*
		* This is a plain alias of `number`; the alias documents intent and does not
		* by itself restrict values to the 32-bit range.
		*/
		export type I32 = number;
		/**
		* Represents an unsigned 8-bit value (`U8`) or the end-of-file indicator.
		* A `U8` is the byte itself; `null` indicates that reading does not return anything else.
		*/
		export type ByteOrEof = U8 | null;
		/**
		* Represents the state of a UTF-8 decoding operation that contains at least one byte.
		*
		* The state holds the one, two, or three bytes buffered so far
		* (`Array1`, `Array2`, or `Array3` of `number`).
		*/
		export type Utf8NonEmptyState = Array1<number> | Array2<number> | Array3<number>;
		/**
		* Represents the state of a UTF-8 decoding operation, which is either `null` (no state)
		* or a non-empty state containing one or more bytes.
		*/
		export type Utf8State = null | Utf8NonEmptyState;
		export type U8 = number;
		export type I32 = number;
		/**
		* Maps a list of Unicode code points to a stream of UTF-8 bytes.
		*
		* @param input - A list of Unicode code points to be converted.
		* @returns A thunk that lazily produces a sequence of UTF-8 bytes (`U8` values).
		*/
		export declare const fromCodePointList: (input: List<number>) => Thunk<U8>;
		/**
		* Converts a list of UTF-8 bytes into a list of Unicode code points.
		*
		* @param input - A list of UTF-8 bytes (`U8` values).
		* @returns A list of Unicode code points or error codes; both share the
		* `I32` element type.
		*/
		export declare const toCodePointList: (input: List<U8>) => List<I32>;

144

text/utf8/module.f.js

		@@ -1,3 +0,16 @@
		import { flatMap, flat, stateScan } from "../../types/list/module.f.js";
		import { flat, flatMap, stateScan } from "../../types/list/module.f.js";
		/**
		* Bit 31 flag that marks a value as an error code rather than a valid code point.
		*
		* The decoder ORs it onto a byte it cannot decode, and the encoder returns it
		* for input it cannot encode.
		*/
		const errorMask = 0x8000_0000;
		/**
		* Converts a Unicode code point to a sequence of UTF-8 bytes.
		* @param input The Unicode code point to be converted. Valid range:
		* - 0x0000 to 0x007F for 1-byte sequences.
		* - 0x0080 to 0x07FF for 2-byte sequences.
		* - 0x0800 to 0xFFFF for 3-byte sequences.
		* - 0x10000 to 0x10FFFF for 4-byte sequences.
		* @returns A readonly array of UTF-8 bytes representing the input code point.
		* - Returns `[errorMask]` if the input does not match valid UTF-8 encoding rules.
		*/
		const codePointToUtf8 = (input) => {
		@@ -11,24 +24,54 @@ if (input >= 0x0000 && input <= 0x007f) {
		if (input >= 0x0800 && input <= 0xffff) {
		return [input >> 12 \| 0b1110_0000, input >> 6 & 0b0011_1111 \| 0b1000_0000, input & 0b0011_1111 \| 0b1000_0000];
		return [
		input >> 12 \| 0b1110_0000,
		input >> 6 & 0b0011_1111 \| 0b1000_0000,
		input & 0b0011_1111 \| 0b1000_0000,
		];
		}
		if (input >= 0x10000 && input <= 0x10ffff) {
		return [input >> 18 \| 0b1111_0000, input >> 12 & 0b0011_1111 \| 0b1000_0000, input >> 6 & 0b0011_1111 \| 0b1000_0000, input & 0b0011_1111 \| 0b1000_0000];
		return [
		input >> 18 \| 0b1111_0000,
		input >> 12 & 0b0011_1111 \| 0b1000_0000,
		input >> 6 & 0b0011_1111 \| 0b1000_0000,
		input & 0b0011_1111 \| 0b1000_0000,
		];
		}
		if ((input & errorMask) !== 0) {
		if ((input & 0b1000_0000_0000_0000) !== 0) {
		return [input >> 12 & 0b0000_0111 \| 0b1111_0000, input >> 6 & 0b0011_1111 \| 0b1000_0000, input & 0b0011_1111 \| 0b1000_0000];
		return [
		input >> 12 & 0b0000_0111 \| 0b1111_0000,
		input >> 6 & 0b0011_1111 \| 0b1000_0000,
		input & 0b0011_1111 \| 0b1000_0000,
		];
		}
		if ((input & 0b0000_0100_0000_0000) !== 0) {
		return [input >> 6 & 0b0000_1111 \| 0b1110_0000, input & 0b0011_1111 \| 0b1000_0000];
		return [
		input >> 6 & 0b0000_1111 \| 0b1110_0000,
		input & 0b0011_1111 \| 0b1000_0000,
		];
		}
		if ((input & 0b0000_0010_0000_0000) !== 0) {
		return [input >> 6 & 0b0000_0111 \| 0b1111_0000, input & 0b0011_1111 \| 0b1000_0000];
		return [
		input >> 6 & 0b0000_0111 \| 0b1111_0000,
		input & 0b0011_1111 \| 0b1000_0000,
		];
		}
		if ((input & 0b0000_0000_1000_0000) !== 0) {
		if ((input & 0b0000_0000_1000_0000) !== 0)
		return [input & 0b1111_1111];
		}
		}
		return [errorMask];
		};
		/**
		* Maps a list of Unicode code points to a stream of UTF-8 bytes.
		*
		* Defined point-free: `flatMap` is partially applied to `codePointToUtf8`, and
		* the resulting function is what receives the `input` list.
		*
		* @param input - A list of Unicode code points to be converted.
		* @returns A thunk that lazily produces a sequence of UTF-8 bytes.
		*/
		export const fromCodePointList = flatMap(codePointToUtf8);
		/**
		* Converts a non-empty UTF-8 decoding state to an error code.
		*
		* @param state - A non-empty UTF-8 decoding state.
		* @returns An I32 error code derived from the invalid UTF-8 state.
		*/
		const utf8StateToError = (state) => {
		@@ -45,3 +88,4 @@ let x;
		? ((s0 & 0b0000_1111) << 6) + (s1 & 0b0011_1111) + 0b0000_0100_0000_0000
		: ((s0 & 0b0000_0111) << 6) + (s1 & 0b0011_1111) + 0b0000_0010_0000_0000;
		: ((s0 & 0b0000_0111) << 6) + (s1 & 0b0011_1111) +
		0b0000_0010_0000_0000;
		break;
		@@ -51,3 +95,4 @@ }
		const [s0, s1, s2] = state;
		x = ((s0 & 0b0000_0111) << 12) + ((s1 & 0b0011_1111) << 6) + (s2 & 0b0011_1111) + 0b1000_0000_0000_0000;
		x = ((s0 & 0b0000_0111) << 12) + ((s1 & 0b0011_1111) << 6) +
		(s2 & 0b0011_1111) + 0b1000_0000_0000_0000;
		break;
		@@ -60,3 +105,12 @@ }
		};
		const utf8ByteToCodePointOp = state => byte => {
		/**
		* Decodes a byte into a Unicode code point, using a given UTF-8 state.
		*
		* @param state - The current UTF-8 decoding state.
		* @param byte - A single byte to decode.
		* @returns A tuple containing:
		* - A list of decoded Unicode code points or error codes.
		* - The updated UTF-8 state.
		*/
		const utf8ByteToCodePointOp = (state) => (byte) => {
		if (byte < 0x00 \|\| byte > 0xff) {
		@@ -66,8 +120,6 @@ return [[errorMask], state];
		if (state === null) {
		if (byte < 0b1000_0000) {
		if (byte < 0b1000_0000)
		return [[byte], null];
		}
		if (byte >= 0b1100_0010 && byte <= 0b1111_0100) {
		if (byte >= 0b1100_0010 && byte <= 0b1111_0100)
		return [[], [byte]];
		}
		return [[byte \| errorMask], null];
		@@ -82,5 +134,4 @@ }
		}
		if (s0 < 0b1111_1000) {
		if (s0 < 0b1111_1000)
		return [[], [s0, byte]];
		}
		break;
		@@ -91,7 +142,9 @@ }
		if (s0 < 0b1111_0000) {
		return [[((s0 & 0b0000_1111) << 12) + ((s1 & 0b0011_1111) << 6) + (byte & 0b0011_1111)], null];
		return [[
		((s0 & 0b0000_1111) << 12) + ((s1 & 0b0011_1111) << 6) +
		(byte & 0b0011_1111),
		], null];
		}
		if (s0 < 0b1111_1000) {
		if (s0 < 0b1111_1000)
		return [[], [s0, s1, byte]];
		}
		break;
		@@ -101,3 +154,6 @@ }
		const [s0, s1, s2] = state;
		return [[((s0 & 0b0000_0111) << 18) + ((s1 & 0b0011_1111) << 12) + ((s2 & 0b0011_1111) << 6) + (byte & 0b0011_1111)], null];
		return [[
		((s0 & 0b0000_0111) << 18) + ((s1 & 0b0011_1111) << 12) +
		((s2 & 0b0011_1111) << 6) + (byte & 0b0011_1111),
		], null];
		}
		@@ -107,13 +163,45 @@ }
		const error = utf8StateToError(state);
		if (byte < 0b1000_0000) {
		if (byte < 0b1000_0000)
		return [[error, byte], null];
		}
		if (byte >= 0b1100_0010 && byte <= 0b1111_0100) {
		if (byte >= 0b1100_0010 && byte <= 0b1111_0100)
		return [[error], [byte]];
		}
		return [[error, byte \| errorMask], null];
		};
		const utf8EofToCodePointOp = (state) => [state === null ? null : [utf8StateToError(state)], null];
		const utf8ByteOrEofToCodePointOp = state => input => input === null ? utf8EofToCodePointOp(state) : utf8ByteToCodePointOp(state)(input);
		/**
		* Handles the end-of-file (EOF) case for UTF-8 decoding.
		*
		* @param state - The current UTF-8 decoding state.
		* @returns A tuple containing:
		* - A list of decoded Unicode code points or error codes.
		* - The reset UTF-8 state (`null`).
		*/
		const utf8EofToCodePointOp = (state) => [
		state === null ? null : [utf8StateToError(state)],
		null,
		];
		/**
		* Single decoding step that accepts either a byte or the EOF indicator.
		*
		* @param state - The current UTF-8 decoding state.
		* @param input - The next byte, or `null` for EOF.
		* @returns A tuple containing:
		* - A list of decoded Unicode code points or error codes.
		* - The updated UTF-8 state.
		*/
		const utf8ByteOrEofToCodePointOp = (state) => (input) => {
			// `null` is the EOF indicator: flush whatever is pending in `state`.
			if (input === null) {
				return utf8EofToCodePointOp(state);
			}
			return utf8ByteToCodePointOp(state)(input);
		};
		/**
		* A constant representing the end-of-file (EOF) marker for UTF-8 decoding.
		*
		* @remarks
		* The list contains a single `null` value, which represents the EOF
		* condition. `toCodePointList` places it after the input bytes so that the
		* decoding step also receives the EOF indicator as its final element.
		*/
		const eofList = [null];
		export const toCodePointList = input => flat(stateScan(utf8ByteOrEofToCodePointOp)(null)(flat([input, eofList])));
		/**
		* Converts a list of UTF-8 bytes into a list of Unicode code points.
		*
		* The input is followed by the EOF marker, scanned with the byte-or-EOF
		* decoding step starting from the `null` state, and the per-step results
		* are flattened into one list.
		*
		* @param input - A list of UTF-8 bytes.
		* @returns A list of Unicode code points or error codes.
		*/
		export const toCodePointList = (input) => {
			const decode = stateScan(utf8ByteOrEofToCodePointOp)(null);
			const bytesThenEof = flat([input, eofList]);
			return flat(decode(bytesThenEof));
		};

types/bit_vec/module.f.d.ts

		@@ -21,4 +21,4 @@ import { type List, type Thunk } from '../list/module.f.ts';
		* const vec4 = vec(4n)
		* const v0 = vec4(5n) // 0x15n
		* const v1 = vec4(0x5FEn) // 0x1En
		* const v0 = vec4(5n) // 0x15n = 0b1_0101
		* const v1 = vec4(0x5FEn) // 0x1En = 0b1_1110
		* ```
		@@ -25,0 +25,0 @@ */

types/bit_vec/module.f.js

		@@ -0,1 +1,13 @@
		/**
		* @module
		* @description
		* ```
		* MSb is most-significant bit first.
		* - byte: 0x53 = 0b0101_0011
		* - 0123_4567
		* LSb is least-significant bit first.
		* - byte: 0x53 = 0b0101_0011
		* - 7654_3210
		* ```
		*/
		import { log2, mask } from "../bigint/module.f.js";
		@@ -19,4 +31,4 @@ import { flip } from "../function/module.f.js";
		* const vec4 = vec(4n)
		* const v0 = vec4(5n) // 0x15n
		* const v1 = vec4(0x5FEn) // 0x1En
		* const v0 = vec4(5n) // 0x15n = 0b1_0101
		* const v1 = vec4(0x5FEn) // 0x1En = 0b1_1110
		* ```
		@@ -77,3 +89,3 @@ */
		return b => (b << aLen) \| (a & m);
		}
		},
		};
		@@ -100,3 +112,3 @@ /**
		},
		concat: flip(lsb.concat)
		concat: flip(lsb.concat),
		};
		@@ -103,0 +115,0 @@ const appendU8 = ({ concat }) => (u8) => (a) => concat(a)(vec8(BigInt(u8)));

functionalscript - npm Package Compare versions

Improved metrics