icu_properties
Advanced tools
| { | ||
| "git": { | ||
| "sha1": "38a49da495248dd1ded84cf306e4ca42e64d5bb3" | ||
| }, | ||
| "path_in_vcs": "components/properties" | ||
| } |
| # This file is automatically @generated by Cargo. | ||
| # It is not intended for manual editing. | ||
| version = 3 | ||
| [[package]] | ||
| name = "cobs" | ||
| version = "0.3.0" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "0fa961b519f0b462e3a3b4a34b64d119eeaca1d59af726fe450bbba07a9fc0a1" | ||
| dependencies = [ | ||
| "thiserror", | ||
| ] | ||
| [[package]] | ||
| name = "databake" | ||
| version = "0.2.0" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "ff6ee9e2d2afb173bcdeee45934c89ec341ab26f91c9933774fc15c2b58f83ef" | ||
| dependencies = [ | ||
| "databake-derive", | ||
| "proc-macro2", | ||
| "quote", | ||
| ] | ||
| [[package]] | ||
| name = "databake-derive" | ||
| version = "0.2.0" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "6834770958c7b84223607e49758ec0dde273c4df915e734aad50f62968a4c134" | ||
| dependencies = [ | ||
| "proc-macro2", | ||
| "quote", | ||
| "syn", | ||
| "synstructure", | ||
| ] | ||
| [[package]] | ||
| name = "displaydoc" | ||
| version = "0.2.5" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" | ||
| dependencies = [ | ||
| "proc-macro2", | ||
| "quote", | ||
| "syn", | ||
| ] | ||
| [[package]] | ||
| name = "erased-serde" | ||
| version = "0.4.8" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "259d404d09818dec19332e31d94558aeb442fea04c817006456c24b5460bbd4b" | ||
| dependencies = [ | ||
| "serde", | ||
| "serde_core", | ||
| "typeid", | ||
| ] | ||
| [[package]] | ||
| name = "icu_collections" | ||
| version = "2.1.1" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43" | ||
| dependencies = [ | ||
| "databake", | ||
| "displaydoc", | ||
| "potential_utf", | ||
| "serde", | ||
| "yoke", | ||
| "zerofrom", | ||
| "zerovec", | ||
| ] | ||
| [[package]] | ||
| name = "icu_locale_core" | ||
| version = "2.1.1" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6" | ||
| dependencies = [ | ||
| "databake", | ||
| "displaydoc", | ||
| "litemap", | ||
| "serde", | ||
| "tinystr", | ||
| "writeable", | ||
| "zerovec", | ||
| ] | ||
| [[package]] | ||
| name = "icu_properties" | ||
| version = "2.1.1" | ||
| dependencies = [ | ||
| "databake", | ||
| "icu_collections", | ||
| "icu_locale_core", | ||
| "icu_properties_data", | ||
| "icu_provider", | ||
| "serde", | ||
| "unicode-bidi", | ||
| "zerotrie", | ||
| "zerovec", | ||
| ] | ||
| [[package]] | ||
| name = "icu_properties_data" | ||
| version = "2.1.1" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "02845b3647bb045f1100ecd6480ff52f34c35f82d9880e029d329c21d1054899" | ||
| [[package]] | ||
| name = "icu_provider" | ||
| version = "2.1.1" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614" | ||
| dependencies = [ | ||
| "databake", | ||
| "displaydoc", | ||
| "erased-serde", | ||
| "icu_locale_core", | ||
| "postcard", | ||
| "serde", | ||
| "stable_deref_trait", | ||
| "writeable", | ||
| "yoke", | ||
| "zerofrom", | ||
| "zerotrie", | ||
| "zerovec", | ||
| ] | ||
| [[package]] | ||
| name = "litemap" | ||
| version = "0.8.1" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" | ||
| dependencies = [ | ||
| "serde_core", | ||
| ] | ||
| [[package]] | ||
| name = "postcard" | ||
| version = "1.1.3" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "6764c3b5dd454e283a30e6dfe78e9b31096d9e32036b5d1eaac7a6119ccb9a24" | ||
| dependencies = [ | ||
| "cobs", | ||
| "serde", | ||
| ] | ||
| [[package]] | ||
| name = "potential_utf" | ||
| version = "0.1.4" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77" | ||
| dependencies = [ | ||
| "serde_core", | ||
| "zerovec", | ||
| ] | ||
| [[package]] | ||
| name = "proc-macro2" | ||
| version = "1.0.103" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" | ||
| dependencies = [ | ||
| "unicode-ident", | ||
| ] | ||
| [[package]] | ||
| name = "quote" | ||
| version = "1.0.41" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" | ||
| dependencies = [ | ||
| "proc-macro2", | ||
| ] | ||
| [[package]] | ||
| name = "serde" | ||
| version = "1.0.228" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" | ||
| dependencies = [ | ||
| "serde_core", | ||
| "serde_derive", | ||
| ] | ||
| [[package]] | ||
| name = "serde_core" | ||
| version = "1.0.228" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" | ||
| dependencies = [ | ||
| "serde_derive", | ||
| ] | ||
| [[package]] | ||
| name = "serde_derive" | ||
| version = "1.0.228" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" | ||
| dependencies = [ | ||
| "proc-macro2", | ||
| "quote", | ||
| "syn", | ||
| ] | ||
| [[package]] | ||
| name = "stable_deref_trait" | ||
| version = "1.2.1" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" | ||
| [[package]] | ||
| name = "syn" | ||
| version = "2.0.108" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "da58917d35242480a05c2897064da0a80589a2a0476c9a3f2fdc83b53502e917" | ||
| dependencies = [ | ||
| "proc-macro2", | ||
| "quote", | ||
| "unicode-ident", | ||
| ] | ||
| [[package]] | ||
| name = "synstructure" | ||
| version = "0.13.2" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" | ||
| dependencies = [ | ||
| "proc-macro2", | ||
| "quote", | ||
| "syn", | ||
| ] | ||
| [[package]] | ||
| name = "thiserror" | ||
| version = "2.0.17" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8" | ||
| dependencies = [ | ||
| "thiserror-impl", | ||
| ] | ||
| [[package]] | ||
| name = "thiserror-impl" | ||
| version = "2.0.17" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" | ||
| dependencies = [ | ||
| "proc-macro2", | ||
| "quote", | ||
| "syn", | ||
| ] | ||
| [[package]] | ||
| name = "tinystr" | ||
| version = "0.8.2" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869" | ||
| dependencies = [ | ||
| "displaydoc", | ||
| "serde_core", | ||
| "zerovec", | ||
| ] | ||
| [[package]] | ||
| name = "typeid" | ||
| version = "1.0.3" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "bc7d623258602320d5c55d1bc22793b57daff0ec7efc270ea7d55ce1d5f5471c" | ||
| [[package]] | ||
| name = "unicode-bidi" | ||
| version = "0.3.18" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5" | ||
| [[package]] | ||
| name = "unicode-ident" | ||
| version = "1.0.20" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "462eeb75aeb73aea900253ce739c8e18a67423fadf006037cd3ff27e82748a06" | ||
| [[package]] | ||
| name = "writeable" | ||
| version = "0.6.2" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" | ||
| [[package]] | ||
| name = "yoke" | ||
| version = "0.8.1" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954" | ||
| dependencies = [ | ||
| "stable_deref_trait", | ||
| "yoke-derive", | ||
| "zerofrom", | ||
| ] | ||
| [[package]] | ||
| name = "yoke-derive" | ||
| version = "0.8.1" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" | ||
| dependencies = [ | ||
| "proc-macro2", | ||
| "quote", | ||
| "syn", | ||
| "synstructure", | ||
| ] | ||
| [[package]] | ||
| name = "zerofrom" | ||
| version = "0.1.6" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" | ||
| dependencies = [ | ||
| "zerofrom-derive", | ||
| ] | ||
| [[package]] | ||
| name = "zerofrom-derive" | ||
| version = "0.1.6" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" | ||
| dependencies = [ | ||
| "proc-macro2", | ||
| "quote", | ||
| "syn", | ||
| "synstructure", | ||
| ] | ||
| [[package]] | ||
| name = "zerotrie" | ||
| version = "0.2.3" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851" | ||
| dependencies = [ | ||
| "databake", | ||
| "displaydoc", | ||
| "litemap", | ||
| "serde_core", | ||
| "yoke", | ||
| "zerofrom", | ||
| "zerovec", | ||
| ] | ||
| [[package]] | ||
| name = "zerovec" | ||
| version = "0.11.5" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" | ||
| dependencies = [ | ||
| "databake", | ||
| "serde", | ||
| "yoke", | ||
| "zerofrom", | ||
| "zerovec-derive", | ||
| ] | ||
| [[package]] | ||
| name = "zerovec-derive" | ||
| version = "0.11.2" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" | ||
| dependencies = [ | ||
| "proc-macro2", | ||
| "quote", | ||
| "syn", | ||
| ] |
| # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO | ||
| # | ||
| # When uploading crates to the registry Cargo will automatically | ||
| # "normalize" Cargo.toml files for maximal compatibility | ||
| # with all versions of Cargo and also rewrite `path` dependencies | ||
| # to registry (e.g., crates.io) dependencies. | ||
| # | ||
| # If you are reading this file be aware that the original Cargo.toml | ||
| # will likely look very different (and much more reasonable). | ||
| # See Cargo.toml.orig for the original contents. | ||
| [package] | ||
| edition = "2021" | ||
| rust-version = "1.83" | ||
| name = "icu_properties" | ||
| version = "2.1.1" | ||
| authors = ["The ICU4X Project Developers"] | ||
| build = false | ||
| include = [ | ||
| "data/**/*", | ||
| "src/**/*", | ||
| "examples/**/*", | ||
| "benches/**/*", | ||
| "tests/**/*", | ||
| "Cargo.toml", | ||
| "LICENSE", | ||
| "README.md", | ||
| "build.rs", | ||
| ] | ||
| autolib = false | ||
| autobins = false | ||
| autoexamples = false | ||
| autotests = false | ||
| autobenches = false | ||
| description = "Definitions for Unicode properties" | ||
| homepage = "https://icu4x.unicode.org" | ||
| readme = "README.md" | ||
| categories = ["internationalization"] | ||
| license = "Unicode-3.0" | ||
| repository = "https://github.com/unicode-org/icu4x" | ||
| [package.metadata.docs.rs] | ||
| all-features = true | ||
| [features] | ||
| alloc = [ | ||
| "zerovec/alloc", | ||
| "icu_collections/alloc", | ||
| "serde?/alloc", | ||
| ] | ||
| compiled_data = [ | ||
| "dep:icu_properties_data", | ||
| "icu_provider/baked", | ||
| ] | ||
| datagen = [ | ||
| "serde", | ||
| "dep:databake", | ||
| "zerovec/databake", | ||
| "icu_collections/databake", | ||
| "icu_locale_core/databake", | ||
| "zerotrie/databake", | ||
| "icu_provider/export", | ||
| ] | ||
| default = ["compiled_data"] | ||
| serde = [ | ||
| "dep:serde", | ||
| "icu_locale_core/serde", | ||
| "zerovec/serde", | ||
| "icu_collections/serde", | ||
| "icu_provider/serde", | ||
| "zerotrie/serde", | ||
| ] | ||
| unicode_bidi = ["dep:unicode-bidi"] | ||
| [lib] | ||
| name = "icu_properties" | ||
| path = "src/lib.rs" | ||
| [dependencies.databake] | ||
| version = "0.2.0" | ||
| features = ["derive"] | ||
| optional = true | ||
| default-features = false | ||
| [dependencies.icu_collections] | ||
| version = "~2.1.1" | ||
| default-features = false | ||
| [dependencies.icu_locale_core] | ||
| version = "2.1.1" | ||
| features = ["zerovec"] | ||
| default-features = false | ||
| [dependencies.icu_properties_data] | ||
| version = "~2.1.1" | ||
| optional = true | ||
| default-features = false | ||
| [dependencies.icu_provider] | ||
| version = "2.1.1" | ||
| default-features = false | ||
| [dependencies.serde] | ||
| version = "1.0.220" | ||
| features = ["derive"] | ||
| optional = true | ||
| default-features = false | ||
| [dependencies.unicode-bidi] | ||
| version = "0.3.11" | ||
| optional = true | ||
| default-features = false | ||
| [dependencies.zerotrie] | ||
| version = "0.2.0" | ||
| features = [ | ||
| "yoke", | ||
| "zerofrom", | ||
| ] | ||
| default-features = false | ||
| [dependencies.zerovec] | ||
| version = "0.11.3" | ||
| features = [ | ||
| "derive", | ||
| "yoke", | ||
| ] | ||
| default-features = false | ||
| [dev-dependencies] |
Sorry, the diff of this file is not supported yet
| UNICODE LICENSE V3 | ||
| COPYRIGHT AND PERMISSION NOTICE | ||
| Copyright © 2020-2024 Unicode, Inc. | ||
| NOTICE TO USER: Carefully read the following legal agreement. BY | ||
| DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING DATA FILES, AND/OR | ||
| SOFTWARE, YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE | ||
| TERMS AND CONDITIONS OF THIS AGREEMENT. IF YOU DO NOT AGREE, DO NOT | ||
| DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE THE DATA FILES OR SOFTWARE. | ||
| Permission is hereby granted, free of charge, to any person obtaining a | ||
| copy of data files and any associated documentation (the "Data Files") or | ||
| software and any associated documentation (the "Software") to deal in the | ||
| Data Files or Software without restriction, including without limitation | ||
| the rights to use, copy, modify, merge, publish, distribute, and/or sell | ||
| copies of the Data Files or Software, and to permit persons to whom the | ||
| Data Files or Software are furnished to do so, provided that either (a) | ||
| this copyright and permission notice appear with all copies of the Data | ||
| Files or Software, or (b) this copyright and permission notice appear in | ||
| associated Documentation. | ||
| THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY | ||
| KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
| MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF | ||
| THIRD PARTY RIGHTS. | ||
| IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE | ||
| BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, | ||
| OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, | ||
| WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, | ||
| ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THE DATA | ||
| FILES OR SOFTWARE. | ||
| Except as contained in this notice, the name of a copyright holder shall | ||
| not be used in advertising or otherwise to promote the sale, use or other | ||
| dealings in these Data Files or Software without prior written | ||
| authorization of the copyright holder. | ||
| SPDX-License-Identifier: Unicode-3.0 | ||
| — | ||
| Portions of ICU4X may have been adapted from ICU4C and/or ICU4J. | ||
| ICU 1.8.1 to ICU 57.1 © 1995-2016 International Business Machines Corporation and others. |
| # icu_properties [![crates.io](https://img.shields.io/crates/v/icu_properties)](https://crates.io/crates/icu_properties) | ||
| <!-- cargo-rdme start --> | ||
| Definitions of [Unicode Properties] and APIs for | ||
| retrieving property data in an appropriate data structure. | ||
| This module is published as its own crate ([`icu_properties`](https://docs.rs/icu_properties/latest/icu_properties/)) | ||
| and as part of the [`icu`](https://docs.rs/icu/latest/icu/) crate. See the latter for more details on the ICU4X project. | ||
| APIs that return a `CodePointSetData` exist for binary properties and certain enumerated | ||
| properties. | ||
| APIs that return a `CodePointMapData` exist for certain enumerated properties. | ||
| ## Examples | ||
| ### Property data as `CodePointSetData`s | ||
| ```rust | ||
| use icu::properties::{CodePointSetData, CodePointMapData}; | ||
| use icu::properties::props::{GeneralCategory, Emoji}; | ||
| // A binary property as a `CodePointSetData` | ||
| assert!(CodePointSetData::new::<Emoji>().contains('🎃')); // U+1F383 JACK-O-LANTERN | ||
| assert!(!CodePointSetData::new::<Emoji>().contains('木')); // U+6728 | ||
| // An individual enumerated property value as a `CodePointSetData` | ||
| let line_sep_data = CodePointMapData::<GeneralCategory>::new() | ||
| .get_set_for_value(GeneralCategory::LineSeparator); | ||
| let line_sep = line_sep_data.as_borrowed(); | ||
| assert!(line_sep.contains('\u{2028}')); | ||
| assert!(!line_sep.contains('\u{2029}')); | ||
| ``` | ||
| ### Property data as `CodePointMapData`s | ||
| ```rust | ||
| use icu::properties::CodePointMapData; | ||
| use icu::properties::props::Script; | ||
| assert_eq!(CodePointMapData::<Script>::new().get('🎃'), Script::Common); // U+1F383 JACK-O-LANTERN | ||
| assert_eq!(CodePointMapData::<Script>::new().get('木'), Script::Han); // U+6728 | ||
| ``` | ||
| [`ICU4X`]: ../icu/index.html | ||
| [Unicode Properties]: https://unicode-org.github.io/icu/userguide/strings/properties.html | ||
| <!-- cargo-rdme end --> | ||
| ## More Information | ||
| For more information on development, authorship, contributing etc. please visit [`ICU4X home page`](https://github.com/unicode-org/icu4x). |
| // This file is part of ICU4X. For terms of use, please see the file | ||
| // called LICENSE at the top level of the ICU4X source tree | ||
| // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). | ||
| use crate::{props::EnumeratedProperty, provider::PropertyEnumBidiMirroringGlyphV1}; | ||
| use icu_collections::codepointtrie::TrieValue; | ||
| use zerovec::ule::{AsULE, RawBytesULE}; | ||
| /// This is a bitpacked combination of the `Bidi_Mirroring_Glyph`, | ||
| /// `Bidi_Mirrored`, and `Bidi_Paired_Bracket_Type` properties. | ||
| #[derive(Debug, Eq, PartialEq, Clone, Copy, Default)] | ||
| #[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))] | ||
| #[cfg_attr(feature = "datagen", databake(path = icu_properties::props))] | ||
| #[cfg_attr(feature = "serde", derive(serde::Deserialize))] | ||
| #[allow(clippy::exhaustive_structs)] // needed for baked construction | ||
| pub struct BidiMirroringGlyph { | ||
| /// The mirroring glyph, if any (`Bidi_Mirroring_Glyph`). | ||
| pub mirroring_glyph: Option<char>, | ||
| /// Whether the glyph is mirrored (`Bidi_Mirrored`). | ||
| pub mirrored: bool, | ||
| /// The paired bracket type (`Bidi_Paired_Bracket_Type`). | ||
| pub paired_bracket_type: BidiPairedBracketType, | ||
| } | ||
| // Declares `BidiMirroringGlyph` as an enumerated property: its data marker, its names, | ||
| // and (with compiled data) its baked singleton. | ||
| impl EnumeratedProperty for BidiMirroringGlyph { | ||
| type DataMarker = PropertyEnumBidiMirroringGlyphV1; | ||
| const NAME: &'static [u8] = b"Bidi_Mirroring_Glyph"; | ||
| // NOTE(review): identical to `NAME`; confirm that no abbreviated alias is intended here. | ||
| const SHORT_NAME: &'static [u8] = b"Bidi_Mirroring_Glyph"; | ||
| // Only available when the `compiled_data` feature is enabled. | ||
| #[cfg(feature = "compiled_data")] | ||
| const SINGLETON: &'static crate::provider::PropertyCodePointMap<'static, Self> = | ||
| crate::provider::Baked::SINGLETON_PROPERTY_ENUM_BIDI_MIRRORING_GLYPH_V1; | ||
| } | ||
| impl crate::private::Sealed for BidiMirroringGlyph {} | ||
| // Unaligned form: the three low-order little-endian bytes of the value's `u32` encoding. | ||
| impl AsULE for BidiMirroringGlyph { | ||
| type ULE = RawBytesULE<3>; | ||
| fn to_unaligned(self) -> Self::ULE { | ||
| // The most significant byte of the `u32` encoding is discarded. | ||
| let [b0, b1, b2, _] = TrieValue::to_u32(self).to_le_bytes(); | ||
| RawBytesULE([b0, b1, b2]) | ||
| } | ||
| fn from_unaligned(unaligned: Self::ULE) -> Self { | ||
| let RawBytesULE([b0, b1, b2]) = unaligned; | ||
| // Re-expand to four bytes with a zero high byte before decoding. | ||
| let packed = u32::from_le_bytes([b0, b1, b2, 0]); | ||
| // A value that fails to decode falls back to `Default`. | ||
| TrieValue::try_from_u32(packed).unwrap_or_default() | ||
| } | ||
| } | ||
| /// This enum represents `Bidi_Paired_Bracket_Type`. | ||
| /// | ||
| /// It does not implement [`EnumeratedProperty`]; instead, it can be obtained | ||
| /// through the bitpacked [`BidiMirroringGlyph`] property. | ||
| /// | ||
| /// If you have a use case for this property without also needing the [`BidiMirroringGlyph`] | ||
| /// property, and need to optimize data size, please file an issue. | ||
| #[derive(Debug, Eq, PartialEq, Copy, Clone, Default)] | ||
| #[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))] | ||
| #[cfg_attr(feature = "datagen", databake(path = icu_properties::props))] | ||
| #[cfg_attr(feature = "serde", derive(serde::Deserialize))] | ||
| #[non_exhaustive] | ||
| pub enum BidiPairedBracketType { | ||
| /// Represents Bidi_Paired_Bracket_Type=Open. | ||
| Open, | ||
| /// Represents Bidi_Paired_Bracket_Type=Close. | ||
| Close, | ||
| /// Represents Bidi_Paired_Bracket_Type=None. | ||
| #[default] | ||
| None, | ||
| } | ||
| /// Implements [`unicode_bidi::BidiDataSource`] on [`CodePointMapDataBorrowed<BidiClass>`](crate::CodePointMapDataBorrowed). | ||
| /// | ||
| /// ✨ *Enabled with the `unicode_bidi` Cargo feature.* | ||
| /// | ||
| /// # Examples | ||
| /// | ||
| /// ``` | ||
| /// use icu::properties::CodePointMapData; | ||
| /// use icu::properties::props::BidiClass; | ||
| /// use unicode_bidi::BidiInfo; | ||
| /// | ||
| /// // This example text is defined using `concat!` because some browsers | ||
| /// // and text editors have trouble displaying bidi strings. | ||
| /// let text = concat!["א", // RTL#1 | ||
| /// "ב", // RTL#2 | ||
| /// "ג", // RTL#3 | ||
| /// "a", // LTR#1 | ||
| /// "b", // LTR#2 | ||
| /// "c", // LTR#3 | ||
| /// ]; // | ||
| /// | ||
| /// | ||
| /// let bidi_map = CodePointMapData::<BidiClass>::new(); | ||
| /// | ||
| /// // Resolve embedding levels within the text. Pass `None` to detect the | ||
| /// // paragraph level automatically. | ||
| /// let bidi_info = BidiInfo::new_with_data_source(&bidi_map, text, None); | ||
| /// | ||
| /// // This paragraph has embedding level 1 because its first strong character is RTL. | ||
| /// assert_eq!(bidi_info.paragraphs.len(), 1); | ||
| /// let para = &bidi_info.paragraphs[0]; | ||
| /// assert_eq!(para.level.number(), 1); | ||
| /// assert!(para.level.is_rtl()); | ||
| /// | ||
| /// // Re-ordering is done after wrapping each paragraph into a sequence of | ||
| /// // lines. For this example, I'll just use a single line that spans the | ||
| /// // entire paragraph. | ||
| /// let line = para.range.clone(); | ||
| /// | ||
| /// let display = bidi_info.reorder_line(para, line); | ||
| /// assert_eq!(display, concat!["a", // LTR#1 | ||
| /// "b", // LTR#2 | ||
| /// "c", // LTR#3 | ||
| /// "ג", // RTL#3 | ||
| /// "ב", // RTL#2 | ||
| /// "א", // RTL#1 | ||
| /// ]); | ||
| /// ``` | ||
| #[cfg(feature = "unicode_bidi")] | ||
| impl unicode_bidi::data_source::BidiDataSource | ||
| for crate::CodePointMapDataBorrowed<'_, crate::props::BidiClass> | ||
| { | ||
| // Looks up the ICU4X bidi class of `c` and translates it to the `unicode_bidi` value. | ||
| fn bidi_class(&self, c: char) -> unicode_bidi::BidiClass { | ||
| // Both crates name their type `BidiClass`; alias them to keep the table readable. | ||
| use crate::props::BidiClass as Icu; | ||
| use unicode_bidi::BidiClass as Uba; | ||
| match self.get(c) { | ||
| Icu::LeftToRight => Uba::L, | ||
| Icu::RightToLeft => Uba::R, | ||
| Icu::EuropeanNumber => Uba::EN, | ||
| Icu::EuropeanSeparator => Uba::ES, | ||
| Icu::EuropeanTerminator => Uba::ET, | ||
| Icu::ArabicNumber => Uba::AN, | ||
| Icu::CommonSeparator => Uba::CS, | ||
| Icu::ParagraphSeparator => Uba::B, | ||
| Icu::SegmentSeparator => Uba::S, | ||
| Icu::WhiteSpace => Uba::WS, | ||
| Icu::OtherNeutral => Uba::ON, | ||
| Icu::LeftToRightEmbedding => Uba::LRE, | ||
| Icu::LeftToRightOverride => Uba::LRO, | ||
| Icu::ArabicLetter => Uba::AL, | ||
| Icu::RightToLeftEmbedding => Uba::RLE, | ||
| Icu::RightToLeftOverride => Uba::RLO, | ||
| Icu::PopDirectionalFormat => Uba::PDF, | ||
| Icu::NonspacingMark => Uba::NSM, | ||
| Icu::BoundaryNeutral => Uba::BN, | ||
| Icu::FirstStrongIsolate => Uba::FSI, | ||
| Icu::LeftToRightIsolate => Uba::LRI, | ||
| Icu::RightToLeftIsolate => Uba::RLI, | ||
| Icu::PopDirectionalIsolate => Uba::PDI, | ||
| // This must not happen. | ||
| _ => Uba::ON, | ||
| } | ||
| } | ||
| } |
| // This file is part of ICU4X. For terms of use, please see the file | ||
| // called LICENSE at the top level of the ICU4X source tree | ||
| // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). | ||
| #[cfg(feature = "alloc")] | ||
| use crate::code_point_set::CodePointSetData; | ||
| use crate::props::GeneralCategory; | ||
| use crate::props::GeneralCategoryGroup; | ||
| use crate::provider::*; | ||
| use core::ops::RangeInclusive; | ||
| use icu_collections::codepointtrie::{CodePointMapRange, CodePointTrie, TrieValue}; | ||
| use icu_provider::marker::ErasedMarker; | ||
| use icu_provider::prelude::*; | ||
| /// A wrapper around code point map data. | ||
| /// | ||
| /// It is returned by APIs that return Unicode | ||
| /// property data in a map-like form, e.g. enumerated property value data keyed | ||
| /// by code point. Access its data via the borrowed version, | ||
| /// [`CodePointMapDataBorrowed`], obtained from [`Self::as_borrowed`]. | ||
| #[derive(Debug, Clone)] | ||
| pub struct CodePointMapData<T: TrieValue> { | ||
| data: DataPayload<ErasedMarker<PropertyCodePointMap<'static, T>>>, | ||
| } | ||
| impl<T: TrieValue> CodePointMapData<T> { | ||
| /// Creates a new [`CodePointMapDataBorrowed`] for an [`EnumeratedProperty`]. | ||
| /// | ||
| /// See the documentation on [`EnumeratedProperty`] implementations for details. | ||
| /// | ||
| /// ✨ *Enabled with the `compiled_data` Cargo feature.* | ||
| /// | ||
| /// [📚 Help choosing a constructor](icu_provider::constructors) | ||
| #[cfg(feature = "compiled_data")] | ||
| #[expect(clippy::new_ret_no_self)] | ||
| pub const fn new() -> CodePointMapDataBorrowed<'static, T> | ||
| where | ||
| T: EnumeratedProperty, | ||
| { | ||
| CodePointMapDataBorrowed::new() | ||
| } | ||
| #[doc = icu_provider::gen_buffer_unstable_docs!(UNSTABLE, Self::new)] | ||
| pub fn try_new_unstable( | ||
| provider: &(impl DataProvider<T::DataMarker> + ?Sized), | ||
| ) -> Result<Self, DataError> | ||
| where | ||
| T: EnumeratedProperty, | ||
| { | ||
| // Load with a default request; any provider error is propagated to the caller. | ||
| let response = provider.load(Default::default())?; | ||
| // Cast the payload to the marker type stored in `Self::data`. | ||
| let data = response.payload.cast(); | ||
| Ok(Self { data }) | ||
| } | ||
| /// Construct a borrowed version of this type that can be queried. | ||
| /// | ||
| /// This avoids a potential small underlying cost per API call (like `get()`) by consolidating it | ||
| /// up front. | ||
| /// | ||
| /// This owned version is returned by functions that use a runtime data provider. | ||
| #[inline] | ||
| pub fn as_borrowed(&self) -> CodePointMapDataBorrowed<'_, T> { | ||
| CodePointMapDataBorrowed { | ||
| map: self.data.get(), | ||
| } | ||
| } | ||
| /// Convert this map to a map around another type. | ||
| /// | ||
| /// Typically useful for type-erasing maps into maps around integers. | ||
| /// | ||
| /// ✨ *Enabled with the `alloc` Cargo feature.* | ||
| /// | ||
| /// # Panics | ||
| /// Will panic if `T` and `P` are different sizes. | ||
| /// | ||
| /// # Example | ||
| /// | ||
| /// ``` | ||
| /// use icu::properties::CodePointMapData; | ||
| /// use icu::properties::props::GeneralCategory; | ||
| /// | ||
| /// let data = CodePointMapData::<GeneralCategory>::new().static_to_owned(); | ||
| /// | ||
| /// let gc = data.try_into_converted::<u8>().unwrap(); | ||
| /// let gc = gc.as_borrowed(); | ||
| /// | ||
| /// assert_eq!(gc.get('木'), GeneralCategory::OtherLetter as u8); // U+6728 | ||
| /// assert_eq!(gc.get('🎃'), GeneralCategory::OtherSymbol as u8); // U+1F383 JACK-O-LANTERN | ||
| /// ``` | ||
| #[cfg(feature = "alloc")] | ||
| pub fn try_into_converted<P>(self) -> Result<CodePointMapData<P>, zerovec::ule::UleError> | ||
| where | ||
| P: TrieValue, | ||
| { | ||
| self.data | ||
| .try_map_project(|data, _| data.try_into_converted()) | ||
| .map(CodePointMapData::from_data::<ErasedMarker<PropertyCodePointMap<'static, P>>>) | ||
| } | ||
| /// Wrap an already-loaded data payload, casting it to the marker type stored in this struct. | ||
| /// | ||
| /// This is crate-internal; outside callers go through the public constructors. | ||
| pub(crate) fn from_data<M>(data: DataPayload<M>) -> Self | ||
| where | ||
| M: DynamicDataMarker<DataStruct = PropertyCodePointMap<'static, T>>, | ||
| { | ||
| let data = data.cast(); | ||
| Self { data } | ||
| } | ||
| /// Construct a new one from an owned [`CodePointTrie`]. | ||
| pub fn from_code_point_trie(trie: CodePointTrie<'static, T>) -> Self { | ||
| // Wrap the trie in the provider data struct, then in an owned payload. | ||
| let map = PropertyCodePointMap::from_code_point_trie(trie); | ||
| let payload = DataPayload::<ErasedMarker<PropertyCodePointMap<'static, T>>>::from_owned(map); | ||
| CodePointMapData::from_data(payload) | ||
| } | ||
| /// Borrow the underlying data as a [`CodePointTrie`], when that is possible. | ||
| /// | ||
| /// The data backing this is extensible and supports multiple implementations. | ||
| /// Currently it is always [`CodePointTrie`]; however in the future more backends may be | ||
| /// added, and users may select which at data generation time. | ||
| /// | ||
| /// `None` is returned when the backing data provider cannot return a [`CodePointTrie`], | ||
| /// or cannot do so within the expected constant time constraint. | ||
| pub fn as_code_point_trie(&self) -> Option<&CodePointTrie<'_, T>> { | ||
| let map = self.data.get(); | ||
| map.as_code_point_trie() | ||
| } | ||
| /// Produce a [`CodePointTrie`] for this data, borrowing if possible and | ||
| /// otherwise allocating a new [`CodePointTrie`]. | ||
| /// | ||
| /// The data backing this is extensible and supports multiple implementations. | ||
| /// Currently it is always [`CodePointTrie`]; however in the future more backends may be | ||
| /// added, and users may select which at data generation time. | ||
| /// | ||
| /// How expensive this conversion is depends on the data structure that is | ||
| /// backing `self`. | ||
| pub fn to_code_point_trie(&self) -> CodePointTrie<'_, T> { | ||
| let map = self.data.get(); | ||
| map.to_code_point_trie() | ||
| } | ||
| } | ||
| /// A borrowed wrapper around code point map data, returned by | ||
| /// [`CodePointMapData::as_borrowed()`]. More efficient to query. | ||
| #[derive(Clone, Copy, Debug)] | ||
| pub struct CodePointMapDataBorrowed<'a, T: TrieValue> { | ||
| map: &'a PropertyCodePointMap<'a, T>, | ||
| } | ||
| impl<'a, T: TrieValue> CodePointMapDataBorrowed<'a, T> { | ||
| /// Get the value this map has associated with code point `ch` | ||
| /// | ||
| /// # Example | ||
| /// | ||
| /// ``` | ||
| /// use icu::properties::CodePointMapData; | ||
| /// use icu::properties::props::GeneralCategory; | ||
| /// | ||
| /// let gc = CodePointMapData::<GeneralCategory>::new(); | ||
| /// | ||
| /// assert_eq!(gc.get('木'), GeneralCategory::OtherLetter); // U+6728 | ||
| /// assert_eq!(gc.get('🎃'), GeneralCategory::OtherSymbol); // U+1F383 JACK-O-LANTERN | ||
| /// ``` | ||
| #[inline] | ||
| pub fn get(self, ch: char) -> T { | ||
| self.map.get(ch) | ||
| } | ||
| /// See [`Self::get`]. | ||
| #[inline] | ||
| pub fn get32(self, ch: u32) -> T { | ||
| self.map.get32(ch) | ||
| } | ||
| /// Get a [`CodePointSetData`] for all elements corresponding to a particular value | ||
| /// | ||
| /// ✨ *Enabled with the `alloc` Cargo feature.* | ||
| /// | ||
| /// # Example | ||
| /// | ||
| /// ``` | ||
| /// use icu::properties::props::GeneralCategory; | ||
| /// use icu::properties::CodePointMapData; | ||
| /// | ||
| /// let gc = CodePointMapData::<GeneralCategory>::new(); | ||
| /// | ||
| /// let other_letter_set_data = | ||
| /// gc.get_set_for_value(GeneralCategory::OtherLetter); | ||
| /// let other_letter_set = other_letter_set_data.as_borrowed(); | ||
| /// | ||
| /// assert!(other_letter_set.contains('木')); // U+6728 | ||
| /// assert!(!other_letter_set.contains('🎃')); // U+1F383 JACK-O-LANTERN | ||
| /// ``` | ||
| #[cfg(feature = "alloc")] | ||
| pub fn get_set_for_value(self, value: T) -> CodePointSetData { | ||
| let set = self.map.get_set_for_value(value); | ||
| CodePointSetData::from_code_point_inversion_list(set) | ||
| } | ||
| /// Yields an [`Iterator`] returning ranges of consecutive code points that | ||
| /// share the same value in the [`CodePointMapData`]. | ||
| /// | ||
| /// # Examples | ||
| /// | ||
| /// ``` | ||
| /// use icu::properties::props::GeneralCategory; | ||
| /// use icu::properties::CodePointMapData; | ||
| /// | ||
| /// let gc = CodePointMapData::<GeneralCategory>::new(); | ||
| /// let mut ranges = gc.iter_ranges(); | ||
| /// let next = ranges.next().unwrap(); | ||
| /// assert_eq!(next.range, 0..=31); | ||
| /// assert_eq!(next.value, GeneralCategory::Control); | ||
| /// let next = ranges.next().unwrap(); | ||
| /// assert_eq!(next.range, 32..=32); | ||
| /// assert_eq!(next.value, GeneralCategory::SpaceSeparator); | ||
| /// ``` | ||
| pub fn iter_ranges(self) -> impl Iterator<Item = CodePointMapRange<T>> + 'a { | ||
| self.map.iter_ranges() | ||
| } | ||
| /// Yields an [`Iterator`] returning ranges of consecutive code points that | ||
| /// share the same value `v` in the [`CodePointMapData`]. | ||
| /// | ||
| /// # Examples | ||
| /// | ||
| /// | ||
| /// ``` | ||
| /// use icu::properties::props::GeneralCategory; | ||
| /// use icu::properties::CodePointMapData; | ||
| /// | ||
| /// let gc = CodePointMapData::<GeneralCategory>::new(); | ||
| /// let mut ranges = gc.iter_ranges_for_value(GeneralCategory::UppercaseLetter); | ||
| /// assert_eq!(ranges.next().unwrap(), 'A' as u32..='Z' as u32); | ||
| /// assert_eq!(ranges.next().unwrap(), 'À' as u32..='Ö' as u32); | ||
| /// assert_eq!(ranges.next().unwrap(), 'Ø' as u32..='Þ' as u32); | ||
| /// ``` | ||
| pub fn iter_ranges_for_value(self, val: T) -> impl Iterator<Item = RangeInclusive<u32>> + 'a { | ||
| self.map | ||
| .iter_ranges() | ||
| .filter(move |r| r.value == val) | ||
| .map(|r| r.range) | ||
| } | ||
| /// Yields an [`Iterator`] returning ranges of consecutive code points that | ||
| /// do *not* have the value `v` in the [`CodePointMapData`]. | ||
| pub fn iter_ranges_for_value_complemented( | ||
| self, | ||
| val: T, | ||
| ) -> impl Iterator<Item = RangeInclusive<u32>> + 'a { | ||
| self.map | ||
| .iter_ranges_mapped(move |value| value != val) | ||
| .filter(|v| v.value) | ||
| .map(|v| v.range) | ||
| } | ||
| /// Exposed for FFI needs, could be exposed in general in the future but we should | ||
| /// have a use case first. | ||
| /// | ||
| /// FFI needs this since it operates on erased maps and can't use `iter_ranges_for_group()` | ||
| #[doc(hidden)] // used by FFI code | ||
| pub fn iter_ranges_mapped<U: Eq + 'a>( | ||
| self, | ||
| predicate: impl FnMut(T) -> U + Copy + 'a, | ||
| ) -> impl Iterator<Item = CodePointMapRange<U>> + 'a { | ||
| self.map.iter_ranges_mapped(predicate) | ||
| } | ||
| } | ||
| impl CodePointMapDataBorrowed<'_, GeneralCategory> { | ||
| /// Get a [`CodePointSetData`] for all elements corresponding to a particular value group | ||
| /// | ||
| /// ✨ *Enabled with the `alloc` Cargo feature.* | ||
| /// | ||
| /// # Example | ||
| /// | ||
| /// ``` | ||
| /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup}; | ||
| /// use icu::properties::CodePointMapData; | ||
| /// | ||
| /// let gc = CodePointMapData::<GeneralCategory>::new(); | ||
| /// | ||
| /// let other_letter_set_data = | ||
| /// gc.get_set_for_value_group(GeneralCategoryGroup::OtherLetter); | ||
| /// let other_letter_set = other_letter_set_data.as_borrowed(); | ||
| /// | ||
| /// assert!(other_letter_set.contains('木')); // U+6728 | ||
| /// assert!(!other_letter_set.contains('🎃')); // U+1F383 JACK-O-LANTERN | ||
| /// ``` | ||
| #[cfg(feature = "alloc")] | ||
| pub fn get_set_for_value_group(self, value: GeneralCategoryGroup) -> crate::CodePointSetData { | ||
| let matching_gc_ranges = self | ||
| .iter_ranges() | ||
| .filter(|cpm_range| (1 << cpm_range.value as u32) & value.0 != 0) | ||
| .map(|cpm_range| cpm_range.range); | ||
| CodePointSetData::from_code_point_inversion_list(matching_gc_ranges.collect()) | ||
| } | ||
| } | ||
// The default value is the compiled-data instance produced by `new()`.
#[cfg(feature = "compiled_data")]
impl<T: EnumeratedProperty> Default for CodePointMapDataBorrowed<'static, T> {
    fn default() -> Self {
        CodePointMapDataBorrowed::new()
    }
}
| impl<T: TrieValue> CodePointMapDataBorrowed<'static, T> { | ||
| /// Creates a new [`CodePointMapDataBorrowed`] for a [`EnumeratedProperty`]. | ||
| /// | ||
| /// See the documentation on [`EnumeratedProperty`] implementations for details. | ||
| /// | ||
| /// ✨ *Enabled with the `compiled_data` Cargo feature.* | ||
| /// | ||
| /// [📚 Help choosing a constructor](icu_provider::constructors) | ||
| #[cfg(feature = "compiled_data")] | ||
| pub const fn new() -> Self | ||
| where | ||
| T: EnumeratedProperty, | ||
| { | ||
| CodePointMapDataBorrowed { map: T::SINGLETON } | ||
| } | ||
| /// Cheaply converts a [`CodePointMapDataBorrowed<'static>`] into a [`CodePointMapData`]. | ||
| /// | ||
| /// Note: Due to branching and indirection, using [`CodePointMapData`] might inhibit some | ||
| /// compile-time optimizations that are possible with [`CodePointMapDataBorrowed`]. | ||
| pub const fn static_to_owned(self) -> CodePointMapData<T> { | ||
| CodePointMapData { | ||
| data: DataPayload::from_static_ref(self.map), | ||
| } | ||
| } | ||
| } | ||
| impl<'a> CodePointMapDataBorrowed<'a, GeneralCategory> { | ||
| /// Yields an [`Iterator`] returning ranges of consecutive code points that | ||
| /// have a `General_Category` value belonging to the specified [`GeneralCategoryGroup`] | ||
| /// | ||
| /// # Examples | ||
| /// | ||
| /// ``` | ||
| /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup}; | ||
| /// use icu::properties::CodePointMapData; | ||
| /// | ||
| /// let gc = CodePointMapData::<GeneralCategory>::new(); | ||
| /// let mut ranges = gc.iter_ranges_for_group(GeneralCategoryGroup::Letter); | ||
| /// assert_eq!(ranges.next().unwrap(), 'A' as u32..='Z' as u32); | ||
| /// assert_eq!(ranges.next().unwrap(), 'a' as u32..='z' as u32); | ||
| /// assert_eq!(ranges.next().unwrap(), 'ª' as u32..='ª' as u32); | ||
| /// assert_eq!(ranges.next().unwrap(), 'µ' as u32..='µ' as u32); | ||
| /// assert_eq!(ranges.next().unwrap(), 'º' as u32..='º' as u32); | ||
| /// assert_eq!(ranges.next().unwrap(), 'À' as u32..='Ö' as u32); | ||
| /// assert_eq!(ranges.next().unwrap(), 'Ø' as u32..='ö' as u32); | ||
| /// ``` | ||
| pub fn iter_ranges_for_group( | ||
| self, | ||
| group: GeneralCategoryGroup, | ||
| ) -> impl Iterator<Item = RangeInclusive<u32>> + 'a { | ||
| self.map | ||
| .iter_ranges_mapped(move |value| group.contains(value)) | ||
| .filter(|v| v.value) | ||
| .map(|v| v.range) | ||
| } | ||
| } | ||
| /// A Unicode character property that assigns a value to each code point. | ||
| /// | ||
| /// The descriptions of most properties are taken from [`TR44`], the documentation for the | ||
| /// Unicode Character Database. | ||
| /// | ||
| /// <div class="stab unstable"> | ||
| /// 🚫 This trait is sealed; it cannot be implemented by user code. If an API requests an item that implements this | ||
| /// trait, please consider using a type from the implementors listed below. | ||
| /// </div> | ||
| /// | ||
| /// [`TR44`]: https://www.unicode.org/reports/tr44 | ||
| pub trait EnumeratedProperty: crate::private::Sealed + TrieValue { | ||
| #[doc(hidden)] | ||
| type DataMarker: DataMarker<DataStruct = PropertyCodePointMap<'static, Self>>; | ||
| #[doc(hidden)] | ||
| #[cfg(feature = "compiled_data")] | ||
| const SINGLETON: &'static PropertyCodePointMap<'static, Self>; | ||
| /// The name of this property | ||
| const NAME: &'static [u8]; | ||
| /// The abbreviated name of this property, if it exists, otherwise the name | ||
| const SHORT_NAME: &'static [u8]; | ||
| /// Convenience method for `CodePointMapData::new().get(ch)` | ||
| /// | ||
| /// ✨ *Enabled with the `compiled_data` Cargo feature.* | ||
| #[cfg(feature = "compiled_data")] | ||
| fn for_char(ch: char) -> Self { | ||
| CodePointMapData::new().get(ch) | ||
| } | ||
| } |
| // This file is part of ICU4X. For terms of use, please see the file | ||
| // called LICENSE at the top level of the ICU4X source tree | ||
| // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). | ||
| use crate::provider::*; | ||
| use core::ops::RangeInclusive; | ||
| use icu_collections::codepointinvlist::CodePointInversionList; | ||
| use icu_provider::marker::ErasedMarker; | ||
| use icu_provider::prelude::*; | ||
| /// A set of Unicode code points. Access its data via the borrowed version, | ||
| /// [`CodePointSetDataBorrowed`]. | ||
| /// | ||
| /// # Example | ||
| /// ```rust | ||
| /// use icu::properties::CodePointSetData; | ||
| /// use icu::properties::props::Alphabetic; | ||
| /// | ||
| /// let alphabetic = CodePointSetData::new::<Alphabetic>(); | ||
| /// | ||
| /// assert!(!alphabetic.contains('3')); | ||
| /// assert!(!alphabetic.contains('੩')); // U+0A69 GURMUKHI DIGIT THREE | ||
| /// assert!(alphabetic.contains('A')); | ||
| /// assert!(alphabetic.contains('Ä')); // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS | ||
| /// ``` | ||
| #[derive(Debug)] | ||
| pub struct CodePointSetData { | ||
| data: DataPayload<ErasedMarker<PropertyCodePointSet<'static>>>, | ||
| } | ||
| impl CodePointSetData { | ||
| /// Creates a new [`CodePointSetDataBorrowed`] for a [`BinaryProperty`]. | ||
| /// | ||
| /// ✨ *Enabled with the `compiled_data` Cargo feature.* | ||
| /// | ||
| /// [📚 Help choosing a constructor](icu_provider::constructors) | ||
| #[expect(clippy::new_ret_no_self)] | ||
| #[cfg(feature = "compiled_data")] | ||
| pub const fn new<P: BinaryProperty>() -> CodePointSetDataBorrowed<'static> { | ||
| CodePointSetDataBorrowed::new::<P>() | ||
| } | ||
| #[doc = icu_provider::gen_buffer_unstable_docs!(UNSTABLE, Self::new)] | ||
| pub fn try_new_unstable<P: BinaryProperty>( | ||
| provider: &(impl DataProvider<P::DataMarker> + ?Sized), | ||
| ) -> Result<CodePointSetData, DataError> { | ||
| Ok(CodePointSetData::from_data( | ||
| provider.load(Default::default())?.payload, | ||
| )) | ||
| } | ||
| /// Construct a borrowed version of this type that can be queried. | ||
| /// | ||
| /// This owned version if returned by functions that use a runtime data provider. | ||
| #[inline] | ||
| pub fn as_borrowed(&self) -> CodePointSetDataBorrowed<'_> { | ||
| CodePointSetDataBorrowed { | ||
| set: self.data.get(), | ||
| } | ||
| } | ||
| /// Construct a new one from loaded data | ||
| /// | ||
| /// Typically it is preferable to use getters like [`load_ascii_hex_digit()`] instead | ||
| pub(crate) fn from_data<M>(data: DataPayload<M>) -> Self | ||
| where | ||
| M: DynamicDataMarker<DataStruct = PropertyCodePointSet<'static>>, | ||
| { | ||
| Self { data: data.cast() } | ||
| } | ||
| /// Construct a new owned [`CodePointInversionList`] | ||
| pub fn from_code_point_inversion_list(set: CodePointInversionList<'static>) -> Self { | ||
| let set = PropertyCodePointSet::from_code_point_inversion_list(set); | ||
| CodePointSetData::from_data( | ||
| DataPayload::<ErasedMarker<PropertyCodePointSet<'static>>>::from_owned(set), | ||
| ) | ||
| } | ||
| /// Convert this type to a [`CodePointInversionList`] as a borrowed value. | ||
| /// | ||
| /// The data backing this is extensible and supports multiple implementations. | ||
| /// Currently it is always [`CodePointInversionList`]; however in the future more backends may be | ||
| /// added, and users may select which at data generation time. | ||
| /// | ||
| /// This method returns an `Option` in order to return `None` when the backing data provider | ||
| /// cannot return a [`CodePointInversionList`], or cannot do so within the expected constant time | ||
| /// constraint. | ||
| pub fn as_code_point_inversion_list(&self) -> Option<&CodePointInversionList<'_>> { | ||
| self.data.get().as_code_point_inversion_list() | ||
| } | ||
| /// Convert this type to a [`CodePointInversionList`], borrowing if possible, | ||
| /// otherwise allocating a new [`CodePointInversionList`]. | ||
| /// | ||
| /// The data backing this is extensible and supports multiple implementations. | ||
| /// Currently it is always [`CodePointInversionList`]; however in the future more backends may be | ||
| /// added, and users may select which at data generation time. | ||
| /// | ||
| /// The performance of the conversion to this specific return type will vary | ||
| /// depending on the data structure that is backing `self`. | ||
| pub fn to_code_point_inversion_list(&self) -> CodePointInversionList<'_> { | ||
| self.data.get().to_code_point_inversion_list() | ||
| } | ||
| } | ||
| /// A borrowed wrapper around code point set data, returned by | ||
| /// [`CodePointSetData::as_borrowed()`]. More efficient to query. | ||
| #[derive(Clone, Copy, Debug)] | ||
| pub struct CodePointSetDataBorrowed<'a> { | ||
| set: &'a PropertyCodePointSet<'a>, | ||
| } | ||
| impl CodePointSetDataBorrowed<'static> { | ||
| /// Creates a new [`CodePointSetData`] for a [`BinaryProperty`]. | ||
| /// | ||
| /// ✨ *Enabled with the `compiled_data` Cargo feature.* | ||
| /// | ||
| /// [📚 Help choosing a constructor](icu_provider::constructors) | ||
| #[inline] | ||
| #[cfg(feature = "compiled_data")] | ||
| pub const fn new<P: BinaryProperty>() -> Self { | ||
| CodePointSetDataBorrowed { set: P::SINGLETON } | ||
| } | ||
| /// Cheaply converts a [`CodePointSetDataBorrowed<'static>`] into a [`CodePointSetData`]. | ||
| /// | ||
| /// Note: Due to branching and indirection, using [`CodePointSetData`] might inhibit some | ||
| /// compile-time optimizations that are possible with [`CodePointSetDataBorrowed`]. | ||
| pub const fn static_to_owned(self) -> CodePointSetData { | ||
| CodePointSetData { | ||
| data: DataPayload::from_static_ref(self.set), | ||
| } | ||
| } | ||
| } | ||
| impl<'a> CodePointSetDataBorrowed<'a> { | ||
| /// Check if the set contains a character | ||
| /// | ||
| /// ```rust | ||
| /// use icu::properties::CodePointSetData; | ||
| /// use icu::properties::props::Alphabetic; | ||
| /// | ||
| /// let alphabetic = CodePointSetData::new::<Alphabetic>(); | ||
| /// | ||
| /// assert!(!alphabetic.contains('3')); | ||
| /// assert!(!alphabetic.contains('੩')); // U+0A69 GURMUKHI DIGIT THREE | ||
| /// assert!(alphabetic.contains('A')); | ||
| /// assert!(alphabetic.contains('Ä')); // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS | ||
| /// ``` | ||
| #[inline] | ||
| pub fn contains(self, ch: char) -> bool { | ||
| self.set.contains(ch) | ||
| } | ||
| /// See [`Self::contains`]. | ||
| #[inline] | ||
| pub fn contains32(self, ch: u32) -> bool { | ||
| self.set.contains32(ch) | ||
| } | ||
| // Yields an [`Iterator`] returning the ranges of the code points that are | ||
| /// included in the [`CodePointSetData`] | ||
| /// | ||
| /// Ranges are returned as [`RangeInclusive`], which is inclusive of its | ||
| /// `end` bound value. An end-inclusive behavior matches the ICU4C/J | ||
| /// behavior of ranges, ex: `UnicodeSet::contains(UChar32 start, UChar32 end)`. | ||
| /// | ||
| /// # Example | ||
| /// | ||
| /// ``` | ||
| /// use icu::properties::props::Alphabetic; | ||
| /// use icu::properties::CodePointSetData; | ||
| /// | ||
| /// let alphabetic = CodePointSetData::new::<Alphabetic>(); | ||
| /// let mut ranges = alphabetic.iter_ranges(); | ||
| /// | ||
| /// assert_eq!(Some(0x0041..=0x005A), ranges.next()); // 'A'..'Z' | ||
| /// assert_eq!(Some(0x0061..=0x007A), ranges.next()); // 'a'..'z' | ||
| /// ``` | ||
| #[inline] | ||
| pub fn iter_ranges(self) -> impl Iterator<Item = RangeInclusive<u32>> + 'a { | ||
| self.set.iter_ranges() | ||
| } | ||
| // Yields an [`Iterator`] returning the ranges of the code points that are | ||
| /// *not* included in the [`CodePointSetData`] | ||
| /// | ||
| /// Ranges are returned as [`RangeInclusive`], which is inclusive of its | ||
| /// `end` bound value. An end-inclusive behavior matches the ICU4C/J | ||
| /// behavior of ranges, ex: `UnicodeSet::contains(UChar32 start, UChar32 end)`. | ||
| /// | ||
| /// # Example | ||
| /// | ||
| /// ``` | ||
| /// use icu::properties::props::Alphabetic; | ||
| /// use icu::properties::CodePointSetData; | ||
| /// | ||
| /// let alphabetic = CodePointSetData::new::<Alphabetic>(); | ||
| /// let mut ranges = alphabetic.iter_ranges(); | ||
| /// | ||
| /// assert_eq!(Some(0x0041..=0x005A), ranges.next()); // 'A'..'Z' | ||
| /// assert_eq!(Some(0x0061..=0x007A), ranges.next()); // 'a'..'z' | ||
| /// ``` | ||
| #[inline] | ||
| pub fn iter_ranges_complemented(self) -> impl Iterator<Item = RangeInclusive<u32>> + 'a { | ||
| self.set.iter_ranges_complemented() | ||
| } | ||
| } | ||
| /// A binary Unicode character property. | ||
| /// | ||
| /// The descriptions of most properties are taken from [`TR44`], the documentation for the | ||
| /// Unicode Character Database. Some properties are instead defined in [`TR18`], the | ||
| /// documentation for Unicode regular expressions. In particular, Annex C of this document | ||
| /// defines properties for POSIX compatibility. | ||
| /// | ||
| /// <div class="stab unstable"> | ||
| /// 🚫 This trait is sealed; it cannot be implemented by user code. If an API requests an item that implements this | ||
| /// trait, please consider using a type from the implementors listed below. | ||
| /// </div> | ||
| /// | ||
| /// [`TR44`]: https://www.unicode.org/reports/tr44 | ||
| /// [`TR18`]: https://www.unicode.org/reports/tr18 | ||
| pub trait BinaryProperty: crate::private::Sealed + Sized { | ||
| #[doc(hidden)] | ||
| type DataMarker: DataMarker<DataStruct = PropertyCodePointSet<'static>>; | ||
| #[doc(hidden)] | ||
| #[cfg(feature = "compiled_data")] | ||
| const SINGLETON: &'static PropertyCodePointSet<'static>; | ||
| /// The name of this property | ||
| const NAME: &'static [u8]; | ||
| /// The abbreviated name of this property, if it exists, otherwise the name | ||
| const SHORT_NAME: &'static [u8]; | ||
| /// Convenience method for `CodePointSetData::new().contains(ch)` | ||
| /// | ||
| /// ✨ *Enabled with the `compiled_data` Cargo feature.* | ||
| #[cfg(feature = "compiled_data")] | ||
| fn for_char(ch: char) -> bool { | ||
| CodePointSetData::new::<Self>().contains(ch) | ||
| } | ||
| } | ||
#[cfg(test)]
mod tests {
    // Sets derived from a General_Category group should contain digits of several scripts.
    #[test]
    fn test_general_category() {
        use icu::properties::props::GeneralCategory;
        use icu::properties::props::GeneralCategoryGroup;
        use icu::properties::CodePointMapData;

        let digits_data = CodePointMapData::<GeneralCategory>::new()
            .get_set_for_value_group(GeneralCategoryGroup::Number);
        let digits = digits_data.as_borrowed();

        assert!(digits.contains('5'));
        assert!(digits.contains('\u{0665}')); // U+0665 ARABIC-INDIC DIGIT FIVE
        assert!(digits.contains('\u{096b}')); // U+096B DEVANAGARI DIGIT FIVE

        assert!(!digits.contains('A'));
    }

    // Sets derived from a single Script value.
    #[test]
    fn test_script() {
        use icu::properties::props::Script;
        use icu::properties::CodePointMapData;

        let thai_data = CodePointMapData::<Script>::new().get_set_for_value(Script::Thai);
        let thai = thai_data.as_borrowed();

        assert!(thai.contains('\u{0e01}')); // U+0E01 THAI CHARACTER KO KAI
        assert!(thai.contains('\u{0e50}')); // U+0E50 THAI DIGIT ZERO

        assert!(!thai.contains('A'));
        assert!(!thai.contains('\u{0e3f}')); // U+0E3F THAI CURRENCY SYMBOL BAHT
    }

    // Each General_Category group must equal the union of its subcategories.
    #[test]
    fn test_gc_groupings() {
        use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
        use icu::properties::CodePointMapData;
        use icu_collections::codepointinvlist::CodePointInversionListBuilder;

        let test_group = |category: GeneralCategoryGroup, subcategories: &[GeneralCategory]| {
            let category_set =
                CodePointMapData::<GeneralCategory>::new().get_set_for_value_group(category);
            let category_set = category_set
                .as_code_point_inversion_list()
                .expect("The data should be valid");

            // Union of the per-subcategory sets, built range by range.
            let mut builder = CodePointInversionListBuilder::new();
            for &subcategory in subcategories {
                let gc_set_data =
                    CodePointMapData::<GeneralCategory>::new().get_set_for_value(subcategory);
                let gc_set = gc_set_data.as_borrowed();
                for range in gc_set.iter_ranges() {
                    builder.add_range32(range);
                }
            }
            let combined_set = builder.build();
            println!("{category:?} {subcategories:?}");
            assert_eq!(
                category_set.get_inversion_list_vec(),
                combined_set.get_inversion_list_vec()
            );
        };

        test_group(
            GeneralCategoryGroup::Letter,
            &[
                GeneralCategory::UppercaseLetter,
                GeneralCategory::LowercaseLetter,
                GeneralCategory::TitlecaseLetter,
                GeneralCategory::ModifierLetter,
                GeneralCategory::OtherLetter,
            ],
        );
        test_group(
            GeneralCategoryGroup::Other,
            &[
                GeneralCategory::Control,
                GeneralCategory::Format,
                GeneralCategory::Unassigned,
                GeneralCategory::PrivateUse,
                GeneralCategory::Surrogate,
            ],
        );
        test_group(
            GeneralCategoryGroup::Mark,
            &[
                GeneralCategory::SpacingMark,
                GeneralCategory::EnclosingMark,
                GeneralCategory::NonspacingMark,
            ],
        );
        test_group(
            GeneralCategoryGroup::Number,
            &[
                GeneralCategory::DecimalNumber,
                GeneralCategory::LetterNumber,
                GeneralCategory::OtherNumber,
            ],
        );
        test_group(
            GeneralCategoryGroup::Punctuation,
            &[
                GeneralCategory::ConnectorPunctuation,
                GeneralCategory::DashPunctuation,
                GeneralCategory::ClosePunctuation,
                GeneralCategory::FinalPunctuation,
                GeneralCategory::InitialPunctuation,
                GeneralCategory::OtherPunctuation,
                GeneralCategory::OpenPunctuation,
            ],
        );
        test_group(
            GeneralCategoryGroup::Symbol,
            &[
                GeneralCategory::CurrencySymbol,
                GeneralCategory::ModifierSymbol,
                GeneralCategory::MathSymbol,
                GeneralCategory::OtherSymbol,
            ],
        );
        test_group(
            GeneralCategoryGroup::Separator,
            &[
                GeneralCategory::LineSeparator,
                GeneralCategory::ParagraphSeparator,
                GeneralCategory::SpaceSeparator,
            ],
        );
    }

    // Surrogates are not `char`s, so they are checked through the `u32` API.
    #[test]
    fn test_gc_surrogate() {
        use icu::properties::props::GeneralCategory;
        use icu::properties::CodePointMapData;

        let surrogates_data = CodePointMapData::<GeneralCategory>::new()
            .get_set_for_value(GeneralCategory::Surrogate);
        let surrogates = surrogates_data.as_borrowed();

        assert!(surrogates.contains32(0xd800));
        assert!(surrogates.contains32(0xd900));
        assert!(surrogates.contains32(0xdfff));

        assert!(!surrogates.contains('A'));
    }
}
| // This file is part of ICU4X. For terms of use, please see the file | ||
| // called LICENSE at the top level of the ICU4X source tree | ||
| // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). | ||
| use crate::provider::*; | ||
| use icu_collections::codepointinvliststringlist::CodePointInversionListAndStringList; | ||
| use icu_provider::marker::ErasedMarker; | ||
| use icu_provider::prelude::*; | ||
| /// A wrapper around `UnicodeSet` data (characters and strings) | ||
| #[derive(Debug)] | ||
| pub struct EmojiSetData { | ||
| data: DataPayload<ErasedMarker<PropertyUnicodeSet<'static>>>, | ||
| } | ||
| impl EmojiSetData { | ||
| /// Creates a new [`EmojiSetDataBorrowed`] for a [`EmojiSet`]. | ||
| /// | ||
| /// See the documentation on [`EmojiSet`] implementations for details. | ||
| /// | ||
| /// ✨ *Enabled with the `compiled_data` Cargo feature.* | ||
| /// | ||
| /// [📚 Help choosing a constructor](icu_provider::constructors) | ||
| #[cfg(feature = "compiled_data")] | ||
| #[expect(clippy::new_ret_no_self)] | ||
| pub const fn new<P: EmojiSet>() -> EmojiSetDataBorrowed<'static> { | ||
| EmojiSetDataBorrowed::new::<P>() | ||
| } | ||
| /// A version of `new()` that uses custom data provided by a [`DataProvider`]. | ||
| /// | ||
| /// Note that this will return an owned version of the data. Functionality is available on | ||
| /// the borrowed version, accessible through [`EmojiSetData::as_borrowed`]. | ||
| pub fn try_new_unstable<P: EmojiSet>( | ||
| provider: &(impl DataProvider<P::DataMarker> + ?Sized), | ||
| ) -> Result<EmojiSetData, DataError> { | ||
| Ok(EmojiSetData::from_data( | ||
| provider.load(Default::default())?.payload, | ||
| )) | ||
| } | ||
| /// Construct a borrowed version of this type that can be queried. | ||
| /// | ||
| /// This avoids a potential small underlying cost per API call (ex: `contains()`) by consolidating it | ||
| /// up front. | ||
| #[inline] | ||
| pub fn as_borrowed(&self) -> EmojiSetDataBorrowed<'_> { | ||
| EmojiSetDataBorrowed { | ||
| set: self.data.get(), | ||
| } | ||
| } | ||
| /// Construct a new one from loaded data | ||
| /// | ||
| /// Typically it is preferable to use getters instead | ||
| pub(crate) fn from_data<M>(data: DataPayload<M>) -> Self | ||
| where | ||
| M: DynamicDataMarker<DataStruct = PropertyUnicodeSet<'static>>, | ||
| { | ||
| Self { data: data.cast() } | ||
| } | ||
| /// Construct a new owned [`CodePointInversionListAndStringList`] | ||
| pub fn from_code_point_inversion_list_string_list( | ||
| set: CodePointInversionListAndStringList<'static>, | ||
| ) -> Self { | ||
| let set = PropertyUnicodeSet::from_code_point_inversion_list_string_list(set); | ||
| EmojiSetData::from_data( | ||
| DataPayload::<ErasedMarker<PropertyUnicodeSet<'static>>>::from_owned(set), | ||
| ) | ||
| } | ||
| /// Convert this type to a [`CodePointInversionListAndStringList`] as a borrowed value. | ||
| /// | ||
| /// The data backing this is extensible and supports multiple implementations. | ||
| /// Currently it is always [`CodePointInversionListAndStringList`]; however in the future more backends may be | ||
| /// added, and users may select which at data generation time. | ||
| /// | ||
| /// This method returns an `Option` in order to return `None` when the backing data provider | ||
| /// cannot return a [`CodePointInversionListAndStringList`], or cannot do so within the expected constant time | ||
| /// constraint. | ||
| pub fn as_code_point_inversion_list_string_list( | ||
| &self, | ||
| ) -> Option<&CodePointInversionListAndStringList<'_>> { | ||
| self.data.get().as_code_point_inversion_list_string_list() | ||
| } | ||
| /// Convert this type to a [`CodePointInversionListAndStringList`], borrowing if possible, | ||
| /// otherwise allocating a new [`CodePointInversionListAndStringList`]. | ||
| /// | ||
| /// The data backing this is extensible and supports multiple implementations. | ||
| /// Currently it is always [`CodePointInversionListAndStringList`]; however in the future more backends may be | ||
| /// added, and users may select which at data generation time. | ||
| /// | ||
| /// The performance of the conversion to this specific return type will vary | ||
| /// depending on the data structure that is backing `self`. | ||
| pub fn to_code_point_inversion_list_string_list( | ||
| &self, | ||
| ) -> CodePointInversionListAndStringList<'_> { | ||
| self.data.get().to_code_point_inversion_list_string_list() | ||
| } | ||
| } | ||
| /// A borrowed wrapper around code point set data, returned by | ||
| /// [`EmojiSetData::as_borrowed()`]. More efficient to query. | ||
| #[derive(Clone, Copy, Debug)] | ||
| pub struct EmojiSetDataBorrowed<'a> { | ||
| set: &'a PropertyUnicodeSet<'a>, | ||
| } | ||
| impl EmojiSetDataBorrowed<'_> { | ||
| /// Check if the set contains the string. Strings consisting of one character | ||
| /// are treated as a character/code point. | ||
| /// | ||
| /// This matches ICU behavior for ICU's `UnicodeSet`. | ||
| #[inline] | ||
| pub fn contains_str(self, s: &str) -> bool { | ||
| self.set.contains_str(s) | ||
| } | ||
| /// Check if the set contains the code point. | ||
| #[inline] | ||
| pub fn contains(self, ch: char) -> bool { | ||
| self.set.contains(ch) | ||
| } | ||
| /// See [`Self::contains`]. | ||
| #[inline] | ||
| pub fn contains32(self, cp: u32) -> bool { | ||
| self.set.contains32(cp) | ||
| } | ||
| } | ||
| impl EmojiSetDataBorrowed<'static> { | ||
| /// Creates a new [`EmojiSetDataBorrowed`] for a [`EmojiSet`]. | ||
| /// | ||
| /// See the documentation on [`EmojiSet`] implementations for details. | ||
| /// | ||
| /// ✨ *Enabled with the `compiled_data` Cargo feature.* | ||
| /// | ||
| /// [📚 Help choosing a constructor](icu_provider::constructors) | ||
| #[inline] | ||
| #[cfg(feature = "compiled_data")] | ||
| pub const fn new<P: EmojiSet>() -> Self { | ||
| EmojiSetDataBorrowed { set: P::SINGLETON } | ||
| } | ||
| /// Cheaply converts a [`EmojiSetDataBorrowed<'static>`] into a [`EmojiSetData`]. | ||
| /// | ||
| /// Note: Due to branching and indirection, using [`EmojiSetData`] might inhibit some | ||
| /// compile-time optimizations that are possible with [`EmojiSetDataBorrowed`]. | ||
| pub const fn static_to_owned(self) -> EmojiSetData { | ||
| EmojiSetData { | ||
| data: DataPayload::from_static_ref(self.set), | ||
| } | ||
| } | ||
| } | ||
| /// An Emoji set as defined by [`Unicode Technical Standard #51`](https://unicode.org/reports/tr51/#Emoji_Sets>). | ||
| /// | ||
| /// <div class="stab unstable"> | ||
| /// 🚫 This trait is sealed; it cannot be implemented by user code. If an API requests an item that implements this | ||
| /// trait, please consider using a type from the implementors listed below. | ||
| /// </div> | ||
| pub trait EmojiSet: crate::private::Sealed { | ||
| #[doc(hidden)] | ||
| type DataMarker: DataMarker<DataStruct = PropertyUnicodeSet<'static>>; | ||
| #[doc(hidden)] | ||
| #[cfg(feature = "compiled_data")] | ||
| const SINGLETON: &'static PropertyUnicodeSet<'static>; | ||
| } |
| // This file is part of ICU4X. For terms of use, please see the file | ||
| // called LICENSE at the top level of the ICU4X source tree | ||
| // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). | ||
| //! Definitions of [Unicode Properties] and APIs for | ||
| //! retrieving property data in an appropriate data structure. | ||
| //! | ||
| //! This module is published as its own crate ([`icu_properties`](https://docs.rs/icu_properties/latest/icu_properties/)) | ||
| //! and as part of the [`icu`](https://docs.rs/icu/latest/icu/) crate. See the latter for more details on the ICU4X project. | ||
| //! | ||
| //! APIs that return a [`CodePointSetData`] exist for binary properties and certain enumerated | ||
| //! properties. | ||
| //! | ||
| //! APIs that return a [`CodePointMapData`] exist for certain enumerated properties. | ||
| //! | ||
| //! # Examples | ||
| //! | ||
| //! ## Property data as `CodePointSetData`s | ||
| //! | ||
| //! ``` | ||
| //! use icu::properties::{CodePointSetData, CodePointMapData}; | ||
| //! use icu::properties::props::{GeneralCategory, Emoji}; | ||
| //! | ||
| //! // A binary property as a `CodePointSetData` | ||
| //! | ||
| //! assert!(CodePointSetData::new::<Emoji>().contains('🎃')); // U+1F383 JACK-O-LANTERN | ||
| //! assert!(!CodePointSetData::new::<Emoji>().contains('木')); // U+6728 | ||
| //! | ||
| //! // An individual enumerated property value as a `CodePointSetData` | ||
| //! | ||
| //! let line_sep_data = CodePointMapData::<GeneralCategory>::new() | ||
| //! .get_set_for_value(GeneralCategory::LineSeparator); | ||
| //! let line_sep = line_sep_data.as_borrowed(); | ||
| //! | ||
| //! assert!(line_sep.contains('\u{2028}')); | ||
| //! assert!(!line_sep.contains('\u{2029}')); | ||
| //! ``` | ||
| //! | ||
| //! ## Property data as `CodePointMapData`s | ||
| //! | ||
| //! ``` | ||
| //! use icu::properties::CodePointMapData; | ||
| //! use icu::properties::props::Script; | ||
| //! | ||
| //! assert_eq!(CodePointMapData::<Script>::new().get('🎃'), Script::Common); // U+1F383 JACK-O-LANTERN | ||
| //! assert_eq!(CodePointMapData::<Script>::new().get('木'), Script::Han); // U+6728 | ||
| //! ``` | ||
| //! | ||
| //! [`ICU4X`]: ../icu/index.html | ||
| //! [Unicode Properties]: https://unicode-org.github.io/icu/userguide/strings/properties.html | ||
| //! [`CodePointSetData`]: crate::CodePointSetData | ||
| //! [`CodePointMapData`]: crate::CodePointMapData | ||
| // https://github.com/unicode-org/icu4x/blob/main/documents/process/boilerplate.md#library-annotations | ||
| #![cfg_attr(not(any(test, doc)), no_std)] | ||
| #![cfg_attr( | ||
| not(test), | ||
| deny( | ||
| clippy::indexing_slicing, | ||
| clippy::unwrap_used, | ||
| clippy::expect_used, | ||
| clippy::panic, | ||
| clippy::exhaustive_structs, | ||
| clippy::exhaustive_enums, | ||
| clippy::trivially_copy_pass_by_ref, | ||
| missing_debug_implementations, | ||
| ) | ||
| )] | ||
| #![warn(missing_docs)] | ||
| #[cfg(feature = "alloc")] | ||
| extern crate alloc; | ||
| mod code_point_set; | ||
| pub use code_point_set::{CodePointSetData, CodePointSetDataBorrowed}; | ||
| mod code_point_map; | ||
| pub use code_point_map::{CodePointMapData, CodePointMapDataBorrowed}; | ||
| mod emoji; | ||
| pub use emoji::{EmojiSetData, EmojiSetDataBorrowed}; | ||
| mod names; | ||
| pub use names::{ | ||
| PropertyNamesLong, PropertyNamesLongBorrowed, PropertyNamesShort, PropertyNamesShortBorrowed, | ||
| PropertyParser, PropertyParserBorrowed, | ||
| }; | ||
| mod runtime; | ||
| // NOTE: The Pernosco debugger has special knowledge | ||
| // of the `CanonicalCombiningClass` struct inside the `props` | ||
| // module. Please do not change the crate-module-qualified | ||
| // name of that struct without coordination. | ||
| pub mod props; | ||
| pub mod provider; | ||
| pub mod script; | ||
| mod bidi; | ||
| mod trievalue; | ||
// Private module holding the `Sealed` supertrait. The module is not `pub`, so code
// outside this crate cannot name `Sealed` and therefore cannot implement the public
// traits that list it as a supertrait.
mod private {
    pub trait Sealed {}
}
| // This file is part of ICU4X. For terms of use, please see the file | ||
| // called LICENSE at the top level of the ICU4X source tree | ||
| // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). | ||
| use crate::props::*; | ||
| use crate::provider::names::*; | ||
| use core::marker::PhantomData; | ||
| use icu_collections::codepointtrie::TrieValue; | ||
| use icu_provider::marker::ErasedMarker; | ||
| use icu_provider::prelude::*; | ||
| use yoke::Yokeable; | ||
| use zerotrie::cursor::ZeroTrieSimpleAsciiCursor; | ||
| /// A struct capable of looking up a property value from a string name. | ||
| /// Access its data by calling [`Self::as_borrowed()`] and using the methods on | ||
| /// [`PropertyParserBorrowed`]. | ||
| /// | ||
| /// The name can be a short name (`Lu`), a long name (`Uppercase_Letter`), | ||
| /// or an alias. | ||
| /// | ||
| /// Property names can be looked up using "strict" matching (looking for a name | ||
| /// that matches exactly), or "loose matching", where the name is allowed to deviate | ||
| /// in terms of ASCII casing, whitespace, underscores, and hyphens. | ||
| /// | ||
| /// # Example | ||
| /// | ||
| /// ``` | ||
| /// use icu::properties::props::GeneralCategory; | ||
| /// use icu::properties::PropertyParser; | ||
| /// | ||
| /// let lookup = PropertyParser::<GeneralCategory>::new(); | ||
| /// // short name for value | ||
| /// assert_eq!( | ||
| /// lookup.get_strict("Lu"), | ||
| /// Some(GeneralCategory::UppercaseLetter) | ||
| /// ); | ||
| /// assert_eq!( | ||
| /// lookup.get_strict("Pd"), | ||
| /// Some(GeneralCategory::DashPunctuation) | ||
| /// ); | ||
| /// // long name for value | ||
| /// assert_eq!( | ||
| /// lookup.get_strict("Uppercase_Letter"), | ||
| /// Some(GeneralCategory::UppercaseLetter) | ||
| /// ); | ||
| /// assert_eq!( | ||
| /// lookup.get_strict("Dash_Punctuation"), | ||
| /// Some(GeneralCategory::DashPunctuation) | ||
| /// ); | ||
| /// // name has incorrect casing | ||
| /// assert_eq!(lookup.get_strict("dashpunctuation"), None); | ||
| /// // loose matching of name | ||
| /// assert_eq!( | ||
| /// lookup.get_loose("dash-punctuation"), | ||
| /// Some(GeneralCategory::DashPunctuation) | ||
| /// ); | ||
| /// // fake property | ||
| /// assert_eq!(lookup.get_strict("Animated_Gif"), None); | ||
| /// ``` | ||
#[derive(Debug)]
pub struct PropertyParser<T> {
    // Type-erased payload holding the name-to-enum map used for lookups.
    map: DataPayload<ErasedMarker<PropertyValueNameToEnumMap<'static>>>,
    // Records the property type `T` at the type level; no `T` value is stored.
    markers: PhantomData<fn() -> T>,
}
/// A borrowed wrapper around property value name-to-enum data, returned by
/// [`PropertyParser::as_borrowed()`]. More efficient to query.
#[derive(Debug)]
pub struct PropertyParserBorrowed<'a, T> {
    // Borrowed name-to-enum map that the lookup methods query.
    map: &'a PropertyValueNameToEnumMap<'a>,
    // Records the property type `T` at the type level; no `T` value is stored.
    markers: PhantomData<fn() -> T>,
}
// `Clone` and `Copy` are written by hand with an unbounded `T`, so the borrowed parser
// is copyable for every `T` (the struct only holds a reference and a `PhantomData`).
impl<T> Clone for PropertyParserBorrowed<'_, T> {
    fn clone(&self) -> Self {
        // The type is `Copy`, so cloning is a plain bitwise copy.
        *self
    }
}
impl<T> Copy for PropertyParserBorrowed<'_, T> {}
| impl<T> PropertyParser<T> { | ||
| /// Creates a new instance of `PropertyParser<T>` using compiled data. | ||
| /// | ||
| /// ✨ *Enabled with the `compiled_data` Cargo feature.* | ||
| /// | ||
| /// [📚 Help choosing a constructor](icu_provider::constructors) | ||
| #[cfg(feature = "compiled_data")] | ||
| #[expect(clippy::new_ret_no_self)] | ||
| pub fn new() -> PropertyParserBorrowed<'static, T> | ||
| where | ||
| T: ParseableEnumeratedProperty, | ||
| { | ||
| PropertyParserBorrowed::new() | ||
| } | ||
| #[doc = icu_provider::gen_buffer_unstable_docs!(UNSTABLE, Self::new)] | ||
| pub fn try_new_unstable( | ||
| provider: &(impl DataProvider<T::DataMarker> + ?Sized), | ||
| ) -> Result<Self, DataError> | ||
| where | ||
| T: ParseableEnumeratedProperty, | ||
| { | ||
| Ok(Self { | ||
| map: provider.load(Default::default())?.payload.cast(), | ||
| markers: PhantomData, | ||
| }) | ||
| } | ||
| /// Construct a borrowed version of this type that can be queried. | ||
| /// | ||
| /// This avoids a potential small underlying cost per API call (like `get_strict()`) by consolidating it | ||
| /// up front. | ||
| #[inline] | ||
| pub fn as_borrowed(&self) -> PropertyParserBorrowed<'_, T> { | ||
| PropertyParserBorrowed { | ||
| map: self.map.get(), | ||
| markers: PhantomData, | ||
| } | ||
| } | ||
| #[doc(hidden)] // used by FFI code | ||
| pub fn erase(self) -> PropertyParser<u16> { | ||
| PropertyParser { | ||
| map: self.map.cast(), | ||
| markers: PhantomData, | ||
| } | ||
| } | ||
| } | ||
| impl<T: TrieValue> PropertyParserBorrowed<'_, T> { | ||
| /// Get the property value as a u16, doing a strict search looking for | ||
| /// names that match exactly | ||
| /// | ||
| /// # Example | ||
| /// | ||
| /// ``` | ||
| /// use icu::properties::props::GeneralCategory; | ||
| /// use icu::properties::PropertyParser; | ||
| /// | ||
| /// let lookup = PropertyParser::<GeneralCategory>::new(); | ||
| /// assert_eq!( | ||
| /// lookup.get_strict_u16("Lu"), | ||
| /// Some(GeneralCategory::UppercaseLetter as u16) | ||
| /// ); | ||
| /// assert_eq!( | ||
| /// lookup.get_strict_u16("Uppercase_Letter"), | ||
| /// Some(GeneralCategory::UppercaseLetter as u16) | ||
| /// ); | ||
| /// // does not do loose matching | ||
| /// assert_eq!(lookup.get_strict_u16("UppercaseLetter"), None); | ||
| /// ``` | ||
#[inline]
pub fn get_strict_u16(self, name: &str) -> Option<u16> {
    // Forward to the non-generic free function so the lookup code is not duplicated per `T`.
    get_strict_u16(self.map, name)
}
| /// Get the property value as a `T`, doing a strict search looking for | ||
| /// names that match exactly | ||
| /// | ||
| /// # Example | ||
| /// | ||
| /// ``` | ||
| /// use icu::properties::props::GeneralCategory; | ||
| /// use icu::properties::PropertyParser; | ||
| /// | ||
| /// let lookup = PropertyParser::<GeneralCategory>::new(); | ||
| /// assert_eq!( | ||
| /// lookup.get_strict("Lu"), | ||
| /// Some(GeneralCategory::UppercaseLetter) | ||
| /// ); | ||
| /// assert_eq!( | ||
| /// lookup.get_strict("Uppercase_Letter"), | ||
| /// Some(GeneralCategory::UppercaseLetter) | ||
| /// ); | ||
| /// // does not do loose matching | ||
| /// assert_eq!(lookup.get_strict("UppercaseLetter"), None); | ||
| /// ``` | ||
| #[inline] | ||
| pub fn get_strict(self, name: &str) -> Option<T> { | ||
| T::try_from_u32(self.get_strict_u16(name)? as u32).ok() | ||
| } | ||
| /// Get the property value as a u16, doing a loose search looking for | ||
| /// names that match case-insensitively, ignoring ASCII hyphens, underscores, and | ||
| /// whitespaces. | ||
| /// | ||
| /// # Example | ||
| /// | ||
| /// ``` | ||
| /// use icu::properties::props::GeneralCategory; | ||
| /// use icu::properties::PropertyParser; | ||
| /// | ||
| /// let lookup = PropertyParser::<GeneralCategory>::new(); | ||
| /// assert_eq!( | ||
| /// lookup.get_loose_u16("Lu"), | ||
| /// Some(GeneralCategory::UppercaseLetter as u16) | ||
| /// ); | ||
| /// assert_eq!( | ||
| /// lookup.get_loose_u16("Uppercase_Letter"), | ||
| /// Some(GeneralCategory::UppercaseLetter as u16) | ||
| /// ); | ||
| /// // does do loose matching | ||
| /// assert_eq!( | ||
| /// lookup.get_loose_u16("UppercaseLetter"), | ||
| /// Some(GeneralCategory::UppercaseLetter as u16) | ||
| /// ); | ||
| /// ``` | ||
#[inline]
pub fn get_loose_u16(self, name: &str) -> Option<u16> {
    // Forward to the non-generic free function so the lookup code is not duplicated per `T`.
    get_loose_u16(self.map, name)
}
| /// Get the property value as a `T`, doing a loose search looking for | ||
| /// names that match case-insensitively, ignoring ASCII hyphens, underscores, and | ||
| /// whitespaces. | ||
| /// | ||
| /// # Example | ||
| /// | ||
| /// ``` | ||
| /// use icu::properties::props::GeneralCategory; | ||
| /// use icu::properties::PropertyParser; | ||
| /// | ||
| /// let lookup = PropertyParser::<GeneralCategory>::new(); | ||
| /// assert_eq!( | ||
| /// lookup.get_loose("Lu"), | ||
| /// Some(GeneralCategory::UppercaseLetter) | ||
| /// ); | ||
| /// assert_eq!( | ||
| /// lookup.get_loose("Uppercase_Letter"), | ||
| /// Some(GeneralCategory::UppercaseLetter) | ||
| /// ); | ||
| /// // does do loose matching | ||
| /// assert_eq!( | ||
| /// lookup.get_loose("UppercaseLetter"), | ||
| /// Some(GeneralCategory::UppercaseLetter) | ||
| /// ); | ||
| /// ``` | ||
| #[inline] | ||
| pub fn get_loose(self, name: &str) -> Option<T> { | ||
| T::try_from_u32(self.get_loose_u16(name)? as u32).ok() | ||
| } | ||
| } | ||
// `Default` forwards to `Self::new()`, which needs compiled data, so the impl is gated
// on the same Cargo feature.
#[cfg(feature = "compiled_data")]
impl<T: ParseableEnumeratedProperty> Default for PropertyParserBorrowed<'static, T> {
    fn default() -> Self {
        Self::new()
    }
}
| impl<T: TrieValue> PropertyParserBorrowed<'static, T> { | ||
| /// Creates a new instance of `PropertyParserBorrowed<T>` using compiled data. | ||
| /// | ||
| /// ✨ *Enabled with the `compiled_data` Cargo feature.* | ||
| /// | ||
| /// [📚 Help choosing a constructor](icu_provider::constructors) | ||
| #[cfg(feature = "compiled_data")] | ||
| pub fn new() -> Self | ||
| where | ||
| T: ParseableEnumeratedProperty, | ||
| { | ||
| Self { | ||
| map: T::SINGLETON, | ||
| markers: PhantomData, | ||
| } | ||
| } | ||
| /// Cheaply converts a [`PropertyParserBorrowed<'static>`] into a [`PropertyParser`]. | ||
| /// | ||
| /// Note: Due to branching and indirection, using [`PropertyParser`] might inhibit some | ||
| /// compile-time optimizations that are possible with [`PropertyParserBorrowed`]. | ||
| pub const fn static_to_owned(self) -> PropertyParser<T> { | ||
| PropertyParser { | ||
| map: DataPayload::from_static_ref(self.map), | ||
| markers: PhantomData, | ||
| } | ||
| } | ||
| } | ||
| /// Avoid monomorphizing multiple copies of this function | ||
| fn get_strict_u16(payload: &PropertyValueNameToEnumMap<'_>, name: &str) -> Option<u16> { | ||
| payload.map.get(name).and_then(|i| i.try_into().ok()) | ||
| } | ||
| /// Avoid monomorphizing multiple copies of this function | ||
| fn get_loose_u16(payload: &PropertyValueNameToEnumMap<'_>, name: &str) -> Option<u16> { | ||
| fn recurse(mut cursor: ZeroTrieSimpleAsciiCursor, mut rest: &[u8]) -> Option<usize> { | ||
| if cursor.is_empty() { | ||
| return None; | ||
| } | ||
| // Skip whitespace, underscore, hyphen in trie. | ||
| for skip in [b'\t', b'\n', b'\x0C', b'\r', b' ', 0x0B, b'_', b'-'] { | ||
| let mut skip_cursor = cursor.clone(); | ||
| skip_cursor.step(skip); | ||
| if let Some(r) = recurse(skip_cursor, rest) { | ||
| return Some(r); | ||
| } | ||
| } | ||
| let ascii = loop { | ||
| let Some((&a, r)) = rest.split_first() else { | ||
| return cursor.take_value(); | ||
| }; | ||
| rest = r; | ||
| // Skip whitespace, underscore, hyphen in input | ||
| if !matches!( | ||
| a, | ||
| b'\t' | b'\n' | b'\x0C' | b'\r' | b' ' | 0x0B | b'_' | b'-' | ||
| ) { | ||
| break a; | ||
| } | ||
| }; | ||
| let mut other_case_cursor = cursor.clone(); | ||
| cursor.step(ascii); | ||
| other_case_cursor.step(if ascii.is_ascii_lowercase() { | ||
| ascii.to_ascii_uppercase() | ||
| } else { | ||
| ascii.to_ascii_lowercase() | ||
| }); | ||
| // This uses the call stack as the DFS stack. The recursion will terminate as | ||
| // rest's length is strictly shrinking. The call stack's depth is limited by | ||
| // name.len(). | ||
| recurse(cursor, rest).or_else(|| recurse(other_case_cursor, rest)) | ||
| } | ||
| recurse(payload.map.cursor(), name.as_bytes()).and_then(|i| i.try_into().ok()) | ||
| } | ||
| /// A struct capable of looking up a property name from a value. | ||
| /// Access its data by calling [`Self::as_borrowed()`] and using the methods on | ||
| /// [`PropertyNamesLongBorrowed`]. | ||
| /// | ||
| /// # Example | ||
| /// | ||
| /// ``` | ||
| /// use icu::properties::props::CanonicalCombiningClass; | ||
| /// use icu::properties::PropertyNamesLong; | ||
| /// | ||
| /// let names = PropertyNamesLong::<CanonicalCombiningClass>::new(); | ||
| /// assert_eq!( | ||
| /// names.get(CanonicalCombiningClass::KanaVoicing), | ||
| /// Some("Kana_Voicing") | ||
| /// ); | ||
| /// assert_eq!( | ||
| /// names.get(CanonicalCombiningClass::AboveLeft), | ||
| /// Some("Above_Left") | ||
| /// ); | ||
| /// ``` | ||
pub struct PropertyNamesLong<T: NamedEnumeratedProperty> {
    // Type-erased payload holding `T`'s long-name data struct.
    map: DataPayload<ErasedMarker<T::DataStructLong>>,
}
// Hand-written `Debug` that prints only the type name.
impl<T: NamedEnumeratedProperty> core::fmt::Debug for PropertyNamesLong<T> {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        f.debug_struct("PropertyNamesLong")
            // `map` is not printed: `NamedEnumeratedProperty` puts no `Debug` bound on
            // `T::DataStructLong`.
            .finish()
    }
}
/// A borrowed wrapper around property value enum-to-long-name data, returned by
/// [`PropertyNamesLong::as_borrowed()`]. More efficient to query.
#[derive(Debug)]
pub struct PropertyNamesLongBorrowed<'a, T: NamedEnumeratedProperty> {
    // Borrowed long-name data struct that `get` queries.
    map: &'a T::DataStructLongBorrowed<'a>,
}
// `Clone` and `Copy` are written by hand; the struct only holds a shared reference.
impl<T: NamedEnumeratedProperty> Clone for PropertyNamesLongBorrowed<'_, T> {
    fn clone(&self) -> Self {
        // The type is `Copy`, so cloning is a plain bitwise copy.
        *self
    }
}
impl<T: NamedEnumeratedProperty> Copy for PropertyNamesLongBorrowed<'_, T> {}
| impl<T: NamedEnumeratedProperty> PropertyNamesLong<T> { | ||
| /// Creates a new instance of `PropertyNamesLongBorrowed<T>`. | ||
| /// | ||
| /// ✨ *Enabled with the `compiled_data` Cargo feature.* | ||
| /// | ||
| /// [📚 Help choosing a constructor](icu_provider::constructors) | ||
| #[cfg(feature = "compiled_data")] | ||
| #[expect(clippy::new_ret_no_self)] | ||
| pub fn new() -> PropertyNamesLongBorrowed<'static, T> { | ||
| PropertyNamesLongBorrowed::new() | ||
| } | ||
| #[doc = icu_provider::gen_buffer_unstable_docs!(UNSTABLE, Self::new)] | ||
| pub fn try_new_unstable( | ||
| provider: &(impl DataProvider<T::DataMarkerLong> + ?Sized), | ||
| ) -> Result<Self, DataError> { | ||
| Ok(Self { | ||
| map: provider.load(Default::default())?.payload.cast(), | ||
| }) | ||
| } | ||
| /// Construct a borrowed version of this type that can be queried. | ||
| /// | ||
| /// This avoids a potential small underlying cost per API call (like `get_static()`) by consolidating it | ||
| /// up front. | ||
| #[inline] | ||
| pub fn as_borrowed(&self) -> PropertyNamesLongBorrowed<'_, T> { | ||
| PropertyNamesLongBorrowed { | ||
| map: T::nep_long_identity(self.map.get()), | ||
| } | ||
| } | ||
| } | ||
| impl<'a, T: NamedEnumeratedProperty> PropertyNamesLongBorrowed<'a, T> { | ||
| /// Get the property name given a value | ||
| /// | ||
| /// # Example | ||
| /// | ||
| /// ```rust | ||
| /// use icu::properties::props::CanonicalCombiningClass; | ||
| /// use icu::properties::PropertyNamesLong; | ||
| /// | ||
| /// let lookup = PropertyNamesLong::<CanonicalCombiningClass>::new(); | ||
| /// assert_eq!( | ||
| /// lookup.get(CanonicalCombiningClass::KanaVoicing), | ||
| /// Some("Kana_Voicing") | ||
| /// ); | ||
| /// assert_eq!( | ||
| /// lookup.get(CanonicalCombiningClass::AboveLeft), | ||
| /// Some("Above_Left") | ||
| /// ); | ||
| /// ``` | ||
| #[inline] | ||
| pub fn get(self, property: T) -> Option<&'a str> { | ||
| self.map.get(property.to_u32()) | ||
| } | ||
| } | ||
// `Default` forwards to `Self::new()`, which needs compiled data, so the impl is gated
// on the same Cargo feature.
#[cfg(feature = "compiled_data")]
impl<T: NamedEnumeratedProperty> Default for PropertyNamesLongBorrowed<'static, T> {
    fn default() -> Self {
        Self::new()
    }
}
| impl<T: NamedEnumeratedProperty> PropertyNamesLongBorrowed<'static, T> { | ||
| /// Creates a new instance of `PropertyNamesLongBorrowed<T>`. | ||
| /// | ||
| /// ✨ *Enabled with the `compiled_data` Cargo feature.* | ||
| /// | ||
| /// [📚 Help choosing a constructor](icu_provider::constructors) | ||
| #[cfg(feature = "compiled_data")] | ||
| pub fn new() -> Self { | ||
| Self { | ||
| map: T::SINGLETON_LONG, | ||
| } | ||
| } | ||
| /// Cheaply converts a [`PropertyNamesLongBorrowed<'static>`] into a [`PropertyNamesLong`]. | ||
| /// | ||
| /// Note: Due to branching and indirection, using [`PropertyNamesLong`] might inhibit some | ||
| /// compile-time optimizations that are possible with [`PropertyNamesLongBorrowed`]. | ||
| /// | ||
| /// This is currently not `const` unlike other `static_to_owned()` functions since it needs | ||
| /// const traits to do that safely | ||
| pub fn static_to_owned(self) -> PropertyNamesLong<T> { | ||
| PropertyNamesLong { | ||
| map: DataPayload::from_static_ref(T::nep_long_identity_static(self.map)), | ||
| } | ||
| } | ||
| } | ||
| /// A struct capable of looking up a property name from a value. | ||
| /// Access its data by calling [`Self::as_borrowed()`] and using the methods on | ||
| /// [`PropertyNamesShortBorrowed`]. | ||
| /// | ||
| /// # Example | ||
| /// | ||
| /// ``` | ||
| /// use icu::properties::props::CanonicalCombiningClass; | ||
| /// use icu::properties::PropertyNamesShort; | ||
| /// | ||
| /// let names = PropertyNamesShort::<CanonicalCombiningClass>::new(); | ||
| /// assert_eq!(names.get(CanonicalCombiningClass::KanaVoicing), Some("KV")); | ||
| /// assert_eq!(names.get(CanonicalCombiningClass::AboveLeft), Some("AL")); | ||
| /// ``` | ||
pub struct PropertyNamesShort<T: NamedEnumeratedProperty> {
    // Type-erased payload holding `T`'s short-name data struct.
    map: DataPayload<ErasedMarker<T::DataStructShort>>,
}
// Hand-written `Debug` that prints only the type name.
impl<T: NamedEnumeratedProperty> core::fmt::Debug for PropertyNamesShort<T> {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        f.debug_struct("PropertyNamesShort")
            // `map` is not printed: `NamedEnumeratedProperty` puts no `Debug` bound on
            // `T::DataStructShort`.
            .finish()
    }
}
/// A borrowed wrapper around property value enum-to-short-name data, returned by
/// [`PropertyNamesShort::as_borrowed()`]. More efficient to query.
#[derive(Debug)]
pub struct PropertyNamesShortBorrowed<'a, T: NamedEnumeratedProperty> {
    // Borrowed short-name data struct that `get` queries.
    map: &'a T::DataStructShortBorrowed<'a>,
}
// `Clone` and `Copy` are written by hand; the struct only holds a shared reference.
impl<T: NamedEnumeratedProperty> Clone for PropertyNamesShortBorrowed<'_, T> {
    fn clone(&self) -> Self {
        // The type is `Copy`, so cloning is a plain bitwise copy.
        *self
    }
}
impl<T: NamedEnumeratedProperty> Copy for PropertyNamesShortBorrowed<'_, T> {}
| impl<T: NamedEnumeratedProperty> PropertyNamesShort<T> { | ||
| /// Creates a new instance of `PropertyNamesShortBorrowed<T>`. | ||
| /// | ||
| /// ✨ *Enabled with the `compiled_data` Cargo feature.* | ||
| /// | ||
| /// [📚 Help choosing a constructor](icu_provider::constructors) | ||
| #[cfg(feature = "compiled_data")] | ||
| #[expect(clippy::new_ret_no_self)] | ||
| pub fn new() -> PropertyNamesShortBorrowed<'static, T> { | ||
| PropertyNamesShortBorrowed::new() | ||
| } | ||
| #[doc = icu_provider::gen_buffer_unstable_docs!(UNSTABLE, Self::new)] | ||
| pub fn try_new_unstable( | ||
| provider: &(impl DataProvider<T::DataMarkerShort> + ?Sized), | ||
| ) -> Result<Self, DataError> { | ||
| Ok(Self { | ||
| map: provider.load(Default::default())?.payload.cast(), | ||
| }) | ||
| } | ||
| /// Construct a borrowed version of this type that can be queried. | ||
| /// | ||
| /// This avoids a potential small underlying cost per API call (like `get_static()`) by consolidating it | ||
| /// up front. | ||
| #[inline] | ||
| pub fn as_borrowed(&self) -> PropertyNamesShortBorrowed<'_, T> { | ||
| PropertyNamesShortBorrowed { | ||
| map: T::nep_short_identity(self.map.get()), | ||
| } | ||
| } | ||
| } | ||
| impl<'a, T: NamedEnumeratedProperty> PropertyNamesShortBorrowed<'a, T> { | ||
| /// Get the property name given a value | ||
| /// | ||
| /// # Example | ||
| /// | ||
| /// ```rust | ||
| /// use icu::properties::props::CanonicalCombiningClass; | ||
| /// use icu::properties::PropertyNamesShort; | ||
| /// | ||
| /// let lookup = PropertyNamesShort::<CanonicalCombiningClass>::new(); | ||
| /// assert_eq!(lookup.get(CanonicalCombiningClass::KanaVoicing), Some("KV")); | ||
| /// assert_eq!(lookup.get(CanonicalCombiningClass::AboveLeft), Some("AL")); | ||
| /// ``` | ||
| #[inline] | ||
| pub fn get(self, property: T) -> Option<&'a str> { | ||
| self.map.get(property.to_u32()) | ||
| } | ||
| } | ||
| impl PropertyNamesShortBorrowed<'_, Script> { | ||
| /// Gets the "name" of a script property as a `icu::locale::subtags::Script`. | ||
| /// | ||
| /// This method is available only on `PropertyNamesShortBorrowed<Script>`. | ||
| /// | ||
| /// # Example | ||
| /// | ||
| /// ```rust | ||
| /// use icu::locale::subtags::script; | ||
| /// use icu::properties::props::Script; | ||
| /// use icu::properties::PropertyNamesShort; | ||
| /// | ||
| /// let lookup = PropertyNamesShort::<Script>::new(); | ||
| /// assert_eq!( | ||
| /// lookup.get_locale_script(Script::Brahmi), | ||
| /// Some(script!("Brah")) | ||
| /// ); | ||
| /// assert_eq!( | ||
| /// lookup.get_locale_script(Script::Hangul), | ||
| /// Some(script!("Hang")) | ||
| /// ); | ||
| /// ``` | ||
| /// | ||
| /// For the reverse direction, use property parsing as normal: | ||
| /// ``` | ||
| /// use icu::locale::subtags::script; | ||
| /// use icu::properties::props::Script; | ||
| /// use icu::properties::PropertyParser; | ||
| /// | ||
| /// let parser = PropertyParser::<Script>::new(); | ||
| /// assert_eq!( | ||
| /// parser.get_strict(script!("Brah").as_str()), | ||
| /// Some(Script::Brahmi) | ||
| /// ); | ||
| /// assert_eq!( | ||
| /// parser.get_strict(script!("Hang").as_str()), | ||
| /// Some(Script::Hangul) | ||
| /// ); | ||
| /// ``` | ||
| #[inline] | ||
| pub fn get_locale_script(self, property: Script) -> Option<icu_locale_core::subtags::Script> { | ||
| let prop = usize::try_from(property.to_u32()).ok()?; | ||
| self.map.map.get(prop).and_then(|o| o.0) | ||
| } | ||
| } | ||
// `Default` forwards to `Self::new()`, which needs compiled data, so the impl is gated
// on the same Cargo feature.
#[cfg(feature = "compiled_data")]
impl<T: NamedEnumeratedProperty> Default for PropertyNamesShortBorrowed<'static, T> {
    fn default() -> Self {
        Self::new()
    }
}
| impl<T: NamedEnumeratedProperty> PropertyNamesShortBorrowed<'static, T> { | ||
| /// Creates a new instance of `PropertyNamesShortBorrowed<T>`. | ||
| /// | ||
| /// ✨ *Enabled with the `compiled_data` Cargo feature.* | ||
| /// | ||
| /// [📚 Help choosing a constructor](icu_provider::constructors) | ||
| #[cfg(feature = "compiled_data")] | ||
| pub fn new() -> Self { | ||
| Self { | ||
| map: T::SINGLETON_SHORT, | ||
| } | ||
| } | ||
| /// Cheaply converts a [`PropertyNamesShortBorrowed<'static>`] into a [`PropertyNamesShort`]. | ||
| /// | ||
| /// Note: Due to branching and indirection, using [`PropertyNamesShort`] might inhibit some | ||
| /// compile-time optimizations that are possible with [`PropertyNamesShortBorrowed`]. | ||
| /// | ||
| /// This is currently not `const` unlike other `static_to_owned()` functions since it needs | ||
| /// const traits to do that safely | ||
| pub fn static_to_owned(self) -> PropertyNamesShort<T> { | ||
| PropertyNamesShort { | ||
| map: DataPayload::from_static_ref(T::nep_short_identity_static(self.map)), | ||
| } | ||
| } | ||
| } | ||
/// A property whose value names can be parsed from strings.
///
/// The trait has `crate::private::Sealed` as a supertrait.
pub trait ParseableEnumeratedProperty: crate::private::Sealed + TrieValue {
    // Data marker whose data struct is the name-to-enum map for this property.
    #[doc(hidden)]
    type DataMarker: DataMarker<DataStruct = PropertyValueNameToEnumMap<'static>>;
    // Compiled-data instance of the map; only exists with the `compiled_data` feature.
    #[doc(hidden)]
    #[cfg(feature = "compiled_data")]
    const SINGLETON: &'static PropertyValueNameToEnumMap<'static>;
}
// Abstract over Linear/Sparse/Script representation
// This trait is implicitly sealed by not being exported.
pub trait PropertyEnumToValueNameLookup {
    // Returns the name stored for the property value `prop`, or `None` if there is none.
    fn get(&self, prop: u32) -> Option<&str>;
}
| impl PropertyEnumToValueNameLookup for PropertyEnumToValueNameLinearMap<'_> { | ||
| fn get(&self, prop: u32) -> Option<&str> { | ||
| self.map.get(usize::try_from(prop).ok()?) | ||
| } | ||
| } | ||
// Sparse representation: the property value, narrowed to `u16`, is used as a key into `map`.
#[cfg(feature = "alloc")]
impl PropertyEnumToValueNameLookup for PropertyEnumToValueNameSparseMap<'_> {
    fn get(&self, prop: u32) -> Option<&str> {
        // Values above `u16::MAX` cannot be keys of the map.
        let key = u16::try_from(prop).ok()?;
        self.map.get(&key)
    }
}
| impl PropertyEnumToValueNameLookup for PropertyScriptToIcuScriptMap<'_> { | ||
| fn get(&self, prop: u32) -> Option<&str> { | ||
| self.map | ||
| .get_ule_ref(usize::try_from(prop).ok()?) | ||
| .and_then(|no| no.as_ref()) | ||
| .map(|s| s.as_str()) | ||
| } | ||
| } | ||
/// A property whose value names can be represented as strings.
pub trait NamedEnumeratedProperty: ParseableEnumeratedProperty {
    // Owned (`'static`) data struct holding the long names; its yoked output is
    // `DataStructLongBorrowed`.
    #[doc(hidden)]
    type DataStructLong: 'static
        + for<'a> Yokeable<'a, Output = Self::DataStructLongBorrowed<'a>>
        + PropertyEnumToValueNameLookup;
    // Owned (`'static`) data struct holding the short names; its yoked output is
    // `DataStructShortBorrowed`.
    #[doc(hidden)]
    type DataStructShort: 'static
        + for<'a> Yokeable<'a, Output = Self::DataStructShortBorrowed<'a>>
        + PropertyEnumToValueNameLookup;
    // Borrowed form of the long-name data struct.
    #[doc(hidden)]
    type DataStructLongBorrowed<'a>: PropertyEnumToValueNameLookup;
    // Borrowed form of the short-name data struct.
    #[doc(hidden)]
    type DataStructShortBorrowed<'a>: PropertyEnumToValueNameLookup;
    // Data marker for loading the long-name data struct.
    #[doc(hidden)]
    type DataMarkerLong: DataMarker<DataStruct = Self::DataStructLong>;
    // Data marker for loading the short-name data struct.
    #[doc(hidden)]
    type DataMarkerShort: DataMarker<DataStruct = Self::DataStructShort>;
    // Compiled-data long names; only exists with the `compiled_data` feature.
    #[doc(hidden)]
    #[cfg(feature = "compiled_data")]
    const SINGLETON_LONG: &'static Self::DataStructLongBorrowed<'static>;
    // Compiled-data short names; only exists with the `compiled_data` feature.
    #[doc(hidden)]
    #[cfg(feature = "compiled_data")]
    const SINGLETON_SHORT: &'static Self::DataStructShortBorrowed<'static>;
    // The four `nep_*_identity*` functions convert between the yoked/owned and the
    // borrowed spelling of the same data struct type.
    // These wouldn't be necessary if Yoke used GATs (#6057)
    #[doc(hidden)]
    fn nep_long_identity<'a>(
        stat: &'a <Self::DataStructLong as Yokeable<'a>>::Output,
    ) -> &'a Self::DataStructLongBorrowed<'a>;
    #[doc(hidden)]
    fn nep_long_identity_static(
        stat: &'static Self::DataStructLongBorrowed<'static>,
    ) -> &'static Self::DataStructLong;
    #[doc(hidden)]
    fn nep_short_identity<'a>(
        stat: &'a <Self::DataStructShort as Yokeable<'a>>::Output,
    ) -> &'a Self::DataStructShortBorrowed<'a>;
    #[doc(hidden)]
    fn nep_short_identity_static(
        stat: &'static Self::DataStructShortBorrowed<'static>,
    ) -> &'static Self::DataStructShort;
    /// Convenience method for `PropertyParser::new().get_loose(s)`
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    #[cfg(feature = "compiled_data")]
    fn try_from_str(s: &str) -> Option<Self> {
        PropertyParser::new().get_loose(s)
    }
    /// Convenience method for `PropertyNamesLong::new().get(*self)`.
    ///
    /// This method does not panic: if the lookup yields `None`, the literal string
    /// `"unreachable"` is returned instead.
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    #[cfg(feature = "compiled_data")]
    fn long_name(&self) -> &'static str {
        PropertyNamesLong::new().get(*self).unwrap_or("unreachable")
    }
    /// Convenience method for `PropertyNamesShort::new().get(*self)`.
    ///
    /// This method does not panic: if the lookup yields `None`, the literal string
    /// `"unreachable"` is returned instead.
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    #[cfg(feature = "compiled_data")]
    fn short_name(&self) -> &'static str {
        PropertyNamesShort::new()
            .get(*self)
            .unwrap_or("unreachable")
    }
}
// Implements the name-lookup traits for one property type.
//
// Input shape:
//
//     impl Type {
//         ParseMarker / PARSE_SINGLETON;
//         // optional pair of lines, short names first, then long names:
//         ShortDataStruct / ShortMarker / SHORT_SINGLETON;
//         LongDataStruct / LongMarker / LONG_SINGLETON;
//     }
//
// The first line always produces a `ParseableEnumeratedProperty` impl. When the optional
// pair is present, a `NamedEnumeratedProperty` impl is produced as well, carrying any
// attributes written in front of the pair.
macro_rules! impl_value_getter {
    (
        impl $ty:ident {
            $marker_n2e:ident / $singleton_n2e:ident;
            $(
                $(#[$meta:meta])*
                $data_struct_s:ident / $marker_e2sn:ident / $singleton_e2sn:ident;
                $data_struct_l:ident / $marker_e2ln:ident / $singleton_e2ln:ident;
            )?
        }
    ) => {
        impl ParseableEnumeratedProperty for $ty {
            type DataMarker = $marker_n2e;
            #[cfg(feature = "compiled_data")]
            const SINGLETON: &'static PropertyValueNameToEnumMap<'static> = crate::provider::Baked::$singleton_n2e;
        }
        $(
            $(#[$meta])*
            impl NamedEnumeratedProperty for $ty {
                type DataStructLong = $data_struct_l<'static>;
                type DataStructShort = $data_struct_s<'static>;
                type DataStructLongBorrowed<'a> = $data_struct_l<'a>;
                type DataStructShortBorrowed<'a> = $data_struct_s<'a>;
                type DataMarkerLong = crate::provider::$marker_e2ln;
                type DataMarkerShort = crate::provider::$marker_e2sn;
                #[cfg(feature = "compiled_data")]
                const SINGLETON_LONG: &'static Self::DataStructLong = crate::provider::Baked::$singleton_e2ln;
                #[cfg(feature = "compiled_data")]
                const SINGLETON_SHORT: &'static Self::DataStructShort = crate::provider::Baked::$singleton_e2sn;
                // The identity functions simply return their argument: the owned and the
                // borrowed data struct are the same generic type at different lifetimes.
                fn nep_long_identity<'a>(yoked: &'a $data_struct_l<'a>) -> &'a Self::DataStructLongBorrowed<'a> {
                    yoked
                }
                fn nep_long_identity_static(stat: &'static $data_struct_l<'static>) -> &'static $data_struct_l<'static> {
                    stat
                }
                fn nep_short_identity<'a>(yoked: &'a $data_struct_s<'a>) -> &'a Self::DataStructShortBorrowed<'a> {
                    yoked
                }
                fn nep_short_identity_static(stat: &'static $data_struct_s<'static>) -> &'static $data_struct_s<'static> {
                    stat
                }
            }
        )?
    };
}
| // One `impl_value_getter!` call per enumerated property type. The first line of each call names | ||
| // the parse (name-to-enum) data marker and its baked singleton. The optional second and third | ||
| // lines give the short-name and long-name data struct / marker / singleton and switch on the | ||
| // `NamedEnumeratedProperty` impl. `GeneralCategoryGroup` passes only the parse line. | ||
| impl_value_getter! { | ||
| impl BidiClass { | ||
| PropertyNameParseBidiClassV1 / SINGLETON_PROPERTY_NAME_PARSE_BIDI_CLASS_V1; | ||
| PropertyEnumToValueNameLinearMap / PropertyNameShortBidiClassV1 / SINGLETON_PROPERTY_NAME_SHORT_BIDI_CLASS_V1; | ||
| PropertyEnumToValueNameLinearMap / PropertyNameLongBidiClassV1 / SINGLETON_PROPERTY_NAME_LONG_BIDI_CLASS_V1; | ||
| } | ||
| } | ||
| impl_value_getter! { | ||
| impl GeneralCategory { | ||
| PropertyNameParseGeneralCategoryV1 / SINGLETON_PROPERTY_NAME_PARSE_GENERAL_CATEGORY_V1; | ||
| PropertyEnumToValueNameLinearMap / PropertyNameShortGeneralCategoryV1 / SINGLETON_PROPERTY_NAME_SHORT_GENERAL_CATEGORY_V1; | ||
| PropertyEnumToValueNameLinearMap / PropertyNameLongGeneralCategoryV1 / SINGLETON_PROPERTY_NAME_LONG_GENERAL_CATEGORY_V1; | ||
| } | ||
| } | ||
| impl_value_getter! { | ||
| impl GeneralCategoryGroup { | ||
| PropertyNameParseGeneralCategoryMaskV1 / SINGLETON_PROPERTY_NAME_PARSE_GENERAL_CATEGORY_MASK_V1; | ||
| } | ||
| } | ||
| impl_value_getter! { | ||
| impl Script { | ||
| PropertyNameParseScriptV1 / SINGLETON_PROPERTY_NAME_PARSE_SCRIPT_V1; | ||
| PropertyScriptToIcuScriptMap / PropertyNameShortScriptV1 / SINGLETON_PROPERTY_NAME_SHORT_SCRIPT_V1; | ||
| PropertyEnumToValueNameLinearMap / PropertyNameLongScriptV1 / SINGLETON_PROPERTY_NAME_LONG_SCRIPT_V1; | ||
| } | ||
| } | ||
| impl_value_getter! { | ||
| impl HangulSyllableType { | ||
| PropertyNameParseHangulSyllableTypeV1 / SINGLETON_PROPERTY_NAME_PARSE_HANGUL_SYLLABLE_TYPE_V1; | ||
| PropertyEnumToValueNameLinearMap / PropertyNameShortHangulSyllableTypeV1 / SINGLETON_PROPERTY_NAME_SHORT_HANGUL_SYLLABLE_TYPE_V1; | ||
| PropertyEnumToValueNameLinearMap / PropertyNameLongHangulSyllableTypeV1 / SINGLETON_PROPERTY_NAME_LONG_HANGUL_SYLLABLE_TYPE_V1; | ||
| } | ||
| } | ||
| impl_value_getter! { | ||
| impl EastAsianWidth { | ||
| PropertyNameParseEastAsianWidthV1 / SINGLETON_PROPERTY_NAME_PARSE_EAST_ASIAN_WIDTH_V1; | ||
| PropertyEnumToValueNameLinearMap / PropertyNameShortEastAsianWidthV1 / SINGLETON_PROPERTY_NAME_SHORT_EAST_ASIAN_WIDTH_V1; | ||
| PropertyEnumToValueNameLinearMap / PropertyNameLongEastAsianWidthV1 / SINGLETON_PROPERTY_NAME_LONG_EAST_ASIAN_WIDTH_V1; | ||
| } | ||
| } | ||
| impl_value_getter! { | ||
| impl LineBreak { | ||
| PropertyNameParseLineBreakV1 / SINGLETON_PROPERTY_NAME_PARSE_LINE_BREAK_V1; | ||
| PropertyEnumToValueNameLinearMap / PropertyNameShortLineBreakV1 / SINGLETON_PROPERTY_NAME_SHORT_LINE_BREAK_V1; | ||
| PropertyEnumToValueNameLinearMap / PropertyNameLongLineBreakV1 / SINGLETON_PROPERTY_NAME_LONG_LINE_BREAK_V1; | ||
| } | ||
| } | ||
| impl_value_getter! { | ||
| impl GraphemeClusterBreak { | ||
| PropertyNameParseGraphemeClusterBreakV1 / SINGLETON_PROPERTY_NAME_PARSE_GRAPHEME_CLUSTER_BREAK_V1; | ||
| PropertyEnumToValueNameLinearMap / PropertyNameShortGraphemeClusterBreakV1 / SINGLETON_PROPERTY_NAME_SHORT_GRAPHEME_CLUSTER_BREAK_V1; | ||
| PropertyEnumToValueNameLinearMap / PropertyNameLongGraphemeClusterBreakV1 / SINGLETON_PROPERTY_NAME_LONG_GRAPHEME_CLUSTER_BREAK_V1; | ||
| } | ||
| } | ||
| impl_value_getter! { | ||
| impl WordBreak { | ||
| PropertyNameParseWordBreakV1 / SINGLETON_PROPERTY_NAME_PARSE_WORD_BREAK_V1; | ||
| PropertyEnumToValueNameLinearMap / PropertyNameShortWordBreakV1 / SINGLETON_PROPERTY_NAME_SHORT_WORD_BREAK_V1; | ||
| PropertyEnumToValueNameLinearMap / PropertyNameLongWordBreakV1 / SINGLETON_PROPERTY_NAME_LONG_WORD_BREAK_V1; | ||
| } | ||
| } | ||
| impl_value_getter! { | ||
| impl SentenceBreak { | ||
| PropertyNameParseSentenceBreakV1 / SINGLETON_PROPERTY_NAME_PARSE_SENTENCE_BREAK_V1; | ||
| PropertyEnumToValueNameLinearMap / PropertyNameShortSentenceBreakV1 / SINGLETON_PROPERTY_NAME_SHORT_SENTENCE_BREAK_V1; | ||
| PropertyEnumToValueNameLinearMap / PropertyNameLongSentenceBreakV1 / SINGLETON_PROPERTY_NAME_LONG_SENTENCE_BREAK_V1; | ||
| } | ||
| } | ||
| impl_value_getter! { | ||
| impl CanonicalCombiningClass { | ||
| PropertyNameParseCanonicalCombiningClassV1 / SINGLETON_PROPERTY_NAME_PARSE_CANONICAL_COMBINING_CLASS_V1; | ||
| #[cfg(feature = "alloc")] | ||
| /// ✨ *Enabled with the `alloc` Cargo feature.* | ||
| PropertyEnumToValueNameSparseMap / PropertyNameShortCanonicalCombiningClassV1 / SINGLETON_PROPERTY_NAME_SHORT_CANONICAL_COMBINING_CLASS_V1; | ||
| PropertyEnumToValueNameSparseMap / PropertyNameLongCanonicalCombiningClassV1 / SINGLETON_PROPERTY_NAME_LONG_CANONICAL_COMBINING_CLASS_V1; | ||
| } | ||
| } | ||
| impl_value_getter! { | ||
| impl IndicSyllabicCategory { | ||
| PropertyNameParseIndicSyllabicCategoryV1 / SINGLETON_PROPERTY_NAME_PARSE_INDIC_SYLLABIC_CATEGORY_V1; | ||
| PropertyEnumToValueNameLinearMap / PropertyNameShortIndicSyllabicCategoryV1 / SINGLETON_PROPERTY_NAME_SHORT_INDIC_SYLLABIC_CATEGORY_V1; | ||
| PropertyEnumToValueNameLinearMap / PropertyNameLongIndicSyllabicCategoryV1 / SINGLETON_PROPERTY_NAME_LONG_INDIC_SYLLABIC_CATEGORY_V1; | ||
| } | ||
| } | ||
| impl_value_getter! { | ||
| impl JoiningType { | ||
| PropertyNameParseJoiningTypeV1 / SINGLETON_PROPERTY_NAME_PARSE_JOINING_TYPE_V1; | ||
| PropertyEnumToValueNameLinearMap / PropertyNameShortJoiningTypeV1 / SINGLETON_PROPERTY_NAME_SHORT_JOINING_TYPE_V1; | ||
| PropertyEnumToValueNameLinearMap / PropertyNameLongJoiningTypeV1 / SINGLETON_PROPERTY_NAME_LONG_JOINING_TYPE_V1; | ||
| } | ||
| } | ||
| impl_value_getter! { | ||
| impl VerticalOrientation { | ||
| PropertyNameParseVerticalOrientationV1 / SINGLETON_PROPERTY_NAME_PARSE_VERTICAL_ORIENTATION_V1; | ||
| PropertyEnumToValueNameLinearMap / PropertyNameShortVerticalOrientationV1 / SINGLETON_PROPERTY_NAME_SHORT_VERTICAL_ORIENTATION_V1; | ||
| PropertyEnumToValueNameLinearMap / PropertyNameLongVerticalOrientationV1 / SINGLETON_PROPERTY_NAME_LONG_VERTICAL_ORIENTATION_V1; | ||
| } | ||
| } | || |
Sorry, the diff of this file is too big to display
| // This file is part of ICU4X. For terms of use, please see the file | ||
| // called LICENSE at the top level of the ICU4X source tree | ||
| // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). | ||
| // Provider structs must be stable | ||
| #![allow(clippy::exhaustive_structs, clippy::exhaustive_enums)] | ||
| //! 🚧 \[Unstable\] Data provider struct definitions for this ICU4X component. | ||
| //! | ||
| //! <div class="stab unstable"> | ||
| //! 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways, | ||
| //! including in SemVer minor releases. While the serde representation of data structs is guaranteed | ||
| //! to be stable, their Rust representation might not be. Use with caution. | ||
| //! </div> | ||
| //! | ||
| //! Read more about data providers: [`icu_provider`] | ||
| pub mod names; | ||
| #[cfg(feature = "alloc")] | ||
| pub use names::{ | ||
| PropertyNameLongCanonicalCombiningClassV1, PropertyNameShortCanonicalCombiningClassV1, | ||
| }; | ||
| pub use names::{ | ||
| PropertyNameLongBidiClassV1, PropertyNameLongEastAsianWidthV1, | ||
| PropertyNameLongGeneralCategoryV1, PropertyNameLongGraphemeClusterBreakV1, | ||
| PropertyNameLongHangulSyllableTypeV1, PropertyNameLongIndicSyllabicCategoryV1, | ||
| PropertyNameLongJoiningTypeV1, PropertyNameLongLineBreakV1, PropertyNameLongScriptV1, | ||
| PropertyNameLongSentenceBreakV1, PropertyNameLongVerticalOrientationV1, | ||
| PropertyNameLongWordBreakV1, PropertyNameParseBidiClassV1, | ||
| PropertyNameParseCanonicalCombiningClassV1, PropertyNameParseEastAsianWidthV1, | ||
| PropertyNameParseGeneralCategoryMaskV1, PropertyNameParseGeneralCategoryV1, | ||
| PropertyNameParseGraphemeClusterBreakV1, PropertyNameParseHangulSyllableTypeV1, | ||
| PropertyNameParseIndicSyllabicCategoryV1, PropertyNameParseJoiningTypeV1, | ||
| PropertyNameParseLineBreakV1, PropertyNameParseScriptV1, PropertyNameParseSentenceBreakV1, | ||
| PropertyNameParseVerticalOrientationV1, PropertyNameParseWordBreakV1, | ||
| PropertyNameShortBidiClassV1, PropertyNameShortEastAsianWidthV1, | ||
| PropertyNameShortGeneralCategoryV1, PropertyNameShortGraphemeClusterBreakV1, | ||
| PropertyNameShortHangulSyllableTypeV1, PropertyNameShortIndicSyllabicCategoryV1, | ||
| PropertyNameShortJoiningTypeV1, PropertyNameShortLineBreakV1, PropertyNameShortScriptV1, | ||
| PropertyNameShortSentenceBreakV1, PropertyNameShortVerticalOrientationV1, | ||
| PropertyNameShortWordBreakV1, | ||
| }; | ||
| pub use crate::props::gc::GeneralCategoryULE; | ||
| use crate::props::*; | ||
| use crate::script::ScriptWithExt; | ||
| use core::ops::RangeInclusive; | ||
| use icu_collections::codepointinvlist::CodePointInversionList; | ||
| use icu_collections::codepointinvliststringlist::CodePointInversionListAndStringList; | ||
| use icu_collections::codepointtrie::{CodePointMapRange, CodePointTrie, TrieValue}; | ||
| use icu_provider::prelude::*; | ||
| use zerofrom::ZeroFrom; | ||
| use zerovec::{VarZeroVec, ZeroSlice}; | ||
| #[cfg(feature = "compiled_data")] | ||
| #[derive(Debug)] | ||
| /// Baked data: the unit struct on which this crate's compiled-data provider macros are invoked. | ||
| /// | ||
| /// <div class="stab unstable"> | ||
| /// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways, | ||
| /// including in SemVer minor releases. In particular, the `DataProvider` implementations are only | ||
| /// guaranteed to match with this version's `*_unstable` providers. Use with caution. | ||
| /// </div> | ||
| pub struct Baked; | ||
| // Anonymous `const _` scope: the glob import and the helper `icu` module declared inside it | ||
| // are not nameable from the rest of this module. | ||
| // NOTE(review): the `icu` module aliases this crate as `properties` and `icu_collections` as | ||
| // `collections`, presumably so that paths emitted by the `icu_properties_data` macros resolve | ||
| // here; confirm against that crate. | ||
| // The two canonical-combining-class name macros are only invoked when `alloc` is enabled. | ||
| #[cfg(feature = "compiled_data")] | ||
| #[allow(unused_imports)] | ||
| const _: () = { | ||
| use icu_properties_data::*; | ||
| pub mod icu { | ||
| pub use crate as properties; | ||
| pub use icu_collections as collections; | ||
| } | ||
| make_provider!(Baked); | ||
| impl_property_binary_alnum_v1!(Baked); | ||
| impl_property_binary_alphabetic_v1!(Baked); | ||
| impl_property_binary_ascii_hex_digit_v1!(Baked); | ||
| impl_property_binary_basic_emoji_v1!(Baked); | ||
| impl_property_binary_bidi_control_v1!(Baked); | ||
| impl_property_binary_bidi_mirrored_v1!(Baked); | ||
| impl_property_binary_blank_v1!(Baked); | ||
| impl_property_binary_case_ignorable_v1!(Baked); | ||
| impl_property_binary_case_sensitive_v1!(Baked); | ||
| impl_property_binary_cased_v1!(Baked); | ||
| impl_property_binary_changes_when_casefolded_v1!(Baked); | ||
| impl_property_binary_changes_when_casemapped_v1!(Baked); | ||
| impl_property_binary_changes_when_lowercased_v1!(Baked); | ||
| impl_property_binary_changes_when_nfkc_casefolded_v1!(Baked); | ||
| impl_property_binary_changes_when_titlecased_v1!(Baked); | ||
| impl_property_binary_changes_when_uppercased_v1!(Baked); | ||
| impl_property_binary_dash_v1!(Baked); | ||
| impl_property_binary_default_ignorable_code_point_v1!(Baked); | ||
| impl_property_binary_deprecated_v1!(Baked); | ||
| impl_property_binary_diacritic_v1!(Baked); | ||
| impl_property_binary_emoji_component_v1!(Baked); | ||
| impl_property_binary_emoji_modifier_base_v1!(Baked); | ||
| impl_property_binary_emoji_modifier_v1!(Baked); | ||
| impl_property_binary_emoji_presentation_v1!(Baked); | ||
| impl_property_binary_emoji_v1!(Baked); | ||
| impl_property_binary_extended_pictographic_v1!(Baked); | ||
| impl_property_binary_extender_v1!(Baked); | ||
| impl_property_binary_full_composition_exclusion_v1!(Baked); | ||
| impl_property_binary_graph_v1!(Baked); | ||
| impl_property_binary_grapheme_base_v1!(Baked); | ||
| impl_property_binary_grapheme_extend_v1!(Baked); | ||
| impl_property_binary_grapheme_link_v1!(Baked); | ||
| impl_property_binary_hex_digit_v1!(Baked); | ||
| impl_property_binary_hyphen_v1!(Baked); | ||
| impl_property_binary_id_compat_math_continue_v1!(Baked); | ||
| impl_property_binary_id_compat_math_start_v1!(Baked); | ||
| impl_property_binary_id_continue_v1!(Baked); | ||
| impl_property_binary_id_start_v1!(Baked); | ||
| impl_property_binary_ideographic_v1!(Baked); | ||
| impl_property_binary_ids_binary_operator_v1!(Baked); | ||
| impl_property_binary_ids_trinary_operator_v1!(Baked); | ||
| impl_property_binary_ids_unary_operator_v1!(Baked); | ||
| impl_property_binary_join_control_v1!(Baked); | ||
| impl_property_binary_logical_order_exception_v1!(Baked); | ||
| impl_property_binary_lowercase_v1!(Baked); | ||
| impl_property_binary_math_v1!(Baked); | ||
| impl_property_binary_modifier_combining_mark_v1!(Baked); | ||
| impl_property_binary_nfc_inert_v1!(Baked); | ||
| impl_property_binary_nfd_inert_v1!(Baked); | ||
| impl_property_binary_nfkc_inert_v1!(Baked); | ||
| impl_property_binary_nfkd_inert_v1!(Baked); | ||
| impl_property_binary_noncharacter_code_point_v1!(Baked); | ||
| impl_property_binary_pattern_syntax_v1!(Baked); | ||
| impl_property_binary_pattern_white_space_v1!(Baked); | ||
| impl_property_binary_prepended_concatenation_mark_v1!(Baked); | ||
| impl_property_binary_print_v1!(Baked); | ||
| impl_property_binary_quotation_mark_v1!(Baked); | ||
| impl_property_binary_radical_v1!(Baked); | ||
| impl_property_binary_regional_indicator_v1!(Baked); | ||
| impl_property_binary_segment_starter_v1!(Baked); | ||
| impl_property_binary_sentence_terminal_v1!(Baked); | ||
| impl_property_binary_soft_dotted_v1!(Baked); | ||
| impl_property_binary_terminal_punctuation_v1!(Baked); | ||
| impl_property_binary_unified_ideograph_v1!(Baked); | ||
| impl_property_binary_uppercase_v1!(Baked); | ||
| impl_property_binary_variation_selector_v1!(Baked); | ||
| impl_property_binary_white_space_v1!(Baked); | ||
| impl_property_binary_xdigit_v1!(Baked); | ||
| impl_property_binary_xid_continue_v1!(Baked); | ||
| impl_property_binary_xid_start_v1!(Baked); | ||
| impl_property_enum_bidi_class_v1!(Baked); | ||
| impl_property_enum_bidi_mirroring_glyph_v1!(Baked); | ||
| impl_property_enum_canonical_combining_class_v1!(Baked); | ||
| impl_property_enum_east_asian_width_v1!(Baked); | ||
| impl_property_enum_general_category_v1!(Baked); | ||
| impl_property_enum_grapheme_cluster_break_v1!(Baked); | ||
| impl_property_enum_hangul_syllable_type_v1!(Baked); | ||
| impl_property_enum_indic_conjunct_break_v1!(Baked); | ||
| impl_property_enum_indic_syllabic_category_v1!(Baked); | ||
| impl_property_enum_joining_type_v1!(Baked); | ||
| impl_property_enum_line_break_v1!(Baked); | ||
| impl_property_enum_script_v1!(Baked); | ||
| impl_property_enum_sentence_break_v1!(Baked); | ||
| impl_property_enum_vertical_orientation_v1!(Baked); | ||
| impl_property_enum_word_break_v1!(Baked); | ||
| impl_property_name_long_bidi_class_v1!(Baked); | ||
| #[cfg(feature = "alloc")] | ||
| impl_property_name_long_canonical_combining_class_v1!(Baked); | ||
| impl_property_name_long_east_asian_width_v1!(Baked); | ||
| impl_property_name_long_general_category_v1!(Baked); | ||
| impl_property_name_long_grapheme_cluster_break_v1!(Baked); | ||
| impl_property_name_long_hangul_syllable_type_v1!(Baked); | ||
| impl_property_name_long_indic_syllabic_category_v1!(Baked); | ||
| impl_property_name_long_joining_type_v1!(Baked); | ||
| impl_property_name_long_line_break_v1!(Baked); | ||
| impl_property_name_long_script_v1!(Baked); | ||
| impl_property_name_long_sentence_break_v1!(Baked); | ||
| impl_property_name_long_vertical_orientation_v1!(Baked); | ||
| impl_property_name_long_word_break_v1!(Baked); | ||
| impl_property_name_parse_bidi_class_v1!(Baked); | ||
| impl_property_name_parse_canonical_combining_class_v1!(Baked); | ||
| impl_property_name_parse_east_asian_width_v1!(Baked); | ||
| impl_property_name_parse_general_category_mask_v1!(Baked); | ||
| impl_property_name_parse_general_category_v1!(Baked); | ||
| impl_property_name_parse_grapheme_cluster_break_v1!(Baked); | ||
| impl_property_name_parse_hangul_syllable_type_v1!(Baked); | ||
| impl_property_name_parse_indic_syllabic_category_v1!(Baked); | ||
| impl_property_name_parse_joining_type_v1!(Baked); | ||
| impl_property_name_parse_line_break_v1!(Baked); | ||
| impl_property_name_parse_script_v1!(Baked); | ||
| impl_property_name_parse_sentence_break_v1!(Baked); | ||
| impl_property_name_parse_vertical_orientation_v1!(Baked); | ||
| impl_property_name_parse_word_break_v1!(Baked); | ||
| impl_property_name_short_bidi_class_v1!(Baked); | ||
| #[cfg(feature = "alloc")] | ||
| impl_property_name_short_canonical_combining_class_v1!(Baked); | ||
| impl_property_name_short_east_asian_width_v1!(Baked); | ||
| impl_property_name_short_general_category_v1!(Baked); | ||
| impl_property_name_short_grapheme_cluster_break_v1!(Baked); | ||
| impl_property_name_short_hangul_syllable_type_v1!(Baked); | ||
| impl_property_name_short_indic_syllabic_category_v1!(Baked); | ||
| impl_property_name_short_joining_type_v1!(Baked); | ||
| impl_property_name_short_line_break_v1!(Baked); | ||
| impl_property_name_short_script_v1!(Baked); | ||
| impl_property_name_short_sentence_break_v1!(Baked); | ||
| impl_property_name_short_vertical_orientation_v1!(Baked); | ||
| impl_property_name_short_word_break_v1!(Baked); | ||
| impl_property_script_with_extensions_v1!(Baked); | ||
| }; | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryAlnumV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryAlnumV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryAlphabeticV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryAlphabeticV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryAsciiHexDigitV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryAsciiHexDigitV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryBidiControlV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryBidiControlV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryBidiMirroredV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryBidiMirroredV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryBlankV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryBlankV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryCasedV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryCasedV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryCaseIgnorableV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryCaseIgnorableV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryCaseSensitiveV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryCaseSensitiveV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryChangesWhenCasefoldedV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryChangesWhenCasefoldedV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryChangesWhenCasemappedV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryChangesWhenCasemappedV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryChangesWhenLowercasedV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryChangesWhenLowercasedV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryChangesWhenNfkcCasefoldedV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryChangesWhenNfkcCasefoldedV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryChangesWhenTitlecasedV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryChangesWhenTitlecasedV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryChangesWhenUppercasedV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryChangesWhenUppercasedV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryDashV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryDashV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryDefaultIgnorableCodePointV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryDefaultIgnorableCodePointV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryDeprecatedV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryDeprecatedV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryDiacriticV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryDiacriticV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryEmojiComponentV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryEmojiComponentV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryEmojiModifierBaseV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryEmojiModifierBaseV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryEmojiModifierV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryEmojiModifierV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryEmojiPresentationV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryEmojiPresentationV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryEmojiV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryEmojiV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryExtendedPictographicV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryExtendedPictographicV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryExtenderV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryExtenderV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryFullCompositionExclusionV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryFullCompositionExclusionV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryGraphemeBaseV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryGraphemeBaseV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryGraphemeExtendV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryGraphemeExtendV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryGraphemeLinkV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryGraphemeLinkV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryGraphV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryGraphV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryHexDigitV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryHexDigitV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryHyphenV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryHyphenV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryIdCompatMathContinueV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryIdCompatMathContinueV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryIdCompatMathStartV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryIdCompatMathStartV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryIdContinueV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryIdContinueV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryIdeographicV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryIdeographicV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryIdsBinaryOperatorV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryIdsBinaryOperatorV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryIdStartV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryIdStartV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryIdsTrinaryOperatorV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryIdsTrinaryOperatorV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryIdsUnaryOperatorV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryIdsUnaryOperatorV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryJoinControlV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryJoinControlV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryLogicalOrderExceptionV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryLogicalOrderExceptionV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryLowercaseV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryLowercaseV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryMathV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryMathV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryModifierCombiningMarkV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryModifierCombiningMarkV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryNfcInertV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryNfcInertV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryNfdInertV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryNfdInertV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryNfkcInertV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryNfkcInertV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryNfkdInertV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryNfkdInertV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryNoncharacterCodePointV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryNoncharacterCodePointV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryPatternSyntaxV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryPatternSyntaxV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryPatternWhiteSpaceV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryPatternWhiteSpaceV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryPrependedConcatenationMarkV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryPrependedConcatenationMarkV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryPrintV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryPrintV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryQuotationMarkV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryQuotationMarkV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryRadicalV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryRadicalV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryRegionalIndicatorV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryRegionalIndicatorV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinarySegmentStarterV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinarySegmentStarterV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinarySentenceTerminalV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinarySentenceTerminalV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinarySoftDottedV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinarySoftDottedV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryTerminalPunctuationV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryTerminalPunctuationV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryUnifiedIdeographV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryUnifiedIdeographV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryUppercaseV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryUppercaseV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryVariationSelectorV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryVariationSelectorV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryWhiteSpaceV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryWhiteSpaceV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryXdigitV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryXdigitV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryXidContinueV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryXidContinueV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryXidStartV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>`. | ||
| PropertyBinaryXidStartV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Singleton data marker for the `BidiClass` Unicode property; its data struct is a `PropertyCodePointMap` of `BidiClass`. | ||
| PropertyEnumBidiClassV1, | ||
| PropertyCodePointMap<'static, crate::props::BidiClass>, | ||
| is_singleton = true, | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Singleton data marker for the `CanonicalCombiningClass` Unicode property; its data struct is a `PropertyCodePointMap` of `CanonicalCombiningClass`. | ||
| PropertyEnumCanonicalCombiningClassV1, | ||
| PropertyCodePointMap<'static, crate::props::CanonicalCombiningClass>, | ||
| is_singleton = true, | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Singleton data marker for the `EastAsianWidth` Unicode property; its data struct is a `PropertyCodePointMap` of `EastAsianWidth`. | ||
| PropertyEnumEastAsianWidthV1, | ||
| PropertyCodePointMap<'static, crate::props::EastAsianWidth>, | ||
| is_singleton = true, | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Singleton data marker for the `GeneralCategory` Unicode property; its data struct is a `PropertyCodePointMap` of `GeneralCategory`. | ||
| PropertyEnumGeneralCategoryV1, | ||
| PropertyCodePointMap<'static, crate::props::GeneralCategory>, | ||
| is_singleton = true, | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Singleton data marker for the `GraphemeClusterBreak` Unicode property; its data struct is a `PropertyCodePointMap` of `GraphemeClusterBreak`. | ||
| PropertyEnumGraphemeClusterBreakV1, | ||
| PropertyCodePointMap<'static, crate::props::GraphemeClusterBreak>, | ||
| is_singleton = true, | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Singleton data marker for the `HangulSyllableType` Unicode property; its data struct is a `PropertyCodePointMap` of `HangulSyllableType`. | ||
| PropertyEnumHangulSyllableTypeV1, | ||
| PropertyCodePointMap<'static, crate::props::HangulSyllableType>, | ||
| is_singleton = true, | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Singleton data marker for the `IndicConjunctBreak` Unicode property; its data struct is a `PropertyCodePointMap` of `IndicConjunctBreak`. | ||
| PropertyEnumIndicConjunctBreakV1, | ||
| PropertyCodePointMap<'static, crate::props::IndicConjunctBreak>, | ||
| is_singleton = true, | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Singleton data marker for the `IndicSyllabicCategory` Unicode property; its data struct is a `PropertyCodePointMap` of `IndicSyllabicCategory`. | ||
| PropertyEnumIndicSyllabicCategoryV1, | ||
| PropertyCodePointMap<'static, crate::props::IndicSyllabicCategory>, | ||
| is_singleton = true, | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Singleton data marker for the `JoiningType` Unicode property; its data struct is a `PropertyCodePointMap` of `JoiningType`. | ||
| PropertyEnumJoiningTypeV1, | ||
| PropertyCodePointMap<'static, crate::props::JoiningType>, | ||
| is_singleton = true, | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Singleton data marker for the `LineBreak` Unicode property; its data struct is a `PropertyCodePointMap` of `LineBreak`. | ||
| PropertyEnumLineBreakV1, | ||
| PropertyCodePointMap<'static, crate::props::LineBreak>, | ||
| is_singleton = true, | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Singleton data marker for the `Script` Unicode property; its data struct is a `PropertyCodePointMap` of `Script`. | ||
| PropertyEnumScriptV1, | ||
| PropertyCodePointMap<'static, crate::props::Script>, | ||
| is_singleton = true, | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Singleton data marker for the `SentenceBreak` Unicode property; its data struct is a `PropertyCodePointMap` of `SentenceBreak`. | ||
| PropertyEnumSentenceBreakV1, | ||
| PropertyCodePointMap<'static, crate::props::SentenceBreak>, | ||
| is_singleton = true, | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Singleton data marker for the `VerticalOrientation` Unicode property; its data struct is a `PropertyCodePointMap` of `VerticalOrientation`. | ||
| PropertyEnumVerticalOrientationV1, | ||
| PropertyCodePointMap<'static, crate::props::VerticalOrientation>, | ||
| is_singleton = true, | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Data marker for the 'WordBreak' Unicode property | ||
| PropertyEnumWordBreakV1, | ||
| PropertyCodePointMap<'static, crate::props::WordBreak>, | ||
| is_singleton = true, | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Data marker for the 'BidiMirroringGlyph' Unicode property | ||
| PropertyEnumBidiMirroringGlyphV1, | ||
| PropertyCodePointMap<'static, crate::bidi::BidiMirroringGlyph>, | ||
| is_singleton = true, | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryBasicEmojiV1` | ||
| PropertyBinaryBasicEmojiV1, | ||
| PropertyUnicodeSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyScriptWithExtensionsV1` | ||
| PropertyScriptWithExtensionsV1, | ||
| ScriptWithExtensionsProperty<'static>, | ||
| is_singleton = true | ||
| ); | ||
| /// All data keys in this module. | ||
| pub const MARKERS: &[DataMarkerInfo] = &[ | ||
| PropertyNameLongBidiClassV1::INFO, | ||
| #[cfg(feature = "alloc")] | ||
| PropertyNameLongCanonicalCombiningClassV1::INFO, | ||
| PropertyNameLongEastAsianWidthV1::INFO, | ||
| PropertyNameLongGeneralCategoryV1::INFO, | ||
| PropertyNameLongGraphemeClusterBreakV1::INFO, | ||
| PropertyNameLongHangulSyllableTypeV1::INFO, | ||
| PropertyNameLongIndicSyllabicCategoryV1::INFO, | ||
| PropertyNameLongJoiningTypeV1::INFO, | ||
| PropertyNameLongLineBreakV1::INFO, | ||
| PropertyNameLongScriptV1::INFO, | ||
| PropertyNameLongSentenceBreakV1::INFO, | ||
| PropertyNameLongVerticalOrientationV1::INFO, | ||
| PropertyNameLongWordBreakV1::INFO, | ||
| PropertyNameParseBidiClassV1::INFO, | ||
| PropertyNameParseCanonicalCombiningClassV1::INFO, | ||
| PropertyNameParseEastAsianWidthV1::INFO, | ||
| PropertyNameParseGeneralCategoryMaskV1::INFO, | ||
| PropertyNameParseGeneralCategoryV1::INFO, | ||
| PropertyNameParseGraphemeClusterBreakV1::INFO, | ||
| PropertyNameParseHangulSyllableTypeV1::INFO, | ||
| PropertyNameParseIndicSyllabicCategoryV1::INFO, | ||
| PropertyNameParseJoiningTypeV1::INFO, | ||
| PropertyNameParseLineBreakV1::INFO, | ||
| PropertyNameParseScriptV1::INFO, | ||
| PropertyNameParseSentenceBreakV1::INFO, | ||
| PropertyNameParseVerticalOrientationV1::INFO, | ||
| PropertyNameParseWordBreakV1::INFO, | ||
| PropertyNameShortBidiClassV1::INFO, | ||
| #[cfg(feature = "alloc")] | ||
| PropertyNameShortCanonicalCombiningClassV1::INFO, | ||
| PropertyNameShortEastAsianWidthV1::INFO, | ||
| PropertyNameShortGeneralCategoryV1::INFO, | ||
| PropertyNameShortGraphemeClusterBreakV1::INFO, | ||
| PropertyNameShortHangulSyllableTypeV1::INFO, | ||
| PropertyNameShortIndicSyllabicCategoryV1::INFO, | ||
| PropertyNameShortJoiningTypeV1::INFO, | ||
| PropertyNameShortLineBreakV1::INFO, | ||
| PropertyNameShortScriptV1::INFO, | ||
| PropertyNameShortSentenceBreakV1::INFO, | ||
| PropertyNameShortVerticalOrientationV1::INFO, | ||
| PropertyNameShortWordBreakV1::INFO, | ||
| PropertyBinaryAlnumV1::INFO, | ||
| PropertyBinaryAlphabeticV1::INFO, | ||
| PropertyBinaryAsciiHexDigitV1::INFO, | ||
| PropertyBinaryBidiControlV1::INFO, | ||
| PropertyBinaryBidiMirroredV1::INFO, | ||
| PropertyBinaryBlankV1::INFO, | ||
| PropertyBinaryCasedV1::INFO, | ||
| PropertyBinaryCaseIgnorableV1::INFO, | ||
| PropertyBinaryCaseSensitiveV1::INFO, | ||
| PropertyBinaryChangesWhenCasefoldedV1::INFO, | ||
| PropertyBinaryChangesWhenCasemappedV1::INFO, | ||
| PropertyBinaryChangesWhenLowercasedV1::INFO, | ||
| PropertyBinaryChangesWhenNfkcCasefoldedV1::INFO, | ||
| PropertyBinaryChangesWhenTitlecasedV1::INFO, | ||
| PropertyBinaryChangesWhenUppercasedV1::INFO, | ||
| PropertyBinaryDashV1::INFO, | ||
| PropertyBinaryDefaultIgnorableCodePointV1::INFO, | ||
| PropertyBinaryDeprecatedV1::INFO, | ||
| PropertyBinaryDiacriticV1::INFO, | ||
| PropertyBinaryEmojiComponentV1::INFO, | ||
| PropertyBinaryEmojiModifierBaseV1::INFO, | ||
| PropertyBinaryEmojiModifierV1::INFO, | ||
| PropertyBinaryEmojiPresentationV1::INFO, | ||
| PropertyBinaryEmojiV1::INFO, | ||
| PropertyBinaryExtendedPictographicV1::INFO, | ||
| PropertyBinaryExtenderV1::INFO, | ||
| PropertyBinaryFullCompositionExclusionV1::INFO, | ||
| PropertyBinaryGraphemeBaseV1::INFO, | ||
| PropertyBinaryGraphemeExtendV1::INFO, | ||
| PropertyBinaryGraphemeLinkV1::INFO, | ||
| PropertyBinaryGraphV1::INFO, | ||
| PropertyBinaryHexDigitV1::INFO, | ||
| PropertyBinaryHyphenV1::INFO, | ||
| PropertyBinaryIdCompatMathContinueV1::INFO, | ||
| PropertyBinaryIdCompatMathStartV1::INFO, | ||
| PropertyBinaryIdContinueV1::INFO, | ||
| PropertyBinaryIdeographicV1::INFO, | ||
| PropertyBinaryIdsBinaryOperatorV1::INFO, | ||
| PropertyBinaryIdStartV1::INFO, | ||
| PropertyBinaryIdsTrinaryOperatorV1::INFO, | ||
| PropertyBinaryIdsUnaryOperatorV1::INFO, | ||
| PropertyBinaryJoinControlV1::INFO, | ||
| PropertyBinaryLogicalOrderExceptionV1::INFO, | ||
| PropertyBinaryLowercaseV1::INFO, | ||
| PropertyBinaryMathV1::INFO, | ||
| PropertyBinaryModifierCombiningMarkV1::INFO, | ||
| PropertyBinaryNfcInertV1::INFO, | ||
| PropertyBinaryNfdInertV1::INFO, | ||
| PropertyBinaryNfkcInertV1::INFO, | ||
| PropertyBinaryNfkdInertV1::INFO, | ||
| PropertyBinaryNoncharacterCodePointV1::INFO, | ||
| PropertyBinaryPatternSyntaxV1::INFO, | ||
| PropertyBinaryPatternWhiteSpaceV1::INFO, | ||
| PropertyBinaryPrependedConcatenationMarkV1::INFO, | ||
| PropertyBinaryPrintV1::INFO, | ||
| PropertyBinaryQuotationMarkV1::INFO, | ||
| PropertyBinaryRadicalV1::INFO, | ||
| PropertyBinaryRegionalIndicatorV1::INFO, | ||
| PropertyBinarySegmentStarterV1::INFO, | ||
| PropertyBinarySentenceTerminalV1::INFO, | ||
| PropertyBinarySoftDottedV1::INFO, | ||
| PropertyBinaryTerminalPunctuationV1::INFO, | ||
| PropertyBinaryUnifiedIdeographV1::INFO, | ||
| PropertyBinaryUppercaseV1::INFO, | ||
| PropertyBinaryVariationSelectorV1::INFO, | ||
| PropertyBinaryWhiteSpaceV1::INFO, | ||
| PropertyBinaryXdigitV1::INFO, | ||
| PropertyBinaryXidContinueV1::INFO, | ||
| PropertyBinaryXidStartV1::INFO, | ||
| PropertyEnumBidiClassV1::INFO, | ||
| PropertyEnumCanonicalCombiningClassV1::INFO, | ||
| PropertyEnumEastAsianWidthV1::INFO, | ||
| PropertyEnumGeneralCategoryV1::INFO, | ||
| PropertyEnumGraphemeClusterBreakV1::INFO, | ||
| PropertyEnumHangulSyllableTypeV1::INFO, | ||
| PropertyEnumIndicConjunctBreakV1::INFO, | ||
| PropertyEnumIndicSyllabicCategoryV1::INFO, | ||
| PropertyEnumJoiningTypeV1::INFO, | ||
| PropertyEnumLineBreakV1::INFO, | ||
| PropertyEnumScriptV1::INFO, | ||
| PropertyEnumSentenceBreakV1::INFO, | ||
| PropertyEnumVerticalOrientationV1::INFO, | ||
| PropertyEnumWordBreakV1::INFO, | ||
| PropertyEnumBidiMirroringGlyphV1::INFO, | ||
| PropertyBinaryBasicEmojiV1::INFO, | ||
| PropertyScriptWithExtensionsV1::INFO, | ||
| ]; | ||
| /// A set of characters which share a particular property value. | ||
| /// | ||
| /// This data enum is extensible, more backends may be added in the future. | ||
| /// Old data can be used with newer code but not vice versa. | ||
| /// | ||
| /// <div class="stab unstable"> | ||
| /// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways, | ||
| /// including in SemVer minor releases. While the serde representation of data structs is guaranteed | ||
| /// to be stable, their Rust representation might not be. Use with caution. | ||
| /// </div> | ||
| #[derive(Debug, Eq, PartialEq, Clone, yoke::Yokeable, zerofrom::ZeroFrom)] | ||
| #[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))] | ||
| #[cfg_attr(feature = "datagen", databake(path = icu_properties::provider))] | ||
| #[cfg_attr(feature = "serde", derive(serde::Deserialize))] | ||
| #[non_exhaustive] | ||
| pub enum PropertyCodePointSet<'data> { | ||
| /// The set of characters, represented as an inversion list | ||
| InversionList(#[cfg_attr(feature = "serde", serde(borrow))] CodePointInversionList<'data>), | ||
| // new variants should go BELOW existing ones | ||
| // Serde serializes based on variant name and index in the enum | ||
| // https://docs.rs/serde/latest/serde/trait.Serializer.html#tymethod.serialize_unit_variant | ||
| } | ||
| icu_provider::data_struct!( | ||
| PropertyCodePointSet<'_>, | ||
| #[cfg(feature = "datagen")] | ||
| ); | ||
| // See CodePointSetData for documentation of these functions | ||
| impl<'data> PropertyCodePointSet<'data> { | ||
| #[inline] | ||
| pub(crate) fn contains(&self, ch: char) -> bool { | ||
| match *self { | ||
| Self::InversionList(ref l) => l.contains(ch), | ||
| } | ||
| } | ||
| #[inline] | ||
| pub(crate) fn contains32(&self, ch: u32) -> bool { | ||
| match *self { | ||
| Self::InversionList(ref l) => l.contains32(ch), | ||
| } | ||
| } | ||
| #[inline] | ||
| pub(crate) fn iter_ranges(&self) -> impl Iterator<Item = RangeInclusive<u32>> + '_ { | ||
| match *self { | ||
| Self::InversionList(ref l) => l.iter_ranges(), | ||
| } | ||
| } | ||
| #[inline] | ||
| pub(crate) fn iter_ranges_complemented( | ||
| &self, | ||
| ) -> impl Iterator<Item = RangeInclusive<u32>> + '_ { | ||
| match *self { | ||
| Self::InversionList(ref l) => l.iter_ranges_complemented(), | ||
| } | ||
| } | ||
| #[inline] | ||
| pub(crate) fn from_code_point_inversion_list(l: CodePointInversionList<'static>) -> Self { | ||
| Self::InversionList(l) | ||
| } | ||
| #[inline] | ||
| pub(crate) fn as_code_point_inversion_list( | ||
| &'_ self, | ||
| ) -> Option<&'_ CodePointInversionList<'data>> { | ||
| match *self { | ||
| Self::InversionList(ref l) => Some(l), | ||
| // any other backing data structure that cannot return a CPInvList in O(1) time should return None | ||
| } | ||
| } | ||
| #[inline] | ||
| pub(crate) fn to_code_point_inversion_list(&self) -> CodePointInversionList<'_> { | ||
| match *self { | ||
| Self::InversionList(ref t) => ZeroFrom::zero_from(t), | ||
| } | ||
| } | ||
| } | ||
| /// A map efficiently storing data about individual characters. | ||
| /// | ||
| /// This data enum is extensible, more backends may be added in the future. | ||
| /// Old data can be used with newer code but not vice versa. | ||
| /// | ||
| /// <div class="stab unstable"> | ||
| /// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways, | ||
| /// including in SemVer minor releases. While the serde representation of data structs is guaranteed | ||
| /// to be stable, their Rust representation might not be. Use with caution. | ||
| /// </div> | ||
| #[derive(Clone, Debug, Eq, PartialEq, yoke::Yokeable, zerofrom::ZeroFrom)] | ||
| #[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))] | ||
| #[cfg_attr(feature = "datagen", databake(path = icu_properties::provider))] | ||
| #[cfg_attr(feature = "serde", derive(serde::Deserialize))] | ||
| #[non_exhaustive] | ||
| pub enum PropertyCodePointMap<'data, T: TrieValue> { | ||
| /// A codepoint trie storing the data | ||
| CodePointTrie(#[cfg_attr(feature = "serde", serde(borrow))] CodePointTrie<'data, T>), | ||
| // new variants should go BELOW existing ones | ||
| // Serde serializes based on variant name and index in the enum | ||
| // https://docs.rs/serde/latest/serde/trait.Serializer.html#tymethod.serialize_unit_variant | ||
| } | ||
| icu_provider::data_struct!( | ||
| <T: TrieValue> PropertyCodePointMap<'_, T>, | ||
| #[cfg(feature = "datagen")] | ||
| ); | ||
| // See CodePointMapData for documentation of these functions | ||
| impl<'data, T: TrieValue> PropertyCodePointMap<'data, T> { | ||
| #[inline] | ||
| pub(crate) fn get32(&self, ch: u32) -> T { | ||
| match *self { | ||
| Self::CodePointTrie(ref t) => t.get32(ch), | ||
| } | ||
| } | ||
| #[inline] | ||
| pub(crate) fn get(&self, c: char) -> T { | ||
| match *self { | ||
| Self::CodePointTrie(ref t) => t.get(c), | ||
| } | ||
| } | ||
| #[inline] | ||
| #[cfg(feature = "alloc")] | ||
| pub(crate) fn try_into_converted<P>( | ||
| self, | ||
| ) -> Result<PropertyCodePointMap<'data, P>, zerovec::ule::UleError> | ||
| where | ||
| P: TrieValue, | ||
| { | ||
| match self { | ||
| Self::CodePointTrie(t) => t | ||
| .try_into_converted() | ||
| .map(PropertyCodePointMap::CodePointTrie), | ||
| } | ||
| } | ||
| #[inline] | ||
| #[cfg(feature = "alloc")] | ||
| pub(crate) fn get_set_for_value(&self, value: T) -> CodePointInversionList<'static> { | ||
| match *self { | ||
| Self::CodePointTrie(ref t) => t.get_set_for_value(value), | ||
| } | ||
| } | ||
| #[inline] | ||
| pub(crate) fn iter_ranges(&self) -> impl Iterator<Item = CodePointMapRange<T>> + '_ { | ||
| match *self { | ||
| Self::CodePointTrie(ref t) => t.iter_ranges(), | ||
| } | ||
| } | ||
| #[inline] | ||
| pub(crate) fn iter_ranges_mapped<'a, U: Eq + 'a>( | ||
| &'a self, | ||
| map: impl FnMut(T) -> U + Copy + 'a, | ||
| ) -> impl Iterator<Item = CodePointMapRange<U>> + 'a { | ||
| match *self { | ||
| Self::CodePointTrie(ref t) => t.iter_ranges_mapped(map), | ||
| } | ||
| } | ||
| #[inline] | ||
| pub(crate) fn from_code_point_trie(trie: CodePointTrie<'static, T>) -> Self { | ||
| Self::CodePointTrie(trie) | ||
| } | ||
| #[inline] | ||
| pub(crate) fn as_code_point_trie(&self) -> Option<&CodePointTrie<'data, T>> { | ||
| match *self { | ||
| Self::CodePointTrie(ref t) => Some(t), | ||
| // any other backing data structure that cannot return a CPT in O(1) time should return None | ||
| } | ||
| } | ||
| #[inline] | ||
| pub(crate) fn to_code_point_trie(&self) -> CodePointTrie<'_, T> { | ||
| match *self { | ||
| Self::CodePointTrie(ref t) => ZeroFrom::zero_from(t), | ||
| } | ||
| } | ||
| } | ||
| /// A set of characters and strings which share a particular property value. | ||
| /// | ||
| /// <div class="stab unstable"> | ||
| /// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways, | ||
| /// including in SemVer minor releases. While the serde representation of data structs is guaranteed | ||
| /// to be stable, their Rust representation might not be. Use with caution. | ||
| /// </div> | ||
| #[derive(Debug, Eq, PartialEq, Clone, yoke::Yokeable, zerofrom::ZeroFrom)] | ||
| #[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))] | ||
| #[cfg_attr(feature = "datagen", databake(path = icu_properties::provider))] | ||
| #[cfg_attr(feature = "serde", derive(serde::Deserialize))] | ||
| #[non_exhaustive] | ||
| pub enum PropertyUnicodeSet<'data> { | ||
| /// A set representing characters in an inversion list, and the strings in a list. | ||
| CPInversionListStrList( | ||
| #[cfg_attr(feature = "serde", serde(borrow))] CodePointInversionListAndStringList<'data>, | ||
| ), | ||
| // new variants should go BELOW existing ones | ||
| // Serde serializes based on variant name and index in the enum | ||
| // https://docs.rs/serde/latest/serde/trait.Serializer.html#tymethod.serialize_unit_variant | ||
| } | ||
| icu_provider::data_struct!( | ||
| PropertyUnicodeSet<'_>, | ||
| #[cfg(feature = "datagen")] | ||
| ); | ||
| impl<'data> PropertyUnicodeSet<'data> { | ||
| #[inline] | ||
| pub(crate) fn contains_str(&self, s: &str) -> bool { | ||
| match *self { | ||
| Self::CPInversionListStrList(ref l) => l.contains_str(s), | ||
| } | ||
| } | ||
| #[inline] | ||
| pub(crate) fn contains32(&self, cp: u32) -> bool { | ||
| match *self { | ||
| Self::CPInversionListStrList(ref l) => l.contains32(cp), | ||
| } | ||
| } | ||
| #[inline] | ||
| pub(crate) fn contains(&self, ch: char) -> bool { | ||
| match *self { | ||
| Self::CPInversionListStrList(ref l) => l.contains(ch), | ||
| } | ||
| } | ||
| #[inline] | ||
| pub(crate) fn from_code_point_inversion_list_string_list( | ||
| l: CodePointInversionListAndStringList<'static>, | ||
| ) -> Self { | ||
| Self::CPInversionListStrList(l) | ||
| } | ||
| #[inline] | ||
| pub(crate) fn as_code_point_inversion_list_string_list( | ||
| &'_ self, | ||
| ) -> Option<&'_ CodePointInversionListAndStringList<'data>> { | ||
| match *self { | ||
| Self::CPInversionListStrList(ref l) => Some(l), | ||
| // any other backing data structure that cannot return a CPInversionListStrList in O(1) time should return None | ||
| } | ||
| } | ||
| #[inline] | ||
| pub(crate) fn to_code_point_inversion_list_string_list( | ||
| &self, | ||
| ) -> CodePointInversionListAndStringList<'_> { | ||
| match *self { | ||
| Self::CPInversionListStrList(ref t) => ZeroFrom::zero_from(t), | ||
| } | ||
| } | ||
| } | ||
| /// A struct that efficiently stores `Script` and `Script_Extensions` property data. | ||
| /// | ||
| /// <div class="stab unstable"> | ||
| /// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways, | ||
| /// including in SemVer minor releases. While the serde representation of data structs is guaranteed | ||
| /// to be stable, their Rust representation might not be. Use with caution. | ||
| /// </div> | ||
| #[derive(Debug, Eq, PartialEq, Clone, yoke::Yokeable, zerofrom::ZeroFrom)] | ||
| #[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))] | ||
| #[cfg_attr(feature = "datagen", databake(path = icu_properties::provider))] | ||
| #[cfg_attr(feature = "serde", derive(serde::Deserialize))] | ||
| pub struct ScriptWithExtensionsProperty<'data> { | ||
| /// Note: The `ScriptWithExt` values in this array will assume a 12-bit layout. The 2 | ||
| /// higher order bits 11..10 will indicate how to deduce the Script value and | ||
| /// Script_Extensions value, nearly matching the representation | ||
| /// [in ICU](https://github.com/unicode-org/icu/blob/main/icu4c/source/common/uprops.h): | ||
| /// | ||
| /// | High order 2 bits value | Script | Script_Extensions | | ||
| /// |-------------------------|--------------------------------------------------------|----------------------------------------------------------------| | ||
| /// | 3 | First value in sub-array, index given by lower 10 bits | Sub-array excluding first value, index given by lower 10 bits | | ||
| /// | 2 | Script=Inherited | Entire sub-array, index given by lower 10 bits | | ||
| /// | 1 | Script=Common | Entire sub-array, index given by lower 10 bits | | ||
| /// | 0 | Value in lower 10 bits | `[ Script value ]` single-element array | | ||
| /// | ||
| /// When the lower 10 bits of the value are used as an index, that index is | ||
| /// used for the outer-level vector of the nested `extensions` structure. | ||
| #[cfg_attr(feature = "serde", serde(borrow))] | ||
| pub trie: CodePointTrie<'data, ScriptWithExt>, | ||
| /// This companion structure stores Script_Extensions values, which are | ||
| /// themselves arrays / vectors. This structure only stores the values for | ||
| /// cases in which `scx(cp) != [ sc(cp) ]`. Each sub-vector is distinct. The | ||
| /// sub-vector represents the Script_Extensions array value for a code point, | ||
| /// and may also indicate Script value, as described for the `trie` field. | ||
| #[cfg_attr(feature = "serde", serde(borrow))] | ||
| pub extensions: VarZeroVec<'data, ZeroSlice<Script>>, | ||
| } | ||
| icu_provider::data_struct!( | ||
| ScriptWithExtensionsProperty<'_>, | ||
| #[cfg(feature = "datagen")] | ||
| ); |
| // This file is part of ICU4X. For terms of use, please see the file | ||
| // called LICENSE at the top level of the ICU4X source tree | ||
| // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). | ||
| //! 🚧 \[Unstable\] Property names-related data for this component | ||
| //! | ||
| //! <div class="stab unstable"> | ||
| //! 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways, | ||
| //! including in SemVer minor releases. While the serde representation of data structs is guaranteed | ||
| //! to be stable, their Rust representation might not be. Use with caution. | ||
| //! </div> | ||
| //! | ||
| //! Read more about data providers: [`icu_provider`] | ||
| use icu_locale_core::subtags::Script; | ||
| use icu_provider::prelude::{yoke, zerofrom}; | ||
| use zerotrie::ZeroTrieSimpleAscii; | ||
| use zerovec::ule::NichedOption; | ||
| use zerovec::{VarZeroVec, ZeroVec}; | ||
| icu_provider::data_marker!( | ||
| /// `PropertyNameParseBidiClassV1` | ||
| PropertyNameParseBidiClassV1, | ||
| PropertyValueNameToEnumMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyNameParseCanonicalCombiningClassV1` | ||
| PropertyNameParseCanonicalCombiningClassV1, | ||
| PropertyValueNameToEnumMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyNameParseEastAsianWidthV1` | ||
| PropertyNameParseEastAsianWidthV1, | ||
| PropertyValueNameToEnumMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyNameParseGeneralCategoryMaskV1` | ||
| PropertyNameParseGeneralCategoryMaskV1, | ||
| PropertyValueNameToEnumMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyNameParseGeneralCategoryV1` | ||
| PropertyNameParseGeneralCategoryV1, | ||
| PropertyValueNameToEnumMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyNameParseGraphemeClusterBreakV1` | ||
| PropertyNameParseGraphemeClusterBreakV1, | ||
| PropertyValueNameToEnumMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyNameParseHangulSyllableTypeV1` | ||
| PropertyNameParseHangulSyllableTypeV1, | ||
| PropertyValueNameToEnumMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyNameParseIndicSyllabicCategoryV1` | ||
| PropertyNameParseIndicSyllabicCategoryV1, | ||
| PropertyValueNameToEnumMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyNameParseJoiningTypeV1` | ||
| PropertyNameParseJoiningTypeV1, | ||
| PropertyValueNameToEnumMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyNameParseLineBreakV1` | ||
| PropertyNameParseLineBreakV1, | ||
| PropertyValueNameToEnumMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyNameParseScriptV1` | ||
| PropertyNameParseScriptV1, | ||
| PropertyValueNameToEnumMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyNameParseSentenceBreakV1` | ||
| PropertyNameParseSentenceBreakV1, | ||
| PropertyValueNameToEnumMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyNameParseVerticalOrientationV1` | ||
| PropertyNameParseVerticalOrientationV1, | ||
| PropertyValueNameToEnumMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyNameParseWordBreakV1` | ||
| PropertyNameParseWordBreakV1, | ||
| PropertyValueNameToEnumMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyNameLongBidiClassV1` | ||
| PropertyNameLongBidiClassV1, | ||
| PropertyEnumToValueNameLinearMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyNameShortBidiClassV1` | ||
| PropertyNameShortBidiClassV1, | ||
| PropertyEnumToValueNameLinearMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyNameLongEastAsianWidthV1` | ||
| PropertyNameLongEastAsianWidthV1, | ||
| PropertyEnumToValueNameLinearMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyNameShortEastAsianWidthV1` | ||
| PropertyNameShortEastAsianWidthV1, | ||
| PropertyEnumToValueNameLinearMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyNameLongGeneralCategoryV1` | ||
| PropertyNameLongGeneralCategoryV1, | ||
| PropertyEnumToValueNameLinearMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyNameShortGeneralCategoryV1` | ||
| PropertyNameShortGeneralCategoryV1, | ||
| PropertyEnumToValueNameLinearMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyNameLongGraphemeClusterBreakV1` | ||
| PropertyNameLongGraphemeClusterBreakV1, | ||
| PropertyEnumToValueNameLinearMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyNameShortGraphemeClusterBreakV1` | ||
| PropertyNameShortGraphemeClusterBreakV1, | ||
| PropertyEnumToValueNameLinearMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyNameLongHangulSyllableTypeV1` | ||
| PropertyNameLongHangulSyllableTypeV1, | ||
| PropertyEnumToValueNameLinearMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyNameShortHangulSyllableTypeV1` | ||
| PropertyNameShortHangulSyllableTypeV1, | ||
| PropertyEnumToValueNameLinearMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyNameLongIndicSyllabicCategoryV1` | ||
| PropertyNameLongIndicSyllabicCategoryV1, | ||
| PropertyEnumToValueNameLinearMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyNameShortIndicSyllabicCategoryV1` | ||
| PropertyNameShortIndicSyllabicCategoryV1, | ||
| PropertyEnumToValueNameLinearMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyNameLongJoiningTypeV1` | ||
| PropertyNameLongJoiningTypeV1, | ||
| PropertyEnumToValueNameLinearMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyNameShortJoiningTypeV1` | ||
| PropertyNameShortJoiningTypeV1, | ||
| PropertyEnumToValueNameLinearMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyNameLongLineBreakV1` | ||
| PropertyNameLongLineBreakV1, | ||
| PropertyEnumToValueNameLinearMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyNameShortLineBreakV1` | ||
| PropertyNameShortLineBreakV1, | ||
| PropertyEnumToValueNameLinearMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyNameLongScriptV1` | ||
| PropertyNameLongScriptV1, | ||
| PropertyEnumToValueNameLinearMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyNameLongSentenceBreakV1` | ||
| PropertyNameLongSentenceBreakV1, | ||
| PropertyEnumToValueNameLinearMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyNameShortSentenceBreakV1` | ||
| PropertyNameShortSentenceBreakV1, | ||
| PropertyEnumToValueNameLinearMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyNameLongVerticalOrientationV1` | ||
| PropertyNameLongVerticalOrientationV1, | ||
| PropertyEnumToValueNameLinearMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyNameShortVerticalOrientationV1` | ||
| PropertyNameShortVerticalOrientationV1, | ||
| PropertyEnumToValueNameLinearMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyNameLongWordBreakV1` | ||
| PropertyNameLongWordBreakV1, | ||
| PropertyEnumToValueNameLinearMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyNameShortWordBreakV1` | ||
| PropertyNameShortWordBreakV1, | ||
| PropertyEnumToValueNameLinearMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| #[cfg(feature = "alloc")] | ||
| icu_provider::data_marker!( | ||
| /// `PropertyNameLongCanonicalCombiningClassV1` | ||
| PropertyNameLongCanonicalCombiningClassV1, | ||
| PropertyEnumToValueNameSparseMap<'static>, | ||
| is_singleton = true, | ||
| ); | ||
| #[cfg(feature = "alloc")] | ||
| icu_provider::data_marker!( | ||
| /// `PropertyNameShortCanonicalCombiningClassV1` | ||
| PropertyNameShortCanonicalCombiningClassV1, | ||
| PropertyEnumToValueNameSparseMap<'static>, | ||
| is_singleton = true, | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyNameShortScriptV1` | ||
| PropertyNameShortScriptV1, | ||
| PropertyScriptToIcuScriptMap<'static>, | ||
| is_singleton = true, | ||
| ); | ||
| /// A set of characters and strings which share a particular property value. | ||
| /// | ||
| /// <div class="stab unstable"> | ||
| /// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways, | ||
| /// including in SemVer minor releases. While the serde representation of data structs is guaranteed | ||
| /// to be stable, their Rust representation might not be. Use with caution. | ||
| /// </div> | ||
| #[derive(Debug, Clone, PartialEq, yoke::Yokeable, zerofrom::ZeroFrom)] | ||
| #[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))] | ||
| #[cfg_attr(feature = "datagen", databake(path = icu_properties::provider::names))] | ||
| #[cfg_attr(feature = "serde", derive(serde::Deserialize))] | ||
| pub struct PropertyValueNameToEnumMap<'data> { | ||
| /// A map from names to their value discriminant | ||
| #[cfg_attr(feature = "serde", serde(borrow))] | ||
| pub map: ZeroTrieSimpleAscii<ZeroVec<'data, u8>>, | ||
| } | ||
| icu_provider::data_struct!( | ||
| PropertyValueNameToEnumMap<'_>, | ||
| #[cfg(feature = "datagen")] | ||
| ); | ||
/// Maps property values to their names. Any one instance of this map holds
/// either the long names or the short names, never both; which of the two is
/// decided when the data is loaded.
///
/// <div class="stab unstable">
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
/// to be stable, their Rust representation might not be. Use with caution.
/// </div>
#[derive(Clone, Debug, PartialEq, yoke::Yokeable, zerofrom::ZeroFrom)]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_properties::provider::names))]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[yoke(prove_covariance_manually)]
#[cfg(feature = "alloc")]
pub struct PropertyEnumToValueNameSparseMap<'data> {
    /// Names keyed by the value discriminant
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub map: zerovec::ZeroMap<'data, u16, str>,
}
#[cfg(feature = "alloc")]
icu_provider::data_struct!(
    PropertyEnumToValueNameSparseMap<'_>,
    #[cfg(feature = "datagen")]
);
/// A mapping of property values to their names, stored as a list indexed by the value
/// discriminant. A single instance of this map will only cover either long or short names,
/// determined whilst loading data.
///
/// <div class="stab unstable">
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
/// to be stable, their Rust representation might not be. Use with caution.
/// </div>
#[derive(Debug, Clone, PartialEq, yoke::Yokeable, zerofrom::ZeroFrom)]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_properties::provider::names))]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[yoke(prove_covariance_manually)]
pub struct PropertyEnumToValueNameLinearMap<'data> {
    /// A map from the value discriminant (the index) to the names, for mostly
    /// contiguous data. Empty strings count as missing.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub map: VarZeroVec<'data, str>,
}
// NOTE(review): the `datagen` cfg is forwarded to the macro; presumably it gates
// datagen-only impls. Confirm against the `icu_provider::data_struct!` documentation.
icu_provider::data_struct!(
    PropertyEnumToValueNameLinearMap<'_>,
    #[cfg(feature = "datagen")]
);
/// A mapping of `Script` property values to optional [`Script`] entries. This is the payload
/// type of the `PropertyNameShortScriptV1` data marker.
///
/// <div class="stab unstable">
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
/// to be stable, their Rust representation might not be. Use with caution.
/// </div>
#[derive(Debug, Clone, PartialEq, yoke::Yokeable, zerofrom::ZeroFrom)]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_properties::provider::names))]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[yoke(prove_covariance_manually)]
pub struct PropertyScriptToIcuScriptMap<'data> {
    /// A list of optional [`Script`] values, each stored as a `NichedOption<Script, 4>`.
    ///
    /// NOTE(review): presumably indexed by the value discriminant, with a `None` entry
    /// meaning "missing" — confirm against the code that reads this map.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub map: ZeroVec<'data, NichedOption<Script, 4>>,
}
// NOTE(review): the `datagen` cfg is forwarded to the macro; presumably it gates
// datagen-only impls. Confirm against the `icu_provider::data_struct!` documentation.
icu_provider::data_struct!(
    PropertyScriptToIcuScriptMap<'_>,
    #[cfg(feature = "datagen")]
);
| // This file is part of ICU4X. For terms of use, please see the file | ||
| // called LICENSE at the top level of the ICU4X source tree | ||
| // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). | ||
| //! 🚧 \[Experimental\] This module is experimental and currently crate-private. Let us know if you | ||
| //! have a use case for this! | ||
| //! | ||
| //! This module contains utilities for working with properties where the specific property in use | ||
| //! is not known at compile time. | ||
| //! | ||
| //! For regex engines, [`CodePointSetData::new_for_ecma262()`] is a convenient API for working | ||
| //! with properties at runtime tailored for the use case of ECMA262-compatible regex engines. | ||
| use crate::provider::*; | ||
| use crate::CodePointSetData; | ||
| #[cfg(doc)] | ||
| use crate::{ | ||
| props::{GeneralCategory, GeneralCategoryGroup, Script}, | ||
| script, CodePointMapData, PropertyParser, | ||
| }; | ||
| use icu_provider::prelude::*; | ||
/// This type can represent any binary Unicode property.
///
/// This is intended to be used in situations where the exact unicode property needed is
/// only known at runtime, for example in regex engines.
///
/// The values are intended to be identical to ICU4C's `UProperty` enum. The values 65
/// through 71 are deliberately absent here: they are binary properties of strings and
/// are listed in [`StringBinaryProperty`] instead.
#[non_exhaustive]
#[allow(missing_docs)]
#[allow(dead_code)]
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
enum BinaryProperty {
    Alnum = 44,
    Alphabetic = 0,
    AsciiHexDigit = 1,
    BidiControl = 2,
    BidiMirrored = 3,
    Blank = 45,
    Cased = 49,
    CaseIgnorable = 50,
    CaseSensitive = 34,
    ChangesWhenCasefolded = 54,
    ChangesWhenCasemapped = 55,
    ChangesWhenLowercased = 51,
    ChangesWhenNfkcCasefolded = 56,
    ChangesWhenTitlecased = 53,
    ChangesWhenUppercased = 52,
    Dash = 4,
    DefaultIgnorableCodePoint = 5,
    Deprecated = 6,
    Diacritic = 7,
    Emoji = 57,
    EmojiComponent = 61,
    EmojiModifier = 59,
    EmojiModifierBase = 60,
    EmojiPresentation = 58,
    ExtendedPictographic = 64,
    Extender = 8,
    FullCompositionExclusion = 9,
    Graph = 46,
    GraphemeBase = 10,
    GraphemeExtend = 11,
    GraphemeLink = 12,
    HexDigit = 13,
    Hyphen = 14,
    // ICU4C: UCHAR_ID_COMPAT_MATH_CONTINUE
    IdCompatMathContinue = 74,
    // ICU4C: UCHAR_ID_COMPAT_MATH_START
    IdCompatMathStart = 73,
    IdContinue = 15,
    Ideographic = 17,
    IdsBinaryOperator = 18,
    IdStart = 16,
    IdsTrinaryOperator = 19,
    // ICU4C: UCHAR_IDS_UNARY_OPERATOR
    IdsUnaryOperator = 72,
    JoinControl = 20,
    LogicalOrderException = 21,
    Lowercase = 22,
    Math = 23,
    // ICU4C: UCHAR_MODIFIER_COMBINING_MARK
    ModifierCombiningMark = 75,
    NfcInert = 39,
    NfdInert = 37,
    NfkcInert = 40,
    NfkdInert = 38,
    NoncharacterCodePoint = 24,
    PatternSyntax = 42,
    PatternWhiteSpace = 43,
    PrependedConcatenationMark = 63,
    Print = 47,
    QuotationMark = 25,
    Radical = 26,
    RegionalIndicator = 62,
    SegmentStarter = 41,
    SentenceTerminal = 35,
    SoftDotted = 27,
    TerminalPunctuation = 28,
    UnifiedIdeograph = 29,
    Uppercase = 30,
    VariationSelector = 36,
    WhiteSpace = 31,
    Xdigit = 48,
    XidContinue = 32,
    XidStart = 33,
}
/// Runtime identifier for a binary property of strings (as opposed to single code points).
///
/// Useful when the property to query is only chosen at runtime, as happens in
/// regex engines.
///
/// Each discriminant is meant to equal the matching ICU4C `UProperty` value.
#[allow(dead_code)]
#[allow(missing_docs)]
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
enum StringBinaryProperty {
    BasicEmoji = 65,
    EmojiKeycapSequence = 66,
    RgiEmojiModifierSequence = 67,
    RgiEmojiFlagSequence = 68,
    RgiEmojiTagSequence = 69,
    RgiEmojiZWJSequence = 70,
    RgiEmoji = 71,
}
| /// This type can represent any enumerated Unicode property. | ||
| /// | ||
| /// This is intended to be used in situations where the exact unicode property needed is | ||
| /// only known at runtime, for example in regex engines. | ||
| /// | ||
| /// The values are intended to be identical to ICU4C's UProperty enum | ||
| #[non_exhaustive] | ||
| #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] | ||
| #[allow(dead_code)] | ||
| #[allow(missing_docs)] | ||
| enum EnumeratedProperty { | ||
| BidiClass = 0x1000, | ||
| BidiPairedBracketType = 0x1015, | ||
| Block = 0x1001, | ||
| CombiningClass = 0x1002, | ||
| DecompositionType = 0x1003, | ||
| EastAsianWidth = 0x1004, | ||
| GeneralCategory = 0x1005, | ||
| GraphemeClusterBreak = 0x1012, | ||
| HangulSyllableType = 0x100B, | ||
| IndicConjunctBreak = 0x101A, | ||
| IndicPositionalCategory = 0x1016, | ||
| IndicSyllabicCategory = 0x1017, | ||
| JoiningGroup = 0x1006, | ||
| JoiningType = 0x1007, | ||
| LeadCanonicalCombiningClass = 0x1010, | ||
| LineBreak = 0x1008, | ||
| NFCQuickCheck = 0x100E, | ||
| NFDQuickCheck = 0x100C, | ||
| NFKCQuickCheck = 0x100F, | ||
| NFKDQuickCheck = 0x100D, | ||
| NumericType = 0x1009, | ||
| Script = 0x100A, | ||
| SentenceBreak = 0x1013, | ||
| TrailCanonicalCombiningClass = 0x1011, | ||
| VerticalOrientation = 0x1018, | ||
| WordBreak = 0x1014, | ||
| } | ||
/// Runtime identifier for a Unicode mask property.
///
/// Useful when the property to query is only chosen at runtime, as happens in
/// regex engines.
///
/// Each discriminant is meant to equal the matching ICU4C `UProperty` value.
#[allow(dead_code)]
#[allow(missing_docs)]
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
enum MaskProperty {
    GeneralCategoryMask = 0x2000,
}
/// Runtime identifier for a numeric Unicode property.
///
/// Useful when the property to query is only chosen at runtime, as happens in
/// regex engines.
///
/// Each discriminant is meant to equal the matching ICU4C `UProperty` value.
#[allow(dead_code)]
#[allow(missing_docs)]
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
enum NumericProperty {
    NumericValue = 0x3000,
}
/// Runtime identifier for a Unicode string property.
///
/// Useful when the property to query is only chosen at runtime, as happens in
/// regex engines.
///
/// Each discriminant is meant to equal the matching ICU4C `UProperty` value.
#[allow(dead_code)]
#[allow(missing_docs)]
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
enum StringProperty {
    Age = 0x4000,
    BidiMirroringGlyph = 0x4001,
    CaseFolding = 0x4002,
    ISOComment = 0x4003,
    LowercaseMapping = 0x4004,
    Name = 0x4005,
    SimpleCaseFolding = 0x4006,
    SimpleLowercaseMapping = 0x4007,
    SimpleTitlecaseMapping = 0x4008,
    SimpleUppercaseMapping = 0x4009,
    TitlecaseMapping = 0x400A,
    Unicode1Name = 0x400B,
    UppercaseMapping = 0x400C,
    BidiPairedBracket = 0x400D,
}
/// Runtime identifier for properties that belong to none of the other property
/// kinds in this module; at present that is only `Script_Extensions`.
#[allow(dead_code)]
#[allow(missing_docs)]
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
enum MiscProperty {
    ScriptExtensions = 0x7000,
}
impl CodePointSetData {
    /// Returns a type capable of looking up values for a property specified as a string, as long as it is a
    /// [binary property listed in ECMA-262][ecma], using strict matching on the names in the spec.
    ///
    /// `prop` is compared byte-for-byte against each supported property's `NAME` and `SHORT_NAME`
    /// constants; `None` is returned when it matches neither for any supported property.
    ///
    /// This handles every property required by ECMA-262 `/u` regular expressions, except for:
    ///
    /// - `Script` and `General_Category`: handle these directly using property values parsed via
    ///   [`PropertyParser<GeneralCategory>`] and [`PropertyParser<Script>`]
    ///   if necessary.
    /// - `Script_Extensions`: handle this directly using APIs from [`crate::script::ScriptWithExtensions`]
    /// - `General_Category` mask values: Handle this alongside `General_Category` using [`GeneralCategoryGroup`],
    ///   using property values parsed via [`PropertyParser<GeneralCategory>`] if necessary
    /// - `Any`, `Assigned`, and `ASCII` pseudoproperties: Handle these using their equivalent sets:
    ///    - `Any` can be expressed as the range `[\u{0}-\u{10FFFF}]`
    ///    - `Assigned` can be expressed as the inverse of the set `gc=Cn` (i.e., `\P{gc=Cn}`).
    ///    - `ASCII` can be expressed as the range `[\u{0}-\u{7F}]`
    /// - `General_Category` property values can themselves be treated like properties using a shorthand in ECMA262,
    ///   simply create the corresponding `GeneralCategory` set.
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    ///
    /// [📚 Help choosing a constructor](icu_provider::constructors)
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    ///
    /// let emoji = CodePointSetData::new_for_ecma262(b"Emoji")
    ///     .expect("is an ECMA-262 property");
    ///
    /// assert!(emoji.contains('🔥')); // U+1F525 FIRE
    /// assert!(!emoji.contains('V'));
    /// ```
    ///
    /// [ecma]: https://tc39.es/ecma262/#table-binary-unicode-properties
    #[cfg(feature = "compiled_data")]
    pub fn new_for_ecma262(prop: &[u8]) -> Option<crate::CodePointSetDataBorrowed<'static>> {
        use crate::props::*;
        // Every matching arm yields a set that is wrapped in `Some`; the fallthrough arm
        // returns `None` from the function directly.
        Some(match prop {
            AsciiHexDigit::NAME | AsciiHexDigit::SHORT_NAME => Self::new::<AsciiHexDigit>(),
            Alphabetic::NAME | Alphabetic::SHORT_NAME => Self::new::<Alphabetic>(),
            BidiControl::NAME | BidiControl::SHORT_NAME => Self::new::<BidiControl>(),
            BidiMirrored::NAME | BidiMirrored::SHORT_NAME => Self::new::<BidiMirrored>(),
            CaseIgnorable::NAME | CaseIgnorable::SHORT_NAME => Self::new::<CaseIgnorable>(),
            #[allow(unreachable_patterns)] // no short name
            Cased::NAME | Cased::SHORT_NAME => Self::new::<Cased>(),
            ChangesWhenCasefolded::NAME | ChangesWhenCasefolded::SHORT_NAME => {
                Self::new::<ChangesWhenCasefolded>()
            }
            ChangesWhenCasemapped::NAME | ChangesWhenCasemapped::SHORT_NAME => {
                Self::new::<ChangesWhenCasemapped>()
            }
            ChangesWhenLowercased::NAME | ChangesWhenLowercased::SHORT_NAME => {
                Self::new::<ChangesWhenLowercased>()
            }
            ChangesWhenNfkcCasefolded::NAME | ChangesWhenNfkcCasefolded::SHORT_NAME => {
                Self::new::<ChangesWhenNfkcCasefolded>()
            }
            ChangesWhenTitlecased::NAME | ChangesWhenTitlecased::SHORT_NAME => {
                Self::new::<ChangesWhenTitlecased>()
            }
            ChangesWhenUppercased::NAME | ChangesWhenUppercased::SHORT_NAME => {
                Self::new::<ChangesWhenUppercased>()
            }
            #[allow(unreachable_patterns)] // no short name
            Dash::NAME | Dash::SHORT_NAME => Self::new::<Dash>(),
            DefaultIgnorableCodePoint::NAME | DefaultIgnorableCodePoint::SHORT_NAME => {
                Self::new::<DefaultIgnorableCodePoint>()
            }
            Deprecated::NAME | Deprecated::SHORT_NAME => Self::new::<Deprecated>(),
            Diacritic::NAME | Diacritic::SHORT_NAME => Self::new::<Diacritic>(),
            #[allow(unreachable_patterns)] // no short name
            Emoji::NAME | Emoji::SHORT_NAME => Self::new::<Emoji>(),
            EmojiComponent::NAME | EmojiComponent::SHORT_NAME => Self::new::<EmojiComponent>(),
            EmojiModifier::NAME | EmojiModifier::SHORT_NAME => Self::new::<EmojiModifier>(),
            EmojiModifierBase::NAME | EmojiModifierBase::SHORT_NAME => {
                Self::new::<EmojiModifierBase>()
            }
            EmojiPresentation::NAME | EmojiPresentation::SHORT_NAME => {
                Self::new::<EmojiPresentation>()
            }
            ExtendedPictographic::NAME | ExtendedPictographic::SHORT_NAME => {
                Self::new::<ExtendedPictographic>()
            }
            Extender::NAME | Extender::SHORT_NAME => Self::new::<Extender>(),
            GraphemeBase::NAME | GraphemeBase::SHORT_NAME => Self::new::<GraphemeBase>(),
            GraphemeExtend::NAME | GraphemeExtend::SHORT_NAME => Self::new::<GraphemeExtend>(),
            HexDigit::NAME | HexDigit::SHORT_NAME => Self::new::<HexDigit>(),
            IdsBinaryOperator::NAME | IdsBinaryOperator::SHORT_NAME => {
                Self::new::<IdsBinaryOperator>()
            }
            IdsTrinaryOperator::NAME | IdsTrinaryOperator::SHORT_NAME => {
                Self::new::<IdsTrinaryOperator>()
            }
            IdContinue::NAME | IdContinue::SHORT_NAME => Self::new::<IdContinue>(),
            IdStart::NAME | IdStart::SHORT_NAME => Self::new::<IdStart>(),
            Ideographic::NAME | Ideographic::SHORT_NAME => Self::new::<Ideographic>(),
            JoinControl::NAME | JoinControl::SHORT_NAME => Self::new::<JoinControl>(),
            LogicalOrderException::NAME | LogicalOrderException::SHORT_NAME => {
                Self::new::<LogicalOrderException>()
            }
            Lowercase::NAME | Lowercase::SHORT_NAME => Self::new::<Lowercase>(),
            #[allow(unreachable_patterns)] // no short name
            Math::NAME | Math::SHORT_NAME => Self::new::<Math>(),
            NoncharacterCodePoint::NAME | NoncharacterCodePoint::SHORT_NAME => {
                Self::new::<NoncharacterCodePoint>()
            }
            PatternSyntax::NAME | PatternSyntax::SHORT_NAME => Self::new::<PatternSyntax>(),
            PatternWhiteSpace::NAME | PatternWhiteSpace::SHORT_NAME => {
                Self::new::<PatternWhiteSpace>()
            }
            QuotationMark::NAME | QuotationMark::SHORT_NAME => Self::new::<QuotationMark>(),
            #[allow(unreachable_patterns)] // no short name
            Radical::NAME | Radical::SHORT_NAME => Self::new::<Radical>(),
            RegionalIndicator::NAME | RegionalIndicator::SHORT_NAME => {
                Self::new::<RegionalIndicator>()
            }
            SentenceTerminal::NAME | SentenceTerminal::SHORT_NAME => {
                Self::new::<SentenceTerminal>()
            }
            SoftDotted::NAME | SoftDotted::SHORT_NAME => Self::new::<SoftDotted>(),
            TerminalPunctuation::NAME | TerminalPunctuation::SHORT_NAME => {
                Self::new::<TerminalPunctuation>()
            }
            UnifiedIdeograph::NAME | UnifiedIdeograph::SHORT_NAME => {
                Self::new::<UnifiedIdeograph>()
            }
            Uppercase::NAME | Uppercase::SHORT_NAME => Self::new::<Uppercase>(),
            VariationSelector::NAME | VariationSelector::SHORT_NAME => {
                Self::new::<VariationSelector>()
            }
            WhiteSpace::NAME | WhiteSpace::SHORT_NAME => Self::new::<WhiteSpace>(),
            XidContinue::NAME | XidContinue::SHORT_NAME => Self::new::<XidContinue>(),
            XidStart::NAME | XidStart::SHORT_NAME => Self::new::<XidStart>(),
            // Not an ECMA-262 property
            _ => return None,
        })
    }

    // NOTE(review): presumably this generates `try_new_for_ecma262_with_buffer_provider` on
    // top of `try_new_for_ecma262_unstable`, and `new_for_ecma262: skip` reflects that the
    // compiled-data constructor is hand-written above — confirm against the
    // `icu_provider::gen_buffer_data_constructors!` documentation.
    icu_provider::gen_buffer_data_constructors!(
        (prop: &[u8]) -> result: Option<Result<Self, DataError>>,
        functions: [
            new_for_ecma262: skip,
            try_new_for_ecma262_with_buffer_provider,
            try_new_for_ecma262_unstable,
            Self,
        ]
    );

    #[doc = icu_provider::gen_buffer_unstable_docs!(UNSTABLE, Self::new_for_ecma262)]
    pub fn try_new_for_ecma262_unstable<P>(
        provider: &P,
        prop: &[u8],
    ) -> Option<Result<Self, DataError>>
    where
        // One `DataProvider` bound per binary property that the match below can dispatch to.
        P: ?Sized
            + DataProvider<PropertyBinaryAsciiHexDigitV1>
            + DataProvider<PropertyBinaryAlphabeticV1>
            + DataProvider<PropertyBinaryBidiControlV1>
            + DataProvider<PropertyBinaryBidiMirroredV1>
            + DataProvider<PropertyBinaryCaseIgnorableV1>
            + DataProvider<PropertyBinaryCasedV1>
            + DataProvider<PropertyBinaryChangesWhenCasefoldedV1>
            + DataProvider<PropertyBinaryChangesWhenCasemappedV1>
            + DataProvider<PropertyBinaryChangesWhenLowercasedV1>
            + DataProvider<PropertyBinaryChangesWhenNfkcCasefoldedV1>
            + DataProvider<PropertyBinaryChangesWhenTitlecasedV1>
            + DataProvider<PropertyBinaryChangesWhenUppercasedV1>
            + DataProvider<PropertyBinaryDashV1>
            + DataProvider<PropertyBinaryDefaultIgnorableCodePointV1>
            + DataProvider<PropertyBinaryDeprecatedV1>
            + DataProvider<PropertyBinaryDiacriticV1>
            + DataProvider<PropertyBinaryEmojiV1>
            + DataProvider<PropertyBinaryEmojiComponentV1>
            + DataProvider<PropertyBinaryEmojiModifierV1>
            + DataProvider<PropertyBinaryEmojiModifierBaseV1>
            + DataProvider<PropertyBinaryEmojiPresentationV1>
            + DataProvider<PropertyBinaryExtendedPictographicV1>
            + DataProvider<PropertyBinaryExtenderV1>
            + DataProvider<PropertyBinaryGraphemeBaseV1>
            + DataProvider<PropertyBinaryGraphemeExtendV1>
            + DataProvider<PropertyBinaryHexDigitV1>
            + DataProvider<PropertyBinaryIdsBinaryOperatorV1>
            + DataProvider<PropertyBinaryIdsTrinaryOperatorV1>
            + DataProvider<PropertyBinaryIdContinueV1>
            + DataProvider<PropertyBinaryIdStartV1>
            + DataProvider<PropertyBinaryIdeographicV1>
            + DataProvider<PropertyBinaryJoinControlV1>
            + DataProvider<PropertyBinaryLogicalOrderExceptionV1>
            + DataProvider<PropertyBinaryLowercaseV1>
            + DataProvider<PropertyBinaryMathV1>
            + DataProvider<PropertyBinaryNoncharacterCodePointV1>
            + DataProvider<PropertyBinaryPatternSyntaxV1>
            + DataProvider<PropertyBinaryPatternWhiteSpaceV1>
            + DataProvider<PropertyBinaryQuotationMarkV1>
            + DataProvider<PropertyBinaryRadicalV1>
            + DataProvider<PropertyBinaryRegionalIndicatorV1>
            + DataProvider<PropertyBinarySentenceTerminalV1>
            + DataProvider<PropertyBinarySoftDottedV1>
            + DataProvider<PropertyBinaryTerminalPunctuationV1>
            + DataProvider<PropertyBinaryUnifiedIdeographV1>
            + DataProvider<PropertyBinaryUppercaseV1>
            + DataProvider<PropertyBinaryVariationSelectorV1>
            + DataProvider<PropertyBinaryWhiteSpaceV1>
            + DataProvider<PropertyBinaryXidContinueV1>
            + DataProvider<PropertyBinaryXidStartV1>,
    {
        use crate::props::*;
        // Same dispatch as `new_for_ecma262`, but each arm loads from `provider` and so
        // yields a `Result`; an unrecognized name still returns `None` from the function.
        Some(match prop {
            AsciiHexDigit::NAME | AsciiHexDigit::SHORT_NAME => {
                Self::try_new_unstable::<AsciiHexDigit>(provider)
            }
            Alphabetic::NAME | Alphabetic::SHORT_NAME => {
                Self::try_new_unstable::<Alphabetic>(provider)
            }
            BidiControl::NAME | BidiControl::SHORT_NAME => {
                Self::try_new_unstable::<BidiControl>(provider)
            }
            BidiMirrored::NAME | BidiMirrored::SHORT_NAME => {
                Self::try_new_unstable::<BidiMirrored>(provider)
            }
            CaseIgnorable::NAME | CaseIgnorable::SHORT_NAME => {
                Self::try_new_unstable::<CaseIgnorable>(provider)
            }
            #[allow(unreachable_patterns)] // no short name
            Cased::NAME | Cased::SHORT_NAME => Self::try_new_unstable::<Cased>(provider),
            ChangesWhenCasefolded::NAME | ChangesWhenCasefolded::SHORT_NAME => {
                Self::try_new_unstable::<ChangesWhenCasefolded>(provider)
            }
            ChangesWhenCasemapped::NAME | ChangesWhenCasemapped::SHORT_NAME => {
                Self::try_new_unstable::<ChangesWhenCasemapped>(provider)
            }
            ChangesWhenLowercased::NAME | ChangesWhenLowercased::SHORT_NAME => {
                Self::try_new_unstable::<ChangesWhenLowercased>(provider)
            }
            ChangesWhenNfkcCasefolded::NAME | ChangesWhenNfkcCasefolded::SHORT_NAME => {
                Self::try_new_unstable::<ChangesWhenNfkcCasefolded>(provider)
            }
            ChangesWhenTitlecased::NAME | ChangesWhenTitlecased::SHORT_NAME => {
                Self::try_new_unstable::<ChangesWhenTitlecased>(provider)
            }
            ChangesWhenUppercased::NAME | ChangesWhenUppercased::SHORT_NAME => {
                Self::try_new_unstable::<ChangesWhenUppercased>(provider)
            }
            #[allow(unreachable_patterns)] // no short name
            Dash::NAME | Dash::SHORT_NAME => Self::try_new_unstable::<Dash>(provider),
            DefaultIgnorableCodePoint::NAME | DefaultIgnorableCodePoint::SHORT_NAME => {
                Self::try_new_unstable::<DefaultIgnorableCodePoint>(provider)
            }
            Deprecated::NAME | Deprecated::SHORT_NAME => {
                Self::try_new_unstable::<Deprecated>(provider)
            }
            Diacritic::NAME | Diacritic::SHORT_NAME => {
                Self::try_new_unstable::<Diacritic>(provider)
            }
            #[allow(unreachable_patterns)] // no short name
            Emoji::NAME | Emoji::SHORT_NAME => Self::try_new_unstable::<Emoji>(provider),
            EmojiComponent::NAME | EmojiComponent::SHORT_NAME => {
                Self::try_new_unstable::<EmojiComponent>(provider)
            }
            EmojiModifier::NAME | EmojiModifier::SHORT_NAME => {
                Self::try_new_unstable::<EmojiModifier>(provider)
            }
            EmojiModifierBase::NAME | EmojiModifierBase::SHORT_NAME => {
                Self::try_new_unstable::<EmojiModifierBase>(provider)
            }
            EmojiPresentation::NAME | EmojiPresentation::SHORT_NAME => {
                Self::try_new_unstable::<EmojiPresentation>(provider)
            }
            ExtendedPictographic::NAME | ExtendedPictographic::SHORT_NAME => {
                Self::try_new_unstable::<ExtendedPictographic>(provider)
            }
            Extender::NAME | Extender::SHORT_NAME => Self::try_new_unstable::<Extender>(provider),
            GraphemeBase::NAME | GraphemeBase::SHORT_NAME => {
                Self::try_new_unstable::<GraphemeBase>(provider)
            }
            GraphemeExtend::NAME | GraphemeExtend::SHORT_NAME => {
                Self::try_new_unstable::<GraphemeExtend>(provider)
            }
            HexDigit::NAME | HexDigit::SHORT_NAME => Self::try_new_unstable::<HexDigit>(provider),
            IdsBinaryOperator::NAME | IdsBinaryOperator::SHORT_NAME => {
                Self::try_new_unstable::<IdsBinaryOperator>(provider)
            }
            IdsTrinaryOperator::NAME | IdsTrinaryOperator::SHORT_NAME => {
                Self::try_new_unstable::<IdsTrinaryOperator>(provider)
            }
            IdContinue::NAME | IdContinue::SHORT_NAME => {
                Self::try_new_unstable::<IdContinue>(provider)
            }
            IdStart::NAME | IdStart::SHORT_NAME => Self::try_new_unstable::<IdStart>(provider),
            Ideographic::NAME | Ideographic::SHORT_NAME => {
                Self::try_new_unstable::<Ideographic>(provider)
            }
            JoinControl::NAME | JoinControl::SHORT_NAME => {
                Self::try_new_unstable::<JoinControl>(provider)
            }
            LogicalOrderException::NAME | LogicalOrderException::SHORT_NAME => {
                Self::try_new_unstable::<LogicalOrderException>(provider)
            }
            Lowercase::NAME | Lowercase::SHORT_NAME => {
                Self::try_new_unstable::<Lowercase>(provider)
            }
            #[allow(unreachable_patterns)] // no short name
            Math::NAME | Math::SHORT_NAME => Self::try_new_unstable::<Math>(provider),
            NoncharacterCodePoint::NAME | NoncharacterCodePoint::SHORT_NAME => {
                Self::try_new_unstable::<NoncharacterCodePoint>(provider)
            }
            PatternSyntax::NAME | PatternSyntax::SHORT_NAME => {
                Self::try_new_unstable::<PatternSyntax>(provider)
            }
            PatternWhiteSpace::NAME | PatternWhiteSpace::SHORT_NAME => {
                Self::try_new_unstable::<PatternWhiteSpace>(provider)
            }
            QuotationMark::NAME | QuotationMark::SHORT_NAME => {
                Self::try_new_unstable::<QuotationMark>(provider)
            }
            #[allow(unreachable_patterns)] // no short name
            Radical::NAME | Radical::SHORT_NAME => Self::try_new_unstable::<Radical>(provider),
            RegionalIndicator::NAME | RegionalIndicator::SHORT_NAME => {
                Self::try_new_unstable::<RegionalIndicator>(provider)
            }
            SentenceTerminal::NAME | SentenceTerminal::SHORT_NAME => {
                Self::try_new_unstable::<SentenceTerminal>(provider)
            }
            SoftDotted::NAME | SoftDotted::SHORT_NAME => {
                Self::try_new_unstable::<SoftDotted>(provider)
            }
            TerminalPunctuation::NAME | TerminalPunctuation::SHORT_NAME => {
                Self::try_new_unstable::<TerminalPunctuation>(provider)
            }
            UnifiedIdeograph::NAME | UnifiedIdeograph::SHORT_NAME => {
                Self::try_new_unstable::<UnifiedIdeograph>(provider)
            }
            Uppercase::NAME | Uppercase::SHORT_NAME => {
                Self::try_new_unstable::<Uppercase>(provider)
            }
            VariationSelector::NAME | VariationSelector::SHORT_NAME => {
                Self::try_new_unstable::<VariationSelector>(provider)
            }
            WhiteSpace::NAME | WhiteSpace::SHORT_NAME => {
                Self::try_new_unstable::<WhiteSpace>(provider)
            }
            XidContinue::NAME | XidContinue::SHORT_NAME => {
                Self::try_new_unstable::<XidContinue>(provider)
            }
            XidStart::NAME | XidStart::SHORT_NAME => Self::try_new_unstable::<XidStart>(provider),
            // Not an ECMA-262 property
            _ => return None,
        })
    }
}
| // This file is part of ICU4X. For terms of use, please see the file | ||
| // called LICENSE at the top level of the ICU4X source tree | ||
| // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). | ||
| //! Data and APIs for supporting Script_Extensions property | ||
| //! values in an efficient structure. | ||
| use crate::props::Script; | ||
| use crate::provider::*; | ||
| #[cfg(feature = "alloc")] | ||
| use core::iter::FromIterator; | ||
| use core::ops::RangeInclusive; | ||
| #[cfg(feature = "alloc")] | ||
| use icu_collections::codepointinvlist::CodePointInversionList; | ||
| use icu_provider::prelude::*; | ||
| use zerovec::{ule::AsULE, ZeroSlice}; | ||
/// Width, in bits, of the low-order field of a `ScriptWithExt` value; that field
/// holds the `Script` value (or the `extensions` index).
const SCRIPT_VAL_LENGTH: u16 = 10;

/// Mask that isolates the low-order field of a `ScriptWithExt` value, i.e. the
/// `Script` value (or `extensions` index): exactly the lowest `SCRIPT_VAL_LENGTH`
/// bits are set.
const SCRIPT_X_SCRIPT_VAL: u16 = !(u16::MAX << SCRIPT_VAL_LENGTH);
/// An internal-use only pseudo-property that represents the values stored in
/// the trie of the special data structure [`ScriptWithExtensionsProperty`].
///
/// Note: This will assume a 12-bit layout. The 2 higher order bits in positions
/// 11..10 will indicate how to deduce the Script value and Script_Extensions,
/// and the lower 10 bits 9..0 indicate either the Script value or the index
/// into the `extensions` structure.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "datagen", derive(databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_properties::script))]
#[repr(transparent)]
#[doc(hidden)]
// `ScriptWithExt` not intended as public-facing but for `ScriptWithExtensionsProperty` constructor
#[allow(clippy::exhaustive_structs)] // this type is stable
pub struct ScriptWithExt(pub u16);
| #[allow(missing_docs)] // These constants don't need individual documentation. | ||
| #[allow(non_upper_case_globals)] | ||
| #[doc(hidden)] // `ScriptWithExt` not intended as public-facing but for `ScriptWithExtensionsProperty` constructor | ||
| impl ScriptWithExt { | ||
| pub const Unknown: ScriptWithExt = ScriptWithExt(0); | ||
| } | ||
| impl AsULE for ScriptWithExt { | ||
| type ULE = <u16 as AsULE>::ULE; | ||
| #[inline] | ||
| fn to_unaligned(self) -> Self::ULE { | ||
| Script(self.0).to_unaligned() | ||
| } | ||
| #[inline] | ||
| fn from_unaligned(unaligned: Self::ULE) -> Self { | ||
| ScriptWithExt(Script::from_unaligned(unaligned).0) | ||
| } | ||
| } | ||
| #[doc(hidden)] // `ScriptWithExt` not intended as public-facing but for `ScriptWithExtensionsProperty` constructor | ||
| impl ScriptWithExt { | ||
| /// Returns whether the [`ScriptWithExt`] value has Script_Extensions and | ||
| /// also indicates a Script value of [`Script::Common`]. | ||
| /// | ||
| /// # Examples | ||
| /// | ||
| /// ``` | ||
| /// use icu::properties::script::ScriptWithExt; | ||
| /// | ||
| /// assert!(ScriptWithExt(0x04FF).is_common()); | ||
| /// assert!(ScriptWithExt(0x0400).is_common()); | ||
| /// | ||
| /// assert!(!ScriptWithExt(0x08FF).is_common()); | ||
| /// assert!(!ScriptWithExt(0x0800).is_common()); | ||
| /// | ||
| /// assert!(!ScriptWithExt(0x0CFF).is_common()); | ||
| /// assert!(!ScriptWithExt(0x0C00).is_common()); | ||
| /// | ||
| /// assert!(!ScriptWithExt(0xFF).is_common()); | ||
| /// assert!(!ScriptWithExt(0x0).is_common()); | ||
| /// ``` | ||
| pub fn is_common(&self) -> bool { | ||
| self.0 >> SCRIPT_VAL_LENGTH == 1 | ||
| } | ||
| /// Returns whether the [`ScriptWithExt`] value has Script_Extensions and | ||
| /// also indicates a Script value of [`Script::Inherited`]. | ||
| /// | ||
| /// # Examples | ||
| /// | ||
| /// ``` | ||
| /// use icu::properties::script::ScriptWithExt; | ||
| /// | ||
| /// assert!(!ScriptWithExt(0x04FF).is_inherited()); | ||
| /// assert!(!ScriptWithExt(0x0400).is_inherited()); | ||
| /// | ||
| /// assert!(ScriptWithExt(0x08FF).is_inherited()); | ||
| /// assert!(ScriptWithExt(0x0800).is_inherited()); | ||
| /// | ||
| /// assert!(!ScriptWithExt(0x0CFF).is_inherited()); | ||
| /// assert!(!ScriptWithExt(0x0C00).is_inherited()); | ||
| /// | ||
| /// assert!(!ScriptWithExt(0xFF).is_inherited()); | ||
| /// assert!(!ScriptWithExt(0x0).is_inherited()); | ||
| /// ``` | ||
| pub fn is_inherited(&self) -> bool { | ||
| self.0 >> SCRIPT_VAL_LENGTH == 2 | ||
| } | ||
| /// Returns whether the [`ScriptWithExt`] value has Script_Extensions and | ||
| /// also indicates that the Script value is neither [`Script::Common`] nor | ||
| /// [`Script::Inherited`]. | ||
| /// | ||
| /// # Examples | ||
| /// | ||
| /// ``` | ||
| /// use icu::properties::script::ScriptWithExt; | ||
| /// | ||
| /// assert!(!ScriptWithExt(0x04FF).is_other()); | ||
| /// assert!(!ScriptWithExt(0x0400).is_other()); | ||
| /// | ||
| /// assert!(!ScriptWithExt(0x08FF).is_other()); | ||
| /// assert!(!ScriptWithExt(0x0800).is_other()); | ||
| /// | ||
| /// assert!(ScriptWithExt(0x0CFF).is_other()); | ||
| /// assert!(ScriptWithExt(0x0C00).is_other()); | ||
| /// | ||
| /// assert!(!ScriptWithExt(0xFF).is_other()); | ||
| /// assert!(!ScriptWithExt(0x0).is_other()); | ||
| /// ``` | ||
| pub fn is_other(&self) -> bool { | ||
| self.0 >> SCRIPT_VAL_LENGTH == 3 | ||
| } | ||
| /// Returns whether the [`ScriptWithExt`] value has Script_Extensions. | ||
| /// | ||
| /// # Examples | ||
| /// | ||
| /// ``` | ||
| /// use icu::properties::script::ScriptWithExt; | ||
| /// | ||
| /// assert!(ScriptWithExt(0x04FF).has_extensions()); | ||
| /// assert!(ScriptWithExt(0x0400).has_extensions()); | ||
| /// | ||
| /// assert!(ScriptWithExt(0x08FF).has_extensions()); | ||
| /// assert!(ScriptWithExt(0x0800).has_extensions()); | ||
| /// | ||
| /// assert!(ScriptWithExt(0x0CFF).has_extensions()); | ||
| /// assert!(ScriptWithExt(0x0C00).has_extensions()); | ||
| /// | ||
| /// assert!(!ScriptWithExt(0xFF).has_extensions()); | ||
| /// assert!(!ScriptWithExt(0x0).has_extensions()); | ||
| /// ``` | ||
| pub fn has_extensions(&self) -> bool { | ||
| let high_order_bits = self.0 >> SCRIPT_VAL_LENGTH; | ||
| high_order_bits > 0 | ||
| } | ||
| } | ||
| impl From<ScriptWithExt> for u32 { | ||
| fn from(swe: ScriptWithExt) -> Self { | ||
| swe.0 as u32 | ||
| } | ||
| } | ||
| impl From<ScriptWithExt> for Script { | ||
| fn from(swe: ScriptWithExt) -> Self { | ||
| Script(swe.0) | ||
| } | ||
| } | ||
| /// A struct that wraps a [`Script`] array, such as in the return value for | ||
| /// [`get_script_extensions_val()`](ScriptWithExtensionsBorrowed::get_script_extensions_val). | ||
| #[derive(Copy, Clone, Debug, Eq, PartialEq)] | ||
| pub struct ScriptExtensionsSet<'a> { | ||
| values: &'a ZeroSlice<Script>, | ||
| } | ||
| impl<'a> ScriptExtensionsSet<'a> { | ||
| /// Returns whether this set contains the given script. | ||
| /// | ||
| /// # Example | ||
| /// | ||
| /// ``` | ||
| /// use icu::properties::props::Script; | ||
| /// use icu::properties::script::ScriptWithExtensions; | ||
| /// let swe = ScriptWithExtensions::new(); | ||
| /// | ||
| /// assert!(swe | ||
| /// .get_script_extensions_val('\u{11303}') // GRANTHA SIGN VISARGA | ||
| /// .contains(&Script::Grantha)); | ||
| /// ``` | ||
| pub fn contains(&self, x: &Script) -> bool { | ||
| ZeroSlice::binary_search(self.values, x).is_ok() | ||
| } | ||
| /// Gets an iterator over the elements. | ||
| /// | ||
| /// # Example | ||
| /// | ||
| /// ``` | ||
| /// use icu::properties::props::Script; | ||
| /// use icu::properties::script::ScriptWithExtensions; | ||
| /// let swe = ScriptWithExtensions::new(); | ||
| /// | ||
| /// assert_eq!( | ||
| /// swe.get_script_extensions_val('௫') // U+0BEB TAMIL DIGIT FIVE | ||
| /// .iter() | ||
| /// .collect::<Vec<_>>(), | ||
| /// [Script::Tamil, Script::Grantha] | ||
| /// ); | ||
| /// ``` | ||
| pub fn iter(&self) -> impl DoubleEndedIterator<Item = Script> + 'a { | ||
| ZeroSlice::iter(self.values) | ||
| } | ||
| /// For accessing this set as an array instead of an iterator | ||
| #[doc(hidden)] // used by FFI code | ||
| pub fn array_len(&self) -> usize { | ||
| self.values.len() | ||
| } | ||
| /// For accessing this set as an array instead of an iterator | ||
| #[doc(hidden)] // used by FFI code | ||
| pub fn array_get(&self, index: usize) -> Option<Script> { | ||
| self.values.get(index) | ||
| } | ||
| } | ||
| /// A struct that represents the data for the Script and Script_Extensions properties. | ||
| /// | ||
| /// ✨ *Enabled with the `compiled_data` Cargo feature.* | ||
| /// | ||
| /// [📚 Help choosing a constructor](icu_provider::constructors) | ||
| /// | ||
| /// Most useful methods are on [`ScriptWithExtensionsBorrowed`] obtained by calling [`ScriptWithExtensions::as_borrowed()`]. | ||
| /// | ||
| /// # Examples | ||
| /// | ||
| /// ``` | ||
| /// use icu::properties::script::ScriptWithExtensions; | ||
| /// use icu::properties::props::Script; | ||
| /// let swe = ScriptWithExtensions::new(); | ||
| /// | ||
| /// // get the `Script` property value | ||
| /// assert_eq!(swe.get_script_val('ـ'), Script::Common); // U+0640 ARABIC TATWEEL | ||
| /// assert_eq!(swe.get_script_val('\u{0650}'), Script::Inherited); // U+0650 ARABIC KASRA | ||
| /// assert_eq!(swe.get_script_val('٠'), Script::Arabic); // U+0660 ARABIC-INDIC DIGIT ZERO | ||
| /// assert_eq!(swe.get_script_val('ﷲ'), Script::Arabic); // U+FDF2 ARABIC LIGATURE ALLAH ISOLATED FORM | ||
| /// | ||
| /// // get the `Script_Extensions` property value | ||
| /// assert_eq!( | ||
| /// swe.get_script_extensions_val('ـ') // U+0640 ARABIC TATWEEL | ||
| /// .iter().collect::<Vec<_>>(), | ||
| /// [Script::Arabic, Script::Syriac, Script::Mandaic, Script::Manichaean, | ||
| /// Script::PsalterPahlavi, Script::Adlam, Script::HanifiRohingya, Script::Sogdian, | ||
| /// Script::OldUyghur] | ||
| /// ); | ||
| /// assert_eq!( | ||
| /// swe.get_script_extensions_val('🥳') // U+1F973 FACE WITH PARTY HORN AND PARTY HAT | ||
| /// .iter().collect::<Vec<_>>(), | ||
| /// [Script::Common] | ||
| /// ); | ||
| /// assert_eq!( | ||
| /// swe.get_script_extensions_val('\u{200D}') // ZERO WIDTH JOINER | ||
| /// .iter().collect::<Vec<_>>(), | ||
| /// [Script::Inherited] | ||
| /// ); | ||
| /// assert_eq!( | ||
| /// swe.get_script_extensions_val('௫') // U+0BEB TAMIL DIGIT FIVE | ||
| /// .iter().collect::<Vec<_>>(), | ||
| /// [Script::Tamil, Script::Grantha] | ||
| /// ); | ||
| /// | ||
| /// // check containment of a `Script` value in the `Script_Extensions` value | ||
| /// // U+0650 ARABIC KASRA | ||
| /// assert!(!swe.has_script('\u{0650}', Script::Inherited)); // main Script value | ||
| /// assert!(swe.has_script('\u{0650}', Script::Arabic)); | ||
| /// assert!(swe.has_script('\u{0650}', Script::Syriac)); | ||
| /// assert!(!swe.has_script('\u{0650}', Script::Thaana)); | ||
| /// | ||
| /// // get a `CodePointInversionList` for when `Script` value is contained in `Script_Extensions` value | ||
| /// let syriac = swe.get_script_extensions_set(Script::Syriac); | ||
| /// assert!(syriac.contains('\u{0650}')); // ARABIC KASRA | ||
| /// assert!(!syriac.contains('٠')); // ARABIC-INDIC DIGIT ZERO | ||
| /// assert!(!syriac.contains('ﷲ')); // ARABIC LIGATURE ALLAH ISOLATED FORM | ||
| /// assert!(syriac.contains('܀')); // SYRIAC END OF PARAGRAPH | ||
| /// assert!(syriac.contains('\u{074A}')); // SYRIAC BARREKH | ||
| /// ``` | ||
| #[derive(Debug)] | ||
| pub struct ScriptWithExtensions { | ||
| data: DataPayload<PropertyScriptWithExtensionsV1>, | ||
| } | ||
| /// A borrowed, copyable view of the Script / Script_Extensions data, returned by | ||
| /// [`ScriptWithExtensions::as_borrowed()`]. More efficient to query than the owned type. | ||
| #[derive(Clone, Copy, Debug)] | ||
| pub struct ScriptWithExtensionsBorrowed<'a> { | ||
| data: &'a ScriptWithExtensionsProperty<'a>, | ||
| } | ||
| impl ScriptWithExtensions { | ||
| /// Creates a new instance of `ScriptWithExtensionsBorrowed` using compiled data. | ||
| /// | ||
| /// ✨ *Enabled with the `compiled_data` Cargo feature.* | ||
| /// | ||
| /// [📚 Help choosing a constructor](icu_provider::constructors) | ||
| #[cfg(feature = "compiled_data")] | ||
| #[expect(clippy::new_ret_no_self)] | ||
| pub fn new() -> ScriptWithExtensionsBorrowed<'static> { | ||
| ScriptWithExtensionsBorrowed::new() | ||
| } | ||
| icu_provider::gen_buffer_data_constructors!( | ||
| () -> result: Result<ScriptWithExtensions, DataError>, | ||
| functions: [ | ||
| new: skip, | ||
| try_new_with_buffer_provider, | ||
| try_new_unstable, | ||
| Self, | ||
| ] | ||
| ); | ||
| #[doc = icu_provider::gen_buffer_unstable_docs!(UNSTABLE, Self::new)] | ||
| pub fn try_new_unstable( | ||
| provider: &(impl DataProvider<PropertyScriptWithExtensionsV1> + ?Sized), | ||
| ) -> Result<Self, DataError> { | ||
| Ok(ScriptWithExtensions::from_data( | ||
| provider.load(Default::default())?.payload, | ||
| )) | ||
| } | ||
| /// Construct a borrowed version of this type that can be queried. | ||
| /// | ||
| /// This avoids a potential small underlying cost per API call (ex: `contains()`) by consolidating it | ||
| /// up front. | ||
| #[inline] | ||
| pub fn as_borrowed(&self) -> ScriptWithExtensionsBorrowed<'_> { | ||
| ScriptWithExtensionsBorrowed { | ||
| data: self.data.get(), | ||
| } | ||
| } | ||
| /// Construct a new one from loaded data | ||
| /// | ||
| /// Typically it is preferable to use getters like [`load_script_with_extensions_unstable()`] instead | ||
| pub(crate) fn from_data(data: DataPayload<PropertyScriptWithExtensionsV1>) -> Self { | ||
| Self { data } | ||
| } | ||
| } | ||
| impl<'a> ScriptWithExtensionsBorrowed<'a> { | ||
| /// Returns the `Script` property value for this code point. | ||
| /// | ||
| /// # Examples | ||
| /// | ||
| /// ``` | ||
| /// use icu::properties::script::ScriptWithExtensions; | ||
| /// use icu::properties::props::Script; | ||
| /// | ||
| /// let swe = ScriptWithExtensions::new(); | ||
| /// | ||
| /// // U+0640 ARABIC TATWEEL | ||
| /// assert_eq!(swe.get_script_val('ـ'), Script::Common); // main Script value | ||
| /// assert_ne!(swe.get_script_val('ـ'), Script::Arabic); | ||
| /// assert_ne!(swe.get_script_val('ـ'), Script::Syriac); | ||
| /// assert_ne!(swe.get_script_val('ـ'), Script::Thaana); | ||
| /// | ||
| /// // U+0650 ARABIC KASRA | ||
| /// assert_eq!(swe.get_script_val('\u{0650}'), Script::Inherited); // main Script value | ||
| /// assert_ne!(swe.get_script_val('\u{0650}'), Script::Arabic); | ||
| /// assert_ne!(swe.get_script_val('\u{0650}'), Script::Syriac); | ||
| /// assert_ne!(swe.get_script_val('\u{0650}'), Script::Thaana); | ||
| /// | ||
| /// // U+0660 ARABIC-INDIC DIGIT ZERO | ||
| /// assert_ne!(swe.get_script_val('٠'), Script::Common); | ||
| /// assert_eq!(swe.get_script_val('٠'), Script::Arabic); // main Script value | ||
| /// assert_ne!(swe.get_script_val('٠'), Script::Syriac); | ||
| /// assert_ne!(swe.get_script_val('٠'), Script::Thaana); | ||
| /// | ||
| /// // U+FDF2 ARABIC LIGATURE ALLAH ISOLATED FORM | ||
| /// assert_ne!(swe.get_script_val('ﷲ'), Script::Common); | ||
| /// assert_eq!(swe.get_script_val('ﷲ'), Script::Arabic); // main Script value | ||
| /// assert_ne!(swe.get_script_val('ﷲ'), Script::Syriac); | ||
| /// assert_ne!(swe.get_script_val('ﷲ'), Script::Thaana); | ||
| /// ``` | ||
| pub fn get_script_val(self, ch: char) -> Script { | ||
| self.get_script_val32(ch as u32) | ||
| } | ||
| /// See [`Self::get_script_val`]. | ||
| pub fn get_script_val32(self, code_point: u32) -> Script { | ||
| let sc_with_ext = self.data.trie.get32(code_point); | ||
| if sc_with_ext.is_other() { | ||
| let ext_idx = sc_with_ext.0 & SCRIPT_X_SCRIPT_VAL; | ||
| let scx_val = self.data.extensions.get(ext_idx as usize); | ||
| let scx_first_sc = scx_val.and_then(|scx| scx.get(0)); | ||
| let default_sc_val = Script::Unknown; | ||
| scx_first_sc.unwrap_or(default_sc_val) | ||
| } else if sc_with_ext.is_common() { | ||
| Script::Common | ||
| } else if sc_with_ext.is_inherited() { | ||
| Script::Inherited | ||
| } else { | ||
| let script_val = sc_with_ext.0; | ||
| Script(script_val) | ||
| } | ||
| } | ||
| // Returns the Script_Extensions value for a code_point when the trie value | ||
| // is already known. | ||
| // This private helper method exists to prevent code duplication in callers like | ||
| // `get_script_extensions_val`, `get_script_extensions_set`, and `has_script`. | ||
| fn get_scx_val_using_trie_val( | ||
| self, | ||
| sc_with_ext_ule: &'a <ScriptWithExt as AsULE>::ULE, | ||
| ) -> &'a ZeroSlice<Script> { | ||
| let sc_with_ext = ScriptWithExt::from_unaligned(*sc_with_ext_ule); | ||
| if sc_with_ext.is_other() { | ||
| let ext_idx = sc_with_ext.0 & SCRIPT_X_SCRIPT_VAL; | ||
| let ext_subarray = self.data.extensions.get(ext_idx as usize); | ||
| // In the OTHER case, where the 2 higher-order bits of the | ||
| // `ScriptWithExt` value in the trie doesn't indicate the Script value, | ||
| // the Script value is copied/inserted into the first position of the | ||
| // `extensions` array. So we must remove it to return the actual scx array val. | ||
| let scx_slice = ext_subarray | ||
| .and_then(|zslice| zslice.as_ule_slice().get(1..)) | ||
| .unwrap_or_default(); | ||
| ZeroSlice::from_ule_slice(scx_slice) | ||
| } else if sc_with_ext.is_common() || sc_with_ext.is_inherited() { | ||
| let ext_idx = sc_with_ext.0 & SCRIPT_X_SCRIPT_VAL; | ||
| let scx_val = self.data.extensions.get(ext_idx as usize); | ||
| scx_val.unwrap_or_default() | ||
| } else { | ||
| // Note: `Script` and `ScriptWithExt` are both represented as the same | ||
| // u16 value when the `ScriptWithExt` has no higher-order bits set. | ||
| let script_ule_slice = core::slice::from_ref(sc_with_ext_ule); | ||
| ZeroSlice::from_ule_slice(script_ule_slice) | ||
| } | ||
| } | ||
| /// Return the `Script_Extensions` property value for this code point. | ||
| /// | ||
| /// If `code_point` has Script_Extensions, then return the Script codes in | ||
| /// the Script_Extensions. In this case, the Script property value | ||
| /// (normally Common or Inherited) is not included in the [`ScriptExtensionsSet`]. | ||
| /// | ||
| /// If c does not have Script_Extensions, then the one Script code is put | ||
| /// into the [`ScriptExtensionsSet`] and also returned. | ||
| /// | ||
| /// If c is not a valid code point, then return an empty [`ScriptExtensionsSet`]. | ||
| /// | ||
| /// # Examples | ||
| /// | ||
| /// ``` | ||
| /// use icu::properties::script::ScriptWithExtensions; | ||
| /// use icu::properties::props::Script; | ||
| /// | ||
| /// let swe = ScriptWithExtensions::new(); | ||
| /// | ||
| /// assert_eq!( | ||
| /// swe.get_script_extensions_val('𐓐') // U+104D0 OSAGE CAPITAL LETTER KHA | ||
| /// .iter() | ||
| /// .collect::<Vec<_>>(), | ||
| /// [Script::Osage] | ||
| /// ); | ||
| /// assert_eq!( | ||
| /// swe.get_script_extensions_val('🥳') // U+1F973 FACE WITH PARTY HORN AND PARTY HAT | ||
| /// .iter() | ||
| /// .collect::<Vec<_>>(), | ||
| /// [Script::Common] | ||
| /// ); | ||
| /// assert_eq!( | ||
| /// swe.get_script_extensions_val('\u{200D}') // ZERO WIDTH JOINER | ||
| /// .iter() | ||
| /// .collect::<Vec<_>>(), | ||
| /// [Script::Inherited] | ||
| /// ); | ||
| /// assert_eq!( | ||
| /// swe.get_script_extensions_val('௫') // U+0BEB TAMIL DIGIT FIVE | ||
| /// .iter() | ||
| /// .collect::<Vec<_>>(), | ||
| /// [Script::Tamil, Script::Grantha] | ||
| /// ); | ||
| /// ``` | ||
| pub fn get_script_extensions_val(self, ch: char) -> ScriptExtensionsSet<'a> { | ||
| self.get_script_extensions_val32(ch as u32) | ||
| } | ||
| /// See [`Self::get_script_extensions_val`]. | ||
| pub fn get_script_extensions_val32(self, code_point: u32) -> ScriptExtensionsSet<'a> { | ||
| let sc_with_ext_ule = self.data.trie.get32_ule(code_point); | ||
| ScriptExtensionsSet { | ||
| values: match sc_with_ext_ule { | ||
| Some(ule_ref) => self.get_scx_val_using_trie_val(ule_ref), | ||
| None => ZeroSlice::from_ule_slice(&[]), | ||
| }, | ||
| } | ||
| } | ||
| /// Returns whether `script` is contained in the Script_Extensions | ||
| /// property value if the code_point has Script_Extensions, otherwise | ||
| /// if the code point does not have Script_Extensions then returns | ||
| /// whether the Script property value matches. | ||
| /// | ||
| /// Some characters are commonly used in multiple scripts. For more information, | ||
| /// see UAX #24: <http://www.unicode.org/reports/tr24/>. | ||
| /// | ||
| /// # Examples | ||
| /// | ||
| /// ``` | ||
| /// use icu::properties::script::ScriptWithExtensions; | ||
| /// use icu::properties::props::Script; | ||
| /// | ||
| /// let swe = ScriptWithExtensions::new(); | ||
| /// | ||
| /// // U+0650 ARABIC KASRA | ||
| /// assert!(!swe.has_script('\u{0650}', Script::Inherited)); // main Script value | ||
| /// assert!(swe.has_script('\u{0650}', Script::Arabic)); | ||
| /// assert!(swe.has_script('\u{0650}', Script::Syriac)); | ||
| /// assert!(!swe.has_script('\u{0650}', Script::Thaana)); | ||
| /// | ||
| /// // U+0660 ARABIC-INDIC DIGIT ZERO | ||
| /// assert!(!swe.has_script('٠', Script::Common)); // main Script value | ||
| /// assert!(swe.has_script('٠', Script::Arabic)); | ||
| /// assert!(!swe.has_script('٠', Script::Syriac)); | ||
| /// assert!(swe.has_script('٠', Script::Thaana)); | ||
| /// | ||
| /// // U+FDF2 ARABIC LIGATURE ALLAH ISOLATED FORM | ||
| /// assert!(!swe.has_script('ﷲ', Script::Common)); | ||
| /// assert!(swe.has_script('ﷲ', Script::Arabic)); // main Script value | ||
| /// assert!(!swe.has_script('ﷲ', Script::Syriac)); | ||
| /// assert!(swe.has_script('ﷲ', Script::Thaana)); | ||
| /// ``` | ||
| pub fn has_script(self, ch: char, script: Script) -> bool { | ||
| self.has_script32(ch as u32, script) | ||
| } | ||
| /// See [`Self::has_script`]. | ||
| pub fn has_script32(self, code_point: u32, script: Script) -> bool { | ||
| let sc_with_ext_ule = if let Some(scwe_ule) = self.data.trie.get32_ule(code_point) { | ||
| scwe_ule | ||
| } else { | ||
| return false; | ||
| }; | ||
| let sc_with_ext = <ScriptWithExt as AsULE>::from_unaligned(*sc_with_ext_ule); | ||
| if !sc_with_ext.has_extensions() { | ||
| let script_val = sc_with_ext.0; | ||
| script == Script(script_val) | ||
| } else { | ||
| let scx_val = self.get_scx_val_using_trie_val(sc_with_ext_ule); | ||
| let script_find = scx_val.iter().find(|&sc| sc == script); | ||
| script_find.is_some() | ||
| } | ||
| } | ||
| /// Returns all of the matching `CodePointMapRange`s for the given [`Script`] | ||
| /// in which `has_script` will return true for all of the contained code points. | ||
| /// | ||
| /// # Examples | ||
| /// | ||
| /// ``` | ||
| /// use icu::properties::props::Script; | ||
| /// use icu::properties::script::ScriptWithExtensions; | ||
| /// | ||
| /// let swe = ScriptWithExtensions::new(); | ||
| /// | ||
| /// let syriac_script_extensions_ranges = | ||
| /// swe.get_script_extensions_ranges(Script::Syriac); | ||
| /// | ||
| /// let exp_ranges = [ | ||
| /// 0x0303..=0x0304, // COMBINING TILDE..COMBINING MACRON | ||
| /// 0x0307..=0x0308, // COMBINING DOT ABOVE..COMBINING DIAERESIS | ||
| /// 0x030A..=0x030A, // COMBINING RING ABOVE | ||
| /// 0x0323..=0x0325, // COMBINING DOT BELOW..COMBINING RING BELOW | ||
| /// 0x032D..=0x032E, // COMBINING CIRCUMFLEX ACCENT BELOW..COMBINING BREVE BELOW | ||
| /// 0x0330..=0x0331, // COMBINING TILDE BELOW..COMBINING MACRON BELOW | ||
| /// 0x060C..=0x060C, // ARABIC COMMA | ||
| /// 0x061B..=0x061C, // ARABIC SEMICOLON, ARABIC LETTER MARK | ||
| /// 0x061F..=0x061F, // ARABIC QUESTION MARK | ||
| /// 0x0640..=0x0640, // ARABIC TATWEEL | ||
| /// 0x064B..=0x0655, // ARABIC FATHATAN..ARABIC HAMZA BELOW | ||
| /// 0x0670..=0x0670, // ARABIC LETTER SUPERSCRIPT ALEF | ||
| /// 0x0700..=0x070D, // Syriac block begins at U+0700 | ||
| /// 0x070F..=0x074A, // Syriac block | ||
| /// 0x074D..=0x074F, // Syriac block ends at U+074F | ||
| /// 0x0860..=0x086A, // Syriac Supplement block is U+0860..=U+086F | ||
| /// 0x1DF8..=0x1DF8, // COMBINING DOT ABOVE LEFT | ||
| /// 0x1DFA..=0x1DFA, // COMBINING DOT BELOW LEFT | ||
| /// ]; | ||
| /// | ||
| /// assert_eq!( | ||
| /// syriac_script_extensions_ranges.collect::<Vec<_>>(), | ||
| /// exp_ranges | ||
| /// ); | ||
| /// ``` | ||
| pub fn get_script_extensions_ranges( | ||
| self, | ||
| script: Script, | ||
| ) -> impl Iterator<Item = RangeInclusive<u32>> + 'a { | ||
| self.data | ||
| .trie | ||
| .iter_ranges_mapped(move |value| { | ||
| let sc_with_ext = ScriptWithExt(value.0); | ||
| if sc_with_ext.has_extensions() { | ||
| self.get_scx_val_using_trie_val(&sc_with_ext.to_unaligned()) | ||
| .iter() | ||
| .any(|sc| sc == script) | ||
| } else { | ||
| script == sc_with_ext.into() | ||
| } | ||
| }) | ||
| .filter(|v| v.value) | ||
| .map(|v| v.range) | ||
| } | ||
| /// Returns a [`CodePointInversionList`] for the given [`Script`] which represents all | ||
| /// code points for which `has_script` will return true. | ||
| /// | ||
| /// ✨ *Enabled with the `alloc` Cargo feature.* | ||
| /// | ||
| /// # Examples | ||
| /// | ||
| /// ``` | ||
| /// use icu::properties::script::ScriptWithExtensions; | ||
| /// use icu::properties::props::Script; | ||
| /// | ||
| /// let swe = ScriptWithExtensions::new(); | ||
| /// | ||
| /// let syriac = swe.get_script_extensions_set(Script::Syriac); | ||
| /// | ||
| /// assert!(!syriac.contains('؞')); // ARABIC TRIPLE DOT PUNCTUATION MARK | ||
| /// assert!(syriac.contains('؟')); // ARABIC QUESTION MARK | ||
| /// assert!(!syriac.contains('ؠ')); // ARABIC LETTER KASHMIRI YEH | ||
| /// | ||
| /// assert!(syriac.contains('܀')); // SYRIAC END OF PARAGRAPH | ||
| /// assert!(syriac.contains('\u{074A}')); // SYRIAC BARREKH | ||
| /// assert!(!syriac.contains('\u{074B}')); // unassigned | ||
| /// assert!(syriac.contains('ݏ')); // SYRIAC LETTER SOGDIAN FE | ||
| /// assert!(!syriac.contains('ݐ')); // ARABIC LETTER BEH WITH THREE DOTS HORIZONTALLY BELOW | ||
| /// | ||
| /// assert!(syriac.contains('\u{1DF8}')); // COMBINING DOT ABOVE LEFT | ||
| /// assert!(!syriac.contains('\u{1DF9}')); // COMBINING WIDE INVERTED BRIDGE BELOW | ||
| /// assert!(syriac.contains('\u{1DFA}')); // COMBINING DOT BELOW LEFT | ||
| /// assert!(!syriac.contains('\u{1DFB}')); // COMBINING DELETION MARK | ||
| /// ``` | ||
| #[cfg(feature = "alloc")] | ||
| pub fn get_script_extensions_set(self, script: Script) -> CodePointInversionList<'a> { | ||
| CodePointInversionList::from_iter(self.get_script_extensions_ranges(script)) | ||
| } | ||
| } | ||
#[cfg(feature = "compiled_data")]
impl Default for ScriptWithExtensionsBorrowed<'static> {
    /// Same as [`ScriptWithExtensionsBorrowed::new`].
    fn default() -> Self {
        ScriptWithExtensionsBorrowed::new()
    }
}
| impl ScriptWithExtensionsBorrowed<'static> { | ||
| /// Creates a new instance of `ScriptWithExtensionsBorrowed` using compiled data. | ||
| /// | ||
| /// ✨ *Enabled with the `compiled_data` Cargo feature.* | ||
| /// | ||
| /// [📚 Help choosing a constructor](icu_provider::constructors) | ||
| #[cfg(feature = "compiled_data")] | ||
| pub fn new() -> Self { | ||
| Self { | ||
| data: crate::provider::Baked::SINGLETON_PROPERTY_SCRIPT_WITH_EXTENSIONS_V1, | ||
| } | ||
| } | ||
| /// Cheaply converts a [`ScriptWithExtensionsBorrowed<'static>`] into a [`ScriptWithExtensions`]. | ||
| /// | ||
| /// Note: Due to branching and indirection, using [`ScriptWithExtensions`] might inhibit some | ||
| /// compile-time optimizations that are possible with [`ScriptWithExtensionsBorrowed`]. | ||
| pub const fn static_to_owned(self) -> ScriptWithExtensions { | ||
| ScriptWithExtensions { | ||
| data: DataPayload::from_static_ref(self.data), | ||
| } | ||
| } | ||
| } | ||
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    /// Regression test for https://github.com/unicode-org/icu4x/issues/6041
    fn test_scx_regression_6041() {
        let swe = ScriptWithExtensions::new();
        let actual: Vec<_> = swe.get_script_extensions_val('\u{2bc}').iter().collect();
        let expected = [
            Script::Bengali,
            Script::Cyrillic,
            Script::Devanagari,
            Script::Latin,
            Script::Thai,
            Script::Lisu,
            Script::Toto,
        ];
        assert_eq!(actual, expected);
    }
}
| // This file is part of ICU4X. For terms of use, please see the file | ||
| // called LICENSE at the top level of the ICU4X source tree | ||
| // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). | ||
| use crate::bidi::BidiMirroringGlyph; | ||
| use crate::props::{ | ||
| BidiClass, CanonicalCombiningClass, EastAsianWidth, GeneralCategory, GeneralCategoryGroup, | ||
| GraphemeClusterBreak, HangulSyllableType, IndicConjunctBreak, IndicSyllabicCategory, | ||
| JoiningType, LineBreak, Script, SentenceBreak, VerticalOrientation, WordBreak, | ||
| }; | ||
| use crate::script::ScriptWithExt; | ||
| use core::convert::TryInto; | ||
| use core::num::TryFromIntError; | ||
| use zerovec::ule::{AsULE, RawBytesULE}; | ||
| use icu_collections::codepointtrie::TrieValue; | ||
| use core::convert::TryFrom; | ||
| impl TrieValue for CanonicalCombiningClass { | ||
| type TryFromU32Error = TryFromIntError; | ||
| fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { | ||
| u8::try_from(i).map(Self) | ||
| } | ||
| fn to_u32(self) -> u32 { | ||
| u32::from(self.0) | ||
| } | ||
| } | ||
| impl TrieValue for BidiClass { | ||
| type TryFromU32Error = TryFromIntError; | ||
| fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { | ||
| u8::try_from(i).map(Self) | ||
| } | ||
| fn to_u32(self) -> u32 { | ||
| u32::from(self.0) | ||
| } | ||
| } | ||
| impl TrieValue for GeneralCategory { | ||
| type TryFromU32Error = &'static str; | ||
| fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { | ||
| // If the u32 is out of range, fall back to u8::MAX, which is out of range of the GeneralCategory enum. | ||
| GeneralCategory::new_from_u8(i.try_into().unwrap_or(u8::MAX)) | ||
| .ok_or("Cannot parse GeneralCategory from integer") | ||
| } | ||
| fn to_u32(self) -> u32 { | ||
| u32::from(self as u8) | ||
| } | ||
| } | ||
| impl TrieValue for Script { | ||
| type TryFromU32Error = TryFromIntError; | ||
| fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { | ||
| u16::try_from(i).map(Script) | ||
| } | ||
| fn to_u32(self) -> u32 { | ||
| u32::from(self.0) | ||
| } | ||
| } | ||
| impl TrieValue for HangulSyllableType { | ||
| type TryFromU32Error = TryFromIntError; | ||
| fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { | ||
| u8::try_from(i).map(Self) | ||
| } | ||
| fn to_u32(self) -> u32 { | ||
| u32::from(self.0) | ||
| } | ||
| } | ||
| impl TrieValue for ScriptWithExt { | ||
| type TryFromU32Error = TryFromIntError; | ||
| fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { | ||
| u16::try_from(i).map(Self) | ||
| } | ||
| fn to_u32(self) -> u32 { | ||
| u32::from(self.0) | ||
| } | ||
| } | ||
| impl TrieValue for EastAsianWidth { | ||
| type TryFromU32Error = TryFromIntError; | ||
| fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { | ||
| u8::try_from(i).map(Self) | ||
| } | ||
| fn to_u32(self) -> u32 { | ||
| u32::from(self.0) | ||
| } | ||
| } | ||
| impl TrieValue for LineBreak { | ||
| type TryFromU32Error = TryFromIntError; | ||
| fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { | ||
| u8::try_from(i).map(Self) | ||
| } | ||
| fn to_u32(self) -> u32 { | ||
| u32::from(self.0) | ||
| } | ||
| } | ||
| impl TrieValue for GraphemeClusterBreak { | ||
| type TryFromU32Error = TryFromIntError; | ||
| fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { | ||
| u8::try_from(i).map(Self) | ||
| } | ||
| fn to_u32(self) -> u32 { | ||
| u32::from(self.0) | ||
| } | ||
| } | ||
| impl TrieValue for WordBreak { | ||
| type TryFromU32Error = TryFromIntError; | ||
| fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { | ||
| u8::try_from(i).map(Self) | ||
| } | ||
| fn to_u32(self) -> u32 { | ||
| u32::from(self.0) | ||
| } | ||
| } | ||
| impl TrieValue for SentenceBreak { | ||
| type TryFromU32Error = TryFromIntError; | ||
| fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { | ||
| u8::try_from(i).map(Self) | ||
| } | ||
| fn to_u32(self) -> u32 { | ||
| u32::from(self.0) | ||
| } | ||
| } | ||
| impl TrieValue for IndicConjunctBreak { | ||
| type TryFromU32Error = TryFromIntError; | ||
| fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { | ||
| u8::try_from(i).map(Self) | ||
| } | ||
| fn to_u32(self) -> u32 { | ||
| u32::from(self.0) | ||
| } | ||
| } | ||
| impl TrieValue for IndicSyllabicCategory { | ||
| type TryFromU32Error = TryFromIntError; | ||
| fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { | ||
| u8::try_from(i).map(Self) | ||
| } | ||
| fn to_u32(self) -> u32 { | ||
| u32::from(self.0) | ||
| } | ||
| } | ||
| impl TrieValue for VerticalOrientation { | ||
| type TryFromU32Error = TryFromIntError; | ||
| fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { | ||
| u8::try_from(i).map(Self) | ||
| } | ||
| fn to_u32(self) -> u32 { | ||
| u32::from(self.0) | ||
| } | ||
| } | ||
| // GCG is not used inside tries, but it is used in the name lookup type, and we want | ||
| // to squeeze it into a u16 for storage. Its named mask values are specced so we can | ||
| // do this in code. | ||
| // | ||
| // This is done by: | ||
| // - Single-value masks are translated to their corresponding GeneralCategory values | ||
| // - we know all of the multi-value masks and we give them special values | ||
| // - Anything else goes to 0xFF00, though this code path shouldn't be hit unless working with malformed icuexportdata | ||
| // | ||
| // In the reverse direction, unknown values go to the empty mask, but this codepath should not be hit except | ||
| // with malformed ICU4X generated data. | ||
| impl AsULE for GeneralCategoryGroup { | ||
| type ULE = RawBytesULE<2>; | ||
| fn to_unaligned(self) -> Self::ULE { | ||
| let value = gcg_to_packed_u16(self); | ||
| value.to_unaligned() | ||
| } | ||
| fn from_unaligned(ule: Self::ULE) -> Self { | ||
| let value = ule.as_unsigned_int(); | ||
| packed_u16_to_gcg(value) | ||
| } | ||
| } | ||
| fn packed_u16_to_gcg(value: u16) -> GeneralCategoryGroup { | ||
| match value { | ||
| 0xFFFF => GeneralCategoryGroup::CasedLetter, | ||
| 0xFFFE => GeneralCategoryGroup::Letter, | ||
| 0xFFFD => GeneralCategoryGroup::Mark, | ||
| 0xFFFC => GeneralCategoryGroup::Number, | ||
| 0xFFFB => GeneralCategoryGroup::Separator, | ||
| 0xFFFA => GeneralCategoryGroup::Other, | ||
| 0xFFF9 => GeneralCategoryGroup::Punctuation, | ||
| 0xFFF8 => GeneralCategoryGroup::Symbol, | ||
| v if v < 32 => GeneralCategory::new_from_u8(v as u8) | ||
| .map(|gc| gc.into()) | ||
| .unwrap_or(GeneralCategoryGroup(0)), | ||
| // unknown values produce an empty mask | ||
| _ => GeneralCategoryGroup(0), | ||
| } | ||
| } | ||
| fn gcg_to_packed_u16(gcg: GeneralCategoryGroup) -> u16 { | ||
| // if it's a single property, translate to that property | ||
| if gcg.0.is_power_of_two() { | ||
| // inverse operation of a bitshift | ||
| gcg.0.trailing_zeros() as u16 | ||
| } else { | ||
| match gcg { | ||
| GeneralCategoryGroup::CasedLetter => 0xFFFF, | ||
| GeneralCategoryGroup::Letter => 0xFFFE, | ||
| GeneralCategoryGroup::Mark => 0xFFFD, | ||
| GeneralCategoryGroup::Number => 0xFFFC, | ||
| GeneralCategoryGroup::Separator => 0xFFFB, | ||
| GeneralCategoryGroup::Other => 0xFFFA, | ||
| GeneralCategoryGroup::Punctuation => 0xFFF9, | ||
| GeneralCategoryGroup::Symbol => 0xFFF8, | ||
| _ => 0xFF00, // random sentinel value | ||
| } | ||
| } | ||
| } | ||
| impl TrieValue for GeneralCategoryGroup { | ||
| type TryFromU32Error = TryFromIntError; | ||
| fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { | ||
| // Even though we're dealing with u32s here, TrieValue is about converting | ||
| // trie storage types to the actual type. This type will always be a packed u16 | ||
| // in our case since the names map upcasts from u16 | ||
| u16::try_from(i).map(packed_u16_to_gcg) | ||
| } | ||
| fn to_u32(self) -> u32 { | ||
| u32::from(gcg_to_packed_u16(self)) | ||
| } | ||
| } | ||
| impl TrieValue for BidiMirroringGlyph { | ||
| type TryFromU32Error = u32; | ||
| fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { | ||
| let code_point = i & 0x1FFFFF; | ||
| let mirroring_glyph = if code_point == 0 { | ||
| None | ||
| } else { | ||
| Some(char::try_from_u32(code_point).map_err(|_| i)?) | ||
| }; | ||
| let mirrored = ((i >> 21) & 0x1) == 1; | ||
| let paired_bracket_type = { | ||
| let value = ((i >> 22) & 0x3) as u8; | ||
| match value { | ||
| 0 => crate::bidi::BidiPairedBracketType::None, | ||
| 1 => crate::bidi::BidiPairedBracketType::Open, | ||
| 2 => crate::bidi::BidiPairedBracketType::Close, | ||
| _ => return Err(i), | ||
| } | ||
| }; | ||
| Ok(Self { | ||
| mirrored, | ||
| mirroring_glyph, | ||
| paired_bracket_type, | ||
| }) | ||
| } | ||
| fn to_u32(self) -> u32 { | ||
| self.mirroring_glyph.unwrap_or_default() as u32 | ||
| | ((self.mirrored as u32) << 21) | ||
| | (match self.paired_bracket_type { | ||
| crate::bidi::BidiPairedBracketType::None => 0, | ||
| crate::bidi::BidiPairedBracketType::Open => 1, | ||
| crate::bidi::BidiPairedBracketType::Close => 2, | ||
| } << 22) | ||
| } | ||
| } | ||
| impl TrieValue for JoiningType { | ||
| type TryFromU32Error = TryFromIntError; | ||
| fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { | ||
| u8::try_from(i).map(Self) | ||
| } | ||
| fn to_u32(self) -> u32 { | ||
| u32::from(self.0) | ||
| } | ||
| } |
| { | ||
| "git": { | ||
| "sha1": "29dfe2790b6cfdab94ca6a6b69f58ce54802dbf7" | ||
| }, | ||
| "path_in_vcs": "components/properties" | ||
| } |
| # This file is automatically @generated by Cargo. | ||
| # It is not intended for manual editing. | ||
| version = 3 | ||
| [[package]] | ||
| name = "cobs" | ||
| version = "0.3.0" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "0fa961b519f0b462e3a3b4a34b64d119eeaca1d59af726fe450bbba07a9fc0a1" | ||
| dependencies = [ | ||
| "thiserror", | ||
| ] | ||
| [[package]] | ||
| name = "databake" | ||
| version = "0.2.0" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "ff6ee9e2d2afb173bcdeee45934c89ec341ab26f91c9933774fc15c2b58f83ef" | ||
| dependencies = [ | ||
| "databake-derive", | ||
| "proc-macro2", | ||
| "quote", | ||
| ] | ||
| [[package]] | ||
| name = "databake-derive" | ||
| version = "0.2.0" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "6834770958c7b84223607e49758ec0dde273c4df915e734aad50f62968a4c134" | ||
| dependencies = [ | ||
| "proc-macro2", | ||
| "quote", | ||
| "syn", | ||
| "synstructure", | ||
| ] | ||
| [[package]] | ||
| name = "displaydoc" | ||
| version = "0.2.5" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" | ||
| dependencies = [ | ||
| "proc-macro2", | ||
| "quote", | ||
| "syn", | ||
| ] | ||
| [[package]] | ||
| name = "erased-serde" | ||
| version = "0.4.8" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "259d404d09818dec19332e31d94558aeb442fea04c817006456c24b5460bbd4b" | ||
| dependencies = [ | ||
| "serde", | ||
| "serde_core", | ||
| "typeid", | ||
| ] | ||
| [[package]] | ||
| name = "icu_collections" | ||
| version = "2.1.0" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "f578a71f2bfaf7ceb30b519a645ae48024b45f9eecbe060a31a004d7b4ba9462" | ||
| dependencies = [ | ||
| "databake", | ||
| "displaydoc", | ||
| "potential_utf", | ||
| "serde", | ||
| "yoke", | ||
| "zerofrom", | ||
| "zerovec", | ||
| ] | ||
| [[package]] | ||
| name = "icu_locale_core" | ||
| version = "2.1.0" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "4c219b62bf5a06801012446193fdfcbd7970e876823aba4c62def2ce957dcb44" | ||
| dependencies = [ | ||
| "databake", | ||
| "displaydoc", | ||
| "litemap", | ||
| "serde", | ||
| "tinystr", | ||
| "writeable", | ||
| "zerovec", | ||
| ] | ||
| [[package]] | ||
| name = "icu_properties" | ||
| version = "2.1.0" | ||
| dependencies = [ | ||
| "databake", | ||
| "icu_collections", | ||
| "icu_locale_core", | ||
| "icu_properties_data", | ||
| "icu_provider", | ||
| "serde", | ||
| "unicode-bidi", | ||
| "zerotrie", | ||
| "zerovec", | ||
| ] | ||
| [[package]] | ||
| name = "icu_properties_data" | ||
| version = "2.1.0" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "17fa55bf868e28e638ed132bcee1e5c21ba2c1e52c15e7c78b781858e7b54342" | ||
| [[package]] | ||
| name = "icu_provider" | ||
| version = "2.1.0" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "f64958e359123591ae1f17a27b5fc9ebdb50c98b04e0401146154de1d8fe3e44" | ||
| dependencies = [ | ||
| "databake", | ||
| "displaydoc", | ||
| "erased-serde", | ||
| "icu_locale_core", | ||
| "postcard", | ||
| "serde", | ||
| "stable_deref_trait", | ||
| "writeable", | ||
| "yoke", | ||
| "zerofrom", | ||
| "zerotrie", | ||
| "zerovec", | ||
| ] | ||
| [[package]] | ||
| name = "litemap" | ||
| version = "0.8.1" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" | ||
| dependencies = [ | ||
| "serde_core", | ||
| ] | ||
| [[package]] | ||
| name = "postcard" | ||
| version = "1.1.3" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "6764c3b5dd454e283a30e6dfe78e9b31096d9e32036b5d1eaac7a6119ccb9a24" | ||
| dependencies = [ | ||
| "cobs", | ||
| "serde", | ||
| ] | ||
| [[package]] | ||
| name = "potential_utf" | ||
| version = "0.1.4" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77" | ||
| dependencies = [ | ||
| "serde_core", | ||
| "zerovec", | ||
| ] | ||
| [[package]] | ||
| name = "proc-macro2" | ||
| version = "1.0.103" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" | ||
| dependencies = [ | ||
| "unicode-ident", | ||
| ] | ||
| [[package]] | ||
| name = "quote" | ||
| version = "1.0.41" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" | ||
| dependencies = [ | ||
| "proc-macro2", | ||
| ] | ||
| [[package]] | ||
| name = "serde" | ||
| version = "1.0.228" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" | ||
| dependencies = [ | ||
| "serde_core", | ||
| "serde_derive", | ||
| ] | ||
| [[package]] | ||
| name = "serde_core" | ||
| version = "1.0.228" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" | ||
| dependencies = [ | ||
| "serde_derive", | ||
| ] | ||
| [[package]] | ||
| name = "serde_derive" | ||
| version = "1.0.228" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" | ||
| dependencies = [ | ||
| "proc-macro2", | ||
| "quote", | ||
| "syn", | ||
| ] | ||
| [[package]] | ||
| name = "stable_deref_trait" | ||
| version = "1.2.1" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" | ||
| [[package]] | ||
| name = "syn" | ||
| version = "2.0.108" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "da58917d35242480a05c2897064da0a80589a2a0476c9a3f2fdc83b53502e917" | ||
| dependencies = [ | ||
| "proc-macro2", | ||
| "quote", | ||
| "unicode-ident", | ||
| ] | ||
| [[package]] | ||
| name = "synstructure" | ||
| version = "0.13.2" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" | ||
| dependencies = [ | ||
| "proc-macro2", | ||
| "quote", | ||
| "syn", | ||
| ] | ||
| [[package]] | ||
| name = "thiserror" | ||
| version = "2.0.17" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8" | ||
| dependencies = [ | ||
| "thiserror-impl", | ||
| ] | ||
| [[package]] | ||
| name = "thiserror-impl" | ||
| version = "2.0.17" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" | ||
| dependencies = [ | ||
| "proc-macro2", | ||
| "quote", | ||
| "syn", | ||
| ] | ||
| [[package]] | ||
| name = "tinystr" | ||
| version = "0.8.2" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869" | ||
| dependencies = [ | ||
| "displaydoc", | ||
| "serde_core", | ||
| "zerovec", | ||
| ] | ||
| [[package]] | ||
| name = "typeid" | ||
| version = "1.0.3" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "bc7d623258602320d5c55d1bc22793b57daff0ec7efc270ea7d55ce1d5f5471c" | ||
| [[package]] | ||
| name = "unicode-bidi" | ||
| version = "0.3.18" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5" | ||
| [[package]] | ||
| name = "unicode-ident" | ||
| version = "1.0.20" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "462eeb75aeb73aea900253ce739c8e18a67423fadf006037cd3ff27e82748a06" | ||
| [[package]] | ||
| name = "writeable" | ||
| version = "0.6.2" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" | ||
| [[package]] | ||
| name = "yoke" | ||
| version = "0.8.1" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954" | ||
| dependencies = [ | ||
| "stable_deref_trait", | ||
| "yoke-derive", | ||
| "zerofrom", | ||
| ] | ||
| [[package]] | ||
| name = "yoke-derive" | ||
| version = "0.8.1" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" | ||
| dependencies = [ | ||
| "proc-macro2", | ||
| "quote", | ||
| "syn", | ||
| "synstructure", | ||
| ] | ||
| [[package]] | ||
| name = "zerofrom" | ||
| version = "0.1.6" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" | ||
| dependencies = [ | ||
| "zerofrom-derive", | ||
| ] | ||
| [[package]] | ||
| name = "zerofrom-derive" | ||
| version = "0.1.6" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" | ||
| dependencies = [ | ||
| "proc-macro2", | ||
| "quote", | ||
| "syn", | ||
| "synstructure", | ||
| ] | ||
| [[package]] | ||
| name = "zerotrie" | ||
| version = "0.2.3" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851" | ||
| dependencies = [ | ||
| "databake", | ||
| "displaydoc", | ||
| "litemap", | ||
| "serde_core", | ||
| "yoke", | ||
| "zerofrom", | ||
| "zerovec", | ||
| ] | ||
| [[package]] | ||
| name = "zerovec" | ||
| version = "0.11.5" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" | ||
| dependencies = [ | ||
| "databake", | ||
| "serde", | ||
| "yoke", | ||
| "zerofrom", | ||
| "zerovec-derive", | ||
| ] | ||
| [[package]] | ||
| name = "zerovec-derive" | ||
| version = "0.11.2" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" | ||
| dependencies = [ | ||
| "proc-macro2", | ||
| "quote", | ||
| "syn", | ||
| ] |
| # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO | ||
| # | ||
| # When uploading crates to the registry Cargo will automatically | ||
| # "normalize" Cargo.toml files for maximal compatibility | ||
| # with all versions of Cargo and also rewrite `path` dependencies | ||
| # to registry (e.g., crates.io) dependencies. | ||
| # | ||
| # If you are reading this file be aware that the original Cargo.toml | ||
| # will likely look very different (and much more reasonable). | ||
| # See Cargo.toml.orig for the original contents. | ||
| [package] | ||
| edition = "2021" | ||
| rust-version = "1.83" | ||
| name = "icu_properties" | ||
| version = "2.1.0" | ||
| authors = ["The ICU4X Project Developers"] | ||
| build = false | ||
| include = [ | ||
| "data/**/*", | ||
| "src/**/*", | ||
| "examples/**/*", | ||
| "benches/**/*", | ||
| "tests/**/*", | ||
| "Cargo.toml", | ||
| "LICENSE", | ||
| "README.md", | ||
| "build.rs", | ||
| ] | ||
| autolib = false | ||
| autobins = false | ||
| autoexamples = false | ||
| autotests = false | ||
| autobenches = false | ||
| description = "Definitions for Unicode properties" | ||
| homepage = "https://icu4x.unicode.org" | ||
| readme = "README.md" | ||
| categories = ["internationalization"] | ||
| license = "Unicode-3.0" | ||
| repository = "https://github.com/unicode-org/icu4x" | ||
| [package.metadata.docs.rs] | ||
| all-features = true | ||
| [features] | ||
| alloc = [ | ||
| "zerovec/alloc", | ||
| "icu_collections/alloc", | ||
| "serde?/alloc", | ||
| ] | ||
| compiled_data = [ | ||
| "dep:icu_properties_data", | ||
| "icu_provider/baked", | ||
| ] | ||
| datagen = [ | ||
| "serde", | ||
| "dep:databake", | ||
| "zerovec/databake", | ||
| "icu_collections/databake", | ||
| "icu_locale_core/databake", | ||
| "zerotrie/databake", | ||
| "icu_provider/export", | ||
| ] | ||
| default = ["compiled_data"] | ||
| serde = [ | ||
| "dep:serde", | ||
| "icu_locale_core/serde", | ||
| "zerovec/serde", | ||
| "icu_collections/serde", | ||
| "icu_provider/serde", | ||
| "zerotrie/serde", | ||
| ] | ||
| unicode_bidi = ["dep:unicode-bidi"] | ||
| [lib] | ||
| name = "icu_properties" | ||
| path = "src/lib.rs" | ||
| [dependencies.databake] | ||
| version = "0.2.0" | ||
| features = ["derive"] | ||
| optional = true | ||
| default-features = false | ||
| [dependencies.icu_collections] | ||
| version = "~2.1.0" | ||
| default-features = false | ||
| [dependencies.icu_locale_core] | ||
| version = "2.0.0" | ||
| features = ["zerovec"] | ||
| default-features = false | ||
| [dependencies.icu_properties_data] | ||
| version = "~2.1.0" | ||
| optional = true | ||
| default-features = false | ||
| [dependencies.icu_provider] | ||
| version = "2.0.0" | ||
| default-features = false | ||
| [dependencies.serde] | ||
| version = "1.0.220" | ||
| features = ["derive"] | ||
| optional = true | ||
| default-features = false | ||
| [dependencies.unicode-bidi] | ||
| version = "0.3.11" | ||
| optional = true | ||
| default-features = false | ||
| [dependencies.zerotrie] | ||
| version = "0.2.0" | ||
| features = [ | ||
| "yoke", | ||
| "zerofrom", | ||
| ] | ||
| default-features = false | ||
| [dependencies.zerovec] | ||
| version = "0.11.3" | ||
| features = [ | ||
| "derive", | ||
| "yoke", | ||
| ] | ||
| default-features = false | ||
| [dev-dependencies] |
Sorry, the diff of this file is not supported yet
| UNICODE LICENSE V3 | ||
| COPYRIGHT AND PERMISSION NOTICE | ||
| Copyright © 2020-2024 Unicode, Inc. | ||
| NOTICE TO USER: Carefully read the following legal agreement. BY | ||
| DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING DATA FILES, AND/OR | ||
| SOFTWARE, YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE | ||
| TERMS AND CONDITIONS OF THIS AGREEMENT. IF YOU DO NOT AGREE, DO NOT | ||
| DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE THE DATA FILES OR SOFTWARE. | ||
| Permission is hereby granted, free of charge, to any person obtaining a | ||
| copy of data files and any associated documentation (the "Data Files") or | ||
| software and any associated documentation (the "Software") to deal in the | ||
| Data Files or Software without restriction, including without limitation | ||
| the rights to use, copy, modify, merge, publish, distribute, and/or sell | ||
| copies of the Data Files or Software, and to permit persons to whom the | ||
| Data Files or Software are furnished to do so, provided that either (a) | ||
| this copyright and permission notice appear with all copies of the Data | ||
| Files or Software, or (b) this copyright and permission notice appear in | ||
| associated Documentation. | ||
| THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY | ||
| KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
| MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF | ||
| THIRD PARTY RIGHTS. | ||
| IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE | ||
| BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, | ||
| OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, | ||
| WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, | ||
| ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THE DATA | ||
| FILES OR SOFTWARE. | ||
| Except as contained in this notice, the name of a copyright holder shall | ||
| not be used in advertising or otherwise to promote the sale, use or other | ||
| dealings in these Data Files or Software without prior written | ||
| authorization of the copyright holder. | ||
| SPDX-License-Identifier: Unicode-3.0 | ||
| — | ||
| Portions of ICU4X may have been adapted from ICU4C and/or ICU4J. | ||
| ICU 1.8.1 to ICU 57.1 © 1995-2016 International Business Machines Corporation and others. |
| # icu_properties [![crates.io](https://img.shields.io/crates/v/icu_properties)](https://crates.io/crates/icu_properties) | ||
| <!-- cargo-rdme start --> | ||
| Definitions of [Unicode Properties] and APIs for | ||
| retrieving property data in an appropriate data structure. | ||
| This module is published as its own crate ([`icu_properties`](https://docs.rs/icu_properties/latest/icu_properties/)) | ||
| and as part of the [`icu`](https://docs.rs/icu/latest/icu/) crate. See the latter for more details on the ICU4X project. | ||
| APIs that return a `CodePointSetData` exist for binary properties and certain enumerated | ||
| properties. | ||
| APIs that return a `CodePointMapData` exist for certain enumerated properties. | ||
| ## Examples | ||
| ### Property data as `CodePointSetData`s | ||
| ```rust | ||
| use icu::properties::{CodePointSetData, CodePointMapData}; | ||
| use icu::properties::props::{GeneralCategory, Emoji}; | ||
| // A binary property as a `CodePointSetData` | ||
| assert!(CodePointSetData::new::<Emoji>().contains('🎃')); // U+1F383 JACK-O-LANTERN | ||
| assert!(!CodePointSetData::new::<Emoji>().contains('木')); // U+6728 | ||
| // An individual enumerated property value as a `CodePointSetData` | ||
| let line_sep_data = CodePointMapData::<GeneralCategory>::new() | ||
| .get_set_for_value(GeneralCategory::LineSeparator); | ||
| let line_sep = line_sep_data.as_borrowed(); | ||
| assert!(line_sep.contains('\u{2028}')); | ||
| assert!(!line_sep.contains('\u{2029}')); | ||
| ``` | ||
| ### Property data as `CodePointMapData`s | ||
| ```rust | ||
| use icu::properties::CodePointMapData; | ||
| use icu::properties::props::Script; | ||
| assert_eq!(CodePointMapData::<Script>::new().get('🎃'), Script::Common); // U+1F383 JACK-O-LANTERN | ||
| assert_eq!(CodePointMapData::<Script>::new().get('木'), Script::Han); // U+6728 | ||
| ``` | ||
| [`ICU4X`]: ../icu/index.html | ||
| [Unicode Properties]: https://unicode-org.github.io/icu/userguide/strings/properties.html | ||
| <!-- cargo-rdme end --> | ||
| ## More Information | ||
| For more information on development, authorship, contributing etc. please visit [`ICU4X home page`](https://github.com/unicode-org/icu4x). |
| // This file is part of ICU4X. For terms of use, please see the file | ||
| // called LICENSE at the top level of the ICU4X source tree | ||
| // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). | ||
| use crate::{props::EnumeratedProperty, provider::PropertyEnumBidiMirroringGlyphV1}; | ||
| use icu_collections::codepointtrie::TrieValue; | ||
| use zerovec::ule::{AsULE, RawBytesULE}; | ||
| /// This is a bitpacked combination of the `Bidi_Mirroring_Glyph`, | ||
| /// `Bidi_Mirrored`, and `Bidi_Paired_Bracket_Type` properties. | ||
| #[derive(Debug, Eq, PartialEq, Clone, Copy, Default)] | ||
| #[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))] | ||
| #[cfg_attr(feature = "datagen", databake(path = icu_properties::props))] | ||
| #[cfg_attr(feature = "serde", derive(serde::Deserialize))] | ||
| #[allow(clippy::exhaustive_structs)] // needed for baked construction | ||
| pub struct BidiMirroringGlyph { | ||
| /// The mirroring glyph | ||
| pub mirroring_glyph: Option<char>, | ||
| /// Whether the glyph is mirrored | ||
| pub mirrored: bool, | ||
| /// The paired bracket type | ||
| pub paired_bracket_type: BidiPairedBracketType, | ||
| } | ||
| impl EnumeratedProperty for BidiMirroringGlyph { | ||
| type DataMarker = PropertyEnumBidiMirroringGlyphV1; | ||
| #[cfg(feature = "compiled_data")] | ||
| const SINGLETON: &'static crate::provider::PropertyCodePointMap<'static, Self> = | ||
| crate::provider::Baked::SINGLETON_PROPERTY_ENUM_BIDI_MIRRORING_GLYPH_V1; | ||
| const NAME: &'static [u8] = b"Bidi_Mirroring_Glyph"; | ||
| const SHORT_NAME: &'static [u8] = b"Bidi_Mirroring_Glyph"; | ||
| } | ||
| impl crate::private::Sealed for BidiMirroringGlyph {} | ||
| impl AsULE for BidiMirroringGlyph { | ||
| type ULE = zerovec::ule::RawBytesULE<3>; | ||
| fn to_unaligned(self) -> Self::ULE { | ||
| let [a, b, c, _] = TrieValue::to_u32(self).to_le_bytes(); | ||
| RawBytesULE([a, b, c]) | ||
| } | ||
| fn from_unaligned(unaligned: Self::ULE) -> Self { | ||
| let [a, b, c] = unaligned.0; | ||
| TrieValue::try_from_u32(u32::from_le_bytes([a, b, c, 0])).unwrap_or_default() | ||
| } | ||
| } | ||
/// Represents the `Bidi_Paired_Bracket_Type` property.
///
/// This enum does not implement [`EnumeratedProperty`]; instead, its value is
/// obtained through the bitpacked [`BidiMirroringGlyph`] property.
///
/// If you have a use case for this property that does not also need the
/// [`BidiMirroringGlyph`] property, and you need to optimize data size, please
/// file an issue.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
#[cfg_attr(
    feature = "datagen",
    derive(serde::Serialize, databake::Bake),
    databake(path = icu_properties::props)
)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[non_exhaustive]
pub enum BidiPairedBracketType {
    /// Represents Bidi_Paired_Bracket_Type=Open.
    Open,
    /// Represents Bidi_Paired_Bracket_Type=Close.
    Close,
    /// Represents Bidi_Paired_Bracket_Type=None. This is the default value.
    #[default]
    None,
}
/// Implements [`unicode_bidi::BidiDataSource`] on [`CodePointMapDataBorrowed<BidiClass>`](crate::CodePointMapDataBorrowed).
///
/// ✨ *Enabled with the `unicode_bidi` Cargo feature.*
///
/// # Examples
///
/// ```
/// use icu::properties::CodePointMapData;
/// use icu::properties::props::BidiClass;
/// use unicode_bidi::BidiInfo;
///
/// // This example text is defined using `concat!` because some browsers
/// // and text editors have trouble displaying bidi strings.
/// let text = concat!["א", // RTL#1
///                     "ב", // RTL#2
///                     "ג", // RTL#3
///                     "a", // LTR#1
///                     "b", // LTR#2
///                     "c", // LTR#3
///                     ]; //
///
///
/// let bidi_map = CodePointMapData::<BidiClass>::new();
///
/// // Resolve embedding levels within the text.  Pass `None` to detect the
/// // paragraph level automatically.
/// let bidi_info = BidiInfo::new_with_data_source(&bidi_map, text, None);
///
/// // This paragraph has embedding level 1 because its first strong character is RTL.
/// assert_eq!(bidi_info.paragraphs.len(), 1);
/// let para = &bidi_info.paragraphs[0];
/// assert_eq!(para.level.number(), 1);
/// assert!(para.level.is_rtl());
///
/// // Re-ordering is done after wrapping each paragraph into a sequence of
/// // lines. For this example, I'll just use a single line that spans the
/// // entire paragraph.
/// let line = para.range.clone();
///
/// let display = bidi_info.reorder_line(para, line);
/// assert_eq!(display, concat!["a", // LTR#1
///                             "b", // LTR#2
///                             "c", // LTR#3
///                             "ג", // RTL#3
///                             "ב", // RTL#2
///                             "א", // RTL#1
///                             ]);
/// ```
#[cfg(feature = "unicode_bidi")]
impl unicode_bidi::data_source::BidiDataSource
    for crate::CodePointMapDataBorrowed<'_, crate::props::BidiClass>
{
    /// Looks up the class of `c` in this map and translates it into the
    /// corresponding `unicode_bidi` class.
    fn bidi_class(&self, c: char) -> unicode_bidi::BidiClass {
        use crate::props::BidiClass as Icu;
        use unicode_bidi::BidiClass as Ub;

        match self.get(c) {
            Icu::LeftToRight => Ub::L,
            Icu::RightToLeft => Ub::R,
            Icu::EuropeanNumber => Ub::EN,
            Icu::EuropeanSeparator => Ub::ES,
            Icu::EuropeanTerminator => Ub::ET,
            Icu::ArabicNumber => Ub::AN,
            Icu::CommonSeparator => Ub::CS,
            Icu::ParagraphSeparator => Ub::B,
            Icu::SegmentSeparator => Ub::S,
            Icu::WhiteSpace => Ub::WS,
            Icu::OtherNeutral => Ub::ON,
            Icu::LeftToRightEmbedding => Ub::LRE,
            Icu::LeftToRightOverride => Ub::LRO,
            Icu::ArabicLetter => Ub::AL,
            Icu::RightToLeftEmbedding => Ub::RLE,
            Icu::RightToLeftOverride => Ub::RLO,
            Icu::PopDirectionalFormat => Ub::PDF,
            Icu::NonspacingMark => Ub::NSM,
            Icu::BoundaryNeutral => Ub::BN,
            Icu::FirstStrongIsolate => Ub::FSI,
            Icu::LeftToRightIsolate => Ub::LRI,
            Icu::RightToLeftIsolate => Ub::RLI,
            Icu::PopDirectionalIsolate => Ub::PDI,
            // This must not happen; any other value falls back to `ON`.
            _ => Ub::ON,
        }
    }
}
| // This file is part of ICU4X. For terms of use, please see the file | ||
| // called LICENSE at the top level of the ICU4X source tree | ||
| // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). | ||
| #[cfg(feature = "alloc")] | ||
| use crate::code_point_set::CodePointSetData; | ||
| use crate::props::GeneralCategory; | ||
| use crate::props::GeneralCategoryGroup; | ||
| use crate::provider::*; | ||
| use core::ops::RangeInclusive; | ||
| use icu_collections::codepointtrie::{CodePointMapRange, CodePointTrie, TrieValue}; | ||
| use icu_provider::marker::ErasedMarker; | ||
| use icu_provider::prelude::*; | ||
| /// A wrapper around code point map data. | ||
| /// | ||
| /// It is returned by APIs that return Unicode | ||
| /// property data in a map-like form, ex: enumerated property value data keyed | ||
| /// by code point. Access its data via the borrowed version, | ||
| /// [`CodePointMapDataBorrowed`]. | ||
| #[derive(Debug, Clone)] | ||
| pub struct CodePointMapData<T: TrieValue> { | ||
| data: DataPayload<ErasedMarker<PropertyCodePointMap<'static, T>>>, | ||
| } | ||
| impl<T: TrieValue> CodePointMapData<T> { | ||
/// Creates a new [`CodePointMapData`] for an [`EnumeratedProperty`], returned
/// directly in its borrowed form.
///
/// See the documentation on [`EnumeratedProperty`] implementations for details.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
#[cfg(feature = "compiled_data")]
#[expect(clippy::new_ret_no_self)]
pub const fn new() -> CodePointMapDataBorrowed<'static, T>
where
    T: EnumeratedProperty,
{
    CodePointMapDataBorrowed::<'static, T>::new()
}
| #[doc = icu_provider::gen_buffer_unstable_docs!(UNSTABLE, Self::new)] | ||
| pub fn try_new_unstable( | ||
| provider: &(impl DataProvider<T::DataMarker> + ?Sized), | ||
| ) -> Result<Self, DataError> | ||
| where | ||
| T: EnumeratedProperty, | ||
| { | ||
| Ok(Self { | ||
| data: provider.load(Default::default())?.payload.cast(), | ||
| }) | ||
| } | ||
| /// Construct a borrowed version of this type that can be queried. | ||
| /// | ||
| /// This avoids a potential small underlying cost per API call (like `get()`) by consolidating it | ||
| /// up front. | ||
| /// | ||
| /// This owned version if returned by functions that use a runtime data provider. | ||
| #[inline] | ||
| pub fn as_borrowed(&self) -> CodePointMapDataBorrowed<'_, T> { | ||
| CodePointMapDataBorrowed { | ||
| map: self.data.get(), | ||
| } | ||
| } | ||
/// Convert this map into a map around another value type.
///
/// Typically useful for type-erasing maps into maps around integers.
///
/// ✨ *Enabled with the `alloc` Cargo feature.*
///
/// # Panics
/// Will panic if T and P are different sizes
///
/// # Example
///
/// ```
/// use icu::properties::CodePointMapData;
/// use icu::properties::props::GeneralCategory;
///
/// let data = CodePointMapData::<GeneralCategory>::new().static_to_owned();
///
/// let gc = data.try_into_converted::<u8>().unwrap();
/// let gc = gc.as_borrowed();
///
/// assert_eq!(gc.get('木'), GeneralCategory::OtherLetter as u8); // U+6728
/// assert_eq!(gc.get('🎃'), GeneralCategory::OtherSymbol as u8); // U+1F383 JACK-O-LANTERN
/// ```
#[cfg(feature = "alloc")]
pub fn try_into_converted<P>(self) -> Result<CodePointMapData<P>, zerovec::ule::UleError>
where
    P: TrieValue,
{
    // Convert the payload's contents first, then wrap the successful result.
    let projected = self
        .data
        .try_map_project(|inner, _| inner.try_into_converted());
    projected.map(CodePointMapData::from_data::<ErasedMarker<PropertyCodePointMap<'static, P>>>)
}
| /// Construct a new one from loaded data | ||
| /// | ||
| /// Typically it is preferable to use getters like [`load_general_category()`] instead | ||
| pub(crate) fn from_data<M>(data: DataPayload<M>) -> Self | ||
| where | ||
| M: DynamicDataMarker<DataStruct = PropertyCodePointMap<'static, T>>, | ||
| { | ||
| Self { data: data.cast() } | ||
| } | ||
| /// Construct a new one an owned [`CodePointTrie`] | ||
| pub fn from_code_point_trie(trie: CodePointTrie<'static, T>) -> Self { | ||
| let set = PropertyCodePointMap::from_code_point_trie(trie); | ||
| CodePointMapData::from_data( | ||
| DataPayload::<ErasedMarker<PropertyCodePointMap<'static, T>>>::from_owned(set), | ||
| ) | ||
| } | ||
| /// Convert this type to a [`CodePointTrie`] as a borrowed value. | ||
| /// | ||
| /// The data backing this is extensible and supports multiple implementations. | ||
| /// Currently it is always [`CodePointTrie`]; however in the future more backends may be | ||
| /// added, and users may select which at data generation time. | ||
| /// | ||
| /// This method returns an `Option` in order to return `None` when the backing data provider | ||
| /// cannot return a [`CodePointTrie`], or cannot do so within the expected constant time | ||
| /// constraint. | ||
| pub fn as_code_point_trie(&self) -> Option<&CodePointTrie<'_, T>> { | ||
| self.data.get().as_code_point_trie() | ||
| } | ||
| /// Convert this type to a [`CodePointTrie`], borrowing if possible, | ||
| /// otherwise allocating a new [`CodePointTrie`]. | ||
| /// | ||
| /// The data backing this is extensible and supports multiple implementations. | ||
| /// Currently it is always [`CodePointTrie`]; however in the future more backends may be | ||
| /// added, and users may select which at data generation time. | ||
| /// | ||
| /// The performance of the conversion to this specific return type will vary | ||
| /// depending on the data structure that is backing `self`. | ||
| pub fn to_code_point_trie(&self) -> CodePointTrie<'_, T> { | ||
| self.data.get().to_code_point_trie() | ||
| } | ||
| } | ||
/// A borrowed wrapper around code point map data, returned by
/// [`CodePointMapData::as_borrowed()`]. More efficient to query.
#[derive(Clone, Copy, Debug)]
pub struct CodePointMapDataBorrowed<'a, T: TrieValue> {
    // The borrowed map that every query method on this type delegates to.
    map: &'a PropertyCodePointMap<'a, T>,
}
| impl<'a, T: TrieValue> CodePointMapDataBorrowed<'a, T> { | ||
| /// Get the value this map has associated with code point `ch` | ||
| /// | ||
| /// # Example | ||
| /// | ||
| /// ``` | ||
| /// use icu::properties::CodePointMapData; | ||
| /// use icu::properties::props::GeneralCategory; | ||
| /// | ||
| /// let gc = CodePointMapData::<GeneralCategory>::new(); | ||
| /// | ||
| /// assert_eq!(gc.get('木'), GeneralCategory::OtherLetter); // U+6728 | ||
| /// assert_eq!(gc.get('🎃'), GeneralCategory::OtherSymbol); // U+1F383 JACK-O-LANTERN | ||
| /// ``` | ||
| #[inline] | ||
| pub fn get(self, ch: char) -> T { | ||
| self.map.get(ch) | ||
| } | ||
| /// See [`Self::get`]. | ||
| #[inline] | ||
| pub fn get32(self, ch: u32) -> T { | ||
| self.map.get32(ch) | ||
| } | ||
| /// Get a [`CodePointSetData`] for all elements corresponding to a particular value | ||
| /// | ||
| /// ✨ *Enabled with the `alloc` Cargo feature.* | ||
| /// | ||
| /// # Example | ||
| /// | ||
| /// ``` | ||
| /// use icu::properties::props::GeneralCategory; | ||
| /// use icu::properties::CodePointMapData; | ||
| /// | ||
| /// let gc = CodePointMapData::<GeneralCategory>::new(); | ||
| /// | ||
| /// let other_letter_set_data = | ||
| /// gc.get_set_for_value(GeneralCategory::OtherLetter); | ||
| /// let other_letter_set = other_letter_set_data.as_borrowed(); | ||
| /// | ||
| /// assert!(other_letter_set.contains('木')); // U+6728 | ||
| /// assert!(!other_letter_set.contains('🎃')); // U+1F383 JACK-O-LANTERN | ||
| /// ``` | ||
| #[cfg(feature = "alloc")] | ||
| pub fn get_set_for_value(self, value: T) -> CodePointSetData { | ||
| let set = self.map.get_set_for_value(value); | ||
| CodePointSetData::from_code_point_inversion_list(set) | ||
| } | ||
| /// Yields an [`Iterator`] returning ranges of consecutive code points that | ||
| /// share the same value in the [`CodePointMapData`]. | ||
| /// | ||
| /// # Examples | ||
| /// | ||
| /// ``` | ||
| /// use icu::properties::props::GeneralCategory; | ||
| /// use icu::properties::CodePointMapData; | ||
| /// | ||
| /// let gc = CodePointMapData::<GeneralCategory>::new(); | ||
| /// let mut ranges = gc.iter_ranges(); | ||
| /// let next = ranges.next().unwrap(); | ||
| /// assert_eq!(next.range, 0..=31); | ||
| /// assert_eq!(next.value, GeneralCategory::Control); | ||
| /// let next = ranges.next().unwrap(); | ||
| /// assert_eq!(next.range, 32..=32); | ||
| /// assert_eq!(next.value, GeneralCategory::SpaceSeparator); | ||
| /// ``` | ||
| pub fn iter_ranges(self) -> impl Iterator<Item = CodePointMapRange<T>> + 'a { | ||
| self.map.iter_ranges() | ||
| } | ||
| /// Yields an [`Iterator`] returning ranges of consecutive code points that | ||
| /// share the same value `v` in the [`CodePointMapData`]. | ||
| /// | ||
| /// # Examples | ||
| /// | ||
| /// | ||
| /// ``` | ||
| /// use icu::properties::props::GeneralCategory; | ||
| /// use icu::properties::CodePointMapData; | ||
| /// | ||
| /// let gc = CodePointMapData::<GeneralCategory>::new(); | ||
| /// let mut ranges = gc.iter_ranges_for_value(GeneralCategory::UppercaseLetter); | ||
| /// assert_eq!(ranges.next().unwrap(), 'A' as u32..='Z' as u32); | ||
| /// assert_eq!(ranges.next().unwrap(), 'À' as u32..='Ö' as u32); | ||
| /// assert_eq!(ranges.next().unwrap(), 'Ø' as u32..='Þ' as u32); | ||
| /// ``` | ||
| pub fn iter_ranges_for_value(self, val: T) -> impl Iterator<Item = RangeInclusive<u32>> + 'a { | ||
| self.map | ||
| .iter_ranges() | ||
| .filter(move |r| r.value == val) | ||
| .map(|r| r.range) | ||
| } | ||
| /// Yields an [`Iterator`] returning ranges of consecutive code points that | ||
| /// do *not* have the value `v` in the [`CodePointMapData`]. | ||
| pub fn iter_ranges_for_value_complemented( | ||
| self, | ||
| val: T, | ||
| ) -> impl Iterator<Item = RangeInclusive<u32>> + 'a { | ||
| self.map | ||
| .iter_ranges_mapped(move |value| value != val) | ||
| .filter(|v| v.value) | ||
| .map(|v| v.range) | ||
| } | ||
| /// Exposed for FFI needs, could be exposed in general in the future but we should | ||
| /// have a use case first. | ||
| /// | ||
| /// FFI needs this since it operates on erased maps and can't use `iter_ranges_for_group()` | ||
| #[doc(hidden)] // used by FFI code | ||
| pub fn iter_ranges_mapped<U: Eq + 'a>( | ||
| self, | ||
| predicate: impl FnMut(T) -> U + Copy + 'a, | ||
| ) -> impl Iterator<Item = CodePointMapRange<U>> + 'a { | ||
| self.map.iter_ranges_mapped(predicate) | ||
| } | ||
| } | ||
| impl CodePointMapDataBorrowed<'_, GeneralCategory> { | ||
| /// Get a [`CodePointSetData`] for all elements corresponding to a particular value group | ||
| /// | ||
| /// ✨ *Enabled with the `alloc` Cargo feature.* | ||
| /// | ||
| /// # Example | ||
| /// | ||
| /// ``` | ||
| /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup}; | ||
| /// use icu::properties::CodePointMapData; | ||
| /// | ||
| /// let gc = CodePointMapData::<GeneralCategory>::new(); | ||
| /// | ||
| /// let other_letter_set_data = | ||
| /// gc.get_set_for_value_group(GeneralCategoryGroup::OtherLetter); | ||
| /// let other_letter_set = other_letter_set_data.as_borrowed(); | ||
| /// | ||
| /// assert!(other_letter_set.contains('木')); // U+6728 | ||
| /// assert!(!other_letter_set.contains('🎃')); // U+1F383 JACK-O-LANTERN | ||
| /// ``` | ||
| #[cfg(feature = "alloc")] | ||
| pub fn get_set_for_value_group(self, value: GeneralCategoryGroup) -> crate::CodePointSetData { | ||
| let matching_gc_ranges = self | ||
| .iter_ranges() | ||
| .filter(|cpm_range| (1 << cpm_range.value as u32) & value.0 != 0) | ||
| .map(|cpm_range| cpm_range.range); | ||
| CodePointSetData::from_code_point_inversion_list(matching_gc_ranges.collect()) | ||
| } | ||
| } | ||
#[cfg(feature = "compiled_data")]
impl<T: EnumeratedProperty> Default for CodePointMapDataBorrowed<'static, T> {
    /// Borrows the compiled data for `T`; same result as [`CodePointMapDataBorrowed::new`].
    fn default() -> Self {
        Self { map: T::SINGLETON }
    }
}
| impl<T: TrieValue> CodePointMapDataBorrowed<'static, T> { | ||
| /// Creates a new [`CodePointMapDataBorrowed`] for a [`EnumeratedProperty`]. | ||
| /// | ||
| /// See the documentation on [`EnumeratedProperty`] implementations for details. | ||
| /// | ||
| /// ✨ *Enabled with the `compiled_data` Cargo feature.* | ||
| /// | ||
| /// [📚 Help choosing a constructor](icu_provider::constructors) | ||
| #[cfg(feature = "compiled_data")] | ||
| pub const fn new() -> Self | ||
| where | ||
| T: EnumeratedProperty, | ||
| { | ||
| CodePointMapDataBorrowed { map: T::SINGLETON } | ||
| } | ||
| /// Cheaply converts a [`CodePointMapDataBorrowed<'static>`] into a [`CodePointMapData`]. | ||
| /// | ||
| /// Note: Due to branching and indirection, using [`CodePointMapData`] might inhibit some | ||
| /// compile-time optimizations that are possible with [`CodePointMapDataBorrowed`]. | ||
| pub const fn static_to_owned(self) -> CodePointMapData<T> { | ||
| CodePointMapData { | ||
| data: DataPayload::from_static_ref(self.map), | ||
| } | ||
| } | ||
| } | ||
| impl<'a> CodePointMapDataBorrowed<'a, GeneralCategory> { | ||
| /// Yields an [`Iterator`] returning ranges of consecutive code points that | ||
| /// have a `General_Category` value belonging to the specified [`GeneralCategoryGroup`] | ||
| /// | ||
| /// # Examples | ||
| /// | ||
| /// ``` | ||
| /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup}; | ||
| /// use icu::properties::CodePointMapData; | ||
| /// | ||
| /// let gc = CodePointMapData::<GeneralCategory>::new(); | ||
| /// let mut ranges = gc.iter_ranges_for_group(GeneralCategoryGroup::Letter); | ||
| /// assert_eq!(ranges.next().unwrap(), 'A' as u32..='Z' as u32); | ||
| /// assert_eq!(ranges.next().unwrap(), 'a' as u32..='z' as u32); | ||
| /// assert_eq!(ranges.next().unwrap(), 'ª' as u32..='ª' as u32); | ||
| /// assert_eq!(ranges.next().unwrap(), 'µ' as u32..='µ' as u32); | ||
| /// assert_eq!(ranges.next().unwrap(), 'º' as u32..='º' as u32); | ||
| /// assert_eq!(ranges.next().unwrap(), 'À' as u32..='Ö' as u32); | ||
| /// assert_eq!(ranges.next().unwrap(), 'Ø' as u32..='ö' as u32); | ||
| /// ``` | ||
| pub fn iter_ranges_for_group( | ||
| self, | ||
| group: GeneralCategoryGroup, | ||
| ) -> impl Iterator<Item = RangeInclusive<u32>> + 'a { | ||
| self.map | ||
| .iter_ranges_mapped(move |value| group.contains(value)) | ||
| .filter(|v| v.value) | ||
| .map(|v| v.range) | ||
| } | ||
| } | ||
| /// A Unicode character property that assigns a value to each code point. | ||
| /// | ||
| /// The descriptions of most properties are taken from [`TR44`], the documentation for the | ||
| /// Unicode Character Database. | ||
| /// | ||
| /// <div class="stab unstable"> | ||
| /// 🚫 This trait is sealed; it cannot be implemented by user code. If an API requests an item that implements this | ||
| /// trait, please consider using a type from the implementors listed below. | ||
| /// </div> | ||
| /// | ||
| /// [`TR44`]: https://www.unicode.org/reports/tr44 | ||
| pub trait EnumeratedProperty: crate::private::Sealed + TrieValue { | ||
| #[doc(hidden)] | ||
| type DataMarker: DataMarker<DataStruct = PropertyCodePointMap<'static, Self>>; | ||
| #[doc(hidden)] | ||
| #[cfg(feature = "compiled_data")] | ||
| const SINGLETON: &'static PropertyCodePointMap<'static, Self>; | ||
| /// The name of this property | ||
| const NAME: &'static [u8]; | ||
| /// The abbreviated name of this property, if it exists, otherwise the name | ||
| const SHORT_NAME: &'static [u8]; | ||
| /// Convenience method for `CodePointMapData::new().get(ch)` | ||
| /// | ||
| /// ✨ *Enabled with the `compiled_data` Cargo feature.* | ||
| #[cfg(feature = "compiled_data")] | ||
| fn for_char(ch: char) -> Self { | ||
| CodePointMapData::new().get(ch) | ||
| } | ||
| } |
| // This file is part of ICU4X. For terms of use, please see the file | ||
| // called LICENSE at the top level of the ICU4X source tree | ||
| // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). | ||
| use crate::provider::*; | ||
| use core::ops::RangeInclusive; | ||
| use icu_collections::codepointinvlist::CodePointInversionList; | ||
| use icu_provider::marker::ErasedMarker; | ||
| use icu_provider::prelude::*; | ||
/// A set of Unicode code points. Access its data via the borrowed version,
/// [`CodePointSetDataBorrowed`].
///
/// # Example
/// ```rust
/// use icu::properties::CodePointSetData;
/// use icu::properties::props::Alphabetic;
///
/// let alphabetic = CodePointSetData::new::<Alphabetic>();
///
/// assert!(!alphabetic.contains('3'));
/// assert!(!alphabetic.contains('੩')); // U+0A69 GURMUKHI DIGIT THREE
/// assert!(alphabetic.contains('A'));
/// assert!(alphabetic.contains('Ä')); // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
/// ```
#[derive(Debug)]
pub struct CodePointSetData {
    // Payload holding the set, stored under an erased marker so that data loaded
    // under any marker with this data struct fits the same field.
    data: DataPayload<ErasedMarker<PropertyCodePointSet<'static>>>,
}
| impl CodePointSetData { | ||
| /// Creates a new [`CodePointSetDataBorrowed`] for a [`BinaryProperty`]. | ||
| /// | ||
| /// ✨ *Enabled with the `compiled_data` Cargo feature.* | ||
| /// | ||
| /// [📚 Help choosing a constructor](icu_provider::constructors) | ||
| #[expect(clippy::new_ret_no_self)] | ||
| #[cfg(feature = "compiled_data")] | ||
| pub const fn new<P: BinaryProperty>() -> CodePointSetDataBorrowed<'static> { | ||
| CodePointSetDataBorrowed::new::<P>() | ||
| } | ||
| #[doc = icu_provider::gen_buffer_unstable_docs!(UNSTABLE, Self::new)] | ||
| pub fn try_new_unstable<P: BinaryProperty>( | ||
| provider: &(impl DataProvider<P::DataMarker> + ?Sized), | ||
| ) -> Result<CodePointSetData, DataError> { | ||
| Ok(CodePointSetData::from_data( | ||
| provider.load(Default::default())?.payload, | ||
| )) | ||
| } | ||
| /// Construct a borrowed version of this type that can be queried. | ||
| /// | ||
| /// This owned version if returned by functions that use a runtime data provider. | ||
| #[inline] | ||
| pub fn as_borrowed(&self) -> CodePointSetDataBorrowed<'_> { | ||
| CodePointSetDataBorrowed { | ||
| set: self.data.get(), | ||
| } | ||
| } | ||
| /// Construct a new one from loaded data | ||
| /// | ||
| /// Typically it is preferable to use getters like [`load_ascii_hex_digit()`] instead | ||
| pub(crate) fn from_data<M>(data: DataPayload<M>) -> Self | ||
| where | ||
| M: DynamicDataMarker<DataStruct = PropertyCodePointSet<'static>>, | ||
| { | ||
| Self { data: data.cast() } | ||
| } | ||
| /// Construct a new owned [`CodePointInversionList`] | ||
| pub fn from_code_point_inversion_list(set: CodePointInversionList<'static>) -> Self { | ||
| let set = PropertyCodePointSet::from_code_point_inversion_list(set); | ||
| CodePointSetData::from_data( | ||
| DataPayload::<ErasedMarker<PropertyCodePointSet<'static>>>::from_owned(set), | ||
| ) | ||
| } | ||
| /// Convert this type to a [`CodePointInversionList`] as a borrowed value. | ||
| /// | ||
| /// The data backing this is extensible and supports multiple implementations. | ||
| /// Currently it is always [`CodePointInversionList`]; however in the future more backends may be | ||
| /// added, and users may select which at data generation time. | ||
| /// | ||
| /// This method returns an `Option` in order to return `None` when the backing data provider | ||
| /// cannot return a [`CodePointInversionList`], or cannot do so within the expected constant time | ||
| /// constraint. | ||
| pub fn as_code_point_inversion_list(&self) -> Option<&CodePointInversionList<'_>> { | ||
| self.data.get().as_code_point_inversion_list() | ||
| } | ||
| /// Convert this type to a [`CodePointInversionList`], borrowing if possible, | ||
| /// otherwise allocating a new [`CodePointInversionList`]. | ||
| /// | ||
| /// The data backing this is extensible and supports multiple implementations. | ||
| /// Currently it is always [`CodePointInversionList`]; however in the future more backends may be | ||
| /// added, and users may select which at data generation time. | ||
| /// | ||
| /// The performance of the conversion to this specific return type will vary | ||
| /// depending on the data structure that is backing `self`. | ||
| pub fn to_code_point_inversion_list(&self) -> CodePointInversionList<'_> { | ||
| self.data.get().to_code_point_inversion_list() | ||
| } | ||
| } | ||
/// A borrowed wrapper around code point set data, returned by
/// [`CodePointSetData::as_borrowed()`]. More efficient to query.
#[derive(Clone, Copy, Debug)]
pub struct CodePointSetDataBorrowed<'a> {
    // The borrowed set that every query method on this type delegates to.
    set: &'a PropertyCodePointSet<'a>,
}
| impl CodePointSetDataBorrowed<'static> { | ||
| /// Creates a new [`CodePointSetData`] for a [`BinaryProperty`]. | ||
| /// | ||
| /// ✨ *Enabled with the `compiled_data` Cargo feature.* | ||
| /// | ||
| /// [📚 Help choosing a constructor](icu_provider::constructors) | ||
| #[inline] | ||
| #[cfg(feature = "compiled_data")] | ||
| pub const fn new<P: BinaryProperty>() -> Self { | ||
| CodePointSetDataBorrowed { set: P::SINGLETON } | ||
| } | ||
| /// Cheaply converts a [`CodePointSetDataBorrowed<'static>`] into a [`CodePointSetData`]. | ||
| /// | ||
| /// Note: Due to branching and indirection, using [`CodePointSetData`] might inhibit some | ||
| /// compile-time optimizations that are possible with [`CodePointSetDataBorrowed`]. | ||
| pub const fn static_to_owned(self) -> CodePointSetData { | ||
| CodePointSetData { | ||
| data: DataPayload::from_static_ref(self.set), | ||
| } | ||
| } | ||
| } | ||
| impl<'a> CodePointSetDataBorrowed<'a> { | ||
| /// Check if the set contains a character | ||
| /// | ||
| /// ```rust | ||
| /// use icu::properties::CodePointSetData; | ||
| /// use icu::properties::props::Alphabetic; | ||
| /// | ||
| /// let alphabetic = CodePointSetData::new::<Alphabetic>(); | ||
| /// | ||
| /// assert!(!alphabetic.contains('3')); | ||
| /// assert!(!alphabetic.contains('੩')); // U+0A69 GURMUKHI DIGIT THREE | ||
| /// assert!(alphabetic.contains('A')); | ||
| /// assert!(alphabetic.contains('Ä')); // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS | ||
| /// ``` | ||
| #[inline] | ||
| pub fn contains(self, ch: char) -> bool { | ||
| self.set.contains(ch) | ||
| } | ||
| /// See [`Self::contains`]. | ||
| #[inline] | ||
| pub fn contains32(self, ch: u32) -> bool { | ||
| self.set.contains32(ch) | ||
| } | ||
| // Yields an [`Iterator`] returning the ranges of the code points that are | ||
| /// included in the [`CodePointSetData`] | ||
| /// | ||
| /// Ranges are returned as [`RangeInclusive`], which is inclusive of its | ||
| /// `end` bound value. An end-inclusive behavior matches the ICU4C/J | ||
| /// behavior of ranges, ex: `UnicodeSet::contains(UChar32 start, UChar32 end)`. | ||
| /// | ||
| /// # Example | ||
| /// | ||
| /// ``` | ||
| /// use icu::properties::props::Alphabetic; | ||
| /// use icu::properties::CodePointSetData; | ||
| /// | ||
| /// let alphabetic = CodePointSetData::new::<Alphabetic>(); | ||
| /// let mut ranges = alphabetic.iter_ranges(); | ||
| /// | ||
| /// assert_eq!(Some(0x0041..=0x005A), ranges.next()); // 'A'..'Z' | ||
| /// assert_eq!(Some(0x0061..=0x007A), ranges.next()); // 'a'..'z' | ||
| /// ``` | ||
| #[inline] | ||
| pub fn iter_ranges(self) -> impl Iterator<Item = RangeInclusive<u32>> + 'a { | ||
| self.set.iter_ranges() | ||
| } | ||
| // Yields an [`Iterator`] returning the ranges of the code points that are | ||
| /// *not* included in the [`CodePointSetData`] | ||
| /// | ||
| /// Ranges are returned as [`RangeInclusive`], which is inclusive of its | ||
| /// `end` bound value. An end-inclusive behavior matches the ICU4C/J | ||
| /// behavior of ranges, ex: `UnicodeSet::contains(UChar32 start, UChar32 end)`. | ||
| /// | ||
| /// # Example | ||
| /// | ||
| /// ``` | ||
| /// use icu::properties::props::Alphabetic; | ||
| /// use icu::properties::CodePointSetData; | ||
| /// | ||
| /// let alphabetic = CodePointSetData::new::<Alphabetic>(); | ||
| /// let mut ranges = alphabetic.iter_ranges(); | ||
| /// | ||
| /// assert_eq!(Some(0x0041..=0x005A), ranges.next()); // 'A'..'Z' | ||
| /// assert_eq!(Some(0x0061..=0x007A), ranges.next()); // 'a'..'z' | ||
| /// ``` | ||
| #[inline] | ||
| pub fn iter_ranges_complemented(self) -> impl Iterator<Item = RangeInclusive<u32>> + 'a { | ||
| self.set.iter_ranges_complemented() | ||
| } | ||
| } | ||
| /// A binary Unicode character property. | ||
| /// | ||
| /// The descriptions of most properties are taken from [`TR44`], the documentation for the | ||
| /// Unicode Character Database. Some properties are instead defined in [`TR18`], the | ||
| /// documentation for Unicode regular expressions. In particular, Annex C of this document | ||
| /// defines properties for POSIX compatibility. | ||
| /// | ||
| /// <div class="stab unstable"> | ||
| /// 🚫 This trait is sealed; it cannot be implemented by user code. If an API requests an item that implements this | ||
| /// trait, please consider using a type from the implementors listed below. | ||
| /// </div> | ||
| /// | ||
| /// [`TR44`]: https://www.unicode.org/reports/tr44 | ||
| /// [`TR18`]: https://www.unicode.org/reports/tr18 | ||
| pub trait BinaryProperty: crate::private::Sealed + Sized { | ||
| #[doc(hidden)] | ||
| type DataMarker: DataMarker<DataStruct = PropertyCodePointSet<'static>>; | ||
| #[doc(hidden)] | ||
| #[cfg(feature = "compiled_data")] | ||
| const SINGLETON: &'static PropertyCodePointSet<'static>; | ||
| /// The name of this property | ||
| const NAME: &'static [u8]; | ||
| /// The abbreviated name of this property, if it exists, otherwise the name | ||
| const SHORT_NAME: &'static [u8]; | ||
| /// Convenience method for `CodePointSetData::new().contains(ch)` | ||
| /// | ||
| /// ✨ *Enabled with the `compiled_data` Cargo feature.* | ||
| #[cfg(feature = "compiled_data")] | ||
| fn for_char(ch: char) -> bool { | ||
| CodePointSetData::new::<Self>().contains(ch) | ||
| } | ||
| } | ||
#[cfg(test)]
mod tests {
    // A set built from a General_Category group contains code points of that group only.
    #[test]
    fn test_general_category() {
        use icu::properties::props::GeneralCategory;
        use icu::properties::props::GeneralCategoryGroup;
        use icu::properties::CodePointMapData;
        let digits_data = CodePointMapData::<GeneralCategory>::new()
            .get_set_for_value_group(GeneralCategoryGroup::Number);
        let digits = digits_data.as_borrowed();
        assert!(digits.contains('5'));
        assert!(digits.contains('\u{0665}')); // U+0665 ARABIC-INDIC DIGIT FIVE
        assert!(digits.contains('\u{096b}')); // U+096B DEVANAGARI DIGIT FIVE
        assert!(!digits.contains('A'));
    }

    // A set built from a single Script value contains code points of that script only.
    #[test]
    fn test_script() {
        use icu::properties::props::Script;
        use icu::properties::CodePointMapData;
        let thai_data = CodePointMapData::<Script>::new().get_set_for_value(Script::Thai);
        let thai = thai_data.as_borrowed();
        assert!(thai.contains('\u{0e01}')); // U+0E01 THAI CHARACTER KO KAI
        assert!(thai.contains('\u{0e50}')); // U+0E50 THAI DIGIT ZERO
        assert!(!thai.contains('A'));
        assert!(!thai.contains('\u{0e3f}')); // U+0E3F THAI CURRENCY SYMBOL BAHT
    }

    // For each General_Category group, the set returned by `get_set_for_value_group`
    // must equal the union of the sets returned for its listed subcategories.
    #[test]
    fn test_gc_groupings() {
        use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
        use icu::properties::CodePointMapData;
        use icu_collections::codepointinvlist::CodePointInversionListBuilder;
        let test_group = |category: GeneralCategoryGroup, subcategories: &[GeneralCategory]| {
            let category_set =
                CodePointMapData::<GeneralCategory>::new().get_set_for_value_group(category);
            let category_set = category_set
                .as_code_point_inversion_list()
                .expect("The data should be valid");
            // Rebuild the group by hand: add every range of every subcategory to one builder.
            let mut builder = CodePointInversionListBuilder::new();
            for &subcategory in subcategories {
                let gc_set_data =
                    CodePointMapData::<GeneralCategory>::new().get_set_for_value(subcategory);
                let gc_set = gc_set_data.as_borrowed();
                for range in gc_set.iter_ranges() {
                    builder.add_range32(range);
                }
            }
            let combined_set = builder.build();
            println!("{category:?} {subcategories:?}");
            assert_eq!(
                category_set.get_inversion_list_vec(),
                combined_set.get_inversion_list_vec()
            );
        };
        test_group(
            GeneralCategoryGroup::Letter,
            &[
                GeneralCategory::UppercaseLetter,
                GeneralCategory::LowercaseLetter,
                GeneralCategory::TitlecaseLetter,
                GeneralCategory::ModifierLetter,
                GeneralCategory::OtherLetter,
            ],
        );
        test_group(
            GeneralCategoryGroup::Other,
            &[
                GeneralCategory::Control,
                GeneralCategory::Format,
                GeneralCategory::Unassigned,
                GeneralCategory::PrivateUse,
                GeneralCategory::Surrogate,
            ],
        );
        test_group(
            GeneralCategoryGroup::Mark,
            &[
                GeneralCategory::SpacingMark,
                GeneralCategory::EnclosingMark,
                GeneralCategory::NonspacingMark,
            ],
        );
        test_group(
            GeneralCategoryGroup::Number,
            &[
                GeneralCategory::DecimalNumber,
                GeneralCategory::LetterNumber,
                GeneralCategory::OtherNumber,
            ],
        );
        test_group(
            GeneralCategoryGroup::Punctuation,
            &[
                GeneralCategory::ConnectorPunctuation,
                GeneralCategory::DashPunctuation,
                GeneralCategory::ClosePunctuation,
                GeneralCategory::FinalPunctuation,
                GeneralCategory::InitialPunctuation,
                GeneralCategory::OtherPunctuation,
                GeneralCategory::OpenPunctuation,
            ],
        );
        test_group(
            GeneralCategoryGroup::Symbol,
            &[
                GeneralCategory::CurrencySymbol,
                GeneralCategory::ModifierSymbol,
                GeneralCategory::MathSymbol,
                GeneralCategory::OtherSymbol,
            ],
        );
        test_group(
            GeneralCategoryGroup::Separator,
            &[
                GeneralCategory::LineSeparator,
                GeneralCategory::ParagraphSeparator,
                GeneralCategory::SpaceSeparator,
            ],
        );
    }

    // Surrogate code points are not valid `char`s, so they are checked through `contains32`.
    #[test]
    fn test_gc_surrogate() {
        use icu::properties::props::GeneralCategory;
        use icu::properties::CodePointMapData;
        let surrogates_data = CodePointMapData::<GeneralCategory>::new()
            .get_set_for_value(GeneralCategory::Surrogate);
        let surrogates = surrogates_data.as_borrowed();
        assert!(surrogates.contains32(0xd800));
        assert!(surrogates.contains32(0xd900));
        assert!(surrogates.contains32(0xdfff));
        assert!(!surrogates.contains('A'));
    }
}
| // This file is part of ICU4X. For terms of use, please see the file | ||
| // called LICENSE at the top level of the ICU4X source tree | ||
| // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). | ||
| use crate::provider::*; | ||
| use icu_collections::codepointinvliststringlist::CodePointInversionListAndStringList; | ||
| use icu_provider::marker::ErasedMarker; | ||
| use icu_provider::prelude::*; | ||
/// A wrapper around `UnicodeSet` data (characters and strings)
#[derive(Debug)]
pub struct EmojiSetData {
    // Payload holding the set, stored under an erased marker so that data loaded
    // under any marker with this data struct fits the same field.
    data: DataPayload<ErasedMarker<PropertyUnicodeSet<'static>>>,
}
| impl EmojiSetData { | ||
| /// Creates a new [`EmojiSetDataBorrowed`] for a [`EmojiSet`]. | ||
| /// | ||
| /// See the documentation on [`EmojiSet`] implementations for details. | ||
| /// | ||
| /// ✨ *Enabled with the `compiled_data` Cargo feature.* | ||
| /// | ||
| /// [📚 Help choosing a constructor](icu_provider::constructors) | ||
| #[cfg(feature = "compiled_data")] | ||
| #[expect(clippy::new_ret_no_self)] | ||
| pub const fn new<P: EmojiSet>() -> EmojiSetDataBorrowed<'static> { | ||
| EmojiSetDataBorrowed::new::<P>() | ||
| } | ||
| /// A version of `new()` that uses custom data provided by a [`DataProvider`]. | ||
| /// | ||
| /// Note that this will return an owned version of the data. Functionality is available on | ||
| /// the borrowed version, accessible through [`EmojiSetData::as_borrowed`]. | ||
| pub fn try_new_unstable<P: EmojiSet>( | ||
| provider: &(impl DataProvider<P::DataMarker> + ?Sized), | ||
| ) -> Result<EmojiSetData, DataError> { | ||
| Ok(EmojiSetData::from_data( | ||
| provider.load(Default::default())?.payload, | ||
| )) | ||
| } | ||
| /// Construct a borrowed version of this type that can be queried. | ||
| /// | ||
| /// This avoids a potential small underlying cost per API call (ex: `contains()`) by consolidating it | ||
| /// up front. | ||
| #[inline] | ||
| pub fn as_borrowed(&self) -> EmojiSetDataBorrowed<'_> { | ||
| EmojiSetDataBorrowed { | ||
| set: self.data.get(), | ||
| } | ||
| } | ||
| /// Construct a new one from loaded data | ||
| /// | ||
| /// Typically it is preferable to use getters instead | ||
| pub(crate) fn from_data<M>(data: DataPayload<M>) -> Self | ||
| where | ||
| M: DynamicDataMarker<DataStruct = PropertyUnicodeSet<'static>>, | ||
| { | ||
| Self { data: data.cast() } | ||
| } | ||
| /// Construct a new owned [`CodePointInversionListAndStringList`] | ||
| pub fn from_code_point_inversion_list_string_list( | ||
| set: CodePointInversionListAndStringList<'static>, | ||
| ) -> Self { | ||
| let set = PropertyUnicodeSet::from_code_point_inversion_list_string_list(set); | ||
| EmojiSetData::from_data( | ||
| DataPayload::<ErasedMarker<PropertyUnicodeSet<'static>>>::from_owned(set), | ||
| ) | ||
| } | ||
| /// Convert this type to a [`CodePointInversionListAndStringList`] as a borrowed value. | ||
| /// | ||
| /// The data backing this is extensible and supports multiple implementations. | ||
| /// Currently it is always [`CodePointInversionListAndStringList`]; however in the future more backends may be | ||
| /// added, and users may select which at data generation time. | ||
| /// | ||
| /// This method returns an `Option` in order to return `None` when the backing data provider | ||
| /// cannot return a [`CodePointInversionListAndStringList`], or cannot do so within the expected constant time | ||
| /// constraint. | ||
| pub fn as_code_point_inversion_list_string_list( | ||
| &self, | ||
| ) -> Option<&CodePointInversionListAndStringList<'_>> { | ||
| self.data.get().as_code_point_inversion_list_string_list() | ||
| } | ||
| /// Convert this type to a [`CodePointInversionListAndStringList`], borrowing if possible, | ||
| /// otherwise allocating a new [`CodePointInversionListAndStringList`]. | ||
| /// | ||
| /// The data backing this is extensible and supports multiple implementations. | ||
| /// Currently it is always [`CodePointInversionListAndStringList`]; however in the future more backends may be | ||
| /// added, and users may select which at data generation time. | ||
| /// | ||
| /// The performance of the conversion to this specific return type will vary | ||
| /// depending on the data structure that is backing `self`. | ||
| pub fn to_code_point_inversion_list_string_list( | ||
| &self, | ||
| ) -> CodePointInversionListAndStringList<'_> { | ||
| self.data.get().to_code_point_inversion_list_string_list() | ||
| } | ||
| } | ||
/// A borrowed wrapper around emoji set data (characters and strings), returned by
/// [`EmojiSetData::as_borrowed()`]. More efficient to query.
#[derive(Clone, Copy, Debug)]
pub struct EmojiSetDataBorrowed<'a> {
    // The borrowed set that every query method on this type delegates to.
    set: &'a PropertyUnicodeSet<'a>,
}
| impl EmojiSetDataBorrowed<'_> { | ||
| /// Check if the set contains the string. Strings consisting of one character | ||
| /// are treated as a character/code point. | ||
| /// | ||
| /// This matches ICU behavior for ICU's `UnicodeSet`. | ||
| #[inline] | ||
| pub fn contains_str(self, s: &str) -> bool { | ||
| self.set.contains_str(s) | ||
| } | ||
| /// Check if the set contains the code point. | ||
| #[inline] | ||
| pub fn contains(self, ch: char) -> bool { | ||
| self.set.contains(ch) | ||
| } | ||
| /// See [`Self::contains`]. | ||
| #[inline] | ||
| pub fn contains32(self, cp: u32) -> bool { | ||
| self.set.contains32(cp) | ||
| } | ||
| } | ||
| impl EmojiSetDataBorrowed<'static> { | ||
| /// Creates a new [`EmojiSetDataBorrowed`] for a [`EmojiSet`]. | ||
| /// | ||
| /// See the documentation on [`EmojiSet`] implementations for details. | ||
| /// | ||
| /// ✨ *Enabled with the `compiled_data` Cargo feature.* | ||
| /// | ||
| /// [📚 Help choosing a constructor](icu_provider::constructors) | ||
| #[inline] | ||
| #[cfg(feature = "compiled_data")] | ||
| pub const fn new<P: EmojiSet>() -> Self { | ||
| EmojiSetDataBorrowed { set: P::SINGLETON } | ||
| } | ||
| /// Cheaply converts a [`EmojiSetDataBorrowed<'static>`] into a [`EmojiSetData`]. | ||
| /// | ||
| /// Note: Due to branching and indirection, using [`EmojiSetData`] might inhibit some | ||
| /// compile-time optimizations that are possible with [`EmojiSetDataBorrowed`]. | ||
| pub const fn static_to_owned(self) -> EmojiSetData { | ||
| EmojiSetData { | ||
| data: DataPayload::from_static_ref(self.set), | ||
| } | ||
| } | ||
| } | ||
/// An Emoji set as defined by [`Unicode Technical Standard #51`](https://unicode.org/reports/tr51/#Emoji_Sets).
///
/// <div class="stab unstable">
/// 🚫 This trait is sealed; it cannot be implemented by user code. If an API requests an item that implements this
/// trait, please consider using a type from the implementors listed below.
/// </div>
pub trait EmojiSet: crate::private::Sealed {
    // Data marker whose data struct is the `PropertyUnicodeSet` backing this emoji set.
    #[doc(hidden)]
    type DataMarker: DataMarker<DataStruct = PropertyUnicodeSet<'static>>;
    // Compiled-data instance of the set; read by `EmojiSetDataBorrowed::new`.
    #[doc(hidden)]
    #[cfg(feature = "compiled_data")]
    const SINGLETON: &'static PropertyUnicodeSet<'static>;
}
| // This file is part of ICU4X. For terms of use, please see the file | ||
| // called LICENSE at the top level of the ICU4X source tree | ||
| // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). | ||
| //! Definitions of [Unicode Properties] and APIs for | ||
| //! retrieving property data in an appropriate data structure. | ||
| //! | ||
| //! This module is published as its own crate ([`icu_properties`](https://docs.rs/icu_properties/latest/icu_properties/)) | ||
| //! and as part of the [`icu`](https://docs.rs/icu/latest/icu/) crate. See the latter for more details on the ICU4X project. | ||
| //! | ||
| //! APIs that return a [`CodePointSetData`] exist for binary properties and certain enumerated | ||
| //! properties. | ||
| //! | ||
| //! APIs that return a [`CodePointMapData`] exist for certain enumerated properties. | ||
| //! | ||
| //! # Examples | ||
| //! | ||
| //! ## Property data as `CodePointSetData`s | ||
| //! | ||
| //! ``` | ||
| //! use icu::properties::{CodePointSetData, CodePointMapData}; | ||
| //! use icu::properties::props::{GeneralCategory, Emoji}; | ||
| //! | ||
| //! // A binary property as a `CodePointSetData` | ||
| //! | ||
| //! assert!(CodePointSetData::new::<Emoji>().contains('🎃')); // U+1F383 JACK-O-LANTERN | ||
| //! assert!(!CodePointSetData::new::<Emoji>().contains('木')); // U+6728 | ||
| //! | ||
| //! // An individual enumerated property value as a `CodePointSetData` | ||
| //! | ||
| //! let line_sep_data = CodePointMapData::<GeneralCategory>::new() | ||
| //! .get_set_for_value(GeneralCategory::LineSeparator); | ||
| //! let line_sep = line_sep_data.as_borrowed(); | ||
| //! | ||
| //! assert!(line_sep.contains('\u{2028}')); | ||
| //! assert!(!line_sep.contains('\u{2029}')); | ||
| //! ``` | ||
| //! | ||
| //! ## Property data as `CodePointMapData`s | ||
| //! | ||
| //! ``` | ||
| //! use icu::properties::CodePointMapData; | ||
| //! use icu::properties::props::Script; | ||
| //! | ||
| //! assert_eq!(CodePointMapData::<Script>::new().get('🎃'), Script::Common); // U+1F383 JACK-O-LANTERN | ||
| //! assert_eq!(CodePointMapData::<Script>::new().get('木'), Script::Han); // U+6728 | ||
| //! ``` | ||
| //! | ||
| //! [`ICU4X`]: ../icu/index.html | ||
| //! [Unicode Properties]: https://unicode-org.github.io/icu/userguide/strings/properties.html | ||
| //! [`CodePointSetData`]: crate::CodePointSetData | ||
| //! [`CodePointMapData`]: crate::CodePointMapData | ||
| // https://github.com/unicode-org/icu4x/blob/main/documents/process/boilerplate.md#library-annotations | ||
| #![cfg_attr(not(any(test, doc)), no_std)] | ||
| #![cfg_attr( | ||
| not(test), | ||
| deny( | ||
| clippy::indexing_slicing, | ||
| clippy::unwrap_used, | ||
| clippy::expect_used, | ||
| clippy::panic, | ||
| clippy::exhaustive_structs, | ||
| clippy::exhaustive_enums, | ||
| clippy::trivially_copy_pass_by_ref, | ||
| missing_debug_implementations, | ||
| ) | ||
| )] | ||
| #![warn(missing_docs)] | ||
| #[cfg(feature = "alloc")] | ||
| extern crate alloc; | ||
| mod code_point_set; | ||
| pub use code_point_set::{CodePointSetData, CodePointSetDataBorrowed}; | ||
| mod code_point_map; | ||
| pub use code_point_map::{CodePointMapData, CodePointMapDataBorrowed}; | ||
| mod emoji; | ||
| pub use emoji::{EmojiSetData, EmojiSetDataBorrowed}; | ||
| mod names; | ||
| pub use names::{ | ||
| PropertyNamesLong, PropertyNamesLongBorrowed, PropertyNamesShort, PropertyNamesShortBorrowed, | ||
| PropertyParser, PropertyParserBorrowed, | ||
| }; | ||
| mod runtime; | ||
| // NOTE: The Pernosco debugger has special knowledge | ||
| // of the `CanonicalCombiningClass` struct inside the `props` | ||
| // module. Please do not change the crate-module-qualified | ||
| // name of that struct without coordination. | ||
| pub mod props; | ||
| pub mod provider; | ||
| pub mod script; | ||
| mod bidi; | ||
| mod trievalue; | ||
// Support for sealed traits: `Sealed` is a supertrait of this crate's public property
// traits (for example `EmojiSet` and `ParseableEnumeratedProperty`). The module itself
// is not `pub`, so code outside this crate cannot name `Sealed` and therefore cannot
// implement those traits.
mod private {
    pub trait Sealed {}
}
| // This file is part of ICU4X. For terms of use, please see the file | ||
| // called LICENSE at the top level of the ICU4X source tree | ||
| // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). | ||
| use crate::props::*; | ||
| use crate::provider::names::*; | ||
| use core::marker::PhantomData; | ||
| use icu_collections::codepointtrie::TrieValue; | ||
| use icu_provider::marker::ErasedMarker; | ||
| use icu_provider::prelude::*; | ||
| use yoke::Yokeable; | ||
| use zerotrie::cursor::ZeroTrieSimpleAsciiCursor; | ||
/// A struct capable of looking up a property value from a string name.
/// Access its data by calling [`Self::as_borrowed()`] and using the methods on
/// [`PropertyParserBorrowed`].
///
/// The name can be a short name (`Lu`), a long name (`Uppercase_Letter`),
/// or an alias.
///
/// Property names can be looked up using "strict" matching (looking for a name
/// that matches exactly), or "loose matching", where the name is allowed to deviate
/// in terms of ASCII casing, whitespace, underscores, and hyphens.
///
/// # Example
///
/// ```
/// use icu::properties::props::GeneralCategory;
/// use icu::properties::PropertyParser;
///
/// let lookup = PropertyParser::<GeneralCategory>::new();
/// // short name for value
/// assert_eq!(
///     lookup.get_strict("Lu"),
///     Some(GeneralCategory::UppercaseLetter)
/// );
/// assert_eq!(
///     lookup.get_strict("Pd"),
///     Some(GeneralCategory::DashPunctuation)
/// );
/// // long name for value
/// assert_eq!(
///     lookup.get_strict("Uppercase_Letter"),
///     Some(GeneralCategory::UppercaseLetter)
/// );
/// assert_eq!(
///     lookup.get_strict("Dash_Punctuation"),
///     Some(GeneralCategory::DashPunctuation)
/// );
/// // name has incorrect casing
/// assert_eq!(lookup.get_strict("dashpunctuation"), None);
/// // loose matching of name
/// assert_eq!(
///     lookup.get_loose("dash-punctuation"),
///     Some(GeneralCategory::DashPunctuation)
/// );
/// // fake property
/// assert_eq!(lookup.get_strict("Animated_Gif"), None);
/// ```
#[derive(Debug)]
pub struct PropertyParser<T> {
    // Type-erased payload holding the name-to-enum map.
    map: DataPayload<ErasedMarker<PropertyValueNameToEnumMap<'static>>>,
    // Zero-sized marker recording the property type `T`; no `T` value is stored.
    markers: PhantomData<fn() -> T>,
}
/// A borrowed wrapper around property value name-to-enum data, returned by
/// [`PropertyParser::as_borrowed()`]. More efficient to query.
#[derive(Debug)]
pub struct PropertyParserBorrowed<'a, T> {
    // Borrowed name-to-enum map that the lookup methods search.
    map: &'a PropertyValueNameToEnumMap<'a>,
    // Zero-sized marker recording the property type `T`; no `T` value is stored.
    markers: PhantomData<fn() -> T>,
}

// `Clone` and `Copy` are written by hand with no bound on `T` (a derive would add
// `T: Clone` / `T: Copy` bounds); both fields are `Copy` regardless of `T`.
impl<T> Clone for PropertyParserBorrowed<'_, T> {
    fn clone(&self) -> Self {
        *self
    }
}

impl<T> Copy for PropertyParserBorrowed<'_, T> {}
| impl<T> PropertyParser<T> { | ||
| /// Creates a new instance of `PropertyParser<T>` using compiled data. | ||
| /// | ||
| /// ✨ *Enabled with the `compiled_data` Cargo feature.* | ||
| /// | ||
| /// [📚 Help choosing a constructor](icu_provider::constructors) | ||
| #[cfg(feature = "compiled_data")] | ||
| #[expect(clippy::new_ret_no_self)] | ||
| pub fn new() -> PropertyParserBorrowed<'static, T> | ||
| where | ||
| T: ParseableEnumeratedProperty, | ||
| { | ||
| PropertyParserBorrowed::new() | ||
| } | ||
| #[doc = icu_provider::gen_buffer_unstable_docs!(UNSTABLE, Self::new)] | ||
| pub fn try_new_unstable( | ||
| provider: &(impl DataProvider<T::DataMarker> + ?Sized), | ||
| ) -> Result<Self, DataError> | ||
| where | ||
| T: ParseableEnumeratedProperty, | ||
| { | ||
| Ok(Self { | ||
| map: provider.load(Default::default())?.payload.cast(), | ||
| markers: PhantomData, | ||
| }) | ||
| } | ||
| /// Construct a borrowed version of this type that can be queried. | ||
| /// | ||
| /// This avoids a potential small underlying cost per API call (like `get_strict()`) by consolidating it | ||
| /// up front. | ||
| #[inline] | ||
| pub fn as_borrowed(&self) -> PropertyParserBorrowed<'_, T> { | ||
| PropertyParserBorrowed { | ||
| map: self.map.get(), | ||
| markers: PhantomData, | ||
| } | ||
| } | ||
| #[doc(hidden)] // used by FFI code | ||
| pub fn erase(self) -> PropertyParser<u16> { | ||
| PropertyParser { | ||
| map: self.map.cast(), | ||
| markers: PhantomData, | ||
| } | ||
| } | ||
| } | ||
| impl<T: TrieValue> PropertyParserBorrowed<'_, T> { | ||
| /// Get the property value as a u16, doing a strict search looking for | ||
| /// names that match exactly | ||
| /// | ||
| /// # Example | ||
| /// | ||
| /// ``` | ||
| /// use icu::properties::props::GeneralCategory; | ||
| /// use icu::properties::PropertyParser; | ||
| /// | ||
| /// let lookup = PropertyParser::<GeneralCategory>::new(); | ||
| /// assert_eq!( | ||
| /// lookup.get_strict_u16("Lu"), | ||
| /// Some(GeneralCategory::UppercaseLetter as u16) | ||
| /// ); | ||
| /// assert_eq!( | ||
| /// lookup.get_strict_u16("Uppercase_Letter"), | ||
| /// Some(GeneralCategory::UppercaseLetter as u16) | ||
| /// ); | ||
| /// // does not do loose matching | ||
| /// assert_eq!(lookup.get_strict_u16("UppercaseLetter"), None); | ||
| /// ``` | ||
| #[inline] | ||
| pub fn get_strict_u16(self, name: &str) -> Option<u16> { | ||
| get_strict_u16(self.map, name) | ||
| } | ||
| /// Get the property value as a `T`, doing a strict search looking for | ||
| /// names that match exactly | ||
| /// | ||
| /// # Example | ||
| /// | ||
| /// ``` | ||
| /// use icu::properties::props::GeneralCategory; | ||
| /// use icu::properties::PropertyParser; | ||
| /// | ||
| /// let lookup = PropertyParser::<GeneralCategory>::new(); | ||
| /// assert_eq!( | ||
| /// lookup.get_strict("Lu"), | ||
| /// Some(GeneralCategory::UppercaseLetter) | ||
| /// ); | ||
| /// assert_eq!( | ||
| /// lookup.get_strict("Uppercase_Letter"), | ||
| /// Some(GeneralCategory::UppercaseLetter) | ||
| /// ); | ||
| /// // does not do loose matching | ||
| /// assert_eq!(lookup.get_strict("UppercaseLetter"), None); | ||
| /// ``` | ||
| #[inline] | ||
| pub fn get_strict(self, name: &str) -> Option<T> { | ||
| T::try_from_u32(self.get_strict_u16(name)? as u32).ok() | ||
| } | ||
| /// Get the property value as a u16, doing a loose search looking for | ||
| /// names that match case-insensitively, ignoring ASCII hyphens, underscores, and | ||
| /// whitespaces. | ||
| /// | ||
| /// # Example | ||
| /// | ||
| /// ``` | ||
| /// use icu::properties::props::GeneralCategory; | ||
| /// use icu::properties::PropertyParser; | ||
| /// | ||
| /// let lookup = PropertyParser::<GeneralCategory>::new(); | ||
| /// assert_eq!( | ||
| /// lookup.get_loose_u16("Lu"), | ||
| /// Some(GeneralCategory::UppercaseLetter as u16) | ||
| /// ); | ||
| /// assert_eq!( | ||
| /// lookup.get_loose_u16("Uppercase_Letter"), | ||
| /// Some(GeneralCategory::UppercaseLetter as u16) | ||
| /// ); | ||
| /// // does do loose matching | ||
| /// assert_eq!( | ||
| /// lookup.get_loose_u16("UppercaseLetter"), | ||
| /// Some(GeneralCategory::UppercaseLetter as u16) | ||
| /// ); | ||
| /// ``` | ||
| #[inline] | ||
| pub fn get_loose_u16(self, name: &str) -> Option<u16> { | ||
| get_loose_u16(self.map, name) | ||
| } | ||
| /// Get the property value as a `T`, doing a loose search looking for | ||
| /// names that match case-insensitively, ignoring ASCII hyphens, underscores, and | ||
| /// whitespaces. | ||
| /// | ||
| /// # Example | ||
| /// | ||
| /// ``` | ||
| /// use icu::properties::props::GeneralCategory; | ||
| /// use icu::properties::PropertyParser; | ||
| /// | ||
| /// let lookup = PropertyParser::<GeneralCategory>::new(); | ||
| /// assert_eq!( | ||
| /// lookup.get_loose("Lu"), | ||
| /// Some(GeneralCategory::UppercaseLetter) | ||
| /// ); | ||
| /// assert_eq!( | ||
| /// lookup.get_loose("Uppercase_Letter"), | ||
| /// Some(GeneralCategory::UppercaseLetter) | ||
| /// ); | ||
| /// // does do loose matching | ||
| /// assert_eq!( | ||
| /// lookup.get_loose("UppercaseLetter"), | ||
| /// Some(GeneralCategory::UppercaseLetter) | ||
| /// ); | ||
| /// ``` | ||
| #[inline] | ||
| pub fn get_loose(self, name: &str) -> Option<T> { | ||
| T::try_from_u32(self.get_loose_u16(name)? as u32).ok() | ||
| } | ||
| } | ||
// With compiled data available, the default parser is the compiled-data one.
#[cfg(feature = "compiled_data")]
impl<T: ParseableEnumeratedProperty> Default for PropertyParserBorrowed<'static, T> {
    /// Equivalent to [`PropertyParserBorrowed::new`].
    fn default() -> Self {
        Self::new()
    }
}
| impl<T: TrieValue> PropertyParserBorrowed<'static, T> { | ||
| /// Creates a new instance of `PropertyParserBorrowed<T>` using compiled data. | ||
| /// | ||
| /// ✨ *Enabled with the `compiled_data` Cargo feature.* | ||
| /// | ||
| /// [📚 Help choosing a constructor](icu_provider::constructors) | ||
| #[cfg(feature = "compiled_data")] | ||
| pub fn new() -> Self | ||
| where | ||
| T: ParseableEnumeratedProperty, | ||
| { | ||
| Self { | ||
| map: T::SINGLETON, | ||
| markers: PhantomData, | ||
| } | ||
| } | ||
| /// Cheaply converts a [`PropertyParserBorrowed<'static>`] into a [`PropertyParser`]. | ||
| /// | ||
| /// Note: Due to branching and indirection, using [`PropertyParser`] might inhibit some | ||
| /// compile-time optimizations that are possible with [`PropertyParserBorrowed`]. | ||
| pub const fn static_to_owned(self) -> PropertyParser<T> { | ||
| PropertyParser { | ||
| map: DataPayload::from_static_ref(self.map), | ||
| markers: PhantomData, | ||
| } | ||
| } | ||
| } | ||
| /// Avoid monomorphizing multiple copies of this function | ||
| fn get_strict_u16(payload: &PropertyValueNameToEnumMap<'_>, name: &str) -> Option<u16> { | ||
| payload.map.get(name).and_then(|i| i.try_into().ok()) | ||
| } | ||
| /// Avoid monomorphizing multiple copies of this function | ||
| fn get_loose_u16(payload: &PropertyValueNameToEnumMap<'_>, name: &str) -> Option<u16> { | ||
| fn recurse(mut cursor: ZeroTrieSimpleAsciiCursor, mut rest: &[u8]) -> Option<usize> { | ||
| if cursor.is_empty() { | ||
| return None; | ||
| } | ||
| // Skip whitespace, underscore, hyphen in trie. | ||
| for skip in [b'\t', b'\n', b'\x0C', b'\r', b' ', 0x0B, b'_', b'-'] { | ||
| let mut skip_cursor = cursor.clone(); | ||
| skip_cursor.step(skip); | ||
| if let Some(r) = recurse(skip_cursor, rest) { | ||
| return Some(r); | ||
| } | ||
| } | ||
| let ascii = loop { | ||
| let Some((&a, r)) = rest.split_first() else { | ||
| return cursor.take_value(); | ||
| }; | ||
| rest = r; | ||
| // Skip whitespace, underscore, hyphen in input | ||
| if !matches!( | ||
| a, | ||
| b'\t' | b'\n' | b'\x0C' | b'\r' | b' ' | 0x0B | b'_' | b'-' | ||
| ) { | ||
| break a; | ||
| } | ||
| }; | ||
| let mut other_case_cursor = cursor.clone(); | ||
| cursor.step(ascii); | ||
| other_case_cursor.step(if ascii.is_ascii_lowercase() { | ||
| ascii.to_ascii_uppercase() | ||
| } else { | ||
| ascii.to_ascii_lowercase() | ||
| }); | ||
| // This uses the call stack as the DFS stack. The recursion will terminate as | ||
| // rest's length is strictly shrinking. The call stack's depth is limited by | ||
| // name.len(). | ||
| recurse(cursor, rest).or_else(|| recurse(other_case_cursor, rest)) | ||
| } | ||
| recurse(payload.map.cursor(), name.as_bytes()).and_then(|i| i.try_into().ok()) | ||
| } | ||
/// A struct capable of looking up the long name of a property value.
/// Access its data by calling [`Self::as_borrowed()`] and using the methods on
/// [`PropertyNamesLongBorrowed`].
///
/// # Example
///
/// ```
/// use icu::properties::props::CanonicalCombiningClass;
/// use icu::properties::PropertyNamesLong;
///
/// let names = PropertyNamesLong::<CanonicalCombiningClass>::new();
/// assert_eq!(
///     names.get(CanonicalCombiningClass::KanaVoicing),
///     Some("Kana_Voicing")
/// );
/// assert_eq!(
///     names.get(CanonicalCombiningClass::AboveLeft),
///     Some("Above_Left")
/// );
/// ```
pub struct PropertyNamesLong<T: NamedEnumeratedProperty> {
    // Type-erased payload holding `T`'s value-to-long-name data.
    map: DataPayload<ErasedMarker<T::DataStructLong>>,
}
impl<T: NamedEnumeratedProperty> core::fmt::Debug for PropertyNamesLong<T> {
    /// Writes only the type name.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        f.debug_struct("PropertyNamesLong")
            // The `map` field is deliberately left out: `T::DataStructLong` is not
            // bounded by `Debug` in `NamedEnumeratedProperty`.
            .finish()
    }
}
/// A borrowed wrapper around property value-to-long-name data, returned by
/// [`PropertyNamesLong::as_borrowed()`]. More efficient to query.
#[derive(Debug)]
pub struct PropertyNamesLongBorrowed<'a, T: NamedEnumeratedProperty> {
    // Borrowed lookup structure queried by `get`.
    map: &'a T::DataStructLongBorrowed<'a>,
}

// `Clone` and `Copy` are written by hand so that they carry no `T: Clone` / `T: Copy`
// bound (a derive would add one); the only field is a shared reference.
impl<T: NamedEnumeratedProperty> Clone for PropertyNamesLongBorrowed<'_, T> {
    fn clone(&self) -> Self {
        *self
    }
}

impl<T: NamedEnumeratedProperty> Copy for PropertyNamesLongBorrowed<'_, T> {}
| impl<T: NamedEnumeratedProperty> PropertyNamesLong<T> { | ||
| /// Creates a new instance of `PropertyNamesLongBorrowed<T>`. | ||
| /// | ||
| /// ✨ *Enabled with the `compiled_data` Cargo feature.* | ||
| /// | ||
| /// [📚 Help choosing a constructor](icu_provider::constructors) | ||
| #[cfg(feature = "compiled_data")] | ||
| #[expect(clippy::new_ret_no_self)] | ||
| pub fn new() -> PropertyNamesLongBorrowed<'static, T> { | ||
| PropertyNamesLongBorrowed::new() | ||
| } | ||
| #[doc = icu_provider::gen_buffer_unstable_docs!(UNSTABLE, Self::new)] | ||
| pub fn try_new_unstable( | ||
| provider: &(impl DataProvider<T::DataMarkerLong> + ?Sized), | ||
| ) -> Result<Self, DataError> { | ||
| Ok(Self { | ||
| map: provider.load(Default::default())?.payload.cast(), | ||
| }) | ||
| } | ||
| /// Construct a borrowed version of this type that can be queried. | ||
| /// | ||
| /// This avoids a potential small underlying cost per API call (like `get_static()`) by consolidating it | ||
| /// up front. | ||
| #[inline] | ||
| pub fn as_borrowed(&self) -> PropertyNamesLongBorrowed<'_, T> { | ||
| PropertyNamesLongBorrowed { | ||
| map: T::nep_long_identity(self.map.get()), | ||
| } | ||
| } | ||
| } | ||
| impl<'a, T: NamedEnumeratedProperty> PropertyNamesLongBorrowed<'a, T> { | ||
| /// Get the property name given a value | ||
| /// | ||
| /// # Example | ||
| /// | ||
| /// ```rust | ||
| /// use icu::properties::props::CanonicalCombiningClass; | ||
| /// use icu::properties::PropertyNamesLong; | ||
| /// | ||
| /// let lookup = PropertyNamesLong::<CanonicalCombiningClass>::new(); | ||
| /// assert_eq!( | ||
| /// lookup.get(CanonicalCombiningClass::KanaVoicing), | ||
| /// Some("Kana_Voicing") | ||
| /// ); | ||
| /// assert_eq!( | ||
| /// lookup.get(CanonicalCombiningClass::AboveLeft), | ||
| /// Some("Above_Left") | ||
| /// ); | ||
| /// ``` | ||
| #[inline] | ||
| pub fn get(self, property: T) -> Option<&'a str> { | ||
| self.map.get(property.to_u32()) | ||
| } | ||
| } | ||
// With compiled data available, the default instance is the compiled-data one.
#[cfg(feature = "compiled_data")]
impl<T: NamedEnumeratedProperty> Default for PropertyNamesLongBorrowed<'static, T> {
    /// Equivalent to [`PropertyNamesLongBorrowed::new`].
    fn default() -> Self {
        Self::new()
    }
}
| impl<T: NamedEnumeratedProperty> PropertyNamesLongBorrowed<'static, T> { | ||
| /// Creates a new instance of `PropertyNamesLongBorrowed<T>`. | ||
| /// | ||
| /// ✨ *Enabled with the `compiled_data` Cargo feature.* | ||
| /// | ||
| /// [📚 Help choosing a constructor](icu_provider::constructors) | ||
| #[cfg(feature = "compiled_data")] | ||
| pub fn new() -> Self { | ||
| Self { | ||
| map: T::SINGLETON_LONG, | ||
| } | ||
| } | ||
| /// Cheaply converts a [`PropertyNamesLongBorrowed<'static>`] into a [`PropertyNamesLong`]. | ||
| /// | ||
| /// Note: Due to branching and indirection, using [`PropertyNamesLong`] might inhibit some | ||
| /// compile-time optimizations that are possible with [`PropertyNamesLongBorrowed`]. | ||
| /// | ||
| /// This is currently not `const` unlike other `static_to_owned()` functions since it needs | ||
| /// const traits to do that safely | ||
| pub fn static_to_owned(self) -> PropertyNamesLong<T> { | ||
| PropertyNamesLong { | ||
| map: DataPayload::from_static_ref(T::nep_long_identity_static(self.map)), | ||
| } | ||
| } | ||
| } | ||
/// A struct capable of looking up the short name of a property value.
/// Access its data by calling [`Self::as_borrowed()`] and using the methods on
/// [`PropertyNamesShortBorrowed`].
///
/// # Example
///
/// ```
/// use icu::properties::props::CanonicalCombiningClass;
/// use icu::properties::PropertyNamesShort;
///
/// let names = PropertyNamesShort::<CanonicalCombiningClass>::new();
/// assert_eq!(names.get(CanonicalCombiningClass::KanaVoicing), Some("KV"));
/// assert_eq!(names.get(CanonicalCombiningClass::AboveLeft), Some("AL"));
/// ```
pub struct PropertyNamesShort<T: NamedEnumeratedProperty> {
    // Type-erased payload holding `T`'s value-to-short-name data.
    map: DataPayload<ErasedMarker<T::DataStructShort>>,
}
impl<T: NamedEnumeratedProperty> core::fmt::Debug for PropertyNamesShort<T> {
    /// Writes only the type name.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        f.debug_struct("PropertyNamesShort")
            // The `map` field is deliberately left out: `T::DataStructShort` is not
            // bounded by `Debug` in `NamedEnumeratedProperty`.
            .finish()
    }
}
/// A borrowed wrapper around property value-to-short-name data, returned by
/// [`PropertyNamesShort::as_borrowed()`]. More efficient to query.
#[derive(Debug)]
pub struct PropertyNamesShortBorrowed<'a, T: NamedEnumeratedProperty> {
    // Borrowed lookup structure queried by `get`.
    map: &'a T::DataStructShortBorrowed<'a>,
}

// `Clone` and `Copy` are written by hand so that they carry no `T: Clone` / `T: Copy`
// bound (a derive would add one); the only field is a shared reference.
impl<T: NamedEnumeratedProperty> Clone for PropertyNamesShortBorrowed<'_, T> {
    fn clone(&self) -> Self {
        *self
    }
}

impl<T: NamedEnumeratedProperty> Copy for PropertyNamesShortBorrowed<'_, T> {}
| impl<T: NamedEnumeratedProperty> PropertyNamesShort<T> { | ||
| /// Creates a new instance of `PropertyNamesShortBorrowed<T>`. | ||
| /// | ||
| /// ✨ *Enabled with the `compiled_data` Cargo feature.* | ||
| /// | ||
| /// [📚 Help choosing a constructor](icu_provider::constructors) | ||
| #[cfg(feature = "compiled_data")] | ||
| #[expect(clippy::new_ret_no_self)] | ||
| pub fn new() -> PropertyNamesShortBorrowed<'static, T> { | ||
| PropertyNamesShortBorrowed::new() | ||
| } | ||
| #[doc = icu_provider::gen_buffer_unstable_docs!(UNSTABLE, Self::new)] | ||
| pub fn try_new_unstable( | ||
| provider: &(impl DataProvider<T::DataMarkerShort> + ?Sized), | ||
| ) -> Result<Self, DataError> { | ||
| Ok(Self { | ||
| map: provider.load(Default::default())?.payload.cast(), | ||
| }) | ||
| } | ||
| /// Construct a borrowed version of this type that can be queried. | ||
| /// | ||
| /// This avoids a potential small underlying cost per API call (like `get_static()`) by consolidating it | ||
| /// up front. | ||
| #[inline] | ||
| pub fn as_borrowed(&self) -> PropertyNamesShortBorrowed<'_, T> { | ||
| PropertyNamesShortBorrowed { | ||
| map: T::nep_short_identity(self.map.get()), | ||
| } | ||
| } | ||
| } | ||
| impl<'a, T: NamedEnumeratedProperty> PropertyNamesShortBorrowed<'a, T> { | ||
| /// Get the property name given a value | ||
| /// | ||
| /// # Example | ||
| /// | ||
| /// ```rust | ||
| /// use icu::properties::props::CanonicalCombiningClass; | ||
| /// use icu::properties::PropertyNamesShort; | ||
| /// | ||
| /// let lookup = PropertyNamesShort::<CanonicalCombiningClass>::new(); | ||
| /// assert_eq!(lookup.get(CanonicalCombiningClass::KanaVoicing), Some("KV")); | ||
| /// assert_eq!(lookup.get(CanonicalCombiningClass::AboveLeft), Some("AL")); | ||
| /// ``` | ||
| #[inline] | ||
| pub fn get(self, property: T) -> Option<&'a str> { | ||
| self.map.get(property.to_u32()) | ||
| } | ||
| } | ||
| impl PropertyNamesShortBorrowed<'_, Script> { | ||
| /// Gets the "name" of a script property as a `icu::locale::subtags::Script`. | ||
| /// | ||
| /// This method is available only on `PropertyNamesShortBorrowed<Script>`. | ||
| /// | ||
| /// # Example | ||
| /// | ||
| /// ```rust | ||
| /// use icu::locale::subtags::script; | ||
| /// use icu::properties::props::Script; | ||
| /// use icu::properties::PropertyNamesShort; | ||
| /// | ||
| /// let lookup = PropertyNamesShort::<Script>::new(); | ||
| /// assert_eq!( | ||
| /// lookup.get_locale_script(Script::Brahmi), | ||
| /// Some(script!("Brah")) | ||
| /// ); | ||
| /// assert_eq!( | ||
| /// lookup.get_locale_script(Script::Hangul), | ||
| /// Some(script!("Hang")) | ||
| /// ); | ||
| /// ``` | ||
| /// | ||
| /// For the reverse direction, use property parsing as normal: | ||
| /// ``` | ||
| /// use icu::locale::subtags::script; | ||
| /// use icu::properties::props::Script; | ||
| /// use icu::properties::PropertyParser; | ||
| /// | ||
| /// let parser = PropertyParser::<Script>::new(); | ||
| /// assert_eq!( | ||
| /// parser.get_strict(script!("Brah").as_str()), | ||
| /// Some(Script::Brahmi) | ||
| /// ); | ||
| /// assert_eq!( | ||
| /// parser.get_strict(script!("Hang").as_str()), | ||
| /// Some(Script::Hangul) | ||
| /// ); | ||
| /// ``` | ||
| #[inline] | ||
| pub fn get_locale_script(self, property: Script) -> Option<icu_locale_core::subtags::Script> { | ||
| let prop = usize::try_from(property.to_u32()).ok()?; | ||
| self.map.map.get(prop).and_then(|o| o.0) | ||
| } | ||
| } | ||
// With compiled data available, the default instance is the compiled-data one.
#[cfg(feature = "compiled_data")]
impl<T: NamedEnumeratedProperty> Default for PropertyNamesShortBorrowed<'static, T> {
    /// Equivalent to [`PropertyNamesShortBorrowed::new`].
    fn default() -> Self {
        Self::new()
    }
}
| impl<T: NamedEnumeratedProperty> PropertyNamesShortBorrowed<'static, T> { | ||
| /// Creates a new instance of `PropertyNamesShortBorrowed<T>`. | ||
| /// | ||
| /// ✨ *Enabled with the `compiled_data` Cargo feature.* | ||
| /// | ||
| /// [📚 Help choosing a constructor](icu_provider::constructors) | ||
| #[cfg(feature = "compiled_data")] | ||
| pub fn new() -> Self { | ||
| Self { | ||
| map: T::SINGLETON_SHORT, | ||
| } | ||
| } | ||
| /// Cheaply converts a [`PropertyNamesShortBorrowed<'static>`] into a [`PropertyNamesShort`]. | ||
| /// | ||
| /// Note: Due to branching and indirection, using [`PropertyNamesShort`] might inhibit some | ||
| /// compile-time optimizations that are possible with [`PropertyNamesShortBorrowed`]. | ||
| /// | ||
| /// This is currently not `const` unlike other `static_to_owned()` functions since it needs | ||
| /// const traits to do that safely | ||
| pub fn static_to_owned(self) -> PropertyNamesShort<T> { | ||
| PropertyNamesShort { | ||
| map: DataPayload::from_static_ref(T::nep_short_identity_static(self.map)), | ||
| } | ||
| } | ||
| } | ||
/// A property whose value names can be parsed from strings.
///
/// This trait has `crate::private::Sealed` as a supertrait.
pub trait ParseableEnumeratedProperty: crate::private::Sealed + TrieValue {
    // Data marker whose data struct is the name-to-enum map used for parsing.
    #[doc(hidden)]
    type DataMarker: DataMarker<DataStruct = PropertyValueNameToEnumMap<'static>>;
    // Compiled-data instance of the map; read by `PropertyParserBorrowed::new`.
    #[doc(hidden)]
    #[cfg(feature = "compiled_data")]
    const SINGLETON: &'static PropertyValueNameToEnumMap<'static>;
}
// Abstract over Linear/Sparse/Script representation
// This trait is implicitly sealed by not being exported.
pub trait PropertyEnumToValueNameLookup {
    // Returns the name stored for the property value `prop`, or `None` if there is none.
    fn get(&self, prop: u32) -> Option<&str>;
}
| impl PropertyEnumToValueNameLookup for PropertyEnumToValueNameLinearMap<'_> { | ||
| fn get(&self, prop: u32) -> Option<&str> { | ||
| self.map.get(usize::try_from(prop).ok()?) | ||
| } | ||
| } | ||
#[cfg(feature = "alloc")]
impl PropertyEnumToValueNameLookup for PropertyEnumToValueNameSparseMap<'_> {
    /// Looks `prop` up as a `u16` key; values that do not fit in a `u16` yield `None`.
    fn get(&self, prop: u32) -> Option<&str> {
        let key = u16::try_from(prop).ok()?;
        self.map.get(&key)
    }
}
| impl PropertyEnumToValueNameLookup for PropertyScriptToIcuScriptMap<'_> { | ||
| fn get(&self, prop: u32) -> Option<&str> { | ||
| self.map | ||
| .get_ule_ref(usize::try_from(prop).ok()?) | ||
| .and_then(|no| no.as_ref()) | ||
| .map(|s| s.as_str()) | ||
| } | ||
| } | ||
/// A property whose value names can be represented as strings.
pub trait NamedEnumeratedProperty: ParseableEnumeratedProperty {
    // Owned (`'static`) data struct holding the long names.
    #[doc(hidden)]
    type DataStructLong: 'static
        + for<'a> Yokeable<'a, Output = Self::DataStructLongBorrowed<'a>>
        + PropertyEnumToValueNameLookup;
    // Owned (`'static`) data struct holding the short names.
    #[doc(hidden)]
    type DataStructShort: 'static
        + for<'a> Yokeable<'a, Output = Self::DataStructShortBorrowed<'a>>
        + PropertyEnumToValueNameLookup;
    // Borrowed form of `DataStructLong`, i.e. its `Yokeable` output for lifetime `'a`.
    #[doc(hidden)]
    type DataStructLongBorrowed<'a>: PropertyEnumToValueNameLookup;
    // Borrowed form of `DataStructShort`, i.e. its `Yokeable` output for lifetime `'a`.
    #[doc(hidden)]
    type DataStructShortBorrowed<'a>: PropertyEnumToValueNameLookup;
    // Data marker used to load the long-name data.
    #[doc(hidden)]
    type DataMarkerLong: DataMarker<DataStruct = Self::DataStructLong>;
    // Data marker used to load the short-name data.
    #[doc(hidden)]
    type DataMarkerShort: DataMarker<DataStruct = Self::DataStructShort>;
    // Compiled-data instance of the long names; read by `PropertyNamesLongBorrowed::new`.
    #[doc(hidden)]
    #[cfg(feature = "compiled_data")]
    const SINGLETON_LONG: &'static Self::DataStructLongBorrowed<'static>;
    // Compiled-data instance of the short names; read by `PropertyNamesShortBorrowed::new`.
    #[doc(hidden)]
    #[cfg(feature = "compiled_data")]
    const SINGLETON_SHORT: &'static Self::DataStructShortBorrowed<'static>;

    // These wouldn't be necessary if Yoke used GATs (#6057)
    // Converts the `Yokeable` output of `DataStructLong` to the borrowed long type.
    #[doc(hidden)]
    fn nep_long_identity<'a>(
        stat: &'a <Self::DataStructLong as Yokeable<'a>>::Output,
    ) -> &'a Self::DataStructLongBorrowed<'a>;
    // Converts a `'static` borrowed long struct back to the owned long type.
    #[doc(hidden)]
    fn nep_long_identity_static(
        stat: &'static Self::DataStructLongBorrowed<'static>,
    ) -> &'static Self::DataStructLong;
    // Converts the `Yokeable` output of `DataStructShort` to the borrowed short type.
    #[doc(hidden)]
    fn nep_short_identity<'a>(
        stat: &'a <Self::DataStructShort as Yokeable<'a>>::Output,
    ) -> &'a Self::DataStructShortBorrowed<'a>;
    // Converts a `'static` borrowed short struct back to the owned short type.
    #[doc(hidden)]
    fn nep_short_identity_static(
        stat: &'static Self::DataStructShortBorrowed<'static>,
    ) -> &'static Self::DataStructShort;

    /// Convenience method for `PropertyParser::new().get_loose(s)`
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    #[cfg(feature = "compiled_data")]
    fn try_from_str(s: &str) -> Option<Self> {
        PropertyParser::new().get_loose(s)
    }

    /// Convenience method for `PropertyNamesLong::new().get(*self)`.
    ///
    /// This method does not panic: if the compiled data has no long name for `self`,
    /// the literal string `"unreachable"` is returned instead.
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    #[cfg(feature = "compiled_data")]
    fn long_name(&self) -> &'static str {
        PropertyNamesLong::new().get(*self).unwrap_or("unreachable")
    }

    /// Convenience method for `PropertyNamesShort::new().get(*self)`.
    ///
    /// This method does not panic: if the compiled data has no short name for `self`,
    /// the literal string `"unreachable"` is returned instead.
    ///
    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
    #[cfg(feature = "compiled_data")]
    fn short_name(&self) -> &'static str {
        PropertyNamesShort::new()
            .get(*self)
            .unwrap_or("unreachable")
    }
}
// Wires one property type into the name-lookup traits.
//
// Input shape:
//
//     impl Ty {
//         ParseMarker / PARSE_SINGLETON;
//         [#[attrs]]
//         ShortStruct / ShortMarker / SHORT_SINGLETON;
//         LongStruct / LongMarker / LONG_SINGLETON;
//     }
//
// The attributes and the short/long lines form a single optional group.
//
// Expansion:
// * always `impl ParseableEnumeratedProperty for Ty`;
// * when the optional group is present, also
//   `impl NamedEnumeratedProperty for Ty`, with the attributes attached to
//   that impl.
macro_rules! impl_value_getter {
    (
        impl $prop:ident {
            $parse_marker:ident / $parse_singleton:ident;
            $(
                $(#[$attr:meta])*
                $short_struct:ident / $short_marker:ident / $short_singleton:ident;
                $long_struct:ident / $long_marker:ident / $long_singleton:ident;
            )?
        }
    ) => {
        impl ParseableEnumeratedProperty for $prop {
            type DataMarker = $parse_marker;
            #[cfg(feature = "compiled_data")]
            const SINGLETON: &'static PropertyValueNameToEnumMap<'static> =
                crate::provider::Baked::$parse_singleton;
        }

        $(
            $(#[$attr])*
            impl NamedEnumeratedProperty for $prop {
                type DataStructLong = $long_struct<'static>;
                type DataStructShort = $short_struct<'static>;
                type DataStructLongBorrowed<'a> = $long_struct<'a>;
                type DataStructShortBorrowed<'a> = $short_struct<'a>;
                type DataMarkerLong = crate::provider::$long_marker;
                type DataMarkerShort = crate::provider::$short_marker;

                #[cfg(feature = "compiled_data")]
                const SINGLETON_LONG: &'static Self::DataStructLong =
                    crate::provider::Baked::$long_singleton;
                #[cfg(feature = "compiled_data")]
                const SINGLETON_SHORT: &'static Self::DataStructShort =
                    crate::provider::Baked::$short_singleton;

                // Each `nep_*` function hands its argument back unchanged.
                fn nep_long_identity<'a>(
                    long: &'a $long_struct<'a>,
                ) -> &'a Self::DataStructLongBorrowed<'a> {
                    long
                }

                fn nep_long_identity_static(
                    long: &'static $long_struct<'static>,
                ) -> &'static $long_struct<'static> {
                    long
                }

                fn nep_short_identity<'a>(
                    short: &'a $short_struct<'a>,
                ) -> &'a Self::DataStructShortBorrowed<'a> {
                    short
                }

                fn nep_short_identity_static(
                    short: &'static $short_struct<'static>,
                ) -> &'static $short_struct<'static> {
                    short
                }
            }
        )?
    };
}
// `BidiClass`: implements `ParseableEnumeratedProperty` and `NamedEnumeratedProperty`.
// Short and long names both use `PropertyEnumToValueNameLinearMap`.
| impl_value_getter! { | ||
| impl BidiClass { | ||
| PropertyNameParseBidiClassV1 / SINGLETON_PROPERTY_NAME_PARSE_BIDI_CLASS_V1; | ||
| PropertyEnumToValueNameLinearMap / PropertyNameShortBidiClassV1 / SINGLETON_PROPERTY_NAME_SHORT_BIDI_CLASS_V1; | ||
| PropertyEnumToValueNameLinearMap / PropertyNameLongBidiClassV1 / SINGLETON_PROPERTY_NAME_LONG_BIDI_CLASS_V1; | ||
| } | ||
| } | ||
// `GeneralCategory`: implements `ParseableEnumeratedProperty` and `NamedEnumeratedProperty`.
// Short and long names both use `PropertyEnumToValueNameLinearMap`.
| impl_value_getter! { | ||
| impl GeneralCategory { | ||
| PropertyNameParseGeneralCategoryV1 / SINGLETON_PROPERTY_NAME_PARSE_GENERAL_CATEGORY_V1; | ||
| PropertyEnumToValueNameLinearMap / PropertyNameShortGeneralCategoryV1 / SINGLETON_PROPERTY_NAME_SHORT_GENERAL_CATEGORY_V1; | ||
| PropertyEnumToValueNameLinearMap / PropertyNameLongGeneralCategoryV1 / SINGLETON_PROPERTY_NAME_LONG_GENERAL_CATEGORY_V1; | ||
| } | ||
| } | ||
// `GeneralCategoryGroup`: only the parse line is supplied, so the macro emits
// `ParseableEnumeratedProperty` alone (no `NamedEnumeratedProperty` impl).
// Note the marker is the general-category *mask* parse marker.
| impl_value_getter! { | ||
| impl GeneralCategoryGroup { | ||
| PropertyNameParseGeneralCategoryMaskV1 / SINGLETON_PROPERTY_NAME_PARSE_GENERAL_CATEGORY_MASK_V1; | ||
| } | ||
| } | ||
// `Script`: implements `ParseableEnumeratedProperty` and `NamedEnumeratedProperty`.
// Short names use `PropertyScriptToIcuScriptMap`; long names use
// `PropertyEnumToValueNameLinearMap`.
| impl_value_getter! { | ||
| impl Script { | ||
| PropertyNameParseScriptV1 / SINGLETON_PROPERTY_NAME_PARSE_SCRIPT_V1; | ||
| PropertyScriptToIcuScriptMap / PropertyNameShortScriptV1 / SINGLETON_PROPERTY_NAME_SHORT_SCRIPT_V1; | ||
| PropertyEnumToValueNameLinearMap / PropertyNameLongScriptV1 / SINGLETON_PROPERTY_NAME_LONG_SCRIPT_V1; | ||
| } | ||
| } | ||
// `HangulSyllableType`: implements `ParseableEnumeratedProperty` and
// `NamedEnumeratedProperty`. Short and long names both use
// `PropertyEnumToValueNameLinearMap`.
| impl_value_getter! { | ||
| impl HangulSyllableType { | ||
| PropertyNameParseHangulSyllableTypeV1 / SINGLETON_PROPERTY_NAME_PARSE_HANGUL_SYLLABLE_TYPE_V1; | ||
| PropertyEnumToValueNameLinearMap / PropertyNameShortHangulSyllableTypeV1 / SINGLETON_PROPERTY_NAME_SHORT_HANGUL_SYLLABLE_TYPE_V1; | ||
| PropertyEnumToValueNameLinearMap / PropertyNameLongHangulSyllableTypeV1 / SINGLETON_PROPERTY_NAME_LONG_HANGUL_SYLLABLE_TYPE_V1; | ||
| } | ||
| } | ||
// `EastAsianWidth`: implements `ParseableEnumeratedProperty` and
// `NamedEnumeratedProperty`. Short and long names both use
// `PropertyEnumToValueNameLinearMap`.
| impl_value_getter! { | ||
| impl EastAsianWidth { | ||
| PropertyNameParseEastAsianWidthV1 / SINGLETON_PROPERTY_NAME_PARSE_EAST_ASIAN_WIDTH_V1; | ||
| PropertyEnumToValueNameLinearMap / PropertyNameShortEastAsianWidthV1 / SINGLETON_PROPERTY_NAME_SHORT_EAST_ASIAN_WIDTH_V1; | ||
| PropertyEnumToValueNameLinearMap / PropertyNameLongEastAsianWidthV1 / SINGLETON_PROPERTY_NAME_LONG_EAST_ASIAN_WIDTH_V1; | ||
| } | ||
| } | ||
// `LineBreak`: implements `ParseableEnumeratedProperty` and `NamedEnumeratedProperty`.
// Short and long names both use `PropertyEnumToValueNameLinearMap`.
| impl_value_getter! { | ||
| impl LineBreak { | ||
| PropertyNameParseLineBreakV1 / SINGLETON_PROPERTY_NAME_PARSE_LINE_BREAK_V1; | ||
| PropertyEnumToValueNameLinearMap / PropertyNameShortLineBreakV1 / SINGLETON_PROPERTY_NAME_SHORT_LINE_BREAK_V1; | ||
| PropertyEnumToValueNameLinearMap / PropertyNameLongLineBreakV1 / SINGLETON_PROPERTY_NAME_LONG_LINE_BREAK_V1; | ||
| } | ||
| } | ||
// `GraphemeClusterBreak`: implements `ParseableEnumeratedProperty` and
// `NamedEnumeratedProperty`. Short and long names both use
// `PropertyEnumToValueNameLinearMap`.
| impl_value_getter! { | ||
| impl GraphemeClusterBreak { | ||
| PropertyNameParseGraphemeClusterBreakV1 / SINGLETON_PROPERTY_NAME_PARSE_GRAPHEME_CLUSTER_BREAK_V1; | ||
| PropertyEnumToValueNameLinearMap / PropertyNameShortGraphemeClusterBreakV1 / SINGLETON_PROPERTY_NAME_SHORT_GRAPHEME_CLUSTER_BREAK_V1; | ||
| PropertyEnumToValueNameLinearMap / PropertyNameLongGraphemeClusterBreakV1 / SINGLETON_PROPERTY_NAME_LONG_GRAPHEME_CLUSTER_BREAK_V1; | ||
| } | ||
| } | ||
// `WordBreak`: implements `ParseableEnumeratedProperty` and `NamedEnumeratedProperty`.
// Short and long names both use `PropertyEnumToValueNameLinearMap`.
| impl_value_getter! { | ||
| impl WordBreak { | ||
| PropertyNameParseWordBreakV1 / SINGLETON_PROPERTY_NAME_PARSE_WORD_BREAK_V1; | ||
| PropertyEnumToValueNameLinearMap / PropertyNameShortWordBreakV1 / SINGLETON_PROPERTY_NAME_SHORT_WORD_BREAK_V1; | ||
| PropertyEnumToValueNameLinearMap / PropertyNameLongWordBreakV1 / SINGLETON_PROPERTY_NAME_LONG_WORD_BREAK_V1; | ||
| } | ||
| } | ||
// `SentenceBreak`: implements `ParseableEnumeratedProperty` and `NamedEnumeratedProperty`.
// Short and long names both use `PropertyEnumToValueNameLinearMap`.
| impl_value_getter! { | ||
| impl SentenceBreak { | ||
| PropertyNameParseSentenceBreakV1 / SINGLETON_PROPERTY_NAME_PARSE_SENTENCE_BREAK_V1; | ||
| PropertyEnumToValueNameLinearMap / PropertyNameShortSentenceBreakV1 / SINGLETON_PROPERTY_NAME_SHORT_SENTENCE_BREAK_V1; | ||
| PropertyEnumToValueNameLinearMap / PropertyNameLongSentenceBreakV1 / SINGLETON_PROPERTY_NAME_LONG_SENTENCE_BREAK_V1; | ||
| } | ||
| } | ||
// `CanonicalCombiningClass`: `ParseableEnumeratedProperty` is always implemented.
// The two attribute lines below are forwarded onto the `NamedEnumeratedProperty`
// impl, so that impl exists only with the `alloc` feature. Short and long names
// both use `PropertyEnumToValueNameSparseMap`.
| impl_value_getter! { | ||
| impl CanonicalCombiningClass { | ||
| PropertyNameParseCanonicalCombiningClassV1 / SINGLETON_PROPERTY_NAME_PARSE_CANONICAL_COMBINING_CLASS_V1; | ||
| #[cfg(feature = "alloc")] | ||
| /// ✨ *Enabled with the `alloc` Cargo feature.* | ||
| PropertyEnumToValueNameSparseMap / PropertyNameShortCanonicalCombiningClassV1 / SINGLETON_PROPERTY_NAME_SHORT_CANONICAL_COMBINING_CLASS_V1; | ||
| PropertyEnumToValueNameSparseMap / PropertyNameLongCanonicalCombiningClassV1 / SINGLETON_PROPERTY_NAME_LONG_CANONICAL_COMBINING_CLASS_V1; | ||
| } | ||
| } | ||
// `IndicSyllabicCategory`: implements `ParseableEnumeratedProperty` and
// `NamedEnumeratedProperty`. Short and long names both use
// `PropertyEnumToValueNameLinearMap`.
| impl_value_getter! { | ||
| impl IndicSyllabicCategory { | ||
| PropertyNameParseIndicSyllabicCategoryV1 / SINGLETON_PROPERTY_NAME_PARSE_INDIC_SYLLABIC_CATEGORY_V1; | ||
| PropertyEnumToValueNameLinearMap / PropertyNameShortIndicSyllabicCategoryV1 / SINGLETON_PROPERTY_NAME_SHORT_INDIC_SYLLABIC_CATEGORY_V1; | ||
| PropertyEnumToValueNameLinearMap / PropertyNameLongIndicSyllabicCategoryV1 / SINGLETON_PROPERTY_NAME_LONG_INDIC_SYLLABIC_CATEGORY_V1; | ||
| } | ||
| } | ||
// `JoiningType`: implements `ParseableEnumeratedProperty` and `NamedEnumeratedProperty`.
// Short and long names both use `PropertyEnumToValueNameLinearMap`.
| impl_value_getter! { | ||
| impl JoiningType { | ||
| PropertyNameParseJoiningTypeV1 / SINGLETON_PROPERTY_NAME_PARSE_JOINING_TYPE_V1; | ||
| PropertyEnumToValueNameLinearMap / PropertyNameShortJoiningTypeV1 / SINGLETON_PROPERTY_NAME_SHORT_JOINING_TYPE_V1; | ||
| PropertyEnumToValueNameLinearMap / PropertyNameLongJoiningTypeV1 / SINGLETON_PROPERTY_NAME_LONG_JOINING_TYPE_V1; | ||
| } | ||
| } | ||
// `VerticalOrientation`: implements `ParseableEnumeratedProperty` and
// `NamedEnumeratedProperty`. Short and long names both use
// `PropertyEnumToValueNameLinearMap`.
| impl_value_getter! { | ||
| impl VerticalOrientation { | ||
| PropertyNameParseVerticalOrientationV1 / SINGLETON_PROPERTY_NAME_PARSE_VERTICAL_ORIENTATION_V1; | ||
| PropertyEnumToValueNameLinearMap / PropertyNameShortVerticalOrientationV1 / SINGLETON_PROPERTY_NAME_SHORT_VERTICAL_ORIENTATION_V1; | ||
| PropertyEnumToValueNameLinearMap / PropertyNameLongVerticalOrientationV1 / SINGLETON_PROPERTY_NAME_LONG_VERTICAL_ORIENTATION_V1; | ||
| } | ||
| } | ||
Sorry, the diff of this file is too big to display
| // This file is part of ICU4X. For terms of use, please see the file | ||
| // called LICENSE at the top level of the ICU4X source tree | ||
| // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). | ||
| // Provider structs must be stable | ||
| #![allow(clippy::exhaustive_structs, clippy::exhaustive_enums)] | ||
| //! 🚧 \[Unstable\] Data provider struct definitions for this ICU4X component. | ||
| //! | ||
| //! <div class="stab unstable"> | ||
| //! 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways, | ||
| //! including in SemVer minor releases. While the serde representation of data structs is guaranteed | ||
| //! to be stable, their Rust representation might not be. Use with caution. | ||
| //! </div> | ||
| //! | ||
| //! Read more about data providers: [`icu_provider`] | ||
| pub mod names; | ||
| #[cfg(feature = "alloc")] | ||
| pub use names::{ | ||
| PropertyNameLongCanonicalCombiningClassV1, PropertyNameShortCanonicalCombiningClassV1, | ||
| }; | ||
| pub use names::{ | ||
| PropertyNameLongBidiClassV1, PropertyNameLongEastAsianWidthV1, | ||
| PropertyNameLongGeneralCategoryV1, PropertyNameLongGraphemeClusterBreakV1, | ||
| PropertyNameLongHangulSyllableTypeV1, PropertyNameLongIndicSyllabicCategoryV1, | ||
| PropertyNameLongJoiningTypeV1, PropertyNameLongLineBreakV1, PropertyNameLongScriptV1, | ||
| PropertyNameLongSentenceBreakV1, PropertyNameLongVerticalOrientationV1, | ||
| PropertyNameLongWordBreakV1, PropertyNameParseBidiClassV1, | ||
| PropertyNameParseCanonicalCombiningClassV1, PropertyNameParseEastAsianWidthV1, | ||
| PropertyNameParseGeneralCategoryMaskV1, PropertyNameParseGeneralCategoryV1, | ||
| PropertyNameParseGraphemeClusterBreakV1, PropertyNameParseHangulSyllableTypeV1, | ||
| PropertyNameParseIndicSyllabicCategoryV1, PropertyNameParseJoiningTypeV1, | ||
| PropertyNameParseLineBreakV1, PropertyNameParseScriptV1, PropertyNameParseSentenceBreakV1, | ||
| PropertyNameParseVerticalOrientationV1, PropertyNameParseWordBreakV1, | ||
| PropertyNameShortBidiClassV1, PropertyNameShortEastAsianWidthV1, | ||
| PropertyNameShortGeneralCategoryV1, PropertyNameShortGraphemeClusterBreakV1, | ||
| PropertyNameShortHangulSyllableTypeV1, PropertyNameShortIndicSyllabicCategoryV1, | ||
| PropertyNameShortJoiningTypeV1, PropertyNameShortLineBreakV1, PropertyNameShortScriptV1, | ||
| PropertyNameShortSentenceBreakV1, PropertyNameShortVerticalOrientationV1, | ||
| PropertyNameShortWordBreakV1, | ||
| }; | ||
| pub use crate::props::gc::GeneralCategoryULE; | ||
| use crate::props::*; | ||
| use crate::script::ScriptWithExt; | ||
| use core::ops::RangeInclusive; | ||
| use icu_collections::codepointinvlist::CodePointInversionList; | ||
| use icu_collections::codepointinvliststringlist::CodePointInversionListAndStringList; | ||
| use icu_collections::codepointtrie::{CodePointMapRange, CodePointTrie, TrieValue}; | ||
| use icu_provider::prelude::*; | ||
| use zerofrom::ZeroFrom; | ||
| use zerovec::{VarZeroVec, ZeroSlice}; | ||
| #[cfg(feature = "compiled_data")] | ||
| #[derive(Debug)] | ||
| /// Baked data: the unit struct targeted by the `make_provider!` and `impl_*!` macro invocations in this module | ||
| /// | ||
| /// <div class="stab unstable"> | ||
| /// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways, | ||
| /// including in SemVer minor releases. In particular, the `DataProvider` implementations are only | ||
| /// guaranteed to match with this version's `*_unstable` providers. Use with caution. | ||
| /// </div> | ||
| pub struct Baked; | ||
// Anonymous const scope, compiled only with the `compiled_data` feature.
// Inside it, every macro brought in by `use icu_properties_data::*` is invoked
// with `Baked` as its argument.
| #[cfg(feature = "compiled_data")] | ||
| #[allow(unused_imports)] | ||
| const _: () = { | ||
| use icu_properties_data::*; | ||
// Local `icu` module: exposes this crate as `icu::properties` and
// `icu_collections` as `icu::collections`. Presumably these are the paths the
// invoked macros expand to; confirm against `icu_properties_data`.
| pub mod icu { | ||
| pub use crate as properties; | ||
| pub use icu_collections as collections; | ||
| } | ||
| make_provider!(Baked); | ||
// Binary property macros.
| impl_property_binary_alnum_v1!(Baked); | ||
| impl_property_binary_alphabetic_v1!(Baked); | ||
| impl_property_binary_ascii_hex_digit_v1!(Baked); | ||
| impl_property_binary_basic_emoji_v1!(Baked); | ||
| impl_property_binary_bidi_control_v1!(Baked); | ||
| impl_property_binary_bidi_mirrored_v1!(Baked); | ||
| impl_property_binary_blank_v1!(Baked); | ||
| impl_property_binary_case_ignorable_v1!(Baked); | ||
| impl_property_binary_case_sensitive_v1!(Baked); | ||
| impl_property_binary_cased_v1!(Baked); | ||
| impl_property_binary_changes_when_casefolded_v1!(Baked); | ||
| impl_property_binary_changes_when_casemapped_v1!(Baked); | ||
| impl_property_binary_changes_when_lowercased_v1!(Baked); | ||
| impl_property_binary_changes_when_nfkc_casefolded_v1!(Baked); | ||
| impl_property_binary_changes_when_titlecased_v1!(Baked); | ||
| impl_property_binary_changes_when_uppercased_v1!(Baked); | ||
| impl_property_binary_dash_v1!(Baked); | ||
| impl_property_binary_default_ignorable_code_point_v1!(Baked); | ||
| impl_property_binary_deprecated_v1!(Baked); | ||
| impl_property_binary_diacritic_v1!(Baked); | ||
| impl_property_binary_emoji_component_v1!(Baked); | ||
| impl_property_binary_emoji_modifier_base_v1!(Baked); | ||
| impl_property_binary_emoji_modifier_v1!(Baked); | ||
| impl_property_binary_emoji_presentation_v1!(Baked); | ||
| impl_property_binary_emoji_v1!(Baked); | ||
| impl_property_binary_extended_pictographic_v1!(Baked); | ||
| impl_property_binary_extender_v1!(Baked); | ||
| impl_property_binary_full_composition_exclusion_v1!(Baked); | ||
| impl_property_binary_graph_v1!(Baked); | ||
| impl_property_binary_grapheme_base_v1!(Baked); | ||
| impl_property_binary_grapheme_extend_v1!(Baked); | ||
| impl_property_binary_grapheme_link_v1!(Baked); | ||
| impl_property_binary_hex_digit_v1!(Baked); | ||
| impl_property_binary_hyphen_v1!(Baked); | ||
| impl_property_binary_id_compat_math_continue_v1!(Baked); | ||
| impl_property_binary_id_compat_math_start_v1!(Baked); | ||
| impl_property_binary_id_continue_v1!(Baked); | ||
| impl_property_binary_id_start_v1!(Baked); | ||
| impl_property_binary_ideographic_v1!(Baked); | ||
| impl_property_binary_ids_binary_operator_v1!(Baked); | ||
| impl_property_binary_ids_trinary_operator_v1!(Baked); | ||
| impl_property_binary_ids_unary_operator_v1!(Baked); | ||
| impl_property_binary_join_control_v1!(Baked); | ||
| impl_property_binary_logical_order_exception_v1!(Baked); | ||
| impl_property_binary_lowercase_v1!(Baked); | ||
| impl_property_binary_math_v1!(Baked); | ||
| impl_property_binary_modifier_combining_mark_v1!(Baked); | ||
| impl_property_binary_nfc_inert_v1!(Baked); | ||
| impl_property_binary_nfd_inert_v1!(Baked); | ||
| impl_property_binary_nfkc_inert_v1!(Baked); | ||
| impl_property_binary_nfkd_inert_v1!(Baked); | ||
| impl_property_binary_noncharacter_code_point_v1!(Baked); | ||
| impl_property_binary_pattern_syntax_v1!(Baked); | ||
| impl_property_binary_pattern_white_space_v1!(Baked); | ||
| impl_property_binary_prepended_concatenation_mark_v1!(Baked); | ||
| impl_property_binary_print_v1!(Baked); | ||
| impl_property_binary_quotation_mark_v1!(Baked); | ||
| impl_property_binary_radical_v1!(Baked); | ||
| impl_property_binary_regional_indicator_v1!(Baked); | ||
| impl_property_binary_segment_starter_v1!(Baked); | ||
| impl_property_binary_sentence_terminal_v1!(Baked); | ||
| impl_property_binary_soft_dotted_v1!(Baked); | ||
| impl_property_binary_terminal_punctuation_v1!(Baked); | ||
| impl_property_binary_unified_ideograph_v1!(Baked); | ||
| impl_property_binary_uppercase_v1!(Baked); | ||
| impl_property_binary_variation_selector_v1!(Baked); | ||
| impl_property_binary_white_space_v1!(Baked); | ||
| impl_property_binary_xdigit_v1!(Baked); | ||
| impl_property_binary_xid_continue_v1!(Baked); | ||
| impl_property_binary_xid_start_v1!(Baked); | ||
// Enumerated property macros.
| impl_property_enum_bidi_class_v1!(Baked); | ||
| impl_property_enum_bidi_mirroring_glyph_v1!(Baked); | ||
| impl_property_enum_canonical_combining_class_v1!(Baked); | ||
| impl_property_enum_east_asian_width_v1!(Baked); | ||
| impl_property_enum_general_category_v1!(Baked); | ||
| impl_property_enum_grapheme_cluster_break_v1!(Baked); | ||
| impl_property_enum_hangul_syllable_type_v1!(Baked); | ||
| impl_property_enum_indic_conjunct_break_v1!(Baked); | ||
| impl_property_enum_indic_syllabic_category_v1!(Baked); | ||
| impl_property_enum_joining_type_v1!(Baked); | ||
| impl_property_enum_line_break_v1!(Baked); | ||
| impl_property_enum_script_v1!(Baked); | ||
| impl_property_enum_sentence_break_v1!(Baked); | ||
| impl_property_enum_vertical_orientation_v1!(Baked); | ||
| impl_property_enum_word_break_v1!(Baked); | ||
// Long value-name macros. The canonical-combining-class one is gated on `alloc`.
| impl_property_name_long_bidi_class_v1!(Baked); | ||
| #[cfg(feature = "alloc")] | ||
| impl_property_name_long_canonical_combining_class_v1!(Baked); | ||
| impl_property_name_long_east_asian_width_v1!(Baked); | ||
| impl_property_name_long_general_category_v1!(Baked); | ||
| impl_property_name_long_grapheme_cluster_break_v1!(Baked); | ||
| impl_property_name_long_hangul_syllable_type_v1!(Baked); | ||
| impl_property_name_long_indic_syllabic_category_v1!(Baked); | ||
| impl_property_name_long_joining_type_v1!(Baked); | ||
| impl_property_name_long_line_break_v1!(Baked); | ||
| impl_property_name_long_script_v1!(Baked); | ||
| impl_property_name_long_sentence_break_v1!(Baked); | ||
| impl_property_name_long_vertical_orientation_v1!(Baked); | ||
| impl_property_name_long_word_break_v1!(Baked); | ||
// Name-parsing macros (none of these is feature-gated beyond `compiled_data`).
| impl_property_name_parse_bidi_class_v1!(Baked); | ||
| impl_property_name_parse_canonical_combining_class_v1!(Baked); | ||
| impl_property_name_parse_east_asian_width_v1!(Baked); | ||
| impl_property_name_parse_general_category_mask_v1!(Baked); | ||
| impl_property_name_parse_general_category_v1!(Baked); | ||
| impl_property_name_parse_grapheme_cluster_break_v1!(Baked); | ||
| impl_property_name_parse_hangul_syllable_type_v1!(Baked); | ||
| impl_property_name_parse_indic_syllabic_category_v1!(Baked); | ||
| impl_property_name_parse_joining_type_v1!(Baked); | ||
| impl_property_name_parse_line_break_v1!(Baked); | ||
| impl_property_name_parse_script_v1!(Baked); | ||
| impl_property_name_parse_sentence_break_v1!(Baked); | ||
| impl_property_name_parse_vertical_orientation_v1!(Baked); | ||
| impl_property_name_parse_word_break_v1!(Baked); | ||
// Short value-name macros. The canonical-combining-class one is gated on `alloc`.
| impl_property_name_short_bidi_class_v1!(Baked); | ||
| #[cfg(feature = "alloc")] | ||
| impl_property_name_short_canonical_combining_class_v1!(Baked); | ||
| impl_property_name_short_east_asian_width_v1!(Baked); | ||
| impl_property_name_short_general_category_v1!(Baked); | ||
| impl_property_name_short_grapheme_cluster_break_v1!(Baked); | ||
| impl_property_name_short_hangul_syllable_type_v1!(Baked); | ||
| impl_property_name_short_indic_syllabic_category_v1!(Baked); | ||
| impl_property_name_short_joining_type_v1!(Baked); | ||
| impl_property_name_short_line_break_v1!(Baked); | ||
| impl_property_name_short_script_v1!(Baked); | ||
| impl_property_name_short_sentence_break_v1!(Baked); | ||
| impl_property_name_short_vertical_orientation_v1!(Baked); | ||
| impl_property_name_short_word_break_v1!(Baked); | ||
// Script-with-extensions macro.
| impl_property_script_with_extensions_v1!(Baked); | ||
| }; | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryAlnumV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryAlnumV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryAlphabeticV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryAlphabeticV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryAsciiHexDigitV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryAsciiHexDigitV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryBidiControlV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryBidiControlV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryBidiMirroredV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryBidiMirroredV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryBlankV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryBlankV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryCasedV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryCasedV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryCaseIgnorableV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryCaseIgnorableV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryCaseSensitiveV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryCaseSensitiveV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryChangesWhenCasefoldedV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryChangesWhenCasefoldedV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryChangesWhenCasemappedV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryChangesWhenCasemappedV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryChangesWhenLowercasedV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryChangesWhenLowercasedV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryChangesWhenNfkcCasefoldedV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryChangesWhenNfkcCasefoldedV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryChangesWhenTitlecasedV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryChangesWhenTitlecasedV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryChangesWhenUppercasedV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryChangesWhenUppercasedV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryDashV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryDashV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryDefaultIgnorableCodePointV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryDefaultIgnorableCodePointV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryDeprecatedV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryDeprecatedV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryDiacriticV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryDiacriticV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryEmojiComponentV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryEmojiComponentV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryEmojiModifierBaseV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryEmojiModifierBaseV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryEmojiModifierV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryEmojiModifierV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryEmojiPresentationV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryEmojiPresentationV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryEmojiV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryEmojiV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryExtendedPictographicV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryExtendedPictographicV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryExtenderV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryExtenderV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryFullCompositionExclusionV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryFullCompositionExclusionV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryGraphemeBaseV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryGraphemeBaseV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryGraphemeExtendV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryGraphemeExtendV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryGraphemeLinkV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryGraphemeLinkV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryGraphV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryGraphV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryHexDigitV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryHexDigitV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryHyphenV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryHyphenV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryIdCompatMathContinueV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryIdCompatMathContinueV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryIdCompatMathStartV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryIdCompatMathStartV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryIdContinueV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryIdContinueV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryIdeographicV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryIdeographicV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryIdsBinaryOperatorV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryIdsBinaryOperatorV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryIdStartV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryIdStartV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryIdsTrinaryOperatorV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryIdsTrinaryOperatorV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryIdsUnaryOperatorV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryIdsUnaryOperatorV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryJoinControlV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryJoinControlV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryLogicalOrderExceptionV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryLogicalOrderExceptionV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryLowercaseV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryLowercaseV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryMathV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryMathV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryModifierCombiningMarkV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryModifierCombiningMarkV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryNfcInertV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryNfcInertV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryNfdInertV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryNfdInertV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryNfkcInertV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryNfkcInertV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryNfkdInertV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryNfkdInertV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryNoncharacterCodePointV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryNoncharacterCodePointV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryPatternSyntaxV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryPatternSyntaxV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryPatternWhiteSpaceV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryPatternWhiteSpaceV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryPrependedConcatenationMarkV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryPrependedConcatenationMarkV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryPrintV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryPrintV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryQuotationMarkV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryQuotationMarkV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryRadicalV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryRadicalV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryRegionalIndicatorV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryRegionalIndicatorV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinarySegmentStarterV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinarySegmentStarterV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinarySentenceTerminalV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinarySentenceTerminalV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinarySoftDottedV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinarySoftDottedV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryTerminalPunctuationV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryTerminalPunctuationV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryUnifiedIdeographV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryUnifiedIdeographV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryUppercaseV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryUppercaseV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryVariationSelectorV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryVariationSelectorV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryWhiteSpaceV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryWhiteSpaceV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryXdigitV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryXdigitV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryXidContinueV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryXidContinueV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyBinaryXidStartV1`: singleton data marker whose data struct is `PropertyCodePointSet<'static>` | ||
| PropertyBinaryXidStartV1, | ||
| PropertyCodePointSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Data marker for the 'BidiClass' Unicode property; the singleton payload maps code points to `crate::props::BidiClass`. | ||
| PropertyEnumBidiClassV1, | ||
| PropertyCodePointMap<'static, crate::props::BidiClass>, | ||
| is_singleton = true, | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Data marker for the 'CanonicalCombiningClass' Unicode property; the singleton payload maps code points to `crate::props::CanonicalCombiningClass`. | ||
| PropertyEnumCanonicalCombiningClassV1, | ||
| PropertyCodePointMap<'static, crate::props::CanonicalCombiningClass>, | ||
| is_singleton = true, | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Data marker for the 'EastAsianWidth' Unicode property; the singleton payload maps code points to `crate::props::EastAsianWidth`. | ||
| PropertyEnumEastAsianWidthV1, | ||
| PropertyCodePointMap<'static, crate::props::EastAsianWidth>, | ||
| is_singleton = true, | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Data marker for the 'GeneralCategory' Unicode property; the singleton payload maps code points to `crate::props::GeneralCategory`. | ||
| PropertyEnumGeneralCategoryV1, | ||
| PropertyCodePointMap<'static, crate::props::GeneralCategory>, | ||
| is_singleton = true, | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Data marker for the 'GraphemeClusterBreak' Unicode property; the singleton payload maps code points to `crate::props::GraphemeClusterBreak`. | ||
| PropertyEnumGraphemeClusterBreakV1, | ||
| PropertyCodePointMap<'static, crate::props::GraphemeClusterBreak>, | ||
| is_singleton = true, | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Data marker for the 'HangulSyllableType' Unicode property; the singleton payload maps code points to `crate::props::HangulSyllableType`. | ||
| PropertyEnumHangulSyllableTypeV1, | ||
| PropertyCodePointMap<'static, crate::props::HangulSyllableType>, | ||
| is_singleton = true, | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Data marker for the 'IndicConjunctBreak' Unicode property; the singleton payload maps code points to `crate::props::IndicConjunctBreak`. | ||
| PropertyEnumIndicConjunctBreakV1, | ||
| PropertyCodePointMap<'static, crate::props::IndicConjunctBreak>, | ||
| is_singleton = true, | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Data marker for the 'IndicSyllabicCategory' Unicode property; the singleton payload maps code points to `crate::props::IndicSyllabicCategory`. | ||
| PropertyEnumIndicSyllabicCategoryV1, | ||
| PropertyCodePointMap<'static, crate::props::IndicSyllabicCategory>, | ||
| is_singleton = true, | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Data marker for the 'JoiningType' Unicode property; the singleton payload maps code points to `crate::props::JoiningType`. | ||
| PropertyEnumJoiningTypeV1, | ||
| PropertyCodePointMap<'static, crate::props::JoiningType>, | ||
| is_singleton = true, | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Data marker for the 'LineBreak' Unicode property; the singleton payload maps code points to `crate::props::LineBreak`. | ||
| PropertyEnumLineBreakV1, | ||
| PropertyCodePointMap<'static, crate::props::LineBreak>, | ||
| is_singleton = true, | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Data marker for the 'Script' Unicode property; the singleton payload maps code points to `crate::props::Script`. | ||
| PropertyEnumScriptV1, | ||
| PropertyCodePointMap<'static, crate::props::Script>, | ||
| is_singleton = true, | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Data marker for the 'SentenceBreak' Unicode property; the singleton payload maps code points to `crate::props::SentenceBreak`. | ||
| PropertyEnumSentenceBreakV1, | ||
| PropertyCodePointMap<'static, crate::props::SentenceBreak>, | ||
| is_singleton = true, | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Data marker for the 'Vertical_Orientation' Unicode property; the singleton payload maps code points to `crate::props::VerticalOrientation`. | ||
| PropertyEnumVerticalOrientationV1, | ||
| PropertyCodePointMap<'static, crate::props::VerticalOrientation>, | ||
| is_singleton = true, | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Data marker for the 'WordBreak' Unicode property; the singleton payload maps code points to `crate::props::WordBreak`. | ||
| PropertyEnumWordBreakV1, | ||
| PropertyCodePointMap<'static, crate::props::WordBreak>, | ||
| is_singleton = true, | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Data marker for the 'BidiMirroringGlyph' Unicode property; the singleton payload maps code points to `crate::bidi::BidiMirroringGlyph`. | ||
| PropertyEnumBidiMirroringGlyphV1, | ||
| PropertyCodePointMap<'static, crate::bidi::BidiMirroringGlyph>, | ||
| is_singleton = true, | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Marker `PropertyBinaryBasicEmojiV1`: declared as a singleton; its payload is a [`PropertyUnicodeSet`] (code points and strings). | ||
| PropertyBinaryBasicEmojiV1, | ||
| PropertyUnicodeSet<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Marker `PropertyScriptWithExtensionsV1`: declared as a singleton; its payload is a [`ScriptWithExtensionsProperty`]. | ||
| PropertyScriptWithExtensionsV1, | ||
| ScriptWithExtensionsProperty<'static>, | ||
| is_singleton = true | ||
| ); | ||
| /// The `INFO` of every data marker used by this module, grouped as: name markers (long, parse, short), binary property markers, enumerated property markers, then `PropertyBinaryBasicEmojiV1` and `PropertyScriptWithExtensionsV1`. | ||
| pub const MARKERS: &[DataMarkerInfo] = &[ | ||
| PropertyNameLongBidiClassV1::INFO, | ||
| #[cfg(feature = "alloc")] | ||
| PropertyNameLongCanonicalCombiningClassV1::INFO, | ||
| PropertyNameLongEastAsianWidthV1::INFO, | ||
| PropertyNameLongGeneralCategoryV1::INFO, | ||
| PropertyNameLongGraphemeClusterBreakV1::INFO, | ||
| PropertyNameLongHangulSyllableTypeV1::INFO, | ||
| PropertyNameLongIndicSyllabicCategoryV1::INFO, | ||
| PropertyNameLongJoiningTypeV1::INFO, | ||
| PropertyNameLongLineBreakV1::INFO, | ||
| PropertyNameLongScriptV1::INFO, | ||
| PropertyNameLongSentenceBreakV1::INFO, | ||
| PropertyNameLongVerticalOrientationV1::INFO, | ||
| PropertyNameLongWordBreakV1::INFO, | ||
| PropertyNameParseBidiClassV1::INFO, | ||
| PropertyNameParseCanonicalCombiningClassV1::INFO, | ||
| PropertyNameParseEastAsianWidthV1::INFO, | ||
| PropertyNameParseGeneralCategoryMaskV1::INFO, | ||
| PropertyNameParseGeneralCategoryV1::INFO, | ||
| PropertyNameParseGraphemeClusterBreakV1::INFO, | ||
| PropertyNameParseHangulSyllableTypeV1::INFO, | ||
| PropertyNameParseIndicSyllabicCategoryV1::INFO, | ||
| PropertyNameParseJoiningTypeV1::INFO, | ||
| PropertyNameParseLineBreakV1::INFO, | ||
| PropertyNameParseScriptV1::INFO, | ||
| PropertyNameParseSentenceBreakV1::INFO, | ||
| PropertyNameParseVerticalOrientationV1::INFO, | ||
| PropertyNameParseWordBreakV1::INFO, | ||
| PropertyNameShortBidiClassV1::INFO, | ||
| #[cfg(feature = "alloc")] | ||
| PropertyNameShortCanonicalCombiningClassV1::INFO, | ||
| PropertyNameShortEastAsianWidthV1::INFO, | ||
| PropertyNameShortGeneralCategoryV1::INFO, | ||
| PropertyNameShortGraphemeClusterBreakV1::INFO, | ||
| PropertyNameShortHangulSyllableTypeV1::INFO, | ||
| PropertyNameShortIndicSyllabicCategoryV1::INFO, | ||
| PropertyNameShortJoiningTypeV1::INFO, | ||
| PropertyNameShortLineBreakV1::INFO, | ||
| PropertyNameShortScriptV1::INFO, | ||
| PropertyNameShortSentenceBreakV1::INFO, | ||
| PropertyNameShortVerticalOrientationV1::INFO, | ||
| PropertyNameShortWordBreakV1::INFO, | ||
| PropertyBinaryAlnumV1::INFO, | ||
| PropertyBinaryAlphabeticV1::INFO, | ||
| PropertyBinaryAsciiHexDigitV1::INFO, | ||
| PropertyBinaryBidiControlV1::INFO, | ||
| PropertyBinaryBidiMirroredV1::INFO, | ||
| PropertyBinaryBlankV1::INFO, | ||
| PropertyBinaryCasedV1::INFO, | ||
| PropertyBinaryCaseIgnorableV1::INFO, | ||
| PropertyBinaryCaseSensitiveV1::INFO, | ||
| PropertyBinaryChangesWhenCasefoldedV1::INFO, | ||
| PropertyBinaryChangesWhenCasemappedV1::INFO, | ||
| PropertyBinaryChangesWhenLowercasedV1::INFO, | ||
| PropertyBinaryChangesWhenNfkcCasefoldedV1::INFO, | ||
| PropertyBinaryChangesWhenTitlecasedV1::INFO, | ||
| PropertyBinaryChangesWhenUppercasedV1::INFO, | ||
| PropertyBinaryDashV1::INFO, | ||
| PropertyBinaryDefaultIgnorableCodePointV1::INFO, | ||
| PropertyBinaryDeprecatedV1::INFO, | ||
| PropertyBinaryDiacriticV1::INFO, | ||
| PropertyBinaryEmojiComponentV1::INFO, | ||
| PropertyBinaryEmojiModifierBaseV1::INFO, | ||
| PropertyBinaryEmojiModifierV1::INFO, | ||
| PropertyBinaryEmojiPresentationV1::INFO, | ||
| PropertyBinaryEmojiV1::INFO, | ||
| PropertyBinaryExtendedPictographicV1::INFO, | ||
| PropertyBinaryExtenderV1::INFO, | ||
| PropertyBinaryFullCompositionExclusionV1::INFO, | ||
| PropertyBinaryGraphemeBaseV1::INFO, | ||
| PropertyBinaryGraphemeExtendV1::INFO, | ||
| PropertyBinaryGraphemeLinkV1::INFO, | ||
| PropertyBinaryGraphV1::INFO, | ||
| PropertyBinaryHexDigitV1::INFO, | ||
| PropertyBinaryHyphenV1::INFO, | ||
| PropertyBinaryIdCompatMathContinueV1::INFO, | ||
| PropertyBinaryIdCompatMathStartV1::INFO, | ||
| PropertyBinaryIdContinueV1::INFO, | ||
| PropertyBinaryIdeographicV1::INFO, | ||
| PropertyBinaryIdsBinaryOperatorV1::INFO, | ||
| PropertyBinaryIdStartV1::INFO, | ||
| PropertyBinaryIdsTrinaryOperatorV1::INFO, | ||
| PropertyBinaryIdsUnaryOperatorV1::INFO, | ||
| PropertyBinaryJoinControlV1::INFO, | ||
| PropertyBinaryLogicalOrderExceptionV1::INFO, | ||
| PropertyBinaryLowercaseV1::INFO, | ||
| PropertyBinaryMathV1::INFO, | ||
| PropertyBinaryModifierCombiningMarkV1::INFO, | ||
| PropertyBinaryNfcInertV1::INFO, | ||
| PropertyBinaryNfdInertV1::INFO, | ||
| PropertyBinaryNfkcInertV1::INFO, | ||
| PropertyBinaryNfkdInertV1::INFO, | ||
| PropertyBinaryNoncharacterCodePointV1::INFO, | ||
| PropertyBinaryPatternSyntaxV1::INFO, | ||
| PropertyBinaryPatternWhiteSpaceV1::INFO, | ||
| PropertyBinaryPrependedConcatenationMarkV1::INFO, | ||
| PropertyBinaryPrintV1::INFO, | ||
| PropertyBinaryQuotationMarkV1::INFO, | ||
| PropertyBinaryRadicalV1::INFO, | ||
| PropertyBinaryRegionalIndicatorV1::INFO, | ||
| PropertyBinarySegmentStarterV1::INFO, | ||
| PropertyBinarySentenceTerminalV1::INFO, | ||
| PropertyBinarySoftDottedV1::INFO, | ||
| PropertyBinaryTerminalPunctuationV1::INFO, | ||
| PropertyBinaryUnifiedIdeographV1::INFO, | ||
| PropertyBinaryUppercaseV1::INFO, | ||
| PropertyBinaryVariationSelectorV1::INFO, | ||
| PropertyBinaryWhiteSpaceV1::INFO, | ||
| PropertyBinaryXdigitV1::INFO, | ||
| PropertyBinaryXidContinueV1::INFO, | ||
| PropertyBinaryXidStartV1::INFO, | ||
| PropertyEnumBidiClassV1::INFO, | ||
| PropertyEnumCanonicalCombiningClassV1::INFO, | ||
| PropertyEnumEastAsianWidthV1::INFO, | ||
| PropertyEnumGeneralCategoryV1::INFO, | ||
| PropertyEnumGraphemeClusterBreakV1::INFO, | ||
| PropertyEnumHangulSyllableTypeV1::INFO, | ||
| PropertyEnumIndicConjunctBreakV1::INFO, | ||
| PropertyEnumIndicSyllabicCategoryV1::INFO, | ||
| PropertyEnumJoiningTypeV1::INFO, | ||
| PropertyEnumLineBreakV1::INFO, | ||
| PropertyEnumScriptV1::INFO, | ||
| PropertyEnumSentenceBreakV1::INFO, | ||
| PropertyEnumVerticalOrientationV1::INFO, | ||
| PropertyEnumWordBreakV1::INFO, | ||
| PropertyEnumBidiMirroringGlyphV1::INFO, | ||
| PropertyBinaryBasicEmojiV1::INFO, | ||
| PropertyScriptWithExtensionsV1::INFO, | ||
| ]; | ||
| /// A set of code points that share a particular property value. | ||
| /// | ||
| /// This data enum is extensible: more backends may be added in the future. | ||
| /// Old data can be used with newer code, but not vice versa. | ||
| /// | ||
| /// <div class="stab unstable"> | ||
| /// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways, | ||
| /// including in SemVer minor releases. While the serde representation of data structs is guaranteed | ||
| /// to be stable, their Rust representation might not be. Use with caution. | ||
| /// </div> | ||
| #[derive(Debug, Eq, PartialEq, Clone, yoke::Yokeable, zerofrom::ZeroFrom)] | ||
| #[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))] | ||
| #[cfg_attr(feature = "datagen", databake(path = icu_properties::provider))] | ||
| #[cfg_attr(feature = "serde", derive(serde::Deserialize))] | ||
| #[non_exhaustive] | ||
| pub enum PropertyCodePointSet<'data> { | ||
| /// The set, stored as a [`CodePointInversionList`]. | ||
| InversionList(#[cfg_attr(feature = "serde", serde(borrow))] CodePointInversionList<'data>), | ||
| // New variants must be appended BELOW the existing ones: | ||
| // serde identifies a variant by its name and by its index in the enum, see | ||
| // https://docs.rs/serde/latest/serde/trait.Serializer.html#tymethod.serialize_unit_variant | ||
| } | ||
| icu_provider::data_struct!( | ||
| PropertyCodePointSet<'_>, | ||
| #[cfg(feature = "datagen")] | ||
| ); | ||
| // Crate-internal accessors; see CodePointSetData for documentation of these functions. | ||
| impl<'data> PropertyCodePointSet<'data> { | ||
|     /// Tests whether `ch` is in the set by asking the backing inversion list. | ||
|     #[inline] | ||
|     pub(crate) fn contains(&self, ch: char) -> bool { | ||
|         match self { | ||
|             Self::InversionList(list) => list.contains(ch), | ||
|         } | ||
|     } | ||
|     /// Same as `contains`, for a code point passed as a `u32`. | ||
|     #[inline] | ||
|     pub(crate) fn contains32(&self, ch: u32) -> bool { | ||
|         match self { | ||
|             Self::InversionList(list) => list.contains32(ch), | ||
|         } | ||
|     } | ||
|     /// Iterates over the ranges reported by the backing inversion list's `iter_ranges`. | ||
|     #[inline] | ||
|     pub(crate) fn iter_ranges(&self) -> impl Iterator<Item = RangeInclusive<u32>> + '_ { | ||
|         match self { | ||
|             Self::InversionList(list) => list.iter_ranges(), | ||
|         } | ||
|     } | ||
|     /// Iterates over the ranges reported by the backing inversion list's `iter_ranges_complemented`. | ||
|     #[inline] | ||
|     pub(crate) fn iter_ranges_complemented( | ||
|         &self, | ||
|     ) -> impl Iterator<Item = RangeInclusive<u32>> + '_ { | ||
|         match self { | ||
|             Self::InversionList(list) => list.iter_ranges_complemented(), | ||
|         } | ||
|     } | ||
|     /// Wraps a `'static` inversion list in the `InversionList` variant. | ||
|     #[inline] | ||
|     pub(crate) fn from_code_point_inversion_list(l: CodePointInversionList<'static>) -> Self { | ||
|         Self::InversionList(l) | ||
|     } | ||
|     /// Borrows the backing inversion list; `Some` for the `InversionList` variant. | ||
|     #[inline] | ||
|     pub(crate) fn as_code_point_inversion_list( | ||
|         &'_ self, | ||
|     ) -> Option<&'_ CodePointInversionList<'data>> { | ||
|         match self { | ||
|             Self::InversionList(list) => Some(list), | ||
|             // A future backend that cannot hand out a CPInvList in O(1) time should return None here | ||
|         } | ||
|     } | ||
|     /// Produces a `CodePointInversionList` from the backing list via `ZeroFrom::zero_from`. | ||
|     #[inline] | ||
|     pub(crate) fn to_code_point_inversion_list(&self) -> CodePointInversionList<'_> { | ||
|         match self { | ||
|             Self::InversionList(list) => ZeroFrom::zero_from(list), | ||
|         } | ||
|     } | ||
| } | ||
| /// A map efficiently storing a value of type `T` for individual code points. | ||
| /// | ||
| /// This data enum is extensible: more backends may be added in the future. | ||
| /// Old data can be used with newer code, but not vice versa. | ||
| /// | ||
| /// <div class="stab unstable"> | ||
| /// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways, | ||
| /// including in SemVer minor releases. While the serde representation of data structs is guaranteed | ||
| /// to be stable, their Rust representation might not be. Use with caution. | ||
| /// </div> | ||
| #[derive(Clone, Debug, Eq, PartialEq, yoke::Yokeable, zerofrom::ZeroFrom)] | ||
| #[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))] | ||
| #[cfg_attr(feature = "datagen", databake(path = icu_properties::provider))] | ||
| #[cfg_attr(feature = "serde", derive(serde::Deserialize))] | ||
| #[non_exhaustive] | ||
| pub enum PropertyCodePointMap<'data, T: TrieValue> { | ||
| /// The data, stored in a [`CodePointTrie`]. | ||
| CodePointTrie(#[cfg_attr(feature = "serde", serde(borrow))] CodePointTrie<'data, T>), | ||
| // New variants must be appended BELOW the existing ones: | ||
| // serde identifies a variant by its name and by its index in the enum, see | ||
| // https://docs.rs/serde/latest/serde/trait.Serializer.html#tymethod.serialize_unit_variant | ||
| } | ||
| icu_provider::data_struct!( | ||
| <T: TrieValue> PropertyCodePointMap<'_, T>, | ||
| #[cfg(feature = "datagen")] | ||
| ); | ||
| // Crate-internal accessors; see CodePointMapData for documentation of these functions. | ||
| impl<'data, T: TrieValue> PropertyCodePointMap<'data, T> { | ||
|     /// Looks up the value for `ch`, a code point passed as a `u32`, in the backing trie. | ||
|     #[inline] | ||
|     pub(crate) fn get32(&self, ch: u32) -> T { | ||
|         match self { | ||
|             Self::CodePointTrie(trie) => trie.get32(ch), | ||
|         } | ||
|     } | ||
|     /// Looks up the value for the character `c` in the backing trie. | ||
|     #[inline] | ||
|     pub(crate) fn get(&self, c: char) -> T { | ||
|         match self { | ||
|             Self::CodePointTrie(trie) => trie.get(c), | ||
|         } | ||
|     } | ||
|     /// Consumes the map and converts its trie to value type `P`, passing through | ||
|     /// any `UleError` reported by the trie conversion. | ||
|     #[inline] | ||
|     #[cfg(feature = "alloc")] | ||
|     pub(crate) fn try_into_converted<P>( | ||
|         self, | ||
|     ) -> Result<PropertyCodePointMap<'data, P>, zerovec::ule::UleError> | ||
|     where | ||
|         P: TrieValue, | ||
|     { | ||
|         match self { | ||
|             Self::CodePointTrie(trie) => { | ||
|                 let converted = trie.try_into_converted(); | ||
|                 converted.map(PropertyCodePointMap::CodePointTrie) | ||
|             } | ||
|         } | ||
|     } | ||
|     /// Asks the backing trie for the inversion list it associates with `value`. | ||
|     #[inline] | ||
|     #[cfg(feature = "alloc")] | ||
|     pub(crate) fn get_set_for_value(&self, value: T) -> CodePointInversionList<'static> { | ||
|         match self { | ||
|             Self::CodePointTrie(trie) => trie.get_set_for_value(value), | ||
|         } | ||
|     } | ||
|     /// Iterates over the ranges reported by the backing trie's `iter_ranges`. | ||
|     #[inline] | ||
|     pub(crate) fn iter_ranges(&self) -> impl Iterator<Item = CodePointMapRange<T>> + '_ { | ||
|         match self { | ||
|             Self::CodePointTrie(trie) => trie.iter_ranges(), | ||
|         } | ||
|     } | ||
|     /// Like `iter_ranges`, but forwards `map` to the backing trie's `iter_ranges_mapped`. | ||
|     #[inline] | ||
|     pub(crate) fn iter_ranges_mapped<'a, U: Eq + 'a>( | ||
|         &'a self, | ||
|         map: impl FnMut(T) -> U + Copy + 'a, | ||
|     ) -> impl Iterator<Item = CodePointMapRange<U>> + 'a { | ||
|         match self { | ||
|             Self::CodePointTrie(trie) => trie.iter_ranges_mapped(map), | ||
|         } | ||
|     } | ||
|     /// Wraps a `'static` trie in the `CodePointTrie` variant. | ||
|     #[inline] | ||
|     pub(crate) fn from_code_point_trie(trie: CodePointTrie<'static, T>) -> Self { | ||
|         Self::CodePointTrie(trie) | ||
|     } | ||
|     /// Borrows the backing trie; `Some` for the `CodePointTrie` variant. | ||
|     #[inline] | ||
|     pub(crate) fn as_code_point_trie(&self) -> Option<&CodePointTrie<'data, T>> { | ||
|         match self { | ||
|             Self::CodePointTrie(trie) => Some(trie), | ||
|             // A future backend that cannot hand out a CPT in O(1) time should return None here | ||
|         } | ||
|     } | ||
|     /// Produces a `CodePointTrie` from the backing trie via `ZeroFrom::zero_from`. | ||
|     #[inline] | ||
|     pub(crate) fn to_code_point_trie(&self) -> CodePointTrie<'_, T> { | ||
|         match self { | ||
|             Self::CodePointTrie(trie) => ZeroFrom::zero_from(trie), | ||
|         } | ||
|     } | ||
| } | ||
| /// A set of code points and strings that share a particular property value. | ||
| /// | ||
| /// <div class="stab unstable"> | ||
| /// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways, | ||
| /// including in SemVer minor releases. While the serde representation of data structs is guaranteed | ||
| /// to be stable, their Rust representation might not be. Use with caution. | ||
| /// </div> | ||
| #[derive(Debug, Eq, PartialEq, Clone, yoke::Yokeable, zerofrom::ZeroFrom)] | ||
| #[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))] | ||
| #[cfg_attr(feature = "datagen", databake(path = icu_properties::provider))] | ||
| #[cfg_attr(feature = "serde", derive(serde::Deserialize))] | ||
| #[non_exhaustive] | ||
| pub enum PropertyUnicodeSet<'data> { | ||
| /// Code points held in an inversion list plus strings held in a list, as a [`CodePointInversionListAndStringList`]. | ||
| CPInversionListStrList( | ||
| #[cfg_attr(feature = "serde", serde(borrow))] CodePointInversionListAndStringList<'data>, | ||
| ), | ||
| // New variants must be appended BELOW the existing ones: | ||
| // serde identifies a variant by its name and by its index in the enum, see | ||
| // https://docs.rs/serde/latest/serde/trait.Serializer.html#tymethod.serialize_unit_variant | ||
| } | ||
| icu_provider::data_struct!( | ||
| PropertyUnicodeSet<'_>, | ||
| #[cfg(feature = "datagen")] | ||
| ); | ||
| impl<'data> PropertyUnicodeSet<'data> { | ||
|     /// Tests whether the string `s` is in the set by asking the backing list. | ||
|     #[inline] | ||
|     pub(crate) fn contains_str(&self, s: &str) -> bool { | ||
|         match self { | ||
|             Self::CPInversionListStrList(list) => list.contains_str(s), | ||
|         } | ||
|     } | ||
|     /// Tests whether `cp`, a code point passed as a `u32`, is in the set. | ||
|     #[inline] | ||
|     pub(crate) fn contains32(&self, cp: u32) -> bool { | ||
|         match self { | ||
|             Self::CPInversionListStrList(list) => list.contains32(cp), | ||
|         } | ||
|     } | ||
|     /// Tests whether the character `ch` is in the set. | ||
|     #[inline] | ||
|     pub(crate) fn contains(&self, ch: char) -> bool { | ||
|         match self { | ||
|             Self::CPInversionListStrList(list) => list.contains(ch), | ||
|         } | ||
|     } | ||
|     /// Wraps a `'static` list in the `CPInversionListStrList` variant. | ||
|     #[inline] | ||
|     pub(crate) fn from_code_point_inversion_list_string_list( | ||
|         l: CodePointInversionListAndStringList<'static>, | ||
|     ) -> Self { | ||
|         Self::CPInversionListStrList(l) | ||
|     } | ||
|     /// Borrows the backing list; `Some` for the `CPInversionListStrList` variant. | ||
|     #[inline] | ||
|     pub(crate) fn as_code_point_inversion_list_string_list( | ||
|         &'_ self, | ||
|     ) -> Option<&'_ CodePointInversionListAndStringList<'data>> { | ||
|         match self { | ||
|             Self::CPInversionListStrList(list) => Some(list), | ||
|             // A future backend that cannot hand out a CPInversionListStrList in O(1) time should return None here | ||
|         } | ||
|     } | ||
|     /// Produces a `CodePointInversionListAndStringList` from the backing list via `ZeroFrom::zero_from`. | ||
|     #[inline] | ||
|     pub(crate) fn to_code_point_inversion_list_string_list( | ||
|         &self, | ||
|     ) -> CodePointInversionListAndStringList<'_> { | ||
|         match self { | ||
|             Self::CPInversionListStrList(list) => ZeroFrom::zero_from(list), | ||
|         } | ||
|     } | ||
| } | ||
| /// A struct that efficiently stores `Script` and `Script_Extensions` property data. | ||
| /// | ||
| /// <div class="stab unstable"> | ||
| /// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways, | ||
| /// including in SemVer minor releases. While the serde representation of data structs is guaranteed | ||
| /// to be stable, their Rust representation might not be. Use with caution. | ||
| /// </div> | ||
| #[derive(Debug, Eq, PartialEq, Clone, yoke::Yokeable, zerofrom::ZeroFrom)] | ||
| #[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))] | ||
| #[cfg_attr(feature = "datagen", databake(path = icu_properties::provider))] | ||
| #[cfg_attr(feature = "serde", derive(serde::Deserialize))] | ||
| pub struct ScriptWithExtensionsProperty<'data> { | ||
| /// Note: The `ScriptWithExt` values in this trie assume a 12-bit layout. The 2 | ||
| /// higher order bits 11..10 indicate how to deduce the Script value and | ||
| /// Script_Extensions value, nearly matching the representation | ||
| /// [in ICU](https://github.com/unicode-org/icu/blob/main/icu4c/source/common/uprops.h): | ||
| /// | ||
| /// | High order 2 bits value | Script | Script_Extensions | | ||
| /// |-------------------------|--------------------------------------------------------|----------------------------------------------------------------| | ||
| /// | 3 | First value in sub-array, index given by lower 10 bits | Sub-array excluding first value, index given by lower 10 bits | | ||
| /// | 2 | Script=Inherited | Entire sub-array, index given by lower 10 bits | | ||
| /// | 1 | Script=Common | Entire sub-array, index given by lower 10 bits | | ||
| /// | 0 | Value in lower 10 bits | `[ Script value ]` single-element array | | ||
| /// | ||
| /// When the lower 10 bits of the value are used as an index, that index | ||
| /// selects an entry of the outer-level vector of the nested `extensions` structure. | ||
| #[cfg_attr(feature = "serde", serde(borrow))] | ||
| pub trie: CodePointTrie<'data, ScriptWithExt>, | ||
| /// This companion structure stores Script_Extensions values, which are | ||
| /// themselves arrays / vectors. It only stores the values for | ||
| /// cases in which `scx(cp) != [ sc(cp) ]`. Each sub-vector is distinct. A | ||
| /// sub-vector represents the Script_Extensions array value for a code point, | ||
| /// and may also indicate the Script value, as described for the `trie` field. | ||
| #[cfg_attr(feature = "serde", serde(borrow))] | ||
| pub extensions: VarZeroVec<'data, ZeroSlice<Script>>, | ||
| } | ||
| icu_provider::data_struct!( | ||
| ScriptWithExtensionsProperty<'_>, | ||
| #[cfg(feature = "datagen")] | ||
| ); |
| // This file is part of ICU4X. For terms of use, please see the file | ||
| // called LICENSE at the top level of the ICU4X source tree | ||
| // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). | ||
| //! 🚧 \[Unstable\] Property names-related data for this component | ||
| //! | ||
| //! <div class="stab unstable"> | ||
| //! 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways, | ||
| //! including in SemVer minor releases. While the serde representation of data structs is guaranteed | ||
| //! to be stable, their Rust representation might not be. Use with caution. | ||
| //! </div> | ||
| //! | ||
| //! Read more about data providers: [`icu_provider`] | ||
| use icu_locale_core::subtags::Script; | ||
| use icu_provider::prelude::{yoke, zerofrom}; | ||
| use zerotrie::ZeroTrieSimpleAscii; | ||
| use zerovec::ule::NichedOption; | ||
| use zerovec::{VarZeroVec, ZeroVec}; | ||
| icu_provider::data_marker!( | ||
| /// Marker `PropertyNameParseBidiClassV1`: declared as a singleton; its payload is a [`PropertyValueNameToEnumMap`]. | ||
| PropertyNameParseBidiClassV1, | ||
| PropertyValueNameToEnumMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Marker `PropertyNameParseCanonicalCombiningClassV1`: declared as a singleton; its payload is a [`PropertyValueNameToEnumMap`]. | ||
| PropertyNameParseCanonicalCombiningClassV1, | ||
| PropertyValueNameToEnumMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Marker `PropertyNameParseEastAsianWidthV1`: declared as a singleton; its payload is a [`PropertyValueNameToEnumMap`]. | ||
| PropertyNameParseEastAsianWidthV1, | ||
| PropertyValueNameToEnumMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Marker `PropertyNameParseGeneralCategoryMaskV1`: declared as a singleton; its payload is a [`PropertyValueNameToEnumMap`]. | ||
| PropertyNameParseGeneralCategoryMaskV1, | ||
| PropertyValueNameToEnumMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Marker `PropertyNameParseGeneralCategoryV1`: declared as a singleton; its payload is a [`PropertyValueNameToEnumMap`]. | ||
| PropertyNameParseGeneralCategoryV1, | ||
| PropertyValueNameToEnumMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Marker `PropertyNameParseGraphemeClusterBreakV1`: declared as a singleton; its payload is a [`PropertyValueNameToEnumMap`]. | ||
| PropertyNameParseGraphemeClusterBreakV1, | ||
| PropertyValueNameToEnumMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Marker `PropertyNameParseHangulSyllableTypeV1`: declared as a singleton; its payload is a [`PropertyValueNameToEnumMap`]. | ||
| PropertyNameParseHangulSyllableTypeV1, | ||
| PropertyValueNameToEnumMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Marker `PropertyNameParseIndicSyllabicCategoryV1`: declared as a singleton; its payload is a [`PropertyValueNameToEnumMap`]. | ||
| PropertyNameParseIndicSyllabicCategoryV1, | ||
| PropertyValueNameToEnumMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Marker `PropertyNameParseJoiningTypeV1`: declared as a singleton; its payload is a [`PropertyValueNameToEnumMap`]. | ||
| PropertyNameParseJoiningTypeV1, | ||
| PropertyValueNameToEnumMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Marker `PropertyNameParseLineBreakV1`: declared as a singleton; its payload is a [`PropertyValueNameToEnumMap`]. | ||
| PropertyNameParseLineBreakV1, | ||
| PropertyValueNameToEnumMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Marker `PropertyNameParseScriptV1`: declared as a singleton; its payload is a [`PropertyValueNameToEnumMap`]. | ||
| PropertyNameParseScriptV1, | ||
| PropertyValueNameToEnumMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Marker `PropertyNameParseSentenceBreakV1`: declared as a singleton; its payload is a [`PropertyValueNameToEnumMap`]. | ||
| PropertyNameParseSentenceBreakV1, | ||
| PropertyValueNameToEnumMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Marker `PropertyNameParseVerticalOrientationV1`: declared as a singleton; its payload is a [`PropertyValueNameToEnumMap`]. | ||
| PropertyNameParseVerticalOrientationV1, | ||
| PropertyValueNameToEnumMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Marker `PropertyNameParseWordBreakV1`: declared as a singleton; its payload is a [`PropertyValueNameToEnumMap`]. | ||
| PropertyNameParseWordBreakV1, | ||
| PropertyValueNameToEnumMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Marker `PropertyNameLongBidiClassV1`: declared as a singleton; its payload is a [`PropertyEnumToValueNameLinearMap`]. | ||
| PropertyNameLongBidiClassV1, | ||
| PropertyEnumToValueNameLinearMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Marker `PropertyNameShortBidiClassV1`: declared as a singleton; its payload is a [`PropertyEnumToValueNameLinearMap`]. | ||
| PropertyNameShortBidiClassV1, | ||
| PropertyEnumToValueNameLinearMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Marker `PropertyNameLongEastAsianWidthV1`: declared as a singleton; its payload is a [`PropertyEnumToValueNameLinearMap`]. | ||
| PropertyNameLongEastAsianWidthV1, | ||
| PropertyEnumToValueNameLinearMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Marker `PropertyNameShortEastAsianWidthV1`: declared as a singleton; its payload is a [`PropertyEnumToValueNameLinearMap`]. | ||
| PropertyNameShortEastAsianWidthV1, | ||
| PropertyEnumToValueNameLinearMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Marker `PropertyNameLongGeneralCategoryV1`: declared as a singleton; its payload is a [`PropertyEnumToValueNameLinearMap`]. | ||
| PropertyNameLongGeneralCategoryV1, | ||
| PropertyEnumToValueNameLinearMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Marker `PropertyNameShortGeneralCategoryV1`: declared as a singleton; its payload is a [`PropertyEnumToValueNameLinearMap`]. | ||
| PropertyNameShortGeneralCategoryV1, | ||
| PropertyEnumToValueNameLinearMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Marker `PropertyNameLongGraphemeClusterBreakV1`: declared as a singleton; its payload is a [`PropertyEnumToValueNameLinearMap`]. | ||
| PropertyNameLongGraphemeClusterBreakV1, | ||
| PropertyEnumToValueNameLinearMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Marker `PropertyNameShortGraphemeClusterBreakV1`: declared as a singleton; its payload is a [`PropertyEnumToValueNameLinearMap`]. | ||
| PropertyNameShortGraphemeClusterBreakV1, | ||
| PropertyEnumToValueNameLinearMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Marker `PropertyNameLongHangulSyllableTypeV1`: declared as a singleton; its payload is a [`PropertyEnumToValueNameLinearMap`]. | ||
| PropertyNameLongHangulSyllableTypeV1, | ||
| PropertyEnumToValueNameLinearMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Marker `PropertyNameShortHangulSyllableTypeV1`: declared as a singleton; its payload is a [`PropertyEnumToValueNameLinearMap`]. | ||
| PropertyNameShortHangulSyllableTypeV1, | ||
| PropertyEnumToValueNameLinearMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Marker `PropertyNameLongIndicSyllabicCategoryV1`: declared as a singleton; its payload is a [`PropertyEnumToValueNameLinearMap`]. | ||
| PropertyNameLongIndicSyllabicCategoryV1, | ||
| PropertyEnumToValueNameLinearMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Marker `PropertyNameShortIndicSyllabicCategoryV1`: declared as a singleton; its payload is a [`PropertyEnumToValueNameLinearMap`]. | ||
| PropertyNameShortIndicSyllabicCategoryV1, | ||
| PropertyEnumToValueNameLinearMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Marker `PropertyNameLongJoiningTypeV1`: declared as a singleton; its payload is a [`PropertyEnumToValueNameLinearMap`]. | ||
| PropertyNameLongJoiningTypeV1, | ||
| PropertyEnumToValueNameLinearMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Marker `PropertyNameShortJoiningTypeV1`: declared as a singleton; its payload is a [`PropertyEnumToValueNameLinearMap`]. | ||
| PropertyNameShortJoiningTypeV1, | ||
| PropertyEnumToValueNameLinearMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Marker `PropertyNameLongLineBreakV1`: declared as a singleton; its payload is a [`PropertyEnumToValueNameLinearMap`]. | ||
| PropertyNameLongLineBreakV1, | ||
| PropertyEnumToValueNameLinearMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Marker `PropertyNameShortLineBreakV1`: declared as a singleton; its payload is a [`PropertyEnumToValueNameLinearMap`]. | ||
| PropertyNameShortLineBreakV1, | ||
| PropertyEnumToValueNameLinearMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Marker `PropertyNameLongScriptV1`: declared as a singleton; its payload is a [`PropertyEnumToValueNameLinearMap`]. | ||
| PropertyNameLongScriptV1, | ||
| PropertyEnumToValueNameLinearMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Marker `PropertyNameLongSentenceBreakV1`: declared as a singleton; its payload is a [`PropertyEnumToValueNameLinearMap`]. | ||
| PropertyNameLongSentenceBreakV1, | ||
| PropertyEnumToValueNameLinearMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Marker `PropertyNameShortSentenceBreakV1`: declared as a singleton; its payload is a [`PropertyEnumToValueNameLinearMap`]. | ||
| PropertyNameShortSentenceBreakV1, | ||
| PropertyEnumToValueNameLinearMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Marker `PropertyNameLongVerticalOrientationV1`: declared as a singleton; its payload is a [`PropertyEnumToValueNameLinearMap`]. | ||
| PropertyNameLongVerticalOrientationV1, | ||
| PropertyEnumToValueNameLinearMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Marker `PropertyNameShortVerticalOrientationV1`: declared as a singleton; its payload is a [`PropertyEnumToValueNameLinearMap`]. | ||
| PropertyNameShortVerticalOrientationV1, | ||
| PropertyEnumToValueNameLinearMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Marker `PropertyNameLongWordBreakV1`: declared as a singleton; its payload is a [`PropertyEnumToValueNameLinearMap`]. | ||
| PropertyNameLongWordBreakV1, | ||
| PropertyEnumToValueNameLinearMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// Marker `PropertyNameShortWordBreakV1`: declared as a singleton; its payload is a [`PropertyEnumToValueNameLinearMap`]. | ||
| PropertyNameShortWordBreakV1, | ||
| PropertyEnumToValueNameLinearMap<'static>, | ||
| is_singleton = true | ||
| ); | ||
| #[cfg(feature = "alloc")] | ||
| icu_provider::data_marker!( | ||
| /// `PropertyNameLongCanonicalCombiningClassV1` | ||
| PropertyNameLongCanonicalCombiningClassV1, | ||
| PropertyEnumToValueNameSparseMap<'static>, | ||
| is_singleton = true, | ||
| ); | ||
| #[cfg(feature = "alloc")] | ||
| icu_provider::data_marker!( | ||
| /// `PropertyNameShortCanonicalCombiningClassV1` | ||
| PropertyNameShortCanonicalCombiningClassV1, | ||
| PropertyEnumToValueNameSparseMap<'static>, | ||
| is_singleton = true, | ||
| ); | ||
| icu_provider::data_marker!( | ||
| /// `PropertyNameShortScriptV1` | ||
| PropertyNameShortScriptV1, | ||
| PropertyScriptToIcuScriptMap<'static>, | ||
| is_singleton = true, | ||
| ); | ||
| /// A mapping of property value names to their value discriminants (the reverse of the enum-to-name maps). | ||
| /// | ||
| /// <div class="stab unstable"> | ||
| /// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways, | ||
| /// including in SemVer minor releases. While the serde representation of data structs is guaranteed | ||
| /// to be stable, their Rust representation might not be. Use with caution. | ||
| /// </div> | ||
| #[derive(Debug, Clone, PartialEq, yoke::Yokeable, zerofrom::ZeroFrom)] | ||
| #[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))] | ||
| #[cfg_attr(feature = "datagen", databake(path = icu_properties::provider::names))] | ||
| #[cfg_attr(feature = "serde", derive(serde::Deserialize))] | ||
| pub struct PropertyValueNameToEnumMap<'data> { | ||
| /// A map from names to their value discriminant, stored as a `ZeroTrieSimpleAscii` over borrowed bytes | ||
| #[cfg_attr(feature = "serde", serde(borrow))] | ||
| pub map: ZeroTrieSimpleAscii<ZeroVec<'data, u8>>, | ||
| } | ||
| icu_provider::data_struct!( | ||
| PropertyValueNameToEnumMap<'_>, | ||
| #[cfg(feature = "datagen")] | ||
| ); | ||
| /// A sparse mapping of property values to their names, keyed by value discriminant. A single instance | ||
| /// of this map will only cover either long or short names, determined whilst loading data. | ||
| /// | ||
| /// <div class="stab unstable"> | ||
| /// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways, | ||
| /// including in SemVer minor releases. While the serde representation of data structs is guaranteed | ||
| /// to be stable, their Rust representation might not be. Use with caution. | ||
| /// </div> | ||
| #[derive(Debug, Clone, PartialEq, yoke::Yokeable, zerofrom::ZeroFrom)] | ||
| #[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))] | ||
| #[cfg_attr(feature = "datagen", databake(path = icu_properties::provider::names))] | ||
| #[cfg_attr(feature = "serde", derive(serde::Deserialize))] | ||
| #[yoke(prove_covariance_manually)] | ||
| #[cfg(feature = "alloc")] | ||
| pub struct PropertyEnumToValueNameSparseMap<'data> { | ||
| /// A map from the value discriminant (the `u16` key) to the names | ||
| #[cfg_attr(feature = "serde", serde(borrow))] | ||
| pub map: zerovec::ZeroMap<'data, u16, str>, | ||
| } | ||
| #[cfg(feature = "alloc")] | ||
| icu_provider::data_struct!( | ||
| PropertyEnumToValueNameSparseMap<'_>, | ||
| #[cfg(feature = "datagen")] | ||
| ); | ||
| /// A linear (index-addressed) mapping of property values to their names. A single instance of this | ||
| /// map will only cover either long or short names, determined whilst loading data. | ||
| /// | ||
| /// <div class="stab unstable"> | ||
| /// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways, | ||
| /// including in SemVer minor releases. While the serde representation of data structs is guaranteed | ||
| /// to be stable, their Rust representation might not be. Use with caution. | ||
| /// </div> | ||
| #[derive(Debug, Clone, PartialEq, yoke::Yokeable, zerofrom::ZeroFrom)] | ||
| #[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))] | ||
| #[cfg_attr(feature = "datagen", databake(path = icu_properties::provider::names))] | ||
| #[cfg_attr(feature = "serde", derive(serde::Deserialize))] | ||
| #[yoke(prove_covariance_manually)] | ||
| pub struct PropertyEnumToValueNameLinearMap<'data> { | ||
| /// A map from the value discriminant (the index into the vector) to the names, for mostly | ||
| /// contiguous data. Empty strings count as missing. | ||
| #[cfg_attr(feature = "serde", serde(borrow))] | ||
| pub map: VarZeroVec<'data, str>, | ||
| } | ||
| icu_provider::data_struct!( | ||
| PropertyEnumToValueNameLinearMap<'_>, | ||
| #[cfg(feature = "datagen")] | ||
| ); | ||
| /// A mapping of script property values to `Script` values, addressed by value discriminant. | ||
| /// This is the payload type of `PropertyNameShortScriptV1`. | ||
| /// | ||
| /// <div class="stab unstable"> | ||
| /// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways, | ||
| /// including in SemVer minor releases. While the serde representation of data structs is guaranteed | ||
| /// to be stable, their Rust representation might not be. Use with caution. | ||
| /// </div> | ||
| #[derive(Debug, Clone, PartialEq, yoke::Yokeable, zerofrom::ZeroFrom)] | ||
| #[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))] | ||
| #[cfg_attr(feature = "datagen", databake(path = icu_properties::provider::names))] | ||
| #[cfg_attr(feature = "serde", derive(serde::Deserialize))] | ||
| #[yoke(prove_covariance_manually)] | ||
| pub struct PropertyScriptToIcuScriptMap<'data> { | ||
| /// A map from the value discriminant (the index) to an optional `Script`, for mostly contiguous data. | ||
| /// NOTE(review): a `None` entry presumably marks a discriminant with no mapping; confirm against the lookup code. | ||
| #[cfg_attr(feature = "serde", serde(borrow))] | ||
| pub map: ZeroVec<'data, NichedOption<Script, 4>>, | ||
| } | ||
| icu_provider::data_struct!( | ||
| PropertyScriptToIcuScriptMap<'_>, | ||
| #[cfg(feature = "datagen")] | ||
| ); | || |
| // This file is part of ICU4X. For terms of use, please see the file | ||
| // called LICENSE at the top level of the ICU4X source tree | ||
| // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). | ||
| //! 🚧 \[Experimental\] This module is experimental and currently crate-private. Let us know if you | ||
| //! have a use case for this! | ||
| //! | ||
| //! This module contains utilities for working with properties where the specific property in use | ||
| //! is not known at compile time. | ||
| //! | ||
| //! For regex engines, [`CodePointSetData::try_new_for_ecma262_unstable()`] is a convenient API for working | ||
| //! with properties at runtime tailored for the use case of ECMA262-compatible regex engines. | ||
| use crate::provider::*; | ||
| use crate::CodePointSetData; | ||
| #[cfg(doc)] | ||
| use crate::{ | ||
| props::{GeneralCategory, GeneralCategoryGroup, Script}, | ||
| script, CodePointMapData, PropertyParser, | ||
| }; | ||
| use icu_provider::prelude::*; | ||
/// This type can represent any binary Unicode property.
///
/// This is intended to be used in situations where the exact unicode property needed is
/// only known at runtime, for example in regex engines.
///
/// The values are intended to be identical to ICU4C's UProperty enum. Variants are listed
/// in discriminant order so the table can be audited against ICU4C line by line; the
/// derived ordering compares discriminants, so it does not depend on declaration order.
///
/// Values 65 through 71 are deliberately absent: in `UProperty` they belong to the binary
/// properties of strings, which are modelled by `StringBinaryProperty`.
#[non_exhaustive]
#[allow(missing_docs)]
#[allow(dead_code)]
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
enum BinaryProperty {
    Alphabetic = 0,
    AsciiHexDigit = 1,
    BidiControl = 2,
    BidiMirrored = 3,
    Dash = 4,
    DefaultIgnorableCodePoint = 5,
    Deprecated = 6,
    Diacritic = 7,
    Extender = 8,
    FullCompositionExclusion = 9,
    GraphemeBase = 10,
    GraphemeExtend = 11,
    GraphemeLink = 12,
    HexDigit = 13,
    Hyphen = 14,
    IdContinue = 15,
    IdStart = 16,
    Ideographic = 17,
    IdsBinaryOperator = 18,
    IdsTrinaryOperator = 19,
    JoinControl = 20,
    LogicalOrderException = 21,
    Lowercase = 22,
    Math = 23,
    NoncharacterCodePoint = 24,
    QuotationMark = 25,
    Radical = 26,
    SoftDotted = 27,
    TerminalPunctuation = 28,
    UnifiedIdeograph = 29,
    Uppercase = 30,
    WhiteSpace = 31,
    XidContinue = 32,
    XidStart = 33,
    CaseSensitive = 34,
    SentenceTerminal = 35,
    VariationSelector = 36,
    NfdInert = 37,
    NfkdInert = 38,
    NfcInert = 39,
    NfkcInert = 40,
    SegmentStarter = 41,
    PatternSyntax = 42,
    PatternWhiteSpace = 43,
    Alnum = 44,
    Blank = 45,
    Graph = 46,
    Print = 47,
    Xdigit = 48,
    Cased = 49,
    CaseIgnorable = 50,
    ChangesWhenLowercased = 51,
    ChangesWhenUppercased = 52,
    ChangesWhenTitlecased = 53,
    ChangesWhenCasefolded = 54,
    ChangesWhenCasemapped = 55,
    ChangesWhenNfkcCasefolded = 56,
    Emoji = 57,
    EmojiPresentation = 58,
    EmojiModifier = 59,
    EmojiModifierBase = 60,
    EmojiComponent = 61,
    RegionalIndicator = 62,
    PrependedConcatenationMark = 63,
    ExtendedPictographic = 64,
    // 65..=71 are string properties in UProperty; the next four were previously numbered
    // 65..=68 here, which collided with those and did not match ICU4C.
    IdsUnaryOperator = 72,
    IdCompatMathStart = 73,
    IdCompatMathContinue = 74,
    ModifierCombiningMark = 75,
}
/// A binary property whose domain is strings rather than single code points.
///
/// Meant for callers that only learn which property they need at runtime, for example
/// regex engines.
///
/// Discriminants are intended to be identical to ICU4C's UProperty enum; variants are
/// listed in discriminant order.
#[non_exhaustive]
#[allow(dead_code, missing_docs)]
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
enum StringBinaryProperty {
    BasicEmoji = 65,
    EmojiKeycapSequence = 66,
    RgiEmojiModifierSequence = 67,
    RgiEmojiFlagSequence = 68,
    RgiEmojiTagSequence = 69,
    RgiEmojiZWJSequence = 70,
    RgiEmoji = 71,
}
/// An enumerated (multi-valued) Unicode property.
///
/// Meant for callers that only learn which property they need at runtime, for example
/// regex engines.
///
/// Discriminants are intended to be identical to ICU4C's UProperty enum; variants are
/// listed in discriminant order.
#[non_exhaustive]
#[allow(dead_code, missing_docs)]
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
enum EnumeratedProperty {
    BidiClass = 0x1000,
    Block = 0x1001,
    CombiningClass = 0x1002,
    DecompositionType = 0x1003,
    EastAsianWidth = 0x1004,
    GeneralCategory = 0x1005,
    JoiningGroup = 0x1006,
    JoiningType = 0x1007,
    LineBreak = 0x1008,
    NumericType = 0x1009,
    Script = 0x100A,
    HangulSyllableType = 0x100B,
    NFDQuickCheck = 0x100C,
    NFKDQuickCheck = 0x100D,
    NFCQuickCheck = 0x100E,
    NFKCQuickCheck = 0x100F,
    LeadCanonicalCombiningClass = 0x1010,
    TrailCanonicalCombiningClass = 0x1011,
    GraphemeClusterBreak = 0x1012,
    SentenceBreak = 0x1013,
    WordBreak = 0x1014,
    BidiPairedBracketType = 0x1015,
    IndicPositionalCategory = 0x1016,
    IndicSyllabicCategory = 0x1017,
    VerticalOrientation = 0x1018,
    // 0x1019 is not represented in this enum.
    IndicConjunctBreak = 0x101A,
}
/// A Unicode mask property.
///
/// Meant for callers that only learn which property they need at runtime, for example
/// regex engines.
///
/// Discriminants are intended to be identical to ICU4C's UProperty enum.
#[non_exhaustive]
#[allow(dead_code, missing_docs)]
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
enum MaskProperty {
    GeneralCategoryMask = 0x2000,
}
/// A numeric Unicode property.
///
/// Meant for callers that only learn which property they need at runtime, for example
/// regex engines.
///
/// Discriminants are intended to be identical to ICU4C's UProperty enum.
#[non_exhaustive]
#[allow(dead_code, missing_docs)]
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
enum NumericProperty {
    NumericValue = 0x3000,
}
/// A string-valued Unicode property.
///
/// Meant for callers that only learn which property they need at runtime, for example
/// regex engines.
///
/// Discriminants are intended to be identical to ICU4C's UProperty enum; variants are
/// listed in discriminant order.
#[non_exhaustive]
#[allow(dead_code, missing_docs)]
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
enum StringProperty {
    Age = 0x4000,
    BidiMirroringGlyph = 0x4001,
    CaseFolding = 0x4002,
    ISOComment = 0x4003,
    LowercaseMapping = 0x4004,
    Name = 0x4005,
    SimpleCaseFolding = 0x4006,
    SimpleLowercaseMapping = 0x4007,
    SimpleTitlecaseMapping = 0x4008,
    SimpleUppercaseMapping = 0x4009,
    TitlecaseMapping = 0x400A,
    Unicode1Name = 0x400B,
    UppercaseMapping = 0x400C,
    BidiPairedBracket = 0x400D,
}
/// Miscellaneous properties; currently only `ScriptExtensions`.
///
/// NOTE(review): like the sibling enums in this module, the discriminant presumably mirrors
/// ICU4C's UProperty enum.
#[non_exhaustive]
#[allow(dead_code, missing_docs)]
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
enum MiscProperty {
    ScriptExtensions = 0x7000,
}
| impl CodePointSetData { | ||
| /// Returns a type capable of looking up values for a property specified as a string, as long as it is a | ||
| /// [binary property listed in ECMA-262][ecma], using strict matching on the names in the spec. | ||
| /// | ||
| /// This handles every property required by ECMA-262 `/u` regular expressions, except for: | ||
| /// | ||
| /// - `Script` and `General_Category`: handle these directly using property values parsed via | ||
| /// [`PropertyParser<GeneralCategory>`] and [`PropertyParser<Script>`] | ||
| /// if necessary. | ||
| /// - `Script_Extensions`: handle this directly using APIs from [`crate::script::ScriptWithExtensions`] | ||
| /// - `General_Category` mask values: Handle this alongside `General_Category` using [`GeneralCategoryGroup`], | ||
| /// using property values parsed via [`PropertyParser<GeneralCategory>`] if necessary | ||
| /// - `Assigned`, `All`, and `ASCII` pseudoproperties: Handle these using their equivalent sets: | ||
| /// - `Any` can be expressed as the range `[\u{0}-\u{10FFFF}]` | ||
| /// - `Assigned` can be expressed as the inverse of the set `gc=Cn` (i.e., `\P{gc=Cn}`). | ||
| /// - `ASCII` can be expressed as the range `[\u{0}-\u{7F}]` | ||
| /// - `General_Category` property values can themselves be treated like properties using a shorthand in ECMA262, | ||
| /// simply create the corresponding `GeneralCategory` set. | ||
| /// | ||
| /// ✨ *Enabled with the `compiled_data` Cargo feature.* | ||
| /// | ||
| /// [📚 Help choosing a constructor](icu_provider::constructors) | ||
| /// | ||
| /// ``` | ||
| /// use icu::properties::CodePointSetData; | ||
| /// | ||
| /// let emoji = CodePointSetData::new_for_ecma262(b"Emoji") | ||
| /// .expect("is an ECMA-262 property"); | ||
| /// | ||
| /// assert!(emoji.contains('🔥')); // U+1F525 FIRE | ||
| /// assert!(!emoji.contains('V')); | ||
| /// ``` | ||
| /// | ||
| /// [ecma]: https://tc39.es/ecma262/#table-binary-unicode-properties | ||
| #[cfg(feature = "compiled_data")] | ||
| pub fn new_for_ecma262(prop: &[u8]) -> Option<crate::CodePointSetDataBorrowed<'static>> { | ||
| use crate::props::*; | ||
| Some(match prop { | ||
| AsciiHexDigit::NAME | AsciiHexDigit::SHORT_NAME => Self::new::<AsciiHexDigit>(), | ||
| Alphabetic::NAME | Alphabetic::SHORT_NAME => Self::new::<Alphabetic>(), | ||
| BidiControl::NAME | BidiControl::SHORT_NAME => Self::new::<BidiControl>(), | ||
| BidiMirrored::NAME | BidiMirrored::SHORT_NAME => Self::new::<BidiMirrored>(), | ||
| CaseIgnorable::NAME | CaseIgnorable::SHORT_NAME => Self::new::<CaseIgnorable>(), | ||
| #[allow(unreachable_patterns)] // no short name | ||
| Cased::NAME | Cased::SHORT_NAME => Self::new::<Cased>(), | ||
| ChangesWhenCasefolded::NAME | ChangesWhenCasefolded::SHORT_NAME => { | ||
| Self::new::<ChangesWhenCasefolded>() | ||
| } | ||
| ChangesWhenCasemapped::NAME | ChangesWhenCasemapped::SHORT_NAME => { | ||
| Self::new::<ChangesWhenCasemapped>() | ||
| } | ||
| ChangesWhenLowercased::NAME | ChangesWhenLowercased::SHORT_NAME => { | ||
| Self::new::<ChangesWhenLowercased>() | ||
| } | ||
| ChangesWhenNfkcCasefolded::NAME | ChangesWhenNfkcCasefolded::SHORT_NAME => { | ||
| Self::new::<ChangesWhenNfkcCasefolded>() | ||
| } | ||
| ChangesWhenTitlecased::NAME | ChangesWhenTitlecased::SHORT_NAME => { | ||
| Self::new::<ChangesWhenTitlecased>() | ||
| } | ||
| ChangesWhenUppercased::NAME | ChangesWhenUppercased::SHORT_NAME => { | ||
| Self::new::<ChangesWhenUppercased>() | ||
| } | ||
| #[allow(unreachable_patterns)] // no short name | ||
| Dash::NAME | Dash::SHORT_NAME => Self::new::<Dash>(), | ||
| DefaultIgnorableCodePoint::NAME | DefaultIgnorableCodePoint::SHORT_NAME => { | ||
| Self::new::<DefaultIgnorableCodePoint>() | ||
| } | ||
| Deprecated::NAME | Deprecated::SHORT_NAME => Self::new::<Deprecated>(), | ||
| Diacritic::NAME | Diacritic::SHORT_NAME => Self::new::<Diacritic>(), | ||
| #[allow(unreachable_patterns)] // no short name | ||
| Emoji::NAME | Emoji::SHORT_NAME => Self::new::<Emoji>(), | ||
| EmojiComponent::NAME | EmojiComponent::SHORT_NAME => Self::new::<EmojiComponent>(), | ||
| EmojiModifier::NAME | EmojiModifier::SHORT_NAME => Self::new::<EmojiModifier>(), | ||
| EmojiModifierBase::NAME | EmojiModifierBase::SHORT_NAME => { | ||
| Self::new::<EmojiModifierBase>() | ||
| } | ||
| EmojiPresentation::NAME | EmojiPresentation::SHORT_NAME => { | ||
| Self::new::<EmojiPresentation>() | ||
| } | ||
| ExtendedPictographic::NAME | ExtendedPictographic::SHORT_NAME => { | ||
| Self::new::<ExtendedPictographic>() | ||
| } | ||
| Extender::NAME | Extender::SHORT_NAME => Self::new::<Extender>(), | ||
| GraphemeBase::NAME | GraphemeBase::SHORT_NAME => Self::new::<GraphemeBase>(), | ||
| GraphemeExtend::NAME | GraphemeExtend::SHORT_NAME => Self::new::<GraphemeExtend>(), | ||
| HexDigit::NAME | HexDigit::SHORT_NAME => Self::new::<HexDigit>(), | ||
| IdsBinaryOperator::NAME | IdsBinaryOperator::SHORT_NAME => { | ||
| Self::new::<IdsBinaryOperator>() | ||
| } | ||
| IdsTrinaryOperator::NAME | IdsTrinaryOperator::SHORT_NAME => { | ||
| Self::new::<IdsTrinaryOperator>() | ||
| } | ||
| IdContinue::NAME | IdContinue::SHORT_NAME => Self::new::<IdContinue>(), | ||
| IdStart::NAME | IdStart::SHORT_NAME => Self::new::<IdStart>(), | ||
| Ideographic::NAME | Ideographic::SHORT_NAME => Self::new::<Ideographic>(), | ||
| JoinControl::NAME | JoinControl::SHORT_NAME => Self::new::<JoinControl>(), | ||
| LogicalOrderException::NAME | LogicalOrderException::SHORT_NAME => { | ||
| Self::new::<LogicalOrderException>() | ||
| } | ||
| Lowercase::NAME | Lowercase::SHORT_NAME => Self::new::<Lowercase>(), | ||
| #[allow(unreachable_patterns)] // no short name | ||
| Math::NAME | Math::SHORT_NAME => Self::new::<Math>(), | ||
| NoncharacterCodePoint::NAME | NoncharacterCodePoint::SHORT_NAME => { | ||
| Self::new::<NoncharacterCodePoint>() | ||
| } | ||
| PatternSyntax::NAME | PatternSyntax::SHORT_NAME => Self::new::<PatternSyntax>(), | ||
| PatternWhiteSpace::NAME | PatternWhiteSpace::SHORT_NAME => { | ||
| Self::new::<PatternWhiteSpace>() | ||
| } | ||
| QuotationMark::NAME | QuotationMark::SHORT_NAME => Self::new::<QuotationMark>(), | ||
| #[allow(unreachable_patterns)] // no short name | ||
| Radical::NAME | Radical::SHORT_NAME => Self::new::<Radical>(), | ||
| RegionalIndicator::NAME | RegionalIndicator::SHORT_NAME => { | ||
| Self::new::<RegionalIndicator>() | ||
| } | ||
| SentenceTerminal::NAME | SentenceTerminal::SHORT_NAME => { | ||
| Self::new::<SentenceTerminal>() | ||
| } | ||
| SoftDotted::NAME | SoftDotted::SHORT_NAME => Self::new::<SoftDotted>(), | ||
| TerminalPunctuation::NAME | TerminalPunctuation::SHORT_NAME => { | ||
| Self::new::<TerminalPunctuation>() | ||
| } | ||
| UnifiedIdeograph::NAME | UnifiedIdeograph::SHORT_NAME => { | ||
| Self::new::<UnifiedIdeograph>() | ||
| } | ||
| Uppercase::NAME | Uppercase::SHORT_NAME => Self::new::<Uppercase>(), | ||
| VariationSelector::NAME | VariationSelector::SHORT_NAME => { | ||
| Self::new::<VariationSelector>() | ||
| } | ||
| WhiteSpace::NAME | WhiteSpace::SHORT_NAME => Self::new::<WhiteSpace>(), | ||
| XidContinue::NAME | XidContinue::SHORT_NAME => Self::new::<XidContinue>(), | ||
| XidStart::NAME | XidStart::SHORT_NAME => Self::new::<XidStart>(), | ||
| // Not an ECMA-262 property | ||
| _ => return None, | ||
| }) | ||
| } | ||
| icu_provider::gen_buffer_data_constructors!( | ||
| (prop: &[u8]) -> result: Option<Result<Self, DataError>>, | ||
| functions: [ | ||
| new_for_ecma262: skip, | ||
| try_new_for_ecma262_with_buffer_provider, | ||
| try_new_for_ecma262_unstable, | ||
| Self, | ||
| ] | ||
| ); | ||
| #[doc = icu_provider::gen_buffer_unstable_docs!(UNSTABLE, Self::new_for_ecma262)] | ||
| pub fn try_new_for_ecma262_unstable<P>( | ||
| provider: &P, | ||
| prop: &[u8], | ||
| ) -> Option<Result<Self, DataError>> | ||
| where | ||
| P: ?Sized | ||
| + DataProvider<PropertyBinaryAsciiHexDigitV1> | ||
| + DataProvider<PropertyBinaryAlphabeticV1> | ||
| + DataProvider<PropertyBinaryBidiControlV1> | ||
| + DataProvider<PropertyBinaryBidiMirroredV1> | ||
| + DataProvider<PropertyBinaryCaseIgnorableV1> | ||
| + DataProvider<PropertyBinaryCasedV1> | ||
| + DataProvider<PropertyBinaryChangesWhenCasefoldedV1> | ||
| + DataProvider<PropertyBinaryChangesWhenCasemappedV1> | ||
| + DataProvider<PropertyBinaryChangesWhenLowercasedV1> | ||
| + DataProvider<PropertyBinaryChangesWhenNfkcCasefoldedV1> | ||
| + DataProvider<PropertyBinaryChangesWhenTitlecasedV1> | ||
| + DataProvider<PropertyBinaryChangesWhenUppercasedV1> | ||
| + DataProvider<PropertyBinaryDashV1> | ||
| + DataProvider<PropertyBinaryDefaultIgnorableCodePointV1> | ||
| + DataProvider<PropertyBinaryDeprecatedV1> | ||
| + DataProvider<PropertyBinaryDiacriticV1> | ||
| + DataProvider<PropertyBinaryEmojiV1> | ||
| + DataProvider<PropertyBinaryEmojiComponentV1> | ||
| + DataProvider<PropertyBinaryEmojiModifierV1> | ||
| + DataProvider<PropertyBinaryEmojiModifierBaseV1> | ||
| + DataProvider<PropertyBinaryEmojiPresentationV1> | ||
| + DataProvider<PropertyBinaryExtendedPictographicV1> | ||
| + DataProvider<PropertyBinaryExtenderV1> | ||
| + DataProvider<PropertyBinaryGraphemeBaseV1> | ||
| + DataProvider<PropertyBinaryGraphemeExtendV1> | ||
| + DataProvider<PropertyBinaryHexDigitV1> | ||
| + DataProvider<PropertyBinaryIdsBinaryOperatorV1> | ||
| + DataProvider<PropertyBinaryIdsTrinaryOperatorV1> | ||
| + DataProvider<PropertyBinaryIdContinueV1> | ||
| + DataProvider<PropertyBinaryIdStartV1> | ||
| + DataProvider<PropertyBinaryIdeographicV1> | ||
| + DataProvider<PropertyBinaryJoinControlV1> | ||
| + DataProvider<PropertyBinaryLogicalOrderExceptionV1> | ||
| + DataProvider<PropertyBinaryLowercaseV1> | ||
| + DataProvider<PropertyBinaryMathV1> | ||
| + DataProvider<PropertyBinaryNoncharacterCodePointV1> | ||
| + DataProvider<PropertyBinaryPatternSyntaxV1> | ||
| + DataProvider<PropertyBinaryPatternWhiteSpaceV1> | ||
| + DataProvider<PropertyBinaryQuotationMarkV1> | ||
| + DataProvider<PropertyBinaryRadicalV1> | ||
| + DataProvider<PropertyBinaryRegionalIndicatorV1> | ||
| + DataProvider<PropertyBinarySentenceTerminalV1> | ||
| + DataProvider<PropertyBinarySoftDottedV1> | ||
| + DataProvider<PropertyBinaryTerminalPunctuationV1> | ||
| + DataProvider<PropertyBinaryUnifiedIdeographV1> | ||
| + DataProvider<PropertyBinaryUppercaseV1> | ||
| + DataProvider<PropertyBinaryVariationSelectorV1> | ||
| + DataProvider<PropertyBinaryWhiteSpaceV1> | ||
| + DataProvider<PropertyBinaryXidContinueV1> | ||
| + DataProvider<PropertyBinaryXidStartV1>, | ||
| { | ||
| use crate::props::*; | ||
| Some(match prop { | ||
| AsciiHexDigit::NAME | AsciiHexDigit::SHORT_NAME => { | ||
| Self::try_new_unstable::<AsciiHexDigit>(provider) | ||
| } | ||
| Alphabetic::NAME | Alphabetic::SHORT_NAME => { | ||
| Self::try_new_unstable::<Alphabetic>(provider) | ||
| } | ||
| BidiControl::NAME | BidiControl::SHORT_NAME => { | ||
| Self::try_new_unstable::<BidiControl>(provider) | ||
| } | ||
| BidiMirrored::NAME | BidiMirrored::SHORT_NAME => { | ||
| Self::try_new_unstable::<BidiMirrored>(provider) | ||
| } | ||
| CaseIgnorable::NAME | CaseIgnorable::SHORT_NAME => { | ||
| Self::try_new_unstable::<CaseIgnorable>(provider) | ||
| } | ||
| #[allow(unreachable_patterns)] // no short name | ||
| Cased::NAME | Cased::SHORT_NAME => Self::try_new_unstable::<Cased>(provider), | ||
| ChangesWhenCasefolded::NAME | ChangesWhenCasefolded::SHORT_NAME => { | ||
| Self::try_new_unstable::<ChangesWhenCasefolded>(provider) | ||
| } | ||
| ChangesWhenCasemapped::NAME | ChangesWhenCasemapped::SHORT_NAME => { | ||
| Self::try_new_unstable::<ChangesWhenCasemapped>(provider) | ||
| } | ||
| ChangesWhenLowercased::NAME | ChangesWhenLowercased::SHORT_NAME => { | ||
| Self::try_new_unstable::<ChangesWhenLowercased>(provider) | ||
| } | ||
| ChangesWhenNfkcCasefolded::NAME | ChangesWhenNfkcCasefolded::SHORT_NAME => { | ||
| Self::try_new_unstable::<ChangesWhenNfkcCasefolded>(provider) | ||
| } | ||
| ChangesWhenTitlecased::NAME | ChangesWhenTitlecased::SHORT_NAME => { | ||
| Self::try_new_unstable::<ChangesWhenTitlecased>(provider) | ||
| } | ||
| ChangesWhenUppercased::NAME | ChangesWhenUppercased::SHORT_NAME => { | ||
| Self::try_new_unstable::<ChangesWhenUppercased>(provider) | ||
| } | ||
| #[allow(unreachable_patterns)] // no short name | ||
| Dash::NAME | Dash::SHORT_NAME => Self::try_new_unstable::<Dash>(provider), | ||
| DefaultIgnorableCodePoint::NAME | DefaultIgnorableCodePoint::SHORT_NAME => { | ||
| Self::try_new_unstable::<DefaultIgnorableCodePoint>(provider) | ||
| } | ||
| Deprecated::NAME | Deprecated::SHORT_NAME => { | ||
| Self::try_new_unstable::<Deprecated>(provider) | ||
| } | ||
| Diacritic::NAME | Diacritic::SHORT_NAME => { | ||
| Self::try_new_unstable::<Diacritic>(provider) | ||
| } | ||
| #[allow(unreachable_patterns)] // no short name | ||
| Emoji::NAME | Emoji::SHORT_NAME => Self::try_new_unstable::<Emoji>(provider), | ||
| EmojiComponent::NAME | EmojiComponent::SHORT_NAME => { | ||
| Self::try_new_unstable::<EmojiComponent>(provider) | ||
| } | ||
| EmojiModifier::NAME | EmojiModifier::SHORT_NAME => { | ||
| Self::try_new_unstable::<EmojiModifier>(provider) | ||
| } | ||
| EmojiModifierBase::NAME | EmojiModifierBase::SHORT_NAME => { | ||
| Self::try_new_unstable::<EmojiModifierBase>(provider) | ||
| } | ||
| EmojiPresentation::NAME | EmojiPresentation::SHORT_NAME => { | ||
| Self::try_new_unstable::<EmojiPresentation>(provider) | ||
| } | ||
| ExtendedPictographic::NAME | ExtendedPictographic::SHORT_NAME => { | ||
| Self::try_new_unstable::<ExtendedPictographic>(provider) | ||
| } | ||
| Extender::NAME | Extender::SHORT_NAME => Self::try_new_unstable::<Extender>(provider), | ||
| GraphemeBase::NAME | GraphemeBase::SHORT_NAME => { | ||
| Self::try_new_unstable::<GraphemeBase>(provider) | ||
| } | ||
| GraphemeExtend::NAME | GraphemeExtend::SHORT_NAME => { | ||
| Self::try_new_unstable::<GraphemeExtend>(provider) | ||
| } | ||
| HexDigit::NAME | HexDigit::SHORT_NAME => Self::try_new_unstable::<HexDigit>(provider), | ||
| IdsBinaryOperator::NAME | IdsBinaryOperator::SHORT_NAME => { | ||
| Self::try_new_unstable::<IdsBinaryOperator>(provider) | ||
| } | ||
| IdsTrinaryOperator::NAME | IdsTrinaryOperator::SHORT_NAME => { | ||
| Self::try_new_unstable::<IdsTrinaryOperator>(provider) | ||
| } | ||
| IdContinue::NAME | IdContinue::SHORT_NAME => { | ||
| Self::try_new_unstable::<IdContinue>(provider) | ||
| } | ||
| IdStart::NAME | IdStart::SHORT_NAME => Self::try_new_unstable::<IdStart>(provider), | ||
| Ideographic::NAME | Ideographic::SHORT_NAME => { | ||
| Self::try_new_unstable::<Ideographic>(provider) | ||
| } | ||
| JoinControl::NAME | JoinControl::SHORT_NAME => { | ||
| Self::try_new_unstable::<JoinControl>(provider) | ||
| } | ||
| LogicalOrderException::NAME | LogicalOrderException::SHORT_NAME => { | ||
| Self::try_new_unstable::<LogicalOrderException>(provider) | ||
| } | ||
| Lowercase::NAME | Lowercase::SHORT_NAME => { | ||
| Self::try_new_unstable::<Lowercase>(provider) | ||
| } | ||
| #[allow(unreachable_patterns)] // no short name | ||
| Math::NAME | Math::SHORT_NAME => Self::try_new_unstable::<Math>(provider), | ||
| NoncharacterCodePoint::NAME | NoncharacterCodePoint::SHORT_NAME => { | ||
| Self::try_new_unstable::<NoncharacterCodePoint>(provider) | ||
| } | ||
| PatternSyntax::NAME | PatternSyntax::SHORT_NAME => { | ||
| Self::try_new_unstable::<PatternSyntax>(provider) | ||
| } | ||
| PatternWhiteSpace::NAME | PatternWhiteSpace::SHORT_NAME => { | ||
| Self::try_new_unstable::<PatternWhiteSpace>(provider) | ||
| } | ||
| QuotationMark::NAME | QuotationMark::SHORT_NAME => { | ||
| Self::try_new_unstable::<QuotationMark>(provider) | ||
| } | ||
| #[allow(unreachable_patterns)] // no short name | ||
| Radical::NAME | Radical::SHORT_NAME => Self::try_new_unstable::<Radical>(provider), | ||
| RegionalIndicator::NAME | RegionalIndicator::SHORT_NAME => { | ||
| Self::try_new_unstable::<RegionalIndicator>(provider) | ||
| } | ||
| SentenceTerminal::NAME | SentenceTerminal::SHORT_NAME => { | ||
| Self::try_new_unstable::<SentenceTerminal>(provider) | ||
| } | ||
| SoftDotted::NAME | SoftDotted::SHORT_NAME => { | ||
| Self::try_new_unstable::<SoftDotted>(provider) | ||
| } | ||
| TerminalPunctuation::NAME | TerminalPunctuation::SHORT_NAME => { | ||
| Self::try_new_unstable::<TerminalPunctuation>(provider) | ||
| } | ||
| UnifiedIdeograph::NAME | UnifiedIdeograph::SHORT_NAME => { | ||
| Self::try_new_unstable::<UnifiedIdeograph>(provider) | ||
| } | ||
| Uppercase::NAME | Uppercase::SHORT_NAME => { | ||
| Self::try_new_unstable::<Uppercase>(provider) | ||
| } | ||
| VariationSelector::NAME | VariationSelector::SHORT_NAME => { | ||
| Self::try_new_unstable::<VariationSelector>(provider) | ||
| } | ||
| WhiteSpace::NAME | WhiteSpace::SHORT_NAME => { | ||
| Self::try_new_unstable::<WhiteSpace>(provider) | ||
| } | ||
| XidContinue::NAME | XidContinue::SHORT_NAME => { | ||
| Self::try_new_unstable::<XidContinue>(provider) | ||
| } | ||
| XidStart::NAME | XidStart::SHORT_NAME => Self::try_new_unstable::<XidStart>(provider), | ||
| // Not an ECMA-262 property | ||
| _ => return None, | ||
| }) | ||
| } | ||
| } |
| // This file is part of ICU4X. For terms of use, please see the file | ||
| // called LICENSE at the top level of the ICU4X source tree | ||
| // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). | ||
| //! Data and APIs for supporting Script_Extensions property | ||
| //! values in an efficient structure. | ||
| use crate::props::Script; | ||
| use crate::provider::*; | ||
| #[cfg(feature = "alloc")] | ||
| use core::iter::FromIterator; | ||
| use core::ops::RangeInclusive; | ||
| #[cfg(feature = "alloc")] | ||
| use icu_collections::codepointinvlist::CodePointInversionList; | ||
| use icu_provider::prelude::*; | ||
| use zerovec::{ule::AsULE, ZeroSlice}; | ||
/// Width, in bits, of the low-order field of a `ScriptWithExt` value that
/// holds the `Script` value (or the index into `extensions`).
const SCRIPT_VAL_LENGTH: u16 = 10;

/// Mask selecting the low `SCRIPT_VAL_LENGTH` bits of a `ScriptWithExt`
/// value, i.e. the `Script` value (or `extensions` index).
const SCRIPT_X_SCRIPT_VAL: u16 = !(u16::MAX << SCRIPT_VAL_LENGTH);
| /// An internal-use only pseudo-property that represents the values stored in | ||
| /// the trie of the special data structure [`ScriptWithExtensionsProperty`]. | ||
| /// | ||
| /// Note: The value will assume a 12-bit layout. The 2 higher order bits in positions | ||
| /// 11..10 will indicate how to deduce the Script value and Script_Extensions, | ||
| /// and the lower 10 bits 9..0 indicate either the Script value or the index | ||
| /// into the `extensions` structure. | ||
| #[derive(Copy, Clone, Debug, Eq, PartialEq)] | ||
| #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] | ||
| #[cfg_attr(feature = "datagen", derive(databake::Bake))] | ||
| #[cfg_attr(feature = "datagen", databake(path = icu_properties::script))] | ||
| #[repr(transparent)] | ||
| #[doc(hidden)] | ||
| // `ScriptWithExt` is not intended as public-facing, but is needed by the `ScriptWithExtensionsProperty` constructor | ||
| #[allow(clippy::exhaustive_structs)] // this type is stable | ||
| pub struct ScriptWithExt(pub u16); | ||
| #[allow(missing_docs)] // These constants don't need individual documentation. | ||
| #[allow(non_upper_case_globals)] | ||
| #[doc(hidden)] // `ScriptWithExt` not intended as public-facing but for `ScriptWithExtensionsProperty` constructor | ||
| impl ScriptWithExt { | ||
| pub const Unknown: ScriptWithExt = ScriptWithExt(0); | ||
| } | ||
| impl AsULE for ScriptWithExt { | ||
| type ULE = <u16 as AsULE>::ULE; | ||
| #[inline] | ||
| fn to_unaligned(self) -> Self::ULE { | ||
| Script(self.0).to_unaligned() | ||
| } | ||
| #[inline] | ||
| fn from_unaligned(unaligned: Self::ULE) -> Self { | ||
| ScriptWithExt(Script::from_unaligned(unaligned).0) | ||
| } | ||
| } | ||
| #[doc(hidden)] // `ScriptWithExt` not intended as public-facing but for `ScriptWithExtensionsProperty` constructor | ||
| impl ScriptWithExt { | ||
| /// Returns whether the [`ScriptWithExt`] value has Script_Extensions and | ||
| /// also indicates a Script value of [`Script::Common`]. | ||
| /// | ||
| /// # Examples | ||
| /// | ||
| /// ``` | ||
| /// use icu::properties::script::ScriptWithExt; | ||
| /// | ||
| /// assert!(ScriptWithExt(0x04FF).is_common()); | ||
| /// assert!(ScriptWithExt(0x0400).is_common()); | ||
| /// | ||
| /// assert!(!ScriptWithExt(0x08FF).is_common()); | ||
| /// assert!(!ScriptWithExt(0x0800).is_common()); | ||
| /// | ||
| /// assert!(!ScriptWithExt(0x0CFF).is_common()); | ||
| /// assert!(!ScriptWithExt(0x0C00).is_common()); | ||
| /// | ||
| /// assert!(!ScriptWithExt(0xFF).is_common()); | ||
| /// assert!(!ScriptWithExt(0x0).is_common()); | ||
| /// ``` | ||
| pub fn is_common(&self) -> bool { | ||
| self.0 >> SCRIPT_VAL_LENGTH == 1 | ||
| } | ||
| /// Returns whether the [`ScriptWithExt`] value has Script_Extensions and | ||
| /// also indicates a Script value of [`Script::Inherited`]. | ||
| /// | ||
| /// # Examples | ||
| /// | ||
| /// ``` | ||
| /// use icu::properties::script::ScriptWithExt; | ||
| /// | ||
| /// assert!(!ScriptWithExt(0x04FF).is_inherited()); | ||
| /// assert!(!ScriptWithExt(0x0400).is_inherited()); | ||
| /// | ||
| /// assert!(ScriptWithExt(0x08FF).is_inherited()); | ||
| /// assert!(ScriptWithExt(0x0800).is_inherited()); | ||
| /// | ||
| /// assert!(!ScriptWithExt(0x0CFF).is_inherited()); | ||
| /// assert!(!ScriptWithExt(0x0C00).is_inherited()); | ||
| /// | ||
| /// assert!(!ScriptWithExt(0xFF).is_inherited()); | ||
| /// assert!(!ScriptWithExt(0x0).is_inherited()); | ||
| /// ``` | ||
| pub fn is_inherited(&self) -> bool { | ||
| self.0 >> SCRIPT_VAL_LENGTH == 2 | ||
| } | ||
| /// Returns whether the [`ScriptWithExt`] value has Script_Extensions and | ||
| /// also indicates that the Script value is neither [`Script::Common`] nor | ||
| /// [`Script::Inherited`]. | ||
| /// | ||
| /// # Examples | ||
| /// | ||
| /// ``` | ||
| /// use icu::properties::script::ScriptWithExt; | ||
| /// | ||
| /// assert!(!ScriptWithExt(0x04FF).is_other()); | ||
| /// assert!(!ScriptWithExt(0x0400).is_other()); | ||
| /// | ||
| /// assert!(!ScriptWithExt(0x08FF).is_other()); | ||
| /// assert!(!ScriptWithExt(0x0800).is_other()); | ||
| /// | ||
| /// assert!(ScriptWithExt(0x0CFF).is_other()); | ||
| /// assert!(ScriptWithExt(0x0C00).is_other()); | ||
| /// | ||
| /// assert!(!ScriptWithExt(0xFF).is_other()); | ||
| /// assert!(!ScriptWithExt(0x0).is_other()); | ||
| /// ``` | ||
| pub fn is_other(&self) -> bool { | ||
| self.0 >> SCRIPT_VAL_LENGTH == 3 | ||
| } | ||
| /// Returns whether the [`ScriptWithExt`] value has Script_Extensions. | ||
| /// | ||
| /// # Examples | ||
| /// | ||
| /// ``` | ||
| /// use icu::properties::script::ScriptWithExt; | ||
| /// | ||
| /// assert!(ScriptWithExt(0x04FF).has_extensions()); | ||
| /// assert!(ScriptWithExt(0x0400).has_extensions()); | ||
| /// | ||
| /// assert!(ScriptWithExt(0x08FF).has_extensions()); | ||
| /// assert!(ScriptWithExt(0x0800).has_extensions()); | ||
| /// | ||
| /// assert!(ScriptWithExt(0x0CFF).has_extensions()); | ||
| /// assert!(ScriptWithExt(0x0C00).has_extensions()); | ||
| /// | ||
| /// assert!(!ScriptWithExt(0xFF).has_extensions()); | ||
| /// assert!(!ScriptWithExt(0x0).has_extensions()); | ||
| /// ``` | ||
| pub fn has_extensions(&self) -> bool { | ||
| let high_order_bits = self.0 >> SCRIPT_VAL_LENGTH; | ||
| high_order_bits > 0 | ||
| } | ||
| } | ||
| impl From<ScriptWithExt> for u32 { | ||
| fn from(swe: ScriptWithExt) -> Self { | ||
| swe.0 as u32 | ||
| } | ||
| } | ||
| impl From<ScriptWithExt> for Script { | ||
| fn from(swe: ScriptWithExt) -> Self { | ||
| Script(swe.0) | ||
| } | ||
| } | ||
| /// A borrowed list of [`Script`] values, such as the return value of | ||
| /// [`get_script_extensions_val()`](ScriptWithExtensionsBorrowed::get_script_extensions_val). | ||
| #[derive(Copy, Clone, Debug, Eq, PartialEq)] | ||
| pub struct ScriptExtensionsSet<'a> { | ||
| values: &'a ZeroSlice<Script>, | ||
| } | ||
| impl<'a> ScriptExtensionsSet<'a> { | ||
| /// Returns whether this set contains the given script. | ||
| /// | ||
| /// # Example | ||
| /// | ||
| /// ``` | ||
| /// use icu::properties::props::Script; | ||
| /// use icu::properties::script::ScriptWithExtensions; | ||
| /// let swe = ScriptWithExtensions::new(); | ||
| /// | ||
| /// assert!(swe | ||
| /// .get_script_extensions_val('\u{11303}') // GRANTHA SIGN VISARGA | ||
| /// .contains(&Script::Grantha)); | ||
| /// ``` | ||
| pub fn contains(&self, x: &Script) -> bool { | ||
| ZeroSlice::binary_search(self.values, x).is_ok() | ||
| } | ||
| /// Gets an iterator over the elements. | ||
| /// | ||
| /// # Example | ||
| /// | ||
| /// ``` | ||
| /// use icu::properties::props::Script; | ||
| /// use icu::properties::script::ScriptWithExtensions; | ||
| /// let swe = ScriptWithExtensions::new(); | ||
| /// | ||
| /// assert_eq!( | ||
| /// swe.get_script_extensions_val('௫') // U+0BEB TAMIL DIGIT FIVE | ||
| /// .iter() | ||
| /// .collect::<Vec<_>>(), | ||
| /// [Script::Tamil, Script::Grantha] | ||
| /// ); | ||
| /// ``` | ||
| pub fn iter(&self) -> impl DoubleEndedIterator<Item = Script> + 'a { | ||
| ZeroSlice::iter(self.values) | ||
| } | ||
| /// For accessing this set as an array instead of an iterator | ||
| #[doc(hidden)] // used by FFI code | ||
| pub fn array_len(&self) -> usize { | ||
| self.values.len() | ||
| } | ||
| /// For accessing this set as an array instead of an iterator | ||
| #[doc(hidden)] // used by FFI code | ||
| pub fn array_get(&self, index: usize) -> Option<Script> { | ||
| self.values.get(index) | ||
| } | ||
| } | ||
| /// A struct that represents the data for the Script and Script_Extensions properties. | ||
| /// | ||
| /// ✨ *Enabled with the `compiled_data` Cargo feature.* | ||
| /// | ||
| /// [📚 Help choosing a constructor](icu_provider::constructors) | ||
| /// | ||
| /// Most useful methods are on [`ScriptWithExtensionsBorrowed`] obtained by calling [`ScriptWithExtensions::as_borrowed()`] | ||
| /// | ||
| /// # Examples | ||
| /// | ||
| /// ``` | ||
| /// use icu::properties::script::ScriptWithExtensions; | ||
| /// use icu::properties::props::Script; | ||
| /// let swe = ScriptWithExtensions::new(); | ||
| /// | ||
| /// // get the `Script` property value | ||
| /// assert_eq!(swe.get_script_val('ـ'), Script::Common); // U+0640 ARABIC TATWEEL | ||
| /// assert_eq!(swe.get_script_val('\u{0650}'), Script::Inherited); // U+0650 ARABIC KASRA | ||
| /// assert_eq!(swe.get_script_val('٠'), Script::Arabic); // U+0660 ARABIC-INDIC DIGIT ZERO | ||
| /// assert_eq!(swe.get_script_val('ﷲ'), Script::Arabic); // U+FDF2 ARABIC LIGATURE ALLAH ISOLATED FORM | ||
| /// | ||
| /// // get the `Script_Extensions` property value | ||
| /// assert_eq!( | ||
| /// swe.get_script_extensions_val('ـ') // U+0640 ARABIC TATWEEL | ||
| /// .iter().collect::<Vec<_>>(), | ||
| /// [Script::Arabic, Script::Syriac, Script::Mandaic, Script::Manichaean, | ||
| /// Script::PsalterPahlavi, Script::Adlam, Script::HanifiRohingya, Script::Sogdian, | ||
| /// Script::OldUyghur] | ||
| /// ); | ||
| /// assert_eq!( | ||
| /// swe.get_script_extensions_val('🥳') // U+1F973 FACE WITH PARTY HORN AND PARTY HAT | ||
| /// .iter().collect::<Vec<_>>(), | ||
| /// [Script::Common] | ||
| /// ); | ||
| /// assert_eq!( | ||
| /// swe.get_script_extensions_val('\u{200D}') // ZERO WIDTH JOINER | ||
| /// .iter().collect::<Vec<_>>(), | ||
| /// [Script::Inherited] | ||
| /// ); | ||
| /// assert_eq!( | ||
| /// swe.get_script_extensions_val('௫') // U+0BEB TAMIL DIGIT FIVE | ||
| /// .iter().collect::<Vec<_>>(), | ||
| /// [Script::Tamil, Script::Grantha] | ||
| /// ); | ||
| /// | ||
| /// // check containment of a `Script` value in the `Script_Extensions` value | ||
| /// // U+0650 ARABIC KASRA | ||
| /// assert!(!swe.has_script('\u{0650}', Script::Inherited)); // main Script value | ||
| /// assert!(swe.has_script('\u{0650}', Script::Arabic)); | ||
| /// assert!(swe.has_script('\u{0650}', Script::Syriac)); | ||
| /// assert!(!swe.has_script('\u{0650}', Script::Thaana)); | ||
| /// | ||
| /// // get a `CodePointInversionList` for when `Script` value is contained in `Script_Extensions` value | ||
| /// let syriac = swe.get_script_extensions_set(Script::Syriac); | ||
| /// assert!(syriac.contains('\u{0650}')); // ARABIC KASRA | ||
| /// assert!(!syriac.contains('٠')); // ARABIC-INDIC DIGIT ZERO | ||
| /// assert!(!syriac.contains('ﷲ')); // ARABIC LIGATURE ALLAH ISOLATED FORM | ||
| /// assert!(syriac.contains('܀')); // SYRIAC END OF PARAGRAPH | ||
| /// assert!(syriac.contains('\u{074A}')); // SYRIAC BARREKH | ||
| /// ``` | ||
| #[derive(Debug)] | ||
| pub struct ScriptWithExtensions { | ||
| data: DataPayload<PropertyScriptWithExtensionsV1>, | ||
| } | ||
| /// A borrowed view of the Script and Script_Extensions data, returned by | ||
| /// [`ScriptWithExtensions::as_borrowed()`] or [`ScriptWithExtensions::new()`]. More efficient to query. | ||
| #[derive(Clone, Copy, Debug)] | ||
| pub struct ScriptWithExtensionsBorrowed<'a> { | ||
| data: &'a ScriptWithExtensionsProperty<'a>, | ||
| } | ||
| impl ScriptWithExtensions { | ||
| /// Creates a new instance of `ScriptWithExtensionsBorrowed` using compiled data. | ||
| /// | ||
| /// ✨ *Enabled with the `compiled_data` Cargo feature.* | ||
| /// | ||
| /// [📚 Help choosing a constructor](icu_provider::constructors) | ||
| #[cfg(feature = "compiled_data")] | ||
| #[expect(clippy::new_ret_no_self)] | ||
| pub fn new() -> ScriptWithExtensionsBorrowed<'static> { | ||
| ScriptWithExtensionsBorrowed::new() | ||
| } | ||
| icu_provider::gen_buffer_data_constructors!( | ||
| () -> result: Result<ScriptWithExtensions, DataError>, | ||
| functions: [ | ||
| new: skip, | ||
| try_new_with_buffer_provider, | ||
| try_new_unstable, | ||
| Self, | ||
| ] | ||
| ); | ||
| #[doc = icu_provider::gen_buffer_unstable_docs!(UNSTABLE, Self::new)] | ||
| pub fn try_new_unstable( | ||
| provider: &(impl DataProvider<PropertyScriptWithExtensionsV1> + ?Sized), | ||
| ) -> Result<Self, DataError> { | ||
| Ok(ScriptWithExtensions::from_data( | ||
| provider.load(Default::default())?.payload, | ||
| )) | ||
| } | ||
| /// Construct a borrowed version of this type that can be queried. | ||
| /// | ||
| /// This avoids a potential small underlying cost per API call (ex: `contains()`) by consolidating it | ||
| /// up front. | ||
| #[inline] | ||
| pub fn as_borrowed(&self) -> ScriptWithExtensionsBorrowed<'_> { | ||
| ScriptWithExtensionsBorrowed { | ||
| data: self.data.get(), | ||
| } | ||
| } | ||
| /// Construct a new one from loaded data | ||
| /// | ||
| /// Typically it is preferable to use getters like [`load_script_with_extensions_unstable()`] instead | ||
| pub(crate) fn from_data(data: DataPayload<PropertyScriptWithExtensionsV1>) -> Self { | ||
| Self { data } | ||
| } | ||
| } | ||
| impl<'a> ScriptWithExtensionsBorrowed<'a> { | ||
| /// Returns the `Script` property value for this code point. | ||
| /// | ||
| /// # Examples | ||
| /// | ||
| /// ``` | ||
| /// use icu::properties::script::ScriptWithExtensions; | ||
| /// use icu::properties::props::Script; | ||
| /// | ||
| /// let swe = ScriptWithExtensions::new(); | ||
| /// | ||
| /// // U+0640 ARABIC TATWEEL | ||
| /// assert_eq!(swe.get_script_val('ـ'), Script::Common); // main Script value | ||
| /// assert_ne!(swe.get_script_val('ـ'), Script::Arabic); | ||
| /// assert_ne!(swe.get_script_val('ـ'), Script::Syriac); | ||
| /// assert_ne!(swe.get_script_val('ـ'), Script::Thaana); | ||
| /// | ||
| /// // U+0650 ARABIC KASRA | ||
| /// assert_eq!(swe.get_script_val('\u{0650}'), Script::Inherited); // main Script value | ||
| /// assert_ne!(swe.get_script_val('\u{0650}'), Script::Arabic); | ||
| /// assert_ne!(swe.get_script_val('\u{0650}'), Script::Syriac); | ||
| /// assert_ne!(swe.get_script_val('\u{0650}'), Script::Thaana); | ||
| /// | ||
| /// // U+0660 ARABIC-INDIC DIGIT ZERO | ||
| /// assert_ne!(swe.get_script_val('٠'), Script::Common); | ||
| /// assert_eq!(swe.get_script_val('٠'), Script::Arabic); // main Script value | ||
| /// assert_ne!(swe.get_script_val('٠'), Script::Syriac); | ||
| /// assert_ne!(swe.get_script_val('٠'), Script::Thaana); | ||
| /// | ||
| /// // U+FDF2 ARABIC LIGATURE ALLAH ISOLATED FORM | ||
| /// assert_ne!(swe.get_script_val('ﷲ'), Script::Common); | ||
| /// assert_eq!(swe.get_script_val('ﷲ'), Script::Arabic); // main Script value | ||
| /// assert_ne!(swe.get_script_val('ﷲ'), Script::Syriac); | ||
| /// assert_ne!(swe.get_script_val('ﷲ'), Script::Thaana); | ||
| /// ``` | ||
| pub fn get_script_val(self, ch: char) -> Script { | ||
| self.get_script_val32(ch as u32) | ||
| } | ||
| /// See [`Self::get_script_val`]. | ||
| pub fn get_script_val32(self, code_point: u32) -> Script { | ||
| let sc_with_ext = self.data.trie.get32(code_point); | ||
| if sc_with_ext.is_other() { | ||
| let ext_idx = sc_with_ext.0 & SCRIPT_X_SCRIPT_VAL; | ||
| let scx_val = self.data.extensions.get(ext_idx as usize); | ||
| let scx_first_sc = scx_val.and_then(|scx| scx.get(0)); | ||
| let default_sc_val = Script::Unknown; | ||
| scx_first_sc.unwrap_or(default_sc_val) | ||
| } else if sc_with_ext.is_common() { | ||
| Script::Common | ||
| } else if sc_with_ext.is_inherited() { | ||
| Script::Inherited | ||
| } else { | ||
| let script_val = sc_with_ext.0; | ||
| Script(script_val) | ||
| } | ||
| } | ||
| // Returns the Script_Extensions value for a code_point when the trie value | ||
| // is already known. | ||
| // This private helper method exists to prevent code duplication in callers like | ||
| // `get_script_extensions_val`, `get_script_extensions_set`, and `has_script`. | ||
| fn get_scx_val_using_trie_val( | ||
| self, | ||
| sc_with_ext_ule: &'a <ScriptWithExt as AsULE>::ULE, | ||
| ) -> &'a ZeroSlice<Script> { | ||
| let sc_with_ext = ScriptWithExt::from_unaligned(*sc_with_ext_ule); | ||
| if sc_with_ext.is_other() { | ||
| let ext_idx = sc_with_ext.0 & SCRIPT_X_SCRIPT_VAL; | ||
| let ext_subarray = self.data.extensions.get(ext_idx as usize); | ||
| // In the OTHER case, where the 2 higher-order bits of the | ||
| // `ScriptWithExt` value in the trie doesn't indicate the Script value, | ||
| // the Script value is copied/inserted into the first position of the | ||
| // `extensions` array. So we must remove it to return the actual scx array val. | ||
| let scx_slice = ext_subarray | ||
| .and_then(|zslice| zslice.as_ule_slice().get(1..)) | ||
| .unwrap_or_default(); | ||
| ZeroSlice::from_ule_slice(scx_slice) | ||
| } else if sc_with_ext.is_common() || sc_with_ext.is_inherited() { | ||
| let ext_idx = sc_with_ext.0 & SCRIPT_X_SCRIPT_VAL; | ||
| let scx_val = self.data.extensions.get(ext_idx as usize); | ||
| scx_val.unwrap_or_default() | ||
| } else { | ||
| // Note: `Script` and `ScriptWithExt` are both represented as the same | ||
| // u16 value when the `ScriptWithExt` has no higher-order bits set. | ||
| let script_ule_slice = core::slice::from_ref(sc_with_ext_ule); | ||
| ZeroSlice::from_ule_slice(script_ule_slice) | ||
| } | ||
| } | ||
| /// Return the `Script_Extensions` property value for this code point. | ||
| /// | ||
| /// If `code_point` has Script_Extensions, then return the Script codes in | ||
| /// the Script_Extensions. In this case, the Script property value | ||
| /// (normally Common or Inherited) is not included in the [`ScriptExtensionsSet`]. | ||
| /// | ||
| /// If `code_point` does not have Script_Extensions, then the returned | ||
| /// [`ScriptExtensionsSet`] contains only its one Script code. | ||
| /// | ||
| /// If `code_point` is not a valid code point, then return an empty [`ScriptExtensionsSet`]. | ||
| /// | ||
| /// # Examples | ||
| /// | ||
| /// ``` | ||
| /// use icu::properties::script::ScriptWithExtensions; | ||
| /// use icu::properties::props::Script; | ||
| /// | ||
| /// let swe = ScriptWithExtensions::new(); | ||
| /// | ||
| /// assert_eq!( | ||
| /// swe.get_script_extensions_val('𐓐') // U+104D0 OSAGE CAPITAL LETTER KHA | ||
| /// .iter() | ||
| /// .collect::<Vec<_>>(), | ||
| /// [Script::Osage] | ||
| /// ); | ||
| /// assert_eq!( | ||
| /// swe.get_script_extensions_val('🥳') // U+1F973 FACE WITH PARTY HORN AND PARTY HAT | ||
| /// .iter() | ||
| /// .collect::<Vec<_>>(), | ||
| /// [Script::Common] | ||
| /// ); | ||
| /// assert_eq!( | ||
| /// swe.get_script_extensions_val('\u{200D}') // ZERO WIDTH JOINER | ||
| /// .iter() | ||
| /// .collect::<Vec<_>>(), | ||
| /// [Script::Inherited] | ||
| /// ); | ||
| /// assert_eq!( | ||
| /// swe.get_script_extensions_val('௫') // U+0BEB TAMIL DIGIT FIVE | ||
| /// .iter() | ||
| /// .collect::<Vec<_>>(), | ||
| /// [Script::Tamil, Script::Grantha] | ||
| /// ); | ||
| /// ``` | ||
| pub fn get_script_extensions_val(self, ch: char) -> ScriptExtensionsSet<'a> { | ||
| self.get_script_extensions_val32(ch as u32) | ||
| } | ||
| /// See [`Self::get_script_extensions_val`]. | ||
| pub fn get_script_extensions_val32(self, code_point: u32) -> ScriptExtensionsSet<'a> { | ||
| let sc_with_ext_ule = self.data.trie.get32_ule(code_point); | ||
| ScriptExtensionsSet { | ||
| values: match sc_with_ext_ule { | ||
| Some(ule_ref) => self.get_scx_val_using_trie_val(ule_ref), | ||
| None => ZeroSlice::from_ule_slice(&[]), | ||
| }, | ||
| } | ||
| } | ||
| /// Returns whether `script` is contained in the Script_Extensions | ||
| /// property value if the code_point has Script_Extensions, otherwise | ||
| /// if the code point does not have Script_Extensions then returns | ||
| /// whether the Script property value matches. | ||
| /// | ||
| /// Some characters are commonly used in multiple scripts. For more information, | ||
| /// see UAX #24: <http://www.unicode.org/reports/tr24/>. | ||
| /// | ||
| /// # Examples | ||
| /// | ||
| /// ``` | ||
| /// use icu::properties::script::ScriptWithExtensions; | ||
| /// use icu::properties::props::Script; | ||
| /// | ||
| /// let swe = ScriptWithExtensions::new(); | ||
| /// | ||
| /// // U+0650 ARABIC KASRA | ||
| /// assert!(!swe.has_script('\u{0650}', Script::Inherited)); // main Script value | ||
| /// assert!(swe.has_script('\u{0650}', Script::Arabic)); | ||
| /// assert!(swe.has_script('\u{0650}', Script::Syriac)); | ||
| /// assert!(!swe.has_script('\u{0650}', Script::Thaana)); | ||
| /// | ||
| /// // U+0660 ARABIC-INDIC DIGIT ZERO | ||
| /// assert!(!swe.has_script('٠', Script::Common)); // main Script value | ||
| /// assert!(swe.has_script('٠', Script::Arabic)); | ||
| /// assert!(!swe.has_script('٠', Script::Syriac)); | ||
| /// assert!(swe.has_script('٠', Script::Thaana)); | ||
| /// | ||
| /// // U+FDF2 ARABIC LIGATURE ALLAH ISOLATED FORM | ||
| /// assert!(!swe.has_script('ﷲ', Script::Common)); | ||
| /// assert!(swe.has_script('ﷲ', Script::Arabic)); // main Script value | ||
| /// assert!(!swe.has_script('ﷲ', Script::Syriac)); | ||
| /// assert!(swe.has_script('ﷲ', Script::Thaana)); | ||
| /// ``` | ||
| pub fn has_script(self, ch: char, script: Script) -> bool { | ||
| self.has_script32(ch as u32, script) | ||
| } | ||
| /// See [`Self::has_script`]. | ||
| pub fn has_script32(self, code_point: u32, script: Script) -> bool { | ||
| let sc_with_ext_ule = if let Some(scwe_ule) = self.data.trie.get32_ule(code_point) { | ||
| scwe_ule | ||
| } else { | ||
| return false; | ||
| }; | ||
| let sc_with_ext = <ScriptWithExt as AsULE>::from_unaligned(*sc_with_ext_ule); | ||
| if !sc_with_ext.has_extensions() { | ||
| let script_val = sc_with_ext.0; | ||
| script == Script(script_val) | ||
| } else { | ||
| let scx_val = self.get_scx_val_using_trie_val(sc_with_ext_ule); | ||
| let script_find = scx_val.iter().find(|&sc| sc == script); | ||
| script_find.is_some() | ||
| } | ||
| } | ||
| /// Returns an iterator over all of the code point ranges for the given [`Script`] | ||
| /// in which `has_script` will return true for all of the contained code points. | ||
| /// | ||
| /// # Examples | ||
| /// | ||
| /// ``` | ||
| /// use icu::properties::props::Script; | ||
| /// use icu::properties::script::ScriptWithExtensions; | ||
| /// | ||
| /// let swe = ScriptWithExtensions::new(); | ||
| /// | ||
| /// let syriac_script_extensions_ranges = | ||
| /// swe.get_script_extensions_ranges(Script::Syriac); | ||
| /// | ||
| /// let exp_ranges = [ | ||
| /// 0x0303..=0x0304, // COMBINING TILDE..COMBINING MACRON | ||
| /// 0x0307..=0x0308, // COMBINING DOT ABOVE..COMBINING DIAERESIS | ||
| /// 0x030A..=0x030A, // COMBINING RING ABOVE | ||
| /// 0x0323..=0x0325, // COMBINING DOT BELOW..COMBINING RING BELOW | ||
| /// 0x032D..=0x032E, // COMBINING CIRCUMFLEX ACCENT BELOW..COMBINING BREVE BELOW | ||
| /// 0x0330..=0x0331, // COMBINING TILDE BELOW..COMBINING MACRON BELOW | ||
| /// 0x060C..=0x060C, // ARABIC COMMA | ||
| /// 0x061B..=0x061C, // ARABIC SEMICOLON, ARABIC LETTER MARK | ||
| /// 0x061F..=0x061F, // ARABIC QUESTION MARK | ||
| /// 0x0640..=0x0640, // ARABIC TATWEEL | ||
| /// 0x064B..=0x0655, // ARABIC FATHATAN..ARABIC HAMZA BELOW | ||
| /// 0x0670..=0x0670, // ARABIC LETTER SUPERSCRIPT ALEF | ||
| /// 0x0700..=0x070D, // Syriac block begins at U+0700 | ||
| /// 0x070F..=0x074A, // Syriac block | ||
| /// 0x074D..=0x074F, // Syriac block ends at U+074F | ||
| /// 0x0860..=0x086A, // Syriac Supplement block is U+0860..=U+086F | ||
| /// 0x1DF8..=0x1DF8, // COMBINING DOT ABOVE LEFT | ||
| /// 0x1DFA..=0x1DFA, // COMBINING DOT BELOW LEFT | ||
| /// ]; | ||
| /// | ||
| /// assert_eq!( | ||
| /// syriac_script_extensions_ranges.collect::<Vec<_>>(), | ||
| /// exp_ranges | ||
| /// ); | ||
| /// ``` | ||
| pub fn get_script_extensions_ranges( | ||
| self, | ||
| script: Script, | ||
| ) -> impl Iterator<Item = RangeInclusive<u32>> + 'a { | ||
| self.data | ||
| .trie | ||
| .iter_ranges_mapped(move |value| { | ||
| let sc_with_ext = ScriptWithExt(value.0); | ||
| if sc_with_ext.has_extensions() { | ||
| self.get_scx_val_using_trie_val(&sc_with_ext.to_unaligned()) | ||
| .iter() | ||
| .any(|sc| sc == script) | ||
| } else { | ||
| script == sc_with_ext.into() | ||
| } | ||
| }) | ||
| .filter(|v| v.value) | ||
| .map(|v| v.range) | ||
| } | ||
| /// Returns a [`CodePointInversionList`] for the given [`Script`] which represents all | ||
| /// code points for which `has_script` will return true. | ||
| /// | ||
| /// ✨ *Enabled with the `alloc` Cargo feature.* | ||
| /// | ||
| /// # Examples | ||
| /// | ||
| /// ``` | ||
| /// use icu::properties::script::ScriptWithExtensions; | ||
| /// use icu::properties::props::Script; | ||
| /// | ||
| /// let swe = ScriptWithExtensions::new(); | ||
| /// | ||
| /// let syriac = swe.get_script_extensions_set(Script::Syriac); | ||
| /// | ||
| /// assert!(!syriac.contains('؞')); // ARABIC TRIPLE DOT PUNCTUATION MARK | ||
| /// assert!(syriac.contains('؟')); // ARABIC QUESTION MARK | ||
| /// assert!(!syriac.contains('ؠ')); // ARABIC LETTER KASHMIRI YEH | ||
| /// | ||
| /// assert!(syriac.contains('܀')); // SYRIAC END OF PARAGRAPH | ||
| /// assert!(syriac.contains('\u{074A}')); // SYRIAC BARREKH | ||
| /// assert!(!syriac.contains('\u{074B}')); // unassigned | ||
| /// assert!(syriac.contains('ݏ')); // SYRIAC LETTER SOGDIAN FE | ||
| /// assert!(!syriac.contains('ݐ')); // ARABIC LETTER BEH WITH THREE DOTS HORIZONTALLY BELOW | ||
| /// | ||
| /// assert!(syriac.contains('\u{1DF8}')); // COMBINING DOT ABOVE LEFT | ||
| /// assert!(!syriac.contains('\u{1DF9}')); // COMBINING WIDE INVERTED BRIDGE BELOW | ||
| /// assert!(syriac.contains('\u{1DFA}')); // COMBINING DOT BELOW LEFT | ||
| /// assert!(!syriac.contains('\u{1DFB}')); // COMBINING DELETION MARK | ||
| /// ``` | ||
#[cfg(feature = "alloc")]
pub fn get_script_extensions_set(self, script: Script) -> CodePointInversionList<'a> {
    // Build the inversion list from the matching code point ranges.
    let matching_ranges = self.get_script_extensions_ranges(script);
    CodePointInversionList::from_iter(matching_ranges)
}
| } | ||
#[cfg(feature = "compiled_data")]
impl Default for ScriptWithExtensionsBorrowed<'static> {
    /// Equivalent to [`ScriptWithExtensionsBorrowed::new`].
    fn default() -> Self {
        ScriptWithExtensionsBorrowed::new()
    }
}
| impl ScriptWithExtensionsBorrowed<'static> { | ||
| /// Creates a new instance of `ScriptWithExtensionsBorrowed` using compiled data. | ||
| /// | ||
| /// ✨ *Enabled with the `compiled_data` Cargo feature.* | ||
| /// | ||
| /// [📚 Help choosing a constructor](icu_provider::constructors) | ||
| #[cfg(feature = "compiled_data")] | ||
| pub fn new() -> Self { | ||
| Self { | ||
| data: crate::provider::Baked::SINGLETON_PROPERTY_SCRIPT_WITH_EXTENSIONS_V1, | ||
| } | ||
| } | ||
| /// Cheaply converts a [`ScriptWithExtensionsBorrowed<'static>`] into a [`ScriptWithExtensions`]. | ||
| /// | ||
| /// Note: Due to branching and indirection, using [`ScriptWithExtensions`] might inhibit some | ||
| /// compile-time optimizations that are possible with [`ScriptWithExtensionsBorrowed`]. | ||
| pub const fn static_to_owned(self) -> ScriptWithExtensions { | ||
| ScriptWithExtensions { | ||
| data: DataPayload::from_static_ref(self.data), | ||
| } | ||
| } | ||
| } | ||
#[cfg(test)]
mod tests {
    use super::*;

    /// Regression test for https://github.com/unicode-org/icu4x/issues/6041
    #[test]
    fn test_scx_regression_6041() {
        let expected = [
            Script::Bengali,
            Script::Cyrillic,
            Script::Devanagari,
            Script::Latin,
            Script::Thai,
            Script::Lisu,
            Script::Toto,
        ];

        let swe = ScriptWithExtensions::new();
        let scripts: Vec<_> = swe.get_script_extensions_val('\u{2bc}').iter().collect();

        assert_eq!(scripts, expected);
    }
}
| // This file is part of ICU4X. For terms of use, please see the file | ||
| // called LICENSE at the top level of the ICU4X source tree | ||
| // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). | ||
| use crate::bidi::BidiMirroringGlyph; | ||
| use crate::props::{ | ||
| BidiClass, CanonicalCombiningClass, EastAsianWidth, GeneralCategory, GeneralCategoryGroup, | ||
| GraphemeClusterBreak, HangulSyllableType, IndicConjunctBreak, IndicSyllabicCategory, | ||
| JoiningType, LineBreak, Script, SentenceBreak, VerticalOrientation, WordBreak, | ||
| }; | ||
| use crate::script::ScriptWithExt; | ||
| use core::convert::TryInto; | ||
| use core::num::TryFromIntError; | ||
| use zerovec::ule::{AsULE, RawBytesULE}; | ||
| use icu_collections::codepointtrie::TrieValue; | ||
| use core::convert::TryFrom; | ||
| impl TrieValue for CanonicalCombiningClass { | ||
| type TryFromU32Error = TryFromIntError; | ||
| fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { | ||
| u8::try_from(i).map(Self) | ||
| } | ||
| fn to_u32(self) -> u32 { | ||
| u32::from(self.0) | ||
| } | ||
| } | ||
| impl TrieValue for BidiClass { | ||
| type TryFromU32Error = TryFromIntError; | ||
| fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { | ||
| u8::try_from(i).map(Self) | ||
| } | ||
| fn to_u32(self) -> u32 { | ||
| u32::from(self.0) | ||
| } | ||
| } | ||
| impl TrieValue for GeneralCategory { | ||
| type TryFromU32Error = &'static str; | ||
| fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { | ||
| // If the u32 is out of range, fall back to u8::MAX, which is out of range of the GeneralCategory enum. | ||
| GeneralCategory::new_from_u8(i.try_into().unwrap_or(u8::MAX)) | ||
| .ok_or("Cannot parse GeneralCategory from integer") | ||
| } | ||
| fn to_u32(self) -> u32 { | ||
| u32::from(self as u8) | ||
| } | ||
| } | ||
| impl TrieValue for Script { | ||
| type TryFromU32Error = TryFromIntError; | ||
| fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { | ||
| u16::try_from(i).map(Script) | ||
| } | ||
| fn to_u32(self) -> u32 { | ||
| u32::from(self.0) | ||
| } | ||
| } | ||
| impl TrieValue for HangulSyllableType { | ||
| type TryFromU32Error = TryFromIntError; | ||
| fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { | ||
| u8::try_from(i).map(Self) | ||
| } | ||
| fn to_u32(self) -> u32 { | ||
| u32::from(self.0) | ||
| } | ||
| } | ||
| impl TrieValue for ScriptWithExt { | ||
| type TryFromU32Error = TryFromIntError; | ||
| fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { | ||
| u16::try_from(i).map(Self) | ||
| } | ||
| fn to_u32(self) -> u32 { | ||
| u32::from(self.0) | ||
| } | ||
| } | ||
| impl TrieValue for EastAsianWidth { | ||
| type TryFromU32Error = TryFromIntError; | ||
| fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { | ||
| u8::try_from(i).map(Self) | ||
| } | ||
| fn to_u32(self) -> u32 { | ||
| u32::from(self.0) | ||
| } | ||
| } | ||
| impl TrieValue for LineBreak { | ||
| type TryFromU32Error = TryFromIntError; | ||
| fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { | ||
| u8::try_from(i).map(Self) | ||
| } | ||
| fn to_u32(self) -> u32 { | ||
| u32::from(self.0) | ||
| } | ||
| } | ||
| impl TrieValue for GraphemeClusterBreak { | ||
| type TryFromU32Error = TryFromIntError; | ||
| fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { | ||
| u8::try_from(i).map(Self) | ||
| } | ||
| fn to_u32(self) -> u32 { | ||
| u32::from(self.0) | ||
| } | ||
| } | ||
| impl TrieValue for WordBreak { | ||
| type TryFromU32Error = TryFromIntError; | ||
| fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { | ||
| u8::try_from(i).map(Self) | ||
| } | ||
| fn to_u32(self) -> u32 { | ||
| u32::from(self.0) | ||
| } | ||
| } | ||
| impl TrieValue for SentenceBreak { | ||
| type TryFromU32Error = TryFromIntError; | ||
| fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { | ||
| u8::try_from(i).map(Self) | ||
| } | ||
| fn to_u32(self) -> u32 { | ||
| u32::from(self.0) | ||
| } | ||
| } | ||
| impl TrieValue for IndicConjunctBreak { | ||
| type TryFromU32Error = TryFromIntError; | ||
| fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { | ||
| u8::try_from(i).map(Self) | ||
| } | ||
| fn to_u32(self) -> u32 { | ||
| u32::from(self.0) | ||
| } | ||
| } | ||
| impl TrieValue for IndicSyllabicCategory { | ||
| type TryFromU32Error = TryFromIntError; | ||
| fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { | ||
| u8::try_from(i).map(Self) | ||
| } | ||
| fn to_u32(self) -> u32 { | ||
| u32::from(self.0) | ||
| } | ||
| } | ||
| impl TrieValue for VerticalOrientation { | ||
| type TryFromU32Error = TryFromIntError; | ||
| fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { | ||
| u8::try_from(i).map(Self) | ||
| } | ||
| fn to_u32(self) -> u32 { | ||
| u32::from(self.0) | ||
| } | ||
| } | ||
| // GCG is not used inside tries, but it is used in the name lookup type, and we want | ||
| // to squeeze it into a u16 for storage. Its named mask values are specced so we can | ||
| // do this in code. | ||
| // | ||
| // This is done by: | ||
| // - Single-value masks are translated to their corresponding GeneralCategory values | ||
| // - we know all of the multi-value masks and we give them special values | ||
| // - Anything else goes to 0xFF00, though this code path shouldn't be hit unless working with malformed icuexportdata | ||
| // | ||
| // In the reverse direction, unknown values go to the empty mask, but this codepath should not be hit except | ||
| // with malformed ICU4X generated data. | ||
| impl AsULE for GeneralCategoryGroup { | ||
| type ULE = RawBytesULE<2>; | ||
| fn to_unaligned(self) -> Self::ULE { | ||
| let value = gcg_to_packed_u16(self); | ||
| value.to_unaligned() | ||
| } | ||
| fn from_unaligned(ule: Self::ULE) -> Self { | ||
| let value = ule.as_unsigned_int(); | ||
| packed_u16_to_gcg(value) | ||
| } | ||
| } | ||
| fn packed_u16_to_gcg(value: u16) -> GeneralCategoryGroup { | ||
| match value { | ||
| 0xFFFF => GeneralCategoryGroup::CasedLetter, | ||
| 0xFFFE => GeneralCategoryGroup::Letter, | ||
| 0xFFFD => GeneralCategoryGroup::Mark, | ||
| 0xFFFC => GeneralCategoryGroup::Number, | ||
| 0xFFFB => GeneralCategoryGroup::Separator, | ||
| 0xFFFA => GeneralCategoryGroup::Other, | ||
| 0xFFF9 => GeneralCategoryGroup::Punctuation, | ||
| 0xFFF8 => GeneralCategoryGroup::Symbol, | ||
| v if v < 32 => GeneralCategory::new_from_u8(v as u8) | ||
| .map(|gc| gc.into()) | ||
| .unwrap_or(GeneralCategoryGroup(0)), | ||
| // unknown values produce an empty mask | ||
| _ => GeneralCategoryGroup(0), | ||
| } | ||
| } | ||
| fn gcg_to_packed_u16(gcg: GeneralCategoryGroup) -> u16 { | ||
| // if it's a single property, translate to that property | ||
| if gcg.0.is_power_of_two() { | ||
| // inverse operation of a bitshift | ||
| gcg.0.trailing_zeros() as u16 | ||
| } else { | ||
| match gcg { | ||
| GeneralCategoryGroup::CasedLetter => 0xFFFF, | ||
| GeneralCategoryGroup::Letter => 0xFFFE, | ||
| GeneralCategoryGroup::Mark => 0xFFFD, | ||
| GeneralCategoryGroup::Number => 0xFFFC, | ||
| GeneralCategoryGroup::Separator => 0xFFFB, | ||
| GeneralCategoryGroup::Other => 0xFFFA, | ||
| GeneralCategoryGroup::Punctuation => 0xFFF9, | ||
| GeneralCategoryGroup::Symbol => 0xFFF8, | ||
| _ => 0xFF00, // random sentinel value | ||
| } | ||
| } | ||
| } | ||
| impl TrieValue for GeneralCategoryGroup { | ||
| type TryFromU32Error = TryFromIntError; | ||
| fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { | ||
| // Even though we're dealing with u32s here, TrieValue is about converting | ||
| // trie storage types to the actual type. This type will always be a packed u16 | ||
| // in our case since the names map upcasts from u16 | ||
| u16::try_from(i).map(packed_u16_to_gcg) | ||
| } | ||
| fn to_u32(self) -> u32 { | ||
| u32::from(gcg_to_packed_u16(self)) | ||
| } | ||
| } | ||
| impl TrieValue for BidiMirroringGlyph { | ||
| type TryFromU32Error = u32; | ||
| fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { | ||
| let code_point = i & 0x1FFFFF; | ||
| let mirroring_glyph = if code_point == 0 { | ||
| None | ||
| } else { | ||
| Some(char::try_from_u32(code_point).map_err(|_| i)?) | ||
| }; | ||
| let mirrored = ((i >> 21) & 0x1) == 1; | ||
| let paired_bracket_type = { | ||
| let value = ((i >> 22) & 0x3) as u8; | ||
| match value { | ||
| 0 => crate::bidi::BidiPairedBracketType::None, | ||
| 1 => crate::bidi::BidiPairedBracketType::Open, | ||
| 2 => crate::bidi::BidiPairedBracketType::Close, | ||
| _ => return Err(i), | ||
| } | ||
| }; | ||
| Ok(Self { | ||
| mirrored, | ||
| mirroring_glyph, | ||
| paired_bracket_type, | ||
| }) | ||
| } | ||
| fn to_u32(self) -> u32 { | ||
| self.mirroring_glyph.unwrap_or_default() as u32 | ||
| | ((self.mirrored as u32) << 21) | ||
| | (match self.paired_bracket_type { | ||
| crate::bidi::BidiPairedBracketType::None => 0, | ||
| crate::bidi::BidiPairedBracketType::Open => 1, | ||
| crate::bidi::BidiPairedBracketType::Close => 2, | ||
| } << 22) | ||
| } | ||
| } | ||
| impl TrieValue for JoiningType { | ||
| type TryFromU32Error = TryFromIntError; | ||
| fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> { | ||
| u8::try_from(i).map(Self) | ||
| } | ||
| fn to_u32(self) -> u32 { | ||
| u32::from(self.0) | ||
| } | ||
| } |