unicode-ident
Advanced tools
| { | ||
| "git": { | ||
| "sha1": "dc018bf1ca82d295f72a84e7ed432e5d2bcbe2fe" | ||
| "sha1": "12f0a68b138f1914e9abff76bb1d438b4a953f49" | ||
| }, | ||
| "path_in_vcs": "" | ||
| } |
@@ -30,4 +30,3 @@ name: CI | ||
| - run: cargo install ucd-generate | ||
| - # FIXME: https://www.unicode.org/Public/latest/ucd/UCD.zip still points to 16.0.0, not 17.0.0 | ||
| run: curl https://www.unicode.org/Public/17.0.0/ucd/UCD.zip --location --remote-name --silent --show-error --fail --retry 2 | ||
| - run: curl https://www.unicode.org/Public/latest/ucd/UCD.zip --location --remote-name --silent --show-error --fail --retry 2 | ||
| - run: unzip UCD.zip -d UCD | ||
@@ -34,0 +33,0 @@ - run: ucd-generate property-bool UCD --include XID_Start,XID_Continue > tests/tables/tables.rs |
+62
-59
@@ -22,5 +22,5 @@ # This file is automatically @generated by Cargo. | ||
| name = "anstyle" | ||
| version = "1.0.11" | ||
| version = "1.0.13" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd" | ||
| checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" | ||
@@ -35,5 +35,5 @@ [[package]] | ||
| name = "bytemuck" | ||
| version = "1.23.2" | ||
| version = "1.24.0" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "3995eaeebcdf32f91f980d360f78732ddc061097ab4e39991ae7a6ace9194677" | ||
| checksum = "1fbdf580320f38b612e485521afda1ee26d10cc9884efaaa750d383e13e3c5f4" | ||
@@ -54,5 +54,5 @@ [[package]] | ||
| name = "cfg-if" | ||
| version = "1.0.3" | ||
| version = "1.0.4" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" | ||
| checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" | ||
@@ -88,5 +88,5 @@ [[package]] | ||
| name = "clap" | ||
| version = "4.5.47" | ||
| version = "4.5.50" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "7eac00902d9d136acd712710d71823fb8ac8004ca445a89e73a41d45aa712931" | ||
| checksum = "0c2cfd7bf8a6017ddaa4e32ffe7403d547790db06bd171c1c53926faab501623" | ||
| dependencies = [ | ||
@@ -98,5 +98,5 @@ "clap_builder", | ||
| name = "clap_builder" | ||
| version = "4.5.47" | ||
| version = "4.5.50" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "2ad9bbf750e73b5884fb8a211a9424a1906c1e156724260fdae972f31d70e1d6" | ||
| checksum = "0a4c05b9e80c5ccd3a7ef080ad7b6ba7d6fc00a985b8b157197075677c82c7a0" | ||
| dependencies = [ | ||
@@ -109,5 +109,5 @@ "anstyle", | ||
| name = "clap_lex" | ||
| version = "0.7.5" | ||
| version = "0.7.6" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675" | ||
| checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d" | ||
@@ -165,5 +165,5 @@ [[package]] | ||
| name = "getrandom" | ||
| version = "0.3.3" | ||
| version = "0.3.4" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" | ||
| checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" | ||
| dependencies = [ | ||
@@ -173,3 +173,3 @@ "cfg-if", | ||
| "r-efi", | ||
| "wasi", | ||
| "wasip2", | ||
| ] | ||
@@ -179,8 +179,9 @@ | ||
| name = "half" | ||
| version = "2.6.0" | ||
| version = "2.7.1" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "459196ed295495a68f7d7fe1d84f6c4b7ff0e21fe3017b2f283c6fac3ad803c9" | ||
| checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" | ||
| dependencies = [ | ||
| "cfg-if", | ||
| "crunchy", | ||
| "zerocopy", | ||
| ] | ||
@@ -205,11 +206,11 @@ | ||
| name = "libc" | ||
| version = "0.2.175" | ||
| version = "0.2.177" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543" | ||
| checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" | ||
| [[package]] | ||
| name = "memchr" | ||
| version = "2.7.5" | ||
| version = "2.7.6" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" | ||
| checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" | ||
@@ -246,3 +247,3 @@ [[package]] | ||
| dependencies = [ | ||
| "unicode-ident 1.0.18", | ||
| "unicode-ident 1.0.19", | ||
| ] | ||
@@ -252,5 +253,5 @@ | ||
| name = "quote" | ||
| version = "1.0.40" | ||
| version = "1.0.41" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" | ||
| checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" | ||
| dependencies = [ | ||
@@ -297,5 +298,5 @@ "proc-macro2", | ||
| name = "regex" | ||
| version = "1.11.2" | ||
| version = "1.12.2" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "23d7fd106d8c02486a8d64e778353d1cffe08ce79ac2e82f540c86d0facf6912" | ||
| checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" | ||
| dependencies = [ | ||
@@ -310,5 +311,5 @@ "aho-corasick", | ||
| name = "regex-automata" | ||
| version = "0.4.10" | ||
| version = "0.4.13" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "6b9458fa0bfeeac22b5ca447c63aaf45f28439a709ccd244698632f9aa6394d6" | ||
| checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" | ||
| dependencies = [ | ||
@@ -322,5 +323,5 @@ "aho-corasick", | ||
| name = "regex-syntax" | ||
| version = "0.8.6" | ||
| version = "0.8.8" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001" | ||
| checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" | ||
@@ -354,6 +355,7 @@ [[package]] | ||
| name = "serde" | ||
| version = "1.0.219" | ||
| version = "1.0.228" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" | ||
| checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" | ||
| dependencies = [ | ||
| "serde_core", | ||
| "serde_derive", | ||
@@ -363,6 +365,15 @@ ] | ||
| [[package]] | ||
| name = "serde_core" | ||
| version = "1.0.228" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" | ||
| dependencies = [ | ||
| "serde_derive", | ||
| ] | ||
| [[package]] | ||
| name = "serde_derive" | ||
| version = "1.0.219" | ||
| version = "1.0.228" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" | ||
| checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" | ||
| dependencies = [ | ||
@@ -376,5 +387,5 @@ "proc-macro2", | ||
| name = "serde_json" | ||
| version = "1.0.143" | ||
| version = "1.0.145" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "d401abef1d108fbd9cbaebc3e46611f4b1021f714a0597a71f41ee463f5f4a5a" | ||
| checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" | ||
| dependencies = [ | ||
@@ -385,2 +396,3 @@ "itoa", | ||
| "serde", | ||
| "serde_core", | ||
| ] | ||
@@ -390,9 +402,9 @@ | ||
| name = "syn" | ||
| version = "2.0.106" | ||
| version = "2.0.107" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" | ||
| checksum = "2a26dbd934e5451d21ef060c018dae56fc073894c5a7896f882928a76e6d081b" | ||
| dependencies = [ | ||
| "proc-macro2", | ||
| "quote", | ||
| "unicode-ident 1.0.18", | ||
| "unicode-ident 1.0.19", | ||
| ] | ||
@@ -418,9 +430,9 @@ | ||
| name = "unicode-ident" | ||
| version = "1.0.18" | ||
| version = "1.0.19" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" | ||
| checksum = "f63a545481291138910575129486daeaf8ac54aee4387fe7906919f7830c7d9d" | ||
| [[package]] | ||
| name = "unicode-ident" | ||
| version = "1.0.19" | ||
| version = "1.0.20" | ||
| dependencies = [ | ||
@@ -452,15 +464,6 @@ "criterion", | ||
| [[package]] | ||
| name = "wasi" | ||
| version = "0.14.5+wasi-0.2.4" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "a4494f6290a82f5fe584817a676a34b9d6763e8d9d18204009fb31dceca98fd4" | ||
| dependencies = [ | ||
| "wasip2", | ||
| ] | ||
| [[package]] | ||
| name = "wasip2" | ||
| version = "1.0.0+wasi-0.2.4" | ||
| version = "1.0.1+wasi-0.2.4" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "03fa2761397e5bd52002cd7e73110c71af2109aca4e521a9f40473fe685b0a24" | ||
| checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" | ||
| dependencies = [ | ||
@@ -481,11 +484,11 @@ "wit-bindgen", | ||
| name = "windows-link" | ||
| version = "0.2.0" | ||
| version = "0.2.1" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "45e46c0661abb7180e7b9c281db115305d49ca1709ab8242adf09666d2173c65" | ||
| checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" | ||
| [[package]] | ||
| name = "windows-sys" | ||
| version = "0.61.0" | ||
| version = "0.61.2" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "e201184e40b2ede64bc2ea34968b28e33622acdbbf37104f0e4a33f7abe657aa" | ||
| checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" | ||
| dependencies = [ | ||
@@ -497,5 +500,5 @@ "windows-link", | ||
| name = "wit-bindgen" | ||
| version = "0.45.1" | ||
| version = "0.46.0" | ||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||
| checksum = "5c573471f125075647d03df72e026074b7203790d41351cd6edc96f46bcccd36" | ||
| checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" | ||
@@ -502,0 +505,0 @@ [[package]] |
+2
-1
@@ -16,3 +16,3 @@ # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO | ||
| name = "unicode-ident" | ||
| version = "1.0.19" | ||
| version = "1.0.20" | ||
| authors = ["David Tolnay <dtolnay@gmail.com>"] | ||
@@ -44,2 +44,3 @@ build = false | ||
| "--generate-link-to-definition", | ||
| "--generate-macro-expansion", | ||
| "--extern-html-root-url=core=https://doc.rust-lang.org", | ||
@@ -46,0 +47,0 @@ "--extern-html-root-url=alloc=https://doc.rust-lang.org", |
+16
-24
@@ -44,7 +44,7 @@ Unicode ident | ||
| |---|---|---|---|---|---| | ||
| | **`unicode-ident`** | 10.5 K | 1.03 ns | 1.02 ns | 1.11 ns | 1.66 ns | | ||
| | **`unicode-xid`** | 12.0 K | 2.57 ns | 2.74 ns | 3.20 ns | 9.35 ns | | ||
| | **`ucd-trie`** | 10.4 K | 1.27 ns | 1.27 ns | 1.41 ns | 2.53 ns | | ||
| | **`fst`** | 144 K | 49.3 ns | 49.1 ns | 47.1 ns | 27.9 ns | | ||
| | **`roaring`** | 66.1 K | 4.10 ns | 4.05 ns | 4.02 ns | 5.12 ns | | ||
| | **`unicode-ident`** | 10.3 K | 0.41 ns | 0.44 ns | 0.44 ns | 0.93 ns | | ||
| | **`unicode-xid`** | 12.0 K | 2.43 ns | 2.50 ns | 2.85 ns | 8.65 ns | | ||
| | **`ucd-trie`** | 10.4 K | 1.28 ns | 1.25 ns | 1.20 ns | 1.97 ns | | ||
| | **`fst`** | 144 K | 50.9 ns | 51.0 ns | 48.5 ns | 26.7 ns | | ||
| | **`roaring`** | 66.1 K | 4.28 ns | 4.22 ns | 4.25 ns | 4.61 ns | | ||
@@ -239,23 +239,15 @@ Source code for the benchmark is provided in the *bench* directory of this repo | ||
| mov eax, edi | ||
| mov ecx, offset unicode_ident::ZERO | ||
| shr eax, 9 | ||
| lea rcx, [rip + unicode_ident::tables::TRIE_START] | ||
| add rcx, rax | ||
| xor eax, eax | ||
| cmp edi, 201728 | ||
| cmovb rax, rcx | ||
| test rax, rax | ||
| lea rcx, [rip + .L__unnamed_1] | ||
| cmovne rcx, rax | ||
| cmp edi, 210432 | ||
| lea rax, [rax + unicode_ident::tables::TRIE_START] | ||
| cmovb rcx, rax | ||
| movzx eax, byte ptr [rcx] | ||
| shl rax, 5 | ||
| mov ecx, edi | ||
| shr ecx, 3 | ||
| and ecx, 63 | ||
| add rcx, rax | ||
| lea rax, [rip + unicode_ident::tables::LEAF] | ||
| mov al, byte ptr [rax + rcx] | ||
| and dil, 7 | ||
| mov ecx, edi | ||
| shr al, cl | ||
| and al, 1 | ||
| mov ecx, 1539 | ||
| bextr ecx, edi, ecx | ||
| and edi, 7 | ||
| shl eax, 5 | ||
| movzx eax, byte ptr [rax + rcx + unicode_ident::tables::LEAF] | ||
| bt eax, edi | ||
| setb al | ||
| ret | ||
@@ -262,0 +254,0 @@ ``` |
+20
-11
@@ -46,7 +46,7 @@ //! [![github]](https://github.com/dtolnay/unicode-ident) [![crates-io]](https://crates.io/crates/unicode-ident) [![docs-rs]](https://docs.rs/unicode-ident) | ||
| //! |---|---|---|---|---|---| | ||
| //! | **`unicode-ident`** | 10.5 K | 1.03 ns | 1.02 ns | 1.11 ns | 1.66 ns | | ||
| //! | **`unicode-xid`** | 12.0 K | 2.57 ns | 2.74 ns | 3.20 ns | 9.35 ns | | ||
| //! | **`ucd-trie`** | 10.4 K | 1.27 ns | 1.27 ns | 1.41 ns | 2.53 ns | | ||
| //! | **`fst`** | 144 K | 49.3 ns | 49.1 ns | 47.1 ns | 27.9 ns | | ||
| //! | **`roaring`** | 66.1 K | 4.10 ns | 4.05 ns | 4.02 ns | 5.12 ns | | ||
| //! | **`unicode-ident`** | 10.3 K | 0.41 ns | 0.44 ns | 0.44 ns | 0.93 ns | | ||
| //! | **`unicode-xid`** | 12.0 K | 2.43 ns | 2.50 ns | 2.85 ns | 8.65 ns | | ||
| //! | **`ucd-trie`** | 10.4 K | 1.28 ns | 1.25 ns | 1.20 ns | 1.97 ns | | ||
| //! | **`fst`** | 144 K | 50.9 ns | 51.0 ns | 48.5 ns | 26.7 ns | | ||
| //! | **`roaring`** | 66.1 K | 4.28 ns | 4.22 ns | 4.25 ns | 4.61 ns | | ||
| //! | ||
@@ -246,4 +246,8 @@ //! Source code for the benchmark is provided in the *bench* directory of this | ||
| #![no_std] | ||
| #![doc(html_root_url = "https://docs.rs/unicode-ident/1.0.19")] | ||
| #![allow(clippy::doc_markdown, clippy::must_use_candidate)] | ||
| #![doc(html_root_url = "https://docs.rs/unicode-ident/1.0.20")] | ||
| #![allow( | ||
| clippy::doc_markdown, | ||
| clippy::must_use_candidate, | ||
| clippy::unreadable_literal | ||
| )] | ||
@@ -255,8 +259,10 @@ #[rustfmt::skip] | ||
| static ZERO: u8 = 0; | ||
| /// Whether the character has the Unicode property XID\_Start. | ||
| pub fn is_xid_start(ch: char) -> bool { | ||
| if ch.is_ascii() { | ||
| return ASCII_START.0[ch as usize]; | ||
| return ASCII_START & (1 << ch as u128) != 0; | ||
| } | ||
| let chunk = *TRIE_START.0.get(ch as usize / 8 / CHUNK).unwrap_or(&0); | ||
| let chunk = *TRIE_START.0.get(ch as usize / 8 / CHUNK).unwrap_or(&ZERO); | ||
| let offset = chunk as usize * CHUNK / 2 + ch as usize / 8 % CHUNK; | ||
@@ -269,7 +275,10 @@ unsafe { LEAF.0.get_unchecked(offset) }.wrapping_shr(ch as u32 % 8) & 1 != 0 | ||
| if ch.is_ascii() { | ||
| return ASCII_CONTINUE.0[ch as usize]; | ||
| return ASCII_CONTINUE & (1 << ch as u128) != 0; | ||
| } | ||
| let chunk = *TRIE_CONTINUE.0.get(ch as usize / 8 / CHUNK).unwrap_or(&0); | ||
| let chunk = *TRIE_CONTINUE | ||
| .0 | ||
| .get(ch as usize / 8 / CHUNK) | ||
| .unwrap_or(&ZERO); | ||
| let offset = chunk as usize * CHUNK / 2 + ch as usize / 8 % CHUNK; | ||
| unsafe { LEAF.0.get_unchecked(offset) }.wrapping_shr(ch as u32 % 8) & 1 != 0 | ||
| } |
@@ -16,3 +16,3 @@ #![allow(clippy::let_underscore_untyped, clippy::unreadable_literal)] | ||
| + size_of_val(&tables::LEAF); | ||
| assert_eq!(10472, size); | ||
| assert_eq!(10248, size); | ||
| } | ||
@@ -19,0 +19,0 @@ |
Sorry, the diff of this file is not supported yet
Sorry, the diff of this file is too big to display