
Research
Malicious npm Package Brand-Squats TanStack to Exfiltrate Environment Variables
A brand-squatted TanStack npm package used postinstall scripts to steal .env files and exfiltrate developer secrets to an attacker-controlled endpoint.
tokenize-this
Advanced tools
It turns a string into tokens.
// Basic usage: collect every token emitted for a short sentence.
var tokenizer = new TokenizeThis();
var str = 'Tokenize this!';
var tokens = [];

tokenizer.tokenize(str, (tok) => {
    tokens.push(tok);
});

// The words and the trailing "!" arrive as separate tokens.
equals(tokens, ['Tokenize', 'this', '!']);
By default, it can tokenize math-based strings.
// Math example using the default configuration (no config object passed).
var tokenizer = new TokenizeThis();
var str = '5 + 6 -(4/2) + gcd(10, 5)';
var tokens = [];

tokenizer.tokenize(str, (tok) => {
    tokens.push(tok);
});

// Numbers come back as number literals; operators, parentheses and commas
// are emitted as their own tokens.
equals(tokens, [
    5, '+', 6,
    '-', '(', 4, '/', 2, ')',
    '+', 'gcd', '(', 10, ',', 5, ')'
]);
...Or SQL.
// SQL example: tokens reported with a surrounding quote character are
// re-wrapped in it, so the output shows which parts were quoted.
var tokenizer = new TokenizeThis();
var str = 'SELECT COUNT(id), 5+6 FROM `users` WHERE name = "shaun persad" AND hobby IS NULL';
var tokens = [];

tokenizer.tokenize(str, (tok, quote) => {
    // Only re-wrap when a surrounding quote character was reported.
    tokens.push(quote ? quote + tok + quote : tok);
});

equals(tokens, [
    'SELECT',
    'COUNT', '(', 'id', ')',
    ',',
    5, '+', 6,
    'FROM', '`users`',
    'WHERE',
    'name', '=', '"shaun persad"',
    'AND',
    'hobby', 'IS', null
]);
npm install tokenize-this.
// or if in the browser: <script src="tokenize-this/tokenize-this.min.js"></script>
Require it, create a new instance, then call `tokenize`.
// var TokenizeThis = require('tokenize-this');
// OR
// var TokenizeThis = require('tokenize-this/tokenize-this.min.js'); // for node.js < 4.0
// OR
// <script src="tokenize-this/tokenize-this.min.js"></script> <!-- if in browser -->
// Create a tokenizer with the default configuration and gather its tokens.
var tokenizer = new TokenizeThis();
var str = 'Hi!, I want to add 5+6';
var tokens = [];

tokenizer.tokenize(str, (tok) => {
    tokens.push(tok);
});

// Punctuation and the "+" operator are split out; 5 and 6 are numbers.
equals(tokens, [
    'Hi', '!', ',',
    'I', 'want', 'to', 'add',
    5, '+', 6
]);
This can be used to tokenize many forms of data, like JSON into key-value pairs.
// Configuration for tokenizing JSON-like text into keys and values.
var jsonConfig = {
    // Braces and brackets are emitted as tokens of their own.
    shouldTokenize: ['{', '}', '[', ']'],
    // Double-quoted strings are kept together as a single token.
    shouldMatch: ['"'],
    // Whitespace, ':' and ',' separate tokens and do not appear in the output.
    shouldDelimitBy: [' ', "\n", "\r", "\t", ':', ','],
    convertLiterals: true
};

var tokenizer = new TokenizeThis(jsonConfig);
var str = '[{name:"Shaun Persad", id: 5}, { gender : null}]';
var tokens = [];

tokenizer.tokenize(str, (tok) => {
    tokens.push(tok);
});

equals(tokens, [
    '[',
    '{', 'name', 'Shaun Persad', 'id', 5, '}',
    '{', 'gender', null, '}',
    ']'
]);
Here it is tokenizing XML like a boss.
// Configuration for tokenizing XML markup.
var xmlConfig = {
    // Tag delimiters and '=' are emitted as tokens of their own.
    shouldTokenize: ['<?', '?>', '<!', '<', '</', '>', '/>', '='],
    // Double-quoted attribute values are kept together as a single token.
    shouldMatch: ['"'],
    // Whitespace separates tokens and does not appear in the output.
    shouldDelimitBy: [' ', "\n", "\r", "\t"],
    convertLiterals: true
};

var tokenizer = new TokenizeThis(xmlConfig);
var str = `
<?xml-stylesheet href="catalog.xsl" type="text/xsl"?>
<!DOCTYPE catalog SYSTEM "catalog.dtd">
<catalog>
<product description="Cardigan Sweater" product_image="cardigan.jpg">
<size description="Large" />
<color_swatch image="red_cardigan.jpg">
Red
</color_swatch>
</product>
</catalog>
`;
var tokens = [];

tokenizer.tokenize(str, (tok) => {
    tokens.push(tok);
});

// One row per line of the XML input above.
equals(tokens, [
    '<?', 'xml-stylesheet', 'href', '=', 'catalog.xsl', 'type', '=', 'text/xsl', '?>',
    '<!', 'DOCTYPE', 'catalog', 'SYSTEM', 'catalog.dtd', '>',
    '<', 'catalog', '>',
    '<', 'product', 'description', '=', 'Cardigan Sweater', 'product_image', '=', 'cardigan.jpg', '>',
    '<', 'size', 'description', '=', 'Large', '/>',
    '<', 'color_swatch', 'image', '=', 'red_cardigan.jpg', '>',
    'Red',
    '</', 'color_swatch', '>',
    '</', 'product', '>',
    '</', 'catalog', '>'
]);
The above examples are the first steps in writing parsers for those formats. The next would be parsing the stream of tokens based on the format-specific rules, e.g. SQL.
`tokenize(str, forEachToken)` sends each token to the `forEachToken(token:String, surroundedBy:String, index:Integer)` callback.
// Demonstrates all three callback arguments: the token, the quote character
// it was surrounded by, and the index reported for the token.
var tokenizer = new TokenizeThis();
var str = 'Tokenize "this"!';
var tokens = [];
var indices = [];

var forEachToken = (token, surroundedBy, index) => {
    // Put the reported quote character back around the token.
    tokens.push(`${surroundedBy}${token}${surroundedBy}`);
    indices.push(index);
};

tokenizer.tokenize(str, forEachToken);

equals(tokens, ['Tokenize', '"this"', '!']);
equals(indices, [8, 14, 15]);
By default, it converts `true`, `false`, `null`, and numbers into their literal versions.
// With the default configuration, boolean, null and numeric text is
// converted to the matching JavaScript literal.
var tokenizer = new TokenizeThis();
var str = 'true false null TRUE FALSE NULL 1 2 3.4 5.6789';
var tokens = [];

tokenizer.tokenize(str, (tok, quote) => {
    tokens.push(tok);
});

// Upper-case TRUE/FALSE/NULL are converted as well.
equals(tokens, [
    true, false, null,
    true, false, null,
    1, 2, 3.4, 5.6789
]);
The default config object used when no config is supplied.
// Expected value of TokenizeThis.defaultConfig.
var config = {
    shouldTokenize: [
        '(', ')', ',',
        '*', '/', '%', '+', '-',
        '=', '!=', '!',
        '<', '>', '<=', '>=',
        '^'
    ],
    // Quote characters whose enclosed text is treated as one whole token.
    shouldMatch: ['"', "'", '`'],
    shouldDelimitBy: [' ', "\n", "\r", "\t"],
    // Convert true/false/null and numbers into their literal versions.
    convertLiterals: true,
    // A single backslash.
    escapeCharacter: "\\"
};

equals(TokenizeThis.defaultConfig, config);
You can change converting to literals with the convertLiterals config option.
// Turn off literal conversion so every token is left as a string.
var config = { convertLiterals: false };

var tokenizer = new TokenizeThis(config);
var str = 'true false null TRUE FALSE NULL 1 2 3.4 5.6789';
var tokens = [];

tokenizer.tokenize(str, (tok, quote) => {
    tokens.push(tok);
});

// Same input as the default-conversion example, but nothing is converted.
equals(tokens, [
    'true', 'false', 'null',
    'TRUE', 'FALSE', 'NULL',
    '1', '2', '3.4', '5.6789'
]);
Any strings surrounded by the quotes specified in the shouldMatch option are treated as whole tokens.
// Use a custom set of quote characters, including '#'.
var config = { shouldMatch: ['"', '`', '#'] };

var tokenizer = new TokenizeThis(config);
var str = '"hi there" `this is a test` #of quotes#';
var tokens = [];
var tokensQuoted = [];

tokenizer.tokenize(str, (tok, quote) => {
    // Record the bare token and the token wrapped in its quote character.
    tokens.push(tok);
    tokensQuoted.push(`${quote}${tok}${quote}`);
});

equals(tokens, ['hi there', 'this is a test', 'of quotes']);
equals(tokensQuoted, ['"hi there"', '`this is a test`', '#of quotes#']);
Quotes can be escaped via a backslash.
// The input contains backslash-escaped double quotes inside a quoted string.
var tokenizer = new TokenizeThis();
var str = 'These are "\\"quotes\\""';
var tokens = [];

tokenizer.tokenize(str, (tok, quote) => {
    tokens.push(tok);
});

// The escaped quotes stay in the token; the backslashes do not.
equals(tokens, ['These', 'are', '"quotes"']);
The escape character can be specified with the escapeCharacter option.
// Use '#' as the escape character in place of the default backslash.
var config = { escapeCharacter: '#' };

var tokenizer = new TokenizeThis(config);
var str = 'These are "#"quotes#""';
var tokens = [];

tokenizer.tokenize(str, (tok, quote) => {
    tokens.push(tok);
});

// The '#'-escaped quotes stay in the token; the '#' characters do not.
equals(tokens, ['These', 'are', '"quotes"']);
FAQs
Turns a string into tokens.
The npm package tokenize-this receives a total of 16,979 weekly downloads. As such, tokenize-this's popularity is classified as popular.
We found that tokenize-this demonstrated an unhealthy version release cadence and project activity because the last version was released a year ago. It has 1 open source maintainer collaborating on the project.
Did you know?

Socket for GitHub automatically highlights issues in each pull request and monitors the health of all your open source dependencies. Discover the contents of your packages and block harmful activity before you install or update your dependencies.

Research
A brand-squatted TanStack npm package used postinstall scripts to steal .env files and exfiltrate developer secrets to an attacker-controlled endpoint.

Research
Compromised SAP CAP npm packages download and execute unverified binaries, creating urgent supply chain risk for affected developers and CI/CD environments.

Company News
Socket has acquired Secure Annex to expand extension security across browsers, IDEs, and AI tools.