header-generator
Advanced tools
Comparing version 0.0.1-beta.1 to 0.0.1-beta.2
{ | ||
"name": "header-generator", | ||
"version": "0.0.1-beta.1", | ||
"version": "0.0.1-beta.2", | ||
"description": "NodeJs package for generating browser-like headers.", | ||
@@ -17,3 +17,3 @@ "author": { | ||
"dependencies": { | ||
"bayesian-network": "git+https://github.com/apify/generative-bayesian-network.git#main", | ||
"generative-bayesian-network": "0.1.0-beta.1", | ||
"ow": "^0.23.0" | ||
@@ -20,0 +20,0 @@ }, |
@@ -69,3 +69,3 @@ # Header generator | ||
* [`new HeaderGenerator(options)`](#new_HeaderGenerator_new) | ||
* [`.getHeaders(options)`](#HeaderGenerator+getHeaders) | ||
* [`.getHeaders(options, requestDependentHeaders)`](#HeaderGenerator+getHeaders) | ||
@@ -88,3 +88,3 @@ | ||
#### `headerGenerator.getHeaders(options)` | ||
#### `headerGenerator.getHeaders(options, requestDependentHeaders)` | ||
Generates a single set of headers using a combination of the default options specified in the constructor | ||
@@ -97,2 +97,3 @@ and their possible overrides provided here. | ||
| options | [<code>HeaderGeneratorOptions</code>](#HeaderGeneratorOptions) | specifies options that should be overridden for this one call | | ||
| requestDependentHeaders | <code>Object</code> | specifies known values of headers dependent on the particular request | | ||
@@ -102,34 +103,12 @@ | ||
<a name="prepareHttpBrowserObject"></a> | ||
<a name="BrowserSpecification"></a> | ||
### `prepareHttpBrowserObject(httpBrowserString)` | ||
### `BrowserSpecification` | ||
| Param | Type | Description | | ||
| --- | --- | --- | | ||
| httpBrowserString | <code>string</code> | a string containing the browser name, version and http version, such as "chrome/88.0.4324.182|2" | | ||
* * * | ||
<a name="prepareBrowserObject"></a> | ||
### `prepareBrowserObject(browserString)` | ||
| Param | Type | Description | | ||
| --- | --- | --- | | ||
| browserString | <code>string</code> | a string containing the browser name and version, such as "chrome/88.0.4324.182" | | ||
* * * | ||
<a name="Browser"></a> | ||
### `Browser` | ||
| Param | Type | Description | | ||
| --- | --- | --- | | ||
| name | <code>string</code> | One of "chrome", "firefox" and "safari". | | ||
| name | <code>string</code> | One of `chrome`, `firefox` and `safari`. | | ||
| minVersion | <code>number</code> | Minimal version of browser used. | | ||
| maxVersion | <code>number</code> | Maximal version of browser used. | | ||
| httpVersion | <code>string</code> | Either 1 or 2. If none specified the global `httpVersion` is used. | | ||
| httpVersion | <code>string</code> | Http version to be used to generate headers (the headers differ depending on the version). Either 1 or 2. If none specified the httpVersion specified in `HeaderGeneratorOptions` is used. | | ||
@@ -145,7 +124,7 @@ | ||
| --- | --- | --- | | ||
| browsers | [<code>Array.<Browser></code>](#Browser) | List of Browsers to generate the headers for. | | ||
| operatingSystems | <code>Array.<string></code> | List of operating systems to generate the headers for. The options are "windows", "macos", "linux", "android" and "ios". | | ||
| devices | <code>Array.<string></code> | List of devices to generate the headers for. Options are "desktop" and "mobile". | | ||
| locales | <code>Array.<string></code> | List of at most 10 languages to include in the `Accept-Language` request header. | | ||
| httpVersion | <code>string</code> | Http version to be used to generate headers (the headers differ depending on the version). Can be either 1 or 2. | | ||
| browsers | [<code>Array.<BrowserSpecification></code>](#BrowserSpecification) | List of BrowserSpecifications to generate the headers for. | | ||
| operatingSystems | <code>Array.<string></code> | List of operating systems to generate the headers for. The options are `windows`, `macos`, `linux`, `android` and `ios`. | | ||
| devices | <code>Array.<string></code> | List of devices to generate the headers for. Options are `desktop` and `mobile`. | | ||
| locales | <code>Array.<string></code> | List of at most 10 languages to include in the [Accept-Language](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language) request header in the language format accepted by that header, for example `en`, `en-US` or `de`. | | ||
| httpVersion | <code>string</code> | Http version to be used to generate headers (the headers differ depending on the version). Can be either 1 or 2. Default value is 2. | | ||
@@ -152,0 +131,0 @@ |
@@ -1,15 +0,23 @@ | ||
const fs = require('fs'); | ||
const path = require('path'); | ||
const { BayesianNetwork } = require('bayesian-network'); | ||
const { BayesianNetwork } = require('generative-bayesian-network'); | ||
const { default: ow } = require('ow'); | ||
const headerNetworkDefinitionPath = path.join(__dirname, './data_files/header-network-definition.json'); | ||
const inputNetworkDefinitionPath = path.join(__dirname, './data_files/input-network-definition.json'); | ||
const browserHelperFilePath = path.join(__dirname, './data_files/browser-helper-file.json'); | ||
const BROWSER_HTTP_NODE_NAME = '*BROWSER_HTTP'; | ||
const OPERATING_SYSTEM_NODE_NAME = '*OPERATING_SYSTEM'; | ||
const DEVICE_NODE_NAME = '*DEVICE'; | ||
const browserHttpNodeName = '*BROWSER_HTTP'; | ||
const operatingSystemNodeName = '*OPERATING_SYSTEM'; | ||
const deviceNodeName = '*DEVICE'; | ||
const missingValueDatasetToken = '*MISSING_VALUE*'; | ||
const MISSING_VALUE_DATASET_TOKEN = '*MISSING_VALUE*'; | ||
const headerNetworkDefinition = require('./data_files/header-network-definition.json'); | ||
const inputNetworkDefinition = require('./data_files/input-network-definition.json'); | ||
const headersOrder = require('./data_files/headers-order.json'); | ||
const uniqueBrowserStrings = require('./data_files/browser-helper-file.json'); | ||
const uniqueBrowsers = []; | ||
for (const browserString of uniqueBrowserStrings) { | ||
// There are headers without user agents in the datasets we used to configure the generator. They should be disregarded. | ||
if (browserString !== MISSING_VALUE_DATASET_TOKEN) { | ||
uniqueBrowsers.push(prepareHttpBrowserObject(browserString)); | ||
} | ||
} | ||
const http2SecFetchAttributes = { | ||
@@ -29,2 +37,5 @@ mode: 'sec-fetch-mode', | ||
/* | ||
* @private | ||
*/ | ||
function getRandomInteger(minimum, maximum) { | ||
@@ -34,2 +45,5 @@ return minimum + Math.floor(Math.random() * (maximum - minimum + 1)); | ||
/* | ||
* @private | ||
*/ | ||
function shuffleArray(array) { | ||
@@ -49,2 +63,5 @@ if (array.length > 1) { | ||
/* | ||
* @private | ||
*/ | ||
function browserVersionIsLesserOrEquals(browserVersionL, browserVersionR) { | ||
@@ -55,7 +72,9 @@ return browserVersionL[0] <= browserVersionR[0]; | ||
/** | ||
* Extract structured information about a browser and http version in the form of an object from httpBrowserString. | ||
* @param {string} httpBrowserString - a string containing the browser name, version and http version, such as "chrome/88.0.4324.182|2" | ||
* @private | ||
*/ | ||
function prepareHttpBrowserObject(httpBrowserString) { | ||
const [browserString, httpVersion] = httpBrowserString.split('|'); | ||
const browserObject = browserString === missingValueDatasetToken ? { name: missingValueDatasetToken } : prepareBrowserObject(browserString); | ||
const browserObject = browserString === MISSING_VALUE_DATASET_TOKEN ? { name: MISSING_VALUE_DATASET_TOKEN } : prepareBrowserObject(browserString); | ||
return { | ||
@@ -71,3 +90,5 @@ ...browserObject, | ||
/** | ||
* Extract structured information about a browser in the form of an object from browserString. | ||
* @param {string} browserString - a string containing the browser name and version, such as "chrome/88.0.4324.182" | ||
* @private | ||
*/ | ||
@@ -89,3 +110,3 @@ function prepareBrowserObject(browserString) { | ||
const browserShape = { | ||
const browserSpecificationShape = { | ||
name: ow.string, | ||
@@ -98,3 +119,3 @@ minVersion: ow.optional.number, | ||
const headerGeneratorOptionsShape = { | ||
browsers: ow.optional.array.ofType(ow.object.exactShape(browserShape)), | ||
browsers: ow.optional.array.ofType(ow.object.exactShape(browserSpecificationShape)), | ||
operatingSystems: ow.optional.array.ofType(ow.string), | ||
@@ -107,17 +128,20 @@ devices: ow.optional.array.ofType(ow.string), | ||
/** | ||
* @typedef Browser | ||
* @param {string} name - One of "chrome", "firefox" and "safari". | ||
* @typedef BrowserSpecification | ||
* @param {string} name - One of `chrome`, `firefox` and `safari`. | ||
* @param {number} minVersion - Minimal version of browser used. | ||
* @param {number} maxVersion - Maximal version of browser used. | ||
* @param {string} httpVersion - Either 1 or 2. If none specified the global `httpVersion` is used. | ||
* @param {string} httpVersion - Http version to be used to generate headers (the headers differ depending on the version). | ||
* Either 1 or 2. If none specified the httpVersion specified in `HeaderGeneratorOptions` is used. | ||
*/ | ||
/** | ||
* @typedef HeaderGeneratorOptions | ||
* @param {Array<Browser>} browsers - List of Browsers to generate the headers for. | ||
* @param {Array<BrowserSpecification>} browsers - List of BrowserSpecifications to generate the headers for. | ||
* @param {Array<string>} operatingSystems - List of operating systems to generate the headers for. | ||
* The options are "windows", "macos", "linux", "android" and "ios". | ||
* @param {Array<string>} devices - List of devices to generate the headers for. Options are "desktop" and "mobile". | ||
* @param {Array<string>} locales - List of at most 10 languages to include in the `Accept-Language` request header. | ||
* The options are `windows`, `macos`, `linux`, `android` and `ios`. | ||
* @param {Array<string>} devices - List of devices to generate the headers for. Options are `desktop` and `mobile`. | ||
* @param {Array<string>} locales - List of at most 10 languages to include in the | ||
* [Accept-Language](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language) request header | ||
* in the language format accepted by that header, for example `en`, `en-US` or `de`. | ||
* @param {string} httpVersion - Http version to be used to generate headers (the headers differ depending on the version). | ||
* Can be either 1 or 2. | ||
* Can be either 1 or 2. Default value is 2. | ||
*/ | ||
@@ -134,36 +158,12 @@ | ||
ow(options, 'HeaderGeneratorOptions', ow.object.exactShape(headerGeneratorOptionsShape)); | ||
this.defaultOptions = options; | ||
const uniqueBrowserStrings = JSON.parse(fs.readFileSync(browserHelperFilePath, { encoding: 'utf8' })); | ||
this.uniqueBrowsers = []; | ||
for (const browserString of uniqueBrowserStrings) { | ||
if (browserString === missingValueDatasetToken) { | ||
this.uniqueBrowsers.push({ | ||
name: missingValueDatasetToken, | ||
}); | ||
} else { | ||
this.uniqueBrowsers.push(prepareHttpBrowserObject(browserString)); | ||
} | ||
this.defaultOptions = JSON.parse(JSON.stringify(options)); | ||
// Use a default setup when the necessary values are not provided | ||
if (!this.defaultOptions.locales) { | ||
this.defaultOptions.locales = ['en-US']; | ||
} | ||
this.inputGeneratorNetwork = new BayesianNetwork(inputNetworkDefinitionPath); | ||
this.headerGeneratorNetwork = new BayesianNetwork(headerNetworkDefinitionPath); | ||
} | ||
/** | ||
* Generates a single set of headers using a combination of the default options specified in the constructor | ||
* and their possible overrides provided here. | ||
* @param {HeaderGeneratorOptions} options - specifies options that should be overridden for this one call | ||
*/ | ||
getHeaders(options = {}) { | ||
ow(options, 'HeaderGeneratorOptions', ow.object.exactShape(headerGeneratorOptionsShape)); | ||
const headerOptions = { ...this.defaultOptions, ...options }; | ||
// Set up defaults | ||
if (!headerOptions.locales) { | ||
headerOptions.locales = ['en-US']; | ||
if (!this.defaultOptions.httpVersion) { | ||
this.defaultOptions.httpVersion = '2'; | ||
} | ||
if (!headerOptions.httpVersion) { | ||
headerOptions.httpVersion = '2'; | ||
} | ||
if (!headerOptions.browsers) { | ||
headerOptions.browsers = [ | ||
if (!this.defaultOptions.browsers) { | ||
this.defaultOptions.browsers = [ | ||
{ name: 'chrome' }, | ||
@@ -174,4 +174,4 @@ { name: 'firefox' }, | ||
} | ||
if (!headerOptions.operatingSystems) { | ||
headerOptions.operatingSystems = [ | ||
if (!this.defaultOptions.operatingSystems) { | ||
this.defaultOptions.operatingSystems = [ | ||
'windows', | ||
@@ -185,2 +185,16 @@ 'macos', | ||
this.inputGeneratorNetwork = new BayesianNetwork(inputNetworkDefinition); | ||
this.headerGeneratorNetwork = new BayesianNetwork(headerNetworkDefinition); | ||
} | ||
/** | ||
* Generates a single set of headers using a combination of the default options specified in the constructor | ||
* and their possible overrides provided here. | ||
* @param {HeaderGeneratorOptions} options - specifies options that should be overridden for this one call | ||
* @param {Object} requestDependentHeaders - specifies known values of headers dependent on the particular request | ||
*/ | ||
getHeaders(options = {}, requestDependentHeaders = {}) { | ||
ow(options, 'HeaderGeneratorOptions', ow.object.exactShape(headerGeneratorOptionsShape)); | ||
const headerOptions = JSON.parse(JSON.stringify({ ...this.defaultOptions, ...options })); | ||
headerOptions.browsers = headerOptions.browsers.map((browserObject) => { | ||
@@ -198,3 +212,3 @@ if (!browserObject.httpVersion) { | ||
for (const browser of headerOptions.browsers) { | ||
for (const browserOption of this.uniqueBrowsers) { | ||
for (const browserOption of uniqueBrowsers) { | ||
if (browser.name === browserOption.name) { | ||
@@ -210,12 +224,12 @@ if ((!browser.minVersion || browserVersionIsLesserOrEquals([browser.minVersion], browserOption.version)) | ||
possibleAttributeValues[browserHttpNodeName] = browserHttpOptions; | ||
possibleAttributeValues[BROWSER_HTTP_NODE_NAME] = browserHttpOptions; | ||
possibleAttributeValues[operatingSystemNodeName] = headerOptions.operatingSystems; | ||
possibleAttributeValues[OPERATING_SYSTEM_NODE_NAME] = headerOptions.operatingSystems; | ||
if (headerOptions.devices) { | ||
possibleAttributeValues[deviceNodeName] = headerOptions.devices; | ||
possibleAttributeValues[DEVICE_NODE_NAME] = headerOptions.devices; | ||
} | ||
// Generate a sample of input attributes consistent with the data used to create the definition files if possible. | ||
const inputSample = this.inputGeneratorNetwork.generateSampleWheneverPossible(possibleAttributeValues); | ||
const inputSample = this.inputGeneratorNetwork.generateConsistentSampleWhenPossible(possibleAttributeValues); | ||
@@ -227,6 +241,6 @@ if (!inputSample) { | ||
// Generate the actual headers | ||
const generatedSample = this.headerGeneratorNetwork.generateSample(inputSample); | ||
let generatedSample = this.headerGeneratorNetwork.generateSample(inputSample); | ||
// Manually fill the accept-language header with random ordering of the locales from input | ||
const generatedHttpAndBrowser = prepareHttpBrowserObject(generatedSample[browserHttpNodeName]); | ||
const generatedHttpAndBrowser = prepareHttpBrowserObject(generatedSample[BROWSER_HTTP_NODE_NAME]); | ||
let secFetchAttributeNames = http2SecFetchAttributes; | ||
@@ -291,6 +305,16 @@ let acceptLanguageFieldName = 'accept-language'; | ||
for (const attribute of Object.keys(generatedSample)) { | ||
if (attribute.startsWith('*') || generatedSample[attribute] === missingValueDatasetToken) delete generatedSample[attribute]; | ||
if (attribute.startsWith('*') || generatedSample[attribute] === MISSING_VALUE_DATASET_TOKEN) delete generatedSample[attribute]; | ||
} | ||
return generatedSample; | ||
generatedSample = { ...generatedSample, ...requestDependentHeaders }; | ||
// Order the headers in an order depending on the browser | ||
const orderedSample = {}; | ||
for (const attribute of headersOrder[generatedHttpAndBrowser.name]) { | ||
if (attribute in generatedSample) { | ||
orderedSample[attribute] = generatedSample[attribute]; | ||
} | ||
} | ||
return orderedSample; | ||
} | ||
@@ -297,0 +321,0 @@ } |
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
License Policy Violation
License: This package is not allowed per your license policy. Review the package's license to ensure compliance.
Found 1 instance in 1 package
Git dependency
Supply chain risk: Contains a dependency which resolves to a remote git URL. Dependencies fetched from git URLs are not immutable and can be used to inject untrusted code or reduce the likelihood of a reproducible install.
Found 1 instance in 1 package
Filesystem access
Supply chain risk: Accesses the file system, and could potentially read sensitive data.
Found 1 instance in 1 package
4040127
9
362
0
0
129