@npmcli/arborist
Advanced tools
| const { isNodeGypPackage } = require('@npmcli/node-gyp') | ||
| // Returns the install-relevant lifecycle scripts that would run for a | ||
| // given arborist Node, or `{}` if there are none. | ||
| // | ||
| // Includes: | ||
| // - explicit preinstall/install/postinstall | ||
| // - prepare, but only for non-registry sources (git, file, link, remote) | ||
| // - synthetic `node-gyp rebuild`, when `binding.gyp` is present on disk | ||
| // and the package does not opt out via `gypfile: false` or define its | ||
| // own install / preinstall script | ||
| // Lifecycle-script enumeration boundary. | ||
| // | ||
| // IMPORTANT: this helper decides whether `prepare` should be included | ||
| // in the enumerated install scripts (true for non-registry sources only). | ||
| // It is NOT a policy-matching predicate. The policy matcher in | ||
| // script-allowed.js uses `isRegistryNode`, which is strictly tied to | ||
| // versionFromTgz(node.resolved). The two helpers exist separately on | ||
| // purpose: | ||
| // | ||
| // - `hasNonRegistryShape` (here): "should we consider running prepare | ||
| // on this node?" — a yes/no for what to enumerate. | ||
| // - `isRegistryNode` (script-allowed.js): "do we trust this node's | ||
| // identity enough to apply a policy entry?" — a security check. | ||
| // | ||
| // The looser fallback here (treating unknown-resolved nodes as registry, | ||
| // thus skipping `prepare`) is the safer default for enumeration: we'd | ||
| // rather omit a script we should have run than synthesise one for a | ||
| // non-registry source we couldn't confirm. The policy matcher's stricter | ||
| // behaviour is correct for its boundary; the two helpers must not be | ||
| // merged. | ||
/**
 * Enumeration-side check: does `node` look like it came from a
 * non-registry source (so that `prepare` should be enumerated)?
 *
 * @param {object} node - arborist Node or look-alike; reads
 *   `isRegistryDependency` and `resolved`.
 * @returns {boolean} true when the node is considered non-registry.
 */
const hasNonRegistryShape = (node) => {
  // When the node exposes a boolean flag, it is authoritative.
  if (typeof node.isRegistryDependency === 'boolean') {
    return !node.isRegistryDependency
  }
  // Unknown origin: treated as registry, so `prepare` is not enumerated.
  if (!node.resolved) {
    return false
  }
  // Anything that is not shaped like `<host>/<path>/-/<file>-<digit>…`
  // over http(s) counts as non-registry.
  return !/^https?:\/\/[^/]+\/.+\/-\/[^/]+-\d/.test(node.resolved)
}
/**
 * Collect the install-relevant lifecycle scripts for a node.
 *
 * @param {object} node - arborist Node or look-alike; reads `package`,
 *   `path` and `hasInstallScript`.
 * @returns {Promise<object>} map of lifecycle event name to script body,
 *   or `{}` when nothing was found.
 */
const getInstallScripts = async (node) => {
  /* istanbul ignore next: arborist Nodes always carry a `package` object;
     defensive fallbacks for non-arborist callers. */
  const pkg = node.package || {}
  /* istanbul ignore next */
  const scripts = pkg.scripts || {}
  const collected = {}

  // Explicit install-time events are copied over only when non-empty.
  for (const event of ['preinstall', 'install', 'postinstall']) {
    if (scripts[event]) {
      collected[event] = scripts[event]
    }
  }

  // `prepare` is enumerated only for nodes that look non-registry.
  if (scripts.prepare && hasNonRegistryShape(node)) {
    collected.prepare = scripts.prepare
  }

  // Synthetic `node-gyp rebuild`: only when the package defines neither
  // preinstall nor install, has not opted out via `gypfile: false`, and the
  // gyp probe on `node.path` succeeds. A rejected probe counts as "no".
  const hasExplicitGypGate = !!(collected.preinstall || collected.install)
  if (
    !hasExplicitGypGate &&
    pkg.gypfile !== false &&
    await isNodeGypPackage(node.path).catch(() => false)
  ) {
    collected.install = 'node-gyp rebuild'
  }

  // Lockfile-only nodes (e.g. `npm ci` before reify) carry
  // `hasInstallScript: true` but no script bodies. Emit a sentinel so the
  // caller still sees that install scripts exist; the real body cannot be
  // recovered without fetching the manifest.
  if (Object.keys(collected).length === 0 && node.hasInstallScript === true) {
    collected.install = '(install scripts present)'
  }

  return collected
}
| module.exports = getInstallScripts | ||
| module.exports.getInstallScripts = getInstallScripts |
| const npa = require('npm-package-arg') | ||
| const semver = require('semver') | ||
| const versionFromTgz = require('./version-from-tgz.js') | ||
| // Identity matcher for the allowScripts policy. | ||
| // | ||
| // Returns: | ||
| // - true: at least one allow entry matches and no deny entry matches | ||
| // - false: at least one deny entry matches (deny wins on conflict) | ||
| // - null: no entry matches (unreviewed) | ||
| // | ||
| // `policy` is a flat object of `spec-key -> boolean`, where spec-key is | ||
| // anything `npm-package-arg` can parse. `node` is an arborist Node. | ||
| // | ||
| // Identity rules (see RFC npm/rfcs#868): | ||
| // - registry deps match by the name+version parsed from the lockfile's | ||
| // resolved URL, NOT by `node.packageName` / `node.version`. Those two | ||
| // getters return `node.package.name` / `node.package.version`, which | ||
| // come from the tarball's own package.json and are therefore | ||
| // attacker-controlled. A package can publish a tarball claiming any | ||
| // name; the only trusted name is the one baked into the registry URL. | ||
| // - tarball / file / link / remote: exact match on node.resolved | ||
| // - git: match on hosted.ssh() plus a short-SHA prefix of the | ||
| // resolved committish | ||
/**
 * Evaluate the allowScripts policy for one node.
 *
 * @param {object} node - arborist Node or look-alike.
 * @param {object|null|undefined} policy - flat `spec-key -> boolean` map.
 * @returns {boolean|null} `false` if any matching entry denies, `true` if
 *   at least one matching entry allows and none denies, `null` otherwise.
 */
const isScriptAllowed = (node, policy) => {
  // Bundled dependencies cannot be allowlisted in Phase 1: matching by the
  // name/version claimed inside a bundled tarball would reintroduce
  // manifest confusion. They are always reported as unreviewed.
  if (node.inBundle) {
    return null
  }
  if (!policy || typeof policy !== 'object') {
    return null
  }

  let anyAllow = false
  let anyDeny = false
  for (const [key, value] of Object.entries(policy)) {
    if (!matches(node, key)) {
      continue
    }
    if (value === false) {
      anyDeny = true
      continue
    }
    /* istanbul ignore else: policy values are strictly true/false;
       defensive guard against unexpected coercions. */
    if (value === true) {
      anyAllow = true
    }
  }

  // Deny wins over allow when both kinds of entry matched.
  if (anyDeny) {
    return false
  }
  return anyAllow ? true : null
}
/**
 * Test a single policy key against a node by dispatching on the spec type
 * that `npa` assigns to the key.
 *
 * @param {object} node - arborist Node or look-alike.
 * @param {string} key - policy key, parsed with `npa`.
 * @returns {boolean} true when the key identifies this node.
 */
const matches = (node, key) => {
  let parsed
  try {
    parsed = npa(key)
  } catch {
    // A key that cannot be parsed never matches anything.
    return false
  }
  switch (parsed.type) {
    case 'tag':
    case 'range':
    case 'version':
      return matchRegistry(node, parsed)
    case 'git':
      return matchGit(node, parsed)
    case 'file':
    case 'directory':
      return matchFileOrDir(node, parsed)
    case 'remote':
      return matchRemote(node, parsed)
    case 'alias':
      // Disallowed: aliases as policy keys do not match anything.
      // The user has to address the real package name.
      return false
    /* istanbul ignore next: switch above covers every npa type we expect;
       defensive fallback for future npa types. */
    default:
      return false
  }
}
/**
 * Match a registry-style policy key (`pkg`, `pkg@1.2.3`, `pkg@1 || 2`)
 * against a node using only the node's trusted identity.
 *
 * @param {object} node - arborist Node or look-alike.
 * @param {object} parsed - `npa` result for the policy key; reads `type`,
 *   `name`, `fetchSpec` and `rawSpec`.
 * @returns {boolean} true when the key identifies this node.
 */
const matchRegistry = (node, parsed) => {
  // A registry-style key must never match a tarball or git node, even if
  // the names happen to coincide.
  if (!isRegistryNode(node)) {
    return false
  }

  // Never use `node.packageName` / `node.version` here: those read from
  // the tarball's own package.json and can be forged by a publisher.
  const trusted = getTrustedRegistryIdentity(node)
  if (!trusted || trusted.name !== parsed.name) {
    return false
  }

  // `tag` covers `pkg@latest`. Rejected up front by validatePolicy in
  // resolve-allow-scripts.js; defense-in-depth in case one slips through.
  if (parsed.type === 'tag') {
    /* istanbul ignore next: validatePolicy filters this; defensive */
    return false
  }

  // `range` includes bare names and `pkg@*` (name-only allow) as well as
  // exact versions joined by `||`. Comparator ranges (^, ~, >=, <) are
  // rejected because they would allow versions nobody reviewed.
  if (parsed.type === 'range') {
    if (parsed.fetchSpec === '*' || parsed.rawSpec === '' || parsed.rawSpec === '*') {
      return true
    }
    if (!trusted.version || !isExactVersionDisjunction(parsed.fetchSpec)) {
      return false
    }
    return semver.satisfies(trusted.version, parsed.fetchSpec, { loose: true })
  }

  // `version` is an exact pin like `pkg@1.2.3`.
  /* istanbul ignore else: parsed.type at this point is always 'version';
     the istanbul-ignored fallback below handles the impossible case. */
  if (parsed.type === 'version') {
    // No trusted version (edge-derived identity): an exact pin cannot match.
    if (!trusted.version) {
      return false
    }
    if (trusted.version === parsed.fetchSpec) {
      return true
    }
    // NOTE(review): npa is understood to keep the user's raw spelling in
    // fetchSpec, so `pkg@v1.2.3` / `pkg@=1.2.3` arrive as 'v1.2.3' /
    // '=1.2.3' — confirm against npa. Normalise before comparing so a
    // single pin behaves like the same version inside a `||` list, which
    // the range branch above already evaluates in loose mode.
    const pinned = semver.valid(parsed.fetchSpec, { loose: true })
    return pinned !== null && pinned === trusted.version
  }

  /* istanbul ignore next: parsed.type is constrained to tag/range/version
     by the caller; this final fallback is defensive. */
  return false
}
| // Derive a registry node's trusted name+version. | ||
| // | ||
| // Preferred source: the lockfile's resolved URL parsed via | ||
| // versionFromTgz. arborist records the URL when it first adds the dep, | ||
| // before any tarball is unpacked, so the URL cannot be forged by the | ||
| // package's own package.json. | ||
| // | ||
| // Fallback for lockfiles produced with omit-lockfile-registry-resolved | ||
| // (where the URL is absent): take the dep name from an incoming | ||
| // dependency edge. The edge's spec was written by the consumer (or by an | ||
| // upstream package.json), not by the installed tarball. For aliases like | ||
| // `"trusted": "npm:naughty@1.0.0"`, the underlying registered package | ||
| // name is parsed out of the alias `subSpec`. The install location | ||
| // (`node_modules/trusted`) is deliberately not consulted because for | ||
| // aliases it carries only the alias name, which would let a malicious | ||
| // publisher bypass an allowScripts entry written for the real package. | ||
| // | ||
| // Version is left null in the fallback case because the only remaining | ||
| // source for it (`node.version`) reads from the tarball. | ||
| // | ||
| // Returns `{ name, version }` or `null` if no trusted identity exists. | ||
/**
 * Derive a node's trusted registry identity.
 *
 * @param {object} node - arborist Node or look-alike; reads `resolved`
 *   and, via `nameFromEdges`, `edgesIn`.
 * @returns {{name: string, version: (string|null)}|null} identity parsed
 *   from the resolved URL, else a name-only identity taken from an
 *   incoming edge, else `null`.
 */
const getTrustedRegistryIdentity = (node) => {
  // Preferred source: name and version parsed out of the resolved URL.
  if (node.resolved && typeof node.resolved === 'string') {
    const parsed = versionFromTgz('', node.resolved)
    /* istanbul ignore else: versionFromTgz returns either a complete
       { name, version } or null; partial objects are not produced. */
    if (parsed && parsed.name && parsed.version) {
      return parsed
    }
  }

  // Fallback: name from an incoming edge. Version stays null because the
  // only remaining source for it (`node.version`) reads from the tarball.
  const name = nameFromEdges(node)
  return name ? { name, version: null } : null
}
/**
 * Find a package name for `node` from its incoming dependency edges.
 *
 * @param {object} node - arborist Node or look-alike; reads `edgesIn`,
 *   which must be iterable to be considered.
 * @returns {string|null} name from the first edge that parses as a
 *   registry spec (for aliases, the aliased package's name), or `null`.
 */
const nameFromEdges = (node) => {
  if (!node.edgesIn || typeof node.edgesIn[Symbol.iterator] !== 'function') {
    return null
  }
  for (const edge of node.edgesIn) {
    let parsed
    try {
      parsed = npa.resolve(edge.name, edge.spec)
    } catch {
      // An edge whose spec cannot be parsed is skipped, not fatal.
      continue
    }
    // Aliases: trust the underlying registered package, not the alias.
    if (parsed.type === 'alias' && parsed.subSpec && parsed.subSpec.registry) {
      return parsed.subSpec.name
    }
    // Non-aliased registry edge: the edge name was written by the
    // consumer / upstream, not read from the installed tarball.
    if (parsed.registry) {
      return parsed.name
    }
  }
  return null
}
| // True if `rangeSpec` is one or more exact versions joined by `||`. Anything | ||
| // containing comparator operators (^, ~, >=, <, *) returns false. | ||
/**
 * Check that a range spec consists solely of exact versions joined by `||`.
 *
 * @param {*} rangeSpec - candidate spec; anything but a non-blank string
 *   yields false.
 * @returns {boolean} true when every `||`-separated part is non-empty and
 *   accepted by `semver.valid`.
 */
const isExactVersionDisjunction = (rangeSpec) => {
  /* istanbul ignore next: caller always passes parsed.fetchSpec, which
     npa guarantees to be a non-empty string for range specs. */
  if (typeof rangeSpec !== 'string' || rangeSpec.trim() === '') {
    return false
  }
  const parts = rangeSpec.split('||').map(p => p.trim())
  /* istanbul ignore next: String.prototype.split always returns at least
     one element; defensive guard only. */
  if (parts.length === 0) {
    return false
  }
  // An empty part (e.g. a dangling `||`) disqualifies the whole spec.
  return parts.every(p => p !== '' && semver.valid(p) !== null)
}
/**
 * Match a git policy key against a node's resolved git URL.
 *
 * @param {object} node - arborist Node or look-alike; reads `resolved`.
 * @param {object} parsed - `npa` result for the policy key; reads
 *   `hosted`, `fetchSpec` and `gitCommittish`.
 * @returns {boolean} true when the repository matches and, if the key
 *   carries a committish, the node's committish starts with it.
 */
const matchGit = (node, parsed) => {
  // Guard the type as well as the prefix: calling `.startsWith` on a
  // truthy non-string `resolved` would throw instead of not matching.
  if (typeof node.resolved !== 'string' || !node.resolved.startsWith('git')) {
    return false
  }

  let nodeParsed
  try {
    nodeParsed = npa(node.resolved)
  } catch {
    /* istanbul ignore next: npa parsing a git URL we already validated
       starts with `git` should not throw; defensive guard only. */
    return false
  }

  // Compare the host/repo. Both sides should resolve to the same canonical
  // ssh URL.
  const noCommittish = { noCommittish: true }
  const keyHost = parsed.hosted?.ssh(noCommittish)
  const nodeHost = nodeParsed.hosted?.ssh(noCommittish)
  if (keyHost && nodeHost) {
    if (keyHost !== nodeHost) {
      return false
    }
  } else if (parsed.fetchSpec && nodeParsed.fetchSpec) {
    // Non-hosted git URLs: fall back to fetch spec.
    if (parsed.fetchSpec !== nodeParsed.fetchSpec) {
      return false
    }
  } else {
    // Nothing comparable on one side or the other.
    return false
  }

  // If the policy key has no committish, the repository match is enough.
  const keyCommittish = parsed.gitCommittish || parsed.hosted?.committish
  if (!keyCommittish) {
    return true
  }

  // Direction matters: the node's committish must START WITH the key's
  // committish, never the reverse. A longer key matching a shorter resolved
  // committish would allow scripts the user never approved.
  const nodeCommittish = nodeParsed.gitCommittish || nodeParsed.hosted?.committish || ''
  if (!nodeCommittish) {
    return false
  }
  return nodeCommittish.startsWith(keyCommittish)
}
/**
 * Match a file/directory policy key against a node by exact comparison.
 *
 * @param {object} node - arborist Node or look-alike; reads `resolved`.
 * @param {object} parsed - `npa` result for the key; reads `saveSpec` and
 *   `fetchSpec`.
 * @returns {boolean} true when `node.resolved` is non-empty and strictly
 *   equals either spec.
 */
const matchFileOrDir = (node, parsed) => {
  // Without a resolved value there is nothing to compare against.
  if (!node.resolved) {
    return false
  }
  return node.resolved === parsed.saveSpec || node.resolved === parsed.fetchSpec
}
/**
 * Match a remote-URL policy key against a node by exact comparison.
 *
 * @param {object} node - arborist Node or look-alike; reads `resolved`.
 * @param {object} parsed - `npa` result for the key; reads `fetchSpec` and
 *   `saveSpec`.
 * @returns {boolean} true when `node.resolved` is non-empty and strictly
 *   equals either spec.
 */
const matchRemote = (node, parsed) => {
  // Without a resolved value there is nothing to compare against.
  if (!node.resolved) {
    return false
  }
  return node.resolved === parsed.fetchSpec || node.resolved === parsed.saveSpec
}
/**
 * Policy-side check: is `node` a registry dependency?
 *
 * @param {object} node - arborist Node or look-alike; reads
 *   `isRegistryDependency`, `resolved` and `version`.
 * @returns {boolean} true when the node is treated as a registry dep.
 */
const isRegistryNode = (node) => {
  // Prefer arborist's edge-based check when available (real Node objects);
  // it is much harder to spoof than the URL.
  if (typeof node.isRegistryDependency === 'boolean') {
    return node.isRegistryDependency
  }

  // Fallback for nodes without the arborist getter (e.g. test fixtures,
  // lockfiles written with omit-lockfile-registry-resolved).
  // With no resolved value at all, the node counts as a registry dep only
  // if it carries a version.
  if (!node.resolved) {
    return !!node.version
  }

  // Registry tarballs live at `<host>/<pkg-name>/-/<pkg-name>-<version>.tgz`.
  // Require a path segment before `/-/` so an attacker can't lift a
  // registry-style allow entry to a hostile URL like
  // `https://evil.com/-/trusted-1.0.0.tgz`.
  return /^https?:\/\/[^/]+\/.+\/-\/[^/]+-\d/.test(node.resolved)
}
| // Trusted display identity for human-facing output (`npm install` | ||
| // advisory, `npm approve-scripts --allow-scripts-pending`). Same idea as | ||
| // getTrustedRegistryIdentity, but for DISPLAY only — version falls back | ||
| // to node.version when the URL doesn't carry one. Must never be used | ||
| // for policy matching. | ||
/**
 * Build the name/version pair shown to humans for a node. For DISPLAY
 * only; must never be used for policy matching.
 *
 * @param {object} node - arborist Node or look-alike; reads `name` and
 *   `version` as fallbacks.
 * @returns {{name: (string|null), version: (string|null)}} trusted
 *   identity where available, else the node's own values, else `null`.
 */
const trustedDisplay = (node) => {
  const trusted = getTrustedRegistryIdentity(node)
  /* istanbul ignore next: defensive fallbacks for nodes without name/version */
  return {
    name: (trusted && trusted.name) || node.name || null,
    version: (trusted && trusted.version) || node.version || null,
  }
}
| module.exports = isScriptAllowed | ||
| module.exports.isScriptAllowed = isScriptAllowed | ||
| module.exports.isExactVersionDisjunction = isExactVersionDisjunction | ||
| module.exports.getTrustedRegistryIdentity = getTrustedRegistryIdentity | ||
| module.exports.trustedDisplay = trustedDisplay |
@@ -103,4 +103,6 @@ // The arborist manages three trees: | ||
| Arborist: this.constructor, | ||
| allowScripts: options.allowScripts ?? null, | ||
| binLinks: 'binLinks' in options ? !!options.binLinks : true, | ||
| cache: options.cache || `${homedir()}/.npm/_cacache`, | ||
| dangerouslyAllowAllScripts: !!options.dangerouslyAllowAllScripts, | ||
| dryRun: !!options.dryRun, | ||
@@ -107,0 +109,0 @@ formatPackageLock: 'formatPackageLock' in options ? !!options.formatPackageLock : true, |
@@ -338,3 +338,4 @@ const { mkdirSync } = require('node:fs') | ||
| // Create workspace Link. For root declared deps, link at root node_modules/. For undeclared deps, link at the workspace's own node_modules/ (self-link). | ||
| // Declared workspaces are symlinked at root node_modules/. | ||
| // Undeclared workspaces get a tree-only Link kept for diff/filter participation but not materialized on disk. | ||
| const isDeclared = this.#rootDeclaredDeps.has(wsName) | ||
@@ -352,3 +353,3 @@ const wsLink = new IsolatedLink({ | ||
| if (!isDeclared) { | ||
| workspace.children.set(wsName, wsLink) | ||
| wsLink.isUndeclaredWorkspaceLink = true | ||
| } | ||
@@ -355,0 +356,0 @@ root.children.set(wsName, wsLink) |
@@ -15,2 +15,3 @@ // Arborist.rebuild({path = this.path}) will do all the binlinks and | ||
| const { resolve } = require('node:path') | ||
| const { isScriptAllowed } = require('../script-allowed.js') | ||
@@ -229,2 +230,14 @@ const boolEnv = b => b ? '1' : '' | ||
| // Phase 1 allowScripts gate: a `false` verdict from the policy matcher | ||
| // means the user explicitly denied install scripts for this node, so skip | ||
| // it. `true` and `null` (unreviewed) both fall through to the existing | ||
| // detection logic — unreviewed nodes still run their scripts in Phase 1 | ||
| // and are surfaced via the post-reify advisory warning. The global | ||
| // --ignore-scripts kill switch in #build() still takes precedence, and | ||
| // --dangerously-allow-all-scripts bypasses this gate entirely. | ||
| if (!this.options.dangerouslyAllowAllScripts && | ||
| isScriptAllowed(node, this.options.allowScripts) === false) { | ||
| return | ||
| } | ||
| if (this.#oldMeta === null) { | ||
@@ -231,0 +244,0 @@ const { root: { meta } } = node |
+1
-1
| { | ||
| "name": "@npmcli/arborist", | ||
| "version": "9.6.0", | ||
| "version": "9.7.0", | ||
| "description": "Manage node_modules trees", | ||
@@ -5,0 +5,0 @@ "dependencies": { |
Sorry, the diff of this file is too big to display
Network access
Supply chain risk: This module accesses the network.
Found 1 instance in 1 package
Environment variable access
Supply chain risk: Package accesses environment variables, which may be a sign of credential stuffing or data theft.
Found 6 instances in 1 package
AI-detected potential code anomaly
Supply chain risk: AI has identified unusual behaviors that may pose a security risk.
Found 1 instance in 1 package
Long strings
Supply chain risk: Contains long string literals, which may be a sign of obfuscated or packed code.
Found 1 instance in 1 package
URL strings
Supply chain risk: Package contains fragments of external URLs or IP addresses, which the package may be accessing at runtime.
Network access
Supply chain risk: This module accesses the network.
Found 1 instance in 1 package
Environment variable access
Supply chain risk: Package accesses environment variables, which may be a sign of credential stuffing or data theft.
Found 6 instances in 1 package
AI-detected potential code anomaly
Supply chain risk: AI has identified unusual behaviors that may pose a security risk.
Found 1 instance in 1 package
Long strings
Supply chain risk: Contains long string literals, which may be a sign of obfuscated or packed code.
Found 1 instance in 1 package
URL strings
Supply chain risk: Package contains fragments of external URLs or IP addresses, which the package may be accessing at runtime.
Found 1 instance in 1 package
519392
3.4%65
3.17%13523
3.17%21
5%