Skip to content

Commit a589a1a

Browse files
GeoffreyBooth authored and targos committed
esm: detect ESM syntax in ambiguous JavaScript
PR-URL: #50096 Reviewed-By: Yagiz Nizipli <[email protected]> Reviewed-By: Benjamin Gruenbaum <[email protected]> Reviewed-By: Guy Bedford <[email protected]>
1 parent ab5985d commit a589a1a

30 files changed

+475
-67
lines changed

benchmark/esm/detect-esm-syntax.js

+37
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
'use strict';
2+
3+
// This benchmarks the cost of running `containsModuleSyntax` on a CommonJS module being imported.
4+
// We use the TypeScript fixture because it's a very large CommonJS file with no ESM syntax: the worst case.
5+
const common = require('../common.js');
6+
const tmpdir = require('../../test/common/tmpdir.js');
7+
const fixtures = require('../../test/common/fixtures.js');
8+
const scriptPath = fixtures.path('snapshot', 'typescript.js');
9+
const fs = require('node:fs');
10+
11+
const bench = common.createBenchmark(main, {
12+
type: ['with-module-syntax-detection', 'without-module-syntax-detection'],
13+
n: [1e4],
14+
}, {
15+
flags: ['--experimental-detect-module'],
16+
});
17+
18+
const benchmarkDirectory = tmpdir.fileURL('benchmark-detect-esm-syntax');
19+
const ambiguousURL = new URL('./typescript.js', benchmarkDirectory);
20+
const explicitURL = new URL('./typescript.cjs', benchmarkDirectory);
21+
22+
async function main({ n, type }) {
23+
tmpdir.refresh();
24+
25+
fs.mkdirSync(benchmarkDirectory, { recursive: true });
26+
fs.cpSync(scriptPath, ambiguousURL);
27+
fs.cpSync(scriptPath, explicitURL);
28+
29+
bench.start();
30+
31+
for (let i = 0; i < n; i++) {
32+
const url = type === 'with-module-syntax-detection' ? ambiguousURL : explicitURL;
33+
await import(url);
34+
}
35+
36+
bench.end(n);
37+
}

doc/api/cli.md

+27
Original file line numberDiff line numberDiff line change
@@ -611,6 +611,32 @@ files with no extension will be treated as WebAssembly if they begin with the
611611
WebAssembly magic number (`\0asm`); otherwise they will be treated as ES module
612612
JavaScript.
613613

614+
### `--experimental-detect-module`
615+
616+
<!-- YAML
617+
added:
618+
- REPLACEME
619+
-->
620+
621+
> Stability: 1.0 - Early development
622+
623+
Node.js will inspect the source code of ambiguous input to determine whether it
624+
contains ES module syntax; if such syntax is detected, the input will be treated
625+
as an ES module.
626+
627+
Ambiguous input is defined as:
628+
629+
* Files with a `.js` extension or no extension; and either no controlling
630+
`package.json` file or one that lacks a `type` field; and
631+
`--experimental-default-type` is not specified.
632+
* String input (`--eval` or STDIN) when neither `--input-type` nor
633+
`--experimental-default-type` is specified.
634+
635+
ES module syntax is defined as syntax that would throw when evaluated as
636+
CommonJS. This includes `import` and `export` statements and `import.meta`
637+
references. It does _not_ include `import()` expressions, which are valid in
638+
CommonJS.
639+
614640
### `--experimental-import-meta-resolve`
615641

616642
<!-- YAML
@@ -2258,6 +2284,7 @@ Node.js options that are allowed are:
22582284
* `--enable-source-maps`
22592285
* `--experimental-abortcontroller`
22602286
* `--experimental-default-type`
2287+
* `--experimental-detect-module`
22612288
* `--experimental-import-meta-resolve`
22622289
* `--experimental-json-modules`
22632290
* `--experimental-loader`

doc/api/esm.md

+42-17
Original file line numberDiff line numberDiff line change
@@ -109,11 +109,21 @@ provides interoperability between them and its original module format,
109109

110110
Node.js has two module systems: [CommonJS][] modules and ECMAScript modules.
111111

112-
Authors can tell Node.js to use the ECMAScript modules loader via the `.mjs`
113-
file extension, the `package.json` [`"type"`][] field, the [`--input-type`][]
114-
flag, or the [`--experimental-default-type`][] flag. Outside of those cases,
115-
Node.js will use the CommonJS module loader. See [Determining module system][]
116-
for more details.
112+
Authors can tell Node.js to interpret JavaScript as an ES module via the `.mjs`
113+
file extension, the `package.json` [`"type"`][] field with a value of `"module"`,
114+
the [`--input-type`][] flag with a value of `"module"`, or the
115+
[`--experimental-default-type`][] flag with a value of `"module"`. These are
116+
explicit markers of code being intended to run as an ES module.
117+
118+
Inversely, authors can tell Node.js to interpret JavaScript as CommonJS via the
119+
`.cjs` file extension, the `package.json` [`"type"`][] field with a value of
120+
`"commonjs"`, the [`--input-type`][] flag with a value of `"commonjs"`, or the
121+
[`--experimental-default-type`][] flag with a value of `"commonjs"`.
122+
123+
When code lacks explicit markers for either module system, Node.js will inspect
124+
the source code of a module to look for ES module syntax. If such syntax is
125+
found, Node.js will run the code as an ES module; otherwise it will run the
126+
module as CommonJS. See [Determining module system][] for more details.
117127

118128
<!-- Anchors to make sure old links find a target -->
119129

@@ -1019,18 +1029,33 @@ _isImports_, _conditions_)
10191029
> 1. Return _"commonjs"_.
10201030
> 4. If _url_ ends in _".json"_, then
10211031
> 1. Return _"json"_.
1022-
> 5. Let _packageURL_ be the result of **LOOKUP\_PACKAGE\_SCOPE**(_url_).
1023-
> 6. Let _pjson_ be the result of **READ\_PACKAGE\_JSON**(_packageURL_).
1024-
> 7. If _pjson?.type_ exists and is _"module"_, then
1025-
> 1. If _url_ ends in _".js"_ or has no file extension, then
1026-
> 1. If `--experimental-wasm-modules` is enabled and the file at _url_
1027-
> contains the header for a WebAssembly module, then
1028-
> 1. Return _"wasm"_.
1029-
> 2. Otherwise,
1030-
> 1. Return _"module"_.
1031-
> 2. Return **undefined**.
1032-
> 8. Otherwise,
1033-
> 1. Return **undefined**.
1032+
> 5. If `--experimental-wasm-modules` is enabled and _url_ ends in
1033+
> _".wasm"_, then
1034+
> 1. Return _"wasm"_.
1035+
> 6. Let _packageURL_ be the result of **LOOKUP\_PACKAGE\_SCOPE**(_url_).
1036+
> 7. Let _pjson_ be the result of **READ\_PACKAGE\_JSON**(_packageURL_).
1037+
> 8. Let _packageType_ be **null**.
1038+
> 9. If _pjson?.type_ is _"module"_ or _"commonjs"_, then
1039+
> 1. Set _packageType_ to _pjson.type_.
1040+
> 10. If _url_ ends in _".js"_, then
1041+
> 1. If _packageType_ is not **null**, then
1042+
> 1. Return _packageType_.
1043+
> 2. If `--experimental-detect-module` is enabled and the source of
1044+
> module contains static import or export syntax, then
1045+
> 1. Return _"module"_.
1046+
> 3. Return _"commonjs"_.
1047+
> 11. If _url_ does not have any extension, then
1048+
> 1. If _packageType_ is _"module"_ and `--experimental-wasm-modules` is
1049+
> enabled and the file at _url_ contains the header for a WebAssembly
1050+
> module, then
1051+
> 1. Return _"wasm"_.
1052+
> 2. If _packageType_ is not **null**, then
1053+
> 1. Return _packageType_.
1054+
> 3. If `--experimental-detect-module` is enabled and the source of
1055+
> module contains static import or export syntax, then
1056+
> 1. Return _"module"_.
1057+
> 4. Return _"commonjs"_.
1058+
> 12. Return **undefined** (will throw during load phase).
10341059
10351060
**LOOKUP\_PACKAGE\_SCOPE**(_url_)
10361061

doc/api/modules.md

+4-2
Original file line numberDiff line numberDiff line change
@@ -80,8 +80,10 @@ By default, Node.js will treat the following as CommonJS modules:
8080
* Files with a `.js` extension when the nearest parent `package.json` file
8181
contains a top-level field [`"type"`][] with a value of `"commonjs"`.
8282

83-
* Files with a `.js` extension when the nearest parent `package.json` file
84-
doesn't contain a top-level field [`"type"`][]. Package authors should include
83+
* Files with a `.js` extension or without an extension, when the nearest parent
84+
`package.json` file doesn't contain a top-level field [`"type"`][] or there is
85+
no `package.json` in any parent folder; unless the file contains syntax that
86+
errors when it is not evaluated as an ES module. Package authors should include
8587
the [`"type"`][] field, even in packages where all sources are CommonJS. Being
8688
explicit about the `type` of the package will make things easier for build
8789
tools and loaders to determine how the files in the package should be

doc/api/packages.md

+8
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,14 @@ expressions:
6969
* Strings passed in as an argument to `--eval`, or piped to `node` via `STDIN`,
7070
with the flag `--input-type=module`.
7171

72+
* Code that contains syntax that only parses successfully as [ES modules][],
73+
such as `import` or `export` statements or `import.meta`, when the code has no
74+
explicit marker of how it should be interpreted. Explicit markers are `.mjs`
75+
or `.cjs` extensions, `package.json` `"type"` fields with either `"module"` or
76+
`"commonjs"` values, or `--input-type` or `--experimental-default-type` flags.
77+
Dynamic `import()` expressions are supported in either CommonJS or ES modules
78+
and would not cause a file to be treated as an ES module.
79+
7280
Node.js will treat the following as [CommonJS][] when passed to `node` as the
7381
initial input, or when referenced by `import` statements or `import()`
7482
expressions:

lib/internal/modules/esm/get_format.js

+43-21
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,7 @@ const {
1818

1919
const experimentalNetworkImports =
2020
getOptionValue('--experimental-network-imports');
21-
const defaultTypeFlag = getOptionValue('--experimental-default-type');
22-
// The next line is where we flip the default to ES modules someday.
23-
const defaultType = defaultTypeFlag === 'module' ? 'module' : 'commonjs';
21+
const { containsModuleSyntax } = internalBinding('contextify');
2422
const { getPackageType } = require('internal/modules/esm/resolve');
2523
const { fileURLToPath } = require('internal/url');
2624
const { ERR_UNKNOWN_FILE_EXTENSION } = require('internal/errors').codes;
@@ -85,42 +83,66 @@ function underNodeModules(url) {
8583

8684
/**
8785
* @param {URL} url
88-
* @param {{parentURL: string}} context
86+
* @param {{parentURL: string; source?: Buffer}} context
8987
* @param {boolean} ignoreErrors
9088
* @returns {string}
9189
*/
92-
function getFileProtocolModuleFormat(url, context, ignoreErrors) {
90+
function getFileProtocolModuleFormat(url, context = { __proto__: null }, ignoreErrors) {
91+
const { source } = context;
9392
const ext = extname(url);
9493

9594
if (ext === '.js') {
9695
const packageType = getPackageType(url);
9796
if (packageType !== 'none') {
9897
return packageType;
9998
}
99+
100100
// The controlling `package.json` file has no `type` field.
101-
if (defaultType === 'module') {
102-
// An exception to the type flag making ESM the default everywhere is that package scopes under `node_modules`
103-
// should retain the assumption that a lack of a `type` field means CommonJS.
104-
return underNodeModules(url) ? 'commonjs' : 'module';
101+
switch (getOptionValue('--experimental-default-type')) {
102+
case 'module': { // The user explicitly passed `--experimental-default-type=module`.
103+
// An exception to the type flag making ESM the default everywhere is that package scopes under `node_modules`
104+
// should retain the assumption that a lack of a `type` field means CommonJS.
105+
return underNodeModules(url) ? 'commonjs' : 'module';
106+
}
107+
case 'commonjs': { // The user explicitly passed `--experimental-default-type=commonjs`.
108+
return 'commonjs';
109+
}
110+
default: { // The user did not pass `--experimental-default-type`.
111+
// `source` is undefined when this is called from `defaultResolve`;
112+
// but this gets called again from `defaultLoad`/`defaultLoadSync`.
113+
if (source && getOptionValue('--experimental-detect-module')) {
114+
return containsModuleSyntax(`${source}`, fileURLToPath(url)) ? 'module' : 'commonjs';
115+
}
116+
return 'commonjs';
117+
}
105118
}
106-
return 'commonjs';
107119
}
108120

109121
if (ext === '') {
110122
const packageType = getPackageType(url);
111-
if (defaultType === 'commonjs') { // Legacy behavior
112-
if (packageType === 'none' || packageType === 'commonjs') {
113-
return 'commonjs';
114-
} // Else packageType === 'module'
123+
if (packageType === 'module') {
115124
return getFormatOfExtensionlessFile(url);
116-
} // Else defaultType === 'module'
117-
if (underNodeModules(url)) { // Exception for package scopes under `node_modules`
118-
return packageType === 'module' ? getFormatOfExtensionlessFile(url) : 'commonjs';
119125
}
120-
if (packageType === 'none' || packageType === 'module') {
121-
return getFormatOfExtensionlessFile(url);
122-
} // Else packageType === 'commonjs'
123-
return 'commonjs';
126+
if (packageType !== 'none') {
127+
return packageType; // 'commonjs' or future package types
128+
}
129+
130+
// The controlling `package.json` file has no `type` field.
131+
switch (getOptionValue('--experimental-default-type')) {
132+
case 'module': { // The user explicitly passed `--experimental-default-type=module`.
133+
return underNodeModules(url) ? 'commonjs' : getFormatOfExtensionlessFile(url);
134+
}
135+
case 'commonjs': { // The user explicitly passed `--experimental-default-type=commonjs`.
136+
return 'commonjs';
137+
}
138+
default: { // The user did not pass `--experimental-default-type`.
139+
if (source && getOptionValue('--experimental-detect-module') &&
140+
getFormatOfExtensionlessFile(url) === 'module') {
141+
return containsModuleSyntax(`${source}`, fileURLToPath(url)) ? 'module' : 'commonjs';
142+
}
143+
return 'commonjs';
144+
}
145+
}
124146
}
125147

126148
const format = extensionFormatMap[ext];

lib/internal/modules/esm/load.js

+21-15
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ const DATA_URL_PATTERN = /^[^/]+\/[^,;]+(?:[^,]*?)(;base64)?,([\s\S]*)$/;
3333
/**
3434
* @param {URL} url URL to the module
3535
* @param {ESModuleContext} context used to decorate error messages
36-
* @returns {{ responseURL: string, source: string | BufferView }}
36+
* @returns {Promise<{ responseURL: string, source: string | BufferView }>}
3737
*/
3838
async function getSource(url, context) {
3939
const { protocol, href } = url;
@@ -105,7 +105,7 @@ function getSourceSync(url, context) {
105105
* @param {LoadContext} context
106106
* @returns {LoadReturn}
107107
*/
108-
async function defaultLoad(url, context = kEmptyObject) {
108+
async function defaultLoad(url, context = { __proto__: null }) {
109109
let responseURL = url;
110110
let {
111111
importAttributes,
@@ -127,19 +127,24 @@ async function defaultLoad(url, context = kEmptyObject) {
127127

128128
throwIfUnsupportedURLScheme(urlInstance, experimentalNetworkImports);
129129

130-
format ??= await defaultGetFormat(urlInstance, context);
131-
132-
validateAttributes(url, format, importAttributes);
133-
134-
if (
135-
format === 'builtin' ||
136-
format === 'commonjs'
137-
) {
130+
if (urlInstance.protocol === 'node:') {
138131
source = null;
139132
} else if (source == null) {
140133
({ responseURL, source } = await getSource(urlInstance, context));
134+
context.source = source;
135+
}
136+
137+
if (format == null || format === 'commonjs') {
138+
// Now that we have the source for the module, run `defaultGetFormat` again in case we detect ESM syntax.
139+
format = await defaultGetFormat(urlInstance, context);
140+
}
141+
142+
if (format === 'commonjs') {
143+
source = null; // Let the CommonJS loader handle it (for now)
141144
}
142145

146+
validateAttributes(url, format, importAttributes);
147+
143148
return {
144149
__proto__: null,
145150
format,
@@ -178,16 +183,17 @@ function defaultLoadSync(url, context = kEmptyObject) {
178183

179184
throwIfUnsupportedURLScheme(urlInstance, false);
180185

181-
format ??= defaultGetFormat(urlInstance, context);
182-
183-
validateAttributes(url, format, importAttributes);
184-
185-
if (format === 'builtin') {
186+
if (urlInstance.protocol === 'node:') {
186187
source = null;
187188
} else if (source == null) {
188189
({ responseURL, source } = getSourceSync(urlInstance, context));
190+
context.source = source;
189191
}
190192

193+
format ??= defaultGetFormat(urlInstance, context);
194+
195+
validateAttributes(url, format, importAttributes);
196+
191197
return {
192198
__proto__: null,
193199
format,

lib/internal/modules/run_main.js

+14-1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ const {
44
StringPrototypeEndsWith,
55
} = primordials;
66

7+
const { containsModuleSyntax } = internalBinding('contextify');
78
const { getOptionValue } = require('internal/options');
89
const path = require('path');
910

@@ -70,7 +71,19 @@ function shouldUseESMLoader(mainPath) {
7071
const { readPackageScope } = require('internal/modules/package_json_reader');
7172
const pkg = readPackageScope(mainPath);
7273
// No need to guard `pkg` as it can only be an object or `false`.
73-
return pkg.data?.type === 'module' || getOptionValue('--experimental-default-type') === 'module';
74+
switch (pkg.data?.type) {
75+
case 'module':
76+
return true;
77+
case 'commonjs':
78+
return false;
79+
default: { // No package.json or no `type` field.
80+
if (getOptionValue('--experimental-detect-module')) {
81+
// If the first argument of `containsModuleSyntax` is undefined, it will read `mainPath` from the file system.
82+
return containsModuleSyntax(undefined, mainPath);
83+
}
84+
return false;
85+
}
86+
}
7487
}
7588

7689
/**

0 commit comments

Comments
 (0)