From bc1e621b6645c9ee74119a184c5e8fb7c647aa69 Mon Sep 17 00:00:00 2001 From: Cheng Zhao Date: Mon, 22 Jan 2024 13:45:55 +0900 Subject: [PATCH 1/2] src: preload function for Environment This PR adds a |preload| arg to node::LoadEnvironment to allow embedders to set a preload function for the environment, which will run after the environment is loaded and before the main script runs. This is similar to the --require CLI option, but runs a C++ function, and can only be set by embedders. The preload function can be used by embedders to inject scripts before running the main script, for example: 1. In Electron it is used to initialize the ASAR virtual filesystem, inject custom process properties, etc. 2. In VS Code it can be used to reset the module search paths for extensions. --- lib/internal/process/pre_execution.js | 7 +++++++ src/api/environment.cc | 18 +++++++++++------ src/env-inl.h | 8 ++++++++ src/env.h | 4 ++++ src/node.h | 25 ++++++++++++++++++++++-- src/node_options.cc | 6 ++++++ src/node_snapshotable.cc | 13 +++++++++++++ src/node_worker.cc | 7 ++++++- src/node_worker.h | 1 + test/cctest/test_environment.cc | 28 +++++++++++++++++++++++++++ 10 files changed, 108 insertions(+), 9 deletions(-) diff --git a/lib/internal/process/pre_execution.js b/lib/internal/process/pre_execution.js index d0cd911a82ac52..c4e39798f29dcc 100644 --- a/lib/internal/process/pre_execution.js +++ b/lib/internal/process/pre_execution.js @@ -197,6 +197,9 @@ function setupUserModules(forceDefaultLoader = false) { } = require('internal/modules/helpers'); assert(!hasStartedUserCJSExecution()); assert(!hasStartedUserESMExecution()); + if (getEmbedderOptions().hasEmbedderPreload) { + runEmbedderPreload(); + } // Do not enable preload modules if custom loaders are disabled. // For example, loader workers are responsible for doing this themselves. // And preload modules are not supported in ShadowRealm as well. 
@@ -725,6 +728,10 @@ function initializeFrozenIntrinsics() { } } +function runEmbedderPreload() { + internalBinding('mksnapshot').runEmbedderPreload(process, require); +} + function loadPreloadModules() { // For user code, we preload modules if `-r` is passed const preloadModules = getOptionValue('--require'); diff --git a/src/api/environment.cc b/src/api/environment.cc index 29826aa2d79586..95e76a8adafec9 100644 --- a/src/api/environment.cc +++ b/src/api/environment.cc @@ -538,25 +538,31 @@ NODE_EXTERN std::unique_ptr GetInspectorParentHandle( #endif } -MaybeLocal LoadEnvironment( - Environment* env, - StartExecutionCallback cb) { +MaybeLocal LoadEnvironment(Environment* env, + StartExecutionCallback cb, + EmbedderPreloadCallback preload) { env->InitializeLibuv(); env->InitializeDiagnostics(); + if (preload) { + env->set_embedder_preload(std::move(preload)); + } return StartExecution(env, cb); } MaybeLocal LoadEnvironment(Environment* env, - std::string_view main_script_source_utf8) { + std::string_view main_script_source_utf8, + EmbedderPreloadCallback preload) { CHECK_NOT_NULL(main_script_source_utf8.data()); return LoadEnvironment( - env, [&](const StartExecutionCallbackInfo& info) -> MaybeLocal { + env, + [&](const StartExecutionCallbackInfo& info) -> MaybeLocal { Local main_script = ToV8Value(env->context(), main_script_source_utf8).ToLocalChecked(); return info.run_cjs->Call( env->context(), Null(env->isolate()), 1, &main_script); - }); + }, + std::move(preload)); } Environment* GetCurrentEnvironment(Local context) { diff --git a/src/env-inl.h b/src/env-inl.h index 61ecc4b9975080..666dad97b021f4 100644 --- a/src/env-inl.h +++ b/src/env-inl.h @@ -430,6 +430,14 @@ inline builtins::BuiltinLoader* Environment::builtin_loader() { return &builtin_loader_; } +inline const EmbedderPreloadCallback& Environment::embedder_preload() const { + return embedder_preload_; +} + +inline void Environment::set_embedder_preload(EmbedderPreloadCallback fn) { + embedder_preload_ = 
std::move(fn); +} + inline double Environment::new_async_id() { async_hooks()->async_id_fields()[AsyncHooks::kAsyncIdCounter] += 1; return async_hooks()->async_id_fields()[AsyncHooks::kAsyncIdCounter]; diff --git a/src/env.h b/src/env.h index 72393326cb8c5e..0d56edbe6ad647 100644 --- a/src/env.h +++ b/src/env.h @@ -999,6 +999,9 @@ class Environment : public MemoryRetainer { #endif // HAVE_INSPECTOR + inline const EmbedderPreloadCallback& embedder_preload() const; + inline void set_embedder_preload(EmbedderPreloadCallback fn); + inline void set_process_exit_handler( std::function&& handler); @@ -1204,6 +1207,7 @@ class Environment : public MemoryRetainer { std::unique_ptr principal_realm_ = nullptr; builtins::BuiltinLoader builtin_loader_; + EmbedderPreloadCallback embedder_preload_; // Used by allocate_managed_buffer() and release_managed_buffer() to keep // track of the BackingStore for a given pointer. diff --git a/src/node.h b/src/node.h index bf3382f4c952ca..06a583a564cee1 100644 --- a/src/node.h +++ b/src/node.h @@ -731,12 +731,33 @@ struct StartExecutionCallbackInfo { using StartExecutionCallback = std::function(const StartExecutionCallbackInfo&)>; +using EmbedderPreloadCallback = + std::function process, + v8::Local require)>; +// Run initialization for the environment. +// +// The |preload| function will run before executing the entry point, which +// is usually used by embedders to inject scripts. +// The function is guaranteed to run before the user land module loader running +// any user code, so it is safe to assume that at this point, no user code has +// been run yet. +// The function will be executed with preload(process, require), and the passed +// require function has access to internal Node.js modules. There is no +// stability guarantee about the internals exposed to the internal require +// function. Expect breakages when updating Node.js versions if the embedder +// imports internal modules with the internal require function. 
+// Worker threads created in the environment will also respect the |preload| +// function, so make sure the function is thread-safe. NODE_EXTERN v8::MaybeLocal LoadEnvironment( Environment* env, - StartExecutionCallback cb); + StartExecutionCallback cb, + EmbedderPreloadCallback preload = nullptr); NODE_EXTERN v8::MaybeLocal LoadEnvironment( - Environment* env, std::string_view main_script_source_utf8); + Environment* env, + std::string_view main_script_source_utf8, + EmbedderPreloadCallback preload = nullptr); NODE_EXTERN void FreeEnvironment(Environment* env); // Set a callback that is called when process.exit() is called from JS, diff --git a/src/node_options.cc b/src/node_options.cc index 7b5152172c5ce7..f4bd5e16402928 100644 --- a/src/node_options.cc +++ b/src/node_options.cc @@ -1304,6 +1304,12 @@ void GetEmbedderOptions(const FunctionCallbackInfo& args) { .IsNothing()) return; + if (ret->Set(context, + FIXED_ONE_BYTE_STRING(env->isolate(), "hasEmbedderPreload"), + Boolean::New(isolate, env->embedder_preload() != nullptr)) + .IsNothing()) + return; + args.GetReturnValue().Set(ret); } diff --git a/src/node_snapshotable.cc b/src/node_snapshotable.cc index 7e44fef542eaf5..79d22c40ab08c3 100644 --- a/src/node_snapshotable.cc +++ b/src/node_snapshotable.cc @@ -1410,6 +1410,17 @@ void SerializeSnapshotableObjects(Realm* realm, }); } +void RunEmbedderPreload(const FunctionCallbackInfo& args) { + Environment* env = Environment::GetCurrent(args); + CHECK(env->embedder_preload()); + CHECK_EQ(args.Length(), 2); + Local process_obj = args[0]; + Local require_fn = args[1]; + CHECK(process_obj->IsObject()); + CHECK(require_fn->IsFunction()); + env->embedder_preload()(env, process_obj, require_fn); +} + void CompileSerializeMain(const FunctionCallbackInfo& args) { CHECK(args[0]->IsString()); Local filename = args[0].As(); @@ -1533,6 +1544,7 @@ void CreatePerContextProperties(Local target, void CreatePerIsolateProperties(IsolateData* isolate_data, Local target) { Isolate* 
isolate = isolate_data->isolate(); + SetMethod(isolate, target, "runEmbedderPreload", RunEmbedderPreload); SetMethod(isolate, target, "compileSerializeMain", CompileSerializeMain); SetMethod(isolate, target, "setSerializeCallback", SetSerializeCallback); SetMethod(isolate, target, "setDeserializeCallback", SetDeserializeCallback); @@ -1545,6 +1557,7 @@ void CreatePerIsolateProperties(IsolateData* isolate_data, } void RegisterExternalReferences(ExternalReferenceRegistry* registry) { + registry->Register(RunEmbedderPreload); registry->Register(CompileSerializeMain); registry->Register(SetSerializeCallback); registry->Register(SetDeserializeCallback); diff --git a/src/node_worker.cc b/src/node_worker.cc index 552fdc438a0895..d4424810214787 100644 --- a/src/node_worker.cc +++ b/src/node_worker.cc @@ -63,6 +63,7 @@ Worker::Worker(Environment* env, thread_id_(AllocateEnvironmentThreadId()), name_(name), env_vars_(env_vars), + embedder_preload_(env->embedder_preload()), snapshot_data_(snapshot_data) { Debug(this, "Creating new worker instance with thread id %llu", thread_id_.id); @@ -387,8 +388,12 @@ void Worker::Run() { } Debug(this, "Created message port for worker %llu", thread_id_.id); - if (LoadEnvironment(env_.get(), StartExecutionCallback{}).IsEmpty()) + if (LoadEnvironment(env_.get(), + StartExecutionCallback{}, + std::move(embedder_preload_)) + .IsEmpty()) { return; + } Debug(this, "Loaded environment for worker %llu", thread_id_.id); } diff --git a/src/node_worker.h b/src/node_worker.h index 531e2b5287010f..07fd7b460654e1 100644 --- a/src/node_worker.h +++ b/src/node_worker.h @@ -114,6 +114,7 @@ class Worker : public AsyncWrap { std::unique_ptr child_port_data_; std::shared_ptr env_vars_; + EmbedderPreloadCallback embedder_preload_; // A raw flag that is used by creator and worker threads to // sync up on pre-mature termination of worker - while in the diff --git a/test/cctest/test_environment.cc b/test/cctest/test_environment.cc index 
9b812408154287..64e38c83006a00 100644 --- a/test/cctest/test_environment.cc +++ b/test/cctest/test_environment.cc @@ -778,3 +778,31 @@ TEST_F(EnvironmentTest, RequestInterruptAtExit) { context->Exit(); } + +TEST_F(EnvironmentTest, EmbedderPreload) { + v8::HandleScope handle_scope(isolate_); + v8::Local context = node::NewContext(isolate_); + v8::Context::Scope context_scope(context); + + node::EmbedderPreloadCallback preload = [](node::Environment* env, + v8::Local process, + v8::Local require) { + CHECK(process->IsObject()); + CHECK(require->IsFunction()); + process.As() + ->Set(env->context(), + v8::String::NewFromUtf8Literal(env->isolate(), "prop"), + v8::String::NewFromUtf8Literal(env->isolate(), "preload")) + .Check(); + }; + + std::unique_ptr env( + node::CreateEnvironment(isolate_data_, context, {}, {}), + node::FreeEnvironment); + + v8::Local main_ret = + node::LoadEnvironment(env.get(), "return process.prop;", preload) + .ToLocalChecked(); + node::Utf8Value main_ret_str(isolate_, main_ret); + EXPECT_EQ(std::string(*main_ret_str), "preload"); +} From 1a7ee83fe635b4896d8ec1d145026c6e05b025c6 Mon Sep 17 00:00:00 2001 From: Cheng Zhao Date: Mon, 4 Mar 2024 08:53:13 +0900 Subject: [PATCH 2/2] fixup! update comment Co-authored-by: Joyee Cheung --- src/node.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/node.h b/src/node.h index 06a583a564cee1..b041a20318145b 100644 --- a/src/node.h +++ b/src/node.h @@ -738,8 +738,8 @@ using EmbedderPreloadCallback = // Run initialization for the environment. // -// The |preload| function will run before executing the entry point, which -// is usually used by embedders to inject scripts. +// The |preload| function, usually used by embedders to inject scripts, +// will be run by Node.js before Node.js executes the entry point. // The function is guaranteed to run before the user land module loader running // any user code, so it is safe to assume that at this point, no user code has // been run yet.