From b478d275d0f3987a2b47008f06b41f4e53121feb Mon Sep 17 00:00:00 2001
From: Dan Gohman <sunfish@mozilla.com>
Date: Tue, 10 Mar 2020 09:45:53 -0700
Subject: [PATCH 1/2] Lazy-initialize the environment variables.

This is the first in a series of PRs to make it easier to use WASI libc
in Wasm modules that don't have a `main` function. By initializing the
environment on demand, we avoid depending on having `__wasm_call_ctors`
run.

This uses weak symbols strategically to ensure that if `environ` is
used, it is initialized eagerly, but if only `getenv` and friends
are used, the environment is initialized lazily.

Eventually, I expect we'll have a convention for wasm modules without
main functions which will allow the `__wasm_call_ctors` function to be
called automatically, but this helps in simple cases for now.

Fixes #180.
---
 expected/wasm32-wasi/defined-symbols.txt      |  5 ++-
 expected/wasm32-wasi/include-all.c            |  1 +
 expected/wasm32-wasi/predefined-macros.txt    |  1 +
 .../headers/public/wasi/libc-environ.h        | 19 +++++++++
 libc-bottom-half/libpreopen/libpreopen.c      |  2 +-
 ...iron.c => __wasilibc_initialize_environ.c} | 40 ++++++++++++-------
 libc-bottom-half/sources/environ.c            | 26 ++++++++++++
 .../private/wasi/libc-environ-compat.h        | 12 ++++++
 libc-top-half/musl/src/env/clearenv.c         |  4 ++
 libc-top-half/musl/src/env/getenv.c           |  4 ++
 libc-top-half/musl/src/env/putenv.c           |  4 ++
 libc-top-half/musl/src/env/unsetenv.c         |  4 ++
 libc-top-half/musl/src/include/unistd.h       |  7 ++++
 13 files changed, 112 insertions(+), 17 deletions(-)
 create mode 100644 libc-bottom-half/headers/public/wasi/libc-environ.h
 rename libc-bottom-half/sources/{__environ.c => __wasilibc_initialize_environ.c} (54%)
 create mode 100644 libc-bottom-half/sources/environ.c
 create mode 100644 libc-top-half/headers/private/wasi/libc-environ-compat.h

diff --git a/expected/wasm32-wasi/defined-symbols.txt b/expected/wasm32-wasi/defined-symbols.txt
index 761f98031..dc20ddf72 100644
--- a/expected/wasm32-wasi/defined-symbols.txt
+++ b/expected/wasm32-wasi/defined-symbols.txt
@@ -39,7 +39,6 @@ __env_rm_add
 __env_rm_add
 __env_rm_add
 __env_rm_add
-__environ
 __exp2f_data
 __exp_data
 __expo2
@@ -252,8 +251,12 @@ __uflow
 __unlist_locked_file
 __uselocale
 __utc
+__wasilibc_ensure_environ
+__wasilibc_environ
+__wasilibc_environ
 __wasilibc_fd_renumber
 __wasilibc_find_relpath
+__wasilibc_initialize_environ
 __wasilibc_open_nomode
 __wasilibc_openat_nomode
 __wasilibc_register_preopened_fd
diff --git a/expected/wasm32-wasi/include-all.c b/expected/wasm32-wasi/include-all.c
index e1d62316e..71665bc96 100644
--- a/expected/wasm32-wasi/include-all.c
+++ b/expected/wasm32-wasi/include-all.c
@@ -166,6 +166,7 @@
 #include <unistd.h>
 #include <values.h>
 #include <wasi/api.h>
+#include <wasi/libc-environ.h>
 #include <wasi/libc-find-relpath.h>
 #include <wasi/libc.h>
 #include <wchar.h>
diff --git a/expected/wasm32-wasi/predefined-macros.txt b/expected/wasm32-wasi/predefined-macros.txt
index 9510bf2d3..ac4cd715c 100644
--- a/expected/wasm32-wasi/predefined-macros.txt
+++ b/expected/wasm32-wasi/predefined-macros.txt
@@ -3098,6 +3098,7 @@
 #define __va_copy(d,s) __builtin_va_copy(d,s)
 #define __wasi__ 1
 #define __wasi_api_h 
+#define __wasi_libc_environ_h 
 #define __wasi_libc_find_relpath_h 
 #define __wasi_libc_h 
 #define __wasilibc___errno_values_h 
diff --git a/libc-bottom-half/headers/public/wasi/libc-environ.h b/libc-bottom-half/headers/public/wasi/libc-environ.h
new file mode 100644
index 000000000..b404adda1
--- /dev/null
+++ b/libc-bottom-half/headers/public/wasi/libc-environ.h
@@ -0,0 +1,19 @@
+#ifndef __wasi_libc_environ_h
+#define __wasi_libc_environ_h
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/// Initialize the global environment variable state. Only needs to be
+/// called once; most users should call `__wasilibc_ensure_environ` instead.
+void __wasilibc_initialize_environ(void);
+
+/// If `__wasilibc_initialize_environ` has not yet been called, call it.
+void __wasilibc_ensure_environ(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/libc-bottom-half/libpreopen/libpreopen.c b/libc-bottom-half/libpreopen/libpreopen.c
index f8eca3ba2..fd1d6f960 100644
--- a/libc-bottom-half/libpreopen/libpreopen.c
+++ b/libc-bottom-half/libpreopen/libpreopen.c
@@ -524,7 +524,7 @@ __wasilibc_find_relpath(
 /// This is referenced by weak reference from crt1.c and lives in the same source
 /// file as `__wasilibc_find_relpath` so that it's linked in when it's needed.
 // Concerning the 51 -- see the comment by the constructor priority in
-// libc-bottom-half/sources/__environ.c.
+// libc-bottom-half/sources/environ.c.
 __attribute__((constructor(51)))
 static void
 __wasilibc_populate_libpreopen(void)
diff --git a/libc-bottom-half/sources/__environ.c b/libc-bottom-half/sources/__wasilibc_initialize_environ.c
similarity index 54%
rename from libc-bottom-half/sources/__environ.c
rename to libc-bottom-half/sources/__wasilibc_initialize_environ.c
index 8ced8139a..fe6001a19 100644
--- a/libc-bottom-half/sources/__environ.c
+++ b/libc-bottom-half/sources/__wasilibc_initialize_environ.c
@@ -3,29 +3,38 @@
 #include <sysexits.h>
 #include <wasi/api.h>
 #include <wasi/libc.h>
+#include <wasi/libc-environ.h>
 
-static char *empty_environ[1] = { NULL };
-char **__environ = empty_environ;
-extern __typeof(__environ) _environ __attribute__((weak, alias("__environ")));
-extern __typeof(__environ) environ __attribute__((weak, alias("__environ")));
+/// If the program doesn't use `environ`, it'll get this version of
+/// `__wasilibc_environ`, which isn't initialized with a constructor function.
+/// `getenv` etc. call `__wasilibc_ensure_environ()` before accessing it.
+/// Statically-initialize it to an invalid pointer value so that we can
+/// detect if it's been explicitly initialized (we can't use `NULL` because
+/// `clearenv` sets it to NULL).
+char **__wasilibc_environ __attribute__((weak)) = (char **)-1;
 
-// We define this function here in the same source file as __environ, so that
-// this function is called in iff environment variable support is used.
-// Concerning the 50 -- levels up to 100 are reserved for the implementation,
-// so we an arbitrary number in the middle of the range to allow other
-// reserved things to go before or after.
-__attribute__((constructor(50)))
-static void __wasilibc_populate_environ(void) {
-    __wasi_errno_t err;
+// See the comments in libc-environ.h.
+void __wasilibc_ensure_environ(void) {
+    if (__wasilibc_environ == (char **)-1) {
+        __wasilibc_initialize_environ();
+    }
+}
+
+/// Avoid dynamic allocation for the case where there are no environment
+/// variables, but we still need a non-NULL pointer to an (empty) array.
+static char *empty_environ[1] = { NULL };
 
+// See the comments in libc-environ.h.
+void __wasilibc_initialize_environ(void) {
     // Get the sizes of the arrays we'll have to create to copy in the environment.
     size_t environ_count;
     size_t environ_buf_size;
-    err = __wasi_environ_sizes_get(&environ_count, &environ_buf_size);
+    __wasi_errno_t err = __wasi_environ_sizes_get(&environ_count, &environ_buf_size);
     if (err != __WASI_ERRNO_SUCCESS) {
         goto oserr;
     }
     if (environ_count == 0) {
+        __wasilibc_environ = empty_environ;
         return;
     }
 
@@ -49,7 +58,8 @@ static void __wasilibc_populate_environ(void) {
         goto software;
     }
 
-    // Fill the environment chars, and the __environ array with pointers into those chars.
+    // Fill the environment chars, and the `__wasilibc_environ` array with
+    // pointers into those chars.
     // TODO: Remove the casts on `environ_ptrs` and `environ_buf` once the witx is updated with char8 support.
     err = __wasi_environ_get((uint8_t **)environ_ptrs, (uint8_t *)environ_buf);
     if (err != __WASI_ERRNO_SUCCESS) {
@@ -58,7 +68,7 @@ static void __wasilibc_populate_environ(void) {
         goto oserr;
     }
 
-    __environ = environ_ptrs;
+    __wasilibc_environ = environ_ptrs;
     return;
 oserr:
     _Exit(EX_OSERR);
diff --git a/libc-bottom-half/sources/environ.c b/libc-bottom-half/sources/environ.c
new file mode 100644
index 000000000..bc5a07872
--- /dev/null
+++ b/libc-bottom-half/sources/environ.c
@@ -0,0 +1,26 @@
+#include <unistd.h>
+#include <stdlib.h>
+#include <sysexits.h>
+#include <wasi/api.h>
+#include <wasi/libc.h>
+#include <wasi/libc-environ.h>
+
+// If the program does use `environ`, it'll get this version of
+// `__wasilibc_environ`, which is initialized with a constructor function, so
+// that it's initialized whenever user code might want to access it.
+char **__wasilibc_environ;
+extern __typeof(__wasilibc_environ) _environ
+    __attribute__((weak, alias("__wasilibc_environ")));
+extern __typeof(__wasilibc_environ) environ
+    __attribute__((weak, alias("__wasilibc_environ")));
+
+// We define this function here in the same source file as
+// `__wasilibc_environ`, so that this function is linked in iff environment
+// variable support is used.
+// Concerning the 50 -- levels up to 100 are reserved for the implementation,
+// so we use an arbitrary number in the middle of the range to allow other
+// reserved things to go before or after.
+__attribute__((constructor(50)))
+static void __wasilibc_initialize_environ_eagerly(void) {
+    __wasilibc_initialize_environ();
+}
diff --git a/libc-top-half/headers/private/wasi/libc-environ-compat.h b/libc-top-half/headers/private/wasi/libc-environ-compat.h
new file mode 100644
index 000000000..fa2747dec
--- /dev/null
+++ b/libc-top-half/headers/private/wasi/libc-environ-compat.h
@@ -0,0 +1,12 @@
+// This header file is meant to be included within the body of a function
+// which uses `__environ`. Code using `__environ` expects it will be initialized
+// eagerly. `__wasilibc_environ` is initialized lazily. Provide `__environ` as
+// an alias and arrange for the lazy initialization to be performed.
+
+extern char **__wasilibc_environ;
+
+__wasilibc_ensure_environ();
+
+#ifndef __wasilibc_environ
+#define __environ __wasilibc_environ
+#endif
diff --git a/libc-top-half/musl/src/env/clearenv.c b/libc-top-half/musl/src/env/clearenv.c
index db8e8e94b..996267b3a 100644
--- a/libc-top-half/musl/src/env/clearenv.c
+++ b/libc-top-half/musl/src/env/clearenv.c
@@ -7,6 +7,10 @@ weak_alias(dummy, __env_rm_add);
 
 int clearenv()
 {
+#ifdef __wasilibc_unmodified_upstream // Lazy environment variable init.
+#else
+#include "wasi/libc-environ-compat.h"
+#endif
 	char **e = __environ;
 	__environ = 0;
 	if (e) while (*e) __env_rm_add(*e++, 0);
diff --git a/libc-top-half/musl/src/env/getenv.c b/libc-top-half/musl/src/env/getenv.c
index a90d39cf7..d50bc35a5 100644
--- a/libc-top-half/musl/src/env/getenv.c
+++ b/libc-top-half/musl/src/env/getenv.c
@@ -4,6 +4,10 @@
 
 char *getenv(const char *name)
 {
+#ifdef __wasilibc_unmodified_upstream // Lazy environment variable init.
+#else
+#include "wasi/libc-environ-compat.h"
+#endif
 	size_t l = __strchrnul(name, '=') - name;
 	if (l && !name[l] && __environ)
 		for (char **e = __environ; *e; e++)
diff --git a/libc-top-half/musl/src/env/putenv.c b/libc-top-half/musl/src/env/putenv.c
index dce8c8288..08cfa01ac 100644
--- a/libc-top-half/musl/src/env/putenv.c
+++ b/libc-top-half/musl/src/env/putenv.c
@@ -7,6 +7,10 @@ weak_alias(dummy, __env_rm_add);
 
 int __putenv(char *s, size_t l, char *r)
 {
+#ifdef __wasilibc_unmodified_upstream // Lazy environment variable init.
+#else
+#include "wasi/libc-environ-compat.h"
+#endif
 	size_t i=0;
 	if (__environ) {
 		for (char **e = __environ; *e; e++, i++)
diff --git a/libc-top-half/musl/src/env/unsetenv.c b/libc-top-half/musl/src/env/unsetenv.c
index b14c4c929..0d3261b2d 100644
--- a/libc-top-half/musl/src/env/unsetenv.c
+++ b/libc-top-half/musl/src/env/unsetenv.c
@@ -13,6 +13,10 @@ int unsetenv(const char *name)
 		errno = EINVAL;
 		return -1;
 	}
+#ifdef __wasilibc_unmodified_upstream // Lazy environment variable init.
+#else
+#include "wasi/libc-environ-compat.h"
+#endif
 	if (__environ) {
 		char **e = __environ, **eo = e;
 		for (; *e; e++)
diff --git a/libc-top-half/musl/src/include/unistd.h b/libc-top-half/musl/src/include/unistd.h
index 1b4605c7c..f372afe2d 100644
--- a/libc-top-half/musl/src/include/unistd.h
+++ b/libc-top-half/musl/src/include/unistd.h
@@ -3,7 +3,14 @@
 
 #include "../../include/unistd.h"
 
+#ifdef __wasilibc_unmodified_upstream // Lazy environment variable init.
 extern char **__environ;
+#else
+// To support lazy initialization of environment variables, `__environ` is
+// omitted, and a lazy `__wasilibc_environ` is used instead. Use
+// "wasi/libc-environ-compat.h" in functions that use `__environ`.
+#include "wasi/libc-environ.h"
+#endif
 
 hidden int __dup3(int, int, int);
 hidden int __mkostemps(char *, int, int);

From c0087388fc7915b09b69b6b772ed4ea8e312864e Mon Sep 17 00:00:00 2001
From: Dan Gohman <sunfish@mozilla.com>
Date: Tue, 10 Mar 2020 17:02:01 -0700
Subject: [PATCH 2/2] Add comments explaining the libc-environ-compat.h header
 usage.

---
 libc-top-half/musl/src/env/clearenv.c | 3 +++
 libc-top-half/musl/src/env/getenv.c   | 3 +++
 libc-top-half/musl/src/env/putenv.c   | 3 +++
 libc-top-half/musl/src/env/unsetenv.c | 3 +++
 4 files changed, 12 insertions(+)

diff --git a/libc-top-half/musl/src/env/clearenv.c b/libc-top-half/musl/src/env/clearenv.c
index 996267b3a..0abbec302 100644
--- a/libc-top-half/musl/src/env/clearenv.c
+++ b/libc-top-half/musl/src/env/clearenv.c
@@ -9,6 +9,9 @@ int clearenv()
 {
 #ifdef __wasilibc_unmodified_upstream // Lazy environment variable init.
 #else
+// This specialized header is included within the function body to arrange for
+// the environment variables to be lazily initialized. It redefines `__environ`,
+// so don't remove or reorder it with respect to other code.
 #include "wasi/libc-environ-compat.h"
 #endif
 	char **e = __environ;
diff --git a/libc-top-half/musl/src/env/getenv.c b/libc-top-half/musl/src/env/getenv.c
index d50bc35a5..346c333f8 100644
--- a/libc-top-half/musl/src/env/getenv.c
+++ b/libc-top-half/musl/src/env/getenv.c
@@ -6,6 +6,9 @@ char *getenv(const char *name)
 {
 #ifdef __wasilibc_unmodified_upstream // Lazy environment variable init.
 #else
+// This specialized header is included within the function body to arrange for
+// the environment variables to be lazily initialized. It redefines `__environ`,
+// so don't remove or reorder it with respect to other code.
 #include "wasi/libc-environ-compat.h"
 #endif
 	size_t l = __strchrnul(name, '=') - name;
diff --git a/libc-top-half/musl/src/env/putenv.c b/libc-top-half/musl/src/env/putenv.c
index 08cfa01ac..0d5989541 100644
--- a/libc-top-half/musl/src/env/putenv.c
+++ b/libc-top-half/musl/src/env/putenv.c
@@ -9,6 +9,9 @@ int __putenv(char *s, size_t l, char *r)
 {
 #ifdef __wasilibc_unmodified_upstream // Lazy environment variable init.
 #else
+// This specialized header is included within the function body to arrange for
+// the environment variables to be lazily initialized. It redefines `__environ`,
+// so don't remove or reorder it with respect to other code.
 #include "wasi/libc-environ-compat.h"
 #endif
 	size_t i=0;
diff --git a/libc-top-half/musl/src/env/unsetenv.c b/libc-top-half/musl/src/env/unsetenv.c
index 0d3261b2d..40f0eea6d 100644
--- a/libc-top-half/musl/src/env/unsetenv.c
+++ b/libc-top-half/musl/src/env/unsetenv.c
@@ -15,6 +15,9 @@ int unsetenv(const char *name)
 	}
 #ifdef __wasilibc_unmodified_upstream // Lazy environment variable init.
 #else
+// This specialized header is included within the function body to arrange for
+// the environment variables to be lazily initialized. It redefines `__environ`,
+// so don't remove or reorder it with respect to other code.
 #include "wasi/libc-environ-compat.h"
 #endif
 	if (__environ) {