From 7d1034acf2750519c3eac05cf6f71ef19f4d9ba0 Mon Sep 17 00:00:00 2001
From: Luke Wagner <mail@lukewagner.name>
Date: Thu, 10 Oct 2024 19:42:48 -0500
Subject: [PATCH 01/22] Add 'stream' and 'future' types

---
 design/mvp/Async.md                     | 152 +++-
 design/mvp/Binary.md                    |  17 +-
 design/mvp/CanonicalABI.md              | 863 +++++++++++++++++++---
 design/mvp/Explainer.md                 | 111 ++-
 design/mvp/canonical-abi/definitions.py | 468 ++++++++++--
 design/mvp/canonical-abi/run_tests.py   | 934 ++++++++++++++++++++++--
 6 files changed, 2313 insertions(+), 232 deletions(-)

diff --git a/design/mvp/Async.md b/design/mvp/Async.md
index 2a44f8c5..4b7e1b73 100644
--- a/design/mvp/Async.md
+++ b/design/mvp/Async.md
@@ -17,6 +17,7 @@ summary of the motivation and animated sketch of the design in action.
   * [Current task](#current-task)
   * [Subtask and Supertask](#subtask-and-supertask)
   * [Structured concurrency](#structured-concurrency)
+  * [Streams and Futures](#streams-and-futures)
   * [Waiting](#waiting)
   * [Backpressure](#backpressure)
   * [Returning](#returning)
@@ -106,8 +107,30 @@ Thus, backpressure combined with the partitioning of low-level state provided
 by the Component Model enables sync and async code to interoperate while
 preserving the expectations of both.
 
-[TODO](#todo): `future` and `stream` types that can be used in function
-signatures will be added next.
+In addition to being able to define and call whole functions asynchronously,
+the `stream` and `future` types can be used in function signatures to pass
+parameters and results incrementally over time, achieving finer-grained
+concurrency. Streams and futures are thus not defined to be free-standing
+resources with their own internal memory buffers (like a traditional channel or
+pipe) but, rather, more-primitive control-flow mechanisms that synchronize the
+incremental passing of parameters and results during cross-component calls.
+Higher-level resources like channels and pipes can then be defined in terms
+of these lower-level `stream` and `future` primitives, e.g.:
+```wit
+resource pipe {
+  constructor(buffer-size: u32);
+  write: func(bytes: stream<u8>) -> result;
+  read: func() -> stream<u8>;
+}
+```
+but also many other domain-specific concurrent resources like WASI HTTP request
+and response bodies or WASI blobs. Streams and futures are however high-level
+enough to be bound automatically to many source languages' built-in concurrency
+features like futures, promises, streams, generators and iterators, unlike
+lower-level concurrency primitives (like callbacks or `wasi:io@0.2.0`
+`pollable`s). Thus, the Component Model seeks to provide the lowest-level
+fine-grained concurrency primitives that are high-level and idiomatic enough to
+enable automatic generation of usable language-integrated bindings.
 
 
 ## Concepts
@@ -180,18 +203,80 @@ invocation of an export by the host. Moreover, at any one point in time, the
 set of tasks active in a linked component graph form a forest of async call
 trees which e.g., can be visualized using a traditional flamegraph.
 
-The Canonical ABI's Python code enforces Structured Concurrency by maintaining
-a simple per-[`Task`] `num_async_subtasks` counter that traps if not zero when
-the `Task` finishes.
+The Canonical ABI's Python code enforces Structured Concurrency by incrementing
+a per-[`Task`] counter when a `Subtask` is created, decrementing when a
+`Subtask` is destroyed, and trapping if the counter is not zero when the `Task`
+attempts to exit.
+
+### Streams and Futures
+
+Streams and Futures have two "ends": a *readable end* and *writable end*. When
+*consuming* a `stream` or `future` value as a parameter (of an export call
+with a `stream` or `future` somewhere in the parameter types) or result (of an
+import call with a `stream` or `future` somewhere in the result type), the
+receiver always gets *unique ownership* of the *readable end* of the `stream`
+or `future`. When *producing* a `stream` or `future` value as a parameter (of
+an import call) or result (of an export call), the producer can either
+*transfer ownership* of a readable end it has already received or it can
+create a fresh writable end (via `stream.new` or `future.new`) and lift this
+writable end (maintaining ownership of the writable end, but creating a fresh
+readable end for the receiver). To maintain the invariant that readable ends
+are unique, a writable end can be lifted at most once, trapping otherwise.
+
+Based on this, `stream<T>` and `future<T>` values can be passed between
+functions as if they were synchronous `list<T>` and `T` values, resp. For
+example, given `f` and `g` with types:
+```wit
+f: func(x: whatever) -> stream<T>;
+g: func(s: stream<T>) -> stuff;
+```
+`g(f(x))` works as you might hope, concurrently streaming `x` into `f` which
+concurrently streams its results into `g`. (The addition of [`error`](#TODO)
+will provide a generic answer to the question of what happens if `f`
+experiences an error: `f` can close its returned writable stream end with an
+`error` that will be propagated into `g` which should then propagate the error
+somehow into `stuff`.)
+
+If a component instance *would* receive the readable end of a stream for which
+it already owns the writable end, the readable end disappears and the existing
+writable end is received instead (since the guest can now handle the whole
+stream more efficiently wholly from within guest code). E.g., if the same
+component instance defined `f` and `g` above, the composition `g(f(x))` would
+just instruct the guest to stream directly from `f` into `g` without crossing a
+component boundary or performing any extra copies. Thus, strengthening the
+previously-mentioned invariant, the readable and writable ends of a stream are
+unique *and never in the same component*.
+
+Given the readable or writable end of a stream, core wasm code can call the
+imported `stream.read` or `stream.write` canonical built-ins, passing the
+pointer and length of a linear-memory buffer to write into or read from, resp.
+These built-ins either return immediately, if one or more elements could be
+written or read without blocking, or return a sentinel "blocked" value
+indicating that the read or write will execute concurrently. The
+readable and writable ends of streams and futures each have a well-defined
+parent `Task` that will receive "progress" events on all child streams/futures
+that have previously blocked.
+
+From a [structured-concurrency](#structured-concurrency) perspective, the
+readable and writable ends of streams and futures are leaves of the async call
+tree. Unlike subtasks, the parent of the readable ends of streams and futures
+*can* change over time (when transferred via function call, as mentioned
+above). However, there is always *some* parent `Task` and this parent `Task`
+is prevented from orphaning its children using the same reference-counting
+guard mentioned above for subtasks.
 
 ### Waiting
 
 When a component asynchronously lowers an import, it is explicitly requesting
 that, if the import blocks, control flow be returned back to the calling task
-so that it can do something else. Eventually though a task may run out of other
+so that it can do something else. Similarly, if `stream.read` or `stream.write`
+would block, they return a "blocked" code so that the caller can continue to
+make progress on other things. But eventually, a task will run out of other
 things to do and will need to **wait** for progress on one of the task's
-subtasks. While a task is waiting, the runtime can switch to other running
-tasks or start new tasks by invoking exports.
+subtasks, readable stream ends, writable stream ends, readable future ends or
+writable future ends, which are collectively called its **waitables**. While a
+task is waiting on its waitables, the Component Model runtime can switch to
+other running tasks or start new tasks by invoking exports.
 
 The Canonical ABI provides two ways for a task to wait:
 * The task can call the [`task.wait`] built-in to synchronously wait for
@@ -234,13 +319,23 @@ the "started" state.
 
 ### Returning
 
-The way an async Core WebAssembly function returns its value is by calling
-[`task.return`], passing the core values that are to be lifted.
-
-The main reason to have `task.return` is so that a task can continue execution
-after returning its value. This is useful for various finalization tasks (such
-as logging, billing or metrics) that don't need to be on the critical path of
-returning a value to the caller.
+The way an async function returns its value is by calling [`task.return`],
+passing the core values that are to be lifted as *parameters*. Additionally,
+when the `always-task-return` `canonopt` is set, synchronous functions also
+return their values by calling `task.return` (as a more expressive and
+general alternative to `post-return`).
+
+Returning values by calling `task.return` allows a task to continue executing
+even after it has passed its initial results to the caller. This can be useful
+for various finalization tasks (freeing memory or performing logging, billing
+or metrics operations) that don't need to be on the critical path of returning
+a value to the caller, but the major use of executing code after `task.return`
+is to continue to read and write from streams and futures. For example, a
+stream transformer function of type `func(in: stream<T>) -> stream<U>` will
+immediately `task.return` a stream created via `stream.new` and then sit in a
+loop interleaving `stream.read`s (of the readable end passed for `in`) and
+`stream.write`s (of the writable end it `stream.new`ed) before exiting the
+task.
 
 A task may not call `task.return` unless it is in the "started" state. Once
 `task.return` is called, the task is in the "returned" state. A task can only
@@ -419,21 +514,24 @@ For now, this remains a [TODO](#todo) and validation will reject `async`-lifted
 
 ## TODO
 
-Native async support is being proposed in progressive chunks. The following
-features will be added in future chunks to complete "async" in Preview 3:
-* `future`/`stream`/`error`: add for use in function types for finer-grained
-  concurrency
-* `subtask.cancel`: allow a supertask to signal to a subtask that its result is
-  no longer wanted and to please wrap it up promptly
-* allow "tail-calling" a subtask so that the current wasm instance can be torn
-  down eagerly
-* `task.index`+`task.wake`: allow tasks in the same instance to wait on and
-  wake each other (async condvar-style)
+Native async support is being proposed incrementally. The following features
+will be added in future chunks roughly in the order listed to complete the full
+"async" story:
+* add `error` type that can be included when closing a stream/future
 * `nonblocking` function type attribute: allow a function to declare in its
   type that it will not transitively do anything blocking
+* define what `async` means for `start` functions (top-level await + background
+  tasks), along with cross-task coordination built-ins
+* `subtask.cancel`: allow a supertask to signal to a subtask that its result is
+  no longer wanted and to please wrap it up promptly
+* zero-copy forwarding/splicing and built-in way to "tail-call" a subtask so
+  that the current wasm instance can be torn down eagerly while preserving
+  structured concurrency
+* some way to say "no more elements are coming for a while"
 * `recursive` function type attribute: allow a function to be reentered
-  recursively (instead of trapping)
-* enable `async` `start` functions
+  recursively (instead of trapping) and link inner and outer activations
+* allow pipelining multiple `stream.read`/`write` calls
+* allow chaining multiple async calls together ("promise pipelining")
 * integrate with `shared`: define how to lift and lower functions `async` *and*
   `shared`
 
diff --git a/design/mvp/Binary.md b/design/mvp/Binary.md
index cbac87fa..565272bc 100644
--- a/design/mvp/Binary.md
+++ b/design/mvp/Binary.md
@@ -202,6 +202,8 @@ defvaltype    ::= pvt:<primvaltype>                       => pvt
                 | 0x6a t?:<valtype>? u?:<valtype>?        => (result t? (error u)?)
                 | 0x69 i:<typeidx>                        => (own i)
                 | 0x68 i:<typeidx>                        => (borrow i)
+                | 0x66 i:<typeidx>                        => (stream i)
+                | 0x65 i:<typeidx>                        => (future i)
 labelvaltype  ::= l:<label'> t:<valtype>                  => l t
 case          ::= l:<label'> t?:<valtype>? 0x00           => (case l t?)
 label'        ::= len:<u32> l:<label>                     => l    (if len = |l|)
@@ -290,7 +292,19 @@ canon    ::= 0x00 0x00 f:<core:funcidx> opts:<opts> ft:<typeidx> => (canon lift
            | 0x0a m:<core:memidx>                                => (canon task.wait (memory m) (core func)) 🔀
            | 0x0b m:<core:memidx>                                => (canon task.poll (memory m) (core func)) 🔀
            | 0x0c                                                => (canon task.yield (core func)) 🔀
-           | 0x0d                                                => (canon subtask.drop (core func)) 🔀
+           | 0x0d                                                => (canon waitable.drop (core func)) 🔀
+           | 0x0e t:<typeidx>                                    => (canon stream.new t (core func)) 🔀
+           | 0x0f                                                => (canon stream.read (core func)) 🔀
+           | 0x10                                                => (canon stream.write (core func)) 🔀
+           | 0x11 async?:<async?>                                => (canon stream.cancel-read async? (core func)) 🔀
+           | 0x12 async?:<async?>                                => (canon stream.cancel-write async? (core func)) 🔀
+           | 0x13 t:<typeidx>                                    => (canon future.new t (core func)) 🔀
+           | 0x14                                                => (canon future.read (core func)) 🔀
+           | 0x15                                                => (canon future.write (core func)) 🔀
+           | 0x16 async?:<async?>                                => (canon future.cancel-read async? (core func)) 🔀
+           | 0x17 async?:<async?>                                => (canon future.cancel-write async? (core func)) 🔀
+async?   ::= 0x00                                                =>
+           | 0x01                                                => async
 opts     ::= opt*:vec(<canonopt>)                                => opt*
 canonopt ::= 0x00                                                => string-encoding=utf8
            | 0x01                                                => string-encoding=utf16
@@ -300,6 +314,7 @@ canonopt ::= 0x00                                                => string-encod
            | 0x05 f:<core:funcidx>                               => (post-return f)
            | 0x06                                                => async 🔀
            | 0x07 f:<core:funcidx>                               => (callback f) 🔀
+           | 0x08                                                => always-task-return 🔀
 ```
 Notes:
 * The second `0x00` byte in `canon` stands for the `func` sort and thus the
diff --git a/design/mvp/CanonicalABI.md b/design/mvp/CanonicalABI.md
index b4f8947c..d6a4bd6f 100644
--- a/design/mvp/CanonicalABI.md
+++ b/design/mvp/CanonicalABI.md
@@ -12,8 +12,10 @@ being specified here.
   * [Canonical ABI Options](#canonical-abi-options)
   * [Runtime State](#runtime-state)
     * [Resource State](#resource-state)
-    * [Async State](#async-state)
+    * [Task State](#task-state)
+    * [Buffer, Stream and Future State](#buffer-stream-and-future-state)
   * [Despecialization](#despecialization)
+  * [Type Predicates](#type-predicates)
   * [Alignment](#alignment)
   * [Element Size](#element-size)
   * [Loading](#loading)
@@ -33,6 +35,10 @@ being specified here.
   * [`canon task.wait`](#-canon-taskwait) 🔀
   * [`canon task.poll`](#-canon-taskpoll) 🔀
   * [`canon task.yield`](#-canon-taskyield) 🔀
+  * [`canon {stream,future}.new`](#-canon-streamfuturenew) 🔀
+  * [`canon {stream,future}.{read,write}`](#-canon-streamfuturereadwrite) 🔀
+  * [`canon {stream,future}.cancel-{read,write}`](#-canon-streamfuturecancel-readwrite) 🔀
+  * [`canon waitable.drop`](#-canon-waitabledrop) 🔀
 
 
 ## Supporting definitions
@@ -75,27 +81,37 @@ function's declared return type.
 
 ### Call Context
 
-The subsequent definitions depend on three kinds of ambient information:
-* static ABI options supplied via [`canonopt`]
-* dynamic state in the containing component instance
-* dynamic state in the [current task]
+Most Canonical ABI definitions depend on some ambient information which is
+established by the `canon lift`- or `canon lower`-defined function that is
+being called:
+* the ABI options supplied via [`canonopt`]
+* the containing component instance
+* the [current task]
 
-These sources of ambient context are stored as the respective `opts`, `inst`
-and `task` fields of the `CallContext` object:
+These pieces of ambient information are stored in the first three fields of
+the `CallContext` that is threaded through all the Python functions below as
+the `cx` parameter/field.
 ```python
 class CallContext:
   opts: CanonicalOptions
   inst: ComponentInstance
   task: Task
+  todo: int
 
   def __init__(self, opts, inst, task):
     self.opts = opts
     self.inst = inst
     self.task = task
+    self.todo = 0
+
+  def end_call(self):
+    trap_if(self.todo)
 ```
-The `cx` parameter in functions below refers to the ambient `CallContext`. The
-`Task` and `Subtask` classes derive `CallContext` and thus having a `task` or
-`subtask` also establishes the ambient `CallContext`.
+Additionally, import and export calls have a `todo` count that is incremented
+and decremented by various Canonical ABI rules below to track outstanding
+obligations to do something (e.g., drop a `borrow`ed handle) before the end of
+the call. The `Task` and `Subtask` classes derive `CallContext` and call
+`self.end_call()` when they complete.
 
 
 ### Canonical ABI Options
@@ -111,6 +127,7 @@ class CanonicalOptions:
   post_return: Optional[Callable] = None
   sync: bool = True # = !canonopt.async
   callback: Optional[Callable] = None
+  always_task_return: bool = False
 ```
 (Note that the `async` `canonopt` is inverted to `sync` here for the practical
 reason that `async` is a keyword and most branches below want to start with the
@@ -126,7 +143,7 @@ Canonical ABI and introduced below as the fields are used.
 ```python
 class ComponentInstance:
   resources: ResourceTables
-  async_subtasks: Table[Subtask]
+  waitables: Table[Subtask|StreamHandle|FutureHandle]
   num_tasks: int
   may_leave: bool
   backpressure: bool
@@ -136,7 +153,7 @@ class ComponentInstance:
 
   def __init__(self):
     self.resources = ResourceTables()
-    self.async_subtasks = Table[Subtask]()
+    self.waitables = Table[Subtask|StreamHandle|FutureHandle]()
     self.num_tasks = 0
     self.may_leave = True
     self.backpressure = False
@@ -290,7 +307,7 @@ statically eliminate the `own` and the `lend_count` xor `scope` fields,
 and guards thereof.
 
 
-#### Async State
+#### Task State
 
 Additional runtime state is required to implement the canonical built-ins and
 check that callers and callees uphold their respective parts of the call
@@ -312,9 +329,13 @@ class EventCode(IntEnum):
   CALL_RETURNED = CallState.RETURNED
   CALL_DONE = CallState.DONE
   YIELDED = 4
+  STREAM_READ = 5
+  STREAM_WRITE = 6
+  FUTURE_READ = 7
+  FUTURE_WRITE = 8
 
-EventTuple = tuple[EventCode, int]
-EventCallback = Callable[[], EventTuple]
+EventTuple = tuple[EventCode, int, int]
+EventCallback = Callable[[], Optional[EventTuple]]
 OnBlockCallback = Callable[[Awaitable], Awaitable]
 ```
 The `CallState` enum describes the linear sequence of states that an async call
@@ -411,7 +432,6 @@ class Task(CallContext):
   caller: Optional[Task]
   on_return: Optional[Callable]
   on_block: OnBlockCallback
-  need_to_drop: int
   events: list[EventCallback]
   has_events: asyncio.Event
 
@@ -421,7 +441,6 @@ class Task(CallContext):
     self.caller = caller
     self.on_return = on_return
     self.on_block = on_block
-    self.need_to_drop = 0
     self.events = []
     self.has_events = asyncio.Event()
 ```
@@ -535,28 +554,35 @@ using a `callback`, by returning to the event loop) to learn about progress
 made by async subtasks which are reported to this task by `notify`.
 ```python
   async def wait(self) -> EventTuple:
-    await self.wait_on(self.has_events.wait())
-    return self.next_event()
-
-  def next_event(self) -> EventTuple:
-    event = self.events.pop(0)
-    if not self.events:
-      self.has_events.clear()
-    return event()
+    while True:
+      await self.wait_on(self.has_events.wait())
+      if (e := self.maybe_next_event()):
+        return e
+
+  def maybe_next_event(self) -> Optional[EventTuple]:
+    while self.events:
+      event = self.events.pop(0)
+      if (e := event()):
+        return e
+    self.has_events.clear()
+    return None
 
   def notify(self, event: EventCallback):
     self.events.append(event)
     self.has_events.set()
 ```
-Note that events are represented as *first-class functions* that are called by
-`maybe_next_event` to produce the tuple of scalar values that are actually
-delivered to core wasm. This allows an event source to report the latest status
-when the event is handed to the core wasm code instead of the status when the
-event was first generated. This allows multiple redundant events to be
-collapsed into one, reducing overhead. Although this Python code represents
-events as a list of closures, an optimizing implementation should be able to
-avoid actually allocating these things and instead embed a linked list of
-"ready" events into the table elements associated with the events.
+Note that events are represented as closures (first-class functions) that
+either return a tuple of scalar values to deliver to core wasm, or `None`. This
+flexibility allows multiple redundant events to be collapsed into one (e.g.,
+when a `Subtask` advances `CallState` multiple times before the event enqueued
+by the initial state change is delivered) and also for events to be
+retroactively removed (e.g., when a `stream.cancel-read` "steals" a pending
+`STREAM_READ` event that was enqueued but not yet delivered). Although this
+Python code represents `events` as a list of closures, an optimizing
+implementation should be able to avoid dynamically allocating this list and
+instead represent `events` as a linked list embedded in the elements of the
+`waitables` table (noting that, by design, any given `waitables` element can be
+in the `events` list at most once).
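+
+As a minimal self-contained sketch of this closure-based event queue (separate
+from the Canonical ABI definitions above; the names are hypothetical), the
+following snippet shows how a second state change reuses an already-enqueued
+event so that only the latest state is delivered; returning `None` from such a
+closure is, analogously, how an already-enqueued event is retroactively
+dropped:
+```python
+events = []
+
+class FakeSubtask:
+  state = 1          # e.g., CallState.STARTED
+  enqueued = False
+
+def enqueue_event(sub):
+  if not sub.enqueued:           # collapse N state changes into 1 pending event
+    sub.enqueued = True
+    def event():
+      sub.enqueued = False
+      return (sub.state, 5, 0)   # report the state at *delivery* time
+    events.append(event)
+
+sub = FakeSubtask()
+enqueue_event(sub)
+sub.state = 3                    # advances again before delivery
+enqueue_event(sub)               # no second event is enqueued
+assert len(events) == 1
+assert events.pop(0)() == (3, 5, 0)
+```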
 
 A task may also cooperatively yield (via `canon task.yield`), allowing the
 runtime to switch execution to another task without having to wait for any
@@ -574,9 +600,7 @@ assume other tasks can execute, just like with `task.wait`.
 ```python
   async def poll(self) -> Optional[EventTuple]:
     await self.yield_()
-    if not self.events:
-      return None
-    return self.next_event()
+    return self.maybe_next_event()
 ```
 
 The `return_` method is called by either `canon_task_return` or `canon_lift`
@@ -589,7 +613,7 @@ more than once which must be checked by `return_` and `exit`.
 ```python
   def return_(self, flat_results):
     trap_if(not self.on_return)
-    if self.opts.sync:
+    if self.opts.sync and not self.opts.always_task_return:
       maxflat = MAX_FLAT_RESULTS
     else:
       maxflat = MAX_FLAT_PARAMS
@@ -598,24 +622,26 @@ more than once which must be checked by `return_` and `exit`.
     self.on_return(vs)
     self.on_return = None
 ```
+The maximum number of flattened core wasm values depends on whether this is a
+normal synchronous call (in which return values are returned by core wasm) or
+a newer async or synchronous-using-`always-task-return` call, in which return
+values are passed as parameters to `canon task.return`.
 
 Lastly, when a task exits, the runtime enforces the guard conditions mentioned
-above and allows a pending task to start. The `need_to_drop` counter is
-incremented and decremented below as a way of ensuring that a task does
-something (like dropping a resource or subtask handle) before the task exits.
+above and allows a pending task to start.
 ```python
   def exit(self):
     assert(current_task.locked())
-    assert(not self.events)
+    assert(not self.maybe_next_event())
     assert(self.inst.num_tasks >= 1)
     trap_if(self.on_return)
-    trap_if(self.need_to_drop != 0)
     trap_if(self.inst.num_tasks == 1 and self.inst.backpressure)
     self.inst.num_tasks -= 1
     if self.opts.sync:
       assert(not self.inst.interruptible.is_set())
       self.inst.interruptible.set()
     self.maybe_start_pending_task()
+    self.end_call()
 ```
 
 While `canon_lift` creates `Task`s, `canon_lower` creates `Subtask` objects.
@@ -668,7 +694,7 @@ stored inline in the native stack frame.
 
 The `maybe_notify_supertask` method called by `on_start`, `on_return` and
 `finish` (next) only sends events to the supertask if this `Subtask` actually
-blocked and got added to the `async_subtasks` table (signalled by
+blocked and got added to the `waitables` table (as indicated by
 `notify_supertask` being set). Additionally, `maybe_notify_supertask` uses the
 `enqueued` flag and the fact that "events" are first-class functions to
 collapse N events down to 1 if a subtask advances state multiple times before
@@ -680,10 +706,10 @@ the event loop when only the most recent state matters.
       self.enqueued = True
       def subtask_event():
         self.enqueued = False
-        i = self.inst.async_subtasks.array.index(self)
+        i = self.inst.waitables.array.index(self)
         if self.state == CallState.DONE:
           self.release_lenders()
-        return (EventCode(self.state), i)
+        return (EventCode(self.state), i, 0)
       self.task.notify(subtask_event)
 ```
 
@@ -728,13 +754,321 @@ when the subtask finishes.
 ```
 
 Lastly, after a `Subtask` has finished and notified its supertask (thereby
-clearing `enqueued`), it may be dropped from the `async_subtasks` table:
+clearing `enqueued`), it may be dropped from the `waitables` table. This
+effectively ends the call from the perspective of the caller and traps (via
+`end_call`) unless the `Subtask`'s `todo` count is zero.
 ```python
   def drop(self):
     trap_if(self.enqueued)
     trap_if(self.state != CallState.DONE)
-    self.task.need_to_drop -= 1
+    self.task.todo -= 1
+    self.end_call()
+```
+
+
+#### Buffer, Stream and Future State
+
+At a high level, values of `stream` or `future` type are handles to special
+resources that components use to synchronize the directional copy of values
+between buffers supplied by the components involved, avoiding the need for
+intermediate buffers or copies. In support of the general [virtualization
+goals] of the Component Model, the host can be on either side of the copy
+unbeknownst to the component on the other side. Thus, the Python representation
+of lifted `future` and `stream` values are *abstract interfaces* that are meant
+to be implemented either by arbitrary host code *or* by wasm code using the
+Python classes below that end with `GuestImpl`:
+```python
+class Buffer:
+  MAX_LENGTH = 2**30 - 1
+
+class WritableBuffer(Buffer):
+  remain: Callable[[], int]
+  lower: Callable[[list[any]]]
+
+class ReadableStream:
+  closed: Callable[[], bool]
+  read: Callable[[WritableBuffer, OnBlockCallback], Awaitable]
+  cancel_read: Callable[[WritableBuffer, OnBlockCallback], Awaitable]
+  close: Callable[[]]
+```
+Going through the methods in these interfaces:
+* `remain` returns how many values may be `lower`ed into the `WritableBuffer`.
+* `read` may only be called if `!closed`. `read` is asynchronous (as indicated
+  by the `Awaitable` return type) and can block. If `read` blocks, it must call
+  the given `OnBlockCallback` to allow the async caller to make progress in the
+  meantime. `read` returns its values by calling `lower` 0..N times on the
+  given `WritableBuffer`. Once `read` returns, it must not hold onto a
+  reference to the given `WritableBuffer` (as if it was passed via `borrow`).
+* `cancel_read` must only be called while there is an outstanding blocked
+  `read` and must be given the same `WritableBuffer` that was passed to `read`.
+  `cancel_read` is async and must call `OnBlockCallback` if it blocks.
+  `cancel_read` must only return once the given `WritableBuffer` is guaranteed
+  not to be used again by the `read` being cancelled.
+* `close` may only be called if there is no active `read` and leaves the stream
+  `closed` without possibility of blocking.
+
+The abstract `WritableBuffer` interface is implemented by the
+`WritableBufferGuestImpl` class below. The `ReadableBufferGuestImpl` class is
+used by the stream implementation code below and is symmetric. The functions
+`load_list_from_valid_range` and `store_list_into_valid_range` used by these
+classes are defined below as part of normal `list` parameter lifting and
+lowering.
+```python
+class BufferGuestImpl(Buffer):
+  cx: CallContext
+  t: ValType
+  ptr: int
+  progress: int
+  length: int
+
+  def __init__(self, cx, t, ptr, length):
+    trap_if(length == 0 or length > Buffer.MAX_LENGTH)
+    trap_if(ptr != align_to(ptr, alignment(t)))
+    trap_if(ptr + length * elem_size(t) > len(cx.opts.memory))
+    self.cx = cx
+    self.t = t
+    self.ptr = ptr
+    self.progress = 0
+    self.length = length
+
+  def remain(self):
+    return self.length - self.progress
+
+class ReadableBufferGuestImpl(BufferGuestImpl):
+  def lift(self, n):
+    assert(n <= self.remain())
+    vs = load_list_from_valid_range(self.cx, self.ptr, n, self.t)
+    self.ptr += n * elem_size(self.t)
+    self.progress += n
+    return vs
+
+class WritableBufferGuestImpl(BufferGuestImpl, WritableBuffer):
+  def lower(self, vs):
+    assert(len(vs) <= self.remain())
+    store_list_into_valid_range(self.cx, vs, self.ptr, self.t)
+    self.ptr += len(vs) * elem_size(self.t)
+    self.progress += len(vs)
+```
+
+The `ReadableStreamGuestImpl` class implements `ReadableStream` for a stream
+created by wasm (via `canon stream.new`) and encapsulates the synchronization
+performed between the writer and reader ends of a `stream`. In addition to the
+`read` method defined as part of `ReadableStream` that can be called by the
+consumer of the `ReadableStream`, a `write` method is also defined that will be
+called (below) by the writable end of this same stream. Other than the fact
+that they copy in different directions, reading and writing work the same way
+and thus are defined by a single internal `rendezvous` method. The first time
+`rendezvous` is called, it will block until it is woken by a second call to
+`rendezvous` (necessarily in the opposite direction, as ensured by the CABI).
+Once this second `rendezvous` call arrives, there is both a `ReadableBuffer` and
+`WritableBuffer` on hand, so a direct copy can be immediately performed (noting
+that `dst.lower(src.lift(...))` is meant to be fused into a single copy from
+`src`'s memory into `dst`'s memory).
+```python
+class ReadableStreamGuestImpl(ReadableStream):
+  is_closed: bool
+  other_buffer: Optional[Buffer]
+  other_future: Optional[asyncio.Future]
+
+  def __init__(self):
+    self.is_closed = False
+    self.other_buffer = None
+    self.other_future = None
+
+  def closed(self):
+    return self.is_closed
+
+  async def read(self, dst, on_block):
+    await self.rendezvous(dst, self.other_buffer, dst, on_block)
+  async def write(self, src, on_block):
+    await self.rendezvous(src, src, self.other_buffer, on_block)
+  async def rendezvous(self, this_buffer, src, dst, on_block):
+    assert(not self.is_closed)
+    if self.other_buffer:
+      ncopy = min(src.remain(), dst.remain())
+      assert(ncopy > 0)
+      dst.lower(src.lift(ncopy))
+      if not self.other_buffer.remain():
+        self.other_buffer = None
+      if self.other_future:
+        self.other_future.set_result(None)
+        self.other_future = None
+    else:
+      assert(not self.other_future)
+      self.other_buffer = this_buffer
+      self.other_future = asyncio.Future()
+      await on_block(self.other_future)
+      if self.other_buffer is this_buffer:
+        self.other_buffer = None
+```
+In this logic, we can see that `read` and `write` eagerly return once *any*
+values are read or written. Thus, if a source-language API needs to read or
+write an exact number of elements, it must loop. (New `read-full`/`write-full`
+variations could be added in the future that do not complete until
+`remain = 0`, but this would only be an optimization that minimizes call
+overhead, not a change in expressive power or algorithmic complexity.)
+
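+To make the rendezvous behavior concrete, here is a small runnable sketch
+(assuming the `ReadableStreamGuestImpl` class above is available; the
+list-backed buffer classes and the trivial `on_block` are stand-ins for the
+abstract buffer interfaces, not part of the Canonical ABI):
+```python
+import asyncio
+
+async def on_block(awaitable):   # minimal stand-in for the spec's OnBlockCallback
+  return await awaitable
+
+class ListSrc:                   # stand-in for a readable buffer
+  def __init__(self, vs): self.vs = vs
+  def remain(self): return len(self.vs)
+  def lift(self, n):
+    taken, self.vs = self.vs[:n], self.vs[n:]
+    return taken
+
+class ListDst:                   # stand-in for a WritableBuffer
+  def __init__(self, capacity): self.capacity, self.got = capacity, []
+  def remain(self): return self.capacity - len(self.got)
+  def lower(self, vs): self.got.extend(vs)
+
+async def demo():
+  s = ReadableStreamGuestImpl()
+  dst = ListDst(4)
+  reader = asyncio.create_task(s.read(dst, on_block))  # blocks: no writer yet
+  await asyncio.sleep(0)
+  await s.write(ListSrc([1, 2, 3]), on_block)          # rendezvous: copies 3 values
+  await reader                     # read returns eagerly with 3 of the 4 slots filled
+  assert dst.got == [1, 2, 3]
+
+# asyncio.run(demo())
+```
+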
+One optimization intentionally enabled by the code above is that, after a
+rendezvous completes with some `n < remain` values being copied, the
+`other_buffer` is kept around (even after the `other_future` is resolved) to
+allow future rendezvous to keep reading or writing into the same buffer until
+the `await other_future` is resumed by the scheduler. Due to cooperative
+concurrency, this time window can be significant and thus this optimization can
+reduce task-switching overhead by batching up partial reads and writes into
+bigger reads or writes.
+
+However, this optimization creates a subtle corner case handled by the above
+code that is worth pointing out: between `other_future` being resolved and the
+`await other_future` resuming, `other_buffer` *may or may not* get cleared by
+another `rendezvous` and then subsequently replaced by another buffer waiting
+in the opposite direction. This case is handled by the `other_buffer is this_buffer`
+test before clearing `other_buffer`. Cancellation must use this same condition
+to determine whether to resolve `other_future` or not when cancelling a read or
+write:
+```python
+  async def cancel_read(self, dst, on_block):
+    await self.cancel_rendezvous(dst, on_block)
+  async def cancel_write(self, src, on_block):
+    await self.cancel_rendezvous(src, on_block)
+  async def cancel_rendezvous(self, this_buffer, on_block):
+    assert(not self.is_closed)
+    if not DETERMINISTIC_PROFILE and random.randint(0,1):
+      await on_block(asyncio.sleep(0))
+    if self.other_buffer is this_buffer:
+      self.other_buffer = None
+      if self.other_future:
+        self.other_future.set_result(None)
+        self.other_future = None
+```
+The random choice of whether or not to call `on_block` models the fact that, in
+general, cancelling a read or write operation may require a blocking operation
+to ensure that access to the buffer has been fully relinquished (e.g., the
+buffer may have been handed to the kernel or hardware and thus there may be a
+need to block to confirm that the kernel or hardware is done with the buffer,
+with [terrible bugs] otherwise).
+
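+Continuing the earlier sketch (same `ListDst` and `on_block` helpers, and again
+assuming `ReadableStreamGuestImpl` is available), cancelling a blocked `read`
+resolves the pending future without copying anything:
+```python
+async def cancel_demo():
+  s = ReadableStreamGuestImpl()
+  dst = ListDst(4)
+  reader = asyncio.create_task(s.read(dst, on_block))  # blocks: no writer
+  await asyncio.sleep(0)
+  await s.cancel_read(dst, on_block)   # must pass the same buffer given to read
+  await reader                         # the read completes; nothing was copied
+  assert dst.got == []
+
+# asyncio.run(cancel_demo())
+```
+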
+When called via the `ReadableStream` abstract interface, the `close` method can
+assume as a precondition that there is not an outstanding `read` and thus there
+is no need to block on a `cancel_read`. There may however be a pending write
+`await`ing `other_future`, but since we're on the reader end and we know that
+there are no concurrent `read`s, we can simply resolve `other_future` and move
+on without blocking on anything. `close` can also be called (below) from the
+writer direction, in which case all the above logic applies, in the opposite
+direction. Thus, there is only a single direction-agnostic `close` that is
+shared by both the reader and writer ends.
+```python
+  def close(self):
+    if not self.is_closed:
+      self.is_closed = True
+      self.other_buffer = None
+      if self.other_future:
+        self.other_future.set_result(None)
+        self.other_future = None
+    else:
+      assert(not self.other_buffer and not self.other_future)
+```
+
+With the above complex synchronization rules encapsulated by
+`ReadableStreamGuestImpl`, we can move on to the remaining rules and state that
+apply separately to the readable and writable handles that are stored in the
+`waitables` table. Both readable and writable handles store a reference to a
+`ReadableStream`. In the case of a `ReadableStreamHandle`, this
+`ReadableStream` can be host- or guest-implemented. However, in the case of
+`WritableStreamHandle`, the `ReadableStream` is definitely implemented by
+`ReadableStreamGuestImpl`. The point of these handles is to implement
+direction-agnostic `copy`, `cancel_copy` and `drop` methods that are called by
+the shared `canon stream.*` built-in code below.
+```python
+class StreamHandle:
+  stream: ReadableStream
+  t: ValType
+  cx: Optional[CallContext]
+  copying_buffer: Optional[Buffer]
+
+  def __init__(self, stream, t, cx):
+    self.stream = stream
+    self.t = t
+    self.cx = cx
+    self.copying_buffer = None
+
+  def drop(self):
+    trap_if(self.copying_buffer)
+    self.stream.close()
+    if self.cx:
+      self.cx.todo -= 1
+
+class ReadableStreamHandle(StreamHandle):
+  async def copy(self, dst, on_block):
+    await self.stream.read(dst, on_block)
+  async def cancel_copy(self, dst, on_block):
+    await self.stream.cancel_read(dst, on_block)
+
+class WritableStreamHandle(ReadableStreamGuestImpl, StreamHandle):
+  def __init__(self, t):
+    ReadableStreamGuestImpl.__init__(self)
+    StreamHandle.__init__(self, self, t, cx = None)
+  async def copy(self, src, on_block):
+    await self.write(src, on_block)
+  async def cancel_copy(self, src, on_block):
+    await self.cancel_write(src, on_block)
+```
+Considering the logic in `drop` (which is called polymorphically by
+`canon waitable.drop` below):
+* The trap if `copying_buffer` is set ensures the above-stated precondition
+  that `close` can only be called when there is no pending `read`/`write`.
+  `copying_buffer` is set below when `stream.{read,write}` starts and cleared
+  once wasm is notified of completion.
+* The `todo` decrement matches an increment when the handle's `cx`
+  field was set and is used to ensure that `cx` never points to a dead
+  `Subtask` (whose own `todo` increment ensures that `cx.task` also never
+  points to a dead `Task`).
+
+Given the above definition of how `stream` works, a `future` can simply be
+defined as a `stream` of exactly 1 value by having the `copy` and `cancel_copy`
+methods `close()` the stream as soon as they detect that the 1 `remain`ing
+value has been successfully copied:
+```python
+class FutureHandle(StreamHandle): pass
+
+class ReadableFutureHandle(FutureHandle):
+  async def copy(self, dst, on_block):
+    assert(dst.remain() == 1)
+    await self.stream.read(dst, on_block)
+    if dst.remain() == 0:
+      self.stream.close()
+
+  async def cancel_copy(self, dst, on_block):
+    await self.stream.cancel_read(dst, on_block)
+    if dst.remain() == 0:
+      self.stream.close()
+
+class WritableFutureHandle(ReadableStreamGuestImpl, FutureHandle):
+  def __init__(self, t):
+    ReadableStreamGuestImpl.__init__(self)
+    FutureHandle.__init__(self, self, t, cx = None)
+
+  async def copy(self, src, on_block):
+    assert(src.remain() == 1)
+    await self.write(src, on_block)
+    if src.remain() == 0:
+      self.close()
+
+  async def cancel_copy(self, src, on_block):
+    await self.cancel_write(src, on_block)
+    if src.remain() == 0:
+      self.close()
+
+  def drop(self):
+    trap_if(not self.closed())
+    FutureHandle.drop(self)
 ```
+The overridden `WritableFutureHandle.drop` method traps if the internal stream
+has not been closed (and thus the future value has not been written). (*Note
+that there is a [TODO](Async.md#TODO) to add an `error` type and new built-ins
+for dropping a stream or future handle with an `error` which will **not** trap,
+thus allowing a `future` to be resolved without producing a value iff it
+produces an `error`.*)
+
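+As a small sketch of this close-after-one-value behavior (reusing the
+`ListSrc`/`ListDst`/`on_block` helpers from the earlier stream sketch and
+assuming the `U8Type` constructor from `definitions.py`):
+```python
+async def future_demo():
+  wh = WritableFutureHandle(U8Type())
+  dst = ListDst(1)
+  reader = asyncio.create_task(wh.stream.read(dst, on_block))  # wh.stream is wh itself
+  await asyncio.sleep(0)
+  await wh.copy(ListSrc([42]), on_block)   # copies the single value, then closes
+  await reader
+  assert dst.got == [42] and wh.closed()
+
+# asyncio.run(future_demo())
+```
+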
 
 ### Despecialization
 
@@ -758,6 +1092,36 @@ because they are given specialized canonical ABI representations distinct from
 their respective expansions.
 
 
+### Type Predicates
+
+The `contains_borrow` and `contains_async_value` predicates return whether the
+given type contains a `borrow` or `future`/`stream`, respectively.
+```python
+def contains_borrow(t):
+  return contains(t, lambda u: isinstance(u, BorrowType))
+
+def contains_async_value(t):
+  return contains(t, lambda u: isinstance(u, StreamType | FutureType))
+
+def contains(t, p):
+  t = despecialize(t)
+  match t:
+    case None:
+      return False
+    case PrimValType() | OwnType() | BorrowType():
+      return p(t)
+    case ListType(u) | StreamType(u) | FutureType(u):
+      return p(t) or contains(u, p)
+    case RecordType(fields):
+      return p(t) or any(contains(f.t, p) for f in fields)
+    case VariantType(cases):
+      return p(t) or any(contains(c.t, p) for c in cases)
+    case FuncType():
+      return any(p(u) for u in t.param_types() + t.result_types())
+    case _:
+      assert(False)
+```
+
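+For example, a quick check of these predicates (assuming the `U8Type`,
+`StreamType` and `FutureType` constructors from `definitions.py`):
+```python
+assert contains_async_value(StreamType(U8Type()))
+assert contains_async_value(FutureType(StreamType(U8Type())))
+assert not contains_async_value(U8Type())
+assert not contains_borrow(StreamType(U8Type()))
+```
+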
 ### Alignment
 
 Each value type is assigned an [alignment] which is used by subsequent
@@ -780,6 +1144,7 @@ def alignment(t):
     case VariantType(cases)          : return alignment_variant(cases)
     case FlagsType(labels)           : return alignment_flags(labels)
     case OwnType() | BorrowType()    : return 4
+    case StreamType() | FutureType() : return 4
 ```
 
 List alignment is the same as tuple alignment when the length is fixed and
@@ -868,6 +1233,7 @@ def elem_size(t):
     case VariantType(cases)          : return elem_size_variant(cases)
     case FlagsType(labels)           : return elem_size_flags(labels)
     case OwnType() | BorrowType()    : return 4
+    case StreamType() | FutureType() : return 4
 
 def elem_size_list(elem_type, maybe_length):
   if maybe_length is not None:
@@ -933,6 +1299,8 @@ def load(cx, ptr, t):
     case FlagsType(labels)  : return load_flags(cx, ptr, labels)
     case OwnType()          : return lift_own(cx, load_int(cx, ptr, 4), t)
     case BorrowType()       : return lift_borrow(cx, load_int(cx, ptr, 4), t)
+    case StreamType(t)      : return lift_stream(cx, load_int(cx, ptr, 4), t)
+    case FutureType(t)      : return lift_future(cx, load_int(cx, ptr, 4), t)
 ```
 
 Integers are loaded directly from memory, with their high-order bit interpreted
@@ -1168,6 +1536,58 @@ handle can be held). When `h` is a `borrow` handle, we just need to make sure
 that the callee task has a shorter lifetime than the current task, which is only
 guaranteed if the callee is a subtask of the current task.
 
+Streams and futures are lifted in almost the same way, with the only difference
+being that it is a dynamic error to attempt to lift a `future` that has already
+been successfully read (`closed()`). In both cases, lifting the readable end
+transfers ownership of it while lifting the writable end leaves the writable
+end in place, but traps if the writable end has already been lifted before.
+Together, this ensures that at most one component holds each of the readable
+and writable ends of a stream. The `todo` increments must be matched by
+decrements in `StreamHandle.drop` for `CallContext.end_call` to not trap; this
+ensures that the writable stream handles cannot outlive the `Task` to which
+their events are sent (via `h.cx.task.notify()`).
+```python
+def lift_stream(cx, i, t):
+  return lift_async_value(ReadableStreamHandle, WritableStreamHandle, cx, i, t)
+
+def lift_future(cx, i, t):
+  v = lift_async_value(ReadableFutureHandle, WritableFutureHandle, cx, i, t)
+  trap_if(v.closed())
+  return v
+
+def lift_async_value(ReadableHandleT, WritableHandleT, cx, i, t):
+  h = cx.inst.waitables.get(i)
+  match h:
+    case ReadableHandleT():
+      trap_if(h.copying_buffer)
+      trap_if(contains_borrow(t) and cx.task is not h.cx)
+      h.cx.todo -= 1
+      cx.inst.waitables.remove(i)
+    case WritableHandleT():
+      trap_if(h.cx is not None)
+      assert(not h.copying_buffer)
+      h.cx = cx
+      h.cx.todo += 1
+    case _:
+      trap()
+  trap_if(h.t != t)
+  return h.stream
+```
+It's useful to observe that there are no lifetime issues with a `stream` or
+`future` of `borrow` handles due to the following:
+* Validation ensures that `stream<borrow<R>>` or `future<borrow<R>>` can only
+  be lifted as part of the parameters of an import call.
+* When lifting the writable end of a `stream` or `future` for an import call,
+  the code above stores the `Subtask` of the import call in the `cx` field of
+  the `WritableStreamHandle` so that when `ReadableBuffer.lift` transitively
+  calls `lift_borrow` (above), this same `Subtask` is passed as the `cx`
+  argument, thereby triggering the same bookkeeping as if the `borrow` was
+  passed as a normal synchronous parameter of the `Subtask`.
+* When lifting the readable end of a `stream` or `future` for an import call,
+  the `cx.task is not h.cx` condition ensures that `borrow`s are only copied
+  into subtasks with the same `Task` as scope (matching the analogous guard in
+  `lift_borrow`).
+
 
 ### Storing
 
@@ -1198,6 +1618,8 @@ def store(cx, v, t, ptr):
     case FlagsType(labels)  : store_flags(cx, v, ptr, labels)
     case OwnType()          : store_int(cx, lower_own(cx, v, t), ptr, 4)
     case BorrowType()       : store_int(cx, lower_borrow(cx, v, t), ptr, 4)
+    case StreamType(t)      : store_int(cx, lower_stream(cx, v, t), ptr, 4)
+    case FutureType(t)      : store_int(cx, lower_future(cx, v, t), ptr, 4)
 ```
 
 Integers are stored directly into memory. Because the input domain is exactly
@@ -1559,8 +1981,8 @@ def pack_flags_into_int(v, labels):
 
 Finally, `own` and `borrow` handles are lowered by initializing new handle
 elements in the current component instance's handle table. The increment of
-`need_to_drop` is complemented by a decrement in `canon_resource_drop` and
-ensures that all borrowed handles are dropped before the end of the task. 
+`todo` is complemented by a decrement in `canon_resource_drop` and ensures
+that all borrowed handles are dropped before the end of the task. 
 ```python
 def lower_own(cx, rep, t):
   h = ResourceHandle(rep, own=True)
@@ -1571,7 +1993,7 @@ def lower_borrow(cx, rep, t):
   if cx.inst is t.rt.impl:
     return rep
   h = ResourceHandle(rep, own=False, scope=cx)
-  cx.need_to_drop += 1
+  cx.todo += 1
   return cx.inst.resources.add(t.rt, h)
 ```
 The special case in `lower_borrow` is an optimization, recognizing that, when
@@ -1580,6 +2002,59 @@ type, the only thing the borrowed handle is good for is calling
 `resource.rep`, so lowering might as well avoid the overhead of creating an
 intermediate borrow handle.
 
+Lowering a `stream` or `future` is entirely symmetric. The
+`trap_if(v.closed())` in `lift_future` ensures the validity of the
+`assert(not v.closed())` in `lower_future`.
+```python
+def lower_stream(cx, v, t):
+  return lower_async_value(ReadableStreamHandle, WritableStreamHandle, cx, v, t)
+
+def lower_future(cx, v, t):
+  assert(not v.closed())
+  return lower_async_value(ReadableFutureHandle, WritableFutureHandle, cx, v, t)
+
+def lower_async_value(ReadableHandleT, WritableHandleT, cx, v, t):
+  assert(isinstance(v, ReadableStream))
+  if isinstance(v, WritableHandleT) and cx.inst is v.cx.inst:
+    i = cx.inst.waitables.array.index(v)
+    v.cx.todo -= 1
+    v.cx = None
+    assert(2**31 > Table.MAX_LENGTH >= i)
+    return i | (2**31)
+  else:
+    h = ReadableHandleT(v, t, cx)
+    h.cx.todo += 1
+    return cx.inst.waitables.add(h)
+```
+In the ordinary case, the abstract `ReadableStream` (which may come from the
+host or the guest) is stored in a `ReadableHandle` in the `waitables` table,
+incrementing `todo` to ensure that `StreamHandle.drop` is called before
+`Task.exit` so that readable stream and future handles cannot outlive the
+`Task` to which their events are sent (via `h.cx.task.notify()`).
+
+The interesting case is when a component receives back a `ReadableStream` that
+it itself holds the `WritableStreamHandle` for. Without specially handling
+this case, this would lead to copies from a single linear memory into itself
+which is both inefficient and raises subtle semantic interleaving questions
+that we would rather avoid. To avoid both, this case is detected and the
+`ReadableStream` is "unwrapped" to its writable handle, returning the existing
+index of that handle in the `waitables` table with the high bit set to signal
+this fact to guest code. Guest code must therefore handle this special case by
+collapsing the two ends of the stream to work fully within guest code (since
+the Canonical ABI is now wholly unnecessary to pass values from writer to
+reader).
+
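+A sketch of the decoding that guest code might perform on the `i32` produced
+by `lower_async_value` (the helper names are hypothetical, not part of the
+Canonical ABI):
+```python
+def got_back_own_writable_end(packed: int) -> bool:
+  # high bit set: the readable end collapsed onto our own writable end
+  return bool(packed & (1 << 31))
+
+def waitables_index(packed: int) -> int:
+  return packed & ~(1 << 31)
+```
+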
+As with `lift_async_value`, it's useful to observe that there are no lifetime
+issues with a `stream` or `future` of `borrow` handles due to the following:
+* Validation ensures that `stream<borrow<R>>` or `future<borrow<R>>` can only
+  be lowered as part of the parameters of an export call.
+* When lowering a `stream` or `future`, the code above stores the `Task` of
+  the export call in the `cx` field of the `ReadableStreamHandle` so that when
+  `WritableBuffer.lower` transitively calls `lower_borrow` (above), this same
+  `Task` is passed as the `cx` argument, thereby triggering the same
+  bookkeeping as if the `borrow` was passed as a normal synchronous parameter
+  of the `Task`.
+
 
 ### Flattening
 
@@ -1666,6 +2141,7 @@ def flatten_type(t):
     case VariantType(cases)               : return flatten_variant(cases)
     case FlagsType(labels)                : return ['i32']
     case OwnType() | BorrowType()         : return ['i32']
+    case StreamType() | FutureType()      : return ['i32']
 ```
 
 List flattening of a fixed-length list uses the same flattening as a tuple
@@ -1760,6 +2236,8 @@ def lift_flat(cx, vi, t):
     case FlagsType(labels)  : return lift_flat_flags(vi, labels)
     case OwnType()          : return lift_own(cx, vi.next('i32'), t)
     case BorrowType()       : return lift_borrow(cx, vi.next('i32'), t)
+    case StreamType(t)      : return lift_stream(cx, vi.next('i32'), t)
+    case FutureType(t)      : return lift_future(cx, vi.next('i32'), t)
 ```
 
 Integers are lifted from core `i32` or `i64` values using the signedness of the
@@ -1886,6 +2364,8 @@ def lower_flat(cx, v, t):
     case FlagsType(labels)  : return lower_flat_flags(v, labels)
     case OwnType()          : return [lower_own(cx, v, t)]
     case BorrowType()       : return [lower_borrow(cx, v, t)]
+    case StreamType(t)      : return [lower_stream(cx, v, t)]
+    case FutureType(t)      : return [lower_future(cx, v, t)]
 ```
 
 Since component-level values are assumed in-range and, as previously stated,
@@ -2083,10 +2563,11 @@ async def canon_lift(opts, inst, ft, callee, caller, on_start, on_return, on_blo
   assert(types_match_values(flat_ft.params, flat_args))
   if opts.sync:
     flat_results = await call_and_trap_on_throw(callee, task, flat_args)
-    assert(types_match_values(flat_ft.results, flat_results))
-    task.return_(flat_results)
-    if opts.post_return is not None:
-      [] = await call_and_trap_on_throw(opts.post_return, task, flat_results)
+    if not opts.always_task_return:
+      assert(types_match_values(flat_ft.results, flat_results))
+      task.return_(flat_results)
+      if opts.post_return is not None:
+        [] = await call_and_trap_on_throw(opts.post_return, task, flat_results)
   else:
     if not opts.callback:
       [] = await call_and_trap_on_throw(callee, task, flat_args)
@@ -2099,12 +2580,16 @@ async def canon_lift(opts, inst, ft, callee, caller, on_start, on_return, on_blo
         ctx = packed_ctx & ~1
         if is_yield:
           await task.yield_()
-          event, payload = (EventCode.YIELDED, 0)
+          event, p1, p2 = (EventCode.YIELDED, 0, 0)
         else:
-          event, payload = await task.wait()
-        [packed_ctx] = await call_and_trap_on_throw(opts.callback, task, [ctx, event, payload])
+          event, p1, p2 = await task.wait()
+        [packed_ctx] = await call_and_trap_on_throw(opts.callback, task, [ctx, event, p1, p2])
   task.exit()
 ```
+In the `sync` case, if the `always-task-return` ABI option is *not* set, then
+`canon_lift` calls `task.return_` with the core values returned by `callee`;
+otherwise, `callee` must call `canon task.return` itself before returning.
+
 In the `async` case, there are two sub-cases depending on whether the
 `callback` `canonopt` was set. When `callback` is present, waiting happens in
 an "event loop" inside `canon_lift` which also allows yielding (i.e., allowing
@@ -2138,6 +2623,7 @@ where `$callee` has type `$ft`, validation specifies:
 * a `memory` is present if required by lifting and is a subtype of `(memory 1)`
 * a `realloc` is present if required by lifting and has type `(func (param i32 i32 i32 i32) (result i32))`
 * there is no `post-return` in `$opts`
+* if `contains_async_value($ft)`, then `$opts.async` must be set
 
 When instantiating component instance `$inst`:
 * Define `$f` to be the partially-bound closure: `canon_lower($opts, $ft, $callee)`
@@ -2155,6 +2641,7 @@ async def canon_lower(opts, ft, callee, task, flat_args):
   assert(types_match_values(flat_ft.params, flat_args))
   subtask = Subtask(opts, ft, task, flat_args)
   if opts.sync:
+    assert(not contains_async_value(ft))
     await task.call_sync(callee, task, subtask.on_start, subtask.on_return)
     flat_results = subtask.finish()
   else:
@@ -2164,8 +2651,8 @@ async def canon_lower(opts, ft, callee, task, flat_args):
     match await call_and_handle_blocking(do_call):
       case Blocked():
         subtask.notify_supertask = True
-        task.need_to_drop += 1
-        i = task.inst.async_subtasks.add(subtask)
+        task.todo += 1
+        i = task.inst.waitables.add(subtask)
         assert(0 < i <= Table.MAX_LENGTH < 2**30)
         assert(0 <= int(subtask.state) < 2**2)
         flat_results = [i | (int(subtask.state) << 30)]
@@ -2174,14 +2661,22 @@ async def canon_lower(opts, ft, callee, task, flat_args):
   assert(types_match_values(flat_ft.results, flat_results))
   return flat_results
 ```
-In the asynchronous case, if `do_call` blocks before `Subtask.finish`
-(signalled by `callee` calling `on_block`), the `Subtask` is added to the
-current component instance's `async_subtasks` table, giving it an `i32` index
-that will be returned by `task.wait` to signal progress on this subtask. The
-`need_to_drop` increment is matched by a decrement in `canon_subtask_drop`
-and ensures that all subtasks of a supertask complete before the supertask
-completes. The `notify_supertask` flag is set to tell `Subtask` methods
-(below) to asynchronously notify the supertask of progress.
+In the `sync` case, `Task.call_sync` ensures a fully-synchronous call to
+`callee` (that prevents any interleaved execution until `callee` returns). The
+`not contains_async_value(ft)` assertion is ensured by validation and reflects
+the fact that a function that takes or returns a `future` or `stream` is
+extremely likely to deadlock if called in this manner (since the whole point
+of these types is to allow control flow to switch back and forth between
+caller and callee).
+
+In the `async` case, if `do_call` blocks before `Subtask.finish` (signalled by
+`callee` calling `on_block`), the `Subtask` is added to the current component
+instance's `waitables` table, giving it an `i32` index that will be returned
+by `task.wait` to signal progress on this subtask. The `todo` increment is
+matched by a decrement in `canon_waitable_drop` and ensures that all subtasks
+of a supertask complete before the supertask completes. The `notify_supertask`
+flag is set to tell `Subtask` methods (below) to asynchronously notify the
+supertask of progress.
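+
+A sketch of how guest code might unpack that packed `i32` when the call did
+block (the helper name is hypothetical, not part of the Canonical ABI):
+```python
+def unpack_blocked_lower_result(packed: int) -> tuple[int, int]:
+  # top 2 bits: the CallState when canon_lower returned;
+  # low 30 bits: the new Subtask's index in the waitables table
+  return packed >> 30, packed & ((1 << 30) - 1)
+```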
 
 Based on this, if the returned `subtask.state` is `STARTING`, the caller must
 keep the memory pointed by `flat_args` valid until `task.wait` indicates that
@@ -2266,7 +2761,7 @@ async def canon_resource_drop(rt, sync, task, i):
       else:
         task.trap_if_on_the_stack(rt.impl)
   else:
-    h.scope.need_to_drop -= 1
+    h.scope.todo -= 1
   return flat_results
 ```
 In general, the call to a resource's destructor is treated like a
@@ -2339,7 +2834,7 @@ and pass the results to the caller:
 ```python
 async def canon_task_return(task, core_ft, flat_args):
   trap_if(not task.inst.may_leave)
-  trap_if(task.opts.sync)
+  trap_if(task.opts.sync and not task.opts.always_task_return)
   trap_if(core_ft != flatten_functype(CanonicalOptions(), FuncType(task.ft.results, []), 'lower'))
   task.return_(flat_args)
   return []
@@ -2360,15 +2855,16 @@ validation specifies:
 * `$f` is given type `(func (param i32) (result i32))`
 
 Calling `$f` waits for progress to be made in a subtask of the current task,
-returning the event (which is currently simply an `CallState` value)
-and writing the subtask index as an outparam:
+returning the event (which is currently simply a `CallState` value) and
+writing the subtask index as an outparam:
 ```python
 async def canon_task_wait(opts, task, ptr):
   trap_if(not task.inst.may_leave)
   trap_if(task.opts.callback is not None)
-  event, payload = await task.wait()
+  event, p1, p2 = await task.wait()
   cx = CallContext(opts, task.inst, task)
-  store(cx, payload, U32Type(), ptr)
+  store(cx, p1, U32Type(), ptr)
+  store(cx, p2, U32Type(), ptr + 4)
   return [event]
 ```
 The `trap_if` ensures that, when a component uses a `callback` all events flow
@@ -2392,7 +2888,7 @@ validation specifies:
 
 Calling `$f` does a non-blocking check for whether an event is already
 available, returning whether or not there was such an event as a boolean and,
-if there was an event, storing the `i32` event+payload pair as an outparam.
+if there was an event, storing the `i32` event and payloads as outparams.
 ```python
 async def canon_task_poll(opts, task, ptr):
   trap_if(not task.inst.may_leave)
@@ -2400,7 +2896,7 @@ async def canon_task_poll(opts, task, ptr):
   if ret is None:
     return [0]
   cx = CallContext(opts, task.inst, task)
-  store(cx, ret, TupleType([U32Type(), U32Type()]), ptr)
+  store(cx, ret, TupleType([U32Type(), U32Type(), U32Type()]), ptr)
   return [1]
 ```
 Note that the `await` of `task.poll` indicates that `task.poll` can yield to
@@ -2425,21 +2921,221 @@ async def canon_task_yield(task):
   return []
 ```
 
-### 🔀 `canon subtask.drop`
+### 🔀 `canon {stream,future}.new`
+
+For canonical definitions:
+```wasm
+(canon stream.new $t (core func $f))
+(canon future.new $t (core func $f))
+```
+validation specifies:
+* `$f` is given type `(func (result i32))`
+
+Calling `$f` calls `canon_{stream,future}_new`, which adds a new writable end
+of a stream or future to the `waitables` table and returns its index.
+```python
+async def canon_stream_new(elem_type, task):
+  trap_if(not task.inst.may_leave)
+  h = WritableStreamHandle(elem_type)
+  return [ task.inst.waitables.add(h) ]
+
+async def canon_future_new(t, task):
+  trap_if(not task.inst.may_leave)
+  h = WritableFutureHandle(t)
+  return [ task.inst.waitables.add(h) ]
+```
+Note that the new writable end initially has its `StreamHandle.cx` field set
+to `None`, which means it can't be used to `read` or `write` (defined next)
+until it has been lifted as an import parameter or export result. Lifting this
+writable end sets `cx` and creates a readable end on the other side of the
+call so that copying can commence.
+
+### 🔀 `canon {stream,future}.{read,write}`
+
+For canonical definitions:
+```wasm
+(canon stream.read (core func $f))
+(canon stream.write (core func $f))
+```
+validation specifies:
+* `$f` is given type `(func (param i32 i32 i32) (result i32))`
+
+For canonical definitions:
+```wasm
+(canon future.read (core func $f))
+(canon future.write (core func $f))
+```
+validation specifies:
+* `$f` is given type `(func (param i32 i32) (result i32))`
+
+The implementations of these four built-ins all funnel down to a single type-
+and `EventCode`-parameterized `async_copy` function. `async_copy` reuses the
+same `call_and_handle_blocking` machinery that `async` `canon lower` used
+above to model `read`s and `write`s as-if they were async import calls. For
+the same reason that `canon lower` does not allow synchronously lowering
+functions that contain `stream` or `future` types in their signature (high
+likelihood of deadlock), there is no synchronous option for `read` or `write`.
+The actual copy happens via polymorphic dispatch to `copy`, which has been
+defined above by the 4 `{Readable,Writable}{Stream,Future}Handle` types:
+```python
+async def canon_stream_read(task, i, ptr, n):
+  return await async_copy(ReadableStreamHandle, WritableBufferGuestImpl,
+                          EventCode.STREAM_READ, task, i, ptr, n)
+
+async def canon_stream_write(task, i, ptr, n):
+  return await async_copy(WritableStreamHandle, ReadableBufferGuestImpl,
+                          EventCode.STREAM_WRITE, task, i, ptr, n)
+
+async def canon_future_read(task, i, ptr):
+  return await async_copy(ReadableFutureHandle, WritableBufferGuestImpl,
+                          EventCode.FUTURE_READ, task, i, ptr, 1)
+
+async def canon_future_write(task, i, ptr):
+  return await async_copy(WritableFutureHandle, ReadableBufferGuestImpl,
+                          EventCode.FUTURE_WRITE, task, i, ptr, 1)
+
+async def async_copy(HandleT, BufferT, event_code, task, i, ptr, n):
+  trap_if(not task.inst.may_leave)
+  h = task.inst.waitables.get(i)
+  trap_if(not isinstance(h, HandleT))
+  trap_if(not h.cx)
+  trap_if(h.copying_buffer)
+  buffer = BufferT(h.cx, h.t, ptr, n)
+  if h.stream.closed():
+    flat_results = [CLOSED]
+  else:
+    async def do_copy(on_block):
+      await h.copy(buffer, on_block)
+      def copy_event():
+        if h.copying_buffer is buffer:
+          h.copying_buffer = None
+          return (event_code, i, pack_async_copy_result(buffer, h))
+        else:
+          return None
+      h.cx.task.notify(copy_event)
+    match await call_and_handle_blocking(do_copy):
+      case Blocked():
+        h.copying_buffer = buffer
+        flat_results = [BLOCKED]
+      case Returned():
+        flat_results = [pack_async_copy_result(buffer, h)]
+  return flat_results
+```
+The trap if `not h.cx` prevents `write`s on the writable end of streams or
+futures that have not yet been lifted. The `copying_buffer` field serves as a
+boolean indication of whether an async `read` or `write` is already in
+progress, preventing multiple overlapping calls to `read` or `write`. (This
+restriction could be relaxed [in the future](Async.md#TODO) to allow greater
+pipeline parallelism.)
+
+One subtle corner case handled by this code that is worth pointing out is that,
+between the `h.cx.task.notify(copy_event)` and the wasm guest code calling
+`task.wait` to receive this event, the wasm guest code can first call
+`{stream,future}.cancel-{read,write}` (defined next) which will return the copy
+progress to the wasm guest code and reset `copying_buffer` to `None` to allow
+future `read`s or `write`s. Then the wasm guest code can call
+`{stream,future}.{read,write}` *again*, setting `copying_buffer` to a *new*
+buffer. Thus, `copy_event` must check `h.copying_buffer is buffer` at the last
+moment and remove the event otherwise (here: by returning `None`, which
+`task.wait` handles by discarding and waiting for the next event).
+
+When the copy completes, the progress is reported to the caller. The order of
+tests here indicates that, if some progress was made and then the stream was
+closed, only the progress is reported and the `CLOSED` status is left to be
+discovered on the next `read` or `write` call.
+```python
+BLOCKED = 0xffff_ffff
+CLOSED  = 0x8000_0000
+
+def pack_async_copy_result(buffer, h):
+  assert(buffer.progress <= Buffer.MAX_LENGTH < CLOSED < BLOCKED)
+  if buffer.progress:
+    return buffer.progress
+  if h.stream.closed():
+    return CLOSED
+  return 0
+```
+(When [`error`](Async.md#TODO) is added in a future PR, when the `CLOSED` bit
+is set, the low 31 bits will optionally contain the non-zero index of an
+`error` value in some new `errors` table.)
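+
+As a minimal sketch, a guest might decode this packed result as follows
+(`interpret_copy_result` is a hypothetical helper, not part of the Canonical
+ABI; the constants match the definitions above):
+```python
+BLOCKED = 0xffff_ffff
+CLOSED  = 0x8000_0000
+
+def interpret_copy_result(packed):
+  if packed == BLOCKED:
+    return 'blocked'          # wait for a {STREAM,FUTURE}_{READ,WRITE} event
+  if packed & CLOSED:
+    return 'closed'           # low 31 bits reserved for a future `error` index
+  return ('copied', packed)   # number of elements eagerly copied
+
+assert interpret_copy_result(0xffff_ffff) == 'blocked'
+assert interpret_copy_result(0x8000_0000) == 'closed'
+assert interpret_copy_result(4) == ('copied', 4)
+```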
+
+### 🔀 `canon {stream,future}.cancel-{read,write}`
+
+For canonical definitions:
+```wasm
+(canon stream.cancel-read $async? (core func $f))
+(canon stream.cancel-write $async? (core func $f))
+(canon future.cancel-read $async? (core func $f))
+(canon future.cancel-write $async? (core func $f))
+```
+validation specifies:
+* `$f` is given type `(func (param i32) (result i32))`
+
+The implementations of these four built-ins all funnel down to a single
+type-parameterized `cancel_async_copy` function, which makes a polymorphic call
+to `cancel_copy`, which has been defined above by the 4
+`{Readable,Writable}{Stream,Future}Handle` types. Unlike `read` and `write`,
+`cancel-read` and `cancel-write` *do* provide a synchronous calling option
+(represented as an optional `async` immediate in the `canon` definition)
+since there is not the same deadlock hazard. The ability to synchronously
+cancel a `read` or `write` (and regain ownership of the passed buffer) is
+crucial since some languages will need to cancel reading or writing from
+within the synchronous context of a destructor.
+```python
+async def canon_stream_cancel_read(sync, task, i):
+  return await cancel_async_copy(ReadableStreamHandle, sync, task, i)
+
+async def canon_stream_cancel_write(sync, task, i):
+  return await cancel_async_copy(WritableStreamHandle, sync, task, i)
+
+async def canon_future_cancel_read(sync, task, i):
+  return await cancel_async_copy(ReadableFutureHandle, sync, task, i)
+
+async def canon_future_cancel_write(sync, task, i):
+  return await cancel_async_copy(WritableFutureHandle, sync, task, i)
+
+async def cancel_async_copy(HandleT, sync, task, i):
+  trap_if(not task.inst.may_leave)
+  h = task.inst.waitables.get(i)
+  trap_if(not isinstance(h, HandleT))
+  trap_if(not h.copying_buffer)
+  if sync:
+    await task.call_sync(h.cancel_copy, h.copying_buffer)
+    flat_results = [h.copying_buffer.progress]
+    h.copying_buffer = None
+  else:
+    match await call_and_handle_blocking(h.cancel_copy, h.copying_buffer):
+      case Blocked():
+        flat_results = [BLOCKED]
+      case Returned():
+        flat_results = [h.copying_buffer.progress]
+        h.copying_buffer = None
+  return flat_results
+```
+As mentioned above for `async_copy`, if cancellation doesn't block, the
+buffer's progress is synchronously returned and the `copying_buffer` field of
+the `StreamHandle` is immediately reset to `None`, preventing any subsequent
+`{STREAM,FUTURE}_{READ,WRITE}` events from being delivered for the cancelled
+`read` or `write` in the future. In the `BLOCKED` case, there is no new
+`waitable` element allocated; the cancellation is simply reported as a normal
+`{STREAM,FUTURE}_{READ,WRITE}` event by the now-unblocked `read` or `write`.
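+
+A short sketch of the guest-side decision after a call to
+`{stream,future}.cancel-{read,write}` (`after_cancel` is a hypothetical
+helper, not part of the Canonical ABI):
+```python
+BLOCKED = 0xffff_ffff
+
+def after_cancel(packed):
+  if packed == BLOCKED:
+    # Cancellation itself blocked: the outstanding copy will complete (or be
+    # cancelled) later and be reported as a {STREAM,FUTURE}_{READ,WRITE} event.
+    return 'wait-for-event'
+  # Otherwise the buffer is back in the guest's hands along with the number of
+  # elements copied before cancellation took effect.
+  return ('progress', packed)
+
+assert after_cancel(0xffff_ffff) == 'wait-for-event'
+assert after_cancel(1) == ('progress', 1)
+```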
+
+### 🔀 `canon waitable.drop`
 
 For a canonical definition:
 ```wasm
-(canon subtask.drop (core func $f))
+(canon waitable.drop (core func $f))
 ```
 validation specifies:
 * `$f` is given type `(func (param i32))`
 
-Calling `$f` removes the indicated subtask from the instance's table, trapping
-if various conditions aren't met in `Subtask.drop()`.
+Calling `$f` removes the indicated waitable (subtask, stream or future) from
+the instance's table, trapping if various conditions aren't met in the
+waitable's `drop()` method.
 ```python
-async def canon_subtask_drop(task, i):
+async def canon_waitable_drop(task, i):
   trap_if(not task.inst.may_leave)
-  task.inst.async_subtasks.remove(i).drop()
+  task.inst.waitables.remove(i).drop()
   return []
 ```
 
@@ -2511,6 +3207,7 @@ def canon_thread_hw_concurrency():
     return [NUM_ALLOWED_THREADS]
 ```
 
+[Virtualization Goals]: Goals.md
 [Canonical Definitions]: Explainer.md#canonical-definitions
 [`canonopt`]: Explainer.md#canonical-definitions
 [`canon`]: Explainer.md#canonical-definitions
@@ -2551,3 +3248,5 @@ def canon_thread_hw_concurrency():
 [Arbitrary Thread Parameters]: https://github.com/WebAssembly/shared-everything-threads/discussions/3
 [wasi-libc Convention]: https://github.com/WebAssembly/wasi-libc/blob/925ad6d7/libc-top-half/musl/src/thread/pthread_create.c#L318
 [Shared-Everything Threads]: https://github.com/WebAssembly/shared-everything-threads/blob/main/proposals/shared-everything-threads/Overview.md
+
+[Terrible Bugs]: https://devblogs.microsoft.com/oldnewthing/20110202-00/?p=11613
diff --git a/design/mvp/Explainer.md b/design/mvp/Explainer.md
index 2663f878..52009609 100644
--- a/design/mvp/Explainer.md
+++ b/design/mvp/Explainer.md
@@ -16,6 +16,7 @@ more user-focused explanation, take a look at the
       * [Numeric types](#numeric-types)
       * [Container types](#container-types)
       * [Handle types](#handle-types)
+      * [Asynchronous value types](#asynchronous-value-types)
     * [Specialized value types](#specialized-value-types)
     * [Definition types](#definition-types)
     * [Declarators](#declarators)
@@ -551,6 +552,8 @@ defvaltype    ::= bool
                 | (result <valtype>? (error <valtype>)?)
                 | (own <typeidx>)
                 | (borrow <typeidx>)
+                | (stream <typeidx>)
+                | (future <typeidx>)
 valtype       ::= <typeidx>
                 | <defvaltype>
 resourcetype  ::= (resource (rep i32) (dtor async? <funcidx> (callback <funcidx>)?)?)
@@ -601,6 +604,8 @@ sets of abstract values:
 | `list`                    | homogeneous, variable- or fixed-length [sequences] of values |
 | `own`                     | a unique, opaque address of a resource that will be destroyed when this value is dropped |
 | `borrow`                  | an opaque address of a resource that must be dropped before the current export call returns |
+| `stream`                  | an asynchronously-passed list of homogeneous values |
+| `future`                  | an asynchronously-passed single value |
 
 How these abstract values are produced and consumed from Core WebAssembly
 values and linear memory is configured by the component via *canonical lifting
@@ -656,6 +661,47 @@ immediate of a handle type must refer to a `resource` type (described below)
 that statically classifies the particular kinds of resources the handle can
 point to.
 
+##### Asynchronous value types
+
+The `stream` and `future` value types are both *asynchronous value types* that
+are used to deliver values incrementally over the course of a single async
+function call, instead of copying the values all-at-once as with other
+(synchronous) value types like `list`. The mechanism for performing these
+incremental copies avoids the need for intermediate buffering inside the
+`stream` or `future` value itself and instead uses buffers of memory whose
+size and allocation is controlled by the core wasm in the source and
+destination components. Thus, in the abstract, `stream` and `future` can be
+thought of as inter-component control-flow or synchronization mechanisms.
+
+Just like with handles, in the Component Model, async value types are
+lifted-from and lowered-into `i32` values that index an encapsulated
+per-component-instance table that is maintained by the canonical ABI built-ins
+[below](#canonical-definitions). The Component-Model-defined ABI for creating,
+writing-to and reading-from `stream` and `future` values is meant to be bound
+to analogous source-language features like promises, futures, streams,
+iterators, generators and channels so that developers can use these familiar
+high-level concepts when working directly with component types, without the
+need to manually write low-level async glue code. For languages like C without
+language-level concurrency support, these ABIs (described in detail in the
+[Canonical ABI explainer]) can be exposed directly as function imports and used
+like normal low-level Operating System I/O APIs.
+
+A `stream<T>` asynchronously passes zero or more `T` values in one direction
+between a source and destination, batched in chunks for efficiency. Streams
+are useful for:
+* improving latency by incrementally processing values as they arrive;
+* delivering potentially-large lists of values that might OOM wasm if passed
+  as a `list<T>`;
+* long-running or infinite streams of events.
+
+A `future` is a special case of `stream` and (in non-error scenarios) delivers
+exactly one value before being automatically closed. Because all imports can
+be [called asynchronously](Async.md), futures are not necessary to express a
+traditional `async` function -- all functions are effectively `async`. Instead,
+futures are useful in more advanced scenarios where a parameter or result
+value may not be ready at the same time as the other synchronous parameters or
+results.
+
 #### Specialized value types
 
 The sets of values allowed for the remaining *specialized value types* are
@@ -1175,6 +1221,7 @@ canonopt ::= string-encoding=utf8
            | (post-return <core:funcidx>)
            | async 🔀
            | (callback <core:funcidx>) 🔀
+           | always-task-return 🔀
 ```
 While the production `externdesc` accepts any `sort`, the validation rules
 for `canon lift` would only allow the `func` sort. In the future, other sorts
@@ -1217,7 +1264,9 @@ results.
 or support (for exports) multiple concurrent (asynchronous) calls. This option
 can be applied to any component-level function type and changes the derived
 Canonical ABI significantly. See the [async explainer](Async.md) for more
-details.
+details. When a function signature contains a `future` or `stream`, validation
+requires the `async` option to be set (since a synchronous call to a function
+using these types is highly likely to deadlock).
 
 🔀 The `(callback ...)` option may only be present in `canon lift` when the
 `async` option has also been set and specifies a core function that is
@@ -1230,6 +1279,13 @@ validated to have the following core function type:
 ```
 Again, see the [async explainer](Async.md) for more details.
 
+🔀 The `always-task-return` option may only be present in `canon lift` when
+`post-return` is not set and specifies that even synchronously-lifted functions
+will call `canon task.return` to return their results instead of returning
+them as core function results. This is a simpler alternative to `post-return`
+for freeing memory after lifting and thus `post-return` may be deprecated in
+the future.
+
 Based on this description of the AST, the [Canonical ABI explainer] gives a
 detailed walkthrough of the static and dynamic semantics of `lift` and `lower`.
 
@@ -1316,7 +1372,17 @@ canon ::= ...
         | (canon task.wait (memory <core:memidx>) (core func <id>?)) 🔀
         | (canon task.poll (memory <core:memidx>) (core func <id>?)) 🔀
         | (canon task.yield (core func <id>?)) 🔀
-        | (canon subtask.drop (core func <id>?)) 🔀
+        | (canon stream.new <typeidx> (core func <id>?)) 🔀
+        | (canon stream.read (core func <id>?)) 🔀
+        | (canon stream.write (core func <id>?)) 🔀
+        | (canon stream.cancel-read async? (core func <id>?)) 🔀
+        | (canon stream.cancel-write async? (core func <id>?)) 🔀
+        | (canon future.new <typeidx> (core func <id>?)) 🔀
+        | (canon future.read (core func <id>?)) 🔀
+        | (canon future.write (core func <id>?)) 🔀
+        | (canon future.cancel-read async? (core func <id>?)) 🔀
+        | (canon future.cancel-write async? (core func <id>?)) 🔀
+        | (canon waitable.drop (core func <id>?)) 🔀
         | (canon thread.spawn <typeidx> (core func <id>?)) 🧵
         | (canon thread.hw_concurrency (core func <id>?)) 🧵
 ```
@@ -1404,9 +1470,40 @@ switch to another task, allowing a long-running computation to cooperatively
 interleave with other tasks. (See also [`canon_task_yield`] in the Canonical
 ABI explainer.)
 
-The `subtask.drop` built-in has type `[i32] -> []` and removes the indicated
-[subtask](Async.md#subtask-and-supertask) from the current instance's subtask
-table, trapping if the subtask isn't done.
+The `{stream,future}.new` built-ins have type `[] -> [i32]` and return a new
+[writable end](Async.md#streams-and-futures) of a stream or future. (See
+[`canon_stream_new`] in the Canonical ABI explainer for details.)
+
+The `stream.{read,write}` built-ins have type `[i32 i32 i32] -> [i32]` and
+take an index to the matching [readable or writable end](Async.md#streams-and-futures)
+of a stream as the first parameter, a pointer to a linear memory buffer as
+the second parameter and the number of elements of available space in the
+buffer as the third parameter. The return value is either the non-zero number
+of elements that have been eagerly read or written or else a sentinel
+"`BLOCKED`" value. (See
+[`canon_stream_read`] in the Canonical ABI explainer for details.)
+
+The `future.{read,write}` built-ins have type `[i32 i32] -> [i32]` and
+take an index to the matching [readable or writable end](Async.md#streams-and-futures)
+of a future as the first parameter and a pointer to linear memory as the second
+parameter. The return value is either `1` if the future value was eagerly
+read or written to the pointer or the sentinel "`BLOCKED`" value otherwise.
+(See [`canon_future_read`] in the Canonical ABI explainer for details.)
+
+The `{stream,future}.cancel-{read,write}` built-ins have type `[i32] -> [i32]`
+and take an index to the matching [readable or writable end](Async.md#streams-and-futures)
+of a stream or future that has an outstanding "`BLOCKED`" read or write. If
+cancellation finishes eagerly, the return value is the number of elements read
+or written into the given buffer (`0` or `1` for a `future`). If cancellation
+blocks, the return value is the sentinel "`BLOCKED`" value and the caller must
+`task.wait` for a `{STREAM,FUTURE}_{READ,WRITE}` event to indicate the
+completion of the `read` or `write`. (See [`canon_stream_cancel_read`] in the
+Canonical ABI explainer for details.)
+
+The `waitable.drop` built-in has type `[i32] -> []` and removes the indicated
+[subtask](Async.md#subtask-and-supertask) or [stream or future](Async.md#streams-and-futures)
+from the current instance's [waitables](Async.md#waiting) table, trapping if
+the subtask isn't done or the stream or future is in the middle of reading
+or writing.
 
 ##### 🧵 Threading built-ins
 
@@ -2237,6 +2334,10 @@ For some use-case-focused, worked examples, see:
 [`canon_task_wait`]: CanonicalABI.md#-canon-taskwait
 [`canon_task_poll`]: CanonicalABI.md#-canon-taskpoll
 [`canon_task_yield`]: CanonicalABI.md#-canon-taskyield
+[`canon_stream_new`]: CanonicalABI.md#-canon-streamfuturenew
+[`canon_stream_read`]: CanonicalABI.md#-canon-streamfuturereadwrite
+[`canon_future_read`]: CanonicalABI.md#-canon-streamfuturereadwrite
+[`canon_stream_cancel_read`]: CanonicalABI.md#-canon-streamfuturecancel-readwrite
 [Shared-Nothing]: ../high-level/Choices.md
 [Use Cases]: ../high-level/UseCases.md
 [Host Embeddings]: ../high-level/UseCases.md#hosts-embedding-components
diff --git a/design/mvp/canonical-abi/definitions.py b/design/mvp/canonical-abi/definitions.py
index ef165dca..2a2450eb 100644
--- a/design/mvp/canonical-abi/definitions.py
+++ b/design/mvp/canonical-abi/definitions.py
@@ -182,17 +182,31 @@ class OwnType(ValType):
 class BorrowType(ValType):
   rt: ResourceType
 
+@dataclass
+class StreamType(ValType):
+  t: ValType
+
+@dataclass
+class FutureType(ValType):
+  t: ValType
+
 ### Call Context
 
 class CallContext:
   opts: CanonicalOptions
   inst: ComponentInstance
   task: Task
+  todo: int
 
   def __init__(self, opts, inst, task):
     self.opts = opts
     self.inst = inst
     self.task = task
+    self.todo = 0
+
+  def end_call(self):
+    trap_if(self.todo)
+
 
 ### Canonical ABI Options
 
@@ -204,12 +218,13 @@ class CanonicalOptions:
   post_return: Optional[Callable] = None
   sync: bool = True # = !canonopt.async
   callback: Optional[Callable] = None
+  always_task_return: bool = False
 
 ### Runtime State
 
 class ComponentInstance:
   resources: ResourceTables
-  async_subtasks: Table[Subtask]
+  waitables: Table[Subtask|StreamHandle|FutureHandle]
   num_tasks: int
   may_leave: bool
   backpressure: bool
@@ -219,7 +234,7 @@ class ComponentInstance:
 
   def __init__(self):
     self.resources = ResourceTables()
-    self.async_subtasks = Table[Subtask]()
+    self.waitables = Table[Subtask|StreamHandle|FutureHandle]()
     self.num_tasks = 0
     self.may_leave = True
     self.backpressure = False
@@ -305,7 +320,7 @@ def __init__(self, rep, own, scope = None):
     self.scope = scope
     self.lend_count = 0
 
-#### Async State
+#### Task State
 
 class CallState(IntEnum):
   STARTING = 0
@@ -319,9 +334,13 @@ class EventCode(IntEnum):
   CALL_RETURNED = CallState.RETURNED
   CALL_DONE = CallState.DONE
   YIELDED = 4
+  STREAM_READ = 5
+  STREAM_WRITE = 6
+  FUTURE_READ = 7
+  FUTURE_WRITE = 8
 
-EventTuple = tuple[EventCode, int]
-EventCallback = Callable[[], EventTuple]
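+# An event is delivered as (event code, first payload, second payload). The
+# first payload is usually the index of the waitable that produced the event;
+# the second is 0 for CALL_* and YIELDED events and the packed copy result for
+# {STREAM,FUTURE}_{READ,WRITE} events.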
+EventTuple = tuple[EventCode, int, int]
+EventCallback = Callable[[], Optional[EventTuple]]
 OnBlockCallback = Callable[[Awaitable], Awaitable]
 
 current_task = asyncio.Lock()
@@ -360,7 +379,6 @@ class Task(CallContext):
   caller: Optional[Task]
   on_return: Optional[Callable]
   on_block: OnBlockCallback
-  need_to_drop: int
   events: list[EventCallback]
   has_events: asyncio.Event
 
@@ -370,7 +388,6 @@ def __init__(self, opts, inst, ft, caller, on_return, on_block):
     self.caller = caller
     self.on_return = on_return
     self.on_block = on_block
-    self.need_to_drop = 0
     self.events = []
     self.has_events = asyncio.Event()
 
@@ -427,14 +444,18 @@ async def call_sync(self, callee, *args):
       await callee(*args, self.on_block)
 
   async def wait(self) -> EventTuple:
-    await self.wait_on(self.has_events.wait())
-    return self.next_event()
-
-  def next_event(self) -> EventTuple:
-    event = self.events.pop(0)
-    if not self.events:
-      self.has_events.clear()
-    return event()
+    while True:
+      await self.wait_on(self.has_events.wait())
+      if (e := self.maybe_next_event()):
+        return e
+
+  def maybe_next_event(self) -> Optional[EventTuple]:
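+    # An event callback may return None if its event has been retracted in the
+    # meantime (e.g., a copy completion superseded by a cancel-read or
+    # cancel-write); such events are skipped.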
+    while self.events:
+      event = self.events.pop(0)
+      if (e := event()):
+        return e
+    self.has_events.clear()
+    return None
 
   def notify(self, event: EventCallback):
     self.events.append(event)
@@ -445,13 +466,11 @@ async def yield_(self):
 
   async def poll(self) -> Optional[EventTuple]:
     await self.yield_()
-    if not self.events:
-      return None
-    return self.next_event()
+    return self.maybe_next_event()
 
   def return_(self, flat_results):
     trap_if(not self.on_return)
-    if self.opts.sync:
+    if self.opts.sync and not self.opts.always_task_return:
       maxflat = MAX_FLAT_RESULTS
     else:
       maxflat = MAX_FLAT_PARAMS
@@ -462,16 +481,16 @@ def return_(self, flat_results):
 
   def exit(self):
     assert(current_task.locked())
-    assert(not self.events)
+    assert(not self.maybe_next_event())
     assert(self.inst.num_tasks >= 1)
     trap_if(self.on_return)
-    trap_if(self.need_to_drop != 0)
     trap_if(self.inst.num_tasks == 1 and self.inst.backpressure)
     self.inst.num_tasks -= 1
     if self.opts.sync:
       assert(not self.inst.interruptible.is_set())
       self.inst.interruptible.set()
     self.maybe_start_pending_task()
+    self.end_call()
 
 class Subtask(CallContext):
   ft: FuncType
@@ -506,10 +525,10 @@ def maybe_notify_supertask(self):
       self.enqueued = True
       def subtask_event():
         self.enqueued = False
-        i = self.inst.async_subtasks.array.index(self)
+        i = self.inst.waitables.array.index(self)
         if self.state == CallState.DONE:
           self.release_lenders()
-        return (EventCode(self.state), i)
+        return (EventCode(self.state), i, 0)
       self.task.notify(subtask_event)
 
   def on_start(self):
@@ -540,7 +559,185 @@ def finish(self):
   def drop(self):
     trap_if(self.enqueued)
     trap_if(self.state != CallState.DONE)
-    self.task.need_to_drop -= 1
+    self.task.todo -= 1
+    self.end_call()
+
+#### Buffer, Stream and Future State
+
+class Buffer:
+  MAX_LENGTH = 2**30 - 1
+
+class WritableBuffer(Buffer):
+  remain: Callable[[], int]
+  lower: Callable[[list[any]]]
+
+class ReadableStream:
+  closed: Callable[[], bool]
+  read: Callable[[WritableBuffer, OnBlockCallback], Awaitable]
+  cancel_read: Callable[[WritableBuffer, OnBlockCallback], Awaitable]
+  close: Callable[[]]
+
+class BufferGuestImpl(Buffer):
+  cx: CallContext
+  t: ValType
+  ptr: int
+  progress: int
+  length: int
+
+  def __init__(self, cx, t, ptr, length):
+    trap_if(length == 0 or length > Buffer.MAX_LENGTH)
+    trap_if(ptr != align_to(ptr, alignment(t)))
+    trap_if(ptr + length * elem_size(t) > len(cx.opts.memory))
+    self.cx = cx
+    self.t = t
+    self.ptr = ptr
+    self.progress = 0
+    self.length = length
+
+  def remain(self):
+    return self.length - self.progress
+
+class ReadableBufferGuestImpl(BufferGuestImpl):
+  def lift(self, n):
+    assert(n <= self.remain())
+    vs = load_list_from_valid_range(self.cx, self.ptr, n, self.t)
+    self.ptr += n * elem_size(self.t)
+    self.progress += n
+    return vs
+
+class WritableBufferGuestImpl(BufferGuestImpl, WritableBuffer):
+  def lower(self, vs):
+    assert(len(vs) <= self.remain())
+    store_list_into_valid_range(self.cx, vs, self.ptr, self.t)
+    self.ptr += len(vs) * elem_size(self.t)
+    self.progress += len(vs)
+
+class ReadableStreamGuestImpl(ReadableStream):
+  is_closed: bool
+  other_buffer: Optional[Buffer]
+  other_future: Optional[asyncio.Future]
+
+  def __init__(self):
+    self.is_closed = False
+    self.other_buffer = None
+    self.other_future = None
+
+  def closed(self):
+    return self.is_closed
+
+  async def read(self, dst, on_block):
+    await self.rendezvous(dst, self.other_buffer, dst, on_block)
+  async def write(self, src, on_block):
+    await self.rendezvous(src, src, self.other_buffer, on_block)
+  async def rendezvous(self, this_buffer, src, dst, on_block):
+    assert(not self.is_closed)
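+    # If the other end has already parked its buffer, eagerly copy as many
+    # elements as both buffers allow and wake the other end if it is blocked.
+    # Otherwise, park this buffer and block until the other end rendezvouses,
+    # cancels, or closes the stream.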
+    if self.other_buffer:
+      ncopy = min(src.remain(), dst.remain())
+      assert(ncopy > 0)
+      dst.lower(src.lift(ncopy))
+      if not self.other_buffer.remain():
+        self.other_buffer = None
+      if self.other_future:
+        self.other_future.set_result(None)
+        self.other_future = None
+    else:
+      assert(not self.other_future)
+      self.other_buffer = this_buffer
+      self.other_future = asyncio.Future()
+      await on_block(self.other_future)
+      if self.other_buffer is this_buffer:
+        self.other_buffer = None
+
+  async def cancel_read(self, dst, on_block):
+    await self.cancel_rendezvous(dst, on_block)
+  async def cancel_write(self, src, on_block):
+    await self.cancel_rendezvous(src, on_block)
+  async def cancel_rendezvous(self, this_buffer, on_block):
+    assert(not self.is_closed)
+    if not DETERMINISTIC_PROFILE and random.randint(0,1):
+      await on_block(asyncio.sleep(0))
+    if self.other_buffer is this_buffer:
+      self.other_buffer = None
+      if self.other_future:
+        self.other_future.set_result(None)
+        self.other_future = None
+
+  def close(self):
+    if not self.is_closed:
+      self.is_closed = True
+      self.other_buffer = None
+      if self.other_future:
+        self.other_future.set_result(None)
+        self.other_future = None
+    else:
+      assert(not self.other_buffer and not self.other_future)
+
+class StreamHandle:
+  stream: ReadableStream
+  t: ValType
+  cx: Optional[CallContext]
+  copying_buffer: Optional[Buffer]
+
+  def __init__(self, stream, t, cx):
+    self.stream = stream
+    self.t = t
+    self.cx = cx
+    self.copying_buffer = None
+
+  def drop(self):
+    trap_if(self.copying_buffer)
+    self.stream.close()
+    if self.cx:
+      self.cx.todo -= 1
+
+class ReadableStreamHandle(StreamHandle):
+  async def copy(self, dst, on_block):
+    await self.stream.read(dst, on_block)
+  async def cancel_copy(self, dst, on_block):
+    await self.stream.cancel_read(dst, on_block)
+
+class WritableStreamHandle(ReadableStreamGuestImpl, StreamHandle):
+  def __init__(self, t):
+    ReadableStreamGuestImpl.__init__(self)
+    StreamHandle.__init__(self, self, t, cx = None)
+  async def copy(self, src, on_block):
+    await self.write(src, on_block)
+  async def cancel_copy(self, src, on_block):
+    await self.cancel_write(src, on_block)
+
+class FutureHandle(StreamHandle): pass
+
+class ReadableFutureHandle(FutureHandle):
+  async def copy(self, dst, on_block):
+    assert(dst.remain() == 1)
+    await self.stream.read(dst, on_block)
+    if dst.remain() == 0:
+      self.stream.close()
+
+  async def cancel_copy(self, dst, on_block):
+    await self.stream.cancel_read(dst, on_block)
+    if dst.remain() == 0:
+      self.stream.close()
+
+class WritableFutureHandle(ReadableStreamGuestImpl, FutureHandle):
+  def __init__(self, t):
+    ReadableStreamGuestImpl.__init__(self)
+    FutureHandle.__init__(self, self, t, cx = None)
+
+  async def copy(self, src, on_block):
+    assert(src.remain() == 1)
+    await self.write(src, on_block)
+    if src.remain() == 0:
+      self.close()
+
+  async def cancel_copy(self, src, on_block):
+    await self.cancel_write(src, on_block)
+    if src.remain() == 0:
+      self.close()
+
+  def drop(self):
+    trap_if(not self.closed())
+    FutureHandle.drop(self)
 
 ### Despecialization
 
@@ -552,6 +749,33 @@ def despecialize(t):
     case ResultType(ok, err) : return VariantType([ CaseType("ok", ok), CaseType("error", err) ])
     case _                   : return t
 
+### Type Predicates
+
+def contains_borrow(t):
+  return contains(t, lambda u: isinstance(u, BorrowType))
+
+def contains_async_value(t):
+  return contains(t, lambda u: isinstance(u, StreamType | FutureType))
+
+def contains(t, p):
+  t = despecialize(t)
+  match t:
+    case None:
+      return False
+    case PrimValType() | OwnType() | BorrowType():
+      return p(t)
+    case ListType(u) | StreamType(u) | FutureType(u):
+      return p(t) or contains(u, p)
+    case RecordType(fields):
+      return p(t) or any(contains(f.t, p) for f in fields)
+    case VariantType(cases):
+      return p(t) or any(contains(c.t, p) for c in cases)
+    case FuncType():
+      return any(p(u) for u in t.param_types() + t.result_types())
+    case _:
+      assert(False)
+
+
 ### Alignment
 
 def alignment(t):
@@ -570,6 +794,7 @@ def alignment(t):
     case VariantType(cases)          : return alignment_variant(cases)
     case FlagsType(labels)           : return alignment_flags(labels)
     case OwnType() | BorrowType()    : return 4
+    case StreamType() | FutureType() : return 4
 
 def alignment_list(elem_type, maybe_length):
   if maybe_length is not None:
@@ -626,6 +851,7 @@ def elem_size(t):
     case VariantType(cases)          : return elem_size_variant(cases)
     case FlagsType(labels)           : return elem_size_flags(labels)
     case OwnType() | BorrowType()    : return 4
+    case StreamType() | FutureType() : return 4
 
 def elem_size_list(elem_type, maybe_length):
   if maybe_length is not None:
@@ -685,6 +911,8 @@ def load(cx, ptr, t):
     case FlagsType(labels)  : return load_flags(cx, ptr, labels)
     case OwnType()          : return lift_own(cx, load_int(cx, ptr, 4), t)
     case BorrowType()       : return lift_borrow(cx, load_int(cx, ptr, 4), t)
+    case StreamType(t)      : return lift_stream(cx, load_int(cx, ptr, 4), t)
+    case FutureType(t)      : return lift_future(cx, load_int(cx, ptr, 4), t)
 
 def load_int(cx, ptr, nbytes, signed = False):
   return int.from_bytes(cx.opts.memory[ptr : ptr+nbytes], 'little', signed=signed)
@@ -840,6 +1068,32 @@ def lift_borrow(cx, i, t):
     trap_if(cx.task is not h.scope)
   return h.rep
 
+def lift_stream(cx, i, t):
+  return lift_async_value(ReadableStreamHandle, WritableStreamHandle, cx, i, t)
+
+def lift_future(cx, i, t):
+  v = lift_async_value(ReadableFutureHandle, WritableFutureHandle, cx, i, t)
+  trap_if(v.closed())
+  return v
+
+def lift_async_value(ReadableHandleT, WritableHandleT, cx, i, t):
+  h = cx.inst.waitables.get(i)
+  match h:
+    case ReadableHandleT():
+      trap_if(h.copying_buffer)
+      trap_if(contains_borrow(t) and cx.task is not h.cx)
+      h.cx.todo -= 1
+      cx.inst.waitables.remove(i)
+    case WritableHandleT():
+      trap_if(h.cx is not None)
+      assert(not h.copying_buffer)
+      h.cx = cx
+      h.cx.todo += 1
+    case _:
+      trap()
+  trap_if(h.t != t)
+  return h.stream
+
 ### Storing
 
 def store(cx, v, t, ptr):
@@ -865,6 +1119,8 @@ def store(cx, v, t, ptr):
     case FlagsType(labels)  : store_flags(cx, v, ptr, labels)
     case OwnType()          : store_int(cx, lower_own(cx, v, t), ptr, 4)
     case BorrowType()       : store_int(cx, lower_borrow(cx, v, t), ptr, 4)
+    case StreamType(t)      : store_int(cx, lower_stream(cx, v, t), ptr, 4)
+    case FutureType(t)      : store_int(cx, lower_future(cx, v, t), ptr, 4)
 
 def store_int(cx, v, ptr, nbytes, signed = False):
   cx.opts.memory[ptr : ptr+nbytes] = int.to_bytes(v, nbytes, 'little', signed=signed)
@@ -1124,9 +1380,29 @@ def lower_borrow(cx, rep, t):
   if cx.inst is t.rt.impl:
     return rep
   h = ResourceHandle(rep, own=False, scope=cx)
-  cx.need_to_drop += 1
+  cx.todo += 1
   return cx.inst.resources.add(t.rt, h)
 
+def lower_stream(cx, v, t):
+  return lower_async_value(ReadableStreamHandle, WritableStreamHandle, cx, v, t)
+
+def lower_future(cx, v, t):
+  assert(not v.closed())
+  return lower_async_value(ReadableFutureHandle, WritableFutureHandle, cx, v, t)
+
+def lower_async_value(ReadableHandleT, WritableHandleT, cx, v, t):
+  assert(isinstance(v, ReadableStream))
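+  # If the value being lowered is the writable end that this same component
+  # instance created (and has since lifted), hand it back directly: detach it
+  # from its current call context and return its existing table index with the
+  # high bit set (vs. the plain index of a freshly-added readable end below).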
+  if isinstance(v, WritableHandleT) and cx.inst is v.cx.inst:
+    i = cx.inst.waitables.array.index(v)
+    v.cx.todo -= 1
+    v.cx = None
+    assert(2**31 > Table.MAX_LENGTH >= i)
+    return i | (2**31)
+  else:
+    h = ReadableHandleT(v, t, cx)
+    h.cx.todo += 1
+    return cx.inst.waitables.add(h)
+
 ### Flattening
 
 MAX_FLAT_PARAMS = 16
@@ -1179,6 +1455,7 @@ def flatten_type(t):
     case VariantType(cases)               : return flatten_variant(cases)
     case FlagsType(labels)                : return ['i32']
     case OwnType() | BorrowType()         : return ['i32']
+    case StreamType() | FutureType()      : return ['i32']
 
 def flatten_list(elem_type, maybe_length):
   if maybe_length is not None:
@@ -1245,6 +1522,8 @@ def lift_flat(cx, vi, t):
     case FlagsType(labels)  : return lift_flat_flags(vi, labels)
     case OwnType()          : return lift_own(cx, vi.next('i32'), t)
     case BorrowType()       : return lift_borrow(cx, vi.next('i32'), t)
+    case StreamType(t)      : return lift_stream(cx, vi.next('i32'), t)
+    case FutureType(t)      : return lift_future(cx, vi.next('i32'), t)
 
 def lift_flat_unsigned(vi, core_width, t_width):
   i = vi.next('i' + str(core_width))
@@ -1336,6 +1615,8 @@ def lower_flat(cx, v, t):
     case FlagsType(labels)  : return lower_flat_flags(v, labels)
     case OwnType()          : return [lower_own(cx, v, t)]
     case BorrowType()       : return [lower_borrow(cx, v, t)]
+    case StreamType(t)      : return [lower_stream(cx, v, t)]
+    case FutureType(t)      : return [lower_future(cx, v, t)]
 
 def lower_flat_signed(i, core_bits):
   if i < 0:
@@ -1438,10 +1719,11 @@ async def canon_lift(opts, inst, ft, callee, caller, on_start, on_return, on_blo
   assert(types_match_values(flat_ft.params, flat_args))
   if opts.sync:
     flat_results = await call_and_trap_on_throw(callee, task, flat_args)
-    assert(types_match_values(flat_ft.results, flat_results))
-    task.return_(flat_results)
-    if opts.post_return is not None:
-      [] = await call_and_trap_on_throw(opts.post_return, task, flat_results)
+    if not opts.always_task_return:
+      assert(types_match_values(flat_ft.results, flat_results))
+      task.return_(flat_results)
+      if opts.post_return is not None:
+        [] = await call_and_trap_on_throw(opts.post_return, task, flat_results)
   else:
     if not opts.callback:
       [] = await call_and_trap_on_throw(callee, task, flat_args)
@@ -1454,10 +1736,10 @@ async def canon_lift(opts, inst, ft, callee, caller, on_start, on_return, on_blo
         ctx = packed_ctx & ~1
         if is_yield:
           await task.yield_()
-          event, payload = (EventCode.YIELDED, 0)
+          event, p1, p2 = (EventCode.YIELDED, 0, 0)
         else:
-          event, payload = await task.wait()
-        [packed_ctx] = await call_and_trap_on_throw(opts.callback, task, [ctx, event, payload])
+          event, p1, p2 = await task.wait()
+        [packed_ctx] = await call_and_trap_on_throw(opts.callback, task, [ctx, event, p1, p2])
   task.exit()
 
 async def call_and_trap_on_throw(callee, task, args):
@@ -1474,6 +1756,7 @@ async def canon_lower(opts, ft, callee, task, flat_args):
   assert(types_match_values(flat_ft.params, flat_args))
   subtask = Subtask(opts, ft, task, flat_args)
   if opts.sync:
+    assert(not contains_async_value(ft))
     await task.call_sync(callee, task, subtask.on_start, subtask.on_return)
     flat_results = subtask.finish()
   else:
@@ -1483,8 +1766,8 @@ async def do_call(on_block):
     match await call_and_handle_blocking(do_call):
       case Blocked():
         subtask.notify_supertask = True
-        task.need_to_drop += 1
-        i = task.inst.async_subtasks.add(subtask)
+        task.todo += 1
+        i = task.inst.waitables.add(subtask)
         assert(0 < i <= Table.MAX_LENGTH < 2**30)
         assert(0 <= int(subtask.state) < 2**2)
         flat_results = [i | (int(subtask.state) << 30)]
@@ -1524,7 +1807,7 @@ async def canon_resource_drop(rt, sync, task, i):
       else:
         task.trap_if_on_the_stack(rt.impl)
   else:
-    h.scope.need_to_drop -= 1
+    h.scope.todo -= 1
   return flat_results
 
 ### `canon resource.rep`
@@ -1544,7 +1827,7 @@ async def canon_task_backpressure(task, flat_args):
 
 async def canon_task_return(task, core_ft, flat_args):
   trap_if(not task.inst.may_leave)
-  trap_if(task.opts.sync)
+  trap_if(task.opts.sync and not task.opts.always_task_return)
   trap_if(core_ft != flatten_functype(CanonicalOptions(), FuncType(task.ft.results, []), 'lower'))
   task.return_(flat_args)
   return []
@@ -1554,9 +1837,10 @@ async def canon_task_return(task, core_ft, flat_args):
 async def canon_task_wait(opts, task, ptr):
   trap_if(not task.inst.may_leave)
   trap_if(task.opts.callback is not None)
-  event, payload = await task.wait()
+  event, p1, p2 = await task.wait()
   cx = CallContext(opts, task.inst, task)
-  store(cx, payload, U32Type(), ptr)
+  store(cx, p1, U32Type(), ptr)
+  store(cx, p2, U32Type(), ptr + 4)
   return [event]
 
 ### 🔀 `canon task.poll`
@@ -1567,7 +1851,7 @@ async def canon_task_poll(opts, task, ptr):
   if ret is None:
     return [0]
   cx = CallContext(opts, task.inst, task)
-  store(cx, ret, TupleType([U32Type(), U32Type()]), ptr)
+  store(cx, ret, TupleType([U32Type(), U32Type(), U32Type()]), ptr)
   return [1]
 
 ### 🔀 `canon task.yield`
@@ -1578,9 +1862,109 @@ async def canon_task_yield(task):
   await task.yield_()
   return []
 
-### 🔀 `canon subtask.drop`
+### 🔀 `canon {stream,future}.new`
+
+async def canon_stream_new(elem_type, task):
+  trap_if(not task.inst.may_leave)
+  h = WritableStreamHandle(elem_type)
+  return [ task.inst.waitables.add(h) ]
+
+async def canon_future_new(t, task):
+  trap_if(not task.inst.may_leave)
+  h = WritableFutureHandle(t)
+  return [ task.inst.waitables.add(h) ]
+
+### 🔀 `canon {stream,future}.{read,write}`
+
+async def canon_stream_read(task, i, ptr, n):
+  return await async_copy(ReadableStreamHandle, WritableBufferGuestImpl,
+                          EventCode.STREAM_READ, task, i, ptr, n)
+
+async def canon_stream_write(task, i, ptr, n):
+  return await async_copy(WritableStreamHandle, ReadableBufferGuestImpl,
+                          EventCode.STREAM_WRITE, task, i, ptr, n)
+
+async def canon_future_read(task, i, ptr):
+  return await async_copy(ReadableFutureHandle, WritableBufferGuestImpl,
+                          EventCode.FUTURE_READ, task, i, ptr, 1)
+
+async def canon_future_write(task, i, ptr):
+  return await async_copy(WritableFutureHandle, ReadableBufferGuestImpl,
+                          EventCode.FUTURE_WRITE, task, i, ptr, 1)
+
+async def async_copy(HandleT, BufferT, event_code, task, i, ptr, n):
+  trap_if(not task.inst.may_leave)
+  h = task.inst.waitables.get(i)
+  trap_if(not isinstance(h, HandleT))
+  trap_if(not h.cx)
+  trap_if(h.copying_buffer)
+  buffer = BufferT(h.cx, h.t, ptr, n)
+  if h.stream.closed():
+    flat_results = [CLOSED]
+  else:
+    async def do_copy(on_block):
+      await h.copy(buffer, on_block)
+      def copy_event():
+        if h.copying_buffer is buffer:
+          h.copying_buffer = None
+          return (event_code, i, pack_async_copy_result(buffer, h))
+        else:
+          return None
+      h.cx.task.notify(copy_event)
+    match await call_and_handle_blocking(do_copy):
+      case Blocked():
+        h.copying_buffer = buffer
+        flat_results = [BLOCKED]
+      case Returned():
+        flat_results = [pack_async_copy_result(buffer, h)]
+  return flat_results
+
+BLOCKED = 0xffff_ffff
+CLOSED  = 0x8000_0000
+
+def pack_async_copy_result(buffer, h):
+  assert(buffer.progress <= Buffer.MAX_LENGTH < CLOSED < BLOCKED)
+  if buffer.progress:
+    return buffer.progress
+  if h.stream.closed():
+    return CLOSED
+  return 0
+
+### 🔀 `canon {stream,future}.cancel-{read,write}`
+
+async def canon_stream_cancel_read(sync, task, i):
+  return await cancel_async_copy(ReadableStreamHandle, sync, task, i)
+
+async def canon_stream_cancel_write(sync, task, i):
+  return await cancel_async_copy(WritableStreamHandle, sync, task, i)
+
+async def canon_future_cancel_read(sync, task, i):
+  return await cancel_async_copy(ReadableFutureHandle, sync, task, i)
+
+async def canon_future_cancel_write(sync, task, i):
+  return await cancel_async_copy(WritableFutureHandle, sync, task, i)
+
+async def cancel_async_copy(HandleT, sync, task, i):
+  trap_if(not task.inst.may_leave)
+  h = task.inst.waitables.get(i)
+  trap_if(not isinstance(h, HandleT))
+  trap_if(not h.copying_buffer)
+  if sync:
+    await task.call_sync(h.cancel_copy, h.copying_buffer)
+    flat_results = [h.copying_buffer.progress]
+    h.copying_buffer = None
+  else:
+    match await call_and_handle_blocking(h.cancel_copy, h.copying_buffer):
+      case Blocked():
+        flat_results = [BLOCKED]
+      case Returned():
+        flat_results = [h.copying_buffer.progress]
+        h.copying_buffer = None
+  return flat_results
+
+### 🔀 `canon waitable.drop`
 
-async def canon_subtask_drop(task, i):
+async def canon_waitable_drop(task, i):
   trap_if(not task.inst.may_leave)
-  task.inst.async_subtasks.remove(i).drop()
+  task.inst.waitables.remove(i).drop()
   return []
diff --git a/design/mvp/canonical-abi/run_tests.py b/design/mvp/canonical-abi/run_tests.py
index 653a76d4..0602d788 100644
--- a/design/mvp/canonical-abi/run_tests.py
+++ b/design/mvp/canonical-abi/run_tests.py
@@ -29,18 +29,18 @@ def realloc(self, original_ptr, original_size, alignment, new_size):
     ret = align_to(self.last_alloc, alignment)
     self.last_alloc = ret + new_size
     if self.last_alloc > len(self.memory):
-      print('oom: have {} need {}'.format(len(self.memory), self.last_alloc))
       trap()
     self.memory[ret : ret + original_size] = self.memory[original_ptr : original_ptr + original_size]
     return ret
 
-def mk_opts(memory = bytearray(), encoding = 'utf8', realloc = None, post_return = None):
+def mk_opts(memory = bytearray(), encoding = 'utf8', realloc = None, post_return = None, sync_task_return = False, sync = True):
   opts = CanonicalOptions()
   opts.memory = memory
   opts.string_encoding = encoding
   opts.realloc = realloc
   opts.post_return = post_return
-  opts.sync = True
+  opts.always_task_return = sync_task_return
+  opts.sync = sync
   opts.callback = None
   return opts
 
@@ -59,6 +59,9 @@ def mk_tup_rec(x):
     return x
   return { str(i):mk_tup_rec(v) for i,v in enumerate(a) }
 
+def unpack_lower_result(ret):
+  return (ret & ~(3 << 30), ret >> 30)
+
 def fail(msg):
   raise BaseException(msg)
 
@@ -361,56 +364,59 @@ def test_flatten(t, params, results):
 test_flatten(FuncType([U8Type() for _ in range(17)],[]), ['i32' for _ in range(17)], [])
 test_flatten(FuncType([U8Type() for _ in range(17)],[TupleType([U8Type(),U8Type()])]), ['i32' for _ in range(17)], ['i32','i32'])
 
-def test_roundtrip(t, v):
-  before = definitions.MAX_FLAT_RESULTS
-  definitions.MAX_FLAT_RESULTS = 16
 
-  ft = FuncType([t],[t])
-  async def callee(task, x):
-    return x
+async def test_roundtrips():
+  async def test_roundtrip(t, v):
+    before = definitions.MAX_FLAT_RESULTS
+    definitions.MAX_FLAT_RESULTS = 16
 
-  callee_heap = Heap(1000)
-  callee_opts = mk_opts(callee_heap.memory, 'utf8', callee_heap.realloc)
-  callee_inst = ComponentInstance()
-  lifted_callee = partial(canon_lift, callee_opts, callee_inst, ft, callee)
+    ft = FuncType([t],[t])
+    async def callee(task, x):
+      return x
 
-  caller_heap = Heap(1000)
-  caller_opts = mk_opts(caller_heap.memory, 'utf8', caller_heap.realloc)
-  caller_inst = ComponentInstance()
-  caller_task = Task(caller_opts, caller_inst, ft, None, None, None)
+    callee_heap = Heap(1000)
+    callee_opts = mk_opts(callee_heap.memory, 'utf8', callee_heap.realloc)
+    callee_inst = ComponentInstance()
+    lifted_callee = partial(canon_lift, callee_opts, callee_inst, ft, callee)
 
-  flat_args = asyncio.run(caller_task.enter(lambda: [v]))
+    caller_heap = Heap(1000)
+    caller_opts = mk_opts(caller_heap.memory, 'utf8', caller_heap.realloc)
+    caller_inst = ComponentInstance()
+    caller_task = Task(caller_opts, caller_inst, ft, None, None, None)
 
-  return_in_heap = len(flatten_types([t])) > definitions.MAX_FLAT_RESULTS
-  if return_in_heap:
-    flat_args += [ caller_heap.realloc(0, 0, alignment(t), elem_size(t)) ]
+    flat_args = await caller_task.enter(lambda: [v])
 
-  flat_results = asyncio.run(canon_lower(caller_opts, ft, lifted_callee, caller_task, flat_args))
+    return_in_heap = len(flatten_types([t])) > definitions.MAX_FLAT_RESULTS
+    if return_in_heap:
+      flat_args += [ caller_heap.realloc(0, 0, alignment(t), elem_size(t)) ]
 
-  if return_in_heap:
-    flat_results = [ flat_args[-1] ]
+    flat_results = await canon_lower(caller_opts, ft, lifted_callee, caller_task, flat_args)
 
-  [got] = lift_flat_values(caller_task, definitions.MAX_FLAT_PARAMS, CoreValueIter(flat_results), [t])
-  caller_task.exit()
+    if return_in_heap:
+      flat_results = [ flat_args[-1] ]
 
-  if got != v:
-    fail("test_roundtrip({},{}) got {}".format(t, v, got))
+    [got] = lift_flat_values(caller_task, definitions.MAX_FLAT_PARAMS, CoreValueIter(flat_results), [t])
+    caller_task.exit()
 
-  definitions.MAX_FLAT_RESULTS = before
+    if got != v:
+      fail("test_roundtrip({},{}) got {}".format(t, v, got))
+
+    definitions.MAX_FLAT_RESULTS = before
+
+  await test_roundtrip(S8Type(), -1)
+  await test_roundtrip(TupleType([U16Type(),U16Type()]), mk_tup(3,4))
+  await test_roundtrip(ListType(StringType()), [mk_str("hello there")])
+  await test_roundtrip(ListType(ListType(StringType())), [[mk_str("one"),mk_str("two")],[mk_str("three")]])
+  await test_roundtrip(ListType(OptionType(TupleType([StringType(),U16Type()]))), [{'some':mk_tup(mk_str("answer"),42)}])
+  await test_roundtrip(VariantType([CaseType('x', TupleType([U32Type(),U32Type(),U32Type(),U32Type(),
+                                                             U32Type(),U32Type(),U32Type(),U32Type(),
+                                                             U32Type(),U32Type(),U32Type(),U32Type(),
+                                                             U32Type(),U32Type(),U32Type(),U32Type(),
+                                                             StringType()]))]),
+                       {'x': mk_tup(1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16, mk_str("wat"))})
 
-test_roundtrip(S8Type(), -1)
-test_roundtrip(TupleType([U16Type(),U16Type()]), mk_tup(3,4))
-test_roundtrip(ListType(StringType()), [mk_str("hello there")])
-test_roundtrip(ListType(ListType(StringType())), [[mk_str("one"),mk_str("two")],[mk_str("three")]])
-test_roundtrip(ListType(OptionType(TupleType([StringType(),U16Type()]))), [{'some':mk_tup(mk_str("answer"),42)}])
-test_roundtrip(VariantType([CaseType('x', TupleType([U32Type(),U32Type(),U32Type(),U32Type(),
-                                                     U32Type(),U32Type(),U32Type(),U32Type(),
-                                                     U32Type(),U32Type(),U32Type(),U32Type(),
-                                                     U32Type(),U32Type(),U32Type(),U32Type(),
-                                                     StringType()]))]),
-               {'x': mk_tup(1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16, mk_str("wat"))})
-
-def test_handles():
+
+async def test_handles():
   before = definitions.MAX_FLAT_RESULTS
   definitions.MAX_FLAT_RESULTS = 16
 
@@ -503,7 +509,7 @@ def on_return(results):
     nonlocal got
     got = results
 
-  asyncio.run(canon_lift(opts, inst, ft, core_wasm, None, on_start, on_return, None))
+  await canon_lift(opts, inst, ft, core_wasm, None, on_start, on_return, None)
 
   assert(len(got) == 3)
   assert(got[0] == 46)
@@ -514,7 +520,6 @@ def on_return(results):
   assert(len(inst.resources.table(rt).free) == 4)
   definitions.MAX_FLAT_RESULTS = before
 
-test_handles()
 
 async def test_async_to_async():
   producer_heap = Heap(10)
@@ -550,7 +555,7 @@ async def core_blocking_producer(task, args):
     [] = await canon_task_return(task, CoreFuncType(['i32'],[]), [44])
     await task.on_block(fut3)
     return []
-  blocking_callee = partial(canon_lift, producer_opts, producer_inst, blocking_ft, core_blocking_producer) 
+  blocking_callee = partial(canon_lift, producer_opts, producer_inst, blocking_ft, core_blocking_producer)
 
   consumer_heap = Heap(10)
   consumer_opts = mk_opts(consumer_heap.memory)
@@ -564,31 +569,32 @@ async def consumer(task, args):
     u8 = consumer_heap.memory[ptr]
     assert(u8 == 43)
     [ret] = await canon_lower(consumer_opts, toggle_ft, toggle_callee, task, [])
-    assert(ret == (1 | (CallState.STARTED << 30)))
+    subi,state = unpack_lower_result(ret)
+    assert(state == CallState.STARTED)
     retp = ptr
     consumer_heap.memory[retp] = 13
     [ret] = await canon_lower(consumer_opts, blocking_ft, blocking_callee, task, [83, retp])
     assert(ret == (2 | (CallState.STARTING << 30)))
     assert(consumer_heap.memory[retp] == 13)
     fut1.set_result(None)
-    event, callidx = await task.wait()
+    event, callidx, _ = await task.wait()
     assert(event == EventCode.CALL_DONE)
     assert(callidx == 1)
-    [] = await canon_subtask_drop(task, callidx)
-    event, callidx = await task.wait()
+    [] = await canon_waitable_drop(task, callidx)
+    event, callidx, _ = await task.wait()
     assert(event == EventCode.CALL_STARTED)
     assert(callidx == 2)
     assert(consumer_heap.memory[retp] == 13)
     fut2.set_result(None)
-    event, callidx = await task.wait()
+    event, callidx, _ = await task.wait()
     assert(event == EventCode.CALL_RETURNED)
     assert(callidx == 2)
     assert(consumer_heap.memory[retp] == 44)
     fut3.set_result(None)
-    event, callidx = await task.wait()
+    event, callidx, _ = await task.wait()
     assert(event == EventCode.CALL_DONE)
     assert(callidx == 2)
-    [] = await canon_subtask_drop(task, callidx)
+    [] = await canon_waitable_drop(task, callidx)
 
     dtor_fut = asyncio.Future()
     dtor_value = None
@@ -607,10 +613,10 @@ async def dtor(task, args):
     assert(ret == (2 | (CallState.STARTED << 30)))
     assert(dtor_value is None)
     dtor_fut.set_result(None)
-    event, callidx = await task.wait()
+    event, callidx, _ = await task.wait()
     assert(event == CallState.DONE)
     assert(callidx == 2)
-    [] = await canon_subtask_drop(task, callidx)
+    [] = await canon_waitable_drop(task, callidx)
 
     [] = await canon_task_return(task, CoreFuncType(['i32'],[]), [42])
     return []
@@ -630,7 +636,6 @@ def on_return(results):
   assert(len(got) == 1)
   assert(got[0] == 42)
 
-asyncio.run(test_async_to_async())
 
 async def test_async_callback():
   producer_inst = ComponentInstance()
@@ -664,22 +669,25 @@ async def consumer(task, args):
     return [42]
 
   async def callback(task, args):
-    assert(len(args) == 3)
+    assert(len(args) == 4)
     if args[0] == 42:
       assert(args[1] == EventCode.CALL_DONE)
       assert(args[2] == 1)
-      await canon_subtask_drop(task, 1)
+      assert(args[3] == 0)
+      await canon_waitable_drop(task, 1)
       return [53]
     elif args[0] == 52:
       assert(args[1] == EventCode.YIELDED)
       assert(args[2] == 0)
+      assert(args[3] == 0)
       fut2.set_result(None)
       return [62]
     else:
       assert(args[0] == 62)
       assert(args[1] == EventCode.CALL_DONE)
       assert(args[2] == 2)
-      await canon_subtask_drop(task, 2)
+      assert(args[3] == 0)
+      await canon_waitable_drop(task, 2)
       [] = await canon_task_return(task, CoreFuncType(['i32'],[]), [83])
       return [0]
 
@@ -698,7 +706,6 @@ def on_return(results):
   await canon_lift(opts, consumer_inst, consumer_ft, consumer, None, on_start, on_return)
   assert(got[0] == 83)
 
-asyncio.run(test_async_callback())
 
 async def test_async_to_sync():
   producer_opts = CanonicalOptions()
@@ -742,19 +749,19 @@ async def consumer(task, args):
 
     fut.set_result(None)
     assert(producer1_done == False)
-    event, callidx = await task.wait()
+    event, callidx, _ = await task.wait()
     assert(event == EventCode.CALL_DONE)
     assert(callidx == 1)
-    await canon_subtask_drop(task, callidx)
+    await canon_waitable_drop(task, callidx)
     assert(producer1_done == True)
 
     assert(producer2_done == False)
     await canon_task_yield(task)
     assert(producer2_done == True)
-    event, callidx = await task.poll()
+    event, callidx, _ = await task.poll()
     assert(event == EventCode.CALL_DONE)
     assert(callidx == 2)
-    await canon_subtask_drop(task, callidx)
+    await canon_waitable_drop(task, callidx)
     assert(producer2_done == True)
 
     assert(await task.poll() is None)
@@ -773,7 +780,6 @@ def on_return(results):
   await canon_lift(consumer_opts, consumer_inst, consumer_ft, consumer, None, on_start, on_return)
   assert(got[0] == 83)
 
-asyncio.run(test_async_to_sync())
 
 async def test_async_backpressure():
   producer_opts = CanonicalOptions()
@@ -821,18 +827,18 @@ async def consumer(task, args):
     fut.set_result(None)
     assert(producer1_done == False)
     assert(producer2_done == False)
-    event, callidx = await task.wait()
+    event, callidx, _ = await task.wait()
     assert(event == EventCode.CALL_DONE)
     assert(callidx == 1)
     assert(producer1_done == True)
     assert(producer2_done == True)
-    event, callidx = await task.poll()
+    event, callidx, _ = await task.poll()
     assert(event == EventCode.CALL_DONE)
     assert(callidx == 2)
     assert(producer2_done == True)
 
-    await canon_subtask_drop(task, 1)
-    await canon_subtask_drop(task, 2)
+    await canon_waitable_drop(task, 1)
+    await canon_waitable_drop(task, 2)
 
     assert(await task.poll() is None)
 
@@ -850,8 +856,6 @@ def on_return(results):
   await canon_lift(consumer_opts, consumer_inst, consumer_ft, consumer, None, on_start, on_return)
   assert(got[0] == 84)
 
-if definitions.DETERMINISTIC_PROFILE:
-  asyncio.run(test_async_backpressure())
 
 async def test_sync_using_wait():
   hostcall_opts = mk_opts()
@@ -880,16 +884,16 @@ async def core_func(task, args):
     assert(ret == (2 | (CallState.STARTED << 30)))
 
     fut1.set_result(None)
-    event, callidx = await task.wait()
+    event, callidx, _ = await task.wait()
     assert(event == EventCode.CALL_DONE)
     assert(callidx == 1)
     fut2.set_result(None)
-    event, callidx = await task.wait()
+    event, callidx, _ = await task.wait()
     assert(event == EventCode.CALL_DONE)
     assert(callidx == 2)
 
-    await canon_subtask_drop(task, 1)
-    await canon_subtask_drop(task, 2)
+    await canon_waitable_drop(task, 1)
+    await canon_waitable_drop(task, 2)
 
     return []
 
@@ -898,6 +902,786 @@ def on_start(): return []
   def on_return(results): pass
   await canon_lift(mk_opts(), inst, ft, core_func, None, on_start, on_return)
 
-asyncio.run(test_sync_using_wait())
+
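+# Host-side ReadableStream used by the tests below: reads are served from the
+# in-memory 'remaining' list (at most 'chunk' values at a time) and block on a
+# future when it is empty until write()/close() is called; 'eager_cancel'
+# (set by default) controls whether cancel_read completes eagerly or blocks.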
+class HostSource(ReadableStream):
+  remaining: list[int]
+  destroy_if_empty: bool
+  chunk: int
+  waiting: Optional[asyncio.Future]
+  eager_cancel: asyncio.Event
+
+  def __init__(self, contents, chunk, destroy_if_empty = True):
+    self.remaining = contents
+    self.destroy_if_empty = destroy_if_empty
+    self.chunk = chunk
+    self.waiting = None
+    self.eager_cancel = asyncio.Event()
+    self.eager_cancel.set()
+
+  def closed(self):
+    return not self.remaining and self.destroy_if_empty
+
+  def wake_waiting(self, cancelled = False):
+    if self.waiting:
+      self.waiting.set_result(cancelled)
+      self.waiting = None
+
+  def close(self):
+    self.remaining = []
+    self.destroy_if_empty = True
+    self.wake_waiting()
+
+  def destroy_once_empty(self):
+    self.destroy_if_empty = True
+    if self.closed():
+      self.wake_waiting()
+
+  async def read(self, dst, on_block):
+    if not self.remaining:
+      if self.closed():
+        return
+      self.waiting = asyncio.Future()
+      cancelled = await on_block(self.waiting)
+      if cancelled or self.closed():
+        return
+      assert(self.remaining)
+    n = min(dst.remain(), len(self.remaining), self.chunk)
+    dst.lower(self.remaining[:n])
+    del self.remaining[:n]
+
+  async def cancel_read(self, dst, on_block):
+    await on_block(self.eager_cancel.wait())
+    self.wake_waiting(True)
+
+  def write(self, vs):
+    assert(vs and not self.closed())
+    self.remaining += vs
+    self.wake_waiting()
+
+  def maybe_writer_handle_index(self, inst):
+    return None
+
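+# Host-side sink that spawns a background task to drain a ReadableStream into
+# 'received', gated by a credit count ('write_remain', topped up via
+# set_remain); consume(n) awaits until n values have arrived, or returns None
+# if the stream closes first.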
+class HostSink:
+  stream: ReadableStream
+  received: list[int]
+  chunk: int
+  write_remain: int
+  write_event: asyncio.Event
+  ready_to_consume: asyncio.Event
+
+  def __init__(self, stream, chunk, remain = 2**64):
+    self.stream = stream
+    self.received = []
+    self.chunk = chunk
+    self.write_remain = remain
+    self.write_event = asyncio.Event()
+    if remain:
+      self.write_event.set()
+    self.ready_to_consume = asyncio.Event()
+    async def read_all():
+      while not self.stream.closed():
+        async def on_block(f):
+          return await f
+        await self.write_event.wait()
+        await self.stream.read(self, on_block)
+      self.ready_to_consume.set()
+    asyncio.create_task(read_all())
+
+  def set_remain(self, n):
+    self.write_remain = n
+    if self.write_remain > 0:
+      self.write_event.set()
+
+  def remain(self):
+    return self.write_remain
+
+  def lower(self, vs):
+    self.received += vs
+    self.ready_to_consume.set()
+    self.write_remain -= len(vs)
+    if self.write_remain == 0:
+      self.write_event.clear()
+
+  async def consume(self, n):
+    while n > len(self.received):
+      self.ready_to_consume.clear()
+      await self.ready_to_consume.wait()
+      if self.stream.closed():
+        return None
+    ret = self.received[:n]
+    del self.received[:n]
+    return ret
+
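+# All stream reads and writes in this test complete eagerly (no BLOCKED
+# results): the guest pipes 8 bytes through a host import that adds 10 to
+# each element, and the final output is checked on the host side.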
+async def test_eager_stream_completion():
+  ft = FuncType([StreamType(U8Type())], [StreamType(U8Type())])
+  inst = ComponentInstance()
+  mem = bytearray(20)
+  opts = mk_opts(memory=mem, sync=False)
+
+  async def host_import(task, on_start, on_return, on_block):
+    args = on_start()
+    assert(len(args) == 1)
+    assert(isinstance(args[0], ReadableStream))
+    incoming = HostSink(args[0], chunk=4)
+    outgoing = HostSource([], chunk=4, destroy_if_empty=False)
+    on_return([outgoing])
+    async def add10():
+      while (vs := await incoming.consume(4)):
+        for i in range(len(vs)):
+          vs[i] += 10
+        outgoing.write(vs)
+      outgoing.close()
+    asyncio.create_task(add10())
+
+  src_stream = HostSource([1,2,3,4,5,6,7,8], chunk=4)
+  def on_start():
+    return [src_stream]
+
+  dst_stream = None
+  def on_return(results):
+    assert(len(results) == 1)
+    nonlocal dst_stream
+    dst_stream = HostSink(results[0], chunk=4)
+
+  async def core_func(task, args):
+    assert(len(args) == 1)
+    rsi1 = args[0]
+    assert(rsi1 == 1)
+    [wsi1] = await canon_stream_new(U8Type(), task)
+    [] = await canon_task_return(task, CoreFuncType(['i32'],[]), [wsi1])
+    [ret] = await canon_stream_read(task, rsi1, 0, 4)
+    assert(ret == 4)
+    assert(mem[0:4] == b'\x01\x02\x03\x04')
+    [wsi2] = await canon_stream_new(U8Type(), task)
+    retp = 12
+    [ret] = await canon_lower(opts, ft, host_import, task, [wsi2, retp])
+    assert(ret == 0)
+    rsi2 = mem[retp]
+    [ret] = await canon_stream_write(task, wsi2, 0, 4)
+    assert(ret == 4)
+    [ret] = await canon_stream_read(task, rsi2, 0, 4)
+    assert(ret == 4)
+    [ret] = await canon_stream_write(task, wsi1, 0, 4)
+    assert(ret == 4)
+    [ret] = await canon_stream_read(task, rsi1, 0, 4)
+    assert(ret == 4)
+    [ret] = await canon_stream_read(task, rsi1, 0, 4)
+    assert(ret == definitions.CLOSED)
+    assert(mem[0:4] == b'\x05\x06\x07\x08')
+    [ret] = await canon_stream_write(task, wsi2, 0, 4)
+    assert(ret == 4)
+    [ret] = await canon_stream_read(task, rsi2, 0, 4)
+    assert(ret == 4)
+    [ret] = await canon_stream_write(task, wsi1, 0, 4)
+    assert(ret == 4)
+    [] = await canon_waitable_drop(task, rsi1)
+    [] = await canon_waitable_drop(task, rsi2)
+    [] = await canon_waitable_drop(task, wsi1)
+    [] = await canon_waitable_drop(task, wsi2)
+    return []
+
+  await canon_lift(opts, inst, ft, core_func, None, on_start, on_return)
+  assert(dst_stream.received == [11,12,13,14,15,16,17,18])
+
+
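+# Same data flow as test_eager_stream_completion, but most copies initially
+# return BLOCKED and are completed later via STREAM_READ / STREAM_WRITE /
+# CALL_DONE events delivered by task.wait().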
+async def test_async_stream_ops():
+  ft = FuncType([StreamType(U8Type())], [StreamType(U8Type())])
+  inst = ComponentInstance()
+  mem = bytearray(20)
+  opts = mk_opts(memory=mem, sync=False)
+
+  host_import_incoming = None
+  host_import_outgoing = None
+  async def host_import(task, on_start, on_return, on_block):
+    nonlocal host_import_incoming, host_import_outgoing
+    args = on_start()
+    assert(len(args) == 1)
+    assert(isinstance(args[0], ReadableStream))
+    host_import_incoming = HostSink(args[0], chunk=4, remain = 0)
+    host_import_outgoing = HostSource([], chunk=4, destroy_if_empty=False)
+    on_return([host_import_outgoing])
+    while not host_import_incoming.stream.closed():
+      vs = await on_block(host_import_incoming.consume(4))
+      for i in range(len(vs)):
+        vs[i] += 10
+      host_import_outgoing.write(vs)
+    host_import_outgoing.destroy_once_empty()
+
+  src_stream = HostSource([], chunk=4, destroy_if_empty = False)
+  def on_start():
+    return [src_stream]
+
+  dst_stream = None
+  def on_return(results):
+    assert(len(results) == 1)
+    nonlocal dst_stream
+    dst_stream = HostSink(results[0], chunk=4, remain = 0)
+
+  async def core_func(task, args):
+    [rsi1] = args
+    assert(rsi1 == 1)
+    [wsi1] = await canon_stream_new(U8Type(), task)
+    [] = await canon_task_return(task, CoreFuncType(['i32'],[]), [wsi1])
+    [ret] = await canon_stream_read(task, rsi1, 0, 4)
+    assert(ret == definitions.BLOCKED)
+    src_stream.write([1,2,3,4])
+    event, p1, p2 = await task.wait()
+    assert(event == EventCode.STREAM_READ)
+    assert(p1 == rsi1)
+    assert(p2 == 4)
+    assert(mem[0:4] == b'\x01\x02\x03\x04')
+    [wsi2] = await canon_stream_new(U8Type(), task)
+    retp = 16
+    [ret] = await canon_lower(opts, ft, host_import, task, [wsi2, retp])
+    subi,state = unpack_lower_result(ret)
+    assert(state == CallState.RETURNED)
+    rsi2 = mem[16]
+    assert(rsi2 == 4)
+    [ret] = await canon_stream_write(task, wsi2, 0, 4)
+    assert(ret == definitions.BLOCKED)
+    host_import_incoming.set_remain(100)
+    event, p1, p2 = await task.wait()
+    assert(event == EventCode.STREAM_WRITE)
+    assert(p1 == wsi2)
+    assert(p2 == 4)
+    [ret] = await canon_stream_read(task, rsi2, 0, 4)
+    assert(ret == 4)
+    [ret] = await canon_stream_write(task, wsi1, 0, 4)
+    assert(ret == definitions.BLOCKED)
+    dst_stream.set_remain(100)
+    event, p1, p2 = await task.wait()
+    assert(event == EventCode.STREAM_WRITE)
+    assert(p1 == wsi1)
+    assert(p2 == 4)
+    src_stream.write([5,6,7,8])
+    src_stream.destroy_once_empty()
+    [ret] = await canon_stream_read(task, rsi1, 0, 4)
+    assert(ret == 4)
+    [ret] = await canon_stream_read(task, rsi1, 0, 4)
+    assert(ret == definitions.CLOSED)
+    [] = await canon_waitable_drop(task, rsi1)
+    assert(mem[0:4] == b'\x05\x06\x07\x08')
+    [ret] = await canon_stream_write(task, wsi2, 0, 4)
+    assert(ret == 4)
+    [] = await canon_waitable_drop(task, wsi2)
+    [ret] = await canon_stream_read(task, rsi2, 0, 4)
+    assert(ret == definitions.BLOCKED)
+    event, p1, p2 = await task.wait()
+    assert(event == EventCode.CALL_DONE)
+    assert(p1 == subi)
+    assert(p2 == 0)
+    event, p1, p2 = await task.wait()
+    assert(event == EventCode.STREAM_READ)
+    assert(p1 == rsi2)
+    assert(p2 == 4)
+    [ret] = await canon_stream_read(task, rsi2, 0, 4)
+    assert(ret == definitions.CLOSED)
+    [] = await canon_waitable_drop(task, rsi2)
+    [] = await canon_waitable_drop(task, subi)
+    [ret] = await canon_stream_write(task, wsi1, 0, 4)
+    assert(ret == 4)
+    [] = await canon_waitable_drop(task, wsi1)
+    return []
+
+  await canon_lift(opts, inst, ft, core_func, None, on_start, on_return)
+  assert(dst_stream.received == [11,12,13,14,15,16,17,18])
+
+
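+# The guest returns the readable end it was passed as its own result, so the
+# host should get back the very same stream object it supplied.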
+async def test_stream_forward():
+  src_stream = HostSource([1,2,3,4], chunk=4)
+  def on_start():
+    return [src_stream]
+
+  dst_stream = None
+  def on_return(results):
+    assert(len(results) == 1)
+    nonlocal dst_stream
+    dst_stream = results[0]
+
+  async def core_func(task, args):
+    assert(len(args) == 1)
+    rsi1 = args[0]
+    assert(rsi1 == 1)
+    return [rsi1]
+
+  opts = mk_opts()
+  inst = ComponentInstance()
+  ft = FuncType([StreamType(U8Type())], [StreamType(U8Type())])
+  await canon_lift(opts, inst, ft, core_func, None, on_start, on_return)
+  assert(src_stream is dst_stream)
+
+
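+# The guest passes a freshly created writable end to a host import that echoes
+# the stream straight back; the lowered result is expected to be the writable
+# end's own index with the high bit (2**31) set, flagging a stream this
+# component itself created.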
+async def test_receive_own_stream():
+  inst = ComponentInstance()
+  mem = bytearray(20)
+  opts = mk_opts(memory=mem, sync=False)
+
+  host_ft = FuncType([StreamType(U8Type())], [StreamType(U8Type())])
+  async def host_import(task, on_start, on_return, on_block):
+    args = on_start()
+    assert(len(args) == 1)
+    assert(isinstance(args[0], ReadableStream))
+    on_return(args)
+
+  async def core_func(task, args):
+    assert(len(args) == 0)
+    [wsi] = await canon_stream_new(U8Type(), task)
+    assert(wsi == 1)
+    retp = 4
+    [ret] = await canon_lower(opts, host_ft, host_import, task, [wsi, retp])
+    assert(ret == 0)
+    result = int.from_bytes(mem[retp : retp+4], 'little', signed=False)
+    assert(result == (wsi | 2**31))
+    [] = await canon_waitable_drop(task, wsi)
+    return []
+
+  def on_start(): return []
+  def on_return(results): assert(len(results) == 0)
+  ft = FuncType([],[])
+  await canon_lift(mk_opts(), inst, ft, core_func, None, on_start, on_return)
+
+
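+# The host source/sink hand over fewer elements than requested, so guest reads
+# and writes complete partially (e.g. 2 of 4 bytes) and the remainder is
+# reported later via STREAM_READ / STREAM_WRITE events.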
+async def test_host_partial_reads_writes():
+  mem = bytearray(20)
+  opts = mk_opts(memory=mem, sync=False)
+
+  src = HostSource([1,2,3,4], chunk=2, destroy_if_empty = False)
+  source_ft = FuncType([], [StreamType(U8Type())])
+  async def host_source(task, on_start, on_return, on_block):
+    [] = on_start()
+    on_return([src])
+
+  dst = None
+  sink_ft = FuncType([StreamType(U8Type())], [])
+  async def host_sink(task, on_start, on_return, on_block):
+    nonlocal dst
+    [s] = on_start()
+    dst = HostSink(s, chunk=1, remain=2)
+    on_return([])
+
+  async def core_func(task, args):
+    assert(len(args) == 0)
+    retp = 4
+    [ret] = await canon_lower(opts, source_ft, host_source, task, [retp])
+    assert(ret == 0)
+    rsi = mem[retp]
+    assert(rsi == 1)
+    [ret] = await canon_stream_read(task, rsi, 0, 4)
+    assert(ret == 2)
+    assert(mem[0:2] == b'\x01\x02')
+    [ret] = await canon_stream_read(task, rsi, 0, 4)
+    assert(ret == 2)
+    assert(mem[0:2] == b'\x03\x04')
+    [ret] = await canon_stream_read(task, rsi, 0, 4)
+    assert(ret == definitions.BLOCKED)
+    src.write([5,6])
+    event, p1, p2 = await task.wait()
+    assert(event == EventCode.STREAM_READ)
+    assert(p1 == rsi)
+    assert(p2 == 2)
+    [] = await canon_waitable_drop(task, rsi)
+
+    [wsi] = await canon_stream_new(U8Type(), task)
+    assert(wsi == 1)
+    [ret] = await canon_lower(opts, sink_ft, host_sink, task, [wsi])
+    assert(ret == 0)
+    mem[0:6] = b'\x01\x02\x03\x04\x05\x06'
+    [ret] = await canon_stream_write(task, wsi, 0, 6)
+    assert(ret == 2)
+    [ret] = await canon_stream_write(task, wsi, 2, 6)
+    assert(ret == definitions.BLOCKED)
+    dst.set_remain(4)
+    event, p1, p2 = await task.wait()
+    assert(event == EventCode.STREAM_WRITE)
+    assert(p1 == wsi)
+    assert(p2 == 4)
+    assert(dst.received == [1,2,3,4,5,6])
+    [] = await canon_waitable_drop(task, wsi)
+    return []
+
+  opts2 = mk_opts()
+  inst = ComponentInstance()
+  def on_start(): return []
+  def on_return(results): assert(len(results) == 0)
+  ft = FuncType([],[])
+  await canon_lift(opts2, inst, ft, core_func, None, on_start, on_return)
+
+
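+# Streams bytes directly between two wasm instances: core_func1 writes into
+# the writable end while core_func2 reads from the paired readable end, with
+# fut1..fut4 used to interleave the two tasks deterministically.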
+async def test_wasm_to_wasm_stream():
+  fut1, fut2, fut3, fut4 = asyncio.Future(), asyncio.Future(), asyncio.Future(), asyncio.Future()
+
+  inst1 = ComponentInstance()
+  mem1 = bytearray(10)
+  opts1 = mk_opts(memory=mem1, sync=False)
+  ft1 = FuncType([], [StreamType(U8Type())])
+  async def core_func1(task, args):
+    assert(not args)
+    [wsi] = await canon_stream_new(U8Type(), task)
+    [] = await canon_task_return(task, CoreFuncType(['i32'], []), [wsi])
+
+    await task.wait_on(fut1)
+
+    mem1[0:4] = b'\x01\x02\x03\x04'
+    [ret] = await canon_stream_write(task, wsi, 0, 2)
+    assert(ret == 2)
+    [ret] = await canon_stream_write(task, wsi, 2, 2)
+    assert(ret == 2)
+
+    await task.wait_on(fut2)
+
+    mem1[0:8] = b'\x05\x06\x07\x08\x09\x0a\x0b\x0c'
+    [ret] = await canon_stream_write(task, wsi, 0, 8)
+    assert(ret == definitions.BLOCKED)
+
+    fut3.set_result(None)
+
+    event, p1, p2 = await task.wait()
+    assert(event == EventCode.STREAM_WRITE)
+    assert(p1 == wsi)
+    assert(p2 == 4)
+
+    fut4.set_result(None)
+
+    [] = await canon_waitable_drop(task, wsi)
+    return []
+
+  func1 = partial(canon_lift, opts1, inst1, ft1, core_func1)
+
+  inst2 = ComponentInstance()
+  mem2 = bytearray(10)
+  opts2 = mk_opts(memory=mem2, sync=False)
+  ft2 = FuncType([], [])
+  async def core_func2(task, args):
+    assert(not args)
+    [] = await canon_task_return(task, CoreFuncType([], []), [])
+
+    retp = 0
+    [ret] = await canon_lower(opts2, ft1, func1, task, [retp])
+    subi,state = unpack_lower_result(ret)
+    assert(state == CallState.RETURNED)
+    rsi = mem2[0]
+    assert(rsi == 1)
+
+    [ret] = await canon_stream_read(task, rsi, 0, 8)
+    assert(ret == definitions.BLOCKED)
+
+    fut1.set_result(None)
+
+    event, p1, p2 = await task.wait()
+    assert(event == EventCode.STREAM_READ)
+    assert(p1 == rsi)
+    assert(p2 == 4)
+    assert(mem2[0:8] == b'\x01\x02\x03\x04\x00\x00\x00\x00')
+
+    fut2.set_result(None)
+    await task.wait_on(fut3)
+
+    mem2[0:8] = bytes(8)
+    [ret] = await canon_stream_read(task, rsi, 0, 2)
+    assert(ret == 2)
+    assert(mem2[0:6] == b'\x05\x06\x00\x00\x00\x00')
+    [ret] = await canon_stream_read(task, rsi, 2, 2)
+    assert(ret == 2)
+    assert(mem2[0:6] == b'\x05\x06\x07\x08\x00\x00')
+
+    await task.wait_on(fut4)
+
+    [ret] = await canon_stream_read(task, rsi, 0, 2)
+    assert(ret == definitions.CLOSED)
+    [] = await canon_waitable_drop(task, rsi)
+
+    event, callidx, _ = await task.wait()
+    assert(event == EventCode.CALL_DONE)
+    assert(callidx == subi)
+    [] = await canon_waitable_drop(task, subi)
+    return []
+
+  await canon_lift(opts2, inst2, ft2, core_func2, None, lambda:[], lambda _:())
+
+
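+# Streams two borrowed resource handles from inst2 to inst1; the receiving
+# component resolves them via resource.rep and drops them before the lending
+# call completes.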
+async def test_borrow_stream():
+  rt_inst = ComponentInstance()
+  rt = ResourceType(rt_inst, None)
+
+  inst1 = ComponentInstance()
+  mem1 = bytearray(12)
+  opts1 = mk_opts(memory=mem1)
+  ft1 = FuncType([StreamType(BorrowType(rt))], [])
+  async def core_func1(task, args):
+    [rsi] = args
+
+    [ret] = await canon_stream_read(task, rsi, 4, 2)
+    assert(ret == definitions.BLOCKED)
+
+    event, p1, p2 = await task.wait()
+    assert(event == EventCode.STREAM_READ)
+    assert(p1 == rsi)
+    assert(p2 == 2)
+    [ret] = await canon_stream_read(task, rsi, 0, 2)
+    assert(ret == definitions.CLOSED)
+
+    [] = await canon_waitable_drop(task, rsi)
+
+    h1 = mem1[4]
+    h2 = mem1[8]
+    assert(await canon_resource_rep(rt, task, h1) == [42])
+    assert(await canon_resource_rep(rt, task, h2) == [43])
+    [] = await canon_resource_drop(rt, True, task, h1)
+    [] = await canon_resource_drop(rt, True, task, h2)
+
+    return []
+
+  func1 = partial(canon_lift, opts1, inst1, ft1, core_func1)
+
+  inst2 = ComponentInstance()
+  mem2 = bytearray(10)
+  sync_opts2 = mk_opts(memory=mem2, sync=True)
+  async_opts2 = mk_opts(memory=mem2, sync=False)
+  ft2 = FuncType([], [])
+  async def core_func2(task, args):
+    assert(not args)
+
+    [wsi] = await canon_stream_new(BorrowType(rt), task)
+    [ret] = await canon_lower(async_opts2, ft1, func1, task, [wsi])
+    subi,state = unpack_lower_result(ret)
+    assert(state == CallState.STARTED)
+
+    [h1] = await canon_resource_new(rt, task, 42)
+    [h2] = await canon_resource_new(rt, task, 43)
+    mem2[0] = h1
+    mem2[4] = h2
+
+    [ret] = await canon_stream_write(task, wsi, 0, 2)
+    assert(ret == 2)
+    [] = await canon_waitable_drop(task, wsi)
+
+    event, p1, _ = await task.wait()
+    assert(event == EventCode.CALL_DONE)
+    assert(p1 == subi)
+
+    [] = await canon_waitable_drop(task, subi)
+    return []
+
+  await canon_lift(sync_opts2, inst2, ft2, core_func2, None, lambda:[], lambda _:())
+
+
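+# Exercises stream.cancel-write (sync and async) and stream.cancel-read,
+# covering both eager cancellation and the case where cancellation blocks and
+# completes later via a STREAM_READ event.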
+async def test_cancel_copy():
+  inst = ComponentInstance()
+  mem = bytearray(10)
+  lower_opts = mk_opts(memory=mem, sync=False)
+
+  host_ft1 = FuncType([StreamType(U8Type())],[])
+  host_sink = None
+  async def host_func1(task, on_start, on_return, on_block):
+    nonlocal host_sink
+    [stream] = on_start()
+    host_sink = HostSink(stream, 2, remain = 0)
+    on_return([])
+
+  host_ft2 = FuncType([], [StreamType(U8Type())])
+  host_source = None
+  async def host_func2(task, on_start, on_return, on_block):
+    nonlocal host_source
+    [] = on_start()
+    host_source = HostSource([], chunk=2, destroy_if_empty = False)
+    on_return([host_source])
+
+  async def core_func(task, args):
+    assert(not args)
+
+    [wsi] = await canon_stream_new(U8Type(), task)
+    [ret] = await canon_lower(lower_opts, host_ft1, host_func1, task, [wsi])
+    assert(ret == 0)
+    mem[0:4] = b'\x0a\x0b\x0c\x0d'
+    [ret] = await canon_stream_write(task, wsi, 0, 4)
+    assert(ret == definitions.BLOCKED)
+    host_sink.set_remain(2)
+    got = await host_sink.consume(2)
+    assert(got == [0xa, 0xb])
+    [ret] = await canon_stream_cancel_write(True, task, wsi)
+    assert(ret == 2)
+    [] = await canon_waitable_drop(task, wsi)
+
+    [wsi] = await canon_stream_new(U8Type(), task)
+    [ret] = await canon_lower(lower_opts, host_ft1, host_func1, task, [wsi])
+    assert(ret == 0)
+    mem[0:4] = b'\x01\x02\x03\x04'
+    [ret] = await canon_stream_write(task, wsi, 0, 4)
+    assert(ret == definitions.BLOCKED)
+    host_sink.set_remain(2)
+    got = await host_sink.consume(2)
+    assert(got == [1, 2])
+    [ret] = await canon_stream_cancel_write(False, task, wsi)
+    assert(ret == 2)
+    [] = await canon_waitable_drop(task, wsi)
+
+    retp = 0
+    [ret] = await canon_lower(lower_opts, host_ft2, host_func2, task, [retp])
+    assert(ret == 0)
+    rsi = mem[retp]
+    [ret] = await canon_stream_read(task, rsi, 0, 4)
+    assert(ret == definitions.BLOCKED)
+    [ret] = await canon_stream_cancel_read(True, task, rsi)
+    assert(ret == 0)
+    [] = await canon_waitable_drop(task, rsi)
+
+    retp = 0
+    [ret] = await canon_lower(lower_opts, host_ft2, host_func2, task, [retp])
+    assert(ret == 0)
+    rsi = mem[retp]
+    [ret] = await canon_stream_read(task, rsi, 0, 4)
+    assert(ret == definitions.BLOCKED)
+    host_source.eager_cancel.clear()
+    [ret] = await canon_stream_cancel_read(False, task, rsi)
+    assert(ret == definitions.BLOCKED)
+    host_source.write([7,8])
+    await asyncio.sleep(0)
+    host_source.eager_cancel.set()
+    event,p1,p2 = await task.wait()
+    assert(event == EventCode.STREAM_READ)
+    assert(p1 == rsi)
+    assert(p2 == 2)
+    assert(mem[0:2] == b'\x07\x08')
+    [] = await canon_waitable_drop(task, rsi)
+
+    return []
+
+  lift_opts = mk_opts()
+  await canon_lift(lift_opts, inst, FuncType([],[]), core_func, None, lambda:[], lambda _:())
+
+
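+# Minimal host-side future endpoints: HostFutureSink accepts a single lowered
+# value, while HostFutureSource resolves one asyncio.Future whose result is
+# handed to the reader (cancel_read resolves it with None and resets it).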
+class HostFutureSink:
+  v: Optional[any] = None
+
+  def remain(self):
+    return 1 if self.v is None else 0
+
+  def lower(self, v):
+    assert(not self.v)
+    assert(len(v) == 1)
+    self.v = v[0]
+
+class HostFutureSource(ReadableStream):
+  v: Optional[asyncio.Future]
+  def __init__(self):
+    self.v = asyncio.Future()
+  def closed(self):
+    return self.v is None
+  def close(self):
+    assert(self.v is None)
+  async def read(self, dst, on_block):
+    assert(self.v is not None)
+    v = await on_block(self.v)
+    if v:
+      dst.lower([v])
+      self.v = None
+  async def cancel_read(self, dst, on_block):
+    if self.v and not self.v.done():
+      self.v.set_result(None)
+      self.v = asyncio.Future()
+  def maybe_writer_handle_index(self, inst):
+    return None
+
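+# Round-trips future values through a host import (the guest writes 42, the
+# host answers 43): the first read completes via a FUTURE_READ event, while
+# the second is resolved by future.cancel-read after the value has already
+# arrived.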
+async def test_futures():
+  inst = ComponentInstance()
+  mem = bytearray(10)
+  lower_opts = mk_opts(memory=mem, sync=False)
+
+  host_ft1 = FuncType([FutureType(U8Type())],[FutureType(U8Type())])
+  async def host_func(task, on_start, on_return, on_block):
+    [future] = on_start()
+    outgoing = HostFutureSource()
+    on_return([outgoing])
+    incoming = HostFutureSink()
+    await future.read(incoming, on_block)
+    assert(incoming.v == 42)
+    outgoing.v.set_result(43)
+
+  async def core_func(task, args):
+    assert(not args)
+    [wfi] = await canon_future_new(U8Type(), task)
+    retp = 0
+    [ret] = await canon_lower(lower_opts, host_ft1, host_func, task, [wfi, retp])
+    subi,state = unpack_lower_result(ret)
+    assert(state == CallState.RETURNED)
+    rfi = mem[retp]
+
+    readp = 0
+    [ret] = await canon_future_read(task, rfi, readp)
+    assert(ret == definitions.BLOCKED)
+
+    writep = 8
+    mem[writep] = 42
+    [ret] = await canon_future_write(task, wfi, writep)
+    assert(ret == 1)
+
+    event,p1,p2 = await task.wait()
+    assert(event == EventCode.CALL_DONE)
+    assert(p1 == subi)
+
+    event,p1,p2 = await task.wait()
+    assert(event == EventCode.FUTURE_READ)
+    assert(p1 == rfi)
+    assert(p2 == 1)
+    assert(mem[readp] == 43)
+
+    [] = await canon_waitable_drop(task, wfi)
+    [] = await canon_waitable_drop(task, rfi)
+    [] = await canon_waitable_drop(task, subi)
+
+    [wfi] = await canon_future_new(U8Type(), task)
+    retp = 0
+    [ret] = await canon_lower(lower_opts, host_ft1, host_func, task, [wfi, retp])
+    subi,state = unpack_lower_result(ret)
+    assert(state == CallState.RETURNED)
+    rfi = mem[retp]
+
+    readp = 0
+    [ret] = await canon_future_read(task, rfi, readp)
+    assert(ret == definitions.BLOCKED)
+
+    writep = 8
+    mem[writep] = 42
+    [ret] = await canon_future_write(task, wfi, writep)
+    assert(ret == 1)
+
+    event,p1,p2 = await task.wait()
+    assert(event == EventCode.CALL_DONE)
+    assert(p1 == subi)
+
+    await task.yield_()
+    [ret] = await canon_future_cancel_read(True, task, rfi)
+    assert(ret == 1)
+    assert(mem[readp] == 43)
+
+    [] = await canon_waitable_drop(task, wfi)
+    [] = await canon_waitable_drop(task, rfi)
+    [] = await canon_waitable_drop(task, subi)
+
+    return []
+
+  lift_opts = mk_opts()
+  await canon_lift(lift_opts, inst, FuncType([],[]), core_func, None, lambda:[], lambda _:())
+
+
+async def run_async_tests():
+  await test_roundtrips()
+  await test_handles()
+  await test_async_to_async()
+  await test_async_callback()
+  await test_async_to_sync()
+  await test_async_backpressure()
+  await test_sync_using_wait()
+  await test_eager_stream_completion()
+  await test_stream_forward()
+  await test_receive_own_stream()
+  await test_host_partial_reads_writes()
+  await test_async_stream_ops()
+  await test_wasm_to_wasm_stream()
+  await test_borrow_stream()
+  await test_cancel_copy()
+  await test_futures()
+
+asyncio.run(run_async_tests())
 
 print("All tests passed")

From e8198f737959c049c31ba02d9adc85fbf0919ff8 Mon Sep 17 00:00:00 2001
From: Luke Wagner <mail@lukewagner.name>
Date: Mon, 21 Oct 2024 13:02:14 -0500
Subject: [PATCH 02/22] Grammar fix

Co-authored-by: Joel Dice <joel.dice@fermyon.com>
---
 design/mvp/Explainer.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/design/mvp/Explainer.md b/design/mvp/Explainer.md
index 52009609..ed521ac2 100644
--- a/design/mvp/Explainer.md
+++ b/design/mvp/Explainer.md
@@ -1486,7 +1486,7 @@ The `future.{read,write}` built-ins have type `[i32 i32] -> [i32]` and
 take an index to the matching [readable or writable end](Async.md#streams-and-futures)
 of a future as the first parameter and a pointer linear memory as the second
 parameter. The return value is either `1` if the future value was eagerly
-read or written to the pointer or the sentinel "`BLOCKED`" value otherwise.
+read from or written to the pointer or the sentinel "`BLOCKED`" value otherwise.
 (See [`canon_future_read`] in the Canonical ABI explainer for details.)
 
 The `{stream,future}.cancel-{read,write}` built-ins have type `[i32] -> [i32]`

From e6469e85fc9134c311537829771075da36278fcc Mon Sep 17 00:00:00 2001
From: Luke Wagner <mail@lukewagner.name>
Date: Mon, 21 Oct 2024 13:05:00 -0500
Subject: [PATCH 03/22] Spelling fix

Co-authored-by: Joel Dice <joel.dice@fermyon.com>
---
 design/mvp/CanonicalABI.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/design/mvp/CanonicalABI.md b/design/mvp/CanonicalABI.md
index d6a4bd6f..d11344e0 100644
--- a/design/mvp/CanonicalABI.md
+++ b/design/mvp/CanonicalABI.md
@@ -581,7 +581,7 @@ retroactively removed (e.g., when a `stream.cancel-read` "steals" a pending
 Python code represents `events` as a list of closures, an optimizing
 implementation should be able to avoid dynamically allocating this list and
 instead represent `events` as a linked list embedded in the elements of the
-`waitables` table (noting that, by design, any given `watiables` element can be
+`waitables` table (noting that, by design, any given `waitables` element can be
 in the `events` list at most once).
 
 A task may also cooperatively yield (via `canon task.yield`), allowing the

From 15298d5926e66de628a38bdd3668428fc19003ce Mon Sep 17 00:00:00 2001
From: Luke Wagner <mail@lukewagner.name>
Date: Mon, 21 Oct 2024 15:20:37 -0500
Subject: [PATCH 04/22] Clarify wording in Async.md concerning the writable end

---
 design/mvp/Async.md | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/design/mvp/Async.md b/design/mvp/Async.md
index 4b7e1b73..c46da368 100644
--- a/design/mvp/Async.md
+++ b/design/mvp/Async.md
@@ -211,17 +211,18 @@ attempts to exit.
 ### Streams and Futures
 
 Streams and Futures have two "ends": a *readable end* and *writable end*. When
-*consuming* a `stream` or `future` value as a parameter (of an export call
-with a `stream` or `future` somewhere in the parameter types) or result (of an
+*consuming* a `stream` or `future` value as a parameter (of an export call with
+a `stream` or `future` somewhere in the parameter types) or result (of an
 import call with a `stream` or `future` somewhere in the result type), the
 receiver always gets *unique ownership* of the *readable end* of the `stream`
 or `future`. When *producing* a `stream` or `future` value as a parameter (of
 an import call) or result (of an export call), the producer can either
-*transfer ownership* of a readable end it has already received or it can
-create a fresh writable end (via `stream.new` or `future.new`) and lift this
-writable end (maintaining ownership of the writable end, but creating a fresh
-readable end for the receiver). To maintain the invariant that readable ends
-are unique, a writable end can be lifted at most once, trapping otherwise.
+*transfer ownership* of a readable end it has already received or it can create
+a fresh writable end (via `stream.new` or `future.new`) and then lift this
+writable end to create a fresh readable end in the consumer while maintaining
+ownership of the writable end in the producer. To maintain the invariant that
+readable ends are unique, a writable end can be lifted at most once, trapping
+otherwise.
 
 Based on this, `stream<T>` and `future<T>` values can be passed between
 functions as if they were synchronous `list<T>` and `T` values, resp. For

From fa3cd3a61613a5c0a7963537ae73deeb1d31195d Mon Sep 17 00:00:00 2001
From: Luke Wagner <mail@lukewagner.name>
Date: Mon, 21 Oct 2024 15:23:26 -0500
Subject: [PATCH 05/22] Mention the callback option alongside task.wait

---
 design/mvp/Explainer.md | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/design/mvp/Explainer.md b/design/mvp/Explainer.md
index ed521ac2..20a9fd78 100644
--- a/design/mvp/Explainer.md
+++ b/design/mvp/Explainer.md
@@ -1495,9 +1495,10 @@ of a stream or future that has an outstanding "`BLOCKED`" read or write. If
 cancellation finished eagerly, the return value is the number of elements read
 or written into the given buffer (`0` or `1` for a `future`). If cancellation
 blocks, the return value is the sentinel "`BLOCKED`" value and the caller must
-`task.wait` for a `{STREAM,FUTURE}_{READ,WRITE}` event to indicate the
-completion of the `read` or `write`. (See [`canon_stream_cancel_read`] in the
-Canonical ABI explainer for details.)
+`task.wait` (or, if using `callback`, return to the event loop) to receive a
+`{STREAM,FUTURE}_{READ,WRITE}` event to indicate the completion of the `read`
+or `write`. (See [`canon_stream_cancel_read`] in the Canonical ABI explainer
+for details.)
 
 The `waitable.drop` built-in has type `[i32] -> []` and removes the indicated
 [subtask](Async.md#subtask-and-supertask) or [stream or future](Async.md#streams-and-futures)

From a864991949b04628fc8f4822d00354faa91b30fc Mon Sep 17 00:00:00 2001
From: Luke Wagner <mail@lukewagner.name>
Date: Tue, 22 Oct 2024 14:37:29 -0500
Subject: [PATCH 06/22] Add <typeidx> to {stream,future}.{read,write}

---
 design/mvp/Binary.md                    |  8 +--
 design/mvp/CanonicalABI.md              | 21 ++++---
 design/mvp/Explainer.md                 |  8 +--
 design/mvp/canonical-abi/definitions.py | 21 ++++---
 design/mvp/canonical-abi/run_tests.py   | 84 ++++++++++++-------------
 5 files changed, 72 insertions(+), 70 deletions(-)

diff --git a/design/mvp/Binary.md b/design/mvp/Binary.md
index 565272bc..a1dde0f1 100644
--- a/design/mvp/Binary.md
+++ b/design/mvp/Binary.md
@@ -294,13 +294,13 @@ canon    ::= 0x00 0x00 f:<core:funcidx> opts:<opts> ft:<typeidx> => (canon lift
            | 0x0c                                                => (canon task.yield (core func)) 🔀
            | 0x0d                                                => (canon waitable.drop (core func)) 🔀
            | 0x0e t:<typeidx>                                    => (canon stream.new t (core func)) 🔀
-           | 0x0f                                                => (canon stream.read (core func)) 🔀
-           | 0x10                                                => (canon stream.write (core func)) 🔀
+           | 0x0f t:<typeidx>                                    => (canon stream.read t (core func)) 🔀
+           | 0x10 t:<typeidx>                                    => (canon stream.write t (core func)) 🔀
            | 0x11 async?:<async?>                                => (canon stream.cancel-read async? (core func)) 🔀
            | 0x12 async?:<async?>                                => (canon stream.cancel-write async? (core func)) 🔀
            | 0x13 t:<typeidx>                                    => (canon future.new t (core func)) 🔀
-           | 0x14                                                => (canon future.read (core func)) 🔀
-           | 0x15                                                => (canon future.write (core func)) 🔀
+           | 0x14 t:<typeidx>                                    => (canon future.read t (core func)) 🔀
+           | 0x15 t:<typeidx>                                    => (canon future.write t (core func)) 🔀
            | 0x16 async?:<async?>                                => (canon future.cancel-read async? (core func)) 🔀
            | 0x17 async?:<async?>                                => (canon future.cancel-write async? (core func)) 🔀
 async?   ::= 0x00                                                =>
diff --git a/design/mvp/CanonicalABI.md b/design/mvp/CanonicalABI.md
index d11344e0..d2a5acf0 100644
--- a/design/mvp/CanonicalABI.md
+++ b/design/mvp/CanonicalABI.md
@@ -2978,29 +2978,30 @@ likelihood of deadlock), there is no synchronous option for `read` or `write`.
 The actual copy happens via polymorphic dispatch to `copy`, which has been
 defined above by the 4 `{Readable,Writable}{Stream,Future}Handle` types:
 ```python
-async def canon_stream_read(task, i, ptr, n):
-  return await async_copy(ReadableStreamHandle, WritableBufferGuestImpl,
+async def canon_stream_read(t, task, i, ptr, n):
+  return await async_copy(ReadableStreamHandle, WritableBufferGuestImpl, t,
                           EventCode.STREAM_READ, task, i, ptr, n)
 
-async def canon_stream_write(task, i, ptr, n):
-  return await async_copy(WritableStreamHandle, ReadableBufferGuestImpl,
+async def canon_stream_write(t, task, i, ptr, n):
+  return await async_copy(WritableStreamHandle, ReadableBufferGuestImpl, t,
                           EventCode.STREAM_WRITE, task, i, ptr, n)
 
-async def canon_future_read(task, i, ptr):
-  return await async_copy(ReadableFutureHandle, WritableBufferGuestImpl,
+async def canon_future_read(t, task, i, ptr):
+  return await async_copy(ReadableFutureHandle, WritableBufferGuestImpl, t,
                           EventCode.FUTURE_READ, task, i, ptr, 1)
 
-async def canon_future_write(task, i, ptr):
-  return await async_copy(WritableFutureHandle, ReadableBufferGuestImpl,
+async def canon_future_write(t, task, i, ptr):
+  return await async_copy(WritableFutureHandle, ReadableBufferGuestImpl, t,
                           EventCode.FUTURE_WRITE, task, i, ptr, 1)
 
-async def async_copy(HandleT, BufferT, event_code, task, i, ptr, n):
+async def async_copy(HandleT, BufferT, t, event_code, task, i, ptr, n):
   trap_if(not task.inst.may_leave)
   h = task.inst.waitables.get(i)
   trap_if(not isinstance(h, HandleT))
+  trap_if(h.t != t)
   trap_if(not h.cx)
   trap_if(h.copying_buffer)
-  buffer = BufferT(h.cx, h.t, ptr, n)
+  buffer = BufferT(h.cx, t, ptr, n)
   if h.stream.closed():
     flat_results = [CLOSED]
   else:
diff --git a/design/mvp/Explainer.md b/design/mvp/Explainer.md
index 20a9fd78..c59dd9aa 100644
--- a/design/mvp/Explainer.md
+++ b/design/mvp/Explainer.md
@@ -1373,13 +1373,13 @@ canon ::= ...
         | (canon task.poll (memory <core:memidx>) (core func <id>?)) 🔀
         | (canon task.yield (core func <id>?)) 🔀
         | (canon stream.new <typeidx> (core func <id>?)) 🔀
-        | (canon stream.read (core func <id>?)) 🔀
-        | (canon stream.write (core func <id>?)) 🔀
+        | (canon stream.read <typeidx> (core func <id>?)) 🔀
+        | (canon stream.write <typeidx> (core func <id>?)) 🔀
         | (canon stream.cancel-read async? (core func <id>?)) 🔀
         | (canon stream.cancel-write async? (core func <id>?)) 🔀
         | (canon future.new <typeidx> (core func <id>?)) 🔀
-        | (canon future.read (core func <id>?)) 🔀
-        | (canon future.write (core func <id>?)) 🔀
+        | (canon future.read <typeidx> (core func <id>?)) 🔀
+        | (canon future.write <typeidx> (core func <id>?)) 🔀
         | (canon future.cancel-read async? (core func <id>?)) 🔀
         | (canon future.cancel-write async? (core func <id>?)) 🔀
         | (canon waitable.drop (core func <id>?)) 🔀
diff --git a/design/mvp/canonical-abi/definitions.py b/design/mvp/canonical-abi/definitions.py
index 2a2450eb..2928b15c 100644
--- a/design/mvp/canonical-abi/definitions.py
+++ b/design/mvp/canonical-abi/definitions.py
@@ -1876,29 +1876,30 @@ async def canon_future_new(t, task):
 
 ### 🔀 `canon {stream,future}.{read,write}`
 
-async def canon_stream_read(task, i, ptr, n):
-  return await async_copy(ReadableStreamHandle, WritableBufferGuestImpl,
+async def canon_stream_read(t, task, i, ptr, n):
+  return await async_copy(ReadableStreamHandle, WritableBufferGuestImpl, t,
                           EventCode.STREAM_READ, task, i, ptr, n)
 
-async def canon_stream_write(task, i, ptr, n):
-  return await async_copy(WritableStreamHandle, ReadableBufferGuestImpl,
+async def canon_stream_write(t, task, i, ptr, n):
+  return await async_copy(WritableStreamHandle, ReadableBufferGuestImpl, t,
                           EventCode.STREAM_WRITE, task, i, ptr, n)
 
-async def canon_future_read(task, i, ptr):
-  return await async_copy(ReadableFutureHandle, WritableBufferGuestImpl,
+async def canon_future_read(t, task, i, ptr):
+  return await async_copy(ReadableFutureHandle, WritableBufferGuestImpl, t,
                           EventCode.FUTURE_READ, task, i, ptr, 1)
 
-async def canon_future_write(task, i, ptr):
-  return await async_copy(WritableFutureHandle, ReadableBufferGuestImpl,
+async def canon_future_write(t, task, i, ptr):
+  return await async_copy(WritableFutureHandle, ReadableBufferGuestImpl, t,
                           EventCode.FUTURE_WRITE, task, i, ptr, 1)
 
-async def async_copy(HandleT, BufferT, event_code, task, i, ptr, n):
+async def async_copy(HandleT, BufferT, t, event_code, task, i, ptr, n):
   trap_if(not task.inst.may_leave)
   h = task.inst.waitables.get(i)
   trap_if(not isinstance(h, HandleT))
+  trap_if(h.t != t)
   trap_if(not h.cx)
   trap_if(h.copying_buffer)
-  buffer = BufferT(h.cx, h.t, ptr, n)
+  buffer = BufferT(h.cx, t, ptr, n)
   if h.stream.closed():
     flat_results = [CLOSED]
   else:
diff --git a/design/mvp/canonical-abi/run_tests.py b/design/mvp/canonical-abi/run_tests.py
index 0602d788..2455b7b9 100644
--- a/design/mvp/canonical-abi/run_tests.py
+++ b/design/mvp/canonical-abi/run_tests.py
@@ -1049,7 +1049,7 @@ async def core_func(task, args):
     assert(rsi1 == 1)
     [wsi1] = await canon_stream_new(U8Type(), task)
     [] = await canon_task_return(task, CoreFuncType(['i32'],[]), [wsi1])
-    [ret] = await canon_stream_read(task, rsi1, 0, 4)
+    [ret] = await canon_stream_read(U8Type(), task, rsi1, 0, 4)
     assert(ret == 4)
     assert(mem[0:4] == b'\x01\x02\x03\x04')
     [wsi2] = await canon_stream_new(U8Type(), task)
@@ -1057,22 +1057,22 @@ async def core_func(task, args):
     [ret] = await canon_lower(opts, ft, host_import, task, [wsi2, retp])
     assert(ret == 0)
     rsi2 = mem[retp]
-    [ret] = await canon_stream_write(task, wsi2, 0, 4)
+    [ret] = await canon_stream_write(U8Type(), task, wsi2, 0, 4)
     assert(ret == 4)
-    [ret] = await canon_stream_read(task, rsi2, 0, 4)
+    [ret] = await canon_stream_read(U8Type(), task, rsi2, 0, 4)
     assert(ret == 4)
-    [ret] = await canon_stream_write(task, wsi1, 0, 4)
+    [ret] = await canon_stream_write(U8Type(), task, wsi1, 0, 4)
     assert(ret == 4)
-    [ret] = await canon_stream_read(task, rsi1, 0, 4)
+    [ret] = await canon_stream_read(U8Type(), task, rsi1, 0, 4)
     assert(ret == 4)
-    [ret] = await canon_stream_read(task, rsi1, 0, 4)
+    [ret] = await canon_stream_read(U8Type(), task, rsi1, 0, 4)
     assert(ret == definitions.CLOSED)
     assert(mem[0:4] == b'\x05\x06\x07\x08')
-    [ret] = await canon_stream_write(task, wsi2, 0, 4)
+    [ret] = await canon_stream_write(U8Type(), task, wsi2, 0, 4)
     assert(ret == 4)
-    [ret] = await canon_stream_read(task, rsi2, 0, 4)
+    [ret] = await canon_stream_read(U8Type(), task, rsi2, 0, 4)
     assert(ret == 4)
-    [ret] = await canon_stream_write(task, wsi1, 0, 4)
+    [ret] = await canon_stream_write(U8Type(), task, wsi1, 0, 4)
     assert(ret == 4)
     [] = await canon_waitable_drop(task, rsi1)
     [] = await canon_waitable_drop(task, rsi2)
@@ -1122,7 +1122,7 @@ async def core_func(task, args):
     assert(rsi1 == 1)
     [wsi1] = await canon_stream_new(U8Type(), task)
     [] = await canon_task_return(task, CoreFuncType(['i32'],[]), [wsi1])
-    [ret] = await canon_stream_read(task, rsi1, 0, 4)
+    [ret] = await canon_stream_read(U8Type(), task, rsi1, 0, 4)
     assert(ret == definitions.BLOCKED)
     src_stream.write([1,2,3,4])
     event, p1, p2 = await task.wait()
@@ -1137,16 +1137,16 @@ async def core_func(task, args):
     assert(state == CallState.RETURNED)
     rsi2 = mem[16]
     assert(rsi2 == 4)
-    [ret] = await canon_stream_write(task, wsi2, 0, 4)
+    [ret] = await canon_stream_write(U8Type(), task, wsi2, 0, 4)
     assert(ret == definitions.BLOCKED)
     host_import_incoming.set_remain(100)
     event, p1, p2 = await task.wait()
     assert(event == EventCode.STREAM_WRITE)
     assert(p1 == wsi2)
     assert(p2 == 4)
-    [ret] = await canon_stream_read(task, rsi2, 0, 4)
+    [ret] = await canon_stream_read(U8Type(), task, rsi2, 0, 4)
     assert(ret == 4)
-    [ret] = await canon_stream_write(task, wsi1, 0, 4)
+    [ret] = await canon_stream_write(U8Type(), task, wsi1, 0, 4)
     assert(ret == definitions.BLOCKED)
     dst_stream.set_remain(100)
     event, p1, p2 = await task.wait()
@@ -1155,16 +1155,16 @@ async def core_func(task, args):
     assert(p2 == 4)
     src_stream.write([5,6,7,8])
     src_stream.destroy_once_empty()
-    [ret] = await canon_stream_read(task, rsi1, 0, 4)
+    [ret] = await canon_stream_read(U8Type(), task, rsi1, 0, 4)
     assert(ret == 4)
-    [ret] = await canon_stream_read(task, rsi1, 0, 4)
+    [ret] = await canon_stream_read(U8Type(), task, rsi1, 0, 4)
     assert(ret == definitions.CLOSED)
     [] = await canon_waitable_drop(task, rsi1)
     assert(mem[0:4] == b'\x05\x06\x07\x08')
-    [ret] = await canon_stream_write(task, wsi2, 0, 4)
+    [ret] = await canon_stream_write(U8Type(), task, wsi2, 0, 4)
     assert(ret == 4)
     [] = await canon_waitable_drop(task, wsi2)
-    [ret] = await canon_stream_read(task, rsi2, 0, 4)
+    [ret] = await canon_stream_read(U8Type(), task, rsi2, 0, 4)
     assert(ret == definitions.BLOCKED)
     event, p1, p2 = await task.wait()
     assert(event == EventCode.CALL_DONE)
@@ -1174,11 +1174,11 @@ async def core_func(task, args):
     assert(event == EventCode.STREAM_READ)
     assert(p1 == rsi2)
     assert(p2 == 4)
-    [ret] = await canon_stream_read(task, rsi2, 0, 4)
+    [ret] = await canon_stream_read(U8Type(), task, rsi2, 0, 4)
     assert(ret == definitions.CLOSED)
     [] = await canon_waitable_drop(task, rsi2)
     [] = await canon_waitable_drop(task, subi)
-    [ret] = await canon_stream_write(task, wsi1, 0, 4)
+    [ret] = await canon_stream_write(U8Type(), task, wsi1, 0, 4)
     assert(ret == 4)
     [] = await canon_waitable_drop(task, wsi1)
     return []
@@ -1266,13 +1266,13 @@ async def core_func(task, args):
     assert(ret == 0)
     rsi = mem[retp]
     assert(rsi == 1)
-    [ret] = await canon_stream_read(task, rsi, 0, 4)
+    [ret] = await canon_stream_read(U8Type(), task, rsi, 0, 4)
     assert(ret == 2)
     assert(mem[0:2] == b'\x01\x02')
-    [ret] = await canon_stream_read(task, rsi, 0, 4)
+    [ret] = await canon_stream_read(U8Type(), task, rsi, 0, 4)
     assert(ret == 2)
     assert(mem[0:2] == b'\x03\x04')
-    [ret] = await canon_stream_read(task, rsi, 0, 4)
+    [ret] = await canon_stream_read(U8Type(), task, rsi, 0, 4)
     assert(ret == definitions.BLOCKED)
     src.write([5,6])
     event, p1, p2 = await task.wait()
@@ -1286,9 +1286,9 @@ async def core_func(task, args):
     [ret] = await canon_lower(opts, sink_ft, host_sink, task, [wsi])
     assert(ret == 0)
     mem[0:6] = b'\x01\x02\x03\x04\x05\x06'
-    [ret] = await canon_stream_write(task, wsi, 0, 6)
+    [ret] = await canon_stream_write(U8Type(), task, wsi, 0, 6)
     assert(ret == 2)
-    [ret] = await canon_stream_write(task, wsi, 2, 6)
+    [ret] = await canon_stream_write(U8Type(), task, wsi, 2, 6)
     assert(ret == definitions.BLOCKED)
     dst.set_remain(4)
     event, p1, p2 = await task.wait()
@@ -1322,15 +1322,15 @@ async def core_func1(task, args):
     await task.wait_on(fut1)
 
     mem1[0:4] = b'\x01\x02\x03\x04'
-    [ret] = await canon_stream_write(task, wsi, 0, 2)
+    [ret] = await canon_stream_write(U8Type(), task, wsi, 0, 2)
     assert(ret == 2)
-    [ret] = await canon_stream_write(task, wsi, 2, 2)
+    [ret] = await canon_stream_write(U8Type(), task, wsi, 2, 2)
     assert(ret == 2)
 
     await task.wait_on(fut2)
 
     mem1[0:8] = b'\x05\x06\x07\x08\x09\x0a\x0b\x0c'
-    [ret] = await canon_stream_write(task, wsi, 0, 8)
+    [ret] = await canon_stream_write(U8Type(), task, wsi, 0, 8)
     assert(ret == definitions.BLOCKED)
 
     fut3.set_result(None)
@@ -1362,7 +1362,7 @@ async def core_func2(task, args):
     rsi = mem2[0]
     assert(rsi == 1)
 
-    [ret] = await canon_stream_read(task, rsi, 0, 8)
+    [ret] = await canon_stream_read(U8Type(), task, rsi, 0, 8)
     assert(ret == definitions.BLOCKED)
 
     fut1.set_result(None)
@@ -1377,16 +1377,16 @@ async def core_func2(task, args):
     await task.wait_on(fut3)
 
     mem2[0:8] = bytes(8)
-    [ret] = await canon_stream_read(task, rsi, 0, 2)
+    [ret] = await canon_stream_read(U8Type(), task, rsi, 0, 2)
     assert(ret == 2)
     assert(mem2[0:6] == b'\x05\x06\x00\x00\x00\x00')
-    [ret] = await canon_stream_read(task, rsi, 2, 2)
+    [ret] = await canon_stream_read(U8Type(), task, rsi, 2, 2)
     assert(ret == 2)
     assert(mem2[0:6] == b'\x05\x06\x07\x08\x00\x00')
 
     await task.wait_on(fut4)
 
-    [ret] = await canon_stream_read(task, rsi, 0, 2)
+    [ret] = await canon_stream_read(U8Type(), task, rsi, 0, 2)
     assert(ret == definitions.CLOSED)
     [] = await canon_waitable_drop(task, rsi)
 
@@ -1410,14 +1410,14 @@ async def test_borrow_stream():
   async def core_func1(task, args):
     [rsi] = args
 
-    [ret] = await canon_stream_read(task, rsi, 4, 2)
+    [ret] = await canon_stream_read(BorrowType(rt), task, rsi, 4, 2)
     assert(ret == definitions.BLOCKED)
 
     event, p1, p2 = await task.wait()
     assert(event == EventCode.STREAM_READ)
     assert(p1 == rsi)
     assert(p2 == 2)
-    [ret] = await canon_stream_read(task, rsi, 0, 2)
+    [ret] = await canon_stream_read(BorrowType(rt), task, rsi, 0, 2)
     assert(ret == definitions.CLOSED)
 
     [] = await canon_waitable_drop(task, rsi)
@@ -1451,7 +1451,7 @@ async def core_func2(task, args):
     mem2[0] = h1
     mem2[4] = h2
 
-    [ret] = await canon_stream_write(task, wsi, 0, 2)
+    [ret] = await canon_stream_write(BorrowType(rt), task, wsi, 0, 2)
     assert(ret == 2)
     [] = await canon_waitable_drop(task, wsi)
 
@@ -1493,7 +1493,7 @@ async def core_func(task, args):
     [ret] = await canon_lower(lower_opts, host_ft1, host_func1, task, [wsi])
     assert(ret == 0)
     mem[0:4] = b'\x0a\x0b\x0c\x0d'
-    [ret] = await canon_stream_write(task, wsi, 0, 4)
+    [ret] = await canon_stream_write(U8Type(), task, wsi, 0, 4)
     assert(ret == definitions.BLOCKED)
     host_sink.set_remain(2)
     got = await host_sink.consume(2)
@@ -1506,7 +1506,7 @@ async def core_func(task, args):
     [ret] = await canon_lower(lower_opts, host_ft1, host_func1, task, [wsi])
     assert(ret == 0)
     mem[0:4] = b'\x01\x02\x03\x04'
-    [ret] = await canon_stream_write(task, wsi, 0, 4)
+    [ret] = await canon_stream_write(U8Type(), task, wsi, 0, 4)
     assert(ret == definitions.BLOCKED)
     host_sink.set_remain(2)
     got = await host_sink.consume(2)
@@ -1519,7 +1519,7 @@ async def core_func(task, args):
     [ret] = await canon_lower(lower_opts, host_ft2, host_func2, task, [retp])
     assert(ret == 0)
     rsi = mem[retp]
-    [ret] = await canon_stream_read(task, rsi, 0, 4)
+    [ret] = await canon_stream_read(U8Type(), task, rsi, 0, 4)
     assert(ret == definitions.BLOCKED)
     [ret] = await canon_stream_cancel_read(True, task, rsi)
     assert(ret == 0)
@@ -1529,7 +1529,7 @@ async def core_func(task, args):
     [ret] = await canon_lower(lower_opts, host_ft2, host_func2, task, [retp])
     assert(ret == 0)
     rsi = mem[retp]
-    [ret] = await canon_stream_read(task, rsi, 0, 4)
+    [ret] = await canon_stream_read(U8Type(), task, rsi, 0, 4)
     assert(ret == definitions.BLOCKED)
     host_source.eager_cancel.clear()
     [ret] = await canon_stream_cancel_read(False, task, rsi)
@@ -1607,12 +1607,12 @@ async def core_func(task, args):
     rfi = mem[retp]
 
     readp = 0
-    [ret] = await canon_future_read(task, rfi, readp)
+    [ret] = await canon_future_read(U8Type(), task, rfi, readp)
     assert(ret == definitions.BLOCKED)
 
     writep = 8
     mem[writep] = 42
-    [ret] = await canon_future_write(task, wfi, writep)
+    [ret] = await canon_future_write(U8Type(), task, wfi, writep)
     assert(ret == 1)
 
     event,p1,p2 = await task.wait()
@@ -1637,12 +1637,12 @@ async def core_func(task, args):
     rfi = mem[retp]
 
     readp = 0
-    [ret] = await canon_future_read(task, rfi, readp)
+    [ret] = await canon_future_read(U8Type(), task, rfi, readp)
     assert(ret == definitions.BLOCKED)
 
     writep = 8
     mem[writep] = 42
-    [ret] = await canon_future_write(task, wfi, writep)
+    [ret] = await canon_future_write(U8Type(), task, wfi, writep)
     assert(ret == 1)
 
     event,p1,p2 = await task.wait()

From 0b5247f408c0359a68b2b47391526deb1b1fcba6 Mon Sep 17 00:00:00 2001
From: Luke Wagner <mail@lukewagner.name>
Date: Wed, 23 Oct 2024 19:15:58 -0500
Subject: [PATCH 07/22] Handle the concurrently-closed case in
 {stream,future}.cancel-{read,write}

---
 design/mvp/CanonicalABI.md              | 22 +++++++++++++---------
 design/mvp/canonical-abi/definitions.py | 22 +++++++++++++---------
 2 files changed, 26 insertions(+), 18 deletions(-)

diff --git a/design/mvp/CanonicalABI.md b/design/mvp/CanonicalABI.md
index d2a5acf0..ea099973 100644
--- a/design/mvp/CanonicalABI.md
+++ b/design/mvp/CanonicalABI.md
@@ -3100,17 +3100,21 @@ async def cancel_async_copy(HandleT, sync, task, i):
   h = task.inst.waitables.get(i)
   trap_if(not isinstance(h, HandleT))
   trap_if(not h.copying_buffer)
-  if sync:
-    await task.call_sync(h.cancel_copy, h.copying_buffer)
-    flat_results = [h.copying_buffer.progress]
+  if h.stream.closed():
+    flat_results = [pack_async_copy_result(h.copying_buffer, h)]
     h.copying_buffer = None
   else:
-    match await call_and_handle_blocking(h.cancel_copy, h.copying_buffer):
-      case Blocked():
-        flat_results = [BLOCKED]
-      case Returned():
-        flat_results = [h.copying_buffer.progress]
-        h.copying_buffer = None
+    if sync:
+      await task.call_sync(h.cancel_copy, h.copying_buffer)
+      flat_results = [pack_async_copy_result(h.copying_buffer, h)]
+      h.copying_buffer = None
+    else:
+      match await call_and_handle_blocking(h.cancel_copy, h.copying_buffer):
+        case Blocked():
+          flat_results = [BLOCKED]
+        case Returned():
+          flat_results = [pack_async_copy_result(h.copying_buffer, h)]
+          h.copying_buffer = None
   return flat_results
 ```
 As mentioned above for `async_copy`, if cancellation doesn't block, the
diff --git a/design/mvp/canonical-abi/definitions.py b/design/mvp/canonical-abi/definitions.py
index 2928b15c..4536756d 100644
--- a/design/mvp/canonical-abi/definitions.py
+++ b/design/mvp/canonical-abi/definitions.py
@@ -1950,17 +1950,21 @@ async def cancel_async_copy(HandleT, sync, task, i):
   h = task.inst.waitables.get(i)
   trap_if(not isinstance(h, HandleT))
   trap_if(not h.copying_buffer)
-  if sync:
-    await task.call_sync(h.cancel_copy, h.copying_buffer)
-    flat_results = [h.copying_buffer.progress]
+  if h.stream.closed():
+    flat_results = [pack_async_copy_result(h.copying_buffer, h)]
     h.copying_buffer = None
   else:
-    match await call_and_handle_blocking(h.cancel_copy, h.copying_buffer):
-      case Blocked():
-        flat_results = [BLOCKED]
-      case Returned():
-        flat_results = [h.copying_buffer.progress]
-        h.copying_buffer = None
+    if sync:
+      await task.call_sync(h.cancel_copy, h.copying_buffer)
+      flat_results = [pack_async_copy_result(h.copying_buffer, h)]
+      h.copying_buffer = None
+    else:
+      match await call_and_handle_blocking(h.cancel_copy, h.copying_buffer):
+        case Blocked():
+          flat_results = [BLOCKED]
+        case Returned():
+          flat_results = [pack_async_copy_result(h.copying_buffer, h)]
+          h.copying_buffer = None
   return flat_results
 
 ### 🔀 `canon waitable.drop`

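For readers skimming the diff, the following toy restates the branch order that
this patch establishes in `cancel_async_copy`: a concurrently-closed stream or
future resolves the cancellation immediately, without invoking the `cancel_copy`
hook at all, and only otherwise does the sync/async distinction come into play.
Every name in the sketch (`toy_cancel`, the `'packed-result'` string, the
`BLOCKED` object) is an illustrative stand-in, not one of the spec's actual
definitions.
```python
BLOCKED = object()  # stand-in for the ABI's "blocked" sentinel value

def toy_cancel(stream_closed, sync, async_cancel_blocks):
    if stream_closed:
        # Fast path added by this patch: the copy already has its final
        # result, so cancellation completes without calling cancel_copy.
        return 'packed-result'
    elif sync:
        # Synchronous cancellation blocks the task until the copy is
        # cancelled, then returns the packed progress/result.
        return 'packed-result'
    else:
        # Asynchronous cancellation may finish now or report BLOCKED and
        # deliver the progress via a later {STREAM,FUTURE}_{READ,WRITE} event.
        return BLOCKED if async_cancel_blocks else 'packed-result'

assert toy_cancel(stream_closed=True,  sync=False, async_cancel_blocks=True) == 'packed-result'
assert toy_cancel(stream_closed=False, sync=True,  async_cancel_blocks=True) == 'packed-result'
assert toy_cancel(stream_closed=False, sync=False, async_cancel_blocks=True) is BLOCKED
```
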
From b20409a64d53cd9ff09bc7bcf65ca8d869a288bf Mon Sep 17 00:00:00 2001
From: Luke Wagner <mail@lukewagner.name>
Date: Thu, 24 Oct 2024 14:39:08 -0500
Subject: [PATCH 08/22] Add note on spec-internal state vs. implementation

---
 design/mvp/CanonicalABI.md | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/design/mvp/CanonicalABI.md b/design/mvp/CanonicalABI.md
index ea099973..44f355dd 100644
--- a/design/mvp/CanonicalABI.md
+++ b/design/mvp/CanonicalABI.md
@@ -11,6 +11,7 @@ being specified here.
   * [Call Context](#call-context)
   * [Canonical ABI Options](#canonical-abi-options)
   * [Runtime State](#runtime-state)
+    * [Component Instance State](#component-instance-state)
     * [Resource State](#resource-state)
     * [Task State](#task-state)
     * [Buffer, Stream and Future State](#buffer-stream-and-future-state)
@@ -136,6 +137,17 @@ reason that `async` is a keyword and most branches below want to start with the
 
 ### Runtime State
 
+The following Python classes define spec-internal state and utility methods
+that are used to define the externally-visible behavior of the Canonical ABI's
+lifting, lowering and built-in definitions below. The fields of these classes
+are chosen for simplicity over performance, so an optimizing implementation is
+expected to use more optimized representations as long as it preserves the same
+externally-visible behavior. Some specific examples of expected optimizations
+are noted below.
+
+
+#### Component Instance State
+
 The `inst` field of `CallContext` points to the component instance which the
 `canon`-generated function is closed over. Component instances contain all the
 core wasm instance as well as some extra state that is used exclusively by the

From a15ec3f33574c5d3353f52d80fc52d34e4377e87 Mon Sep 17 00:00:00 2001
From: Luke Wagner <mail@lukewagner.name>
Date: Thu, 24 Oct 2024 14:56:59 -0500
Subject: [PATCH 09/22] Update channel/pipe wording

Co-authored-by: Dan Gohman <dev@sunfishcode.online>
---
 design/mvp/Async.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/design/mvp/Async.md b/design/mvp/Async.md
index c46da368..70d4f6bd 100644
--- a/design/mvp/Async.md
+++ b/design/mvp/Async.md
@@ -114,7 +114,7 @@ concurrency. Streams and futures are thus not defined to be free-standing
 resources with their own internal memory buffers (like a traditional channel or
 pipe) but, rather, more-primitive control-flow mechanisms that synchronize the
 incremental passing of parameters and results during cross-component calls.
-Higher-level resources like channels and pipes can then be defined in terms
+Higher-level resources like channels and pipes could then be defined in terms
 of these lower-level `stream` and `future` primitives, e.g.:
 ```wit
 resource pipe {

From 4e456d71dc73298b570dd1d5e3279553fcb1b2cc Mon Sep 17 00:00:00 2001
From: Luke Wagner <mail@lukewagner.name>
Date: Thu, 24 Oct 2024 15:01:38 -0500
Subject: [PATCH 10/22] Improve wording

Co-authored-by: Dan Gohman <dev@sunfishcode.online>
---
 design/mvp/Async.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/design/mvp/Async.md b/design/mvp/Async.md
index 70d4f6bd..6d2fcc98 100644
--- a/design/mvp/Async.md
+++ b/design/mvp/Async.md
@@ -249,7 +249,7 @@ previously-mentioned invariant, the readable and writable ends of a stream are
 unique *and never in the same component*.
 
 Given the readable or writable end of a stream, core wasm code can call the
-imported `stream.read` or `stream.write` canonical built-ins, passing the
+imported `stream.read` or `stream.write` canonical built-ins, resp., passing the
 pointer and length of a linear-memory buffer to write-into or read-from, resp.
 These built-ins can either return immediately if >0 elements were able to be
 written or read immediately (without blocking) or return a sentinel "blocked"

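The calling pattern described above (and exercised at length by the
`run_tests.py` changes in the next patch) is: issue the `read` or `write` and,
if the "blocked" sentinel comes back, pick up the eventual element count from
the corresponding `STREAM_READ`/`STREAM_WRITE` event. The toy below restates
that control flow with invented stand-ins (`try_read`, `read_with_wait`, the
`events` deque and the `BLOCKED` object) rather than the real canonical
built-ins and `task.wait`.
```python
import collections

BLOCKED = object()            # stand-in for the ABI's "blocked" sentinel value
events = collections.deque()  # stand-in for the task's event queue

def try_read(n_available, n_requested):
    # Stand-in for stream.read: return an element count if progress can be
    # made without blocking; otherwise return BLOCKED and (in this toy)
    # enqueue the STREAM_READ event that would otherwise arrive later.
    if n_available == 0:
        events.append(('STREAM_READ', n_requested))
        return BLOCKED
    return min(n_available, n_requested)

def read_with_wait(n_available, n_requested):
    ret = try_read(n_available, n_requested)
    if ret is BLOCKED:
        event, count = events.popleft()  # stand-in for task.wait
        assert event == 'STREAM_READ'
        return count
    return ret

assert read_with_wait(n_available=4, n_requested=2) == 2  # completed immediately
assert read_with_wait(n_available=0, n_requested=2) == 2  # completed via event
```
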
From 08b138732a637a026ae3d3d8681bb88bb34c020a Mon Sep 17 00:00:00 2001
From: Luke Wagner <mail@lukewagner.name>
Date: Thu, 24 Oct 2024 21:15:00 -0500
Subject: [PATCH 11/22] Put the canonopts on {stream,future}.{read,write}
 instead of copying from canon lift/lower

---
 design/mvp/Binary.md                    |   8 +-
 design/mvp/CanonicalABI.md              | 245 +++++++++++++-----------
 design/mvp/Explainer.md                 |   8 +-
 design/mvp/canonical-abi/definitions.py | 153 ++++++++-------
 design/mvp/canonical-abi/run_tests.py   | 101 +++++-----
 5 files changed, 280 insertions(+), 235 deletions(-)

diff --git a/design/mvp/Binary.md b/design/mvp/Binary.md
index a1dde0f1..4eb352dd 100644
--- a/design/mvp/Binary.md
+++ b/design/mvp/Binary.md
@@ -294,13 +294,13 @@ canon    ::= 0x00 0x00 f:<core:funcidx> opts:<opts> ft:<typeidx> => (canon lift
            | 0x0c                                                => (canon task.yield (core func)) 🔀
            | 0x0d                                                => (canon waitable.drop (core func)) 🔀
            | 0x0e t:<typeidx>                                    => (canon stream.new t (core func)) 🔀
-           | 0x0f t:<typeidx>                                    => (canon stream.read t (core func)) 🔀
-           | 0x10 t:<typeidx>                                    => (canon stream.write t (core func)) 🔀
+           | 0x0f t:<typeidx> opts:<opts>                        => (canon stream.read t opts (core func)) 🔀
+           | 0x10 t:<typeidx> opts:<opts>                        => (canon stream.write t opts (core func)) 🔀
            | 0x11 async?:<async?>                                => (canon stream.cancel-read async? (core func)) 🔀
            | 0x12 async?:<async?>                                => (canon stream.cancel-write async? (core func)) 🔀
            | 0x13 t:<typeidx>                                    => (canon future.new t (core func)) 🔀
-           | 0x14 t:<typeidx>                                    => (canon future.read t (core func)) 🔀
-           | 0x15 t:<typeidx>                                    => (canon future.write t (core func)) 🔀
+           | 0x14 t:<typeidx> opts:<opts>                        => (canon future.read t opts (core func)) 🔀
+           | 0x15 t:<typeidx> opts:<opts>                        => (canon future.write t opts (core func)) 🔀
            | 0x16 async?:<async?>                                => (canon future.cancel-read async? (core func)) 🔀
            | 0x17 async?:<async?>                                => (canon future.cancel-write async? (core func)) 🔀
 async?   ::= 0x00                                                =>
diff --git a/design/mvp/CanonicalABI.md b/design/mvp/CanonicalABI.md
index 44f355dd..38450c48 100644
--- a/design/mvp/CanonicalABI.md
+++ b/design/mvp/CanonicalABI.md
@@ -8,7 +8,7 @@ walkthrough of the static structure of a component and the
 being specified here.
 
 * [Supporting definitions](#supporting-definitions)
-  * [Call Context](#call-context)
+  * [Lifting and Lowering Context](#lifting-and-lowering-context)
   * [Canonical ABI Options](#canonical-abi-options)
   * [Runtime State](#runtime-state)
     * [Component Instance State](#component-instance-state)
@@ -80,44 +80,37 @@ intentionally propagate OOM into the appropriate explicit return value of the
 function's declared return type.
 
 
-### Call Context
+### Lifting and Lowering Context
 
 Most Canonical ABI definitions depend on some ambient information which is
 established by the `canon lift`- or `canon lower`-defined function that is
 being called:
 * the ABI options supplied via [`canonopt`]
 * the containing component instance
-* the [current task]
+* the `Task` state created by `canon lift` or the `Subtask` state created by
+  `canon lower`
 
-These pieces of ambient information are stored in the first three fields of
-the `CallContext` that is threaded through all the Python functions below as
-the `cx` parameter/field.
+These three pieces of ambient information are stored in a `LiftLowerContext`
+object that is threaded through all the Python functions below as the `cx`
+parameter/field.
 ```python
-class CallContext:
+class LiftLowerContext:
   opts: CanonicalOptions
   inst: ComponentInstance
-  task: Task
-  todo: int
+  call: Task|Subtask
 
-  def __init__(self, opts, inst, task):
+  def __init__(self, opts, inst, call):
     self.opts = opts
     self.inst = inst
-    self.task = task
-    self.todo = 0
-
-  def end_call(self):
-    trap_if(self.todo)
+    self.call = call
 ```
-Additionally, import and export calls have a `todo` count that is incremented
-and decremented by various Canonical ABI rules below to track outstanding
-obligations to do something (e.g., drop a `borrow`ed handle) before the end of
-the call. The `Task` and `Subtask` classes derive `CallContext` and call
-`self.end_call()` when they complete.
+The `CanonicalOptions`, `ComponentInstance`, `Task` and `Subtask` classes
+are defined next.
 
 
 ### Canonical ABI Options
 
-The `opts` field of `CallContext` contains all the possible [`canonopt`]
+The `CanonicalOptions` class contains all the possible [`canonopt`]
 immediates that can be passed to the `canon` definition being implemented.
 ```python
 @dataclass
@@ -148,10 +141,8 @@ are noted below.
 
 #### Component Instance State
 
-The `inst` field of `CallContext` points to the component instance which the
-`canon`-generated function is closed over. Component instances contain all the
-core wasm instance as well as some extra state that is used exclusively by the
-Canonical ABI and introduced below as the fields are used.
+The `ComponentInstance` class contains all the relevant per-component-instance
+state that `canon`-generated functions use to maintain component invariants.
 ```python
 class ComponentInstance:
   resources: ResourceTables
@@ -286,13 +277,13 @@ The `ResourceHandle` class defines the elements of the per-resource-type
 class ResourceHandle:
   rep: int
   own: bool
-  scope: Optional[Task]
+  call: Optional[Task]
   lend_count: int
 
-  def __init__(self, rep, own, scope = None):
+  def __init__(self, rep, own, call = None):
     self.rep = rep
     self.own = own
-    self.scope = scope
+    self.call = call
     self.lend_count = 0
 ```
 The `rep` field of `ResourceHandle` stores the resource representation
@@ -301,11 +292,11 @@ The `rep` field of `ResourceHandle` stores the resource representation
 The `own` field indicates whether this element was created from an `own` type
 (or, if false, a `borrow` type).
 
-The `scope` field stores the `Task` that created the borrowed handle. When a
-component only uses sync-lifted exports, due to lack of reentrance, there is
-at most one `Task` alive in a component instance at any time and thus an
-optimizing implementation doesn't need to store the `Task` per
-`ResourceHandle`.
+The `call` field stores the `Task` that lowered the borrowed handle as a
+parameter. When a component only uses sync-lifted exports, due to lack of
+reentrance, there is at most one `Task` alive in a component instance at any
+time and thus an optimizing implementation doesn't need to store the `Task`
+per `ResourceHandle`.
 
 The `lend_count` field maintains a conservative approximation of the number of
 live handles that were lent from this `own` handle (by calls to `borrow`-taking
@@ -321,13 +312,7 @@ and guards thereof.
 
 #### Task State
 
-Additional runtime state is required to implement the canonical built-ins and
-check that callers and callees uphold their respective parts of the call
-contract. This additional call state derives from `CallContext`, adding extra
-mutable fields. There are two subclasses of `CallContext`: `Task`, which is
-created by `canon_lift` and `Subtask`, which is created by `canon_lower`.
-
-The `Task` class and its subclasses depend on the following type definitions:
+The `Task` class depends on the following type definitions:
 ```python
 class CallState(IntEnum):
   STARTING = 0
@@ -436,26 +421,36 @@ when there is a need to make an `async` call.
 
 A `Task` object is created for each call to `canon_lift` and is implicitly
 threaded through all core function calls. This implicit `Task` parameter
-represents the "[current task]". A `Task` is-a `CallContext`, with its `ft`
-and `opts` derived from the `canon lift` definition that created this `Task`.
+represents the "[current task]".
 ```python
-class Task(CallContext):
+class Task:
+  opts: CanonicalOptions
+  inst: ComponentInstance
   ft: FuncType
   caller: Optional[Task]
   on_return: Optional[Callable]
   on_block: OnBlockCallback
   events: list[EventCallback]
   has_events: asyncio.Event
+  todo: int
 
   def __init__(self, opts, inst, ft, caller, on_return, on_block):
-    super().__init__(opts, inst, self)
+    self.opts = opts
+    self.inst = inst
     self.ft = ft
     self.caller = caller
     self.on_return = on_return
     self.on_block = on_block
     self.events = []
     self.has_events = asyncio.Event()
+    self.todo = 0
+
+  def task(self):
+    return self
 ```
+The `task()` method can be called polymorphically on a `Task|Subtask` to
+get the `Subtask`'s `supertask` or, in the case of a `Task`, itself.
+
 The fields of `Task` are introduced in groups of related `Task` methods next.
 Using a conservative syntactic analysis of the component-level definitions of
 a linked component DAG, an optimizing implementation can statically eliminate
@@ -509,7 +504,8 @@ that OOM the component before it can re-enable backpressure.
       assert(self.inst.interruptible.is_set())
       self.inst.interruptible.clear()
     self.inst.num_tasks += 1
-    return lower_flat_values(self, MAX_FLAT_PARAMS, on_start(), self.ft.param_types())
+    cx = LiftLowerContext(self.opts, self.inst, self)
+    return lower_flat_values(cx, MAX_FLAT_PARAMS, on_start(), self.ft.param_types())
 
   def may_enter(self, pending_task):
     return self.inst.interruptible.is_set() and \
@@ -630,7 +626,8 @@ more than once which must be checked by `return_` and `exit`.
     else:
       maxflat = MAX_FLAT_PARAMS
     ts = self.ft.result_types()
-    vs = lift_flat_values(self, maxflat, CoreValueIter(flat_results), ts)
+    cx = LiftLowerContext(self.opts, self.inst, self)
+    vs = lift_flat_values(cx, maxflat, CoreValueIter(flat_results), ts)
     self.on_return(vs)
     self.on_return = None
 ```
 async or synchronous-using-`always-task-return` call, in which return values
 are passed as parameters to `canon task.return`.
 
 Lastly, when a task exits, the runtime enforces the guard conditions mentioned
-above and allows a pending task to start.
+above and allows a pending task to start. The `todo` counter is used below to
+record the number of unmet obligations to drop borrowed handles, subtasks,
+streams and futures.
 ```python
   def exit(self):
     assert(current_task.locked())
     assert(not self.maybe_next_event())
     assert(self.inst.num_tasks >= 1)
+    trap_if(self.todo)
     trap_if(self.on_return)
     trap_if(self.inst.num_tasks == 1 and self.inst.backpressure)
     self.inst.num_tasks -= 1
@@ -653,17 +653,17 @@ above and allows a pending task to start.
       assert(not self.inst.interruptible.is_set())
       self.inst.interruptible.set()
     self.maybe_start_pending_task()
-    self.end_call()
 ```
 
 While `canon_lift` creates `Task`s, `canon_lower` creates `Subtask` objects.
-Like `Task`, `Subtask` is a subclass of `CallContext` and stores the `ft` and
-`opts` of its `canon lower`. Importantly, the `task` field of a `Subtask`
-refers to the [current task] which called `canon lower`, thereby linking all
-subtasks to their supertask, maintaining the (possibly asynchronous) call
-tree.
+Importantly, the `supertask` field of `Subtask` refers to the [current task]
+which called `canon lower`, thereby linking all subtasks to their supertasks,
+maintaining a (possibly asynchronous) call tree.
 ```python
-class Subtask(CallContext):
+class Subtask:
+  opts: CanonicalOptions
+  inst: ComponentInstance
+  supertask: Task
   ft: FuncType
   flat_args: CoreValueIter
   flat_results: Optional[list[Any]]
@@ -671,9 +671,12 @@ class Subtask(CallContext):
   lenders: list[ResourceHandle]
   notify_supertask: bool
   enqueued: bool
+  todo: int
 
   def __init__(self, opts, ft, task, flat_args):
-    super().__init__(opts, task.inst, task)
+    self.opts = opts
+    self.inst = task.inst
+    self.supertask = task
     self.ft = ft
     self.flat_args = CoreValueIter(flat_args)
     self.flat_results = None
@@ -681,7 +684,13 @@ class Subtask(CallContext):
     self.lenders = []
     self.notify_supertask = False
     self.enqueued = False
+    self.todo = 0
+
+  def task(self):
+    return self.supertask
 ```
+The `task()` method can be called polymorphically on a `Task|Subtask` to
+get the `Subtask`'s `supertask` or, in the case of a `Task`, itself.
 
 The `lenders` field of `Subtask` maintains a list of all the owned handles
 that have been lent to a subtask and must therefore not be dropped until the
@@ -722,7 +731,7 @@ the event loop when only the most recent state matters.
         if self.state == CallState.DONE:
           self.release_lenders()
         return (EventCode(self.state), i, 0)
-      self.task.notify(subtask_event)
+      self.supertask.notify(subtask_event)
 ```
 
 The `on_start` and `on_return` methods of `Subtask` are passed (by
@@ -738,7 +747,8 @@ called).
     self.maybe_notify_supertask()
     max_flat = MAX_FLAT_PARAMS if self.opts.sync else 1
     ts = self.ft.param_types()
-    return lift_flat_values(self, max_flat, self.flat_args, ts)
+    cx = LiftLowerContext(self.opts, self.inst, self)
+    return lift_flat_values(cx, max_flat, self.flat_args, ts)
 
   def on_return(self, vs):
     assert(self.state == CallState.STARTED)
@@ -746,7 +756,8 @@ called).
     self.maybe_notify_supertask()
     max_flat = MAX_FLAT_RESULTS if self.opts.sync else 0
     ts = self.ft.result_types()
-    self.flat_results = lower_flat_values(self, max_flat, vs, ts, self.flat_args)
+    cx = LiftLowerContext(self.opts, self.inst, self)
+    self.flat_results = lower_flat_values(cx, max_flat, vs, ts, self.flat_args)
 ```
 
 When a `Subtask` finishes, it calls `release_lenders` to allow owned handles
@@ -767,14 +778,15 @@ when the subtask finishes.
 
 Lastly, after a `Subtask` has finished and notified its supertask (thereby
 clearing `enqueued`), it may be dropped from the `waitables` table which
-effectively ends the call from the perspective of the caller and guards that
-the `Subtask`'s `todo` count is zero.
+effectively ends the call from the perspective of the caller. The `todo`
+counter is used below to record the number of unmet obligations to drop the
+streams and futures connected to this `Subtask`.
 ```python
   def drop(self):
+    trap_if(self.todo)
     trap_if(self.enqueued)
     trap_if(self.state != CallState.DONE)
-    self.task.todo -= 1
-    self.end_call()
+    self.supertask.todo -= 1
 ```
 
 
@@ -827,7 +839,7 @@ classes are defined below as part of normal `list` parameter lifting and
 lowering.
 ```python
 class BufferGuestImpl(Buffer):
-  cx: CallContext
+  cx: LiftLowerContext
   t: ValType
   ptr: int
   progress: int
@@ -994,20 +1006,20 @@ the shared `canon stream.*` built-in code below.
 class StreamHandle:
   stream: ReadableStream
   t: ValType
-  cx: Optional[CallContext]
+  call: Optional[Task|Subtask]
   copying_buffer: Optional[Buffer]
 
-  def __init__(self, stream, t, cx):
+  def __init__(self, stream, t, call):
     self.stream = stream
     self.t = t
-    self.cx = cx
+    self.call = call
     self.copying_buffer = None
 
   def drop(self):
     trap_if(self.copying_buffer)
     self.stream.close()
-    if self.cx:
-      self.cx.todo -= 1
+    if self.call:
+      self.call.todo -= 1
 
 class ReadableStreamHandle(StreamHandle):
   async def copy(self, dst, on_block):
@@ -1018,7 +1030,7 @@ class ReadableStreamHandle(StreamHandle):
 class WritableStreamHandle(ReadableStreamGuestImpl, StreamHandle):
   def __init__(self, t):
     ReadableStreamGuestImpl.__init__(self)
-    StreamHandle.__init__(self, self, t, cx = None)
+    StreamHandle.__init__(self, self, t, call = None)
   async def copy(self, src, on_block):
     await self.write(src, on_block)
   async def cancel_copy(self, src, on_block):
@@ -1057,7 +1069,7 @@ class ReadableFutureHandle(FutureHandle):
 class WritableFutureHandle(ReadableStreamGuestImpl, FutureHandle):
   def __init__(self, t):
     ReadableStreamGuestImpl.__init__(self)
-    FutureHandle.__init__(self, self, t, cx = None)
+    FutureHandle.__init__(self, self, t, call = None)
 
   async def copy(self, src, on_block):
     assert(src.remain() == 1)
@@ -1533,15 +1545,15 @@ from the source handle, leaving the source handle intact in the current
 component instance's handle table:
 ```python
 def lift_borrow(cx, i, t):
-  assert(isinstance(cx, Subtask))
+  assert(isinstance(cx.call, Subtask))
   h = cx.inst.resources.get(t.rt, i)
   if h.own:
-    cx.add_lender(h)
+    cx.call.add_lender(h)
   else:
-    trap_if(cx.task is not h.scope)
+    trap_if(cx.call.task() is not h.call.task())
   return h.rep
 ```
-The `add_lender` call to `CallContext` participates in the enforcement of the
+The `Subtask.add_lender` method participates in the enforcement of the
 dynamic borrow rules, which keep the source `own` handle alive until the end of
 the call (as an intentionally-conservative upper bound on how long the `borrow`
 handle can be held). When `h` is a `borrow` handle, we just need to make sure
@@ -1555,9 +1567,9 @@ transfers ownership of it while lifting the writable end leaves the writable
 end in place, but traps if the writable end has already been lifted before.
 Together, this ensures that at most one component holds each of the readable
 and writable ends of a stream. The `todo` increments must be matched by
-decrements in `StreamHandle.drop` for `CallContext.end_call` to not trap; this
-ensures that the writable stream handles cannot outlive the `Task` to which
-their events are sent (via `h.cx.task.notify()`).
+decrements in `StreamHandle.drop` for `Task.exit`/`Subtask.drop` to not trap;
+this ensures that the writable stream handles cannot outlive the `Task` to
+which their events are sent (via `h.call.task().notify()`).
 ```python
 def lift_stream(cx, i, t):
   return lift_async_value(ReadableStreamHandle, WritableStreamHandle, cx, i, t)
@@ -1572,14 +1584,14 @@ def lift_async_value(ReadableHandleT, WritableHandleT, cx, i, t):
   match h:
     case ReadableHandleT():
       trap_if(h.copying_buffer)
-      trap_if(contains_borrow(t) and cx.task is not h.cx)
-      h.cx.todo -= 1
+      trap_if(contains_borrow(t) and cx.call.task() is not h.call.task())
+      h.call.todo -= 1
       cx.inst.waitables.remove(i)
     case WritableHandleT():
-      trap_if(h.cx is not None)
+      trap_if(h.call is not None)
       assert(not h.copying_buffer)
-      h.cx = cx
-      h.cx.todo += 1
+      h.call = cx.call
+      h.call.todo += 1
     case _:
       trap()
   trap_if(h.t != t)
@@ -2001,11 +2013,11 @@ def lower_own(cx, rep, t):
   return cx.inst.resources.add(t.rt, h)
 
 def lower_borrow(cx, rep, t):
-  assert(isinstance(cx, Task))
+  assert(isinstance(cx.call, Task))
   if cx.inst is t.rt.impl:
     return rep
-  h = ResourceHandle(rep, own=False, scope=cx)
-  cx.todo += 1
+  h = ResourceHandle(rep, own=False, call=cx.call)
+  cx.call.todo += 1
   return cx.inst.resources.add(t.rt, h)
 ```
 The special case in `lower_borrow` is an optimization, recognizing that, when
@@ -2027,22 +2039,22 @@ def lower_future(cx, v, t):
 
 def lower_async_value(ReadableHandleT, WritableHandleT, cx, v, t):
   assert(isinstance(v, ReadableStream))
-  if isinstance(v, WritableHandleT) and cx.inst is v.cx.inst:
+  if isinstance(v, WritableHandleT) and cx.inst is v.call.inst:
     i = cx.inst.waitables.array.index(v)
-    v.cx.todo -= 1
-    v.cx = None
+    v.call.todo -= 1
+    v.call = None
     assert(2**31 > Table.MAX_LENGTH >= i)
     return i | (2**31)
   else:
-    h = ReadableHandleT(v, t, cx)
-    h.cx.todo += 1
+    h = ReadableHandleT(v, t, cx.call)
+    h.call.todo += 1
     return cx.inst.waitables.add(h)
 ```
 In the ordinary case, the abstract `ReadableStream` (which may come from the
 host or the guest) is stored in a `ReadableHandle` in the `waitables` table,
 incrementing `todo` to ensure that `StreamHandle.drop` is called before
-`Task.exit` so that readable stream and future handles cannot outlive the
-`Task` to which their events are sent (via `h.cx.task.notify()`).
+`Task.exit`/`Subtask.drop` so that readable stream and future handles cannot
+outlive the `Task` to which their events are sent (via `h.call.task().notify()`).
 
 The interesting case is when a component receives back a `ReadableStream` that
 it itself holds the `WritableStreamHandle` for. Without specially handling
@@ -2758,7 +2770,7 @@ async def canon_resource_drop(rt, sync, task, i):
   h = inst.resources.remove(rt, i)
   flat_results = [] if sync else [0]
   if h.own:
-    assert(h.scope is None)
+    assert(h.call is None)
     trap_if(h.lend_count != 0)
     if inst is rt.impl:
       if rt.dtor:
@@ -2773,7 +2785,7 @@ async def canon_resource_drop(rt, sync, task, i):
       else:
         task.trap_if_on_the_stack(rt.impl)
   else:
-    h.scope.todo -= 1
+    h.call.todo -= 1
   return flat_results
 ```
 In general, the call to a resource's destructor is treated like a
@@ -2847,7 +2859,9 @@ and pass the results to the caller:
 async def canon_task_return(task, core_ft, flat_args):
   trap_if(not task.inst.may_leave)
   trap_if(task.opts.sync and not task.opts.always_task_return)
-  trap_if(core_ft != flatten_functype(CanonicalOptions(), FuncType(task.ft.results, []), 'lower'))
+  sync_opts = copy(task.opts)
+  sync_opts.sync = True
+  trap_if(core_ft != flatten_functype(sync_opts, FuncType(task.ft.results, []), 'lower'))
   task.return_(flat_args)
   return []
 ```
@@ -2874,7 +2888,7 @@ async def canon_task_wait(opts, task, ptr):
   trap_if(not task.inst.may_leave)
   trap_if(task.opts.callback is not None)
   event, p1, p2 = await task.wait()
-  cx = CallContext(opts, task.inst, task)
+  cx = LiftLowerContext(opts, None, None)
   store(cx, p1, U32Type(), ptr)
   store(cx, p2, U32Type(), ptr + 4)
   return [event]
@@ -2907,7 +2921,7 @@ async def canon_task_poll(opts, task, ptr):
   ret = await task.poll()
   if ret is None:
     return [0]
-  cx = CallContext(opts, task.inst, task)
+  cx = LiftLowerContext(opts, None, None)
   store(cx, ret, TupleType([U32Type(), U32Type(), U32Type()]), ptr)
   return [1]
 ```
@@ -2966,16 +2980,16 @@ call so that  can commence.
 
 For canonical definitions:
 ```wasm
-(canon stream.read (core func $f))
-(canon stream.write (core func $f))
+(canon stream.read $t $opts (core func $f))
+(canon stream.write $t $opts (core func $f))
 ```
 validation specifies:
 * `$f` is given type `(func (param i32 i32 i32) (result i32))`
 
 For canonical definitions:
 ```wasm
-(canon future.read (core func $f))
-(canon future.write (core func $f))
+(canon future.read $t $opts (core func $f))
+(canon future.write $t $opts (core func $f))
 ```
 validation specifies:
 * `$f` is given type `(func (param i32 i32) (result i32))`
@@ -2990,30 +3004,31 @@ likelihood of deadlock), there is no synchronous option for `read` or `write`.
 The actual copy happens via polymorphic dispatch to `copy`, which has been
 defined above by the 4 `{Readable,Writable}{Stream,Future}Handle` types:
 ```python
-async def canon_stream_read(t, task, i, ptr, n):
-  return await async_copy(ReadableStreamHandle, WritableBufferGuestImpl, t,
+async def canon_stream_read(t, opts, task, i, ptr, n):
+  return await async_copy(ReadableStreamHandle, WritableBufferGuestImpl, t, opts,
                           EventCode.STREAM_READ, task, i, ptr, n)
 
-async def canon_stream_write(t, task, i, ptr, n):
-  return await async_copy(WritableStreamHandle, ReadableBufferGuestImpl, t,
+async def canon_stream_write(t, opts, task, i, ptr, n):
+  return await async_copy(WritableStreamHandle, ReadableBufferGuestImpl, t, opts,
                           EventCode.STREAM_WRITE, task, i, ptr, n)
 
-async def canon_future_read(t, task, i, ptr):
-  return await async_copy(ReadableFutureHandle, WritableBufferGuestImpl, t,
+async def canon_future_read(t, opts, task, i, ptr):
+  return await async_copy(ReadableFutureHandle, WritableBufferGuestImpl, t, opts,
                           EventCode.FUTURE_READ, task, i, ptr, 1)
 
-async def canon_future_write(t, task, i, ptr):
-  return await async_copy(WritableFutureHandle, ReadableBufferGuestImpl, t,
+async def canon_future_write(t, opts, task, i, ptr):
+  return await async_copy(WritableFutureHandle, ReadableBufferGuestImpl, t, opts,
                           EventCode.FUTURE_WRITE, task, i, ptr, 1)
 
-async def async_copy(HandleT, BufferT, t, event_code, task, i, ptr, n):
+async def async_copy(HandleT, BufferT, t, opts, event_code, task, i, ptr, n):
   trap_if(not task.inst.may_leave)
   h = task.inst.waitables.get(i)
   trap_if(not isinstance(h, HandleT))
   trap_if(h.t != t)
-  trap_if(not h.cx)
+  trap_if(not h.call)
   trap_if(h.copying_buffer)
-  buffer = BufferT(h.cx, t, ptr, n)
+  cx = LiftLowerContext(opts, task.inst, h.call)
+  buffer = BufferT(cx, t, ptr, n)
   if h.stream.closed():
     flat_results = [CLOSED]
   else:
@@ -3025,7 +3040,7 @@ async def async_copy(HandleT, BufferT, t, event_code, task, i, ptr, n):
           return (event_code, i, pack_async_copy_result(buffer, h))
         else:
           return None
-      h.cx.task.notify(copy_event)
+      h.call.task().notify(copy_event)
     match await call_and_handle_blocking(do_copy):
       case Blocked():
         h.copying_buffer = buffer
@@ -3034,7 +3049,7 @@ async def async_copy(HandleT, BufferT, t, event_code, task, i, ptr, n):
         flat_results = [pack_async_copy_result(buffer, h)]
   return flat_results
 ```
-The trap if `not h.cx` prevents `write`s on the writable end of streams or
+The trap if `not h.call` prevents `write`s on the writable end of streams or
 futures that have not yet been lifted. The `copying_buffer` field serves as a
 boolean indication of whether an async `read` or `write` is already in
 progress, preventing multiple overlapping calls to `read` or `write`. (This
@@ -3042,7 +3057,7 @@ restriction could be relaxed [in the future](Async.md#TODO) to allow greater
 pipeline parallelism.)
 
 One subtle corner case handled by this code that is worth pointing out is that,
-between the `h.cx.task.notify(copy_event)` and the wasm guest code calling
+between the `h.call.task().notify(copy_event)` and the wasm guest code calling
 `task.wait` to receive this event, the wasm guest code can first call
 `{stream,future}.cancel-{read,write}` (defined next) which will return the copy
 progress to the wasm guest code and reset `copying_buffer` to `None` to allow
diff --git a/design/mvp/Explainer.md b/design/mvp/Explainer.md
index c59dd9aa..fcfe88f3 100644
--- a/design/mvp/Explainer.md
+++ b/design/mvp/Explainer.md
@@ -1373,13 +1373,13 @@ canon ::= ...
         | (canon task.poll (memory <core:memidx>) (core func <id>?)) 🔀
         | (canon task.yield (core func <id>?)) 🔀
         | (canon stream.new <typeidx> (core func <id>?)) 🔀
-        | (canon stream.read <typeidx> (core func <id>?)) 🔀
-        | (canon stream.write <typeidx> (core func <id>?)) 🔀
+        | (canon stream.read <typeidx> <canonopt>* (core func <id>?)) 🔀
+        | (canon stream.write <typeidx> <canonopt>* (core func <id>?)) 🔀
         | (canon stream.cancel-read async? (core func <id>?)) 🔀
         | (canon stream.cancel-write async? (core func <id>?)) 🔀
         | (canon future.new <typeidx> (core func <id>?)) 🔀
-        | (canon future.read <typeidx> (core func <id>?)) 🔀
-        | (canon future.write <typeidx> (core func <id>?)) 🔀
+        | (canon future.read <typeidx> <canonopt>* (core func <id>?)) 🔀
+        | (canon future.write <typeidx> <canonopt>* (core func <id>?)) 🔀
         | (canon future.cancel-read async? (core func <id>?)) 🔀
         | (canon future.cancel-write async? (core func <id>?)) 🔀
         | (canon waitable.drop (core func <id>?)) 🔀
diff --git a/design/mvp/canonical-abi/definitions.py b/design/mvp/canonical-abi/definitions.py
index 4536756d..8c362047 100644
--- a/design/mvp/canonical-abi/definitions.py
+++ b/design/mvp/canonical-abi/definitions.py
@@ -9,6 +9,7 @@
 from functools import partial
 from typing import Any, Optional, Callable, Awaitable, Literal, MutableMapping, TypeVar, Generic
 from enum import IntEnum
+from copy import copy
 import math
 import struct
 import random
@@ -190,22 +191,17 @@ class StreamType(ValType):
 class FutureType(ValType):
   t: ValType
 
-### Call Context
+### Lifting and Lowering Context
 
-class CallContext:
+class LiftLowerContext:
   opts: CanonicalOptions
   inst: ComponentInstance
-  task: Task
-  todo: int
+  call: Task|Subtask
 
-  def __init__(self, opts, inst, task):
+  def __init__(self, opts, inst, call):
     self.opts = opts
     self.inst = inst
-    self.task = task
-    self.todo = 0
-
-  def end_call(self):
-    trap_if(self.todo)
+    self.call = call
 
 
 ### Canonical ABI Options
@@ -311,13 +307,13 @@ def remove(self, i):
 class ResourceHandle:
   rep: int
   own: bool
-  scope: Optional[Task]
+  call: Optional[Task]
   lend_count: int
 
-  def __init__(self, rep, own, scope = None):
+  def __init__(self, rep, own, call = None):
     self.rep = rep
     self.own = own
-    self.scope = scope
+    self.call = call
     self.lend_count = 0
 
 #### Task State
@@ -374,22 +370,30 @@ async def do_call():
   asyncio.create_task(do_call())
   return await ret
 
-class Task(CallContext):
+class Task:
+  opts: CanonicalOptions
+  inst: ComponentInstance
   ft: FuncType
   caller: Optional[Task]
   on_return: Optional[Callable]
   on_block: OnBlockCallback
   events: list[EventCallback]
   has_events: asyncio.Event
+  todo: int
 
   def __init__(self, opts, inst, ft, caller, on_return, on_block):
-    super().__init__(opts, inst, self)
+    self.opts = opts
+    self.inst = inst
     self.ft = ft
     self.caller = caller
     self.on_return = on_return
     self.on_block = on_block
     self.events = []
     self.has_events = asyncio.Event()
+    self.todo = 0
+
+  def task(self):
+    return self
 
   def trap_if_on_the_stack(self, inst):
     c = self.caller
@@ -410,7 +414,8 @@ async def enter(self, on_start):
       assert(self.inst.interruptible.is_set())
       self.inst.interruptible.clear()
     self.inst.num_tasks += 1
-    return lower_flat_values(self, MAX_FLAT_PARAMS, on_start(), self.ft.param_types())
+    cx = LiftLowerContext(self.opts, self.inst, self)
+    return lower_flat_values(cx, MAX_FLAT_PARAMS, on_start(), self.ft.param_types())
 
   def may_enter(self, pending_task):
     return self.inst.interruptible.is_set() and \
@@ -475,7 +480,8 @@ def return_(self, flat_results):
     else:
       maxflat = MAX_FLAT_PARAMS
     ts = self.ft.result_types()
-    vs = lift_flat_values(self, maxflat, CoreValueIter(flat_results), ts)
+    cx = LiftLowerContext(self.opts, self.inst, self)
+    vs = lift_flat_values(cx, maxflat, CoreValueIter(flat_results), ts)
     self.on_return(vs)
     self.on_return = None
 
@@ -483,6 +489,7 @@ def exit(self):
     assert(current_task.locked())
     assert(not self.maybe_next_event())
     assert(self.inst.num_tasks >= 1)
+    trap_if(self.todo)
     trap_if(self.on_return)
     trap_if(self.inst.num_tasks == 1 and self.inst.backpressure)
     self.inst.num_tasks -= 1
@@ -490,9 +497,11 @@ def exit(self):
       assert(not self.inst.interruptible.is_set())
       self.inst.interruptible.set()
     self.maybe_start_pending_task()
-    self.end_call()
 
-class Subtask(CallContext):
+class Subtask:
+  opts: CanonicalOptions
+  inst: ComponentInstance
+  supertask: Task
   ft: FuncType
   flat_args: CoreValueIter
   flat_results: Optional[list[Any]]
@@ -500,9 +509,12 @@ class Subtask(CallContext):
   lenders: list[ResourceHandle]
   notify_supertask: bool
   enqueued: bool
+  todo: int
 
   def __init__(self, opts, ft, task, flat_args):
-    super().__init__(opts, task.inst, task)
+    self.opts = opts
+    self.inst = task.inst
+    self.supertask = task
     self.ft = ft
     self.flat_args = CoreValueIter(flat_args)
     self.flat_results = None
@@ -510,6 +522,10 @@ def __init__(self, opts, ft, task, flat_args):
     self.lenders = []
     self.notify_supertask = False
     self.enqueued = False
+    self.todo = 0
+
+  def task(self):
+    return self.supertask
 
   def add_lender(self, lending_handle):
     assert(lending_handle.own)
@@ -529,7 +545,7 @@ def subtask_event():
         if self.state == CallState.DONE:
           self.release_lenders()
         return (EventCode(self.state), i, 0)
-      self.task.notify(subtask_event)
+      self.supertask.notify(subtask_event)
 
   def on_start(self):
     assert(self.state == CallState.STARTING)
@@ -537,7 +553,8 @@ def on_start(self):
     self.maybe_notify_supertask()
     max_flat = MAX_FLAT_PARAMS if self.opts.sync else 1
     ts = self.ft.param_types()
-    return lift_flat_values(self, max_flat, self.flat_args, ts)
+    cx = LiftLowerContext(self.opts, self.inst, self)
+    return lift_flat_values(cx, max_flat, self.flat_args, ts)
 
   def on_return(self, vs):
     assert(self.state == CallState.STARTED)
@@ -545,7 +562,8 @@ def on_return(self, vs):
     self.maybe_notify_supertask()
     max_flat = MAX_FLAT_RESULTS if self.opts.sync else 0
     ts = self.ft.result_types()
-    self.flat_results = lower_flat_values(self, max_flat, vs, ts, self.flat_args)
+    cx = LiftLowerContext(self.opts, self.inst, self)
+    self.flat_results = lower_flat_values(cx, max_flat, vs, ts, self.flat_args)
 
   def finish(self):
     assert(self.state == CallState.RETURNED)
@@ -557,10 +575,10 @@ def finish(self):
     return self.flat_results
 
   def drop(self):
+    trap_if(self.todo)
     trap_if(self.enqueued)
     trap_if(self.state != CallState.DONE)
-    self.task.todo -= 1
-    self.end_call()
+    self.supertask.todo -= 1
 
 #### Buffer, Stream and Future State
 
@@ -578,7 +596,7 @@ class ReadableStream:
   close: Callable[[]]
 
 class BufferGuestImpl(Buffer):
-  cx: CallContext
+  cx: LiftLowerContext
   t: ValType
   ptr: int
   progress: int
@@ -675,20 +693,20 @@ def close(self):
 class StreamHandle:
   stream: ReadableStream
   t: ValType
-  cx: Optional[CallContext]
+  call: Optional[Task|Subtask]
   copying_buffer: Optional[Buffer]
 
-  def __init__(self, stream, t, cx):
+  def __init__(self, stream, t, call):
     self.stream = stream
     self.t = t
-    self.cx = cx
+    self.call = call
     self.copying_buffer = None
 
   def drop(self):
     trap_if(self.copying_buffer)
     self.stream.close()
-    if self.cx:
-      self.cx.todo -= 1
+    if self.call:
+      self.call.todo -= 1
 
 class ReadableStreamHandle(StreamHandle):
   async def copy(self, dst, on_block):
@@ -699,7 +717,7 @@ async def cancel_copy(self, dst, on_block):
 class WritableStreamHandle(ReadableStreamGuestImpl, StreamHandle):
   def __init__(self, t):
     ReadableStreamGuestImpl.__init__(self)
-    StreamHandle.__init__(self, self, t, cx = None)
+    StreamHandle.__init__(self, self, t, call = None)
   async def copy(self, src, on_block):
     await self.write(src, on_block)
   async def cancel_copy(self, src, on_block):
@@ -722,7 +740,7 @@ async def cancel_copy(self, dst, on_block):
 class WritableFutureHandle(ReadableStreamGuestImpl, FutureHandle):
   def __init__(self, t):
     ReadableStreamGuestImpl.__init__(self)
-    FutureHandle.__init__(self, self, t, cx = None)
+    FutureHandle.__init__(self, self, t, call = None)
 
   async def copy(self, src, on_block):
     assert(src.remain() == 1)
@@ -1060,12 +1078,12 @@ def lift_own(cx, i, t):
   return h.rep
 
 def lift_borrow(cx, i, t):
-  assert(isinstance(cx, Subtask))
+  assert(isinstance(cx.call, Subtask))
   h = cx.inst.resources.get(t.rt, i)
   if h.own:
-    cx.add_lender(h)
+    cx.call.add_lender(h)
   else:
-    trap_if(cx.task is not h.scope)
+    trap_if(cx.call.task() is not h.call.task())
   return h.rep
 
 def lift_stream(cx, i, t):
@@ -1081,14 +1099,14 @@ def lift_async_value(ReadableHandleT, WritableHandleT, cx, i, t):
   match h:
     case ReadableHandleT():
       trap_if(h.copying_buffer)
-      trap_if(contains_borrow(t) and cx.task is not h.cx)
-      h.cx.todo -= 1
+      trap_if(contains_borrow(t) and cx.call.task() is not h.call.task())
+      h.call.todo -= 1
       cx.inst.waitables.remove(i)
     case WritableHandleT():
-      trap_if(h.cx is not None)
+      trap_if(h.call is not None)
       assert(not h.copying_buffer)
-      h.cx = cx
-      h.cx.todo += 1
+      h.call = cx.call
+      h.call.todo += 1
     case _:
       trap()
   trap_if(h.t != t)
@@ -1376,11 +1394,11 @@ def lower_own(cx, rep, t):
   return cx.inst.resources.add(t.rt, h)
 
 def lower_borrow(cx, rep, t):
-  assert(isinstance(cx, Task))
+  assert(isinstance(cx.call, Task))
   if cx.inst is t.rt.impl:
     return rep
-  h = ResourceHandle(rep, own=False, scope=cx)
-  cx.todo += 1
+  h = ResourceHandle(rep, own=False, call=cx.call)
+  cx.call.todo += 1
   return cx.inst.resources.add(t.rt, h)
 
 def lower_stream(cx, v, t):
@@ -1392,15 +1410,15 @@ def lower_future(cx, v, t):
 
 def lower_async_value(ReadableHandleT, WritableHandleT, cx, v, t):
   assert(isinstance(v, ReadableStream))
-  if isinstance(v, WritableHandleT) and cx.inst is v.cx.inst:
+  if isinstance(v, WritableHandleT) and cx.inst is v.call.inst:
     i = cx.inst.waitables.array.index(v)
-    v.cx.todo -= 1
-    v.cx = None
+    v.call.todo -= 1
+    v.call = None
     assert(2**31 > Table.MAX_LENGTH >= i)
     return i | (2**31)
   else:
-    h = ReadableHandleT(v, t, cx)
-    h.cx.todo += 1
+    h = ReadableHandleT(v, t, cx.call)
+    h.call.todo += 1
     return cx.inst.waitables.add(h)
 
 ### Flattening
@@ -1792,7 +1810,7 @@ async def canon_resource_drop(rt, sync, task, i):
   h = inst.resources.remove(rt, i)
   flat_results = [] if sync else [0]
   if h.own:
-    assert(h.scope is None)
+    assert(h.call is None)
     trap_if(h.lend_count != 0)
     if inst is rt.impl:
       if rt.dtor:
@@ -1807,7 +1825,7 @@ async def canon_resource_drop(rt, sync, task, i):
       else:
         task.trap_if_on_the_stack(rt.impl)
   else:
-    h.scope.todo -= 1
+    h.call.todo -= 1
   return flat_results
 
 ### `canon resource.rep`
@@ -1828,7 +1846,9 @@ async def canon_task_backpressure(task, flat_args):
 async def canon_task_return(task, core_ft, flat_args):
   trap_if(not task.inst.may_leave)
   trap_if(task.opts.sync and not task.opts.always_task_return)
-  trap_if(core_ft != flatten_functype(CanonicalOptions(), FuncType(task.ft.results, []), 'lower'))
+  sync_opts = copy(task.opts)
+  sync_opts.sync = True
+  trap_if(core_ft != flatten_functype(sync_opts, FuncType(task.ft.results, []), 'lower'))
   task.return_(flat_args)
   return []
 
@@ -1838,7 +1858,7 @@ async def canon_task_wait(opts, task, ptr):
   trap_if(not task.inst.may_leave)
   trap_if(task.opts.callback is not None)
   event, p1, p2 = await task.wait()
-  cx = CallContext(opts, task.inst, task)
+  cx = LiftLowerContext(opts, None, None)
   store(cx, p1, U32Type(), ptr)
   store(cx, p2, U32Type(), ptr + 4)
   return [event]
@@ -1850,7 +1870,7 @@ async def canon_task_poll(opts, task, ptr):
   ret = await task.poll()
   if ret is None:
     return [0]
-  cx = CallContext(opts, task.inst, task)
+  cx = LiftLowerContext(opts, None, None)
   store(cx, ret, TupleType([U32Type(), U32Type(), U32Type()]), ptr)
   return [1]
 
@@ -1876,30 +1896,31 @@ async def canon_future_new(t, task):
 
 ### 🔀 `canon {stream,future}.{read,write}`
 
-async def canon_stream_read(t, task, i, ptr, n):
-  return await async_copy(ReadableStreamHandle, WritableBufferGuestImpl, t,
+async def canon_stream_read(t, opts, task, i, ptr, n):
+  return await async_copy(ReadableStreamHandle, WritableBufferGuestImpl, t, opts,
                           EventCode.STREAM_READ, task, i, ptr, n)
 
-async def canon_stream_write(t, task, i, ptr, n):
-  return await async_copy(WritableStreamHandle, ReadableBufferGuestImpl, t,
+async def canon_stream_write(t, opts, task, i, ptr, n):
+  return await async_copy(WritableStreamHandle, ReadableBufferGuestImpl, t, opts,
                           EventCode.STREAM_WRITE, task, i, ptr, n)
 
-async def canon_future_read(t, task, i, ptr):
-  return await async_copy(ReadableFutureHandle, WritableBufferGuestImpl, t,
+async def canon_future_read(t, opts, task, i, ptr):
+  return await async_copy(ReadableFutureHandle, WritableBufferGuestImpl, t, opts,
                           EventCode.FUTURE_READ, task, i, ptr, 1)
 
-async def canon_future_write(t, task, i, ptr):
-  return await async_copy(WritableFutureHandle, ReadableBufferGuestImpl, t,
+async def canon_future_write(t, opts, task, i, ptr):
+  return await async_copy(WritableFutureHandle, ReadableBufferGuestImpl, t, opts,
                           EventCode.FUTURE_WRITE, task, i, ptr, 1)
 
-async def async_copy(HandleT, BufferT, t, event_code, task, i, ptr, n):
+async def async_copy(HandleT, BufferT, t, opts, event_code, task, i, ptr, n):
   trap_if(not task.inst.may_leave)
   h = task.inst.waitables.get(i)
   trap_if(not isinstance(h, HandleT))
   trap_if(h.t != t)
-  trap_if(not h.cx)
+  trap_if(not h.call)
   trap_if(h.copying_buffer)
-  buffer = BufferT(h.cx, t, ptr, n)
+  cx = LiftLowerContext(opts, task.inst, h.call)
+  buffer = BufferT(cx, t, ptr, n)
   if h.stream.closed():
     flat_results = [CLOSED]
   else:
@@ -1911,7 +1932,7 @@ def copy_event():
           return (event_code, i, pack_async_copy_result(buffer, h))
         else:
           return None
-      h.cx.task.notify(copy_event)
+      h.call.task().notify(copy_event)
     match await call_and_handle_blocking(do_copy):
       case Blocked():
         h.copying_buffer = buffer
diff --git a/design/mvp/canonical-abi/run_tests.py b/design/mvp/canonical-abi/run_tests.py
index 2455b7b9..3578d454 100644
--- a/design/mvp/canonical-abi/run_tests.py
+++ b/design/mvp/canonical-abi/run_tests.py
@@ -47,7 +47,7 @@ def mk_opts(memory = bytearray(), encoding = 'utf8', realloc = None, post_return
 def mk_cx(memory = bytearray(), encoding = 'utf8', realloc = None, post_return = None):
   opts = mk_opts(memory, encoding, realloc, post_return)
   inst = ComponentInstance()
-  return CallContext(opts, inst, None)
+  return LiftLowerContext(opts, inst, None)
 
 def mk_str(s):
   return (s, 'utf8', len(s.encode('utf-8')))
@@ -395,7 +395,8 @@ async def callee(task, x):
     if return_in_heap:
       flat_results = [ flat_args[-1] ]
 
-    [got] = lift_flat_values(caller_task, definitions.MAX_FLAT_PARAMS, CoreValueIter(flat_results), [t])
+    cx = LiftLowerContext(caller_opts, caller_inst, caller_task)
+    [got] = lift_flat_values(cx, definitions.MAX_FLAT_PARAMS, CoreValueIter(flat_results), [t])
     caller_task.exit()
 
     if got != v:
@@ -983,6 +984,8 @@ async def read_all():
         async def on_block(f):
           return await f
         await self.write_event.wait()
+        if self.stream.closed():
+          break
         await self.stream.read(self, on_block)
       self.ready_to_consume.set()
     asyncio.create_task(read_all())
@@ -1049,7 +1052,7 @@ async def core_func(task, args):
     assert(rsi1 == 1)
     [wsi1] = await canon_stream_new(U8Type(), task)
     [] = await canon_task_return(task, CoreFuncType(['i32'],[]), [wsi1])
-    [ret] = await canon_stream_read(U8Type(), task, rsi1, 0, 4)
+    [ret] = await canon_stream_read(U8Type(), opts, task, rsi1, 0, 4)
     assert(ret == 4)
     assert(mem[0:4] == b'\x01\x02\x03\x04')
     [wsi2] = await canon_stream_new(U8Type(), task)
@@ -1057,22 +1060,22 @@ async def core_func(task, args):
     [ret] = await canon_lower(opts, ft, host_import, task, [wsi2, retp])
     assert(ret == 0)
     rsi2 = mem[retp]
-    [ret] = await canon_stream_write(U8Type(), task, wsi2, 0, 4)
+    [ret] = await canon_stream_write(U8Type(), opts, task, wsi2, 0, 4)
     assert(ret == 4)
-    [ret] = await canon_stream_read(U8Type(), task, rsi2, 0, 4)
+    [ret] = await canon_stream_read(U8Type(), opts, task, rsi2, 0, 4)
     assert(ret == 4)
-    [ret] = await canon_stream_write(U8Type(), task, wsi1, 0, 4)
+    [ret] = await canon_stream_write(U8Type(), opts, task, wsi1, 0, 4)
     assert(ret == 4)
-    [ret] = await canon_stream_read(U8Type(), task, rsi1, 0, 4)
+    [ret] = await canon_stream_read(U8Type(), opts, task, rsi1, 0, 4)
     assert(ret == 4)
-    [ret] = await canon_stream_read(U8Type(), task, rsi1, 0, 4)
+    [ret] = await canon_stream_read(U8Type(), opts, task, rsi1, 0, 4)
     assert(ret == definitions.CLOSED)
     assert(mem[0:4] == b'\x05\x06\x07\x08')
-    [ret] = await canon_stream_write(U8Type(), task, wsi2, 0, 4)
+    [ret] = await canon_stream_write(U8Type(), opts, task, wsi2, 0, 4)
     assert(ret == 4)
-    [ret] = await canon_stream_read(U8Type(), task, rsi2, 0, 4)
+    [ret] = await canon_stream_read(U8Type(), opts, task, rsi2, 0, 4)
     assert(ret == 4)
-    [ret] = await canon_stream_write(U8Type(), task, wsi1, 0, 4)
+    [ret] = await canon_stream_write(U8Type(), opts, task, wsi1, 0, 4)
     assert(ret == 4)
     [] = await canon_waitable_drop(task, rsi1)
     [] = await canon_waitable_drop(task, rsi2)
@@ -1122,7 +1125,7 @@ async def core_func(task, args):
     assert(rsi1 == 1)
     [wsi1] = await canon_stream_new(U8Type(), task)
     [] = await canon_task_return(task, CoreFuncType(['i32'],[]), [wsi1])
-    [ret] = await canon_stream_read(U8Type(), task, rsi1, 0, 4)
+    [ret] = await canon_stream_read(U8Type(), opts, task, rsi1, 0, 4)
     assert(ret == definitions.BLOCKED)
     src_stream.write([1,2,3,4])
     event, p1, p2 = await task.wait()
@@ -1137,16 +1140,16 @@ async def core_func(task, args):
     assert(state == CallState.RETURNED)
     rsi2 = mem[16]
     assert(rsi2 == 4)
-    [ret] = await canon_stream_write(U8Type(), task, wsi2, 0, 4)
+    [ret] = await canon_stream_write(U8Type(), opts, task, wsi2, 0, 4)
     assert(ret == definitions.BLOCKED)
     host_import_incoming.set_remain(100)
     event, p1, p2 = await task.wait()
     assert(event == EventCode.STREAM_WRITE)
     assert(p1 == wsi2)
     assert(p2 == 4)
-    [ret] = await canon_stream_read(U8Type(), task, rsi2, 0, 4)
+    [ret] = await canon_stream_read(U8Type(), opts, task, rsi2, 0, 4)
     assert(ret == 4)
-    [ret] = await canon_stream_write(U8Type(), task, wsi1, 0, 4)
+    [ret] = await canon_stream_write(U8Type(), opts, task, wsi1, 0, 4)
     assert(ret == definitions.BLOCKED)
     dst_stream.set_remain(100)
     event, p1, p2 = await task.wait()
@@ -1155,16 +1158,16 @@ async def core_func(task, args):
     assert(p2 == 4)
     src_stream.write([5,6,7,8])
     src_stream.destroy_once_empty()
-    [ret] = await canon_stream_read(U8Type(), task, rsi1, 0, 4)
+    [ret] = await canon_stream_read(U8Type(), opts, task, rsi1, 0, 4)
     assert(ret == 4)
-    [ret] = await canon_stream_read(U8Type(), task, rsi1, 0, 4)
+    [ret] = await canon_stream_read(U8Type(), opts, task, rsi1, 0, 4)
     assert(ret == definitions.CLOSED)
     [] = await canon_waitable_drop(task, rsi1)
     assert(mem[0:4] == b'\x05\x06\x07\x08')
-    [ret] = await canon_stream_write(U8Type(), task, wsi2, 0, 4)
+    [ret] = await canon_stream_write(U8Type(), opts, task, wsi2, 0, 4)
     assert(ret == 4)
     [] = await canon_waitable_drop(task, wsi2)
-    [ret] = await canon_stream_read(U8Type(), task, rsi2, 0, 4)
+    [ret] = await canon_stream_read(U8Type(), opts, task, rsi2, 0, 4)
     assert(ret == definitions.BLOCKED)
     event, p1, p2 = await task.wait()
     assert(event == EventCode.CALL_DONE)
@@ -1174,11 +1177,11 @@ async def core_func(task, args):
     assert(event == EventCode.STREAM_READ)
     assert(p1 == rsi2)
     assert(p2 == 4)
-    [ret] = await canon_stream_read(U8Type(), task, rsi2, 0, 4)
+    [ret] = await canon_stream_read(U8Type(), opts, task, rsi2, 0, 4)
     assert(ret == definitions.CLOSED)
     [] = await canon_waitable_drop(task, rsi2)
     [] = await canon_waitable_drop(task, subi)
-    [ret] = await canon_stream_write(U8Type(), task, wsi1, 0, 4)
+    [ret] = await canon_stream_write(U8Type(), opts, task, wsi1, 0, 4)
     assert(ret == 4)
     [] = await canon_waitable_drop(task, wsi1)
     return []
@@ -1266,13 +1269,13 @@ async def core_func(task, args):
     assert(ret == 0)
     rsi = mem[retp]
     assert(rsi == 1)
-    [ret] = await canon_stream_read(U8Type(), task, rsi, 0, 4)
+    [ret] = await canon_stream_read(U8Type(), opts, task, rsi, 0, 4)
     assert(ret == 2)
     assert(mem[0:2] == b'\x01\x02')
-    [ret] = await canon_stream_read(U8Type(), task, rsi, 0, 4)
+    [ret] = await canon_stream_read(U8Type(), opts, task, rsi, 0, 4)
     assert(ret == 2)
     assert(mem[0:2] == b'\x03\x04')
-    [ret] = await canon_stream_read(U8Type(), task, rsi, 0, 4)
+    [ret] = await canon_stream_read(U8Type(), opts, task, rsi, 0, 4)
     assert(ret == definitions.BLOCKED)
     src.write([5,6])
     event, p1, p2 = await task.wait()
@@ -1286,9 +1289,9 @@ async def core_func(task, args):
     [ret] = await canon_lower(opts, sink_ft, host_sink, task, [wsi])
     assert(ret == 0)
     mem[0:6] = b'\x01\x02\x03\x04\x05\x06'
-    [ret] = await canon_stream_write(U8Type(), task, wsi, 0, 6)
+    [ret] = await canon_stream_write(U8Type(), opts, task, wsi, 0, 6)
     assert(ret == 2)
-    [ret] = await canon_stream_write(U8Type(), task, wsi, 2, 6)
+    [ret] = await canon_stream_write(U8Type(), opts, task, wsi, 2, 6)
     assert(ret == definitions.BLOCKED)
     dst.set_remain(4)
     event, p1, p2 = await task.wait()
@@ -1297,6 +1300,8 @@ async def core_func(task, args):
     assert(p2 == 4)
     assert(dst.received == [1,2,3,4,5,6])
     [] = await canon_waitable_drop(task, wsi)
+    dst.set_remain(100)
+    assert(await dst.consume(100) is None)
     return []
 
   opts2 = mk_opts()
@@ -1322,15 +1327,15 @@ async def core_func1(task, args):
     await task.wait_on(fut1)
 
     mem1[0:4] = b'\x01\x02\x03\x04'
-    [ret] = await canon_stream_write(U8Type(), task, wsi, 0, 2)
+    [ret] = await canon_stream_write(U8Type(), opts1, task, wsi, 0, 2)
     assert(ret == 2)
-    [ret] = await canon_stream_write(U8Type(), task, wsi, 2, 2)
+    [ret] = await canon_stream_write(U8Type(), opts1, task, wsi, 2, 2)
     assert(ret == 2)
 
     await task.wait_on(fut2)
 
     mem1[0:8] = b'\x05\x06\x07\x08\x09\x0a\x0b\x0c'
-    [ret] = await canon_stream_write(U8Type(), task, wsi, 0, 8)
+    [ret] = await canon_stream_write(U8Type(), opts1, task, wsi, 0, 8)
     assert(ret == definitions.BLOCKED)
 
     fut3.set_result(None)
@@ -1362,7 +1367,7 @@ async def core_func2(task, args):
     rsi = mem2[0]
     assert(rsi == 1)
 
-    [ret] = await canon_stream_read(U8Type(), task, rsi, 0, 8)
+    [ret] = await canon_stream_read(U8Type(), opts2, task, rsi, 0, 8)
     assert(ret == definitions.BLOCKED)
 
     fut1.set_result(None)
@@ -1377,16 +1382,16 @@ async def core_func2(task, args):
     await task.wait_on(fut3)
 
     mem2[0:8] = bytes(8)
-    [ret] = await canon_stream_read(U8Type(), task, rsi, 0, 2)
+    [ret] = await canon_stream_read(U8Type(), opts2, task, rsi, 0, 2)
     assert(ret == 2)
     assert(mem2[0:6] == b'\x05\x06\x00\x00\x00\x00')
-    [ret] = await canon_stream_read(U8Type(), task, rsi, 2, 2)
+    [ret] = await canon_stream_read(U8Type(), opts2, task, rsi, 2, 2)
     assert(ret == 2)
     assert(mem2[0:6] == b'\x05\x06\x07\x08\x00\x00')
 
     await task.wait_on(fut4)
 
-    [ret] = await canon_stream_read(U8Type(), task, rsi, 0, 2)
+    [ret] = await canon_stream_read(U8Type(), opts2, task, rsi, 0, 2)
     assert(ret == definitions.CLOSED)
     [] = await canon_waitable_drop(task, rsi)
 
@@ -1410,14 +1415,14 @@ async def test_borrow_stream():
   async def core_func1(task, args):
     [rsi] = args
 
-    [ret] = await canon_stream_read(BorrowType(rt), task, rsi, 4, 2)
+    [ret] = await canon_stream_read(BorrowType(rt), opts1, task, rsi, 4, 2)
     assert(ret == definitions.BLOCKED)
 
     event, p1, p2 = await task.wait()
     assert(event == EventCode.STREAM_READ)
     assert(p1 == rsi)
     assert(p2 == 2)
-    [ret] = await canon_stream_read(BorrowType(rt), task, rsi, 0, 2)
+    [ret] = await canon_stream_read(BorrowType(rt), opts1, task, rsi, 0, 2)
     assert(ret == definitions.CLOSED)
 
     [] = await canon_waitable_drop(task, rsi)
@@ -1451,7 +1456,7 @@ async def core_func2(task, args):
     mem2[0] = h1
     mem2[4] = h2
 
-    [ret] = await canon_stream_write(BorrowType(rt), task, wsi, 0, 2)
+    [ret] = await canon_stream_write(BorrowType(rt), async_opts2, task, wsi, 0, 2)
     assert(ret == 2)
     [] = await canon_waitable_drop(task, wsi)
 
@@ -1486,6 +1491,7 @@ async def host_func2(task, on_start, on_return, on_block):
     host_source = HostSource([], chunk=2, destroy_if_empty = False)
     on_return([host_source])
 
+  lift_opts = mk_opts()
   async def core_func(task, args):
     assert(not args)
 
@@ -1493,7 +1499,7 @@ async def core_func(task, args):
     [ret] = await canon_lower(lower_opts, host_ft1, host_func1, task, [wsi])
     assert(ret == 0)
     mem[0:4] = b'\x0a\x0b\x0c\x0d'
-    [ret] = await canon_stream_write(U8Type(), task, wsi, 0, 4)
+    [ret] = await canon_stream_write(U8Type(), lower_opts, task, wsi, 0, 4)
     assert(ret == definitions.BLOCKED)
     host_sink.set_remain(2)
     got = await host_sink.consume(2)
@@ -1501,12 +1507,14 @@ async def core_func(task, args):
     [ret] = await canon_stream_cancel_write(True, task, wsi)
     assert(ret == 2)
     [] = await canon_waitable_drop(task, wsi)
+    host_sink.set_remain(100)
+    assert(await host_sink.consume(100) is None)
 
     [wsi] = await canon_stream_new(U8Type(), task)
     [ret] = await canon_lower(lower_opts, host_ft1, host_func1, task, [wsi])
     assert(ret == 0)
     mem[0:4] = b'\x01\x02\x03\x04'
-    [ret] = await canon_stream_write(U8Type(), task, wsi, 0, 4)
+    [ret] = await canon_stream_write(U8Type(), lower_opts, task, wsi, 0, 4)
     assert(ret == definitions.BLOCKED)
     host_sink.set_remain(2)
     got = await host_sink.consume(2)
@@ -1514,12 +1522,14 @@ async def core_func(task, args):
     [ret] = await canon_stream_cancel_write(False, task, wsi)
     assert(ret == 2)
     [] = await canon_waitable_drop(task, wsi)
+    host_sink.set_remain(100)
+    assert(await host_sink.consume(100) is None)
 
     retp = 0
     [ret] = await canon_lower(lower_opts, host_ft2, host_func2, task, [retp])
     assert(ret == 0)
     rsi = mem[retp]
-    [ret] = await canon_stream_read(U8Type(), task, rsi, 0, 4)
+    [ret] = await canon_stream_read(U8Type(), lower_opts, task, rsi, 0, 4)
     assert(ret == definitions.BLOCKED)
     [ret] = await canon_stream_cancel_read(True, task, rsi)
     assert(ret == 0)
@@ -1529,7 +1539,7 @@ async def core_func(task, args):
     [ret] = await canon_lower(lower_opts, host_ft2, host_func2, task, [retp])
     assert(ret == 0)
     rsi = mem[retp]
-    [ret] = await canon_stream_read(U8Type(), task, rsi, 0, 4)
+    [ret] = await canon_stream_read(U8Type(), lower_opts, task, rsi, 0, 4)
     assert(ret == definitions.BLOCKED)
     host_source.eager_cancel.clear()
     [ret] = await canon_stream_cancel_read(False, task, rsi)
@@ -1546,7 +1556,6 @@ async def core_func(task, args):
 
     return []
 
-  lift_opts = mk_opts()
   await canon_lift(lift_opts, inst, FuncType([],[]), core_func, None, lambda:[], lambda _:())
 
 
@@ -1597,6 +1606,7 @@ async def host_func(task, on_start, on_return, on_block):
     assert(incoming.v == 42)
     outgoing.v.set_result(43)
 
+  lift_opts = mk_opts()
   async def core_func(task, args):
     assert(not args)
     [wfi] = await canon_future_new(U8Type(), task)
@@ -1607,12 +1617,12 @@ async def core_func(task, args):
     rfi = mem[retp]
 
     readp = 0
-    [ret] = await canon_future_read(U8Type(), task, rfi, readp)
+    [ret] = await canon_future_read(U8Type(), lower_opts, task, rfi, readp)
     assert(ret == definitions.BLOCKED)
 
     writep = 8
     mem[writep] = 42
-    [ret] = await canon_future_write(U8Type(), task, wfi, writep)
+    [ret] = await canon_future_write(U8Type(), lower_opts, task, wfi, writep)
     assert(ret == 1)
 
     event,p1,p2 = await task.wait()
@@ -1637,12 +1647,12 @@ async def core_func(task, args):
     rfi = mem[retp]
 
     readp = 0
-    [ret] = await canon_future_read(U8Type(), task, rfi, readp)
+    [ret] = await canon_future_read(U8Type(), lower_opts, task, rfi, readp)
     assert(ret == definitions.BLOCKED)
 
     writep = 8
     mem[writep] = 42
-    [ret] = await canon_future_write(U8Type(), task, wfi, writep)
+    [ret] = await canon_future_write(U8Type(), lower_opts, task, wfi, writep)
     assert(ret == 1)
 
     event,p1,p2 = await task.wait()
@@ -1660,7 +1670,6 @@ async def core_func(task, args):
 
     return []
 
-  lift_opts = mk_opts()
   await canon_lift(lift_opts, inst, FuncType([],[]), core_func, None, lambda:[], lambda _:())
 
 

From 87f7b852da788dacb7e3210627cb88f161d0fa33 Mon Sep 17 00:00:00 2001
From: Luke Wagner <mail@lukewagner.name>
Date: Fri, 25 Oct 2024 00:14:59 -0500
Subject: [PATCH 12/22] Allow sync task.{wait,yield,poll} and
 {stream,future}.{read,write}

---
 design/mvp/Binary.md                    |   6 +-
 design/mvp/CanonicalABI.md              | 153 +++++++++++++-----------
 design/mvp/Explainer.md                 |   6 +-
 design/mvp/canonical-abi/definitions.py |  79 ++++++------
 design/mvp/canonical-abi/run_tests.py   |  86 ++++++-------
 5 files changed, 178 insertions(+), 152 deletions(-)

diff --git a/design/mvp/Binary.md b/design/mvp/Binary.md
index 4eb352dd..312cdccc 100644
--- a/design/mvp/Binary.md
+++ b/design/mvp/Binary.md
@@ -289,9 +289,9 @@ canon    ::= 0x00 0x00 f:<core:funcidx> opts:<opts> ft:<typeidx> => (canon lift
            | 0x06                                                => (canon thread.hw_concurrency (core func)) 🧵
            | 0x08                                                => (canon task.backpressure (core func)) 🔀
            | 0x09 ft:<core:typeidx>                              => (canon task.return ft (core func)) 🔀
-           | 0x0a m:<core:memdix>                                => (canon task.wait (memory m) (core func)) 🔀
-           | 0x0b m:<core:memidx>                                => (canon task.poll (memory m) (core func)) 🔀
-           | 0x0c                                                => (canon task.yield (core func)) 🔀
+           | 0x0a async?:<async>? m:<core:memidx>                => (canon task.wait async? (memory m) (core func)) 🔀
+           | 0x0b async?:<async>? m:<core:memidx>                => (canon task.poll async? (memory m) (core func)) 🔀
+           | 0x0c async?:<async>?                                => (canon task.yield async? (core func)) 🔀
            | 0x0d                                                => (canon waitable.drop (core func)) 🔀
            | 0x0e t:<typeidx>                                    => (canon stream.new t (core func)) 🔀
            | 0x0f t:<typeidx> opts:<opts>                        => (canon stream.read t opts (core func)) 🔀
diff --git a/design/mvp/CanonicalABI.md b/design/mvp/CanonicalABI.md
index 38450c48..3dbbc7de 100644
--- a/design/mvp/CanonicalABI.md
+++ b/design/mvp/CanonicalABI.md
@@ -528,21 +528,34 @@ The conditions in `may_enter` ensure two invariants:
 
 The `wait_on` method, called by `wait` and `yield_` below, blocks the
 current task until the given future is resolved, allowing other tasks to make
-progress in the meantime. While blocked, another asynchronous task can make a
-synchronous import call via `call_sync`, in which case, to preserve
-synchronicity, `wait_on` must wait until the synchronous import call is
-finished (signalled by `interrupt` being re-set).
-```python
-  async def wait_on(self, f):
+progress in the meantime. If called with `sync` set, `interruptible` is
+cleared to ensure that no other tasks are allowed to start or resume,
+emulating a traditional synchronous system call. If `sync` is not set, then
+it's possible that between blocking and resuming, some other task executed and
+cleared `interruptible`, and thus `wait_on` must wait until `interruptible` is
+set again. If `interruptible` is already clear when `wait_on` is called, then
+it is already part of a synchronous call and so there's nothing extra to do.
+```python
+  async def wait_on(self, sync, f):
     self.maybe_start_pending_task()
     if self.inst.interruptible.is_set():
+      if sync:
+        self.inst.interruptible.clear()
       v = await self.on_block(f)
-      while not self.inst.interruptible.is_set():
-        await self.on_block(self.inst.interruptible.wait())
+      if sync:
+        self.inst.interruptible.set()
+      else:
+        while not self.inst.interruptible.is_set():
+          await self.on_block(self.inst.interruptible.wait())
     else:
       v = await self.on_block(f)
     return v
+```
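
To build intuition for this gating, here is a small self-contained asyncio
sketch (an illustrative toy model, not part of the canonical definitions; the
`sync_waiter` and `other_task` names are invented): clearing a shared event
around a blocking await keeps a sibling coroutine parked until the
"synchronous" wait completes.
```python
import asyncio

# Toy model of the `interruptible` gate: the "sync" waiter clears the event
# around its blocking await, so the other coroutine (which must observe the
# event set before resuming) stays parked until the synchronous wait finishes.
async def other_task(interruptible):
  await interruptible.wait()
  print("other task resumed after the sync wait finished")

async def sync_waiter(interruptible):
  interruptible.clear()       # emulate entering a synchronous call
  await asyncio.sleep(0.01)   # the blocking operation
  interruptible.set()         # synchronous call finished

async def main():
  interruptible = asyncio.Event()
  interruptible.set()
  await asyncio.gather(sync_waiter(interruptible), other_task(interruptible))

asyncio.run(main())
```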
 
+A task can also make a synchronous call (to a `canon` built-in or another
+component) via `call_sync` which, like `wait_on`, clears the `interruptible`
+flag to block new tasks from starting or resuming.
+```python
   async def call_sync(self, callee, *args):
     if self.inst.interruptible.is_set():
       self.inst.interruptible.clear()
@@ -551,19 +564,15 @@ finished (signalled by `interrupt` being re-set).
     else:
       await callee(*args, self.on_block)
 ```
-If `wait_on` or `call_sync` are called when `interruptible` is *initially*
-clear, then the current task must have been created for a synchronously-lifted
-export call and so there are no other tasks to worry about and thus `wait_on`
-*must not* wait for `interrupt` to be re-set (which won't happen until the
-current task finishes via `exit`, defined below).
 
 While a task is running, it may call `wait` (via `canon task.wait` or when
 using a `callback`, by returning to the event loop) to learn about progress
-made by async subtasks which are reported to this task by `notify`.
+made by async subtasks, streams or futures which are reported to this task by
+`notify`.
 ```python
-  async def wait(self) -> EventTuple:
+  async def wait(self, sync) -> EventTuple:
     while True:
-      await self.wait_on(self.has_events.wait())
+      await self.wait_on(sync, self.has_events.wait())
       if (e := self.maybe_next_event()):
         return e
 
@@ -585,19 +594,20 @@ flexibility allows multiple redundant events to be collapsed into one (e.g.,
 when a `Subtask` advances `CallState` multiple times before the event enqueued
 by the initial state change is delivered) and also for events to be
 retroactively removed (e.g., when a `stream.cancel-read` "steals" a pending
-`STREAM_READ` event that was enqueued but not yet delivered). Although this
-Python code represents `events` as a list of closures, an optimizing
-implementation should be able to avoid dynamically allocating this list and
-instead represent `events` as a linked list embedded in the elements of the
-`waitables` table (noting that, by design, any given `waitables` element can be
-in the `events` list at most once).
+`STREAM_READ` event that was enqueued but not yet delivered).
+
+Although this Python code represents `events` as an `asyncio.Queue` of
+closures, an optimizing implementation should be able to avoid dynamically
+allocating anything and instead represent `events` as a linked list embedded
+in the elements of the `waitables` table (noting that, by design, any given
+`waitables` element can be in the `events` list at most once).
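
As a non-normative sketch of that optimization (the `Slot`, `linked`,
`pending_event` and `next_with_event` names are invented for illustration and
are not part of the Canonical ABI), pending events can be threaded through the
table elements themselves so that enqueuing, collapsing and retroactively
removing events never allocates:
```python
from dataclasses import dataclass
from typing import Optional

@dataclass
class Slot:                        # stands in for one `waitables` table element
  pending_event: Optional[tuple] = None
  linked: bool = False
  next_with_event: Optional[int] = None

class EmbeddedEventList:
  def __init__(self, slots):
    self.slots, self.head, self.tail = slots, None, None

  def enqueue(self, i, event):
    s = self.slots[i]
    s.pending_event = event        # a later event collapses into an earlier one
    if not s.linked:               # each element appears in the list at most once
      s.linked = True
      if self.tail is None:
        self.head = i
      else:
        self.slots[self.tail].next_with_event = i
      self.tail = i

  def retract(self, i):            # e.g. a cancelled copy "steals" its event
    self.slots[i].pending_event = None

  def dequeue(self):
    while self.head is not None:
      i = self.head
      s = self.slots[i]
      self.head = s.next_with_event
      if self.head is None:
        self.tail = None
      s.linked, s.next_with_event = False, None
      if s.pending_event is not None:
        event, s.pending_event = s.pending_event, None
        return (i, event)
    return None
```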
 
 A task may also cooperatively yield (via `canon task.yield`), allowing the
 runtime to switch execution to another task without having to wait for any
 external I/O (as emulated in the Python code by awaiting `sleep(0)`):
 ```python
-  async def yield_(self):
-    await self.wait_on(asyncio.sleep(0))
+  async def yield_(self, sync):
+    await self.wait_on(sync, asyncio.sleep(0))
 ```
 
 Putting these together, a task may also poll (via `canon task.poll`) for an
@@ -606,8 +616,8 @@ Importantly, `poll` starts by yielding execution (to avoid unintentionally
 starving other tasks) which means that the code calling `task.poll` must
 assume other tasks can execute, just like with `task.wait`.
 ```python
-  async def poll(self) -> Optional[EventTuple]:
-    await self.yield_()
+  async def poll(self, sync) -> Optional[EventTuple]:
+    await self.yield_(sync)
     return self.maybe_next_event()
 ```
 
@@ -2603,10 +2613,10 @@ async def canon_lift(opts, inst, ft, callee, caller, on_start, on_return, on_blo
         is_yield = bool(packed_ctx & 1)
         ctx = packed_ctx & ~1
         if is_yield:
-          await task.yield_()
+          await task.yield_(sync = False)
           event, p1, p2 = (EventCode.YIELDED, 0, 0)
         else:
-          event, p1, p2 = await task.wait()
+          event, p1, p2 = await task.wait(sync = False)
         [packed_ctx] = await call_and_trap_on_throw(opts.callback, task, [ctx, event, p1, p2])
   task.exit()
 ```
@@ -2875,7 +2885,7 @@ required here.
 
 For a canonical definition:
 ```wasm
-(canon task.wait (core func $f))
+(canon task.wait $async? (memory $mem) (core func $f))
 ```
 validation specifies:
 * `$f` is given type `(func (param i32) (result i32))`
@@ -2884,30 +2894,31 @@ Calling `$f` waits for progress to be made in a subtask of the current task,
 returning the event (which is currently simply a `CallState` value) and
 writing the subtask index as an outparam:
 ```python
-async def canon_task_wait(opts, task, ptr):
+async def canon_task_wait(sync, mem, task, ptr):
   trap_if(not task.inst.may_leave)
-  trap_if(task.opts.callback is not None)
-  event, p1, p2 = await task.wait()
-  cx = LiftLowerContext(opts, None, None)
+  event, p1, p2 = await task.wait(sync)
+  cx = LiftLowerContext(CanonicalOptions(memory = mem), None, None)
   store(cx, p1, U32Type(), ptr)
   store(cx, p2, U32Type(), ptr + 4)
   return [event]
 ```
-The `trap_if` ensures that, when a component uses a `callback` all events flow
-through the event loop at the base of the stack.
+If `async` is not set, no other tasks in the same component instance may
+execute while `task.wait` blocks, which can be useful for producer toolchains
+in situations where interleaving is not supported. However, this is generally
+worse for concurrency and thus producer toolchains should set `async` when
+possible. When `async` is set, `task.wait` only blocks the current `Task`,
+allowing other tasks to start or resume.
 
-Note that `task.wait` will only block the current `Task`, allowing other tasks
-to run. Note also that `task.wait` can be called from a synchronously-lifted
-export so that even synchronous code can make concurrent import calls. In these
-synchronous cases, though, the automatic backpressure (applied by `Task.enter`)
-will ensure there is only ever at most once synchronously-lifted task executing
-in a component instance at a time.
+`task.wait` can be called from a synchronously-lifted export so that even
+synchronous code can make concurrent import calls. In these synchronous cases,
+though, the automatic backpressure (applied by `Task.enter`) will ensure there
+is only ever at most one synchronously-lifted task executing in a component
+instance at a time.
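
To make the calling convention concrete, here is an illustrative sketch in the
style of `run_tests.py` (the `mem`, `task` and awaited subtask are assumed to
be set up as in the existing tests): a core function waits synchronously and
then decodes the two packed `i32` payloads from linear memory.
```python
# Sketch: synchronous wait (the `async` immediate is not set), then decode the
# outparams that canon_task_wait stored at `retp` and `retp + 4`.
retp = 0
[event] = await canon_task_wait(True, mem, task, retp)
if event == EventCode.CALL_DONE:
  callidx = int.from_bytes(mem[retp : retp+4], 'little')
  [] = await canon_waitable_drop(task, callidx)
```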
 
 ### 🔀 `canon task.poll`
 
 For a canonical definition:
 ```wasm
-(canon task.poll (core func $f))
+(canon task.poll $async? (memory $mem) (core func $f))
 ```
 validation specifies:
 * `$f` is given type `(func (param i32) (result i32))`
@@ -2916,36 +2927,38 @@ Calling `$f` does a non-blocking check for whether an event is already
 available, returning whether or not there was such an event as a boolean and,
 if there was an event, storing the `i32` event and payloads as outparams.
 ```python
-async def canon_task_poll(opts, task, ptr):
+async def canon_task_poll(sync, mem, task, ptr):
   trap_if(not task.inst.may_leave)
-  ret = await task.poll()
+  ret = await task.poll(sync)
   if ret is None:
     return [0]
-  cx = LiftLowerContext(opts, None, None)
+  cx = LiftLowerContext(CanonicalOptions(memory = mem), None, None)
   store(cx, ret, TupleType([U32Type(), U32Type(), U32Type()]), ptr)
   return [1]
 ```
-Note that the `await` of `task.poll` indicates that `task.poll` can yield to
-other tasks (in this or other components) as part of polling for an event.
+When `async` is set, `task.poll` can yield to other tasks (in this or other
+components) as part of polling for an event.
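
A rough usage sketch (again assuming the `mem` and `task` setup used by the
tests): `canon_task_poll` returns `[0]` when no event is pending and otherwise
returns `[1]` after storing the `(event, p1, p2)` triple as three `i32`s at
the given pointer.
```python
retp = 0
[got] = await canon_task_poll(False, mem, task, retp)
if got:
  event, p1, p2 = [int.from_bytes(mem[retp + 4*j : retp + 4*(j+1)], 'little')
                   for j in range(3)]
```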
 
 ### 🔀 `canon task.yield`
 
 For a canonical definition:
 ```wasm
-(canon task.yield (core func $f))
+(canon task.yield $async? (core func $f))
 ```
 validation specifies:
 * `$f` is given type `(func)`
 
-Calling `$f` calls `Task.yield_`, trapping if called when there is a `callback`.
-(When there is a callback, yielding is achieved by returning with the LSB set.)
+Calling `$f` calls `Task.yield_` to allow other tasks to execute:
 ```python
-async def canon_task_yield(task):
+async def canon_task_yield(sync, task):
   trap_if(not task.inst.may_leave)
-  trap_if(task.opts.callback is not None)
-  await task.yield_()
+  await task.yield_(sync)
   return []
 ```
+If `async` is not set, no other tasks *in the same component instance* can
+execute; however, tasks in *other* component instances may execute. This
+allows a long-running task in one component to avoid starving other
+components without needing to support full reentrancy.
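
For instance (an illustrative sketch; `process_chunk` and `num_chunks` are
hypothetical stand-ins for guest-side work), a long-running core function can
yield between chunks of work so that other component instances get a chance to
run:
```python
# Sketch: cooperative yielding from a long-running loop. Passing `True` (the
# `async` immediate is not set) keeps other tasks in *this* component instance
# parked while still letting tasks in other component instances run.
for chunk in range(num_chunks):
  process_chunk(chunk)                     # hypothetical guest-side work
  [] = await canon_task_yield(True, task)
```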
 
 ### 🔀 `canon {stream,future}.new`
 
@@ -3032,21 +3045,25 @@ async def async_copy(HandleT, BufferT, t, opts, event_code, task, i, ptr, n):
   if h.stream.closed():
     flat_results = [CLOSED]
   else:
-    async def do_copy(on_block):
-      await h.copy(buffer, on_block)
-      def copy_event():
-        if h.copying_buffer is buffer:
-          h.copying_buffer = None
-          return (event_code, i, pack_async_copy_result(buffer, h))
-        else:
-          return None
-      h.call.task().notify(copy_event)
-    match await call_and_handle_blocking(do_copy):
-      case Blocked():
-        h.copying_buffer = buffer
-        flat_results = [BLOCKED]
-      case Returned():
-        flat_results = [pack_async_copy_result(buffer, h)]
+    if opts.sync:
+      await task.call_sync(h.copy, buffer)
+      flat_results = [pack_async_copy_result(buffer, h)]
+    else:
+      async def do_copy(on_block):
+        await h.copy(buffer, on_block)
+        def copy_event():
+          if h.copying_buffer is buffer:
+            h.copying_buffer = None
+            return (event_code, i, pack_async_copy_result(buffer, h))
+          else:
+            return None
+        h.call.task().notify(copy_event)
+      match await call_and_handle_blocking(do_copy):
+        case Blocked():
+          h.copying_buffer = buffer
+          flat_results = [BLOCKED]
+        case Returned():
+          flat_results = [pack_async_copy_result(buffer, h)]
   return flat_results
 ```
 The trap if `not h.call` prevents `write`s on the writable end of streams or
diff --git a/design/mvp/Explainer.md b/design/mvp/Explainer.md
index fcfe88f3..d99f19ed 100644
--- a/design/mvp/Explainer.md
+++ b/design/mvp/Explainer.md
@@ -1369,9 +1369,9 @@ canon ::= ...
         | (canon resource.rep <typeidx> (core func <id>?))
         | (canon task.backpressure (core func <id>?)) 🔀
         | (canon task.return <core:typeidx> (core func <id>?)) 🔀
-        | (canon task.wait (memory <core:memidx>) (core func <id>?)) 🔀
-        | (canon task.poll (memory <core:memidx>) (core func <id>?)) 🔀
-        | (canon task.yield (core func <id>?)) 🔀
+        | (canon task.wait async? (memory <core:memidx>) (core func <id>?)) 🔀
+        | (canon task.poll async? (memory <core:memidx>) (core func <id>?)) 🔀
+        | (canon task.yield async? (core func <id>?)) 🔀
         | (canon stream.new <typeidx> (core func <id>?)) 🔀
         | (canon stream.read <typeidx> <canonopt>* (core func <id>?)) 🔀
         | (canon stream.write <typeidx> <canonopt>* (core func <id>?)) 🔀
diff --git a/design/mvp/canonical-abi/definitions.py b/design/mvp/canonical-abi/definitions.py
index 8c362047..a4659591 100644
--- a/design/mvp/canonical-abi/definitions.py
+++ b/design/mvp/canonical-abi/definitions.py
@@ -430,12 +430,17 @@ def maybe_start_pending_task(self):
         self.inst.starting_pending_task = True
         pending_future.set_result(None)
 
-  async def wait_on(self, f):
+  async def wait_on(self, sync, f):
     self.maybe_start_pending_task()
     if self.inst.interruptible.is_set():
+      if sync:
+        self.inst.interruptible.clear()
       v = await self.on_block(f)
-      while not self.inst.interruptible.is_set():
-        await self.on_block(self.inst.interruptible.wait())
+      if sync:
+        self.inst.interruptible.set()
+      else:
+        while not self.inst.interruptible.is_set():
+          await self.on_block(self.inst.interruptible.wait())
     else:
       v = await self.on_block(f)
     return v
@@ -448,9 +453,9 @@ async def call_sync(self, callee, *args):
     else:
       await callee(*args, self.on_block)
 
-  async def wait(self) -> EventTuple:
+  async def wait(self, sync) -> EventTuple:
     while True:
-      await self.wait_on(self.has_events.wait())
+      await self.wait_on(sync, self.has_events.wait())
       if (e := self.maybe_next_event()):
         return e
 
@@ -466,11 +471,11 @@ def notify(self, event: EventCallback):
     self.events.append(event)
     self.has_events.set()
 
-  async def yield_(self):
-    await self.wait_on(asyncio.sleep(0))
+  async def yield_(self, sync):
+    await self.wait_on(sync, asyncio.sleep(0))
 
-  async def poll(self) -> Optional[EventTuple]:
-    await self.yield_()
+  async def poll(self, sync) -> Optional[EventTuple]:
+    await self.yield_(sync)
     return self.maybe_next_event()
 
   def return_(self, flat_results):
@@ -1753,10 +1758,10 @@ async def canon_lift(opts, inst, ft, callee, caller, on_start, on_return, on_blo
         is_yield = bool(packed_ctx & 1)
         ctx = packed_ctx & ~1
         if is_yield:
-          await task.yield_()
+          await task.yield_(sync = False)
           event, p1, p2 = (EventCode.YIELDED, 0, 0)
         else:
-          event, p1, p2 = await task.wait()
+          event, p1, p2 = await task.wait(sync = False)
         [packed_ctx] = await call_and_trap_on_throw(opts.callback, task, [ctx, event, p1, p2])
   task.exit()
 
@@ -1854,32 +1859,30 @@ async def canon_task_return(task, core_ft, flat_args):
 
 ### 🔀 `canon task.wait`
 
-async def canon_task_wait(opts, task, ptr):
+async def canon_task_wait(sync, mem, task, ptr):
   trap_if(not task.inst.may_leave)
-  trap_if(task.opts.callback is not None)
-  event, p1, p2 = await task.wait()
-  cx = LiftLowerContext(opts, None, None)
+  event, p1, p2 = await task.wait(sync)
+  cx = LiftLowerContext(CanonicalOptions(memory = mem), None, None)
   store(cx, p1, U32Type(), ptr)
   store(cx, p2, U32Type(), ptr + 4)
   return [event]
 
 ### 🔀 `canon task.poll`
 
-async def canon_task_poll(opts, task, ptr):
+async def canon_task_poll(sync, mem, task, ptr):
   trap_if(not task.inst.may_leave)
-  ret = await task.poll()
+  ret = await task.poll(sync)
   if ret is None:
     return [0]
-  cx = LiftLowerContext(opts, None, None)
+  cx = LiftLowerContext(CanonicalOptions(memory = mem), None, None)
   store(cx, ret, TupleType([U32Type(), U32Type(), U32Type()]), ptr)
   return [1]
 
 ### 🔀 `canon task.yield`
 
-async def canon_task_yield(task):
+async def canon_task_yield(sync, task):
   trap_if(not task.inst.may_leave)
-  trap_if(task.opts.callback is not None)
-  await task.yield_()
+  await task.yield_(sync)
   return []
 
 ### 🔀 `canon {stream,future}.new`
@@ -1924,21 +1927,25 @@ async def async_copy(HandleT, BufferT, t, opts, event_code, task, i, ptr, n):
   if h.stream.closed():
     flat_results = [CLOSED]
   else:
-    async def do_copy(on_block):
-      await h.copy(buffer, on_block)
-      def copy_event():
-        if h.copying_buffer is buffer:
-          h.copying_buffer = None
-          return (event_code, i, pack_async_copy_result(buffer, h))
-        else:
-          return None
-      h.call.task().notify(copy_event)
-    match await call_and_handle_blocking(do_copy):
-      case Blocked():
-        h.copying_buffer = buffer
-        flat_results = [BLOCKED]
-      case Returned():
-        flat_results = [pack_async_copy_result(buffer, h)]
+    if opts.sync:
+      await task.call_sync(h.copy, buffer)
+      flat_results = [pack_async_copy_result(buffer, h)]
+    else:
+      async def do_copy(on_block):
+        await h.copy(buffer, on_block)
+        def copy_event():
+          if h.copying_buffer is buffer:
+            h.copying_buffer = None
+            return (event_code, i, pack_async_copy_result(buffer, h))
+          else:
+            return None
+        h.call.task().notify(copy_event)
+      match await call_and_handle_blocking(do_copy):
+        case Blocked():
+          h.copying_buffer = buffer
+          flat_results = [BLOCKED]
+        case Returned():
+          flat_results = [pack_async_copy_result(buffer, h)]
   return flat_results
 
 BLOCKED = 0xffff_ffff
diff --git a/design/mvp/canonical-abi/run_tests.py b/design/mvp/canonical-abi/run_tests.py
index 3578d454..ea61937d 100644
--- a/design/mvp/canonical-abi/run_tests.py
+++ b/design/mvp/canonical-abi/run_tests.py
@@ -578,21 +578,21 @@ async def consumer(task, args):
     assert(ret == (2 | (CallState.STARTING << 30)))
     assert(consumer_heap.memory[retp] == 13)
     fut1.set_result(None)
-    event, callidx, _ = await task.wait()
+    event, callidx, _ = await task.wait(sync = False)
     assert(event == EventCode.CALL_DONE)
     assert(callidx == 1)
     [] = await canon_waitable_drop(task, callidx)
-    event, callidx, _ = await task.wait()
+    event, callidx, _ = await task.wait(sync = True)
     assert(event == EventCode.CALL_STARTED)
     assert(callidx == 2)
     assert(consumer_heap.memory[retp] == 13)
     fut2.set_result(None)
-    event, callidx, _ = await task.wait()
+    event, callidx, _ = await task.wait(sync = False)
     assert(event == EventCode.CALL_RETURNED)
     assert(callidx == 2)
     assert(consumer_heap.memory[retp] == 44)
     fut3.set_result(None)
-    event, callidx, _ = await task.wait()
+    event, callidx, _ = await task.wait(sync = True)
     assert(event == EventCode.CALL_DONE)
     assert(callidx == 2)
     [] = await canon_waitable_drop(task, callidx)
@@ -614,7 +614,7 @@ async def dtor(task, args):
     assert(ret == (2 | (CallState.STARTED << 30)))
     assert(dtor_value is None)
     dtor_fut.set_result(None)
-    event, callidx, _ = await task.wait()
+    event, callidx, _ = await task.wait(sync = False)
     assert(event == CallState.DONE)
     assert(callidx == 2)
     [] = await canon_waitable_drop(task, callidx)
@@ -746,26 +746,26 @@ async def consumer(task, args):
     [ret] = await canon_lower(consumer_opts, producer_ft, producer2, task, [])
     assert(ret == (2 | (CallState.STARTING << 30)))
 
-    assert(await task.poll() is None)
+    assert(await task.poll(sync = False) is None)
 
     fut.set_result(None)
     assert(producer1_done == False)
-    event, callidx, _ = await task.wait()
+    event, callidx, _ = await task.wait(sync = False)
     assert(event == EventCode.CALL_DONE)
     assert(callidx == 1)
     await canon_waitable_drop(task, callidx)
     assert(producer1_done == True)
 
     assert(producer2_done == False)
-    await canon_task_yield(task)
+    await canon_task_yield(False, task)
     assert(producer2_done == True)
-    event, callidx, _ = await task.poll()
+    event, callidx, _ = await task.poll(sync = False)
     assert(event == EventCode.CALL_DONE)
     assert(callidx == 2)
     await canon_waitable_drop(task, callidx)
     assert(producer2_done == True)
 
-    assert(await task.poll() is None)
+    assert(await task.poll(sync = True) is None)
 
     await canon_task_return(task, CoreFuncType(['i32'],[]), [83])
     return []
@@ -823,17 +823,17 @@ async def consumer(task, args):
     [ret] = await canon_lower(consumer_opts, producer_ft, producer2, task, [])
     assert(ret == (2 | (CallState.STARTING << 30)))
 
-    assert(await task.poll() is None)
+    assert(await task.poll(sync = False) is None)
 
     fut.set_result(None)
     assert(producer1_done == False)
     assert(producer2_done == False)
-    event, callidx, _ = await task.wait()
+    event, callidx, _ = await task.wait(sync = False)
     assert(event == EventCode.CALL_DONE)
     assert(callidx == 1)
     assert(producer1_done == True)
     assert(producer2_done == True)
-    event, callidx, _ = await task.poll()
+    event, callidx, _ = await task.poll(sync = False)
     assert(event == EventCode.CALL_DONE)
     assert(callidx == 2)
     assert(producer2_done == True)
@@ -841,7 +841,7 @@ async def consumer(task, args):
     await canon_waitable_drop(task, 1)
     await canon_waitable_drop(task, 2)
 
-    assert(await task.poll() is None)
+    assert(await task.poll(sync = False) is None)
 
     await canon_task_return(task, CoreFuncType(['i32'],[]), [84])
     return []
@@ -885,11 +885,11 @@ async def core_func(task, args):
     assert(ret == (2 | (CallState.STARTED << 30)))
 
     fut1.set_result(None)
-    event, callidx, _ = await task.wait()
+    event, callidx, _ = await task.wait(sync = False)
     assert(event == EventCode.CALL_DONE)
     assert(callidx == 1)
     fut2.set_result(None)
-    event, callidx, _ = await task.wait()
+    event, callidx, _ = await task.wait(sync = False)
     assert(event == EventCode.CALL_DONE)
     assert(callidx == 2)
 
@@ -1092,6 +1092,7 @@ async def test_async_stream_ops():
   inst = ComponentInstance()
   mem = bytearray(20)
   opts = mk_opts(memory=mem, sync=False)
+  sync_opts = mk_opts(memory=mem, sync=True)
 
   host_import_incoming = None
   host_import_outgoing = None
@@ -1128,7 +1129,7 @@ async def core_func(task, args):
     [ret] = await canon_stream_read(U8Type(), opts, task, rsi1, 0, 4)
     assert(ret == definitions.BLOCKED)
     src_stream.write([1,2,3,4])
-    event, p1, p2 = await task.wait()
+    event, p1, p2 = await task.wait(sync = False)
     assert(event == EventCode.STREAM_READ)
     assert(p1 == rsi1)
     assert(p2 == 4)
@@ -1143,16 +1144,16 @@ async def core_func(task, args):
     [ret] = await canon_stream_write(U8Type(), opts, task, wsi2, 0, 4)
     assert(ret == definitions.BLOCKED)
     host_import_incoming.set_remain(100)
-    event, p1, p2 = await task.wait()
+    event, p1, p2 = await task.wait(sync = False)
     assert(event == EventCode.STREAM_WRITE)
     assert(p1 == wsi2)
     assert(p2 == 4)
-    [ret] = await canon_stream_read(U8Type(), opts, task, rsi2, 0, 4)
+    [ret] = await canon_stream_read(U8Type(), sync_opts, task, rsi2, 0, 4)
     assert(ret == 4)
     [ret] = await canon_stream_write(U8Type(), opts, task, wsi1, 0, 4)
     assert(ret == definitions.BLOCKED)
     dst_stream.set_remain(100)
-    event, p1, p2 = await task.wait()
+    event, p1, p2 = await task.wait(sync = False)
     assert(event == EventCode.STREAM_WRITE)
     assert(p1 == wsi1)
     assert(p2 == 4)
@@ -1160,7 +1161,7 @@ async def core_func(task, args):
     src_stream.destroy_once_empty()
     [ret] = await canon_stream_read(U8Type(), opts, task, rsi1, 0, 4)
     assert(ret == 4)
-    [ret] = await canon_stream_read(U8Type(), opts, task, rsi1, 0, 4)
+    [ret] = await canon_stream_read(U8Type(), sync_opts, task, rsi1, 0, 4)
     assert(ret == definitions.CLOSED)
     [] = await canon_waitable_drop(task, rsi1)
     assert(mem[0:4] == b'\x05\x06\x07\x08')
@@ -1169,11 +1170,11 @@ async def core_func(task, args):
     [] = await canon_waitable_drop(task, wsi2)
     [ret] = await canon_stream_read(U8Type(), opts, task, rsi2, 0, 4)
     assert(ret == definitions.BLOCKED)
-    event, p1, p2 = await task.wait()
+    event, p1, p2 = await task.wait(sync = False)
     assert(event == EventCode.CALL_DONE)
     assert(p1 == subi)
     assert(p2 == 0)
-    event, p1, p2 = await task.wait()
+    event, p1, p2 = await task.wait(sync = False)
     assert(event == EventCode.STREAM_READ)
     assert(p1 == rsi2)
     assert(p2 == 4)
@@ -1181,7 +1182,7 @@ async def core_func(task, args):
     assert(ret == definitions.CLOSED)
     [] = await canon_waitable_drop(task, rsi2)
     [] = await canon_waitable_drop(task, subi)
-    [ret] = await canon_stream_write(U8Type(), opts, task, wsi1, 0, 4)
+    [ret] = await canon_stream_write(U8Type(), sync_opts, task, wsi1, 0, 4)
     assert(ret == 4)
     [] = await canon_waitable_drop(task, wsi1)
     return []
@@ -1278,7 +1279,7 @@ async def core_func(task, args):
     [ret] = await canon_stream_read(U8Type(), opts, task, rsi, 0, 4)
     assert(ret == definitions.BLOCKED)
     src.write([5,6])
-    event, p1, p2 = await task.wait()
+    event, p1, p2 = await task.wait(sync = False)
     assert(event == EventCode.STREAM_READ)
     assert(p1 == rsi)
     assert(p2 == 2)
@@ -1294,7 +1295,7 @@ async def core_func(task, args):
     [ret] = await canon_stream_write(U8Type(), opts, task, wsi, 2, 6)
     assert(ret == definitions.BLOCKED)
     dst.set_remain(4)
-    event, p1, p2 = await task.wait()
+    event, p1, p2 = await task.wait(sync = False)
     assert(event == EventCode.STREAM_WRITE)
     assert(p1 == wsi)
     assert(p2 == 4)
@@ -1324,7 +1325,7 @@ async def core_func1(task, args):
     [wsi] = await canon_stream_new(U8Type(), task)
     [] = await canon_task_return(task, CoreFuncType(['i32'], []), [wsi])
 
-    await task.wait_on(fut1)
+    await task.on_block(fut1)
 
     mem1[0:4] = b'\x01\x02\x03\x04'
     [ret] = await canon_stream_write(U8Type(), opts1, task, wsi, 0, 2)
@@ -1332,7 +1333,7 @@ async def core_func1(task, args):
     [ret] = await canon_stream_write(U8Type(), opts1, task, wsi, 2, 2)
     assert(ret == 2)
 
-    await task.wait_on(fut2)
+    await task.on_block(fut2)
 
     mem1[0:8] = b'\x05\x06\x07\x08\x09\x0a\x0b\x0c'
     [ret] = await canon_stream_write(U8Type(), opts1, task, wsi, 0, 8)
@@ -1340,7 +1341,7 @@ async def core_func1(task, args):
 
     fut3.set_result(None)
 
-    event, p1, p2 = await task.wait()
+    event, p1, p2 = await task.wait(sync = False)
     assert(event == EventCode.STREAM_WRITE)
     assert(p1 == wsi)
     assert(p2 == 4)
@@ -1372,14 +1373,14 @@ async def core_func2(task, args):
 
     fut1.set_result(None)
 
-    event, p1, p2 = await task.wait()
+    event, p1, p2 = await task.wait(sync = False)
     assert(event == EventCode.STREAM_READ)
     assert(p1 == rsi)
     assert(p2 == 4)
     assert(mem2[0:8] == b'\x01\x02\x03\x04\x00\x00\x00\x00')
 
     fut2.set_result(None)
-    await task.wait_on(fut3)
+    await task.on_block(fut3)
 
     mem2[0:8] = bytes(8)
     [ret] = await canon_stream_read(U8Type(), opts2, task, rsi, 0, 2)
@@ -1389,13 +1390,13 @@ async def core_func2(task, args):
     assert(ret == 2)
     assert(mem2[0:6] == b'\x05\x06\x07\x08\x00\x00')
 
-    await task.wait_on(fut4)
+    await task.on_block(fut4)
 
     [ret] = await canon_stream_read(U8Type(), opts2, task, rsi, 0, 2)
     assert(ret == definitions.CLOSED)
     [] = await canon_waitable_drop(task, rsi)
 
-    event, callidx, _ = await task.wait()
+    event, callidx, _ = await task.wait(sync = False)
     assert(event == EventCode.CALL_DONE)
     assert(callidx == subi)
     [] = await canon_waitable_drop(task, subi)
@@ -1415,14 +1416,15 @@ async def test_borrow_stream():
   async def core_func1(task, args):
     [rsi] = args
 
-    [ret] = await canon_stream_read(BorrowType(rt), opts1, task, rsi, 4, 2)
+    stream_opts = mk_opts(memory=mem1, sync=False)
+    [ret] = await canon_stream_read(BorrowType(rt), stream_opts, task, rsi, 4, 2)
     assert(ret == definitions.BLOCKED)
 
-    event, p1, p2 = await task.wait()
+    event, p1, p2 = await task.wait(sync = False)
     assert(event == EventCode.STREAM_READ)
     assert(p1 == rsi)
     assert(p2 == 2)
-    [ret] = await canon_stream_read(BorrowType(rt), opts1, task, rsi, 0, 2)
+    [ret] = await canon_stream_read(BorrowType(rt), stream_opts, task, rsi, 0, 2)
     assert(ret == definitions.CLOSED)
 
     [] = await canon_waitable_drop(task, rsi)
@@ -1460,7 +1462,7 @@ async def core_func2(task, args):
     assert(ret == 2)
     [] = await canon_waitable_drop(task, wsi)
 
-    event, p1, _ = await task.wait()
+    event, p1, _ = await task.wait(sync = False)
     assert(event == EventCode.CALL_DONE)
     assert(p1 == subi)
 
@@ -1547,7 +1549,7 @@ async def core_func(task, args):
     host_source.write([7,8])
     await asyncio.sleep(0)
     host_source.eager_cancel.set()
-    event,p1,p2 = await task.wait()
+    event,p1,p2 = await task.wait(sync = False)
     assert(event == EventCode.STREAM_READ)
     assert(p1 == rsi)
     assert(p2 == 2)
@@ -1625,11 +1627,11 @@ async def core_func(task, args):
     [ret] = await canon_future_write(U8Type(), lower_opts, task, wfi, writep)
     assert(ret == 1)
 
-    event,p1,p2 = await task.wait()
+    event,p1,p2 = await task.wait(sync = False)
     assert(event == EventCode.CALL_DONE)
     assert(p1 == subi)
 
-    event,p1,p2 = await task.wait()
+    event,p1,p2 = await task.wait(sync = False)
     assert(event == EventCode.FUTURE_READ)
     assert(p1 == rfi)
     assert(p2 == 1)
@@ -1655,11 +1657,11 @@ async def core_func(task, args):
     [ret] = await canon_future_write(U8Type(), lower_opts, task, wfi, writep)
     assert(ret == 1)
 
-    event,p1,p2 = await task.wait()
+    event,p1,p2 = await task.wait(sync = False)
     assert(event == EventCode.CALL_DONE)
     assert(p1 == subi)
 
-    await task.yield_()
+    await task.yield_(sync = False)
     [ret] = await canon_future_cancel_read(True, task, rfi)
     assert(ret == 1)
     assert(mem[readp] == 43)

From 300c86c824a0161af62670c8b3cf285fd4c0835d Mon Sep 17 00:00:00 2001
From: Luke Wagner <mail@lukewagner.name>
Date: Mon, 28 Oct 2024 18:58:22 -0500
Subject: [PATCH 13/22] Only enforce scoping for streams/futures containing
 borrows

---
 design/mvp/CanonicalABI.md              | 301 ++++++++++++------------
 design/mvp/canonical-abi/definitions.py | 135 ++++++-----
 design/mvp/canonical-abi/run_tests.py   |   2 +-
 3 files changed, 235 insertions(+), 203 deletions(-)

diff --git a/design/mvp/CanonicalABI.md b/design/mvp/CanonicalABI.md
index 3dbbc7de..70e2ac39 100644
--- a/design/mvp/CanonicalABI.md
+++ b/design/mvp/CanonicalABI.md
@@ -87,25 +87,25 @@ established by the `canon lift`- or `canon lower`-defined function that is
 being called:
 * the ABI options supplied via [`canonopt`]
 * the containing component instance
-* the `Task` state created by `canon lift` or the `Subtask` state created by
-  `canon lower`
+* the `Task` or `Subtask` used to lower or lift, resp., `borrow` handles
 
-These three pieces of ambient information are stored in the `LiftLowerContext`
-class that is threaded through all the Python functions below as the `cx`
+These three pieces of ambient information are stored in a `LiftLowerContext`
+object that is threaded through all the Python functions below as the `cx`
 parameter/field.
 ```python
 class LiftLowerContext:
   opts: CanonicalOptions
   inst: ComponentInstance
-  call: Task|Subtask
+  borrow_scope: Optional[Task|Subtask]
 
-  def __init__(self, opts, inst, call):
+  def __init__(self, opts, inst, borrow_scope = None):
     self.opts = opts
     self.inst = inst
-    self.call = call
+    self.borrow_scope = borrow_scope
 ```
-The `CanonicalOptions`, `ComponentInstance`, `Task` and `Subtask` classes
-The `CanonicalOptions`, `ComponentInstance`, `Task` and `Subtask` classes
-are defined next.
+The `borrow_scope` field may be `None` if the types being lifted/lowered are
+known to not contain `borrow`. The `CanonicalOptions`, `ComponentInstance`,
+`Task` and `Subtask` classes are defined next.
 
 
 ### Canonical ABI Options
@@ -277,13 +277,13 @@ The `ResourceHandle` class defines the elements of the per-resource-type
 class ResourceHandle:
   rep: int
   own: bool
-  call: Optional[Task]
+  borrow_scope: Optional[Task]
   lend_count: int
 
-  def __init__(self, rep, own, call = None):
+  def __init__(self, rep, own, borrow_scope = None):
     self.rep = rep
     self.own = own
-    self.call = call
+    self.borrow_scope = borrow_scope
     self.lend_count = 0
 ```
 The `rep` field of `ResourceHandle` stores the resource representation
@@ -292,7 +292,7 @@ The `rep` field of `ResourceHandle` stores the resource representation
 The `own` field indicates whether this element was created from an `own` type
 (or, if false, a `borrow` type).
 
-The `call` field stores the `Task` that lowered the borrowed handle as a
+The `borrow_scope` field stores the `Task` that lowered the borrowed handle as a
 parameter. When a component only uses sync-lifted exports, due to lack of
 reentrance, there is at most one `Task` alive in a component instance at any
 time and thus an optimizing implementation doesn't need to store the `Task`
@@ -306,7 +306,7 @@ functions). This count is maintained by the `ImportCall` bookkeeping functions
 An optimizing implementation can enumerate the canonical definitions present
 in a component to statically determine that a given resource type's handle
 table only contains `own` or `borrow` handles and then, based on this,
-statically eliminate the `own` and the `lend_count` xor `scope` fields,
+statically eliminate the `own` and the `lend_count` xor `borrow_scope` fields,
 and guards thereof.
 
 
@@ -681,7 +681,6 @@ class Subtask:
   lenders: list[ResourceHandle]
   notify_supertask: bool
   enqueued: bool
-  todo: int
 
   def __init__(self, opts, ft, task, flat_args):
     self.opts = opts
@@ -694,7 +693,6 @@ class Subtask:
     self.lenders = []
     self.notify_supertask = False
     self.enqueued = False
-    self.todo = 0
 
   def task(self):
     return self.supertask
@@ -793,7 +791,6 @@ counter is used below to record the number of unmet obligations to drop the
 streams and futures connected to this `Subtask`.
 ```python
   def drop(self):
-    trap_if(self.todo)
     trap_if(self.enqueued)
     trap_if(self.state != CallState.DONE)
     self.supertask.todo -= 1
@@ -900,11 +897,13 @@ that `dst.lower(src.lift(...))` is meant to be fused into a single copy from
 `src`'s memory into `dst`'s memory).
 ```python
 class ReadableStreamGuestImpl(ReadableStream):
+  impl: ComponentInstance
   is_closed: bool
   other_buffer: Optional[Buffer]
   other_future: Optional[asyncio.Future]
 
-  def __init__(self):
+  def __init__(self, inst):
+    self.impl = inst
     self.is_closed = False
     self.other_buffer = None
     self.other_future = None
@@ -1002,35 +1001,67 @@ shared by both the reader and writer ends.
       assert(not self.other_buffer and not self.other_future)
 ```
 
-With the above complex synchronization rules encapsulated by
-`ReadableStreamGuestImpl`, we can move on to the remaining rules and state that
-apply separately to the readable and writable handles that are stored in the
-`waitables` table. Both readable and writable handles store a reference to a
-`ReadableStream`. In the case of a `ReadableStreamHandle`, this
-`ReadableStream` can be host- or guet-implemented. However, in the case of
-`WritableStreamHandle`, the `ReadableStream` is definitely implemented by
-`ReadableStreamGuestImpl`. The point of these handles is to implement
-direction-agnostic `copy`, `cancel_copy` and `drop` methods that are called by
-the shared `canon stream.*` built-in code below.
+The [readable and writable ends] of a stream are stored as `StreamHandle`
+objects in the component instance's `waitables` table. Both ends of a stream
+have the same immutable `stream` and `t` fields but also maintain independent
+mutable state specific to the end. The `paired` state tracks whether a fresh
+writable end (created by `stream.new`) has been lifted and paired with a
+readable end. If a stream contains `borrow` handles, the `borrow_scope` field
+stores the `LiftLowerContext.borrow_scope` to use when lifting or lowering the
+`borrow` handles in the future. Lastly, the `copying_buffer` and
+`copying_task` states maintain whether there is an active asynchronous
+`stream.read` or `stream.write` in progress and if so, which `Task` to notify
+of progress and what `Buffer` to copy from/to.
 ```python
 class StreamHandle:
   stream: ReadableStream
   t: ValType
-  call: Optional[Task|Subtask]
+  paired: bool
+  borrow_scope: Optional[Task|Subtask]
+  copying_task: Optional[Task]
   copying_buffer: Optional[Buffer]
 
-  def __init__(self, stream, t, call):
+  def __init__(self, stream, t):
     self.stream = stream
     self.t = t
-    self.call = call
+    self.paired = False
+    self.borrow_scope = None
+    self.copying_task = None
+    self.copying_buffer = None
+
+  def start_copying(self, task, buffer):
+    assert(not self.copying_task and not self.copying_buffer)
+    task.todo += 1
+    self.copying_task = task
+    self.copying_buffer = buffer
+
+  def stop_copying(self):
+    assert(self.copying_task and self.copying_buffer)
+    self.copying_task.todo -= 1
+    self.copying_task = None
     self.copying_buffer = None
 
   def drop(self):
     trap_if(self.copying_buffer)
     self.stream.close()
-    if self.call:
-      self.call.todo -= 1
+    if isinstance(self.borrow_scope, Task):
+      self.borrow_scope.todo -= 1
+```
+The `trap_if(copying_buffer)` in `drop` and the increment/decrement of
+`copying_task.todo` keep the `StreamHandle` and `Task` alive while performing
+a copy operation (a `stream.read` or `stream.write`) so that the results of a
+copy are always reported back to the `Task` that issued the copy.
+
+The `borrow_scope.todo` decrement matches an increment when a stream
+containing `borrow` handles is lowered as a parameter of an exported function
+and ensures that streams-of-borrows are dropped before the end of the call,
+just like normal `borrow` handles.
 
+Given the above logic, the [readable and writable ends] of a stream can be
+concretely implemented by the following two classes. The `copy`, `cancel_copy`
+and `drop` methods are called polymorphically by the common `async_copy`
+routine shared by the `stream.read` and `stream.write` built-ins below.
+```python
 class ReadableStreamHandle(StreamHandle):
   async def copy(self, dst, on_block):
     await self.stream.read(dst, on_block)
@@ -1038,26 +1069,16 @@ class ReadableStreamHandle(StreamHandle):
     await self.stream.cancel_read(dst, on_block)
 
 class WritableStreamHandle(ReadableStreamGuestImpl, StreamHandle):
-  def __init__(self, t):
-    ReadableStreamGuestImpl.__init__(self)
-    StreamHandle.__init__(self, self, t, call = None)
+  def __init__(self, t, inst):
+    ReadableStreamGuestImpl.__init__(self, inst)
+    StreamHandle.__init__(self, self, t)
   async def copy(self, src, on_block):
     await self.write(src, on_block)
   async def cancel_copy(self, src, on_block):
     await self.cancel_write(src, on_block)
 ```
-Considering the logic in `drop` (which is called polymorphically by
-`canon waitable.drop` below):
-* The trap if `copying_buffer` is set ensures the above-stated precondition
-  that `close` can only be called when there is no pending `read`/`write`.
-  `copying_buffer` is set below when `stream.{read,write}` starts and cleared
-  once wasm is notified of completion.
-* The `todo` decrement matches an increment when the handle's `cx`
-  field was set and is used to ensure that `cx` never points to a dead
-  `Subtask` (whose own `todo` increment ensures that `cx.task` also never
-  points to a dead `Task`).
-
-Given the above definition of how `stream` works, a `future` can simply be
+
+Given the above definitions of how `stream` works, a `future` can simply be
 defined as a `stream` of exactly 1 value by having the `copy` and `cancel_copy`
 methods `close()` the stream as soon as they detect that the 1 `remain`ing
 value has been successfully copied:
@@ -1077,9 +1098,9 @@ class ReadableFutureHandle(FutureHandle):
       self.stream.close()
 
 class WritableFutureHandle(ReadableStreamGuestImpl, FutureHandle):
-  def __init__(self, t):
-    ReadableStreamGuestImpl.__init__(self)
-    FutureHandle.__init__(self, self, t, call = None)
+  def __init__(self, t, inst):
+    ReadableStreamGuestImpl.__init__(self, inst)
+    FutureHandle.__init__(self, self, t)
 
   async def copy(self, src, on_block):
     assert(src.remain() == 1)
@@ -1555,31 +1576,28 @@ from the source handle, leaving the source handle intact in the current
 component instance's handle table:
 ```python
 def lift_borrow(cx, i, t):
-  assert(isinstance(cx.call, Subtask))
+  assert(isinstance(cx.borrow_scope, Subtask))
   h = cx.inst.resources.get(t.rt, i)
   if h.own:
-    cx.call.add_lender(h)
+    cx.borrow_scope.add_lender(h)
   else:
-    trap_if(cx.call.task() is not h.call.task())
+    trap_if(cx.borrow_scope.task() is not h.borrow_scope)
   return h.rep
 ```
 The `Subtask.add_lender` participates in the enforcement of the
 dynamic borrow rules, which keep the source `own` handle alive until the end of
 the call (as an intentionally-conservative upper bound on how long the `borrow`
-handle can be held). When `h` is a `borrow` handle, we just need to make sure
-that the callee task has a shorter liftime than the current task, which is only
-guaranteed if the callee is a subtask of the current task.
+handle can be held). When `h` is a `borrow` handle, we need to make sure
+that the callee task has a shorter lifetime than the current task by guarding
+that the callee is a subtask of the task that lowered the handle.
 
 Streams and futures are lifted in almost the same way, with the only difference
 being that it is a dynamic error to attempt to lift a `future` that has already
 been successfully read (`closed()`). In both cases, lifting the readable end
 transfers ownership of it while lifting the writable end leaves the writable
-end in place, but traps if the writable end has already been lifted before.
-Together, this ensures that at most one component holds each of the readable
-and writable ends of a stream. The `todo` increments must be matched by
-decrements in `StreamHandle.drop` for `Task.exit`/`Subtask.drop` to not trap;
-this ensures that the writable stream handles cannot outlive the `Task` to
-which their events are sent (via `h.call.task().notify()`).
+end in place, but traps if the writable end has already been lifted before
+(as indicated by `paired` already being set). Together, this ensures that at
+most one component holds each of the readable and writable ends of a stream.
 ```python
 def lift_stream(cx, i, t):
   return lift_async_value(ReadableStreamHandle, WritableStreamHandle, cx, i, t)
@@ -1594,33 +1612,27 @@ def lift_async_value(ReadableHandleT, WritableHandleT, cx, i, t):
   match h:
     case ReadableHandleT():
       trap_if(h.copying_buffer)
-      trap_if(contains_borrow(t) and cx.call.task() is not h.call.task())
-      h.call.todo -= 1
+      if contains_borrow(t):
+        trap_if(cx.borrow_scope.task() is not h.borrow_scope)
+        h.borrow_scope.todo -= 1
       cx.inst.waitables.remove(i)
     case WritableHandleT():
-      trap_if(h.call is not None)
+      trap_if(h.paired)
       assert(not h.copying_buffer)
-      h.call = cx.call
-      h.call.todo += 1
+      h.paired = True
+      if contains_borrow(t):
+        h.borrow_scope = cx.borrow_scope
     case _:
       trap()
   trap_if(h.t != t)
   return h.stream
 ```
-It's useful to observe that there are no lifetime issues with a `stream` or
-`future` of `borrow` handles due to the following:
-* Validation ensures that `stream<borrow<R>>` or `future<borrow<R>>` can only
-  be lifted as part of the parameters of an import call.
-* When lifting the writable end of a `stream` or `future` for an import call,
-  the code above stores the `Subtask` of the import call in the `cx` field of
-  the `WritableStreamHandle` so that when `ReadableBuffer.lift` transitively
-  calls `lift_borrow` (above), this same `Subtask` is passed as the `cx`
-  argument, thereby triggering the same bookkeeping as if the `borrow` was
-  passed as a normal synchronous parameter of the `Subtask`.
-* When lifting the readable end of a `stream` or `future` for an import call,
-  the `cx.task is not h.cx` condition ensures that `borrow`s are only copied
-  into subtasks with the same `Task` as scope (matching the analogous guard in
-  `lift_borrow`).
+Note that `cx.borrow_scope` is saved in the writable handle for later use when
+lifting stream elements so that lifting a `stream<borrow<R>>` does the same
+bookkeeping as when lifting a `list<borrow<R>>`. Because the readable end of a
+stream containing `borrow` handles is call-scoped (like `borrow` handles), the
+readable end will be closed before the `Subtask` finishes and thus the
+`Subtask` pointed to by `h.borrow_scope` can't be used after it is destroyed.
 
 
 ### Storing
@@ -2015,19 +2027,19 @@ def pack_flags_into_int(v, labels):
 
 Finally, `own` and `borrow` handles are lowered by initializing new handle
 elements in the current component instance's handle table. The increment of
-`todo` is complemented by a decrement in `canon_resource_drop` and ensures
-that all borrowed handles are dropped before the end of the task. 
+`borrow_scope.todo` is complemented by a decrement in `canon_resource_drop`
+and ensures that all borrowed handles are dropped before the end of the task.
 ```python
 def lower_own(cx, rep, t):
   h = ResourceHandle(rep, own=True)
   return cx.inst.resources.add(t.rt, h)
 
 def lower_borrow(cx, rep, t):
-  assert(isinstance(cx.call, Task))
+  assert(isinstance(cx.borrow_scope, Task))
   if cx.inst is t.rt.impl:
     return rep
-  h = ResourceHandle(rep, own=False, call=cx.call)
-  cx.call.todo += 1
+  h = ResourceHandle(rep, own = False, borrow_scope = cx.borrow_scope)
+  h.borrow_scope.todo += 1
   return cx.inst.resources.add(t.rt, h)
 ```
 The special case in `lower_borrow` is an optimization, recognizing that, when
@@ -2049,22 +2061,30 @@ def lower_future(cx, v, t):
 
 def lower_async_value(ReadableHandleT, WritableHandleT, cx, v, t):
   assert(isinstance(v, ReadableStream))
-  if isinstance(v, WritableHandleT) and cx.inst is v.call.inst:
+  if isinstance(v, WritableHandleT) and cx.inst is v.impl:
     i = cx.inst.waitables.array.index(v)
-    v.call.todo -= 1
-    v.call = None
+    assert(v.paired)
+    v.paired = False
+    if contains_borrow(t):
+      v.borrow_scope = None
     assert(2**31 > Table.MAX_LENGTH >= i)
     return i | (2**31)
   else:
-    h = ReadableHandleT(v, t, cx.call)
-    h.call.todo += 1
+    h = ReadableHandleT(v, t)
+    h.paired = True
+    if contains_borrow(t):
+      h.borrow_scope = cx.borrow_scope
+      h.borrow_scope.todo += 1
     return cx.inst.waitables.add(h)
 ```
 In the ordinary case, the abstract `ReadableStream` (which may come from the
-host or the guest) is stored in a `ReadableHandle` in the `waitables` table,
-incrementing `todo` to ensure that `StreamHandle.drop` is called before
-`Task.exit`/`Subtask.drop` so that readable stream and future handles cannot
-outlive the `Task` to which their events are sent (via `h.call.task().notify()`).
+host or the guest) is stored in a `ReadableHandle` in the `waitables` table.
+The `borrow_scope.todo` increment must be matched by a decrement in
+`StreamHandle.drop` (as guarded by `Task.exit`) and ensures that streams of
+`borrow` handles follow the usual `borrow` scoping rules. Symmetric to
+`lift_async_value`, the `cx.borrow_scope` is saved in the readable handle for
+later use when lowering stream elements so that lowering a `stream<borrow<R>>`
+does the same bookkeeping as when lowering a `list<borrow<R>>`.
 
 The interesting case is when a component receives back a `ReadableStream` that
 it itself holds the `WritableStreamHandle` for. Without specially handling
@@ -2078,17 +2098,6 @@ collapsing the two ends of the stream to work fully without guest code (since
 the Canonical ABI is now wholly unnecessary to pass values from writer to
 reader).
 
-As with `lift_async_value`, it's useful to observe that there are no lifetime
-issues with a `stream` or `future` of `borrow` handles due to the following:
-* Validation ensures that `stream<borrow<R>>` or `future<borrow<R>>` can only
-  be lowered as part of the parameters of an export call.
-* When lowering a `stream` or `future`, the code above stores the `Task` of
-  the export call in the `cx` field of the `ReadableStreamHandle` so that when
-  `WritableBuffer.lower` transitively calls `lower_borrow` (above), this same
-  `Task` is passed as the `cx` argument, thereby triggering the same
-  bookkeeping as if the `borrow` was passed as a normal synchronous parameter
-  of the `Task`.
-
 
 ### Flattening
 
@@ -2780,7 +2789,7 @@ async def canon_resource_drop(rt, sync, task, i):
   h = inst.resources.remove(rt, i)
   flat_results = [] if sync else [0]
   if h.own:
-    assert(h.call is None)
+    assert(h.borrow_scope is None)
     trap_if(h.lend_count != 0)
     if inst is rt.impl:
       if rt.dtor:
@@ -2795,7 +2804,7 @@ async def canon_resource_drop(rt, sync, task, i):
       else:
         task.trap_if_on_the_stack(rt.impl)
   else:
-    h.call.todo -= 1
+    h.borrow_scope.todo -= 1
   return flat_results
 ```
 In general, the call to a resource's destructor is treated like a
@@ -2897,7 +2906,7 @@ writing the subtask index as an outparam:
 async def canon_task_wait(sync, mem, task, ptr):
   trap_if(not task.inst.may_leave)
   event, p1, p2 = await task.wait(sync)
-  cx = LiftLowerContext(CanonicalOptions(memory = mem), None, None)
+  cx = LiftLowerContext(CanonicalOptions(memory = mem), task.inst)
   store(cx, p1, U32Type(), ptr)
   store(cx, p2, U32Type(), ptr + 4)
   return [event]
@@ -2932,7 +2941,7 @@ async def canon_task_poll(sync, mem, task, ptr):
   ret = await task.poll(sync)
   if ret is None:
     return [0]
-  cx = LiftLowerContext(CanonicalOptions(memory = mem), None, None)
+  cx = LiftLowerContext(CanonicalOptions(memory = mem), task.inst)
   store(cx, ret, TupleType([U32Type(), U32Type(), U32Type()]), ptr)
   return [1]
 ```
@@ -2975,19 +2984,18 @@ the stream or future to the `waitables` table and return its index.
 ```python
 async def canon_stream_new(elem_type, task):
   trap_if(not task.inst.may_leave)
-  h = WritableStreamHandle(elem_type)
+  h = WritableStreamHandle(elem_type, task.inst)
   return [ task.inst.waitables.add(h) ]
 
 async def canon_future_new(t, task):
   trap_if(not task.inst.may_leave)
-  h = WritableFutureHandle(t)
+  h = WritableFutureHandle(t, task.inst)
   return [ task.inst.waitables.add(h) ]
 ```
-Note that the new writable end initially has its `StreamHandle.cx` field set
-to `None` which means it can't be used to `read` or `write` (defined next)
-until it has been lifted as an import parameter or export result. Lifting this
-readable handle sets `cx` and creates a readable end on the other side of the
-call so that  can commence.
+Note that new writable ends start with `StreamHandle.paired` unset. This
+means they can't be used in `{stream,future}.{read,write}` until after
+they have been lifted, which creates a corresponding readable end and sets
+`paired`.
 
 ### ๐Ÿ”€ `canon {stream,future}.{read,write}`
 
@@ -3038,9 +3046,9 @@ async def async_copy(HandleT, BufferT, t, opts, event_code, task, i, ptr, n):
   h = task.inst.waitables.get(i)
   trap_if(not isinstance(h, HandleT))
   trap_if(h.t != t)
-  trap_if(not h.call)
+  trap_if(not h.paired)
   trap_if(h.copying_buffer)
-  cx = LiftLowerContext(opts, task.inst, h.call)
+  cx = LiftLowerContext(opts, task.inst, h.borrow_scope)
   buffer = BufferT(cx, t, ptr, n)
   if h.stream.closed():
     flat_results = [CLOSED]
@@ -3051,22 +3059,23 @@ async def async_copy(HandleT, BufferT, t, opts, event_code, task, i, ptr, n):
     else:
       async def do_copy(on_block):
         await h.copy(buffer, on_block)
-        def copy_event():
-          if h.copying_buffer is buffer:
-            h.copying_buffer = None
-            return (event_code, i, pack_async_copy_result(buffer, h))
-          else:
-            return None
-        h.call.task().notify(copy_event)
+        if h.copying_buffer is buffer:
+          def copy_event():
+            if h.copying_buffer is buffer:
+              h.stop_copying()
+              return (event_code, i, pack_async_copy_result(buffer, h))
+            else:
+              return None
+          task.notify(copy_event)
       match await call_and_handle_blocking(do_copy):
         case Blocked():
-          h.copying_buffer = buffer
+          h.start_copying(task, buffer)
           flat_results = [BLOCKED]
         case Returned():
           flat_results = [pack_async_copy_result(buffer, h)]
   return flat_results
 ```
-The trap if `not h.call` prevents `write`s on the writable end of streams or
+The trap if `not h.paired` prevents `write`s on the writable end of streams or
 futures that have not yet been lifted. The `copying_buffer` field serves as a
 boolean indication of whether an async `read` or `write` is already in
 progress, preventing multiple overlapping calls to `read` or `write`. (This
@@ -3074,15 +3083,15 @@ restriction could be relaxed [in the future](Async.md#TODO) to allow greater
 pipeline parallelism.)
 
 One subtle corner case handled by this code that is worth pointing out is that,
-between the `h.call.task().notify(copy_event)` and the wasm guest code calling
-`task.wait` to receive this event, the wasm guest code can first call
-`{stream,future}.cancel-{read,write}` (defined next) which will return the copy
-progress to the wasm guest code and reset `copying_buffer` to `None` to allow
-future `read`s or `write`s. Then the wasm guest code can call
+between the call to `h.copy()` and its return, wasm guest code can call
+`{stream,future}.cancel-{read,write}` (defined next) which may return the copy
+progress to the wasm guest code and reset `copying_buffer` to `None` (to allow
+future `read`s or `write`s). Then the wasm guest code can call
 `{stream,future}.{read,write}` *again*, setting `copying_buffer` to a *new*
-buffer. Thus, `copy_event` must check `h.copying_buffer is buffer` at the last
-moment and remove the event otherwise (here: by returning `None`, which
-`task.wait` handles by discarding and waiting for the next event).
+buffer. Thus, it's necessary to test `h.copying_buffer is buffer` both before
+calling `task.notify(copy_event)` (since the `Task` may have `exit()`ed) and
+right before delivering the `copy_event`. (Note that returning `None` from
+`copy_event` causes the event to be discarded.)
 
 When the copy completes, the progress is reported to the caller. The order of
 tests here indicates that, if some progress was made and then the stream was
@@ -3146,28 +3155,27 @@ async def cancel_async_copy(HandleT, sync, task, i):
   trap_if(not h.copying_buffer)
   if h.stream.closed():
     flat_results = [pack_async_copy_result(h.copying_buffer, h)]
-    h.copying_buffer = None
+    h.stop_copying()
   else:
     if sync:
       await task.call_sync(h.cancel_copy, h.copying_buffer)
       flat_results = [pack_async_copy_result(h.copying_buffer, h)]
-      h.copying_buffer = None
+      h.stop_copying()
     else:
       match await call_and_handle_blocking(h.cancel_copy, h.copying_buffer):
         case Blocked():
           flat_results = [BLOCKED]
         case Returned():
           flat_results = [pack_async_copy_result(h.copying_buffer, h)]
-          h.copying_buffer = None
+          h.stop_copying()
   return flat_results
 ```
 As mentioned above for `async_copy`, if cancellation doesn't block, the
-buffer's progress is synchronously returned and the `copying_buffer` field of
-the `StreamHandle` is immediately reset to `None`, preventing any subsequent
-`{STREAM,FUTURE}_{READ,WRITE}` events from being delivered for the cancelled
-`read` or `write` in the future. In the `BLOCKED` case, there is no new
-`waitable` element allocated; the cancellation is simply reported as a normal
-`{STREAM,FUTURE}_{READ,WRITE}` event by the now-unblocked `read` or `write`.
+buffer's progress is synchronously returned and the "copying" status of
+the `StreamHandle` is immediately reset. In the `BLOCKED` case, there is no
+new `waitable` element allocated; the cancellation is simply reported as a
+normal `{STREAM,FUTURE}_{READ,WRITE}` event by the original, now-unblocked
+`read` or `write`.
 
 ### ๐Ÿ”€ `canon waitable.drop`
 
@@ -3268,6 +3276,7 @@ def canon_thread_hw_concurrency():
 [Shared-Everything Dynamic Linking]: examples/SharedEverythingDynamicLinking.md
 [Structured Concurrency]: Async.md#structured-concurrency
 [Current Task]: Async.md#current-task
+[Readable and Writable Ends]: Async.md#streams-and-futures
 
 [Administrative Instructions]: https://webassembly.github.io/spec/core/exec/runtime.html#syntax-instr-admin
 [Implementation Limits]: https://webassembly.github.io/spec/core/appendix/implementation.html
diff --git a/design/mvp/canonical-abi/definitions.py b/design/mvp/canonical-abi/definitions.py
index a4659591..7b7aa70e 100644
--- a/design/mvp/canonical-abi/definitions.py
+++ b/design/mvp/canonical-abi/definitions.py
@@ -196,12 +196,12 @@ class FutureType(ValType):
 class LiftLowerContext:
   opts: CanonicalOptions
   inst: ComponentInstance
-  call: Task|Subtask
+  borrow_scope: Optional[Task|Subtask]
 
-  def __init__(self, opts, inst, call):
+  def __init__(self, opts, inst, borrow_scope = None):
     self.opts = opts
     self.inst = inst
-    self.call = call
+    self.borrow_scope = borrow_scope
 
 
 ### Canonical ABI Options
@@ -307,13 +307,13 @@ def remove(self, i):
 class ResourceHandle:
   rep: int
   own: bool
-  call: Optional[Task]
+  borrow_scope: Optional[Task]
   lend_count: int
 
-  def __init__(self, rep, own, call = None):
+  def __init__(self, rep, own, borrow_scope = None):
     self.rep = rep
     self.own = own
-    self.call = call
+    self.borrow_scope = borrow_scope
     self.lend_count = 0
 
 #### Task State
@@ -514,7 +514,6 @@ class Subtask:
   lenders: list[ResourceHandle]
   notify_supertask: bool
   enqueued: bool
-  todo: int
 
   def __init__(self, opts, ft, task, flat_args):
     self.opts = opts
@@ -527,7 +526,6 @@ def __init__(self, opts, ft, task, flat_args):
     self.lenders = []
     self.notify_supertask = False
     self.enqueued = False
-    self.todo = 0
 
   def task(self):
     return self.supertask
@@ -580,7 +578,6 @@ def finish(self):
     return self.flat_results
 
   def drop(self):
-    trap_if(self.todo)
     trap_if(self.enqueued)
     trap_if(self.state != CallState.DONE)
     self.supertask.todo -= 1
@@ -636,11 +633,13 @@ def lower(self, vs):
     self.progress += len(vs)
 
 class ReadableStreamGuestImpl(ReadableStream):
+  impl: ComponentInstance
   is_closed: bool
   other_buffer: Optional[Buffer]
   other_future: Optional[asyncio.Future]
 
-  def __init__(self):
+  def __init__(self, inst):
+    self.impl = inst
     self.is_closed = False
     self.other_buffer = None
     self.other_future = None
@@ -698,20 +697,36 @@ def close(self):
 class StreamHandle:
   stream: ReadableStream
   t: ValType
-  call: Optional[Task|Subtask]
+  paired: bool
+  borrow_scope: Optional[Task|Subtask]
+  copying_task: Optional[Task]
   copying_buffer: Optional[Buffer]
 
-  def __init__(self, stream, t, call):
+  def __init__(self, stream, t):
     self.stream = stream
     self.t = t
-    self.call = call
+    self.paired = False
+    self.borrow_scope = None
+    self.copying_buffer = None
+    self.copying_task = None
+
+  def start_copying(self, task, buffer):
+    assert(not self.copying_task and not self.copying_buffer)
+    task.todo += 1
+    self.copying_task = task
+    self.copying_buffer = buffer
+
+  def stop_copying(self):
+    assert(self.copying_task and self.copying_buffer)
+    self.copying_task.todo -= 1
+    self.copying_task = None
     self.copying_buffer = None
 
   def drop(self):
     trap_if(self.copying_buffer)
     self.stream.close()
-    if self.call:
-      self.call.todo -= 1
+    if isinstance(self.borrow_scope, Task):
+      self.borrow_scope.todo -= 1
 
 class ReadableStreamHandle(StreamHandle):
   async def copy(self, dst, on_block):
@@ -720,9 +735,9 @@ async def cancel_copy(self, dst, on_block):
     await self.stream.cancel_read(dst, on_block)
 
 class WritableStreamHandle(ReadableStreamGuestImpl, StreamHandle):
-  def __init__(self, t):
-    ReadableStreamGuestImpl.__init__(self)
-    StreamHandle.__init__(self, self, t, call = None)
+  def __init__(self, t, inst):
+    ReadableStreamGuestImpl.__init__(self, inst)
+    StreamHandle.__init__(self, self, t)
   async def copy(self, src, on_block):
     await self.write(src, on_block)
   async def cancel_copy(self, src, on_block):
@@ -743,9 +758,9 @@ async def cancel_copy(self, dst, on_block):
       self.stream.close()
 
 class WritableFutureHandle(ReadableStreamGuestImpl, FutureHandle):
-  def __init__(self, t):
-    ReadableStreamGuestImpl.__init__(self)
-    FutureHandle.__init__(self, self, t, call = None)
+  def __init__(self, t, inst):
+    ReadableStreamGuestImpl.__init__(self, inst)
+    FutureHandle.__init__(self, self, t)
 
   async def copy(self, src, on_block):
     assert(src.remain() == 1)
@@ -1083,12 +1098,12 @@ def lift_own(cx, i, t):
   return h.rep
 
 def lift_borrow(cx, i, t):
-  assert(isinstance(cx.call, Subtask))
+  assert(isinstance(cx.borrow_scope, Subtask))
   h = cx.inst.resources.get(t.rt, i)
   if h.own:
-    cx.call.add_lender(h)
+    cx.borrow_scope.add_lender(h)
   else:
-    trap_if(cx.call.task() is not h.call.task())
+    trap_if(cx.borrow_scope.task() is not h.borrow_scope)
   return h.rep
 
 def lift_stream(cx, i, t):
@@ -1104,14 +1119,16 @@ def lift_async_value(ReadableHandleT, WritableHandleT, cx, i, t):
   match h:
     case ReadableHandleT():
       trap_if(h.copying_buffer)
-      trap_if(contains_borrow(t) and cx.call.task() is not h.call.task())
-      h.call.todo -= 1
+      if contains_borrow(t):
+        trap_if(cx.borrow_scope.task() is not h.borrow_scope)
+        h.borrow_scope.todo -= 1
       cx.inst.waitables.remove(i)
     case WritableHandleT():
-      trap_if(h.call is not None)
+      trap_if(h.paired)
       assert(not h.copying_buffer)
-      h.call = cx.call
-      h.call.todo += 1
+      h.paired = True
+      if contains_borrow(t):
+        h.borrow_scope = cx.borrow_scope
     case _:
       trap()
   trap_if(h.t != t)
@@ -1399,11 +1416,11 @@ def lower_own(cx, rep, t):
   return cx.inst.resources.add(t.rt, h)
 
 def lower_borrow(cx, rep, t):
-  assert(isinstance(cx.call, Task))
+  assert(isinstance(cx.borrow_scope, Task))
   if cx.inst is t.rt.impl:
     return rep
-  h = ResourceHandle(rep, own=False, call=cx.call)
-  cx.call.todo += 1
+  h = ResourceHandle(rep, own = False, borrow_scope = cx.borrow_scope)
+  h.borrow_scope.todo += 1
   return cx.inst.resources.add(t.rt, h)
 
 def lower_stream(cx, v, t):
@@ -1415,15 +1432,20 @@ def lower_future(cx, v, t):
 
 def lower_async_value(ReadableHandleT, WritableHandleT, cx, v, t):
   assert(isinstance(v, ReadableStream))
-  if isinstance(v, WritableHandleT) and cx.inst is v.call.inst:
+  if isinstance(v, WritableHandleT) and cx.inst is v.impl:
     i = cx.inst.waitables.array.index(v)
-    v.call.todo -= 1
-    v.call = None
+    assert(v.paired)
+    v.paired = False
+    if contains_borrow(t):
+      v.borrow_scope = None
     assert(2**31 > Table.MAX_LENGTH >= i)
     return i | (2**31)
   else:
-    h = ReadableHandleT(v, t, cx.call)
-    h.call.todo += 1
+    h = ReadableHandleT(v, t)
+    h.paired = True
+    if contains_borrow(t):
+      h.borrow_scope = cx.borrow_scope
+      h.borrow_scope.todo += 1
     return cx.inst.waitables.add(h)
 
 ### Flattening
@@ -1815,7 +1837,7 @@ async def canon_resource_drop(rt, sync, task, i):
   h = inst.resources.remove(rt, i)
   flat_results = [] if sync else [0]
   if h.own:
-    assert(h.call is None)
+    assert(h.borrow_scope is None)
     trap_if(h.lend_count != 0)
     if inst is rt.impl:
       if rt.dtor:
@@ -1830,7 +1852,7 @@ async def canon_resource_drop(rt, sync, task, i):
       else:
         task.trap_if_on_the_stack(rt.impl)
   else:
-    h.call.todo -= 1
+    h.borrow_scope.todo -= 1
   return flat_results
 
 ### `canon resource.rep`
@@ -1862,7 +1884,7 @@ async def canon_task_return(task, core_ft, flat_args):
 async def canon_task_wait(sync, mem, task, ptr):
   trap_if(not task.inst.may_leave)
   event, p1, p2 = await task.wait(sync)
-  cx = LiftLowerContext(CanonicalOptions(memory = mem), None, None)
+  cx = LiftLowerContext(CanonicalOptions(memory = mem), task.inst)
   store(cx, p1, U32Type(), ptr)
   store(cx, p2, U32Type(), ptr + 4)
   return [event]
@@ -1874,7 +1896,7 @@ async def canon_task_poll(sync, mem, task, ptr):
   ret = await task.poll(sync)
   if ret is None:
     return [0]
-  cx = LiftLowerContext(CanonicalOptions(memory = mem), None, None)
+  cx = LiftLowerContext(CanonicalOptions(memory = mem), task.inst)
   store(cx, ret, TupleType([U32Type(), U32Type(), U32Type()]), ptr)
   return [1]
 
@@ -1889,12 +1911,12 @@ async def canon_task_yield(sync, task):
 
 async def canon_stream_new(elem_type, task):
   trap_if(not task.inst.may_leave)
-  h = WritableStreamHandle(elem_type)
+  h = WritableStreamHandle(elem_type, task.inst)
   return [ task.inst.waitables.add(h) ]
 
 async def canon_future_new(t, task):
   trap_if(not task.inst.may_leave)
-  h = WritableFutureHandle(t)
+  h = WritableFutureHandle(t, task.inst)
   return [ task.inst.waitables.add(h) ]
 
 ### ๐Ÿ”€ `canon {stream,future}.{read,write}`
@@ -1920,9 +1942,9 @@ async def async_copy(HandleT, BufferT, t, opts, event_code, task, i, ptr, n):
   h = task.inst.waitables.get(i)
   trap_if(not isinstance(h, HandleT))
   trap_if(h.t != t)
-  trap_if(not h.call)
+  trap_if(not h.paired)
   trap_if(h.copying_buffer)
-  cx = LiftLowerContext(opts, task.inst, h.call)
+  cx = LiftLowerContext(opts, task.inst, h.borrow_scope)
   buffer = BufferT(cx, t, ptr, n)
   if h.stream.closed():
     flat_results = [CLOSED]
@@ -1933,16 +1955,17 @@ async def async_copy(HandleT, BufferT, t, opts, event_code, task, i, ptr, n):
     else:
       async def do_copy(on_block):
         await h.copy(buffer, on_block)
-        def copy_event():
-          if h.copying_buffer is buffer:
-            h.copying_buffer = None
-            return (event_code, i, pack_async_copy_result(buffer, h))
-          else:
-            return None
-        h.call.task().notify(copy_event)
+        if h.copying_buffer is buffer:
+          def copy_event():
+            if h.copying_buffer is buffer:
+              h.stop_copying()
+              return (event_code, i, pack_async_copy_result(buffer, h))
+            else:
+              return None
+          task.notify(copy_event)
       match await call_and_handle_blocking(do_copy):
         case Blocked():
-          h.copying_buffer = buffer
+          h.start_copying(task, buffer)
           flat_results = [BLOCKED]
         case Returned():
           flat_results = [pack_async_copy_result(buffer, h)]
@@ -1980,19 +2003,19 @@ async def cancel_async_copy(HandleT, sync, task, i):
   trap_if(not h.copying_buffer)
   if h.stream.closed():
     flat_results = [pack_async_copy_result(h.copying_buffer, h)]
-    h.copying_buffer = None
+    h.stop_copying()
   else:
     if sync:
       await task.call_sync(h.cancel_copy, h.copying_buffer)
       flat_results = [pack_async_copy_result(h.copying_buffer, h)]
-      h.copying_buffer = None
+      h.stop_copying()
     else:
       match await call_and_handle_blocking(h.cancel_copy, h.copying_buffer):
         case Blocked():
           flat_results = [BLOCKED]
         case Returned():
           flat_results = [pack_async_copy_result(h.copying_buffer, h)]
-          h.copying_buffer = None
+          h.stop_copying()
   return flat_results
 
 ### ๐Ÿ”€ `canon waitable.drop`
diff --git a/design/mvp/canonical-abi/run_tests.py b/design/mvp/canonical-abi/run_tests.py
index ea61937d..b89a2b27 100644
--- a/design/mvp/canonical-abi/run_tests.py
+++ b/design/mvp/canonical-abi/run_tests.py
@@ -47,7 +47,7 @@ def mk_opts(memory = bytearray(), encoding = 'utf8', realloc = None, post_return
 def mk_cx(memory = bytearray(), encoding = 'utf8', realloc = None, post_return = None):
   opts = mk_opts(memory, encoding, realloc, post_return)
   inst = ComponentInstance()
-  return LiftLowerContext(opts, inst, None)
+  return LiftLowerContext(opts, inst)
 
 def mk_str(s):
   return (s, 'utf8', len(s.encode('utf-8')))

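To illustrate the `i | (2**31)` encoding returned by `lower_async_value` above,
the following standalone sketch (hypothetical helper names, not part of the
Canonical ABI or this patch) shows how guest bindings might distinguish a
freshly-created readable end from an unwrapped writable end that the component
already owns:
```python
# Hypothetical guest-side helper, illustration only: the high bit of the i32
# returned when lowering a stream signals that the index refers to a writable
# end this component already holds, so the two ends can be collapsed locally.
UNWRAPPED_FLAG = 2**31

def classify_lowered_stream(i):
  if i & UNWRAPPED_FLAG:
    return ('own-writable-end', i & ~UNWRAPPED_FLAG)
  return ('new-readable-end', i)

assert classify_lowered_stream(5) == ('new-readable-end', 5)
assert classify_lowered_stream(5 | UNWRAPPED_FLAG) == ('own-writable-end', 5)
```
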
From 70d727a3e752b40ad0d67a867aec901a126667b8 Mon Sep 17 00:00:00 2001
From: Luke Wagner <mail@lukewagner.name>
Date: Wed, 30 Oct 2024 17:45:34 -0500
Subject: [PATCH 14/22] Break waitable.drop into subtask.drop and
 {stream,future}.close-{readable,writable}

---
 design/mvp/Binary.md                    |  16 ++--
 design/mvp/CanonicalABI.md              | 110 +++++++++++++++++-------
 design/mvp/Explainer.md                 |  19 ++--
 design/mvp/canonical-abi/definitions.py |  69 ++++++++++-----
 design/mvp/canonical-abi/run_tests.py   |  78 ++++++++---------
 5 files changed, 189 insertions(+), 103 deletions(-)

diff --git a/design/mvp/Binary.md b/design/mvp/Binary.md
index 312cdccc..14313ab8 100644
--- a/design/mvp/Binary.md
+++ b/design/mvp/Binary.md
@@ -292,17 +292,21 @@ canon    ::= 0x00 0x00 f:<core:funcidx> opts:<opts> ft:<typeidx> => (canon lift
            | 0x0a async?:<async>? m:<core:memdix>                => (canon task.wait async? (memory m) (core func)) ๐Ÿ”€
            | 0x0b async?:<async>? m:<core:memidx>                => (canon task.poll async? (memory m) (core func)) ๐Ÿ”€
            | 0x0c async?:<async>?                                => (canon task.yield async? (core func)) ๐Ÿ”€
-           | 0x0d                                                => (canon waitable.drop (core func)) ๐Ÿ”€
+           | 0x0d                                                => (canon subtask.drop (core func)) ๐Ÿ”€
            | 0x0e t:<typeidx>                                    => (canon stream.new t (core func)) ๐Ÿ”€
            | 0x0f t:<typeidx> opts:<opts>                        => (canon stream.read t opts (core func)) ๐Ÿ”€
            | 0x10 t:<typeidx> opts:<opts>                        => (canon stream.write t opts (core func)) ๐Ÿ”€
            | 0x11 async?:<async?>                                => (canon stream.cancel-read async? (core func)) ๐Ÿ”€
            | 0x12 async?:<async?>                                => (canon stream.cancel-write async? (core func)) ๐Ÿ”€
-           | 0x13 t:<typeidx>                                    => (canon future.new t (core func)) ๐Ÿ”€
-           | 0x14 t:<typeidx> opts:<opts>                        => (canon future.read t opts (core func)) ๐Ÿ”€
-           | 0x15 t:<typeidx> opts:<opts>                        => (canon future.write t opts (core func)) ๐Ÿ”€
-           | 0x16 async?:<async?>                                => (canon future.cancel-read async? (core func)) ๐Ÿ”€
-           | 0x17 async?:<async?>                                => (canon future.cancel-write async? (core func)) ๐Ÿ”€
+           | 0x13 t:<typeidx>                                    => (canon stream.close-readable t (core func)) ๐Ÿ”€
+           | 0x14 t:<typeidx>                                    => (canon stream.close-writable t (core func)) ๐Ÿ”€
+           | 0x15 t:<typeidx>                                    => (canon future.new t (core func)) ๐Ÿ”€
+           | 0x16 t:<typeidx> opts:<opts>                        => (canon future.read t opts (core func)) ๐Ÿ”€
+           | 0x17 t:<typeidx> opts:<opts>                        => (canon future.write t opts (core func)) ๐Ÿ”€
+           | 0x18 async?:<async?>                                => (canon future.cancel-read async? (core func)) ๐Ÿ”€
+           | 0x19 async?:<async?>                                => (canon future.cancel-write async? (core func)) ๐Ÿ”€
+           | 0x1a t:<typeidx>                                    => (canon future.close-readable t (core func)) ๐Ÿ”€
+           | 0x1b t:<typeidx>                                    => (canon future.close-writable t (core func)) ๐Ÿ”€
 async?   ::= 0x00                                                =>
            | 0x01                                                => async
 opts     ::= opt*:vec(<canonopt>)                                => opt*
diff --git a/design/mvp/CanonicalABI.md b/design/mvp/CanonicalABI.md
index 70e2ac39..5903cb50 100644
--- a/design/mvp/CanonicalABI.md
+++ b/design/mvp/CanonicalABI.md
@@ -791,8 +791,8 @@ counter is used below to record the number of unmet obligations to drop the
 streams and futures connected to this `Subtask`.
 ```python
   def drop(self):
-    trap_if(self.enqueued)
     trap_if(self.state != CallState.DONE)
+    assert(not self.enqueued)
     self.supertask.todo -= 1
 ```
 
@@ -1058,9 +1058,14 @@ and ensures that streams-of-borrows are dropped before the end of the call,
 just like normal `borrow` handles.
 
 Given the above logic, the [readable and writable ends] of a stream can be
-concretely implemented by the following two classes. The `copy`, `cancel_copy`
-and `drop` methods are called polymorphically by the common `async_copy`
-routine shared by the `stream.read` and `stream.write` built-ins below.
+concretely implemented by the following two classes. The readable end
+inherits `StreamHandle`'s constructor, which takes an already-created abstract
+`ReadableStream` passed into the component. In contrast, the writable end's
+constructor creates a fresh `ReadableStreamGuestImpl` that will later
+be given to the readable end paired with this writable end. The `copy`,
+`cancel_copy` and `drop` methods are called polymorphically by the common
+`async_copy` routine shared by the `stream.read` and `stream.write` built-ins
+below.
 ```python
 class ReadableStreamHandle(StreamHandle):
   async def copy(self, dst, on_block):
@@ -1068,14 +1073,14 @@ class ReadableStreamHandle(StreamHandle):
   async def cancel_copy(self, dst, on_block):
     await self.stream.cancel_read(dst, on_block)
 
-class WritableStreamHandle(ReadableStreamGuestImpl, StreamHandle):
+class WritableStreamHandle(StreamHandle):
   def __init__(self, t, inst):
-    ReadableStreamGuestImpl.__init__(self, inst)
-    StreamHandle.__init__(self, self, t)
+    stream = ReadableStreamGuestImpl(inst)
+    StreamHandle.__init__(self, stream, t)
   async def copy(self, src, on_block):
-    await self.write(src, on_block)
+    await self.stream.write(src, on_block)
   async def cancel_copy(self, src, on_block):
-    await self.cancel_write(src, on_block)
+    await self.stream.cancel_write(src, on_block)
 ```
 
 Given the above definitions of how `stream` works, a `future` can simply be
@@ -1097,24 +1102,24 @@ class ReadableFutureHandle(FutureHandle):
     if dst.remain() == 0:
       self.stream.close()
 
-class WritableFutureHandle(ReadableStreamGuestImpl, FutureHandle):
+class WritableFutureHandle(FutureHandle):
   def __init__(self, t, inst):
-    ReadableStreamGuestImpl.__init__(self, inst)
-    FutureHandle.__init__(self, self, t)
+    stream = ReadableStreamGuestImpl(inst)
+    FutureHandle.__init__(self, stream, t)
 
   async def copy(self, src, on_block):
     assert(src.remain() == 1)
-    await self.write(src, on_block)
+    await self.stream.write(src, on_block)
     if src.remain() == 0:
-      self.close()
+      self.stream.close()
 
   async def cancel_copy(self, src, on_block):
-    await self.cancel_write(src, on_block)
+    await self.stream.cancel_write(src, on_block)
     if src.remain() == 0:
-      self.close()
+      self.stream.close()
 
   def drop(self):
-    trap_if(not self.closed())
+    trap_if(not self.stream.closed())
     FutureHandle.drop(self)
 ```
 The overridden `WritableFutureHandle.drop` method traps if the internal stream
@@ -2061,12 +2066,13 @@ def lower_future(cx, v, t):
 
 def lower_async_value(ReadableHandleT, WritableHandleT, cx, v, t):
   assert(isinstance(v, ReadableStream))
-  if isinstance(v, WritableHandleT) and cx.inst is v.impl:
-    i = cx.inst.waitables.array.index(v)
-    assert(v.paired)
-    v.paired = False
+  if isinstance(v, ReadableStreamGuestImpl) and cx.inst is v.impl:
+    [h] = [h for h in cx.inst.waitables.array if h and h.stream is v]
+    assert(h.paired)
+    h.paired = False
     if contains_borrow(t):
-      v.borrow_scope = None
+      h.borrow_scope = None
+    i = cx.inst.waitables.array.index(h)
     assert(2**31 > Table.MAX_LENGTH >= i)
     return i | (2**31)
   else:
@@ -2091,12 +2097,14 @@ it itself holds the `WritableStreamHandle` for. Without specially handling
 this case, this would lead to copies from a single linear memory into itself
 which is both inefficient and raises subtle semantic interleaving questions
 that we would rather avoid. To avoid both, this case is detected and the
-`ReadableStream` is "unwrapped" to writable handle, returning the existing
+`ReadableStream` is "unwrapped" to the writable handle, returning the existing
 index of it in the `waitables` table, setting the high bit to signal this fact
 to guest code. Guest code must therefore handle this special case by
 collapsing the two ends of the stream to work fully without guest code (since
 the Canonical ABI is now wholly unnecessary to pass values from writer to
-reader).
+reader). The O(N) searches through the `waitables` table are expected to be
+optimized away by instead storing a pointer or index of the writable handle in
+the stream itself (alongside the `impl` field).
 
 
 ### Flattening
@@ -2969,6 +2977,27 @@ execute, however tasks in *other* component instances may execute. This allows
 a long-running task in one component to avoid starving other components
 without needing support full reentrancy.
 
+### ๐Ÿ”€ `canon subtask.drop`
+
+For a canonical definition:
+```wasm
+(canon subtask.drop (core func $f))
+```
+validation specifies:
+* `$f` is given type `(func (param i32))`
+
+Calling `$f` removes the subtask at the given index from the current
+component instance's `waitable` table, performing the guards and bookkeeping
+defined by `Subtask.drop()`.
+```python
+async def canon_subtask_drop(task, i):
+  trap_if(not task.inst.may_leave)
+  h = task.inst.waitables.remove(i)
+  trap_if(not isinstance(h, Subtask))
+  h.drop()
+  return []
+```
+
 ### ๐Ÿ”€ `canon {stream,future}.new`
 
 For canonical definitions:
@@ -3177,22 +3206,41 @@ new `waitable` element allocated; the cancellation is simply reported as a
 normal `{STREAM,FUTURE}_{READ,WRITE}` event by the original, now-unblocked
 `read` or `write`.
 
-### ๐Ÿ”€ `canon waitable.drop`
+### ๐Ÿ”€ `canon {stream,future}.close-{readable,writable}`
 
-For a canonical definition:
+For canonical definitions:
 ```wasm
-(canon waitable.drop (core func $f))
+(canon stream.close-readable $t (core func $f))
+(canon stream.close-writable $t (core func $f))
+(canon future.close-readable $t (core func $f))
+(canon future.close-writable $t (core func $f))
 ```
 validation specifies:
 * `$f` is given type `(func (param i32))`
 
-Calling `$f` removes the indicated waitable (subtask, stream or future) from
-the instance's table, trapping if various conditions aren't met in the
-waitable's `drop()` method.
+Calling `$f` removes the readable or writable end of the stream or future at
+the given index from the current component instance's `waitable` table,
+performing the guards and bookkeeping defined by
+`{Readable,Writable}{Stream,Future}Handle.drop()` above.
 ```python
-async def canon_waitable_drop(task, i):
+async def canon_stream_close_readable(t, task, i):
+  return await close_async_value(ReadableStreamHandle, t, task, i)
+
+async def canon_stream_close_writable(t, task, i):
+  return await close_async_value(WritableStreamHandle, t, task, i)
+
+async def canon_future_close_readable(t, task, i):
+  return await close_async_value(ReadableFutureHandle, t, task, i)
+
+async def canon_future_close_writable(t, task, i):
+  return await close_async_value(WritableFutureHandle, t, task, i)
+
+async def close_async_value(HandleT, t, task, i):
   trap_if(not task.inst.may_leave)
-  task.inst.waitables.remove(i).drop()
+  h = task.inst.waitables.remove(i)
+  trap_if(not isinstance(h, HandleT))
+  trap_if(h.t != t)
+  h.drop()
   return []
 ```
 
diff --git a/design/mvp/Explainer.md b/design/mvp/Explainer.md
index d99f19ed..1c78d04c 100644
--- a/design/mvp/Explainer.md
+++ b/design/mvp/Explainer.md
@@ -1372,16 +1372,21 @@ canon ::= ...
         | (canon task.wait async? (memory <core:memidx>) (core func <id>?)) ๐Ÿ”€
         | (canon task.poll async? (memory <core:memidx>) (core func <id>?)) ๐Ÿ”€
         | (canon task.yield async? (core func <id>?)) ๐Ÿ”€
+        | (canon subtask.drop (core func <id>?)) ๐Ÿ”€
         | (canon stream.new <typeidx> (core func <id>?)) ๐Ÿ”€
         | (canon stream.read <typeidx> <canonopt>* (core func <id>?)) ๐Ÿ”€
         | (canon stream.write <typeidx> <canonopt>* (core func <id>?)) ๐Ÿ”€
         | (canon stream.cancel-read async? (core func <id>?)) ๐Ÿ”€
         | (canon stream.cancel-write async? (core func <id>?)) ๐Ÿ”€
+        | (canon stream.close-readable <typeidx> (core func <id>?)) ๐Ÿ”€
+        | (canon stream.close-writable <typeidx> (core func <id>?)) ๐Ÿ”€
         | (canon future.new <typeidx> (core func <id>?)) ๐Ÿ”€
         | (canon future.read <typeidx> <canonopt>* (core func <id>?)) ๐Ÿ”€
         | (canon future.write <typeidx> <canonopt>* (core func <id>?)) ๐Ÿ”€
         | (canon future.cancel-read async? (core func <id>?)) ๐Ÿ”€
         | (canon future.cancel-write async? (core func <id>?)) ๐Ÿ”€
+        | (canon future.close-readable <typeidx> (core func <id>?)) ๐Ÿ”€
+        | (canon future.close-writable <typeidx> (core func <id>?)) ๐Ÿ”€
         | (canon waitable.drop (core func <id>?)) ๐Ÿ”€
         | (canon thread.spawn <typeidx> (core func <id>?)) ๐Ÿงต
         | (canon thread.hw_concurrency (core func <id>?)) ๐Ÿงต
@@ -1470,6 +1475,10 @@ switch to another task, allowing a long-running computation to cooperatively
 interleave with other tasks. (See also [`canon_task_yield`] in the Canonical
 ABI explainer.)
 
+The `subtask.drop` built-in has type `[i32] -> []` and removes the indicated
+[subtask](Async.md#subtask-and-supertask) from the current instance's subtask
+table, trapping if the subtask isn't done.
+
 The `{stream,future}.new` built-ins have type `[] -> [i32]` and return a new
 [writable end](Async.md#streams-and-futures) of a stream or future. (See
 [`canon_stream_new`] in the Canonical ABI explainer for details.)
@@ -1500,11 +1509,11 @@ blocks, the return value is the sentinel "`BLOCKED`" value and the caller must
 or `write`. (See [`canon_stream_cancel_read`] in the Canonical ABI explainer
 for details.)
 
-The `waitable.drop` built-in has type `[i32] -> []` and removes the indicated
-[subtask](Async.md#subtask-and-supertask) or [stream or future](Async.md#streams-and-futures)
-from the current instance's [waitables](Async.md#waiting) table, trapping if
-the subtask isn't done or the stream or future is in the middle of reading
-or writing.
+The `{stream,future}.close-{readable,writable}` built-ins have type
+`[i32] -> []` and remove the indicated [stream or future](Async.md#streams-and-futures)
+from the current component instance's [waitables](Async.md#waiting) table,
+trapping if the stream or future has a mismatched direction or type or is in
+the middle of a `read` or `write`.
 
 ##### ๐Ÿงต Threading built-ins
 
diff --git a/design/mvp/canonical-abi/definitions.py b/design/mvp/canonical-abi/definitions.py
index 7b7aa70e..54b7fd1b 100644
--- a/design/mvp/canonical-abi/definitions.py
+++ b/design/mvp/canonical-abi/definitions.py
@@ -578,8 +578,8 @@ def finish(self):
     return self.flat_results
 
   def drop(self):
-    trap_if(self.enqueued)
     trap_if(self.state != CallState.DONE)
+    assert(not self.enqueued)
     self.supertask.todo -= 1
 
 #### Buffer, Stream and Future State
@@ -707,8 +707,8 @@ def __init__(self, stream, t):
     self.t = t
     self.paired = False
     self.borrow_scope = None
-    self.copying_buffer = None
     self.copying_task = None
+    self.copying_buffer = None
 
   def start_copying(self, task, buffer):
     assert(not self.copying_task and not self.copying_buffer)
@@ -734,14 +734,14 @@ async def copy(self, dst, on_block):
   async def cancel_copy(self, dst, on_block):
     await self.stream.cancel_read(dst, on_block)
 
-class WritableStreamHandle(ReadableStreamGuestImpl, StreamHandle):
+class WritableStreamHandle(StreamHandle):
   def __init__(self, t, inst):
-    ReadableStreamGuestImpl.__init__(self, inst)
-    StreamHandle.__init__(self, self, t)
+    stream = ReadableStreamGuestImpl(inst)
+    StreamHandle.__init__(self, stream, t)
   async def copy(self, src, on_block):
-    await self.write(src, on_block)
+    await self.stream.write(src, on_block)
   async def cancel_copy(self, src, on_block):
-    await self.cancel_write(src, on_block)
+    await self.stream.cancel_write(src, on_block)
 
 class FutureHandle(StreamHandle): pass
 
@@ -757,24 +757,24 @@ async def cancel_copy(self, dst, on_block):
     if dst.remain() == 0:
       self.stream.close()
 
-class WritableFutureHandle(ReadableStreamGuestImpl, FutureHandle):
+class WritableFutureHandle(FutureHandle):
   def __init__(self, t, inst):
-    ReadableStreamGuestImpl.__init__(self, inst)
-    FutureHandle.__init__(self, self, t)
+    stream = ReadableStreamGuestImpl(inst)
+    FutureHandle.__init__(self, stream, t)
 
   async def copy(self, src, on_block):
     assert(src.remain() == 1)
-    await self.write(src, on_block)
+    await self.stream.write(src, on_block)
     if src.remain() == 0:
-      self.close()
+      self.stream.close()
 
   async def cancel_copy(self, src, on_block):
-    await self.cancel_write(src, on_block)
+    await self.stream.cancel_write(src, on_block)
     if src.remain() == 0:
-      self.close()
+      self.stream.close()
 
   def drop(self):
-    trap_if(not self.closed())
+    trap_if(not self.stream.closed())
     FutureHandle.drop(self)
 
 ### Despecialization
@@ -1432,12 +1432,13 @@ def lower_future(cx, v, t):
 
 def lower_async_value(ReadableHandleT, WritableHandleT, cx, v, t):
   assert(isinstance(v, ReadableStream))
-  if isinstance(v, WritableHandleT) and cx.inst is v.impl:
-    i = cx.inst.waitables.array.index(v)
-    assert(v.paired)
-    v.paired = False
+  if isinstance(v, ReadableStreamGuestImpl) and cx.inst is v.impl:
+    [h] = [h for h in cx.inst.waitables.array if h and h.stream is v]
+    assert(h.paired)
+    h.paired = False
     if contains_borrow(t):
-      v.borrow_scope = None
+      h.borrow_scope = None
+    i = cx.inst.waitables.array.index(h)
     assert(2**31 > Table.MAX_LENGTH >= i)
     return i | (2**31)
   else:
@@ -1907,6 +1908,15 @@ async def canon_task_yield(sync, task):
   await task.yield_(sync)
   return []
 
+### ๐Ÿ”€ `canon subtask.drop`
+
+async def canon_subtask_drop(task, i):
+  trap_if(not task.inst.may_leave)
+  h = task.inst.waitables.remove(i)
+  trap_if(not isinstance(h, Subtask))
+  h.drop()
+  return []
+
 ### ๐Ÿ”€ `canon {stream,future}.new`
 
 async def canon_stream_new(elem_type, task):
@@ -2018,9 +2028,24 @@ async def cancel_async_copy(HandleT, sync, task, i):
           h.stop_copying()
   return flat_results
 
-### ๐Ÿ”€ `canon waitable.drop`
+### ๐Ÿ”€ `canon {stream,future}.close-{readable,writable}`
+
+async def canon_stream_close_readable(t, task, i):
+  return await close_async_value(ReadableStreamHandle, t, task, i)
+
+async def canon_stream_close_writable(t, task, i):
+  return await close_async_value(WritableStreamHandle, t, task, i)
+
+async def canon_future_close_readable(t, task, i):
+  return await close_async_value(ReadableFutureHandle, t, task, i)
 
-async def canon_waitable_drop(task, i):
+async def canon_future_close_writable(t, task, i):
+  return await close_async_value(WritableFutureHandle, t, task, i)
+
+async def close_async_value(HandleT, t, task, i):
   trap_if(not task.inst.may_leave)
-  task.inst.waitables.remove(i).drop()
+  h = task.inst.waitables.remove(i)
+  trap_if(not isinstance(h, HandleT))
+  trap_if(h.t != t)
+  h.drop()
   return []
diff --git a/design/mvp/canonical-abi/run_tests.py b/design/mvp/canonical-abi/run_tests.py
index b89a2b27..5a47a3a8 100644
--- a/design/mvp/canonical-abi/run_tests.py
+++ b/design/mvp/canonical-abi/run_tests.py
@@ -581,7 +581,7 @@ async def consumer(task, args):
     event, callidx, _ = await task.wait(sync = False)
     assert(event == EventCode.CALL_DONE)
     assert(callidx == 1)
-    [] = await canon_waitable_drop(task, callidx)
+    [] = await canon_subtask_drop(task, callidx)
     event, callidx, _ = await task.wait(sync = True)
     assert(event == EventCode.CALL_STARTED)
     assert(callidx == 2)
@@ -595,7 +595,7 @@ async def consumer(task, args):
     event, callidx, _ = await task.wait(sync = True)
     assert(event == EventCode.CALL_DONE)
     assert(callidx == 2)
-    [] = await canon_waitable_drop(task, callidx)
+    [] = await canon_subtask_drop(task, callidx)
 
     dtor_fut = asyncio.Future()
     dtor_value = None
@@ -617,7 +617,7 @@ async def dtor(task, args):
     event, callidx, _ = await task.wait(sync = False)
     assert(event == CallState.DONE)
     assert(callidx == 2)
-    [] = await canon_waitable_drop(task, callidx)
+    [] = await canon_subtask_drop(task, callidx)
 
     [] = await canon_task_return(task, CoreFuncType(['i32'],[]), [42])
     return []
@@ -675,7 +675,7 @@ async def callback(task, args):
       assert(args[1] == EventCode.CALL_DONE)
       assert(args[2] == 1)
       assert(args[3] == 0)
-      await canon_waitable_drop(task, 1)
+      await canon_subtask_drop(task, 1)
       return [53]
     elif args[0] == 52:
       assert(args[1] == EventCode.YIELDED)
@@ -688,7 +688,7 @@ async def callback(task, args):
       assert(args[1] == EventCode.CALL_DONE)
       assert(args[2] == 2)
       assert(args[3] == 0)
-      await canon_waitable_drop(task, 2)
+      await canon_subtask_drop(task, 2)
       [] = await canon_task_return(task, CoreFuncType(['i32'],[]), [83])
       return [0]
 
@@ -753,7 +753,7 @@ async def consumer(task, args):
     event, callidx, _ = await task.wait(sync = False)
     assert(event == EventCode.CALL_DONE)
     assert(callidx == 1)
-    await canon_waitable_drop(task, callidx)
+    await canon_subtask_drop(task, callidx)
     assert(producer1_done == True)
 
     assert(producer2_done == False)
@@ -762,7 +762,7 @@ async def consumer(task, args):
     event, callidx, _ = await task.poll(sync = False)
     assert(event == EventCode.CALL_DONE)
     assert(callidx == 2)
-    await canon_waitable_drop(task, callidx)
+    await canon_subtask_drop(task, callidx)
     assert(producer2_done == True)
 
     assert(await task.poll(sync = True) is None)
@@ -838,8 +838,8 @@ async def consumer(task, args):
     assert(callidx == 2)
     assert(producer2_done == True)
 
-    await canon_waitable_drop(task, 1)
-    await canon_waitable_drop(task, 2)
+    await canon_subtask_drop(task, 1)
+    await canon_subtask_drop(task, 2)
 
     assert(await task.poll(sync = False) is None)
 
@@ -893,8 +893,8 @@ async def core_func(task, args):
     assert(event == EventCode.CALL_DONE)
     assert(callidx == 2)
 
-    await canon_waitable_drop(task, 1)
-    await canon_waitable_drop(task, 2)
+    await canon_subtask_drop(task, 1)
+    await canon_subtask_drop(task, 2)
 
     return []
 
@@ -1077,10 +1077,10 @@ async def core_func(task, args):
     assert(ret == 4)
     [ret] = await canon_stream_write(U8Type(), opts, task, wsi1, 0, 4)
     assert(ret == 4)
-    [] = await canon_waitable_drop(task, rsi1)
-    [] = await canon_waitable_drop(task, rsi2)
-    [] = await canon_waitable_drop(task, wsi1)
-    [] = await canon_waitable_drop(task, wsi2)
+    [] = await canon_stream_close_readable(U8Type(), task, rsi1)
+    [] = await canon_stream_close_readable(U8Type(), task, rsi2)
+    [] = await canon_stream_close_writable(U8Type(), task, wsi1)
+    [] = await canon_stream_close_writable(U8Type(), task, wsi2)
     return []
 
   await canon_lift(opts, inst, ft, core_func, None, on_start, on_return)
@@ -1163,11 +1163,11 @@ async def core_func(task, args):
     assert(ret == 4)
     [ret] = await canon_stream_read(U8Type(), sync_opts, task, rsi1, 0, 4)
     assert(ret == definitions.CLOSED)
-    [] = await canon_waitable_drop(task, rsi1)
+    [] = await canon_stream_close_readable(U8Type(), task, rsi1)
     assert(mem[0:4] == b'\x05\x06\x07\x08')
     [ret] = await canon_stream_write(U8Type(), opts, task, wsi2, 0, 4)
     assert(ret == 4)
-    [] = await canon_waitable_drop(task, wsi2)
+    [] = await canon_stream_close_writable(U8Type(), task, wsi2)
     [ret] = await canon_stream_read(U8Type(), opts, task, rsi2, 0, 4)
     assert(ret == definitions.BLOCKED)
     event, p1, p2 = await task.wait(sync = False)
@@ -1180,11 +1180,11 @@ async def core_func(task, args):
     assert(p2 == 4)
     [ret] = await canon_stream_read(U8Type(), opts, task, rsi2, 0, 4)
     assert(ret == definitions.CLOSED)
-    [] = await canon_waitable_drop(task, rsi2)
-    [] = await canon_waitable_drop(task, subi)
+    [] = await canon_stream_close_readable(U8Type(), task, rsi2)
+    [] = await canon_subtask_drop(task, subi)
     [ret] = await canon_stream_write(U8Type(), sync_opts, task, wsi1, 0, 4)
     assert(ret == 4)
-    [] = await canon_waitable_drop(task, wsi1)
+    [] = await canon_stream_close_writable(U8Type(), task, wsi1)
     return []
 
   await canon_lift(opts, inst, ft, core_func, None, on_start, on_return)
@@ -1236,7 +1236,7 @@ async def core_func(task, args):
     assert(ret == 0)
     result = int.from_bytes(mem[retp : retp+4], 'little', signed=False)
     assert(result == (wsi | 2**31))
-    [] = await canon_waitable_drop(task, wsi)
+    [] = await canon_stream_close_writable(U8Type(), task, wsi)
     return []
 
   def on_start(): return []
@@ -1283,7 +1283,7 @@ async def core_func(task, args):
     assert(event == EventCode.STREAM_READ)
     assert(p1 == rsi)
     assert(p2 == 2)
-    [] = await canon_waitable_drop(task, rsi)
+    [] = await canon_stream_close_readable(U8Type(), task, rsi)
 
     [wsi] = await canon_stream_new(U8Type(), task)
     assert(wsi == 1)
@@ -1300,7 +1300,7 @@ async def core_func(task, args):
     assert(p1 == wsi)
     assert(p2 == 4)
     assert(dst.received == [1,2,3,4,5,6])
-    [] = await canon_waitable_drop(task, wsi)
+    [] = await canon_stream_close_writable(U8Type(), task, wsi)
     dst.set_remain(100)
     assert(await dst.consume(100) is None)
     return []
@@ -1348,7 +1348,7 @@ async def core_func1(task, args):
 
     fut4.set_result(None)
 
-    [] = await canon_waitable_drop(task, wsi)
+    [] = await canon_stream_close_writable(U8Type(), task, wsi)
     return []
 
   func1 = partial(canon_lift, opts1, inst1, ft1, core_func1)
@@ -1394,12 +1394,12 @@ async def core_func2(task, args):
 
     [ret] = await canon_stream_read(U8Type(), opts2, task, rsi, 0, 2)
     assert(ret == definitions.CLOSED)
-    [] = await canon_waitable_drop(task, rsi)
+    [] = await canon_stream_close_readable(U8Type(), task, rsi)
 
     event, callidx, _ = await task.wait(sync = False)
     assert(event == EventCode.CALL_DONE)
     assert(callidx == subi)
-    [] = await canon_waitable_drop(task, subi)
+    [] = await canon_subtask_drop(task, subi)
     return []
 
   await canon_lift(opts2, inst2, ft2, core_func2, None, lambda:[], lambda _:())
@@ -1427,7 +1427,7 @@ async def core_func1(task, args):
     [ret] = await canon_stream_read(BorrowType(rt), stream_opts, task, rsi, 0, 2)
     assert(ret == definitions.CLOSED)
 
-    [] = await canon_waitable_drop(task, rsi)
+    [] = await canon_stream_close_readable(BorrowType(rt), task, rsi)
 
     h1 = mem1[4]
     h2 = mem1[8]
@@ -1460,13 +1460,13 @@ async def core_func2(task, args):
 
     [ret] = await canon_stream_write(BorrowType(rt), async_opts2, task, wsi, 0, 2)
     assert(ret == 2)
-    [] = await canon_waitable_drop(task, wsi)
+    [] = await canon_stream_close_writable(BorrowType(rt), task, wsi)
 
     event, p1, _ = await task.wait(sync = False)
     assert(event == EventCode.CALL_DONE)
     assert(p1 == subi)
 
-    [] = await canon_waitable_drop(task, subi)
+    [] = await canon_subtask_drop(task, subi)
     return []
 
   await canon_lift(sync_opts2, inst2, ft2, core_func2, None, lambda:[], lambda _:())
@@ -1508,7 +1508,7 @@ async def core_func(task, args):
     assert(got == [0xa, 0xb])
     [ret] = await canon_stream_cancel_write(True, task, wsi)
     assert(ret == 2)
-    [] = await canon_waitable_drop(task, wsi)
+    [] = await canon_stream_close_writable(U8Type(), task, wsi)
     host_sink.set_remain(100)
     assert(await host_sink.consume(100) is None)
 
@@ -1523,7 +1523,7 @@ async def core_func(task, args):
     assert(got == [1, 2])
     [ret] = await canon_stream_cancel_write(False, task, wsi)
     assert(ret == 2)
-    [] = await canon_waitable_drop(task, wsi)
+    [] = await canon_stream_close_writable(U8Type(), task, wsi)
     host_sink.set_remain(100)
     assert(await host_sink.consume(100) is None)
 
@@ -1535,7 +1535,7 @@ async def core_func(task, args):
     assert(ret == definitions.BLOCKED)
     [ret] = await canon_stream_cancel_read(True, task, rsi)
     assert(ret == 0)
-    [] = await canon_waitable_drop(task, rsi)
+    [] = await canon_stream_close_readable(U8Type(), task, rsi)
 
     retp = 0
     [ret] = await canon_lower(lower_opts, host_ft2, host_func2, task, [retp])
@@ -1554,7 +1554,7 @@ async def core_func(task, args):
     assert(p1 == rsi)
     assert(p2 == 2)
     assert(mem[0:2] == b'\x07\x08')
-    [] = await canon_waitable_drop(task, rsi)
+    [] = await canon_stream_close_readable(U8Type(), task, rsi)
 
     return []
 
@@ -1637,9 +1637,9 @@ async def core_func(task, args):
     assert(p2 == 1)
     assert(mem[readp] == 43)
 
-    [] = await canon_waitable_drop(task, wfi)
-    [] = await canon_waitable_drop(task, rfi)
-    [] = await canon_waitable_drop(task, subi)
+    [] = await canon_future_close_writable(U8Type(), task, wfi)
+    [] = await canon_future_close_readable(U8Type(), task, rfi)
+    [] = await canon_subtask_drop(task, subi)
 
     [wfi] = await canon_future_new(U8Type(), task)
     retp = 0
@@ -1666,9 +1666,9 @@ async def core_func(task, args):
     assert(ret == 1)
     assert(mem[readp] == 43)
 
-    [] = await canon_waitable_drop(task, wfi)
-    [] = await canon_waitable_drop(task, rfi)
-    [] = await canon_waitable_drop(task, subi)
+    [] = await canon_future_close_writable(U8Type(), task, wfi)
+    [] = await canon_future_close_readable(U8Type(), task, rfi)
+    [] = await canon_subtask_drop(task, subi)
 
     return []
 

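The direction and element-type guards added by `close_async_value` in this
patch can be pictured with a small standalone sketch (toy classes and names,
not the spec's `Table` or `StreamHandle`):
```python
# Toy model, illustration only: close must name the right end (readable vs.
# writable) and element type, and traps if a read/write is still in progress.
class ToyHandle:
  def __init__(self, t, writable):
    self.t = t
    self.writable = writable
    self.copying_buffer = None
  def drop(self):
    assert not self.copying_buffer, 'trap: close during an active read/write'

def toy_close(waitables, i, expect_writable, t):
  h = waitables.pop(i, None)                 # like Table.remove, frees the slot
  assert h is not None, 'trap: no such waitable'
  assert h.writable == expect_writable, 'trap: wrong end of the stream'
  assert h.t == t, 'trap: element type mismatch'
  h.drop()

waitables = {1: ToyHandle('u8', writable = True)}
toy_close(waitables, 1, expect_writable = True, t = 'u8')   # ok
```
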
From 3cf3d5f2fc1a6bc52005ce1041cb316e46003d7d Mon Sep 17 00:00:00 2001
From: Luke Wagner <mail@lukewagner.name>
Date: Thu, 31 Oct 2024 14:01:27 -0500
Subject: [PATCH 15/22] Remove dangling syntax rule for waitable.drop

---
 design/mvp/Explainer.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/design/mvp/Explainer.md b/design/mvp/Explainer.md
index 1c78d04c..032dbd30 100644
--- a/design/mvp/Explainer.md
+++ b/design/mvp/Explainer.md
@@ -1387,7 +1387,6 @@ canon ::= ...
         | (canon future.cancel-write async? (core func <id>?)) ๐Ÿ”€
         | (canon future.close-readable <typeidx> (core func <id>?)) ๐Ÿ”€
         | (canon future.close-writable <typeidx> (core func <id>?)) ๐Ÿ”€
-        | (canon waitable.drop (core func <id>?)) ๐Ÿ”€
         | (canon thread.spawn <typeidx> (core func <id>?)) ๐Ÿงต
         | (canon thread.hw_concurrency (core func <id>?)) ๐Ÿงต
 ```

From 4581ba55812014071fa1d11ce7c8b60be46f3ae9 Mon Sep 17 00:00:00 2001
From: Luke Wagner <mail@lukewagner.name>
Date: Thu, 31 Oct 2024 14:05:16 -0500
Subject: [PATCH 16/22] Update subsection links and other dangling
 waitable.drop reference

---
 design/mvp/CanonicalABI.md | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/design/mvp/CanonicalABI.md b/design/mvp/CanonicalABI.md
index 5903cb50..999f3088 100644
--- a/design/mvp/CanonicalABI.md
+++ b/design/mvp/CanonicalABI.md
@@ -36,10 +36,11 @@ being specified here.
   * [`canon task.wait`](#-canon-taskwait) ๐Ÿ”€
   * [`canon task.poll`](#-canon-taskpoll) ๐Ÿ”€
   * [`canon task.yield`](#-canon-taskyield) ๐Ÿ”€
+  * [`canon subtask.drop`](#-canon-subtaskdrop) ๐Ÿ”€
   * [`canon {stream,future}.new`](#-canon-streamfuturenew) ๐Ÿ”€
   * [`canon {stream,future}.{read,write}`](#-canon-streamfuturereadwrite) ๐Ÿ”€
   * [`canon {stream,future}.cancel-{read,write}`](#-canon-streamfuturecancel-readwrite) ๐Ÿ”€
-  * [`canon waitable.drop`](#-canon-waitabledrop) ๐Ÿ”€
+  * [`canon {stream,future}.close-{readable,writable}`](#-canon-streamfutureclose-readablewritable) ๐Ÿ”€
 
 
 ## Supporting definitions
@@ -2724,7 +2725,7 @@ In the `async` case, if `do_call` blocks before `Subtask.finish` (signalled by
 `callee` calling `on_block`), the `Subtask` is added to the current component
 instance's `waitables` table, giving it an `i32` index that will be returned
 by `task.wait` to signal progress on this subtask. The `todo` increment is
-matched by a decrement in `canon_waitable_drop` and ensures that all subtasks
+matched by a decrement in `canon_subtask_drop` and ensures that all subtasks
 of a supertask complete before the supertask completes. The `notify_supertask`
 flag is set to tell `Subtask` methods (below) to asynchronously notify the
 supertask of progress.

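The `todo` pairing described above can also be sketched in isolation
(hypothetical classes, not the spec code): the supertask's counter is bumped
when a subtask is parked in the `waitables` table and decremented again by
`subtask.drop`, so a supertask cannot finish while subtasks are outstanding:
```python
# Illustration only: the increment is matched by the decrement performed via
# canon_subtask_drop / Subtask.drop, per the prose above.
class ToySupertask:
  def __init__(self):
    self.todo = 0
  def exit(self):
    assert self.todo == 0, 'trap: undropped subtasks remain'

class ToySubtask:
  def __init__(self, supertask):
    self.supertask = supertask
    supertask.todo += 1        # subtask parked in the waitables table
  def drop(self):
    self.supertask.todo -= 1   # canon subtask.drop

parent = ToySupertask()
child = ToySubtask(parent)
child.drop()
parent.exit()                  # ok only because the subtask was dropped first
```
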
From e074f4192442185212e7e848fee4d012530b21ed Mon Sep 17 00:00:00 2001
From: Luke Wagner <mail@lukewagner.name>
Date: Tue, 29 Oct 2024 18:11:37 -0500
Subject: [PATCH 17/22] Add 'error' type and 'canon
 error.{new,debug-message,drop}' built-ins

---
 design/mvp/Async.md                     |  10 +-
 design/mvp/Binary.md                    |   5 +
 design/mvp/CanonicalABI.md              | 230 +++++++++++++++++++-----
 design/mvp/Explainer.md                 |  45 +++++
 design/mvp/canonical-abi/definitions.py | 135 ++++++++++----
 design/mvp/canonical-abi/run_tests.py   |  46 +++--
 6 files changed, 372 insertions(+), 99 deletions(-)

diff --git a/design/mvp/Async.md b/design/mvp/Async.md
index 6d2fcc98..402e3b43 100644
--- a/design/mvp/Async.md
+++ b/design/mvp/Async.md
@@ -232,11 +232,10 @@ f: func(x: whatever) -> stream<T>;
 g: func(s: stream<T>) -> stuff;
 ```
 `g(f(x))` works as you might hope, concurrently streaming `x` into `f` which
-concurrently streams its results into `g`. (The addition of [`error`](#TODO)
-will provide a generic answer to the question of what happens if `f`
-experiences an error: `f` can close its returned writable stream end with an
-`error` that will be propagated into `g` which should then propagate the error
-somehow into `stuff`.)
+concurrently streams its results into `g`. If `f` experiences an error, it can
+close its returned `stream<T>` with an [`error`](Explainer.md#error-type) value
+which `g` will receive along with the notification that its readable stream
+was closed.
 
 If a component instance *would* receive the readable end of a stream for which
 it already owns the writable end, the readable end disappears and the existing
@@ -518,7 +517,6 @@ For now, this remains a [TODO](#todo) and validation will reject `async`-lifted
 Native async support is being proposed incrementally. The following features
 will be added in future chunks roughly in the order list to complete the full
 "async" story:
-* add `error` type that can be included when closing a stream/future
 * `nonblocking` function type attribute: allow a function to declare in its
   type that it will not transitively do anything blocking
 * define what `async` means for `start` functions (top-level await + background
diff --git a/design/mvp/Binary.md b/design/mvp/Binary.md
index 14313ab8..5159216c 100644
--- a/design/mvp/Binary.md
+++ b/design/mvp/Binary.md
@@ -190,6 +190,7 @@ primvaltype   ::= 0x7f                                    => bool
                 | 0x75                                    => f64
                 | 0x74                                    => char
                 | 0x73                                    => string
+                | 0x64                                    => error
 defvaltype    ::= pvt:<primvaltype>                       => pvt
                 | 0x72 lt*:vec(<labelvaltype>)            => (record (field lt)*)    (if |lt*| > 0)
                 | 0x71 case*:vec(<case>)                  => (variant case+) (if |case*| > 0)
@@ -307,6 +308,9 @@ canon    ::= 0x00 0x00 f:<core:funcidx> opts:<opts> ft:<typeidx> => (canon lift
            | 0x19 async?:<async?>                                => (canon future.cancel-write async? (core func)) ๐Ÿ”€
            | 0x1a t:<typeidx>                                    => (canon future.close-readable t (core func)) ๐Ÿ”€
            | 0x1b t:<typeidx>                                    => (canon future.close-writable t (core func)) ๐Ÿ”€
+           | 0x1c opts:<opts>                                    => (canon error.new opts (core func)) ๐Ÿ”€
+           | 0x1d opts:<opts>                                    => (canon error.debug-message opts (core func)) ๐Ÿ”€
+           | 0x1e                                                => (canon error.drop (core func)) ๐Ÿ”€
 async?   ::= 0x00                                                =>
            | 0x01                                                => async
 opts     ::= opt*:vec(<canonopt>)                                => opt*
@@ -478,6 +482,7 @@ named once.
 
 ## Binary Format Warts to Fix in a 1.0 Release
 
+* The opcodes (for types, canon built-ins, etc) should be re-sorted
 * The two `list` type codes should be merged into one with an optional immediate.
 * The `0x00` prefix byte of `importname'` and `exportname'` will be removed or repurposed.
 
diff --git a/design/mvp/CanonicalABI.md b/design/mvp/CanonicalABI.md
index 999f3088..9852e183 100644
--- a/design/mvp/CanonicalABI.md
+++ b/design/mvp/CanonicalABI.md
@@ -41,6 +41,9 @@ being specified here.
   * [`canon {stream,future}.{read,write}`](#-canon-streamfuturereadwrite) ๐Ÿ”€
   * [`canon {stream,future}.cancel-{read,write}`](#-canon-streamfuturecancel-readwrite) ๐Ÿ”€
   * [`canon {stream,future}.close-{readable,writable}`](#-canon-streamfutureclose-readablewritable) ๐Ÿ”€
+  * [`canon error.new`](#-canon-errornew) ๐Ÿ”€
+  * [`canon error.debug-message`](#-canon-errordebug-message) ๐Ÿ”€
+  * [`canon error.drop`](#-canon-errordrop) ๐Ÿ”€
 
 
 ## Supporting definitions
@@ -148,6 +151,7 @@ state that `canon`-generated functions use to maintain component invariants.
 class ComponentInstance:
   resources: ResourceTables
   waitables: Table[Subtask|StreamHandle|FutureHandle]
+  errors: Table[Error]
   num_tasks: int
   may_leave: bool
   backpressure: bool
@@ -158,6 +162,7 @@ class ComponentInstance:
   def __init__(self):
     self.resources = ResourceTables()
     self.waitables = Table[Subtask|StreamHandle|FutureHandle]()
+    self.errors = Table[Error]()
     self.num_tasks = 0
     self.may_leave = True
     self.backpressure = False
@@ -819,6 +824,7 @@ class WritableBuffer(Buffer):
 
 class ReadableStream:
   closed: Callable[[], bool]
+  closed_with_error: Callable[[], Optional[Error]]
   read: Callable[[WritableBuffer, OnBlockCallback], Awaitable]
   cancel_read: Callable[[WritableBuffer, OnBlockCallback], Awaitable]
   close: Callable[[]]
@@ -838,6 +844,9 @@ Going through the methods in these interfaces:
   not to be used again by the `read` being cancelled.
 * `close` may only be called if there is no active `read` and leaves the stream
   `closed` without possibility of blocking.
+* `closed_with_error` may only be called once `closed` has returned `True`; it
+  returns the optional `Error` (defined below) that the writable end was
+  closed with, or `None` if the writable end was closed without an `Error`.
 
 The abstract `WritableBuffer` interface is implemented by the
 `WritableBufferGuestImpl` class below. The `ReadableBufferGuestImpl` class is
@@ -900,17 +909,22 @@ that `dst.lower(src.lift(...))` is meant to be fused into a single copy from
 class ReadableStreamGuestImpl(ReadableStream):
   impl: ComponentInstance
   is_closed: bool
+  error: Optional[Error]
   other_buffer: Optional[Buffer]
   other_future: Optional[asyncio.Future]
 
   def __init__(self, inst):
     self.impl = inst
     self.is_closed = False
+    self.error = None
     self.other_buffer = None
     self.other_future = None
 
   def closed(self):
     return self.is_closed
+  def closed_with_error(self):
+    assert(self.is_closed)
+    return self.error
 
   async def read(self, dst, on_block):
     await self.rendezvous(dst, self.other_buffer, dst, on_block)
@@ -986,14 +1000,16 @@ assume as a precondition that there is not an outstanding `read` and thus there
 is no need to block on a `cancel_read`. There may however be a pending write
 `await`ing `other_future`, but since we're on the reader end and we know that
 there are no concurrent `read`s, we can simply resolve `other_future` and move
-on without blocking on anything. `close` can also be called (below) from the
-writer direction, in which case all the above logic applies, in the opposite
-direction. Thus, there is only a single direction-agnostic `close` that is
-shared by both the reader and writer ends.
+on without blocking on anything. `close` can also be called by the writable end
+of a stream (below), in which case all the above logic applies, but in the
+opposite direction. Thus, there is only a single direction-agnostic `close`
+that is shared by both the reader and writer ends.
 ```python
-  def close(self):
+  def close(self, error = None):
     if not self.is_closed:
+      assert(not self.error)
       self.is_closed = True
+      self.error = error
       self.other_buffer = None
       if self.other_future:
         self.other_future.set_result(None)
@@ -1001,6 +1017,12 @@ shared by both the reader and writer ends.
     else:
       assert(not self.other_buffer and not self.other_future)
 ```
+Note that when called via the `ReadableStream` abstract interface, `error` is
+necessarily `None`, whereas if called from the writer end, `error` may or may
+not be an `Error`. In the special case that the writer end passes a non-`None`
+error and the stream has already been closed by the reader end, the `Error` is
+dropped, since the reader has already racily cancelled the stream and has no
+way to see the `Error`.
 
 The [readable and writable ends] of a stream are stored as `StreamHandle`
 objects in the component instance's `waitables` table. Both ends of a stream
@@ -1042,9 +1064,9 @@ class StreamHandle:
     self.copying_task = None
     self.copying_buffer = None
 
-  def drop(self):
+  def drop(self, error):
     trap_if(self.copying_buffer)
-    self.stream.close()
+    self.stream.close(error)
     if isinstance(self.borrow_scope, Task):
       self.borrow_scope.todo -= 1
 ```
@@ -1119,17 +1141,14 @@ class WritableFutureHandle(FutureHandle):
     if src.remain() == 0:
       self.stream.close()
 
-  def drop(self):
-    trap_if(not self.stream.closed())
-    FutureHandle.drop(self)
+  def drop(self, error):
+    trap_if(not self.stream.closed() and not error)
+    FutureHandle.drop(self, error)
 ```
-The overridden `WritableFutureHandle.drop` method traps if the internal stream
-has not been closed (and thus the future value has not been written). (*Note
-that there is a [TODO](Async.md#TODO) to add an `error` type and new built-ins
-for dropping a stream or future handle with an `error` which will **not** trap,
-thus allowing a `future` to be resolved without producing a value iff it
-produces an `error`.*)
-
+The overridden `WritableFutureHandle.drop` method traps if the future value has
+not already been written and the future is not being closed with an `error`.
+Thus, a future must either have its single value successfully copied from the
+writer to the reader or be closed with an `error`, but never both.
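+
+As a non-normative sketch (using hypothetical values `wfi`, `msg_ptr` and
+`msg_len` that are not defined above), the two non-trapping ways for the
+writer of a future to finish are:
+```python
+async def finish_with_value(task, wfi):
+  # the single value was already copied to the reader via future.write,
+  # so the writable end is closed without an error
+  await canon_future_close_writable(U8Type(), task, wfi, 0)
+
+async def finish_with_error(opts, task, wfi, msg_ptr, msg_len):
+  # no value was written; resolve the future with an error instead
+  [errori] = await canon_error_new(opts, task, msg_ptr, msg_len)
+  await canon_future_close_writable(U8Type(), task, wfi, errori)
+```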
 
 ### Despecialization
 
@@ -1200,6 +1219,7 @@ def alignment(t):
     case F64Type()                   : return 8
     case CharType()                  : return 4
     case StringType()                : return 4
+    case ErrorType()                 : return 4
     case ListType(t, l)              : return alignment_list(t, l)
     case RecordType(fields)          : return alignment_record(fields)
     case VariantType(cases)          : return alignment_variant(cases)
@@ -1289,6 +1309,7 @@ def elem_size(t):
     case F64Type()                   : return 8
     case CharType()                  : return 4
     case StringType()                : return 8
+    case ErrorType()                 : return 4
     case ListType(t, l)              : return elem_size_list(t, l)
     case RecordType(fields)          : return elem_size_record(fields)
     case VariantType(cases)          : return elem_size_variant(cases)
@@ -1354,6 +1375,7 @@ def load(cx, ptr, t):
     case F64Type()          : return decode_i64_as_float(load_int(cx, ptr, 8))
     case CharType()         : return convert_i32_to_char(cx, load_int(cx, ptr, 4))
     case StringType()       : return load_string(cx, ptr)
+    case ErrorType()        : return lift_error(cx, load_int(cx, ptr, 4))
     case ListType(t, l)     : return load_list(cx, ptr, t, l)
     case RecordType(fields) : return load_record(cx, ptr, fields)
     case VariantType(cases) : return load_variant(cx, ptr, cases)
@@ -1438,14 +1460,16 @@ allocation size choices in many cases. Thus, the value produced by
 `load_string` isn't simply a Python `str`, but a *tuple* containing a `str`,
 the original encoding and the original byte length.
 ```python
-def load_string(cx, ptr):
+String = tuple[str, str, int]
+
+def load_string(cx, ptr) -> String:
   begin = load_int(cx, ptr, 4)
   tagged_code_units = load_int(cx, ptr + 4, 4)
   return load_string_from_range(cx, begin, tagged_code_units)
 
 UTF16_TAG = 1 << 31
 
-def load_string_from_range(cx, ptr, tagged_code_units):
+def load_string_from_range(cx, ptr, tagged_code_units) -> String:
   match cx.opts.string_encoding:
     case 'utf8':
       alignment = 1
@@ -1474,6 +1498,13 @@ def load_string_from_range(cx, ptr, tagged_code_units):
   return (s, cx.opts.string_encoding, tagged_code_units)
 ```
 
+Error values are lifted directly from the per-component-instance `errors`
+table:
+```python
+def lift_error(cx, i):
+  return cx.inst.errors.get(i)
+```
+
 Lists and records are loaded by recursively loading their elements/fields:
 ```python
 def load_list(cx, ptr, elem_type, maybe_length):
@@ -1664,6 +1695,7 @@ def store(cx, v, t, ptr):
     case F64Type()          : store_int(cx, encode_float_as_i64(v), ptr, 8)
     case CharType()         : store_int(cx, char_to_i32(v), ptr, 4)
     case StringType()       : store_string(cx, v, ptr)
+    case ErrorType()        : store_int(cx, lower_error(cx, v), ptr, 4)
     case ListType(t, l)     : store_list(cx, v, ptr, t, l)
     case RecordType(fields) : store_record(cx, v, ptr, fields)
     case VariantType(cases) : store_variant(cx, v, ptr, cases)
@@ -1763,12 +1795,12 @@ We start with a case analysis to enumerate all the meaningful encoding
 combinations, subdividing the `latin1+utf16` encoding into either `latin1` or
 `utf16` based on the `UTF16_BIT` flag set by `load_string`:
 ```python
-def store_string(cx, v, ptr):
+def store_string(cx, v: String, ptr):
   begin, tagged_code_units = store_string_into_range(cx, v)
   store_int(cx, begin, ptr, 4)
   store_int(cx, tagged_code_units, ptr + 4, 4)
 
-def store_string_into_range(cx, v):
+def store_string_into_range(cx, v: String):
   src, src_encoding, src_tagged_code_units = v
 
   if src_encoding == 'latin1+utf16':
@@ -1950,6 +1982,13 @@ def store_probably_utf16_to_latin1_or_utf16(cx, src, src_code_units):
   return (ptr, latin1_size)
 ```
 
+Error values are lowered by storing them directly into the
+per-component-instance `errors` table and passing the `i32` index to wasm.
+```python
+def lower_error(cx, v):
+  return cx.inst.errors.add(v)
+```
+
 Lists and records are stored by recursively storing their elements and
 are symmetric to the loading functions. Unlike strings, lists can
 simply allocate based on the up-front knowledge of length and static
@@ -2188,6 +2227,7 @@ def flatten_type(t):
     case F64Type()                        : return ['f64']
     case CharType()                       : return ['i32']
     case StringType()                     : return ['i32', 'i32']
+    case ErrorType()                      : return ['i32']
     case ListType(t, l)                   : return flatten_list(t, l)
     case RecordType(fields)               : return flatten_record(fields)
     case VariantType(cases)               : return flatten_variant(cases)
@@ -2282,6 +2322,7 @@ def lift_flat(cx, vi, t):
     case F64Type()          : return canonicalize_nan64(vi.next('f64'))
     case CharType()         : return convert_i32_to_char(cx, vi.next('i32'))
     case StringType()       : return lift_flat_string(cx, vi)
+    case ErrorType()        : return lift_error(cx, vi.next('i32'))
     case ListType(t, l)     : return lift_flat_list(cx, vi, t, l)
     case RecordType(fields) : return lift_flat_record(cx, vi, fields)
     case VariantType(cases) : return lift_flat_variant(cx, vi, cases)
@@ -2410,6 +2451,7 @@ def lower_flat(cx, v, t):
     case F64Type()          : return [maybe_scramble_nan64(v)]
     case CharType()         : return [char_to_i32(v)]
     case StringType()       : return lower_flat_string(cx, v)
+    case ErrorType()        : return lower_error(cx, v)
     case ListType(t, l)     : return lower_flat_list(cx, v, t, l)
     case RecordType(fields) : return lower_flat_record(cx, v, fields)
     case VariantType(cases) : return lower_flat_variant(cx, v, cases)
@@ -3081,11 +3123,11 @@ async def async_copy(HandleT, BufferT, t, opts, event_code, task, i, ptr, n):
   cx = LiftLowerContext(opts, task.inst, h.borrow_scope)
   buffer = BufferT(cx, t, ptr, n)
   if h.stream.closed():
-    flat_results = [CLOSED]
+    flat_results = [pack_async_copy_result(task, buffer, h)]
   else:
     if opts.sync:
       await task.call_sync(h.copy, buffer)
-      flat_results = [pack_async_copy_result(buffer, h)]
+      flat_results = [pack_async_copy_result(task, buffer, h)]
     else:
       async def do_copy(on_block):
         await h.copy(buffer, on_block)
@@ -3093,7 +3135,7 @@ async def async_copy(HandleT, BufferT, t, opts, event_code, task, i, ptr, n):
           def copy_event():
             if h.copying_buffer is buffer:
               h.stop_copying()
-              return (event_code, i, pack_async_copy_result(buffer, h))
+              return (event_code, i, pack_async_copy_result(task, buffer, h))
             else:
               return None
           task.notify(copy_event)
@@ -3102,7 +3144,7 @@ async def async_copy(HandleT, BufferT, t, opts, event_code, task, i, ptr, n):
           h.start_copying(task, buffer)
           flat_results = [BLOCKED]
         case Returned():
-          flat_results = [pack_async_copy_result(buffer, h)]
+          flat_results = [pack_async_copy_result(task, buffer, h)]
   return flat_results
 ```
 The trap if `not h.paired` prevents `write`s on the writable end of streams or
@@ -3131,17 +3173,28 @@ discovered on the next `read` or `write` call.
 BLOCKED = 0xffff_ffff
 CLOSED  = 0x8000_0000
 
-def pack_async_copy_result(buffer, h):
-  assert(buffer.progress <= Buffer.MAX_LENGTH < CLOSED < BLOCKED)
+def pack_async_copy_result(task, buffer, h):
   if buffer.progress:
+    assert(buffer.progress <= Buffer.MAX_LENGTH < BLOCKED)
+    assert(not (buffer.progress & CLOSED))
     return buffer.progress
-  if h.stream.closed():
-    return CLOSED
-  return 0
+  elif h.stream.closed():
+    if (error := h.stream.closed_with_error()):
+      assert(isinstance(h, ReadableStreamHandle|ReadableFutureHandle))
+      errori = task.inst.errors.add(error)
+      assert(errori != 0)
+    else:
+      errori = 0
+    assert(errori <= Table.MAX_LENGTH < BLOCKED)
+    assert(not (errori & CLOSED))
+    return errori | CLOSED
+  else:
+    return 0
 ```
-(When [`error`](Async.md#TODO) is added in a future PR, when the `CLOSED` bit
-is set, the low 31 bits will optionally contain the non-zero index of an
-`error` value in some new `errors` table.)
+Note that `error`s are only possible on the *readable* end of a stream or
+future (since, as defined below, only the *writable* end can close the stream
+with an `error`). Thus, `error`s only flow in the same direction as values, as
+an optional last value of the stream or future.
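+
+As a rough guest-side illustration (a non-normative sketch), core wasm
+receiving the packed `i32` returned by `stream.read`/`stream.write` (or
+delivered in the later `{STREAM,FUTURE}_{READ,WRITE}` event payload) could
+decode it as follows; `BLOCKED` must be tested before the `CLOSED` bit, since
+`BLOCKED` also has that bit set:
+```python
+def decode_copy_result(ret):
+  if ret == BLOCKED:
+    return ('blocked',)            # wait for the later {STREAM,FUTURE}_{READ,WRITE} event
+  elif ret & CLOSED:
+    errori = ret & ~CLOSED         # low 31 bits: error index, or 0 if no error
+    return ('closed', errori)
+  else:
+    return ('copied', ret)         # number of elements copied so far
+```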
 
 ### ๐Ÿ”€ `canon {stream,future}.cancel-{read,write}`
 
@@ -3184,19 +3237,19 @@ async def cancel_async_copy(HandleT, sync, task, i):
   trap_if(not isinstance(h, HandleT))
   trap_if(not h.copying_buffer)
   if h.stream.closed():
-    flat_results = [pack_async_copy_result(h.copying_buffer, h)]
+    flat_results = [pack_async_copy_result(task, h.copying_buffer, h)]
     h.stop_copying()
   else:
     if sync:
       await task.call_sync(h.cancel_copy, h.copying_buffer)
-      flat_results = [pack_async_copy_result(h.copying_buffer, h)]
+      flat_results = [pack_async_copy_result(task, h.copying_buffer, h)]
       h.stop_copying()
     else:
       match await call_and_handle_blocking(h.cancel_copy, h.copying_buffer):
         case Blocked():
           flat_results = [BLOCKED]
         case Returned():
-          flat_results = [pack_async_copy_result(h.copying_buffer, h)]
+          flat_results = [pack_async_copy_result(task, h.copying_buffer, h)]
           h.stop_copying()
   return flat_results
 ```
@@ -3225,23 +3278,110 @@ performing the guards and bookkeeping defined by
 `{Readable,Writable}{Stream,Future}Handle.drop()` above.
 ```python
 async def canon_stream_close_readable(t, task, i):
-  return await close_async_value(ReadableStreamHandle, t, task, i)
+  return await close_async_value(ReadableStreamHandle, t, task, i, 0)
 
-async def canon_stream_close_writable(t, task, i):
-  return await close_async_value(WritableStreamHandle, t, task, i)
+async def canon_stream_close_writable(t, task, hi, errori):
+  return await close_async_value(WritableStreamHandle, t, task, hi, errori)
 
 async def canon_future_close_readable(t, task, i):
-  return await close_async_value(ReadableFutureHandle, t, task, i)
+  return await close_async_value(ReadableFutureHandle, t, task, i, 0)
 
-async def canon_future_close_writable(t, task, i):
-  return await close_async_value(WritableFutureHandle, t, task, i)
+async def canon_future_close_writable(t, task, hi, errori):
+  return await close_async_value(WritableFutureHandle, t, task, hi, errori)
 
-async def close_async_value(HandleT, t, task, i):
+async def close_async_value(HandleT, t, task, hi, errori):
   trap_if(not task.inst.may_leave)
-  h = task.inst.waitables.remove(i)
+  h = task.inst.waitables.remove(hi)
+  if errori == 0:
+    error = None
+  else:
+    error = task.inst.errors.get(errori)
   trap_if(not isinstance(h, HandleT))
   trap_if(h.t != t)
-  h.drop()
+  h.drop(error)
+  return []
+```
+Note that only the writable ends of streams and futures can be closed with a
+final `error` value, and thus `error`s only flow in the same direction as
+values, as an optional last value of the stream or future.
+
+### ๐Ÿ”€ `canon error.new`
+
+For a canonical definition:
+```wasm
+(canon error.new $opts (core func $f))
+```
+validation specifies:
+* `$f` is given type `(func (param i32 i32) (result i32))`
+
+Calling `$f` calls the following function, which uses the `$opts` immediate to
+(non-deterministically) lift the debug message, create a new `Error` value,
+store it in the per-component-instance `errors` table and return its index.
+```python
+@dataclass
+class Error:
+  debug_message: String
+
+async def canon_error_new(opts, task, ptr, tagged_code_units):
+  trap_if(not task.inst.may_leave)
+  if DETERMINISTIC_PROFILE or random.randint(0,1):
+    s = String(('', 'utf8', 0))
+  else:
+    cx = LiftLowerContext(opts, task.inst)
+    s = load_string_from_range(cx, ptr, tagged_code_units)
+    s = host_defined_transformation(s)
+  i = task.inst.errors.add(Error(s))
+  return [i]
+```
+Supporting the requirement (introduced in the [explainer](Explainer.md#error-type))
+that wasm code must not depend on the contents of `error` values for
+behavioral correctness, the debug message is discarded non-deterministically
+or, in the deterministic profile, always. Importantly
+(for performance), when the debug message is discarded, it is not even lifted
+and thus the O(N) well-formedness conditions are not checked. (Note that
+`host_defined_transformation` is not defined by the Canonical ABI and stands
+for an arbitrary host-defined function.)
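+
+For example (a non-normative sketch mirroring the tests in `run_tests.py`,
+with hypothetical values `wsi`, `msg_ptr` and `msg_len`), a writer signalling
+failure can create an `error`, close its writable stream end with it and then
+drop its own handle to the `error`:
+```python
+[errori] = await canon_error_new(opts, task, msg_ptr, msg_len)
+[] = await canon_stream_close_writable(U8Type(), task, wsi, errori)
+[] = await canon_error_drop(task, errori)
+```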
+
+### ๐Ÿ”€ `canon error.debug-message`
+
+For a canonical definition:
+```wasm
+(canon error.debug-message $opts (core func $f))
+```
+validation specifies:
+* `$f` is given type `(func (param i32 i32))`
+
+Calling `$f` calls the following function, which uses the `$opts` immediate to
+lower the `Error`'s contained debug message. While *producing* an `error`
+value may non-deterministically discard or transform the debug message, a
+single `error` value must return the same debug message from
+`error.debug-message` over time.
+```python
+async def canon_error_debug_message(opts, task, i, ptr):
+  trap_if(not task.inst.may_leave)
+  error = task.inst.errors.get(i)
+  cx = LiftLowerContext(opts, task.inst)
+  store_string(cx, error.debug_message, ptr)
+  return []
+```
+Note that `ptr` points to an 8-byte region of memory into which the pointer
+and length of the debug string (allocated via `opts.realloc`) are stored.
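+
+For example (a non-normative sketch mirroring `run_tests.py`, where `errori`
+is a previously-obtained error index and `mem` is the instance's linear
+memory), the guest can read back the stored (pointer, length) pair after the
+call returns:
+```python
+retp = 0
+[] = await canon_error_debug_message(opts, task, errori, retp)
+msg_ptr = int.from_bytes(mem[retp : retp+4], 'little')
+msg_units = int.from_bytes(mem[retp+4 : retp+8], 'little')  # tagged code units
+```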
+
+### ๐Ÿ”€ `canon error.drop`
+
+For a canonical definition:
+```wasm
+(canon error.drop (core func $f))
+```
+validation specifies:
+* `$f` is given type `(func (param i32))`
+
+Calling `$f` calls the following function, which drops the `error` value from
+the current component instance's `errors` table.
+```python
+async def canon_error_drop(task, i):
+  trap_if(not task.inst.may_leave)
+  task.inst.errors.remove(i)
   return []
 ```
 
diff --git a/design/mvp/Explainer.md b/design/mvp/Explainer.md
index 032dbd30..3876ce28 100644
--- a/design/mvp/Explainer.md
+++ b/design/mvp/Explainer.md
@@ -14,6 +14,7 @@ more user-focused explanation, take a look at the
   * [Type definitions](#type-definitions)
     * [Fundamental value types](#fundamental-value-types)
       * [Numeric types](#numeric-types)
+      * [Error type](#error-type)
       * [Container types](#container-types)
       * [Handle types](#handle-types)
       * [Asynchronous value types](#asynchronous-value-types)
@@ -26,6 +27,7 @@ more user-focused explanation, take a look at the
     * [Canonical built-ins](#canonical-built-ins)
       * [Resource built-ins](#resource-built-ins)
       * [Async built-ins](#-async-built-ins)
+      * [Error built-ins](#-error-built-ins)
       * [Threading built-ins](#-threading-built-ins)
   * [Value definitions](#-value-definitions)
   * [Start definitions](#-start-definitions)
@@ -541,6 +543,7 @@ defvaltype    ::= bool
                 | s8 | u8 | s16 | u16 | s32 | u32 | s64 | u64
                 | f32 | f64
                 | char | string
+                | error
                 | (record (field "<label>" <valtype>)+)
                 | (variant (case "<label>" <valtype>?)+)
                 | (list <valtype>)
@@ -599,6 +602,7 @@ sets of abstract values:
 | `u8`, `u16`, `u32`, `u64` | integers in the range [0, 2<sup>N</sup>-1] |
 | `f32`, `f64`              | [IEEE754] floating-point numbers, with a single NaN value |
 | `char`                    | [Unicode Scalar Values] |
+| `error`                   | an immutable, non-deterministic, host-defined value meant to aid in debugging |
 | `record`                  | heterogeneous [tuples] of named values |
 | `variant`                 | heterogeneous [tagged unions] of named values |
 | `list`                    | homogeneous, variable- or fixed-length [sequences] of values |
@@ -630,6 +634,29 @@ a single NaN value. And boolean values in core wasm are usually represented as
 `i32`s where operations interpret all-zeros as `false`, while at the
 component-level there is a `bool` type with `true` and `false` values.
 
+##### Error type
+
+Values of `error` type are immutable, non-deterministic, host-defined and
+meant to be propagated from failure sources to callers in order to aid in
+debugging. Currently `error` values contain only a "debug message" string whose
+contents are determined by the host. Core wasm can create `error` values given
+a debug string, but the host is free to arbitrarily transform (discard,
+preserve, prefix or suffix) this wasm-provided string.
+
+The intention of this highly-non-deterministic semantics is to provide hosts
+the full range of flexibility to:
+* append a basic callstack suitable for forensic debugging in production;
+* optimize for performance in high-volume production scenarios by slicing or
+  discarding error messages;
+* optimize for developer experience in debugging scenarios when debug metadata
+  is present by appending expensive-to-produce symbolicated callstacks.
+
+A consequence of this, however, is that components *must not* depend on the
+contents of `error` values for behavioral correctness. In particular, case
+analysis of the contents of an `error` should not determine *error recovery*;
+for this, proper `result` or `variant` types must be used in the WIT function
+signature.
+
 ##### Container types
 
 The `record`, `variant`, and `list` types allow for grouping, categorizing,
@@ -1387,6 +1414,9 @@ canon ::= ...
         | (canon future.cancel-write async? (core func <id>?)) ๐Ÿ”€
         | (canon future.close-readable <typeidx> (core func <id>?)) ๐Ÿ”€
         | (canon future.close-writable <typeidx> (core func <id>?)) ๐Ÿ”€
+        | (canon error.new <canonopt>* (core func <id>?))
+        | (canon error.debug-message <canonopt>* (core func <id>?))
+        | (canon error.drop (core func <id>?))
         | (canon thread.spawn <typeidx> (core func <id>?)) ๐Ÿงต
         | (canon thread.hw_concurrency (core func <id>?)) ๐Ÿงต
 ```
@@ -1514,6 +1544,21 @@ from the current component instance's [waitables](Async.md#waiting) table,
 trapping if the stream or future has a mismatched direction or type or are in
 the middle of a `read` or `write`.
 
+##### ๐Ÿ”€ Error built-ins
+
+The `error.new` built-in has type `[ptr:i32 len:i32] -> [i32]` and returns
+the index of a new `error` value in a per-component-instance table of errors.
+The given (`ptr`, `length`) pair is non-deterministically lifted and
+transformed to produce the `error`'s internal [debug message](#error-type).
+
+The `error.debug-message` built-in has type `[error:i32 ptr:i32] -> []`
+and writes the [debug message](#error-type) of the given `error` into `ptr`
+as an 8-byte (`ptr`, `length`) pair, according to the Canonical ABI for
+`string` given the `<canonopt>*` immediates.
+
+The `error.drop` built-in has type `[error:i32] -> []` and drops the given
+`error` value from the component instance's table.
+
 ##### ๐Ÿงต Threading built-ins
 
 The [shared-everything-threads] proposal adds component model built-ins for
diff --git a/design/mvp/canonical-abi/definitions.py b/design/mvp/canonical-abi/definitions.py
index 54b7fd1b..9783d479 100644
--- a/design/mvp/canonical-abi/definitions.py
+++ b/design/mvp/canonical-abi/definitions.py
@@ -129,6 +129,7 @@ class F32Type(PrimValType): pass
 class F64Type(PrimValType): pass
 class CharType(PrimValType): pass
 class StringType(PrimValType): pass
+class ErrorType(ValType): pass
 
 @dataclass
 class ListType(ValType):
@@ -221,6 +222,7 @@ class CanonicalOptions:
 class ComponentInstance:
   resources: ResourceTables
   waitables: Table[Subtask|StreamHandle|FutureHandle]
+  errors: Table[Error]
   num_tasks: int
   may_leave: bool
   backpressure: bool
@@ -231,6 +233,7 @@ class ComponentInstance:
   def __init__(self):
     self.resources = ResourceTables()
     self.waitables = Table[Subtask|StreamHandle|FutureHandle]()
+    self.errors = Table[Error]()
     self.num_tasks = 0
     self.may_leave = True
     self.backpressure = False
@@ -593,6 +596,7 @@ class WritableBuffer(Buffer):
 
 class ReadableStream:
   closed: Callable[[], bool]
+  closed_with_error: Callable[[], Optional[Error]]
   read: Callable[[WritableBuffer, OnBlockCallback], Awaitable]
   cancel_read: Callable[[WritableBuffer, OnBlockCallback], Awaitable]
   close: Callable[[]]
@@ -635,17 +639,22 @@ def lower(self, vs):
 class ReadableStreamGuestImpl(ReadableStream):
   impl: ComponentInstance
   is_closed: bool
+  error: Optional[Error]
   other_buffer: Optional[Buffer]
   other_future: Optional[asyncio.Future]
 
   def __init__(self, inst):
     self.impl = inst
     self.is_closed = False
+    self.error = None
     self.other_buffer = None
     self.other_future = None
 
   def closed(self):
     return self.is_closed
+  def closed_with_error(self):
+    assert(self.is_closed)
+    return self.error
 
   async def read(self, dst, on_block):
     await self.rendezvous(dst, self.other_buffer, dst, on_block)
@@ -684,9 +693,11 @@ async def cancel_rendezvous(self, this_buffer, on_block):
         self.other_future.set_result(None)
         self.other_future = None
 
-  def close(self):
+  def close(self, error = None):
     if not self.is_closed:
+      assert(not self.error)
       self.is_closed = True
+      self.error = error
       self.other_buffer = None
       if self.other_future:
         self.other_future.set_result(None)
@@ -722,9 +733,9 @@ def stop_copying(self):
     self.copying_task = None
     self.copying_buffer = None
 
-  def drop(self):
+  def drop(self, error):
     trap_if(self.copying_buffer)
-    self.stream.close()
+    self.stream.close(error)
     if isinstance(self.borrow_scope, Task):
       self.borrow_scope.todo -= 1
 
@@ -773,9 +784,9 @@ async def cancel_copy(self, src, on_block):
     if src.remain() == 0:
       self.stream.close()
 
-  def drop(self):
-    trap_if(not self.stream.closed())
-    FutureHandle.drop(self)
+  def drop(self, error):
+    trap_if(not self.stream.closed() and not error)
+    FutureHandle.drop(self, error)
 
 ### Despecialization
 
@@ -827,6 +838,7 @@ def alignment(t):
     case F64Type()                   : return 8
     case CharType()                  : return 4
     case StringType()                : return 4
+    case ErrorType()                 : return 4
     case ListType(t, l)              : return alignment_list(t, l)
     case RecordType(fields)          : return alignment_record(fields)
     case VariantType(cases)          : return alignment_variant(cases)
@@ -884,6 +896,7 @@ def elem_size(t):
     case F64Type()                   : return 8
     case CharType()                  : return 4
     case StringType()                : return 8
+    case ErrorType()                 : return 4
     case ListType(t, l)              : return elem_size_list(t, l)
     case RecordType(fields)          : return elem_size_record(fields)
     case VariantType(cases)          : return elem_size_variant(cases)
@@ -943,6 +956,7 @@ def load(cx, ptr, t):
     case F64Type()          : return decode_i64_as_float(load_int(cx, ptr, 8))
     case CharType()         : return convert_i32_to_char(cx, load_int(cx, ptr, 4))
     case StringType()       : return load_string(cx, ptr)
+    case ErrorType()        : return lift_error(cx, load_int(cx, ptr, 4))
     case ListType(t, l)     : return load_list(cx, ptr, t, l)
     case RecordType(fields) : return load_record(cx, ptr, fields)
     case VariantType(cases) : return load_variant(cx, ptr, cases)
@@ -993,14 +1007,16 @@ def convert_i32_to_char(cx, i):
   trap_if(0xD800 <= i <= 0xDFFF)
   return chr(i)
 
-def load_string(cx, ptr):
+String = tuple[str, str, int]
+
+def load_string(cx, ptr) -> String:
   begin = load_int(cx, ptr, 4)
   tagged_code_units = load_int(cx, ptr + 4, 4)
   return load_string_from_range(cx, begin, tagged_code_units)
 
 UTF16_TAG = 1 << 31
 
-def load_string_from_range(cx, ptr, tagged_code_units):
+def load_string_from_range(cx, ptr, tagged_code_units) -> String:
   match cx.opts.string_encoding:
     case 'utf8':
       alignment = 1
@@ -1028,6 +1044,9 @@ def load_string_from_range(cx, ptr, tagged_code_units):
 
   return (s, cx.opts.string_encoding, tagged_code_units)
 
+def lift_error(cx, i):
+  return cx.inst.errors.get(i)
+
 def load_list(cx, ptr, elem_type, maybe_length):
   if maybe_length is not None:
     return load_list_from_valid_range(cx, ptr, maybe_length, elem_type)
@@ -1153,6 +1172,7 @@ def store(cx, v, t, ptr):
     case F64Type()          : store_int(cx, encode_float_as_i64(v), ptr, 8)
     case CharType()         : store_int(cx, char_to_i32(v), ptr, 4)
     case StringType()       : store_string(cx, v, ptr)
+    case ErrorType()        : store_int(cx, lower_error(cx, v), ptr, 4)
     case ListType(t, l)     : store_list(cx, v, ptr, t, l)
     case RecordType(fields) : store_record(cx, v, ptr, fields)
     case VariantType(cases) : store_variant(cx, v, ptr, cases)
@@ -1207,12 +1227,12 @@ def char_to_i32(c):
   assert(0 <= i <= 0xD7FF or 0xD800 <= i <= 0x10FFFF)
   return i
 
-def store_string(cx, v, ptr):
+def store_string(cx, v: String, ptr):
   begin, tagged_code_units = store_string_into_range(cx, v)
   store_int(cx, begin, ptr, 4)
   store_int(cx, tagged_code_units, ptr + 4, 4)
 
-def store_string_into_range(cx, v):
+def store_string_into_range(cx, v: String):
   src, src_encoding, src_tagged_code_units = v
 
   if src_encoding == 'latin1+utf16':
@@ -1352,6 +1372,9 @@ def store_probably_utf16_to_latin1_or_utf16(cx, src, src_code_units):
   trap_if(ptr + latin1_size > len(cx.opts.memory))
   return (ptr, latin1_size)
 
+def lower_error(cx, v):
+  return cx.inst.errors.add(v)
+
 def store_list(cx, v, ptr, elem_type, maybe_length):
   if maybe_length is not None:
     assert(maybe_length == len(v))
@@ -1496,6 +1519,7 @@ def flatten_type(t):
     case F64Type()                        : return ['f64']
     case CharType()                       : return ['i32']
     case StringType()                     : return ['i32', 'i32']
+    case ErrorType()                      : return ['i32']
     case ListType(t, l)                   : return flatten_list(t, l)
     case RecordType(fields)               : return flatten_record(fields)
     case VariantType(cases)               : return flatten_variant(cases)
@@ -1562,6 +1586,7 @@ def lift_flat(cx, vi, t):
     case F64Type()          : return canonicalize_nan64(vi.next('f64'))
     case CharType()         : return convert_i32_to_char(cx, vi.next('i32'))
     case StringType()       : return lift_flat_string(cx, vi)
+    case ErrorType()        : return lift_error(cx, vi.next('i32'))
     case ListType(t, l)     : return lift_flat_list(cx, vi, t, l)
     case RecordType(fields) : return lift_flat_record(cx, vi, fields)
     case VariantType(cases) : return lift_flat_variant(cx, vi, cases)
@@ -1655,6 +1680,7 @@ def lower_flat(cx, v, t):
     case F64Type()          : return [maybe_scramble_nan64(v)]
     case CharType()         : return [char_to_i32(v)]
     case StringType()       : return lower_flat_string(cx, v)
+    case ErrorType()        : return lower_error(cx, v)
     case ListType(t, l)     : return lower_flat_list(cx, v, t, l)
     case RecordType(fields) : return lower_flat_record(cx, v, fields)
     case VariantType(cases) : return lower_flat_variant(cx, v, cases)
@@ -1957,11 +1983,11 @@ async def async_copy(HandleT, BufferT, t, opts, event_code, task, i, ptr, n):
   cx = LiftLowerContext(opts, task.inst, h.borrow_scope)
   buffer = BufferT(cx, t, ptr, n)
   if h.stream.closed():
-    flat_results = [CLOSED]
+    flat_results = [pack_async_copy_result(task, buffer, h)]
   else:
     if opts.sync:
       await task.call_sync(h.copy, buffer)
-      flat_results = [pack_async_copy_result(buffer, h)]
+      flat_results = [pack_async_copy_result(task, buffer, h)]
     else:
       async def do_copy(on_block):
         await h.copy(buffer, on_block)
@@ -1969,7 +1995,7 @@ async def do_copy(on_block):
           def copy_event():
             if h.copying_buffer is buffer:
               h.stop_copying()
-              return (event_code, i, pack_async_copy_result(buffer, h))
+              return (event_code, i, pack_async_copy_result(task, buffer, h))
             else:
               return None
           task.notify(copy_event)
@@ -1978,19 +2004,29 @@ def copy_event():
           h.start_copying(task, buffer)
           flat_results = [BLOCKED]
         case Returned():
-          flat_results = [pack_async_copy_result(buffer, h)]
+          flat_results = [pack_async_copy_result(task, buffer, h)]
   return flat_results
 
 BLOCKED = 0xffff_ffff
 CLOSED  = 0x8000_0000
 
-def pack_async_copy_result(buffer, h):
-  assert(buffer.progress <= Buffer.MAX_LENGTH < CLOSED < BLOCKED)
+def pack_async_copy_result(task, buffer, h):
   if buffer.progress:
+    assert(buffer.progress <= Buffer.MAX_LENGTH < BLOCKED)
+    assert(not (buffer.progress & CLOSED))
     return buffer.progress
-  if h.stream.closed():
-    return CLOSED
-  return 0
+  elif h.stream.closed():
+    if (error := h.stream.closed_with_error()):
+      assert(isinstance(h, ReadableStreamHandle|ReadableFutureHandle))
+      errori = task.inst.errors.add(error)
+      assert(errori != 0)
+    else:
+      errori = 0
+    assert(errori <= Table.MAX_LENGTH < BLOCKED)
+    assert(not (errori & CLOSED))
+    return errori | CLOSED
+  else:
+    return 0
 
 ### ๐Ÿ”€ `canon {stream,future}.cancel-{read,write}`
 
@@ -2012,40 +2048,77 @@ async def cancel_async_copy(HandleT, sync, task, i):
   trap_if(not isinstance(h, HandleT))
   trap_if(not h.copying_buffer)
   if h.stream.closed():
-    flat_results = [pack_async_copy_result(h.copying_buffer, h)]
+    flat_results = [pack_async_copy_result(task, h.copying_buffer, h)]
     h.stop_copying()
   else:
     if sync:
       await task.call_sync(h.cancel_copy, h.copying_buffer)
-      flat_results = [pack_async_copy_result(h.copying_buffer, h)]
+      flat_results = [pack_async_copy_result(task, h.copying_buffer, h)]
       h.stop_copying()
     else:
       match await call_and_handle_blocking(h.cancel_copy, h.copying_buffer):
         case Blocked():
           flat_results = [BLOCKED]
         case Returned():
-          flat_results = [pack_async_copy_result(h.copying_buffer, h)]
+          flat_results = [pack_async_copy_result(task, h.copying_buffer, h)]
           h.stop_copying()
   return flat_results
 
 ### ๐Ÿ”€ `canon {stream,future}.close-{readable,writable}`
 
 async def canon_stream_close_readable(t, task, i):
-  return await close_async_value(ReadableStreamHandle, t, task, i)
+  return await close_async_value(ReadableStreamHandle, t, task, i, 0)
 
-async def canon_stream_close_writable(t, task, i):
-  return await close_async_value(WritableStreamHandle, t, task, i)
+async def canon_stream_close_writable(t, task, hi, errori):
+  return await close_async_value(WritableStreamHandle, t, task, hi, errori)
 
 async def canon_future_close_readable(t, task, i):
-  return await close_async_value(ReadableFutureHandle, t, task, i)
+  return await close_async_value(ReadableFutureHandle, t, task, i, 0)
 
-async def canon_future_close_writable(t, task, i):
-  return await close_async_value(WritableFutureHandle, t, task, i)
+async def canon_future_close_writable(t, task, hi, errori):
+  return await close_async_value(WritableFutureHandle, t, task, hi, errori)
 
-async def close_async_value(HandleT, t, task, i):
+async def close_async_value(HandleT, t, task, hi, errori):
   trap_if(not task.inst.may_leave)
-  h = task.inst.waitables.remove(i)
+  h = task.inst.waitables.remove(hi)
+  if errori == 0:
+    error = None
+  else:
+    error = task.inst.errors.get(errori)
   trap_if(not isinstance(h, HandleT))
   trap_if(h.t != t)
-  h.drop()
+  h.drop(error)
+  return []
+
+### ๐Ÿ”€ `canon error.new`
+
+@dataclass
+class Error:
+  debug_message: String
+
+async def canon_error_new(opts, task, ptr, tagged_code_units):
+  trap_if(not task.inst.may_leave)
+  if DETERMINISTIC_PROFILE or random.randint(0,1):
+    s = String(('', 'utf8', 0))
+  else:
+    cx = LiftLowerContext(opts, task.inst)
+    s = load_string_from_range(cx, ptr, tagged_code_units)
+    s = host_defined_transformation(s)
+  i = task.inst.errors.add(Error(s))
+  return [i]
+
+### ๐Ÿ”€ `canon error.debug-message`
+
+async def canon_error_debug_message(opts, task, i, ptr):
+  trap_if(not task.inst.may_leave)
+  error = task.inst.errors.get(i)
+  cx = LiftLowerContext(opts, task.inst)
+  store_string(cx, error.debug_message, ptr)
+  return []
+
+### ๐Ÿ”€ `canon error.drop`
+
+async def canon_error_drop(task, i):
+  trap_if(not task.inst.may_leave)
+  task.inst.errors.remove(i)
   return []
diff --git a/design/mvp/canonical-abi/run_tests.py b/design/mvp/canonical-abi/run_tests.py
index 5a47a3a8..2c9c0507 100644
--- a/design/mvp/canonical-abi/run_tests.py
+++ b/design/mvp/canonical-abi/run_tests.py
@@ -921,13 +921,16 @@ def __init__(self, contents, chunk, destroy_if_empty = True):
 
   def closed(self):
     return not self.remaining and self.destroy_if_empty
+  def closed_with_error(self):
+    assert(self.closed())
+    return None
 
   def wake_waiting(self, cancelled = False):
     if self.waiting:
       self.waiting.set_result(cancelled)
       self.waiting = None
 
-  def close(self):
+  def close(self, error = None):
     self.remaining = []
     self.destroy_if_empty = True
     self.wake_waiting()
@@ -1079,8 +1082,8 @@ async def core_func(task, args):
     assert(ret == 4)
     [] = await canon_stream_close_readable(U8Type(), task, rsi1)
     [] = await canon_stream_close_readable(U8Type(), task, rsi2)
-    [] = await canon_stream_close_writable(U8Type(), task, wsi1)
-    [] = await canon_stream_close_writable(U8Type(), task, wsi2)
+    [] = await canon_stream_close_writable(U8Type(), task, wsi1, 0)
+    [] = await canon_stream_close_writable(U8Type(), task, wsi2, 0)
     return []
 
   await canon_lift(opts, inst, ft, core_func, None, on_start, on_return)
@@ -1167,7 +1170,7 @@ async def core_func(task, args):
     assert(mem[0:4] == b'\x05\x06\x07\x08')
     [ret] = await canon_stream_write(U8Type(), opts, task, wsi2, 0, 4)
     assert(ret == 4)
-    [] = await canon_stream_close_writable(U8Type(), task, wsi2)
+    [] = await canon_stream_close_writable(U8Type(), task, wsi2, 0)
     [ret] = await canon_stream_read(U8Type(), opts, task, rsi2, 0, 4)
     assert(ret == definitions.BLOCKED)
     event, p1, p2 = await task.wait(sync = False)
@@ -1184,7 +1187,7 @@ async def core_func(task, args):
     [] = await canon_subtask_drop(task, subi)
     [ret] = await canon_stream_write(U8Type(), sync_opts, task, wsi1, 0, 4)
     assert(ret == 4)
-    [] = await canon_stream_close_writable(U8Type(), task, wsi1)
+    [] = await canon_stream_close_writable(U8Type(), task, wsi1, 0)
     return []
 
   await canon_lift(opts, inst, ft, core_func, None, on_start, on_return)
@@ -1236,7 +1239,7 @@ async def core_func(task, args):
     assert(ret == 0)
     result = int.from_bytes(mem[retp : retp+4], 'little', signed=False)
     assert(result == (wsi | 2**31))
-    [] = await canon_stream_close_writable(U8Type(), task, wsi)
+    [] = await canon_stream_close_writable(U8Type(), task, wsi, 0)
     return []
 
   def on_start(): return []
@@ -1300,7 +1303,7 @@ async def core_func(task, args):
     assert(p1 == wsi)
     assert(p2 == 4)
     assert(dst.received == [1,2,3,4,5,6])
-    [] = await canon_stream_close_writable(U8Type(), task, wsi)
+    [] = await canon_stream_close_writable(U8Type(), task, wsi, 0)
     dst.set_remain(100)
     assert(await dst.consume(100) is None)
     return []
@@ -1348,14 +1351,17 @@ async def core_func1(task, args):
 
     fut4.set_result(None)
 
-    [] = await canon_stream_close_writable(U8Type(), task, wsi)
+    [errori] = await canon_error_new(opts1, task, 0, 0)
+    [] = await canon_stream_close_writable(U8Type(), task, wsi, errori)
+    [] = await canon_error_drop(task, errori)
     return []
 
   func1 = partial(canon_lift, opts1, inst1, ft1, core_func1)
 
   inst2 = ComponentInstance()
-  mem2 = bytearray(10)
-  opts2 = mk_opts(memory=mem2, sync=False)
+  heap2 = Heap(10)
+  mem2 = heap2.memory
+  opts2 = mk_opts(memory=heap2.memory, realloc=heap2.realloc, sync=False)
   ft2 = FuncType([], [])
   async def core_func2(task, args):
     assert(not args)
@@ -1393,8 +1399,11 @@ async def core_func2(task, args):
     await task.on_block(fut4)
 
     [ret] = await canon_stream_read(U8Type(), opts2, task, rsi, 0, 2)
-    assert(ret == definitions.CLOSED)
+    errori = 1
+    assert(ret == (definitions.CLOSED | errori))
     [] = await canon_stream_close_readable(U8Type(), task, rsi)
+    [] = await canon_error_debug_message(opts2, task, errori, 0)
+    [] = await canon_error_drop(task, errori)
 
     event, callidx, _ = await task.wait(sync = False)
     assert(event == EventCode.CALL_DONE)
@@ -1460,7 +1469,7 @@ async def core_func2(task, args):
 
     [ret] = await canon_stream_write(BorrowType(rt), async_opts2, task, wsi, 0, 2)
     assert(ret == 2)
-    [] = await canon_stream_close_writable(BorrowType(rt), task, wsi)
+    [] = await canon_stream_close_writable(BorrowType(rt), task, wsi, 0)
 
     event, p1, _ = await task.wait(sync = False)
     assert(event == EventCode.CALL_DONE)
@@ -1508,7 +1517,7 @@ async def core_func(task, args):
     assert(got == [0xa, 0xb])
     [ret] = await canon_stream_cancel_write(True, task, wsi)
     assert(ret == 2)
-    [] = await canon_stream_close_writable(U8Type(), task, wsi)
+    [] = await canon_stream_close_writable(U8Type(), task, wsi, 0)
     host_sink.set_remain(100)
     assert(await host_sink.consume(100) is None)
 
@@ -1523,7 +1532,7 @@ async def core_func(task, args):
     assert(got == [1, 2])
     [ret] = await canon_stream_cancel_write(False, task, wsi)
     assert(ret == 2)
-    [] = await canon_stream_close_writable(U8Type(), task, wsi)
+    [] = await canon_stream_close_writable(U8Type(), task, wsi, 0)
     host_sink.set_remain(100)
     assert(await host_sink.consume(100) is None)
 
@@ -1578,7 +1587,10 @@ def __init__(self):
     self.v = asyncio.Future()
   def closed(self):
     return self.v is None
-  def close(self):
+  def closed_with_error(self):
+    assert(self.closed())
+    return None
+  def close(self, error = None):
     assert(self.v is None)
   async def read(self, dst, on_block):
     assert(self.v is not None)
@@ -1637,7 +1649,7 @@ async def core_func(task, args):
     assert(p2 == 1)
     assert(mem[readp] == 43)
 
-    [] = await canon_future_close_writable(U8Type(), task, wfi)
+    [] = await canon_future_close_writable(U8Type(), task, wfi, 0)
     [] = await canon_future_close_readable(U8Type(), task, rfi)
     [] = await canon_subtask_drop(task, subi)
 
@@ -1666,7 +1678,7 @@ async def core_func(task, args):
     assert(ret == 1)
     assert(mem[readp] == 43)
 
-    [] = await canon_future_close_writable(U8Type(), task, wfi)
+    [] = await canon_future_close_writable(U8Type(), task, wfi, 0)
     [] = await canon_future_close_readable(U8Type(), task, rfi)
     [] = await canon_subtask_drop(task, subi)
 

From 30061e5b9ad7e5bf1335c2dcc59c1b7d6bf983d7 Mon Sep 17 00:00:00 2001
From: Luke Wagner <mail@lukewagner.name>
Date: Fri, 1 Nov 2024 15:20:02 -0500
Subject: [PATCH 18/22] Update {stream,future}.close-writable descriptions

---
 design/mvp/CanonicalABI.md | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/design/mvp/CanonicalABI.md b/design/mvp/CanonicalABI.md
index 9852e183..c954dc41 100644
--- a/design/mvp/CanonicalABI.md
+++ b/design/mvp/CanonicalABI.md
@@ -3265,13 +3265,19 @@ normal `{STREAM,FUTURE}_{READ,WRITE}` event by the original, now-unblocked
 For canonical definitions:
 ```wasm
 (canon stream.close-readable $t (core func $f))
-(canon stream.close-writable $t (core func $f))
 (canon future.close-readable $t (core func $f))
-(canon future.close-writable $t (core func $f))
 ```
 validation specifies:
 * `$f` is given type `(func (param i32))`
 
+and for canonical definitions:
+```wasm
+(canon stream.close-writable $t (core func $f))
+(canon future.close-writable $t (core func $f))
+```
+validation specifies:
+* `$f` is given type `(func (param i32 i32))`
+
 Calling `$f` removes the readable or writable end of the stream or future at
 the given index from the current component instance's `waitable` table,
 performing the guards and bookkeeping defined by

From 4306ee25fb0822194c69f41fae9c62a1fb60106c Mon Sep 17 00:00:00 2001
From: Luke Wagner <mail@lukewagner.name>
Date: Fri, 1 Nov 2024 15:29:21 -0500
Subject: [PATCH 19/22] Add example to explainer text about 'error'

---
 design/mvp/Explainer.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/design/mvp/Explainer.md b/design/mvp/Explainer.md
index 3876ce28..b8256508 100644
--- a/design/mvp/Explainer.md
+++ b/design/mvp/Explainer.md
@@ -654,8 +654,8 @@ the full range of flexibility to:
 A consequence of this, however, is that components *must not* depend on the
 contents of `error` values for behavioral correctness. In particular, case
 analysis of the contents of an `error` should not determine *error recovery*;
-for this, proper `result` or `variant` types must be used in the WIT function
-signature.
+explicit `result` or `variant` types must be used in the function return
+type instead (e.g., `(func (result (tuple (stream u8) (future $my-error))))`).
 
 ##### Container types
 

From fcea885c5783e9d63b4d8c6f71735286be74c23d Mon Sep 17 00:00:00 2001
From: Luke Wagner <mail@lukewagner.name>
Date: Sun, 3 Nov 2024 12:01:50 -0600
Subject: [PATCH 20/22] Remove restriction on write-before-lift, remove invalid
 assert, add test

---
 design/mvp/CanonicalABI.md              | 14 ++++++--------
 design/mvp/canonical-abi/definitions.py |  3 +--
 design/mvp/canonical-abi/run_tests.py   | 10 +++++++++-
 3 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/design/mvp/CanonicalABI.md b/design/mvp/CanonicalABI.md
index c954dc41..6c61fb47 100644
--- a/design/mvp/CanonicalABI.md
+++ b/design/mvp/CanonicalABI.md
@@ -1655,7 +1655,6 @@ def lift_async_value(ReadableHandleT, WritableHandleT, cx, i, t):
       cx.inst.waitables.remove(i)
     case WritableHandleT():
       trap_if(h.paired)
-      assert(not h.copying_buffer)
       h.paired = True
       if contains_borrow(t):
         h.borrow_scope = cx.borrow_scope
@@ -3118,7 +3117,6 @@ async def async_copy(HandleT, BufferT, t, opts, event_code, task, i, ptr, n):
   h = task.inst.waitables.get(i)
   trap_if(not isinstance(h, HandleT))
   trap_if(h.t != t)
-  trap_if(not h.paired)
   trap_if(h.copying_buffer)
   cx = LiftLowerContext(opts, task.inst, h.borrow_scope)
   buffer = BufferT(cx, t, ptr, n)
@@ -3126,6 +3124,7 @@ async def async_copy(HandleT, BufferT, t, opts, event_code, task, i, ptr, n):
     flat_results = [pack_async_copy_result(task, buffer, h)]
   else:
     if opts.sync:
+      trap_if(not h.paired)
       await task.call_sync(h.copy, buffer)
       flat_results = [pack_async_copy_result(task, buffer, h)]
     else:
@@ -3147,12 +3146,11 @@ async def async_copy(HandleT, BufferT, t, opts, event_code, task, i, ptr, n):
           flat_results = [pack_async_copy_result(task, buffer, h)]
   return flat_results
 ```
-The trap if `not h.paired` prevents `write`s on the writable end of streams or
-futures that have not yet been lifted. The `copying_buffer` field serves as a
-boolean indication of whether an async `read` or `write` is already in
-progress, preventing multiple overlapping calls to `read` or `write`. (This
-restriction could be relaxed [in the future](Async.md#TODO) to allow greater
-pipeline parallelism.)
+The `trap_if(h.copying_buffer)` trap prevents multiple overlapping calls to
+`read` or `write`. (This restriction could be relaxed [in the
+future](Async.md#TODO) to allow greater pipeline parallelism.) The
+`trap_if(not h.paired)` in the synchronous case prevents what would otherwise
+be a deadlock: a blocking `write` performed when there is no reader.
 
 One subtle corner case handled by this code that is worth pointing out is that,
 between calling `h.copy()` and `h.copy()` returning, wasm guest code can call
diff --git a/design/mvp/canonical-abi/definitions.py b/design/mvp/canonical-abi/definitions.py
index 9783d479..78377baf 100644
--- a/design/mvp/canonical-abi/definitions.py
+++ b/design/mvp/canonical-abi/definitions.py
@@ -1144,7 +1144,6 @@ def lift_async_value(ReadableHandleT, WritableHandleT, cx, i, t):
       cx.inst.waitables.remove(i)
     case WritableHandleT():
       trap_if(h.paired)
-      assert(not h.copying_buffer)
       h.paired = True
       if contains_borrow(t):
         h.borrow_scope = cx.borrow_scope
@@ -1978,7 +1977,6 @@ async def async_copy(HandleT, BufferT, t, opts, event_code, task, i, ptr, n):
   h = task.inst.waitables.get(i)
   trap_if(not isinstance(h, HandleT))
   trap_if(h.t != t)
-  trap_if(not h.paired)
   trap_if(h.copying_buffer)
   cx = LiftLowerContext(opts, task.inst, h.borrow_scope)
   buffer = BufferT(cx, t, ptr, n)
@@ -1986,6 +1984,7 @@ async def async_copy(HandleT, BufferT, t, opts, event_code, task, i, ptr, n):
     flat_results = [pack_async_copy_result(task, buffer, h)]
   else:
     if opts.sync:
+      trap_if(not h.paired)
       await task.call_sync(h.copy, buffer)
       flat_results = [pack_async_copy_result(task, buffer, h)]
     else:
diff --git a/design/mvp/canonical-abi/run_tests.py b/design/mvp/canonical-abi/run_tests.py
index 2c9c0507..4e5ef5de 100644
--- a/design/mvp/canonical-abi/run_tests.py
+++ b/design/mvp/canonical-abi/run_tests.py
@@ -1234,11 +1234,19 @@ async def core_func(task, args):
     assert(len(args) == 0)
     [wsi] = await canon_stream_new(U8Type(), task)
     assert(wsi == 1)
-    retp = 4
+    [ret] = await canon_stream_write(U8Type(), opts, task, wsi, 0, 4)
+    assert(ret == definitions.BLOCKED)
+    retp = 8
+    [ret] = await canon_lower(opts, host_ft, host_import, task, [wsi, retp])
+    assert(ret == 0)
+    result = int.from_bytes(mem[retp : retp+4], 'little', signed=False)
+    assert(result == (wsi | 2**31))
     [ret] = await canon_lower(opts, host_ft, host_import, task, [wsi, retp])
     assert(ret == 0)
     result = int.from_bytes(mem[retp : retp+4], 'little', signed=False)
     assert(result == (wsi | 2**31))
+    [ret] = await canon_stream_cancel_write(True, task, wsi)
+    assert(ret == 0)
     [] = await canon_stream_close_writable(U8Type(), task, wsi, 0)
     return []
 

From f9b341bc73f6dd95c2f0159a9c59ab97422792a8 Mon Sep 17 00:00:00 2001
From: Luke Wagner <mail@lukewagner.name>
Date: Mon, 4 Nov 2024 16:26:25 -0600
Subject: [PATCH 21/22] Add <typeidx> to {stream,future}.cancel-{read,write}

---
 design/mvp/Binary.md                    |  8 ++++----
 design/mvp/CanonicalABI.md              | 27 +++++++++++++------------
 design/mvp/Explainer.md                 |  8 ++++----
 design/mvp/canonical-abi/definitions.py | 19 ++++++++---------
 design/mvp/canonical-abi/run_tests.py   | 12 +++++------
 5 files changed, 38 insertions(+), 36 deletions(-)

diff --git a/design/mvp/Binary.md b/design/mvp/Binary.md
index 5159216c..508f7faa 100644
--- a/design/mvp/Binary.md
+++ b/design/mvp/Binary.md
@@ -297,15 +297,15 @@ canon    ::= 0x00 0x00 f:<core:funcidx> opts:<opts> ft:<typeidx> => (canon lift
            | 0x0e t:<typeidx>                                    => (canon stream.new t (core func)) ๐Ÿ”€
            | 0x0f t:<typeidx> opts:<opts>                        => (canon stream.read t opts (core func)) ๐Ÿ”€
            | 0x10 t:<typeidx> opts:<opts>                        => (canon stream.write t opts (core func)) ๐Ÿ”€
-           | 0x11 async?:<async?>                                => (canon stream.cancel-read async? (core func)) ๐Ÿ”€
-           | 0x12 async?:<async?>                                => (canon stream.cancel-write async? (core func)) ๐Ÿ”€
+           | 0x11 t:<typeidx> async?:<async?>                    => (canon stream.cancel-read t async? (core func)) 🔀
+           | 0x12 t:<typeidx> async?:<async?>                    => (canon stream.cancel-write t async? (core func)) 🔀
            | 0x13 t:<typeidx>                                    => (canon stream.close-readable t (core func)) ๐Ÿ”€
            | 0x14 t:<typeidx>                                    => (canon stream.close-writable t (core func)) ๐Ÿ”€
            | 0x15 t:<typeidx>                                    => (canon future.new t (core func)) ๐Ÿ”€
            | 0x16 t:<typeidx> opts:<opts>                        => (canon future.read t opts (core func)) ๐Ÿ”€
            | 0x17 t:<typeidx> opts:<opts>                        => (canon future.write t opts (core func)) ๐Ÿ”€
-           | 0x18 async?:<async?>                                => (canon future.cancel-read async? (core func)) ๐Ÿ”€
-           | 0x19 async?:<async?>                                => (canon future.cancel-write async? (core func)) ๐Ÿ”€
+           | 0x18 t:<typeidx> async?:<async?>                    => (canon future.cancel-read t async? (core func)) 🔀
+           | 0x19 t:<typeidx> async?:<async?>                    => (canon future.cancel-write t async? (core func)) 🔀
            | 0x1a t:<typeidx>                                    => (canon future.close-readable t (core func)) ๐Ÿ”€
            | 0x1b t:<typeidx>                                    => (canon future.close-writable t (core func)) ๐Ÿ”€
            | 0x1c opts:<opts>                                    => (canon error.new opts (core func)) ๐Ÿ”€
diff --git a/design/mvp/CanonicalABI.md b/design/mvp/CanonicalABI.md
index 6c61fb47..a01dd031 100644
--- a/design/mvp/CanonicalABI.md
+++ b/design/mvp/CanonicalABI.md
@@ -3198,10 +3198,10 @@ an optional last value of the stream or future.
 
 For canonical definitions:
 ```wasm
-(canon stream.cancel-read $async? (core func $f))
-(canon stream.cancel-write $async? (core func $f))
-(canon future.cancel-read $async? (core func $f))
-(canon future.cancel-write $async? (core func $f))
+(canon stream.cancel-read $t $async? (core func $f))
+(canon stream.cancel-write $t $async? (core func $f))
+(canon future.cancel-read $t $async? (core func $f))
+(canon future.cancel-write $t $async? (core func $f))
 ```
 validation specifies:
 * `$f` is given type `(func (param i32) (result i32))`
@@ -3217,22 +3217,23 @@ cancel a `read` or `write` (and regain ownership of the passed buffer) is
 crucial since some languages will need to cancel reading or writing from
 within the synchronous context of a destructor.
 ```python
-async def canon_stream_cancel_read(sync, task, i):
-  return await cancel_async_copy(ReadableStreamHandle, sync, task, i)
+async def canon_stream_cancel_read(t, sync, task, i):
+  return await cancel_async_copy(ReadableStreamHandle, t, sync, task, i)
 
-async def canon_stream_cancel_write(sync, task, i):
-  return await cancel_async_copy(WritableStreamHandle, sync, task, i)
+async def canon_stream_cancel_write(t, sync, task, i):
+  return await cancel_async_copy(WritableStreamHandle, t, sync, task, i)
 
-async def canon_future_cancel_read(sync, task, i):
-  return await cancel_async_copy(ReadableFutureHandle, sync, task, i)
+async def canon_future_cancel_read(t, sync, task, i):
+  return await cancel_async_copy(ReadableFutureHandle, t, sync, task, i)
 
-async def canon_future_cancel_write(sync, task, i):
-  return await cancel_async_copy(WritableFutureHandle, sync, task, i)
+async def canon_future_cancel_write(t, sync, task, i):
+  return await cancel_async_copy(WritableFutureHandle, t, sync, task, i)
 
-async def cancel_async_copy(HandleT, sync, task, i):
+async def cancel_async_copy(HandleT, t, sync, task, i):
   trap_if(not task.inst.may_leave)
   h = task.inst.waitables.get(i)
   trap_if(not isinstance(h, HandleT))
+  trap_if(h.t != t)
   trap_if(not h.copying_buffer)
   if h.stream.closed():
     flat_results = [pack_async_copy_result(task, h.copying_buffer, h)]
diff --git a/design/mvp/Explainer.md b/design/mvp/Explainer.md
index b8256508..608a3eae 100644
--- a/design/mvp/Explainer.md
+++ b/design/mvp/Explainer.md
@@ -1403,15 +1403,15 @@ canon ::= ...
         | (canon stream.new <typeidx> (core func <id>?)) ๐Ÿ”€
         | (canon stream.read <typeidx> <canonopt>* (core func <id>?)) ๐Ÿ”€
         | (canon stream.write <typeidx> <canonopt>* (core func <id>?)) ๐Ÿ”€
-        | (canon stream.cancel-read async? (core func <id>?)) ๐Ÿ”€
-        | (canon stream.cancel-write async? (core func <id>?)) ๐Ÿ”€
+        | (canon stream.cancel-read <typeidx> async? (core func <id>?)) ๐Ÿ”€
+        | (canon stream.cancel-write <typeidx> async? (core func <id>?)) ๐Ÿ”€
         | (canon stream.close-readable <typeidx> (core func <id>?)) ๐Ÿ”€
         | (canon stream.close-writable <typeidx> (core func <id>?)) ๐Ÿ”€
         | (canon future.new <typeidx> (core func <id>?)) ๐Ÿ”€
         | (canon future.read <typeidx> <canonopt>* (core func <id>?)) ๐Ÿ”€
         | (canon future.write <typeidx> <canonopt>* (core func <id>?)) ๐Ÿ”€
-        | (canon future.cancel-read async? (core func <id>?)) ๐Ÿ”€
-        | (canon future.cancel-write async? (core func <id>?)) ๐Ÿ”€
+        | (canon future.cancel-read <typeidx> async? (core func <id>?)) ๐Ÿ”€
+        | (canon future.cancel-write <typeidx> async? (core func <id>?)) ๐Ÿ”€
         | (canon future.close-readable <typeidx> (core func <id>?)) ๐Ÿ”€
         | (canon future.close-writable <typeidx> (core func <id>?)) ๐Ÿ”€
         | (canon error.new <canonopt>* (core func <id>?))
diff --git a/design/mvp/canonical-abi/definitions.py b/design/mvp/canonical-abi/definitions.py
index 78377baf..b4b88995 100644
--- a/design/mvp/canonical-abi/definitions.py
+++ b/design/mvp/canonical-abi/definitions.py
@@ -2029,22 +2029,23 @@ def pack_async_copy_result(task, buffer, h):
 
 ### ๐Ÿ”€ `canon {stream,future}.cancel-{read,write}`
 
-async def canon_stream_cancel_read(sync, task, i):
-  return await cancel_async_copy(ReadableStreamHandle, sync, task, i)
+async def canon_stream_cancel_read(t, sync, task, i):
+  return await cancel_async_copy(ReadableStreamHandle, t, sync, task, i)
 
-async def canon_stream_cancel_write(sync, task, i):
-  return await cancel_async_copy(WritableStreamHandle, sync, task, i)
+async def canon_stream_cancel_write(t, sync, task, i):
+  return await cancel_async_copy(WritableStreamHandle, t, sync, task, i)
 
-async def canon_future_cancel_read(sync, task, i):
-  return await cancel_async_copy(ReadableFutureHandle, sync, task, i)
+async def canon_future_cancel_read(t, sync, task, i):
+  return await cancel_async_copy(ReadableFutureHandle, t, sync, task, i)
 
-async def canon_future_cancel_write(sync, task, i):
-  return await cancel_async_copy(WritableFutureHandle, sync, task, i)
+async def canon_future_cancel_write(t, sync, task, i):
+  return await cancel_async_copy(WritableFutureHandle, t, sync, task, i)
 
-async def cancel_async_copy(HandleT, sync, task, i):
+async def cancel_async_copy(HandleT, t, sync, task, i):
   trap_if(not task.inst.may_leave)
   h = task.inst.waitables.get(i)
   trap_if(not isinstance(h, HandleT))
+  trap_if(h.t != t)
   trap_if(not h.copying_buffer)
   if h.stream.closed():
     flat_results = [pack_async_copy_result(task, h.copying_buffer, h)]
diff --git a/design/mvp/canonical-abi/run_tests.py b/design/mvp/canonical-abi/run_tests.py
index 4e5ef5de..67f3209e 100644
--- a/design/mvp/canonical-abi/run_tests.py
+++ b/design/mvp/canonical-abi/run_tests.py
@@ -1245,7 +1245,7 @@ async def core_func(task, args):
     assert(ret == 0)
     result = int.from_bytes(mem[retp : retp+4], 'little', signed=False)
     assert(result == (wsi | 2**31))
-    [ret] = await canon_stream_cancel_write(True, task, wsi)
+    [ret] = await canon_stream_cancel_write(U8Type(), True, task, wsi)
     assert(ret == 0)
     [] = await canon_stream_close_writable(U8Type(), task, wsi, 0)
     return []
@@ -1523,7 +1523,7 @@ async def core_func(task, args):
     host_sink.set_remain(2)
     got = await host_sink.consume(2)
     assert(got == [0xa, 0xb])
-    [ret] = await canon_stream_cancel_write(True, task, wsi)
+    [ret] = await canon_stream_cancel_write(U8Type(), True, task, wsi)
     assert(ret == 2)
     [] = await canon_stream_close_writable(U8Type(), task, wsi, 0)
     host_sink.set_remain(100)
@@ -1538,7 +1538,7 @@ async def core_func(task, args):
     host_sink.set_remain(2)
     got = await host_sink.consume(2)
     assert(got == [1, 2])
-    [ret] = await canon_stream_cancel_write(False, task, wsi)
+    [ret] = await canon_stream_cancel_write(U8Type(), False, task, wsi)
     assert(ret == 2)
     [] = await canon_stream_close_writable(U8Type(), task, wsi, 0)
     host_sink.set_remain(100)
@@ -1550,7 +1550,7 @@ async def core_func(task, args):
     rsi = mem[retp]
     [ret] = await canon_stream_read(U8Type(), lower_opts, task, rsi, 0, 4)
     assert(ret == definitions.BLOCKED)
-    [ret] = await canon_stream_cancel_read(True, task, rsi)
+    [ret] = await canon_stream_cancel_read(U8Type(), True, task, rsi)
     assert(ret == 0)
     [] = await canon_stream_close_readable(U8Type(), task, rsi)
 
@@ -1561,7 +1561,7 @@ async def core_func(task, args):
     [ret] = await canon_stream_read(U8Type(), lower_opts, task, rsi, 0, 4)
     assert(ret == definitions.BLOCKED)
     host_source.eager_cancel.clear()
-    [ret] = await canon_stream_cancel_read(False, task, rsi)
+    [ret] = await canon_stream_cancel_read(U8Type(), False, task, rsi)
     assert(ret == definitions.BLOCKED)
     host_source.write([7,8])
     await asyncio.sleep(0)
@@ -1682,7 +1682,7 @@ async def core_func(task, args):
     assert(p1 == subi)
 
     await task.yield_(sync = False)
-    [ret] = await canon_future_cancel_read(True, task, rfi)
+    [ret] = await canon_future_cancel_read(U8Type(), True, task, rfi)
     assert(ret == 1)
     assert(mem[readp] == 43)
 

From e8d192ebdcf1665d9ae0987ece96cbdfa2963cf9 Mon Sep 17 00:00:00 2001
From: Luke Wagner <mail@lukewagner.name>
Date: Tue, 5 Nov 2024 15:18:50 -0600
Subject: [PATCH 22/22] Rename 'error' to 'error-context'

---
 design/mvp/Async.md                     |  13 +-
 design/mvp/Binary.md                    |   8 +-
 design/mvp/CanonicalABI.md              | 186 ++++++++++++------------
 design/mvp/Explainer.md                 |  66 +++++----
 design/mvp/canonical-abi/definitions.py | 106 +++++++-------
 design/mvp/canonical-abi/run_tests.py   |  18 +--
 6 files changed, 205 insertions(+), 192 deletions(-)

diff --git a/design/mvp/Async.md b/design/mvp/Async.md
index 402e3b43..530071a5 100644
--- a/design/mvp/Async.md
+++ b/design/mvp/Async.md
@@ -233,9 +233,9 @@ g: func(s: stream<T>) -> stuff;
 ```
 `g(f(x))` works as you might hope, concurrently streaming `x` into `f` which
 concurrently streams its results into `g`. If `f` has an error, it can close
-its returned `stream<T>` with an [`error`](Explainer.md#error-type) value
-which `g` will receive along with the notification that its readable stream
-was closed.
+its returned `stream<T>` with an [`error-context`](Explainer.md#error-context-type)
+value which `g` will receive along with the notification that its readable
+stream was closed.
 
 If a component instance *would* receive the readable end of a stream for which
 it already owns the writable end, the readable end disappears and the existing
@@ -516,7 +516,8 @@ For now, this remains a [TODO](#todo) and validation will reject `async`-lifted
 
 Native async support is being proposed incrementally. The following features
 will be added in future chunks roughly in the order list to complete the full
-"async" story:
+"async" story, with a TBD cutoff between what's in [WASI Preview 3] and what
+comes after:
 * `nonblocking` function type attribute: allow a function to declare in its
   type that it will not transitively do anything blocking
 * define what `async` means for `start` functions (top-level await + background
@@ -529,6 +530,8 @@ will be added in future chunks roughly in the order list to complete the full
 * some way to say "no more elements are coming for a while"
 * `recursive` function type attribute: allow a function to be reentered
   recursively (instead of trapping) and link inner and outer activations
+* add `stringstream` specialization of `stream<char>` (just like `string` is
+  a specialization of `list<char>`)
 * allow pipelining multiple `stream.read`/`write` calls
 * allow chaining multiple async calls together ("promise pipelining")
 * integrate with `shared`: define how to lift and lower functions `async` *and*
@@ -572,3 +575,5 @@ will be added in future chunks roughly in the order list to complete the full
 [stack-switching]: https://github.com/WebAssembly/stack-switching/
 [JSPI]: https://github.com/WebAssembly/js-promise-integration/
 [shared-everything-threads]: https://github.com/webAssembly/shared-everything-threads
+
+[WASI Preview 3]: https://github.com/WebAssembly/WASI/tree/main/wasip2#looking-forward-to-preview-3
diff --git a/design/mvp/Binary.md b/design/mvp/Binary.md
index 508f7faa..5bc7e716 100644
--- a/design/mvp/Binary.md
+++ b/design/mvp/Binary.md
@@ -190,7 +190,7 @@ primvaltype   ::= 0x7f                                    => bool
                 | 0x75                                    => f64
                 | 0x74                                    => char
                 | 0x73                                    => string
-                | 0x64                                    => error
+                | 0x64                                    => error-context
 defvaltype    ::= pvt:<primvaltype>                       => pvt
                 | 0x72 lt*:vec(<labelvaltype>)            => (record (field lt)*)    (if |lt*| > 0)
                 | 0x71 case*:vec(<case>)                  => (variant case+) (if |case*| > 0)
@@ -308,9 +308,9 @@ canon    ::= 0x00 0x00 f:<core:funcidx> opts:<opts> ft:<typeidx> => (canon lift
            | 0x19 t:<typeidx> async?:<async?>                    => (canon future.cancel-write t async? (core func)) 🔀
            | 0x1a t:<typeidx>                                    => (canon future.close-readable t (core func)) ๐Ÿ”€
            | 0x1b t:<typeidx>                                    => (canon future.close-writable t (core func)) ๐Ÿ”€
-           | 0x1c opts:<opts>                                    => (canon error.new opts (core func)) ๐Ÿ”€
-           | 0x1d opts:<opts>                                    => (canon error.debug-message opts (core func)) ๐Ÿ”€
-           | 0x1e                                                => (canon error.drop (core func)) ๐Ÿ”€
+           | 0x1c opts:<opts>                                    => (canon error-context.new opts (core func)) ๐Ÿ”€
+           | 0x1d opts:<opts>                                    => (canon error-context.debug-message opts (core func)) ๐Ÿ”€
+           | 0x1e                                                => (canon error-context.drop (core func)) ๐Ÿ”€
 async?   ::= 0x00                                                =>
            | 0x01                                                => async
 opts     ::= opt*:vec(<canonopt>)                                => opt*
diff --git a/design/mvp/CanonicalABI.md b/design/mvp/CanonicalABI.md
index a01dd031..cf7755a8 100644
--- a/design/mvp/CanonicalABI.md
+++ b/design/mvp/CanonicalABI.md
@@ -41,9 +41,9 @@ being specified here.
   * [`canon {stream,future}.{read,write}`](#-canon-streamfuturereadwrite) ๐Ÿ”€
   * [`canon {stream,future}.cancel-{read,write}`](#-canon-streamfuturecancel-readwrite) ๐Ÿ”€
   * [`canon {stream,future}.close-{readable,writable}`](#-canon-streamfutureclose-readablewritable) ๐Ÿ”€
-  * [`canon error.new`](#-canon-errornew) ๐Ÿ”€
-  * [`canon error.debug-message`](#-canon-errordebug-message) ๐Ÿ”€
-  * [`canon error.drop`](#-canon-errordrop) ๐Ÿ”€
+  * [`canon error-context.new`](#-canon-error-contextnew) ๐Ÿ”€
+  * [`canon error-context.debug-message`](#-canon-error-contextdebug-message) ๐Ÿ”€
+  * [`canon error-context.drop`](#-canon-error-contextdrop) ๐Ÿ”€
 
 
 ## Supporting definitions
@@ -151,7 +151,7 @@ state that `canon`-generated functions use to maintain component invariants.
 class ComponentInstance:
   resources: ResourceTables
   waitables: Table[Subtask|StreamHandle|FutureHandle]
-  errors: Table[Error]
+  error_contexts: Table[ErrorContext]
   num_tasks: int
   may_leave: bool
   backpressure: bool
@@ -162,7 +162,7 @@ class ComponentInstance:
   def __init__(self):
     self.resources = ResourceTables()
     self.waitables = Table[Subtask|StreamHandle|FutureHandle]()
-    self.errors = Table[Error]()
+    self.error_contexts = Table[ErrorContext]()
     self.num_tasks = 0
     self.may_leave = True
     self.backpressure = False
@@ -824,7 +824,7 @@ class WritableBuffer(Buffer):
 
 class ReadableStream:
   closed: Callable[[], bool]
-  closed_with_error: Callable[[], Optional[Error]]
+  closed_with_error: Callable[[], Optional[ErrorContext]]
   read: Callable[[WritableBuffer, OnBlockCallback], Awaitable]
   cancel_read: Callable[[WritableBuffer, OnBlockCallback], Awaitable]
   close: Callable[[]]
@@ -845,7 +845,7 @@ Going through the methods in these interfaces:
 * `close` may only be called if there is no active `read` and leaves the stream
   `closed` without possibility of blocking.
 * `closed_with_error` may only be called if `closed` has returned `True` and
-  returns an optional `Error` (defined below) that the writable end was
+  returns an optional `ErrorContext` (defined below) that the writable end was
   closed with.
 
 The abstract `WritableBuffer` interface is implemented by the
@@ -909,14 +909,14 @@ that `dst.lower(src.lift(...))` is meant to be fused into a single copy from
 class ReadableStreamGuestImpl(ReadableStream):
   impl: ComponentInstance
   is_closed: bool
-  error: Optional[Error]
+  errctx: Optional[ErrorContext]
   other_buffer: Optional[Buffer]
   other_future: Optional[asyncio.Future]
 
   def __init__(self, inst):
     self.impl = inst
     self.is_closed = False
-    self.error = None
+    self.errctx = None
     self.other_buffer = None
     self.other_future = None
 
@@ -924,7 +924,7 @@ class ReadableStreamGuestImpl(ReadableStream):
     return self.is_closed
   def closed_with_error(self):
     assert(self.is_closed)
-    return self.error
+    return self.errctx
 
   async def read(self, dst, on_block):
     await self.rendezvous(dst, self.other_buffer, dst, on_block)
@@ -1005,11 +1005,11 @@ of a stream (below), in which case all the above logic applies, but in the
 opposite direction. Thus, there is only a single direction-agnostic `close`
 that is shared by both the reader and writer ends.
 ```python
-  def close(self, error = None):
+  def close(self, errctx = None):
     if not self.is_closed:
-      assert(not self.error)
+      assert(not self.errctx)
       self.is_closed = True
-      self.error = error
+      self.errctx = errctx
       self.other_buffer = None
       if self.other_future:
         self.other_future.set_result(None)
@@ -1017,12 +1017,12 @@ that is shared by both the reader and writer ends.
     else:
       assert(not self.other_buffer and not self.other_future)
 ```
-Note that when called via the `ReadableStream` abstract interface, `error` is
-necessarily `None`, whereas if called from the writer end, `error` may or may
-not be an `Error`. In the special case that the writer end passes a non-`None`
-error and the stream has already been closed by the reader end, the `Error` is
-dropped, since the reader has already racily cancelled the stream and has no
-way to see the `Error`.
+Note that when called via the `ReadableStream` abstract interface, `errctx` is
+necessarily `None`, whereas if called from the writer end, `errctx` may or may
+not be an `ErrorContext`. In the special case that the writer end passes a
+non-`None` error context and the stream has already been closed by the reader
+end, the `ErrorContext` is dropped, since the reader has already racily
+cancelled the stream and has no way to see the `ErrorContext`.
 
 The [readable and writable ends] of a stream are stored as `StreamHandle`
 objects in the component instance's `waitables` table. Both ends of a stream
@@ -1064,9 +1064,9 @@ class StreamHandle:
     self.copying_task = None
     self.copying_buffer = None
 
-  def drop(self, error):
+  def drop(self, errctx):
     trap_if(self.copying_buffer)
-    self.stream.close(error)
+    self.stream.close(errctx)
     if isinstance(self.borrow_scope, Task):
       self.borrow_scope.todo -= 1
 ```
@@ -1141,14 +1141,14 @@ class WritableFutureHandle(FutureHandle):
     if src.remain() == 0:
       self.stream.close()
 
-  def drop(self, error):
-    trap_if(not self.stream.closed() and not error)
-    FutureHandle.drop(self, error)
+  def drop(self, errctx):
+    trap_if(not self.stream.closed() and not errctx)
+    FutureHandle.drop(self, errctx)
 ```
 The overridden `WritableFutureHandle.drop` method traps if the future value has
-not already been written and the future is not being closed with an `error`.
-Thus, a future must either have a single value successfully copied from the
-writer to the reader xor be closed with an `error`.
+not already been written and the future is not being closed with an
+`error-context`. Thus, a future must either have a single value successfully
+copied from the writer to the reader xor be closed with an `error-context`.
 
 ### Despecialization
 
@@ -1219,7 +1219,7 @@ def alignment(t):
     case F64Type()                   : return 8
     case CharType()                  : return 4
     case StringType()                : return 4
-    case ErrorType()                 : return 4
+    case ErrorContextType()          : return 4
     case ListType(t, l)              : return alignment_list(t, l)
     case RecordType(fields)          : return alignment_record(fields)
     case VariantType(cases)          : return alignment_variant(cases)
@@ -1309,7 +1309,7 @@ def elem_size(t):
     case F64Type()                   : return 8
     case CharType()                  : return 4
     case StringType()                : return 8
-    case ErrorType()                 : return 4
+    case ErrorContextType()          : return 4
     case ListType(t, l)              : return elem_size_list(t, l)
     case RecordType(fields)          : return elem_size_record(fields)
     case VariantType(cases)          : return elem_size_variant(cases)
@@ -1375,7 +1375,7 @@ def load(cx, ptr, t):
     case F64Type()          : return decode_i64_as_float(load_int(cx, ptr, 8))
     case CharType()         : return convert_i32_to_char(cx, load_int(cx, ptr, 4))
     case StringType()       : return load_string(cx, ptr)
-    case ErrorType()        : return lift_error(cx, load_int(cx, ptr, 4))
+    case ErrorContextType() : return lift_error_context(cx, load_int(cx, ptr, 4))
     case ListType(t, l)     : return load_list(cx, ptr, t, l)
     case RecordType(fields) : return load_record(cx, ptr, fields)
     case VariantType(cases) : return load_variant(cx, ptr, cases)
@@ -1498,11 +1498,11 @@ def load_string_from_range(cx, ptr, tagged_code_units) -> String:
   return (s, cx.opts.string_encoding, tagged_code_units)
 ```
 
-Error values are lifted directly from the per-component-instance `errors`
-table:
+Error context values are lifted directly from the per-component-instance
+`error_contexts` table:
 ```python
-def lift_error(cx, i):
-  return cx.inst.errors.get(i)
+def lift_error_context(cx, i):
+  return cx.inst.error_contexts.get(i)
 ```
 
 Lists and records are loaded by recursively loading their elements/fields:
@@ -1694,7 +1694,7 @@ def store(cx, v, t, ptr):
     case F64Type()          : store_int(cx, encode_float_as_i64(v), ptr, 8)
     case CharType()         : store_int(cx, char_to_i32(v), ptr, 4)
     case StringType()       : store_string(cx, v, ptr)
-    case ErrorType()        : store_int(cx, lower_error(cx, v), ptr, 4)
+    case ErrorContextType() : store_int(cx, lower_error_context(cx, v), ptr, 4)
     case ListType(t, l)     : store_list(cx, v, ptr, t, l)
     case RecordType(fields) : store_record(cx, v, ptr, fields)
     case VariantType(cases) : store_variant(cx, v, ptr, cases)
@@ -1981,11 +1981,12 @@ def store_probably_utf16_to_latin1_or_utf16(cx, src, src_code_units):
   return (ptr, latin1_size)
 ```
 
-Error values are lowered by storing them directly into the
-per-component-instance `errors` table and passing the `i32` index to wasm.
+Error context values are lowered by storing them directly into the
+per-component-instance `error_contexts` table and passing the `i32` index to
+wasm.
 ```python
-def lower_error(cx, v):
-  return cx.inst.errors.add(v)
+def lower_error_context(cx, v):
+  return cx.inst.error_contexts.add(v)
 ```
 
 Lists and records are stored by recursively storing their elements and
@@ -2226,7 +2227,7 @@ def flatten_type(t):
     case F64Type()                        : return ['f64']
     case CharType()                       : return ['i32']
     case StringType()                     : return ['i32', 'i32']
-    case ErrorType()                      : return ['i32']
+    case ErrorContextType()               : return ['i32']
     case ListType(t, l)                   : return flatten_list(t, l)
     case RecordType(fields)               : return flatten_record(fields)
     case VariantType(cases)               : return flatten_variant(cases)
@@ -2321,7 +2322,7 @@ def lift_flat(cx, vi, t):
     case F64Type()          : return canonicalize_nan64(vi.next('f64'))
     case CharType()         : return convert_i32_to_char(cx, vi.next('i32'))
     case StringType()       : return lift_flat_string(cx, vi)
-    case ErrorType()        : return lift_error(cx, vi.next('i32'))
+    case ErrorContextType() : return lift_error_context(cx, vi.next('i32'))
     case ListType(t, l)     : return lift_flat_list(cx, vi, t, l)
     case RecordType(fields) : return lift_flat_record(cx, vi, fields)
     case VariantType(cases) : return lift_flat_variant(cx, vi, cases)
@@ -2450,7 +2451,7 @@ def lower_flat(cx, v, t):
     case F64Type()          : return [maybe_scramble_nan64(v)]
     case CharType()         : return [char_to_i32(v)]
     case StringType()       : return lower_flat_string(cx, v)
-    case ErrorType()        : return lower_error(cx, v)
+    case ErrorContextType() : return lower_error_context(cx, v)
     case ListType(t, l)     : return lower_flat_list(cx, v, t, l)
     case RecordType(fields) : return lower_flat_record(cx, v, fields)
     case VariantType(cases) : return lower_flat_variant(cx, v, cases)
@@ -3177,22 +3178,22 @@ def pack_async_copy_result(task, buffer, h):
     assert(not (buffer.progress & CLOSED))
     return buffer.progress
   elif h.stream.closed():
-    if (error := h.stream.closed_with_error()):
+    if (errctx := h.stream.closed_with_error()):
       assert(isinstance(h, ReadableStreamHandle|ReadableFutureHandle))
-      errori = task.inst.errors.add(error)
-      assert(errori != 0)
+      errctxi = task.inst.error_contexts.add(errctx)
+      assert(errctxi != 0)
     else:
-      errori = 0
-    assert(errori <= Table.MAX_LENGTH < BLOCKED)
-    assert(not (errori & CLOSED))
-    return errori | CLOSED
+      errctxi = 0
+    assert(errctxi <= Table.MAX_LENGTH < BLOCKED)
+    assert(not (errctxi & CLOSED))
+    return errctxi | CLOSED
   else:
     return 0
 ```
-Note that `error`s are only possible on the *readable* end of a stream or
-future (since, as defined below, only the *writable* end can close the stream
-with an `error`). Thus, `error`s only flow in the same direction as values, as
-an optional last value of the stream or future.
+Note that `error-context`s are only possible on the *readable* end of a stream
+or future (since, as defined below, only the *writable* end can close the
+stream with an `error-context`). Thus, `error-context`s only flow in the same
+direction as values, as an optional last value of the stream or future.
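
As an editorial aside (not part of the patch), the packed `i32` described
above can be decoded by the receiving side as follows; the sketch only assumes
the `BLOCKED` and `CLOSED` constants defined earlier in `definitions.py`:
```python
# Editorial sketch: decode the i32 packed by pack_async_copy_result (or the
# BLOCKED sentinel returned when a read/write did not complete immediately).
from definitions import BLOCKED, CLOSED

def decode_copy_result(packed):
  if packed == BLOCKED:
    return ('blocked', None)
  if packed & CLOSED:
    errctxi = packed & ~CLOSED      # 0 means closed without an error-context
    return ('closed', errctxi if errctxi else None)
  return ('copied', packed)         # number of elements copied so far
```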
 
 ### ๐Ÿ”€ `canon {stream,future}.cancel-{read,write}`
 
@@ -3285,49 +3286,50 @@ performing the guards and bookkeeping defined by
 async def canon_stream_close_readable(t, task, i):
   return await close_async_value(ReadableStreamHandle, t, task, i, 0)
 
-async def canon_stream_close_writable(t, task, hi, errori):
-  return await close_async_value(WritableStreamHandle, t, task, hi, errori)
+async def canon_stream_close_writable(t, task, hi, errctxi):
+  return await close_async_value(WritableStreamHandle, t, task, hi, errctxi)
 
 async def canon_future_close_readable(t, task, i):
   return await close_async_value(ReadableFutureHandle, t, task, i, 0)
 
-async def canon_future_close_writable(t, task, hi, errori):
-  return await close_async_value(WritableFutureHandle, t, task, hi, errori)
+async def canon_future_close_writable(t, task, hi, errctxi):
+  return await close_async_value(WritableFutureHandle, t, task, hi, errctxi)
 
-async def close_async_value(HandleT, t, task, hi, errori):
+async def close_async_value(HandleT, t, task, hi, errctxi):
   trap_if(not task.inst.may_leave)
   h = task.inst.waitables.remove(hi)
-  if errori == 0:
-    error = None
+  if errctxi == 0:
+    errctx = None
   else:
-    error = task.inst.errors.get(errori)
+    errctx = task.inst.error_contexts.get(errctxi)
   trap_if(not isinstance(h, HandleT))
   trap_if(h.t != t)
-  h.drop(error)
+  h.drop(errctx)
   return []
 ```
 Note that only the writable ends of streams and futures can be closed with a
-final `error` value and thus `error`s only flow in the same direction as
-values as an optional last value of the stream or future.
+final `error-context` value and thus `error-context`s only flow in the same
+direction as values, as an optional last value of the stream or future.
 
-### ๐Ÿ”€ `canon error.new`
+### ๐Ÿ”€ `canon error-context.new`
 
 For a canonical definition:
 ```wasm
-(canon error.new $opts (core func $f))
+(canon error-context.new $opts (core func $f))
 ```
 validation specifies:
 * `$f` is given type `(func (param i32 i32) (result i32))`
 
 Calling `$f` calls the following function which uses the `$opts` immediate to
-(non-deterministically) lift the debug message, create a new `Error` value,
-store it in the per-component-instance `errors` table and returns its index.
+(non-deterministically) lift the debug message, create a new `ErrorContext`
+value and store it in the per-component-instance `error_contexts` table,
+returning its index.
 ```python
 @dataclass
-class Error:
+class ErrorContext:
   debug_message: String
 
-async def canon_error_new(opts, task, ptr, tagged_code_units):
+async def canon_error_context_new(opts, task, ptr, tagged_code_units):
   trap_if(not task.inst.may_leave)
   if DETERMINISTIC_PROFILE or random.randint(0,1):
     s = String(('', 'utf8', 0))
@@ -3335,58 +3337,58 @@ async def canon_error_new(opts, task, ptr, tagged_code_units):
     cx = LiftLowerContext(opts, task.inst)
     s = load_string_from_range(cx, ptr, tagged_code_units)
     s = host_defined_transformation(s)
-  i = task.inst.errors.add(Error(s))
+  i = task.inst.error_contexts.add(ErrorContext(s))
   return [i]
 ```
-Supporting the requirement (introduced in the [explainer](Explainer.md#error-type))
-that wasm code does not depend on the contents of `error` values for
-behavioral correctness, the debug message is completely discarded
-non-deterministically or, in the deterministic profile, always. Importantly
-(for performance), when the debug message is discarded, it is not even lifted
-and thus the O(N) well-formedness conditions are not checked. (Note that
-`host_defined_transformation` is not defined by the Canonical ABI and stands
-for an arbitrary host-defined function.)
+Supporting the requirement (introduced in the
+[explainer](Explainer.md#error-context-type)) that wasm code does not depend on
+the contents of `error-context` values for behavioral correctness, the debug
+message is completely discarded non-deterministically or, in the deterministic
+profile, always. Importantly (for performance), when the debug message is
+discarded, it is not even lifted and thus the O(N) well-formedness conditions
+are not checked. (Note that `host_defined_transformation` is not defined by the
+Canonical ABI and stands for an arbitrary host-defined function.)
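
To make the intended usage concrete, the following editorial sketch (not part
of the patch) shows a writer creating an `error-context` and closing its
writable stream end with it, mirroring the `run_tests.py` changes below;
`opts`, `task`, `wsi`, `msg_ptr` and `msg_len` are hypothetical stand-ins:
```python
# Editorial sketch: writer-side error-context lifecycle, assuming the
# definitions.py harness above.
from definitions import (U8Type, canon_error_context_new,
                         canon_stream_close_writable, canon_error_context_drop)

async def close_stream_with_error(opts, task, wsi, msg_ptr, msg_len):
  # Lift (msg_ptr, msg_len) into a new error-context; the host may
  # non-deterministically discard or transform the debug message.
  [errctxi] = await canon_error_context_new(opts, task, msg_ptr, msg_len)
  # A non-zero errctxi closes the writable end with that error-context;
  # passing 0 closes it without one.
  [] = await canon_stream_close_writable(U8Type(), task, wsi, errctxi)
  # The writer's own index can be dropped immediately; the reader receives its
  # own index via the CLOSED-flagged result of its stream.read.
  [] = await canon_error_context_drop(task, errctxi)
```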
 
-### ๐Ÿ”€ `canon error.debug-message`
+### ๐Ÿ”€ `canon error-context.debug-message`
 
 For a canonical definition:
 ```wasm
-(canon error.debug-message $opts (core func $f))
+(canon error-context.debug-message $opts (core func $f))
 ```
 validation specifies:
 * `$f` is given type `(func (param i32 i32))`
 
 Calling `$f` calls the following function which uses the `$opts` immediate to
-lowers the `Error`'s contained debug message. While *producing* an `error`
+lower the `ErrorContext`'s debug message. While *producing* an `error-context`
 value may non-deterministically discard or transform the debug message, a
-single `error` value must return the same debug message from
+single `error-context` value must return the same debug message from
-`error.debug-message` over time.
+`error-context.debug-message` over time.
 ```python
-async def canon_error_debug_message(opts, task, i, ptr):
+async def canon_error_context_debug_message(opts, task, i, ptr):
   trap_if(not task.inst.may_leave)
-  error = task.inst.errors.get(i)
+  errctx = task.inst.error_contexts.get(i)
   cx = LiftLowerContext(opts, task.inst)
-  store_string(cx, error.debug_message, ptr)
+  store_string(cx, errctx.debug_message, ptr)
   return []
 ```
 Note that `ptr` points to an 8-byte region of memory into which will be stored
 the pointer and length of the debug string (allocated via `opts.realloc`).
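
For illustration (editorial, not part of the patch), retrieving the message on
the reader side looks like this under the `run_tests.py` conventions, where
`mem` is the instance's linear memory (`opts.memory`) and `errctxi` and `retp`
are hypothetical table and memory indices:
```python
# Editorial sketch: canon_error_context_debug_message stores a (ptr, len) pair
# into the 8 bytes at retp (ptr allocated via opts.realloc).
from definitions import canon_error_context_debug_message

async def read_debug_message(opts, task, mem, errctxi, retp):
  [] = await canon_error_context_debug_message(opts, task, errctxi, retp)
  ptr = int.from_bytes(mem[retp : retp+4], 'little')
  length = int.from_bytes(mem[retp+4 : retp+8], 'little')
  # With utf8 string encoding the stored length is a byte count.
  return bytes(mem[ptr : ptr+length]).decode('utf-8')
```
As specified above, the returned message may legitimately be empty, so callers
must not rely on its contents.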
 
-### ๐Ÿ”€ `canon error.drop`
+### ๐Ÿ”€ `canon error-context.drop`
 
 For a canonical definition:
 ```wasm
-(canon error.drop (core func $f))
+(canon error-context.drop (core func $f))
 ```
 validation specifies:
 * `$f` is given type `(func (param i32))`
 
-Calling `$f` calls the following function, which drops the `error` value from
-the current component instance's `errors` table.
+Calling `$f` calls the following function, which drops the `error-context`
+value from the current component instance's `error_contexts` table.
 ```python
-async def canon_error_drop(task, i):
+async def canon_error_context_drop(task, i):
   trap_if(not task.inst.may_leave)
-  task.inst.errors.remove(i)
+  task.inst.error_contexts.remove(i)
   return []
 ```
 
diff --git a/design/mvp/Explainer.md b/design/mvp/Explainer.md
index 608a3eae..c29f77c8 100644
--- a/design/mvp/Explainer.md
+++ b/design/mvp/Explainer.md
@@ -14,7 +14,7 @@ more user-focused explanation, take a look at the
   * [Type definitions](#type-definitions)
     * [Fundamental value types](#fundamental-value-types)
       * [Numeric types](#numeric-types)
-      * [Error type](#error-type)
+      * [Error Context type](#error-context-type)
       * [Container types](#container-types)
       * [Handle types](#handle-types)
       * [Asynchronous value types](#asynchronous-value-types)
@@ -27,7 +27,7 @@ more user-focused explanation, take a look at the
     * [Canonical built-ins](#canonical-built-ins)
       * [Resource built-ins](#resource-built-ins)
       * [Async built-ins](#-async-built-ins)
-      * [Error built-ins](#-error-built-ins)
+      * [Error Context built-ins](#-error-context-built-ins)
       * [Threading built-ins](#-threading-built-ins)
   * [Value definitions](#-value-definitions)
   * [Start definitions](#-start-definitions)
@@ -543,7 +543,7 @@ defvaltype    ::= bool
                 | s8 | u8 | s16 | u16 | s32 | u32 | s64 | u64
                 | f32 | f64
                 | char | string
-                | error
+                | error-context
                 | (record (field "<label>" <valtype>)+)
                 | (variant (case "<label>" <valtype>?)+)
                 | (list <valtype>)
@@ -602,7 +602,7 @@ sets of abstract values:
 | `u8`, `u16`, `u32`, `u64` | integers in the range [0, 2<sup>N</sup>-1] |
 | `f32`, `f64`              | [IEEE754] floating-point numbers, with a single NaN value |
 | `char`                    | [Unicode Scalar Values] |
-| `error`                   | an immutable, non-deterministic, host-defined value meant to aid in debugging |
+| `error-context`           | an immutable, non-deterministic, host-defined value meant to aid in debugging |
 | `record`                  | heterogeneous [tuples] of named values |
 | `variant`                 | heterogeneous [tagged unions] of named values |
 | `list`                    | homogeneous, variable- or fixed-length [sequences] of values |
@@ -634,28 +634,32 @@ a single NaN value. And boolean values in core wasm are usually represented as
 `i32`s where operations interpret all-zeros as `false`, while at the
 component-level there is a `bool` type with `true` and `false` values.
 
-##### Error type
+##### Error Context type
 
-Values of `error` type are immutable, non-deterministic, host-defined and
-meant to be propagated from failure sources to callers in order to aid in
-debugging. Currently `error` values contain only a "debug message" string whose
-contents are determined by the host. Core wasm can create `error` values given
-a debug string, but the host is free to arbitrarily transform (discard,
-preserve, prefix or suffix) this wasm-provided string.
+Values of `error-context` type are immutable, non-deterministic, host-defined
+and meant to be propagated from failure sources to callers in order to aid in
+debugging. Currently `error-context` values contain only a "debug message"
+string whose contents are determined by the host. Core wasm can create
+`error-context` values given a debug string, but the host is free to
+arbitrarily transform (discard, preserve, prefix or suffix) this
+wasm-provided string. In the future, `error-context` could be enhanced with
+other additional or more-structured context (like a backtrace or a chain of
+originating error contexts).
 
 The intention of this highly-non-deterministic semantics is to provide hosts
 the full range of flexibility to:
 * append a basic callstack suitable for forensic debugging in production;
 * optimize for performance in high-volume production scenarios by slicing or
-  discarding error messages;
+  discarding debug messages;
 * optimize for developer experience in debugging scenarios when debug metadata
   is present by appending expensive-to-produce symbolicated callstacks.
 
 A consequence of this, however, is that components *must not* depend on the
-contents of `error` values for behavioral correctness. In particular, case
-analysis of the contents of an `error` should not determine *error receovery*;
-explicit `result` or `variant` types must be used in the function return
-type instead (e.g., `(func (result (tuple (stream u8) (future $my-error)))`).
+contents of `error-context` values for behavioral correctness. In particular,
+case analysis of the contents of an `error-context` should not determine
+*error recovery*; explicit `result` or `variant` types must be used in the
+function return type instead (e.g.,
+`(func (result (tuple (stream u8) (future $my-error))))`).
 
 ##### Container types
 
@@ -1414,9 +1418,9 @@ canon ::= ...
         | (canon future.cancel-write <typeidx> async? (core func <id>?)) ๐Ÿ”€
         | (canon future.close-readable <typeidx> (core func <id>?)) ๐Ÿ”€
         | (canon future.close-writable <typeidx> (core func <id>?)) ๐Ÿ”€
-        | (canon error.new <canonopt>* (core func <id>?))
-        | (canon error.debug-message <canonopt>* (core func <id>?))
-        | (canon error.drop (core func <id>?))
+        | (canon error-context.new <canonopt>* (core func <id>?))
+        | (canon error-context.debug-message <canonopt>* (core func <id>?))
+        | (canon error-context.drop (core func <id>?))
         | (canon thread.spawn <typeidx> (core func <id>?)) ๐Ÿงต
         | (canon thread.hw_concurrency (core func <id>?)) ๐Ÿงต
 ```
@@ -1544,20 +1548,22 @@ from the current component instance's [waitables](Async.md#waiting) table,
 trapping if the stream or future has a mismatched direction or type or are in
 the middle of a `read` or `write`.
 
-##### ๐Ÿ”€ Error built-ins
+##### ๐Ÿ”€ Error Context built-ins
 
-The `error.new` built-in has type `[ptr:i32 len:i32] -> [i32]` and returns
-the index of a new `error` value in a per-component-instance table of errors.
-The given (`ptr`, `length`) pair are non-deterministically lifted and
-transformed to produce the `error`'s internal [debug message](#error-type).
+The `error-context.new` built-in has type `[ptr:i32 len:i32] -> [i32]` and
+returns the index of a new `error-context` value in a per-component-instance
+table. The given (`ptr`, `length`) pair is non-deterministically lifted and
+transformed to produce the `error-context`'s internal
+[debug message](#error-context-type).
 
-The `error.debug-message` built-in has type `[error:i32 ptr:i32] -> []`
-and writes the [debug message](#error-type) of the given `error` into `ptr`
-as an 8-byte (`ptr`, `length`) pair, according to the Canonical ABI for
-`string` given the `<canonopt>*` immediates.
+The `error-context.debug-message` built-in has type
+`[errctxi:i32 ptr:i32] -> []` and writes the [debug message](#error-context-type)
+of the given `error-context` into `ptr` as an 8-byte (`ptr`, `length`) pair,
+according to the Canonical ABI for `string`, given the `<canonopt>*`
+immediates.
 
-The `error.drop` built-in has type `[error:i32] -> []` and drops the given
-`error` value from the component instance's table.
+The `error-context.drop` built-in has type `[errctxi:i32] -> []` and drops the
+given `error-context` value from the component instance's table.
 
 ##### ๐Ÿงต Threading built-ins
 
diff --git a/design/mvp/canonical-abi/definitions.py b/design/mvp/canonical-abi/definitions.py
index b4b88995..6ce38929 100644
--- a/design/mvp/canonical-abi/definitions.py
+++ b/design/mvp/canonical-abi/definitions.py
@@ -129,7 +129,7 @@ class F32Type(PrimValType): pass
 class F64Type(PrimValType): pass
 class CharType(PrimValType): pass
 class StringType(PrimValType): pass
-class ErrorType(ValType): pass
+class ErrorContextType(ValType): pass
 
 @dataclass
 class ListType(ValType):
@@ -222,7 +222,7 @@ class CanonicalOptions:
 class ComponentInstance:
   resources: ResourceTables
   waitables: Table[Subtask|StreamHandle|FutureHandle]
-  errors: Table[Error]
+  error_contexts: Table[ErrorContext]
   num_tasks: int
   may_leave: bool
   backpressure: bool
@@ -233,7 +233,7 @@ class ComponentInstance:
   def __init__(self):
     self.resources = ResourceTables()
     self.waitables = Table[Subtask|StreamHandle|FutureHandle]()
-    self.errors = Table[Error]()
+    self.error_contexts = Table[ErrorContext]()
     self.num_tasks = 0
     self.may_leave = True
     self.backpressure = False
@@ -596,7 +596,7 @@ class WritableBuffer(Buffer):
 
 class ReadableStream:
   closed: Callable[[], bool]
-  closed_with_error: Callable[[], Optional[Error]]
+  closed_with_error: Callable[[], Optional[ErrorContext]]
   read: Callable[[WritableBuffer, OnBlockCallback], Awaitable]
   cancel_read: Callable[[WritableBuffer, OnBlockCallback], Awaitable]
   close: Callable[[]]
@@ -639,14 +639,14 @@ def lower(self, vs):
 class ReadableStreamGuestImpl(ReadableStream):
   impl: ComponentInstance
   is_closed: bool
-  error: Optional[Error]
+  errctx: Optional[ErrorContext]
   other_buffer: Optional[Buffer]
   other_future: Optional[asyncio.Future]
 
   def __init__(self, inst):
     self.impl = inst
     self.is_closed = False
-    self.error = None
+    self.errctx = None
     self.other_buffer = None
     self.other_future = None
 
@@ -654,7 +654,7 @@ def closed(self):
     return self.is_closed
   def closed_with_error(self):
     assert(self.is_closed)
-    return self.error
+    return self.errctx
 
   async def read(self, dst, on_block):
     await self.rendezvous(dst, self.other_buffer, dst, on_block)
@@ -693,11 +693,11 @@ async def cancel_rendezvous(self, this_buffer, on_block):
         self.other_future.set_result(None)
         self.other_future = None
 
-  def close(self, error = None):
+  def close(self, errctx = None):
     if not self.is_closed:
-      assert(not self.error)
+      assert(not self.errctx)
       self.is_closed = True
-      self.error = error
+      self.errctx = errctx
       self.other_buffer = None
       if self.other_future:
         self.other_future.set_result(None)
@@ -733,9 +733,9 @@ def stop_copying(self):
     self.copying_task = None
     self.copying_buffer = None
 
-  def drop(self, error):
+  def drop(self, errctx):
     trap_if(self.copying_buffer)
-    self.stream.close(error)
+    self.stream.close(errctx)
     if isinstance(self.borrow_scope, Task):
       self.borrow_scope.todo -= 1
 
@@ -784,9 +784,9 @@ async def cancel_copy(self, src, on_block):
     if src.remain() == 0:
       self.stream.close()
 
-  def drop(self, error):
-    trap_if(not self.stream.closed() and not error)
-    FutureHandle.drop(self, error)
+  def drop(self, errctx):
+    trap_if(not self.stream.closed() and not errctx)
+    FutureHandle.drop(self, errctx)
 
 ### Despecialization
 
@@ -838,7 +838,7 @@ def alignment(t):
     case F64Type()                   : return 8
     case CharType()                  : return 4
     case StringType()                : return 4
-    case ErrorType()                 : return 4
+    case ErrorContextType()          : return 4
     case ListType(t, l)              : return alignment_list(t, l)
     case RecordType(fields)          : return alignment_record(fields)
     case VariantType(cases)          : return alignment_variant(cases)
@@ -896,7 +896,7 @@ def elem_size(t):
     case F64Type()                   : return 8
     case CharType()                  : return 4
     case StringType()                : return 8
-    case ErrorType()                 : return 4
+    case ErrorContextType()          : return 4
     case ListType(t, l)              : return elem_size_list(t, l)
     case RecordType(fields)          : return elem_size_record(fields)
     case VariantType(cases)          : return elem_size_variant(cases)
@@ -956,7 +956,7 @@ def load(cx, ptr, t):
     case F64Type()          : return decode_i64_as_float(load_int(cx, ptr, 8))
     case CharType()         : return convert_i32_to_char(cx, load_int(cx, ptr, 4))
     case StringType()       : return load_string(cx, ptr)
-    case ErrorType()        : return lift_error(cx, load_int(cx, ptr, 4))
+    case ErrorContextType() : return lift_error_context(cx, load_int(cx, ptr, 4))
     case ListType(t, l)     : return load_list(cx, ptr, t, l)
     case RecordType(fields) : return load_record(cx, ptr, fields)
     case VariantType(cases) : return load_variant(cx, ptr, cases)
@@ -1044,8 +1044,8 @@ def load_string_from_range(cx, ptr, tagged_code_units) -> String:
 
   return (s, cx.opts.string_encoding, tagged_code_units)
 
-def lift_error(cx, i):
-  return cx.inst.errors.get(i)
+def lift_error_context(cx, i):
+  return cx.inst.error_contexts.get(i)
 
 def load_list(cx, ptr, elem_type, maybe_length):
   if maybe_length is not None:
@@ -1171,7 +1171,7 @@ def store(cx, v, t, ptr):
     case F64Type()          : store_int(cx, encode_float_as_i64(v), ptr, 8)
     case CharType()         : store_int(cx, char_to_i32(v), ptr, 4)
     case StringType()       : store_string(cx, v, ptr)
-    case ErrorType()        : store_int(cx, lower_error(cx, v), ptr, 4)
+    case ErrorContextType() : store_int(cx, lower_error_context(cx, v), ptr, 4)
     case ListType(t, l)     : store_list(cx, v, ptr, t, l)
     case RecordType(fields) : store_record(cx, v, ptr, fields)
     case VariantType(cases) : store_variant(cx, v, ptr, cases)
@@ -1371,8 +1371,8 @@ def store_probably_utf16_to_latin1_or_utf16(cx, src, src_code_units):
   trap_if(ptr + latin1_size > len(cx.opts.memory))
   return (ptr, latin1_size)
 
-def lower_error(cx, v):
-  return cx.inst.errors.add(v)
+def lower_error_context(cx, v):
+  return cx.inst.error_contexts.add(v)
 
 def store_list(cx, v, ptr, elem_type, maybe_length):
   if maybe_length is not None:
@@ -1518,7 +1518,7 @@ def flatten_type(t):
     case F64Type()                        : return ['f64']
     case CharType()                       : return ['i32']
     case StringType()                     : return ['i32', 'i32']
-    case ErrorType()                      : return ['i32']
+    case ErrorContextType()               : return ['i32']
     case ListType(t, l)                   : return flatten_list(t, l)
     case RecordType(fields)               : return flatten_record(fields)
     case VariantType(cases)               : return flatten_variant(cases)
@@ -1585,7 +1585,7 @@ def lift_flat(cx, vi, t):
     case F64Type()          : return canonicalize_nan64(vi.next('f64'))
     case CharType()         : return convert_i32_to_char(cx, vi.next('i32'))
     case StringType()       : return lift_flat_string(cx, vi)
-    case ErrorType()        : return lift_error(cx, vi.next('i32'))
+    case ErrorContextType() : return lift_error_context(cx, vi.next('i32'))
     case ListType(t, l)     : return lift_flat_list(cx, vi, t, l)
     case RecordType(fields) : return lift_flat_record(cx, vi, fields)
     case VariantType(cases) : return lift_flat_variant(cx, vi, cases)
@@ -1679,7 +1679,7 @@ def lower_flat(cx, v, t):
     case F64Type()          : return [maybe_scramble_nan64(v)]
     case CharType()         : return [char_to_i32(v)]
     case StringType()       : return lower_flat_string(cx, v)
-    case ErrorType()        : return lower_error(cx, v)
+    case ErrorContextType() : return lower_error_context(cx, v)
     case ListType(t, l)     : return lower_flat_list(cx, v, t, l)
     case RecordType(fields) : return lower_flat_record(cx, v, fields)
     case VariantType(cases) : return lower_flat_variant(cx, v, cases)
@@ -2015,15 +2015,15 @@ def pack_async_copy_result(task, buffer, h):
     assert(not (buffer.progress & CLOSED))
     return buffer.progress
   elif h.stream.closed():
-    if (error := h.stream.closed_with_error()):
+    if (errctx := h.stream.closed_with_error()):
       assert(isinstance(h, ReadableStreamHandle|ReadableFutureHandle))
-      errori = task.inst.errors.add(error)
-      assert(errori != 0)
+      errctxi = task.inst.error_contexts.add(errctx)
+      assert(errctxi != 0)
     else:
-      errori = 0
-    assert(errori <= Table.MAX_LENGTH < BLOCKED)
-    assert(not (errori & CLOSED))
-    return errori | CLOSED
+      errctxi = 0
+    assert(errctxi <= Table.MAX_LENGTH < BLOCKED)
+    assert(not (errctxi & CLOSED))
+    return errctxi | CLOSED
   else:
     return 0
 
@@ -2069,34 +2069,34 @@ async def cancel_async_copy(HandleT, t, sync, task, i):
 async def canon_stream_close_readable(t, task, i):
   return await close_async_value(ReadableStreamHandle, t, task, i, 0)
 
-async def canon_stream_close_writable(t, task, hi, errori):
-  return await close_async_value(WritableStreamHandle, t, task, hi, errori)
+async def canon_stream_close_writable(t, task, hi, errctxi):
+  return await close_async_value(WritableStreamHandle, t, task, hi, errctxi)
 
 async def canon_future_close_readable(t, task, i):
   return await close_async_value(ReadableFutureHandle, t, task, i, 0)
 
-async def canon_future_close_writable(t, task, hi, errori):
-  return await close_async_value(WritableFutureHandle, t, task, hi, errori)
+async def canon_future_close_writable(t, task, hi, errctxi):
+  return await close_async_value(WritableFutureHandle, t, task, hi, errctxi)
 
-async def close_async_value(HandleT, t, task, hi, errori):
+async def close_async_value(HandleT, t, task, hi, errctxi):
   trap_if(not task.inst.may_leave)
   h = task.inst.waitables.remove(hi)
-  if errori == 0:
-    error = None
+  if errctxi == 0:
+    errctx = None
   else:
-    error = task.inst.errors.get(errori)
+    errctx = task.inst.error_contexts.get(errctxi)
   trap_if(not isinstance(h, HandleT))
   trap_if(h.t != t)
-  h.drop(error)
+  h.drop(errctx)
   return []
 
-### ๐Ÿ”€ `canon error.new`
+### ๐Ÿ”€ `canon error-context.new`
 
 @dataclass
-class Error:
+class ErrorContext:
   debug_message: String
 
-async def canon_error_new(opts, task, ptr, tagged_code_units):
+async def canon_error_context_new(opts, task, ptr, tagged_code_units):
   trap_if(not task.inst.may_leave)
   if DETERMINISTIC_PROFILE or random.randint(0,1):
     s = String(('', 'utf8', 0))
@@ -2104,21 +2104,21 @@ async def canon_error_new(opts, task, ptr, tagged_code_units):
     cx = LiftLowerContext(opts, task.inst)
     s = load_string_from_range(cx, ptr, tagged_code_units)
     s = host_defined_transformation(s)
-  i = task.inst.errors.add(Error(s))
+  i = task.inst.error_contexts.add(ErrorContext(s))
   return [i]
 
-### ๐Ÿ”€ `canon error.debug-message`
+### ๐Ÿ”€ `canon error-context.debug-message`
 
-async def canon_error_debug_message(opts, task, i, ptr):
+async def canon_error_context_debug_message(opts, task, i, ptr):
   trap_if(not task.inst.may_leave)
-  error = task.inst.errors.get(i)
+  errctx = task.inst.error_contexts.get(i)
   cx = LiftLowerContext(opts, task.inst)
-  store_string(cx, error.debug_message, ptr)
+  store_string(cx, errctx.debug_message, ptr)
   return []
 
-### ๐Ÿ”€ `canon error.drop`
+### ๐Ÿ”€ `canon error-context.drop`
 
-async def canon_error_drop(task, i):
+async def canon_error_context_drop(task, i):
   trap_if(not task.inst.may_leave)
-  task.inst.errors.remove(i)
+  task.inst.error_contexts.remove(i)
   return []
diff --git a/design/mvp/canonical-abi/run_tests.py b/design/mvp/canonical-abi/run_tests.py
index 67f3209e..dc0f9c5e 100644
--- a/design/mvp/canonical-abi/run_tests.py
+++ b/design/mvp/canonical-abi/run_tests.py
@@ -930,7 +930,7 @@ def wake_waiting(self, cancelled = False):
       self.waiting.set_result(cancelled)
       self.waiting = None
 
-  def close(self, error = None):
+  def close(self, errctx = None):
     self.remaining = []
     self.destroy_if_empty = True
     self.wake_waiting()
@@ -1359,9 +1359,9 @@ async def core_func1(task, args):
 
     fut4.set_result(None)
 
-    [errori] = await canon_error_new(opts1, task, 0, 0)
-    [] = await canon_stream_close_writable(U8Type(), task, wsi, errori)
-    [] = await canon_error_drop(task, errori)
+    [errctxi] = await canon_error_context_new(opts1, task, 0, 0)
+    [] = await canon_stream_close_writable(U8Type(), task, wsi, errctxi)
+    [] = await canon_error_context_drop(task, errctxi)
     return []
 
   func1 = partial(canon_lift, opts1, inst1, ft1, core_func1)
@@ -1407,11 +1407,11 @@ async def core_func2(task, args):
     await task.on_block(fut4)
 
     [ret] = await canon_stream_read(U8Type(), opts2, task, rsi, 0, 2)
-    errori = 1
-    assert(ret == (definitions.CLOSED | errori))
+    errctxi = 1
+    assert(ret == (definitions.CLOSED | errctxi))
     [] = await canon_stream_close_readable(U8Type(), task, rsi)
-    [] = await canon_error_debug_message(opts2, task, errori, 0)
-    [] = await canon_error_drop(task, errori)
+    [] = await canon_error_context_debug_message(opts2, task, errctxi, 0)
+    [] = await canon_error_context_drop(task, errctxi)
 
     event, callidx, _ = await task.wait(sync = False)
     assert(event == EventCode.CALL_DONE)
@@ -1598,7 +1598,7 @@ def closed(self):
   def closed_with_error(self):
     assert(self.closed())
     return None
-  def close(self, error = None):
+  def close(self, errctx = None):
     assert(self.v is None)
   async def read(self, dst, on_block):
     assert(self.v is not None)