diff --git a/.bazelrc b/.bazelrc index 35a03304558..789e024745e 100644 --- a/.bazelrc +++ b/.bazelrc @@ -61,6 +61,8 @@ build --per_file_copt='external/zlib@-Wno-deprecated-non-prototype' build --host_per_file_copt='external/zlib@-Wno-deprecated-non-prototype' build --per_file_copt=external/protobuf@-Wno-deprecated-declarations build --host_per_file_copt=external/protobuf@-Wno-deprecated-declarations +build --per_file_copt=external/protobuf@-Wno-deprecated-this-capture +build --host_per_file_copt=external/protobuf@-Wno-deprecated-this-capture # opt in to capnp deprecation warnings about trying to attach to a refcounted object build --cxxopt=-DKJ_WARN_REFCOUNTED_ATTACH=1 diff --git a/.clang-tidy b/.clang-tidy index 01196bae205..a912a000882 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -61,7 +61,8 @@ Checks: > -readability-redundant-smartptr-get, readability-reference-to-constructed-temporary, readability-static-accessed-through-instance, - readability-use-concise-preprocessor-directives + readability-use-concise-preprocessor-directives, + jsg-visit-for-gc # TODO: Fix and enable # bugprone-derived-method-shadowing-base-method diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 2b16fdc2e4d..113e0a288d8 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -102,7 +102,7 @@ jobs: lint: uses: ./.github/workflows/_bazel.yml with: - extra_bazel_args: '--config=lint --config=clang-tidy --config=ci-test --config=ci-linux-common' + extra_bazel_args: '--config=lint --config=ci-test --config=ci-linux-common' run_tests: false parse_headers: true secrets: diff --git a/AGENTS.md b/AGENTS.md index 87f06ed25d3..68f6dab4245 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -234,6 +234,9 @@ C++ classes are exposed to JavaScript via JSG macros in `src/workerd/jsg/`. 
See - `JSG_RESOURCE_TYPE` for reference types, `JSG_STRUCT` for value types - `js.alloc()` for resource allocation +- The `jsg-visit-for-gc` clang-tidy check (`//tools/clang-tidy:jsg-lint`) + validates that GC-visitable fields are traced in `visitForGc()`. Run via + `just clang-tidy <target>`. See `build/AGENTS.md` for details. ### Feature Management diff --git a/build/AGENTS.md b/build/AGENTS.md index 9a733c36d88..198317aaac9 100644 --- a/build/AGENTS.md +++ b/build/AGENTS.md @@ -20,6 +20,7 @@ Custom Bazel rules (`wd_*` macros) for C++, TypeScript, Rust, Cap'n Proto, and t | `wd_capnp_library.bzl` | Cap'n Proto schema compilation | | `wd_rust_crate.bzl` / `wd_rust_binary.bzl` | Rust build rules | | `lint_test.bzl` | ESLint integration | +| `//tools/clang-tidy:jsg-lint` | Custom clang-tidy plugin (source: `tools/clang-tidy/jsg-lint.c++`); ships the `jsg-visit-for-gc` check for GC-root validation | **Conventions:** @@ -28,6 +29,33 @@ Custom Bazel rules (`wd_*` macros) for C++, TypeScript, Rust, Cap'n Proto, and t - Variant generation controllable per-test via `generate_*_variant` booleans - `BUILD.*` files: overlay build files for third-party deps (sqlite3, zlib, simdutf, pyodide, wpt) +## CLANG-TIDY PLUGIN + +`//tools/clang-tidy:jsg-lint` builds a shared-object clang-tidy plugin +that adds workerd-specific static checks. Currently ships `jsg-visit-for-gc`, +which flags JSG resource types whose visitable fields (`jsg::Ref`, `jsg::JsRef`, +`jsg::V8Ref`, `jsg::Function`, `jsg::Promise`, `jsg::BufferSource`, `jsg::Value`, +etc., plus `kj::Maybe`/`Array`/`Vector`/`OneOf` and `jsg::Optional` wrappers +thereof) are missing from `visitForGc()`. + +- Run via `just clang-tidy <target>` (e.g., `just clang-tidy //src/workerd/api/...`). +- Plugin sources live in `tools/clang-tidy/jsg-lint.c++` and are built as a + `cc_shared_library` target `//tools/clang-tidy:jsg-lint`. The source is + also exported via `exports_files` so downstream projects can rebuild + against their own clang/LLVM headers. 
+- The clang-tidy binary itself is published to `cloudflare/workerd-tools` + releases (see `deps/build_deps.jsonc`, entries `clang_tidy_*`); the matching + `*-clang-tidy-dev.tar.xz` archive provides the clang/LLVM headers needed to build the + plugin out-of-tree. The binary is available for Linux amd64/arm64 and macOS arm64; a + single headers archive (linux-amd64) serves all platforms since the AST-matching + plugin doesn't depend on the arch-specific config macros that vary. +- Wrapper script `build/tools/clang_tidy/clang_tidy_wrapper.sh` loads the + plugin via `--load=<path>`. +- Suppress an intentional non-visit with `// NOLINT(jsg-visit-for-gc)` plus a + comment explaining why the field is safe to skip (see `src/workerd/api/streams/queue.h` + for `ByteQueue::Entry::store` and `src/workerd/api/node/diagnostics-channel.h` + for `Channel::name`). + ## DEPENDENCY MANAGEMENT Lives in `deps/`. Uses jsonc manifests + codegen: diff --git a/build/deps/build_deps.jsonc b/build/deps/build_deps.jsonc index bd82978d888..4b50a1d2f7d 100644 --- a/build/deps/build_deps.jsonc +++ b/build/deps/build_deps.jsonc @@ -119,27 +119,41 @@ "type": "github_release", "owner": "cloudflare", "repo": "workerd-tools", - "file_regex": "llvm-.*-linux-amd64-clang-tidy", + "file_regex": "llvm-.*-linux-amd64-clang-tidy$", "file_type": "executable", - "freeze_version": "clang-tidy-22.1.1" + "freeze_version": "clang-tidy-22.1.5" }, { "name": "clang_tidy_linux_arm64", "type": "github_release", "owner": "cloudflare", "repo": "workerd-tools", - "file_regex": "llvm-.*-linux-arm64-clang-tidy", + "file_regex": "llvm-.*-linux-arm64-clang-tidy$", "file_type": "executable", - "freeze_version": "clang-tidy-22.1.1" + "freeze_version": "clang-tidy-22.1.5" }, { "name": "clang_tidy_darwin_arm64", "type": "github_release", "owner": "cloudflare", "repo": "workerd-tools", - "file_regex": "llvm-.*-darwin-arm64-clang-tidy", + "file_regex": "llvm-.*-darwin-arm64-clang-tidy$", "file_type": "executable", - "freeze_version": "clang-tidy-22.1.1" + 
"freeze_version": "clang-tidy-22.1.5" + }, + { + // Clang/LLVM headers needed to build the workerd jsg-lint clang-tidy + // plugin out-of-tree. The clang-tidy plugin is just AST matching, so + // the only platform-specific bits (arch-name macros in + // build/include/llvm/Config/*.def and llvm-config.h) are irrelevant + // here. One archive serves all platforms. + "name": "clang_tidy_dev_headers", + "type": "github_release", + "owner": "cloudflare", + "repo": "workerd-tools", + "file_regex": "llvm-.*-linux-amd64-clang-tidy-dev\\.tar\\.xz$", + "build_file": "@workerd//tools/clang-tidy:BUILD.headers", + "freeze_version": "clang-tidy-22.1.5" } ] } diff --git a/build/deps/gen/build_deps.MODULE.bazel b/build/deps/gen/build_deps.MODULE.bazel index 41a6297bd92..f8857886eee 100644 --- a/build/deps/gen/build_deps.MODULE.bazel +++ b/build/deps/gen/build_deps.MODULE.bazel @@ -11,10 +11,10 @@ bazel_dep(name = "abseil-cpp", version = "20260107.1") bazel_dep(name = "apple_support", version = "2.5.4") # aspect_rules_esbuild -bazel_dep(name = "aspect_rules_esbuild", version = "0.25.1") +bazel_dep(name = "aspect_rules_esbuild", version = "0.26.0") # aspect_rules_js -bazel_dep(name = "aspect_rules_js", version = "3.0.3") +bazel_dep(name = "aspect_rules_js", version = "3.1.1") # aspect_rules_ts bazel_dep(name = "aspect_rules_ts", version = "3.8.9") @@ -29,17 +29,28 @@ bazel_dep(name = "bazel_skylib", version = "1.9.0") http.file( name = "clang_tidy_darwin_arm64", executable = True, - sha256 = "65599ed9056d5da503cd4a0b179276d0676d959eb3a6b19c1720fe4ac697891a", - url = "https://github.com/cloudflare/workerd-tools/releases/download/clang-tidy-22.1.1/llvm-22.1.1-darwin-arm64-clang-tidy", + sha256 = "c499cb9cbcb3af9e7bce2da5d42fe3bfa957928620c73a435f5918e00cf08d6a", + url = "https://github.com/cloudflare/workerd-tools/releases/download/clang-tidy-22.1.5/llvm-22.1.5-darwin-arm64-clang-tidy", ) use_repo(http, "clang_tidy_darwin_arm64") +# clang_tidy_dev_headers +http.archive( + name = 
"clang_tidy_dev_headers", + build_file = "@workerd//tools/clang-tidy:BUILD.headers", + sha256 = "8c8f3e5abd3e48d2570bdbba9de6a6aa96f01c489ad4ccddeb0449b3a857d706", + strip_prefix = "llvm-22.1.5-linux-amd64-clang-tidy-dev", + type = "tar.xz", + url = "https://github.com/cloudflare/workerd-tools/releases/download/clang-tidy-22.1.5/llvm-22.1.5-linux-amd64-clang-tidy-dev.tar.xz", +) +use_repo(http, "clang_tidy_dev_headers") + # clang_tidy_linux_amd64 http.file( name = "clang_tidy_linux_amd64", executable = True, - sha256 = "52b56c8f46a80dbbde9334f3de0da45744e65757b1ab467e513d5d8cd1d0b771", - url = "https://github.com/cloudflare/workerd-tools/releases/download/clang-tidy-22.1.1/llvm-22.1.1-linux-amd64-clang-tidy", + sha256 = "ef023eeeafba064d4f182ce130b306202a21f23632ad4253687ed10b7df493da", + url = "https://github.com/cloudflare/workerd-tools/releases/download/clang-tidy-22.1.5/llvm-22.1.5-linux-amd64-clang-tidy", ) use_repo(http, "clang_tidy_linux_amd64") @@ -47,8 +58,8 @@ use_repo(http, "clang_tidy_linux_amd64") http.file( name = "clang_tidy_linux_arm64", executable = True, - sha256 = "1ac2e03fee590aaf920861e83be27e8936cd9a5476a90f43bf88c8e6eb424be6", - url = "https://github.com/cloudflare/workerd-tools/releases/download/clang-tidy-22.1.1/llvm-22.1.1-linux-arm64-clang-tidy", + sha256 = "59a52ec78d370141667022fe33dd12b17568f3c1a466641c5df52790a570556d", + url = "https://github.com/cloudflare/workerd-tools/releases/download/clang-tidy-22.1.5/llvm-22.1.5-linux-arm64-clang-tidy", ) use_repo(http, "clang_tidy_linux_arm64") @@ -68,7 +79,7 @@ bazel_dep(name = "rules_nodejs", version = "6.7.4") bazel_dep(name = "rules_oci", version = "2.3.0") # rules_python -bazel_dep(name = "rules_python", version = "2.0.0") +bazel_dep(name = "rules_python", version = "2.0.1") # rules_rust bazel_dep(name = "rules_rust", version = "0.70.0") diff --git a/build/deps/gen/deps.MODULE.bazel b/build/deps/gen/deps.MODULE.bazel index a89c3335e05..ef4f84cf7b9 100644 --- 
a/build/deps/gen/deps.MODULE.bazel +++ b/build/deps/gen/deps.MODULE.bazel @@ -136,10 +136,10 @@ bazel_dep(name = "tcmalloc", version = "0.0.0-20250927-12f2552") # workerd-cxx http.archive( name = "workerd-cxx", - sha256 = "fbba1b102b2c4fe879b2f610d7e94ceda6beceac3d57a27196482ce3e9536b50", - strip_prefix = "cloudflare-workerd-cxx-c677ef5", + sha256 = "31052a6fec0da501196a4f026469b837ef688c49b455fc437cdb70281f6b38cb", + strip_prefix = "cloudflare-workerd-cxx-a53da2e", type = "tgz", - url = "https://github.com/cloudflare/workerd-cxx/tarball/c677ef53092a8425ce9f059074441fdb1b7c1ed3", + url = "https://github.com/cloudflare/workerd-cxx/tarball/a53da2e9d35710dcad089574625b6c01cf9535d3", ) use_repo(http, "workerd-cxx") diff --git a/build/deps/update-deps.py b/build/deps/update-deps.py index 4d57c647cec..c781c7a70d1 100755 --- a/build/deps/update-deps.py +++ b/build/deps/update-deps.py @@ -339,6 +339,8 @@ def gen_github_release(repo): type = "tgz" if url.endswith(".zip"): type = "zip" + elif url.endswith(".tar.xz") or url.endswith(".txz"): + type = "tar.xz" elif url.endswith(".xz"): type = "xz" elif url.endswith(".tar.bz2"): diff --git a/build/tools/clang_tidy/clang_tidy.bazelrc b/build/tools/clang_tidy/clang_tidy.bazelrc index 6c6124fbc6f..0cbab1269c0 100644 --- a/build/tools/clang_tidy/clang_tidy.bazelrc +++ b/build/tools/clang_tidy/clang_tidy.bazelrc @@ -1,4 +1,5 @@ # enable clang tidy checks with default configuration +build:lint --config=clang-tidy build:clang-tidy --aspects //build/tools/clang_tidy:clang_tidy.bzl%clang_tidy_aspect --output_groups=+clang_tidy_checks build:clang-tidy-only --aspects //build/tools/clang_tidy:clang_tidy.bzl%clang_tidy_aspect --output_groups=clang_tidy_checks diff --git a/build/tools/clang_tidy/clang_tidy.bzl b/build/tools/clang_tidy/clang_tidy.bzl index 14adc088fef..6a6b291df0d 100644 --- a/build/tools/clang_tidy/clang_tidy.bzl +++ b/build/tools/clang_tidy/clang_tidy.bzl @@ -98,8 +98,11 @@ def _clang_tidy_aspect_impl(target, ctx): 
ctx.attr._clang_tidy_executable.files, ctx.attr._clang_tidy_wrapper.files, ctx.attr._clang_tidy_config.files, + ctx.attr._clang_tidy_plugin.files, ] + plugin_path = ctx.attr._clang_tidy_plugin.files.to_list()[0].path + outs = [] for src in srcs: # run actions need to produce something, declare a dummy file @@ -112,6 +115,7 @@ def _clang_tidy_aspect_impl(target, ctx): # these are consumed by clang_tidy_wrapper,sh args.add(ctx.attr._clang_tidy_executable.files_to_run.executable) args.add(out) + args.add(plugin_path) # clang-tidy arguments # do not print statistics @@ -195,6 +199,10 @@ clang_tidy_aspect = aspect( default = Label("//:clang_tidy_config"), allow_single_file = True, ), + "_clang_tidy_plugin": attr.label( + default = Label("//tools/clang-tidy:jsg-lint"), + allow_single_file = True, + ), "_clang_tidy_compiler_flags": attr.string_list( default = [], ), diff --git a/build/tools/clang_tidy/clang_tidy_wrapper.sh b/build/tools/clang_tidy/clang_tidy_wrapper.sh index 4710c249823..e39d94ae3ef 100755 --- a/build/tools/clang_tidy/clang_tidy_wrapper.sh +++ b/build/tools/clang_tidy/clang_tidy_wrapper.sh @@ -9,6 +9,10 @@ shift OUTPUT=$1 shift +# Path to the workerd jsg-lint plugin shared library. +CLANG_TIDY_PLUGIN=$1 +shift + PWD=$(pwd)/ ESCAPED_PWD=$(sed 's/[\*\.&/]/\\&/g' <<< "$PWD") @@ -18,7 +22,7 @@ ESCAPED_PWD=$(sed 's/[\*\.&/]/\\&/g' <<< "$PWD") CLANG_TIDY_STDERR=$(mktemp) set +e -"${CLANG_TIDY_BIN}" "$@" 2>"$CLANG_TIDY_STDERR" | \ +"${CLANG_TIDY_BIN}" "--load=${CLANG_TIDY_PLUGIN}" "$@" 2>"$CLANG_TIDY_STDERR" | \ # clang-tidy insists on printing absolute file paths, chop current dir off sed "s/$ESCAPED_PWD//g" CLANG_TIDY_EXIT_CODE=$? 
diff --git a/src/node/internal/crypto_keys.ts b/src/node/internal/crypto_keys.ts index 6ebbf98ceba..a1579c72427 100644 --- a/src/node/internal/crypto_keys.ts +++ b/src/node/internal/crypto_keys.ts @@ -375,7 +375,9 @@ export function createSecretKey( key = Buffer.from(new Uint8Array(key)); } else if (isArrayBufferView(key)) { // We want the key to be a copy of the original buffer, not a view. - key = Buffer.from(key as Buffer); + key = Buffer.from( + new Uint8Array(key.buffer, key.byteOffset, key.byteLength) + ); } // Node.js requires that the key data be less than 2 ** 32 - 1, diff --git a/src/node/internal/internal_http_client.ts b/src/node/internal/internal_http_client.ts index 64d3983c159..e7559f147f5 100644 --- a/src/node/internal/internal_http_client.ts +++ b/src/node/internal/internal_http_client.ts @@ -47,6 +47,11 @@ import type { Socket } from 'node:net'; const INVALID_PATH_REGEX = /[^\u0021-\u00ff]/; +// Matches paths that would override the URL authority when passed to +// `new URL(path, base)`: double separators (// /\ \/ \\) or a scheme +// (colon before the first separator). +const AUTHORITY_OVERRIDE_REGEX = /^(?:[/\\]{2}|[^/\\]*:)/; + type WriteCallback = (err?: Error) => void; function validateHost(host: unknown, name: string): string { @@ -116,6 +121,22 @@ export class ClientRequest extends OutgoingMessage implements _ClientRequest { if (INVALID_PATH_REGEX.test(options.path)) { throw new ERR_UNESCAPED_CHARACTERS('Request path'); } + // Reject paths that would override the URL authority when passed to + // `new URL(path, base)`. Two cases: + // + // 1. Network-path references and backslash variants — the WHATWG URL + // parser treats \ as / for special schemes, so any pair of / and \ + // at the start (// /\ \/ \\) introduces an authority. + // + // 2. Absolute-form URLs — a scheme (e.g. "http:") before the first + // separator causes the parser to ignore the base entirely. 
+ if (AUTHORITY_OVERRIDE_REGEX.test(options.path)) { + throw new ERR_INVALID_ARG_VALUE( + 'options.path', + options.path, + 'must be a path-only request target' + ); + } } type AgentLike = Agent | boolean | null | undefined; @@ -353,12 +374,23 @@ export class ClientRequest extends OutgoingMessage implements _ClientRequest { return; } - const host = this.getHeader('host') ?? this.host; - let url = new URL(`http://${host}`); + let url = new URL(`http://${this.host}`); url.protocol = this.protocol; url.port = this.port; if (this.path.length > 0 && this.path !== '/') { + // Defense-in-depth: re-validate in case this.path was mutated after + // construction (the field is public). + if (AUTHORITY_OVERRIDE_REGEX.test(this.path)) { + this.destroy( + new ERR_INVALID_ARG_VALUE( + 'options.path', + this.path, + 'must be a path-only request target' + ) + ); + return; + } // We pass `path` as the first argument since it can contain search and hash components. // Therefore, running the pathname setter will not work. // Since this is an extremely costly operation, we only do it if necessary. 
diff --git a/src/rust/kj/http.rs b/src/rust/kj/http.rs index 4c2bcb9bc48..a04a122c1d6 100644 --- a/src/rust/kj/http.rs +++ b/src/rust/kj/http.rs @@ -12,7 +12,6 @@ use crate::io::AsyncInputStream; use crate::io::AsyncIoStream; #[cxx::bridge(namespace = "kj::rust")] -#[expect(clippy::missing_panics_doc)] #[expect(clippy::missing_safety_doc)] pub mod ffi { unsafe extern "C++" { diff --git a/src/workerd/api/BUILD.bazel b/src/workerd/api/BUILD.bazel index f04560dec53..39964a3d067 100644 --- a/src/workerd/api/BUILD.bazel +++ b/src/workerd/api/BUILD.bazel @@ -568,6 +568,15 @@ kj_test( ], ) +kj_test( + src = "memory-cache-test.c++", + deps = [ + ":memory-cache", + "//src/workerd/io", + "//src/workerd/io:trace", + ], +) + kj_test( src = "streams/internal-test.c++", deps = [ diff --git a/src/workerd/api/actor-state.h b/src/workerd/api/actor-state.h index a2cc488a708..67be1b4e968 100644 --- a/src/workerd/api/actor-state.h +++ b/src/workerd/api/actor-state.h @@ -357,6 +357,10 @@ class DurableObjectStorage: public jsg::Object, public DurableObjectStorageOpera // Set if this is a replica Durable Object. kj::Maybe> maybePrimary; + + void visitForGc(jsg::GcVisitor& visitor) { + visitor.visit(maybePrimary); + } }; class DurableObjectTransaction final: public jsg::Object, public DurableObjectStorageOperations { @@ -533,6 +537,10 @@ class ActorState: public jsg::Object { Worker::Actor::Id id; kj::Maybe> transient; kj::Maybe> persistent; + + void visitForGc(jsg::GcVisitor& visitor) { + visitor.visit(transient, persistent); + } }; class WebSocketRequestResponsePair: public jsg::Object { @@ -773,6 +781,10 @@ class DurableObjectState: public jsg::Object { kj::Maybe> facetManager; kj::Maybe version; + void visitForGc(jsg::GcVisitor& visitor) { + visitor.visit(exports, props, storage, container); + } + // Limits for Hibernatable WebSocket tags. 
const size_t MAX_TAGS_PER_CONNECTION = 10; diff --git a/src/workerd/api/actor.c++ b/src/workerd/api/actor.c++ index 4dea8186a95..9b1fe192dd2 100644 --- a/src/workerd/api/actor.c++ +++ b/src/workerd/api/actor.c++ @@ -15,6 +15,15 @@ namespace workerd::api { +namespace { + +// This number was arbitrarily chosen, but is meant to account for the cost of holding open outbound +// actor connections, which do have a real cost and should be accounted for to encourage GC if they +// accumulate. +constexpr size_t ESTIMATED_EXTERNAL_MEMORY_PER_ACTOR_CHANNEL = 32768; + +} // namespace + kj::Own LocalActorOutgoingFactory::newSingleUseClient( kj::Maybe cfStr) { auto& context = IoContext::current(); @@ -27,6 +36,11 @@ kj::Own LocalActorOutgoingFactory::newSingleUseClient( if (actorChannel == kj::none) { actorChannel = context.getColoLocalActorChannel(channelId, actorId, tracing.getInternalSpanParent()); + + // As in GlobalActorOutgoingFactory, account for external memory used by the open connection. + jsg::Lock& js = context.getCurrentLock(); + channelMemoryAdjustment = + js.getExternalMemoryAdjustment(ESTIMATED_EXTERNAL_MEMORY_PER_ACTOR_CHANNEL); } return KJ_REQUIRE_NONNULL(actorChannel) @@ -60,6 +74,14 @@ kj::Own GlobalActorOutgoingFactory::newSingleUseClient( enableReplicaRouting, routingMode, tracing.getInternalSpanParent(), kj::mv(version)); } } + + // The ActorChannelImpl we just created holds a Cap'n Proto Pipeline::Client representing an + // open connection to the target DO's routing supervisor. Register external memory to pressure + // V8 into collecting this factory's owning stub promptly when it becomes unreachable, + // preventing connection/FD accumulation from stubs that are created and discarded in a loop. 
+ jsg::Lock& js = context.getCurrentLock(); + channelMemoryAdjustment = + js.getExternalMemoryAdjustment(ESTIMATED_EXTERNAL_MEMORY_PER_ACTOR_CHANNEL); } return KJ_REQUIRE_NONNULL(actorChannel) @@ -220,7 +242,8 @@ kj::Own DurableObjectClass::getChannel(IoCo } void DurableObjectClass::serialize(jsg::Lock& js, jsg::Serializer& serializer) { - auto channel = getChannel(IoContext::current()); + auto& ioctx = IoContext::current(); + auto channel = getChannel(ioctx); channel->requireAllowsTransfer(); KJ_IF_SOME(handler, serializer.getExternalHandler()) { @@ -232,10 +255,34 @@ void DurableObjectClass::serialize(jsg::Lock& js, jsg::Serializer& serializer) { JSG_REQUIRE(FeatureFlags::get(js).getWorkerdExperimental(), DOMDataCloneError, "DurableObjectClass serialization requires the 'experimental' compat flag."); - auto token = channel->getToken(IoChannelFactory::ChannelTokenUsage::RPC); - rpcHandler.write([token = kj::mv(token)](rpc::JsValue::External::Builder builder) { - builder.setActorClassChannelToken(token); - }); + KJ_SWITCH_ONEOF(channel->getTokenMaybeSync(IoChannelFactory::ChannelTokenUsage::RPC)) { + KJ_CASE_ONEOF(token, kj::Array) { + rpcHandler.write([token = kj::mv(token)](rpc::JsValue::External::Builder builder) { + builder.setActorClassChannelToken(token); + }); + } + KJ_CASE_ONEOF(promise, kj::Promise>) { + // Token isn't available synchronously, so we have to send a promise. + auto paf = kj::newPromiseAndFulfiller< + rpc::JsValue::ExternalPusher::DelayedChannelToken::Client>(); + + // Arrange to send the token when it's ready. + ioctx.addTask( + promise.then([pusher = rpcHandler.getExternalPusher(), + fulfiller = kj::mv(paf.fulfiller)](kj::Array token) mutable { + auto req = pusher.pushDelayedChannelTokenRequest( + capnp::MessageSize{4 + token.size() / sizeof(capnp::word), 0}); + req.setToken(token); + fulfiller->fulfill(req.send().getCap()); + })); + + // Write the promise for now. 
+ rpcHandler.write( + [promise = kj::mv(paf.promise)](rpc::JsValue::External::Builder builder) mutable { + builder.setDelayedActorClassChannelToken(kj::mv(promise)); + }); + } + } return; } // TODO(someday): structuredClone() should have special handling that just reproduces the same @@ -246,7 +293,16 @@ void DurableObjectClass::serialize(jsg::Lock& js, jsg::Serializer& serializer) { // is temporary, anyone using this will lose their data later. JSG_REQUIRE(FeatureFlags::get(js).getAllowIrrevocableStubStorage(), DOMDataCloneError, "DurableObjectClass cannot be serialized in this context."); - serializer.writeLengthDelimited(channel->getToken(IoChannelFactory::ChannelTokenUsage::STORAGE)); + KJ_SWITCH_ONEOF(channel->getTokenMaybeSync(IoChannelFactory::ChannelTokenUsage::STORAGE)) { + KJ_CASE_ONEOF(token, kj::Array) { + serializer.writeLengthDelimited(token); + } + KJ_CASE_ONEOF(promise, kj::Promise>) { + // TODO(stub-storage): Eventually we'll serialize by pointing to an external table. + KJ_UNIMPLEMENTED( + "tried to store ActorClassChannel whose token is not synchronously available"); + } + } } jsg::Ref DurableObjectClass::deserialize( @@ -273,10 +329,21 @@ jsg::Ref DurableObjectClass::deserialize( "DurableObjectClass serialization requires the 'experimental' compat flag."); auto external = rpcHandler.read(); - KJ_REQUIRE(external.isActorClassChannelToken()); auto& ioctx = IoContext::current(); - auto channel = ioctx.getIoChannelFactory().actorClassFromToken( - IoChannelFactory::ChannelTokenUsage::RPC, external.getActorClassChannelToken()); + kj::Own channel; + + if (external.isDelayedActorClassChannelToken()) { + auto promise = ioctx.getExternalPusher()->unwrapDelayedChannelToken( + external.getDelayedActorClassChannelToken()); + channel = ioctx.getIoChannelFactory().actorClassFromToken( + IoChannelFactory::ChannelTokenUsage::RPC, kj::mv(promise)); + } else if (external.isActorClassChannelToken()) { + channel = ioctx.getIoChannelFactory().actorClassFromToken( + 
IoChannelFactory::ChannelTokenUsage::RPC, external.getActorClassChannelToken()); + } else { + KJ_FAIL_REQUIRE("wrong external type for DurableObjectClass", external.which()); + } + return js.alloc(ioctx.addObject(kj::mv(channel))); } } diff --git a/src/workerd/api/actor.h b/src/workerd/api/actor.h index 7b2976e749e..a4194513b2e 100644 --- a/src/workerd/api/actor.h +++ b/src/workerd/api/actor.h @@ -320,6 +320,11 @@ class GlobalActorOutgoingFactory final: public Fetcher::OutgoingFactory { ActorRoutingMode routingMode; kj::Maybe version; kj::Maybe> actorChannel; + + // Registered when actorChannel is lazily created, to reflect the cost of holding an open + // connection (file descriptor) to the target DO. This pressures V8 to GC the owning stub + // promptly when it becomes unreachable, preventing FD accumulation. + kj::Maybe channelMemoryAdjustment; }; // Like `GlobalActorOutgoingFactory`, but for colo-local actors @@ -335,6 +340,8 @@ class LocalActorOutgoingFactory final: public Fetcher::OutgoingFactory { uint channelId; kj::String actorId; kj::Maybe> actorChannel; + // As in GlobalActorOutgoingFactory, reflects the cost of holding an open connection. + kj::Maybe channelMemoryAdjustment; }; // Like `GlobalActorOutgoingFactory`, but only used for creating a stub to the primary DO so the diff --git a/src/workerd/api/basics.c++ b/src/workerd/api/basics.c++ index 7765f3c8004..6564225f381 100644 --- a/src/workerd/api/basics.c++ +++ b/src/workerd/api/basics.c++ @@ -572,56 +572,6 @@ class AbortTriggerRpcClient final { rpc::AbortTrigger::Client client; }; -namespace { -// The jsrpc handler that receives aborts from the remote and triggers them locally -// -// TODO(cleanup): This class has been copied to external-pusher.c++. The copy here can be -// deleted as soon as we've switched from StreamSink to ExternalPusher and can delete all the -// StreamSink-related code. For now I'm not trying to avoid duplication. 
-class AbortTriggerRpcServer final: public rpc::AbortTrigger::Server { - public: - AbortTriggerRpcServer(kj::Own> fulfiller, - kj::Own&& pendingReason) - : fulfiller(kj::mv(fulfiller)), - pendingReason(kj::mv(pendingReason)) {} - - kj::Promise abort(AbortContext abortCtx) override { - auto params = abortCtx.getParams(); - auto reason = params.getReason().getV8Serialized(); - - pendingReason->getWrapped() = kj::heapArray(reason.asBytes()); - fulfiller->fulfill(); - return kj::READY_NOW; - } - - kj::Promise release(ReleaseContext releaseCtx) override { - released = true; - return kj::READY_NOW; - } - - ~AbortTriggerRpcServer() noexcept(false) { - if (pendingReason->getWrapped() != nullptr) { - // Already triggered - return; - } - - if (!released) { - pendingReason->getWrapped() = JSG_KJ_EXCEPTION(FAILED, DOMAbortError, - "An AbortSignal received over RPC was implicitly aborted because the connection back to " - "its trigger was lost."); - } - - // Always fulfill the promise in case the AbortSignal was waiting - fulfiller->fulfill(); - } - - private: - kj::Own> fulfiller; - kj::Own pendingReason; - bool released = false; -}; -} // namespace - AbortSignal::AbortSignal(kj::Maybe exception, jsg::Optional> maybeReason, Flag flag) @@ -863,21 +813,16 @@ void AbortSignal::serialize(jsg::Lock& js, jsg::Serializer& serializer) { } auto triggerCap = [&]() -> rpc::AbortTrigger::Client { - KJ_IF_SOME(pusher, externalHandler->getExternalPusher()) { - auto pipeline = pusher.pushAbortSignalRequest(capnp::MessageSize{2, 0}).sendForPipeline(); + auto pipeline = externalHandler->getExternalPusher() + .pushAbortSignalRequest(capnp::MessageSize{2, 0}) + .sendForPipeline(); - externalHandler->write( - [signal = pipeline.getSignal()](rpc::JsValue::External::Builder builder) mutable { - builder.setAbortSignal(kj::mv(signal)); - }); + externalHandler->write( + [signal = pipeline.getSignal()](rpc::JsValue::External::Builder builder) mutable { + builder.setAbortSignal(kj::mv(signal)); + }); - 
return pipeline.getTrigger(); - } else { - return externalHandler - ->writeStream([&](rpc::JsValue::External::Builder builder) mutable { - builder.setAbortTrigger(); - }).castAs(); - } + return pipeline.getTrigger(); }(); auto& ioContext = IoContext::current(); @@ -914,24 +859,12 @@ jsg::Ref AbortSignal::deserialize( auto& ioctx = IoContext::current(); auto reader = externalHandler->read(); - if (reader.isAbortTrigger()) { - // Old-style StreamSink. - // TODO(cleanup): Remove this once the ExternalPusher autogate has rolled out. - auto paf = kj::newPromiseAndFulfiller(); - auto pendingReason = ioctx.addObject(kj::refcounted()); - - externalHandler->setLastStream( - kj::heap(kj::mv(paf.fulfiller), kj::addRef(*pendingReason))); - signal->rpcAbortPromise = ioctx.addObject(kj::heap(kj::mv(paf.promise))); - signal->pendingReason = kj::mv(pendingReason); - } else { - KJ_REQUIRE(reader.isAbortSignal(), "external table slot type does't match serialization tag"); + KJ_REQUIRE(reader.isAbortSignal(), "external table slot type does't match serialization tag"); - auto resolvedSignal = ioctx.getExternalPusher()->unwrapAbortSignal(reader.getAbortSignal()); + auto resolvedSignal = ioctx.getExternalPusher()->unwrapAbortSignal(reader.getAbortSignal()); - signal->rpcAbortPromise = ioctx.addObject(kj::heap(kj::mv(resolvedSignal.signal))); - signal->pendingReason = ioctx.addObject(kj::mv(resolvedSignal.reason)); - } + signal->rpcAbortPromise = ioctx.addObject(kj::heap(kj::mv(resolvedSignal.signal))); + signal->pendingReason = ioctx.addObject(kj::mv(resolvedSignal.reason)); return signal; } @@ -1020,6 +953,7 @@ void AbortController::abort(jsg::Lock& js, jsg::Optional maybeReas } void EventTarget::visitForGc(jsg::GcVisitor& visitor) { + visitor.visit(maybeListenerCallback); for (auto& entry: typeMap) { for (auto& handler: entry.value.handlers) { KJ_SWITCH_ONEOF(handler->handler) { diff --git a/src/workerd/api/basics.h b/src/workerd/api/basics.h index 69157057c1c..9b973706c78 100644 --- 
a/src/workerd/api/basics.h +++ b/src/workerd/api/basics.h @@ -300,6 +300,10 @@ class CustomEvent: public Event { private: jsg::Optional> detail; + + void visitForGc(jsg::GcVisitor& visitor) { + visitor.visit(detail); + } }; // An implementation of the Web Platform Standard EventTarget API diff --git a/src/workerd/api/cache.h b/src/workerd/api/cache.h index bafd161d7da..8b5e2d98460 100644 --- a/src/workerd/api/cache.h +++ b/src/workerd/api/cache.h @@ -139,6 +139,10 @@ class CacheStorage: public jsg::Object { private: jsg::Ref default_; + + void visitForGc(jsg::GcVisitor& visitor) { + visitor.visit(default_); + } }; #define EW_CACHE_ISOLATE_TYPES api::CacheStorage, api::Cache, api::CacheQueryOptions diff --git a/src/workerd/api/commonjs.h b/src/workerd/api/commonjs.h index d3235b3475a..833604a47f8 100644 --- a/src/workerd/api/commonjs.h +++ b/src/workerd/api/commonjs.h @@ -20,6 +20,10 @@ class CommonJsModuleObject final: public jsg::Object { JSG_LAZY_READONLY_INSTANCE_PROPERTY(path, getPath); } + void visitForGc(jsg::GcVisitor& visitor) { + visitor.visit(exports); + } + void visitForMemoryInfo(jsg::MemoryTracker& tracker) const; private: @@ -56,6 +60,10 @@ class CommonJsModuleContext final: public jsg::Object { jsg::Ref module; + void visitForGc(jsg::GcVisitor& visitor) { + visitor.visit(module, exports); + } + void visitForMemoryInfo(jsg::MemoryTracker& tracker) const; private: diff --git a/src/workerd/api/container.c++ b/src/workerd/api/container.c++ index 4e1fb2eebb2..6c3cbfe3782 100644 --- a/src/workerd/api/container.c++ +++ b/src/workerd/api/container.c++ @@ -382,46 +382,65 @@ jsg::Promise Container::interceptOutboundHttp( jsg::Lock& js, kj::String addr, jsg::Ref binding) { auto& ioctx = IoContext::current(); auto channel = binding->getSubrequestChannel(ioctx); + return ioctx.awaitIo(js, interceptOutboundHttpImpl(*rpcClient, kj::mv(addr), kj::mv(channel))); +} +kj::Promise Container::interceptOutboundHttpImpl(rpc::Container::Client rpcClient, + kj::String addr, 
+ kj::Own channel) { // Get a channel token for RPC usage, the container runtime can use this // token later to redeem a Fetcher. - auto token = channel->getToken(IoChannelFactory::ChannelTokenUsage::RPC); + kj::Array token = co_await channel->getToken(IoChannelFactory::ChannelTokenUsage::RPC); + { auto drop = kj::mv(channel); } // no longer needed - auto req = rpcClient->setEgressHttpRequest(); + auto req = rpcClient.setEgressHttpRequest(); req.setHostPort(addr); req.setChannelToken(token); - return ioctx.awaitIo(js, req.sendIgnoringResult()); + co_await req.send(); } jsg::Promise Container::interceptAllOutboundHttp(jsg::Lock& js, jsg::Ref binding) { auto& ioctx = IoContext::current(); auto channel = binding->getSubrequestChannel(ioctx); - auto token = channel->getToken(IoChannelFactory::ChannelTokenUsage::RPC); + return ioctx.awaitIo(js, interceptAllOutboundHttpImpl(*rpcClient, kj::mv(channel))); +} + +kj::Promise Container::interceptAllOutboundHttpImpl( + rpc::Container::Client rpcClient, kj::Own channel) { + auto token = co_await channel->getToken(IoChannelFactory::ChannelTokenUsage::RPC); + { auto drop = kj::mv(channel); } // no longer needed // Register for all IPv4 and IPv6 addresses (on port 80) - auto reqV4 = rpcClient->setEgressHttpRequest(); + auto reqV4 = rpcClient.setEgressHttpRequest(); reqV4.setHostPort("0.0.0.0/0"_kj); reqV4.setChannelToken(token); - auto reqV6 = rpcClient->setEgressHttpRequest(); + auto reqV6 = rpcClient.setEgressHttpRequest(); reqV6.setHostPort("::/0"_kj); reqV6.setChannelToken(token); - return ioctx.awaitIo(js, - kj::joinPromisesFailFast(kj::arr(reqV4.sendIgnoringResult(), reqV6.sendIgnoringResult()))); + co_await kj::joinPromisesFailFast( + kj::arr(reqV4.sendIgnoringResult(), reqV6.sendIgnoringResult())); } jsg::Promise Container::interceptOutboundHttps( jsg::Lock& js, kj::String addr, jsg::Ref binding) { auto& ioctx = IoContext::current(); auto channel = binding->getSubrequestChannel(ioctx); - auto token = 
channel->getToken(IoChannelFactory::ChannelTokenUsage::RPC); + return ioctx.awaitIo(js, interceptOutboundHttpsImpl(*rpcClient, kj::mv(addr), kj::mv(channel))); +} - auto req = rpcClient->setEgressHttpsRequest(); +kj::Promise Container::interceptOutboundHttpsImpl(rpc::Container::Client rpcClient, + kj::String addr, + kj::Own channel) { + auto token = co_await channel->getToken(IoChannelFactory::ChannelTokenUsage::RPC); + { auto drop = kj::mv(channel); } // no longer needed + + auto req = rpcClient.setEgressHttpsRequest(); req.setHostPort(addr); req.setChannelToken(token); - return ioctx.awaitIo(js, req.sendIgnoringResult()); + co_await req.send(); } jsg::Promise> Container::exec( @@ -557,15 +576,22 @@ jsg::Promise Container::interceptOutboundTcp( jsg::Lock& js, kj::String addr, jsg::Ref binding) { auto& ioctx = IoContext::current(); auto channel = binding->getSubrequestChannel(ioctx); + return ioctx.awaitIo(js, interceptOutboundTcpImpl(*rpcClient, kj::mv(addr), kj::mv(channel))); +} + +kj::Promise Container::interceptOutboundTcpImpl(rpc::Container::Client rpcClient, + kj::String addr, + kj::Own channel) { // Get a channel token for RPC usage, the container runtime can use this // token later to redeem a Fetcher whose connect() handler processes the TCP stream. 
- auto token = channel->getToken(IoChannelFactory::ChannelTokenUsage::RPC); + auto token = co_await channel->getToken(IoChannelFactory::ChannelTokenUsage::RPC); + { auto drop = kj::mv(channel); } // no longer needed - auto req = rpcClient->setEgressTcpRequest(); + auto req = rpcClient.setEgressTcpRequest(); req.setHostPort(addr); req.setChannelToken(token); - return ioctx.awaitIo(js, req.sendIgnoringResult()); + co_await req.send(); } jsg::Promise Container::monitor(jsg::Lock& js) { diff --git a/src/workerd/api/container.h b/src/workerd/api/container.h index 2bbd745a188..c3d4a26695e 100644 --- a/src/workerd/api/container.h +++ b/src/workerd/api/container.h @@ -297,6 +297,20 @@ class Container: public jsg::Object { class TcpPortWorkerInterface; class TcpPortOutgoingFactory; + + // These helpers are static since they will leave the IoContext on the first co_await, so we + // don't want them trying to access `rpcClient` via the `IoOwn`. + static kj::Promise interceptOutboundHttpImpl(rpc::Container::Client rpcClient, + kj::String addr, + kj::Own channel); + static kj::Promise interceptAllOutboundHttpImpl( + rpc::Container::Client rpcClient, kj::Own channel); + static kj::Promise interceptOutboundHttpsImpl(rpc::Container::Client rpcClient, + kj::String addr, + kj::Own channel); + static kj::Promise interceptOutboundTcpImpl(rpc::Container::Client rpcClient, + kj::String addr, + kj::Own channel); }; #define EW_CONTAINER_ISOLATE_TYPES \ diff --git a/src/workerd/api/crypto/x509.c++ b/src/workerd/api/crypto/x509.c++ index dd613d39c80..050bdc788bf 100644 --- a/src/workerd/api/crypto/x509.c++ +++ b/src/workerd/api/crypto/x509.c++ @@ -625,10 +625,9 @@ kj::Maybe> X509Certificate::getKeyUsage() { kj::Vector ext_key_usage(count); char buf[256]{}; - int j = 0; for (int i = 0; i < count; i++) { if (OBJ_obj2txt(buf, sizeof(buf), sk_ASN1_OBJECT_value(eku.get(), i), 1) >= 0) { - ext_key_usage[j++] = kj::str(buf); + ext_key_usage.add(kj::str(buf)); } } diff --git 
a/src/workerd/api/crypto/x509.h b/src/workerd/api/crypto/x509.h index 7dc1cc59ada..73191b7f035 100644 --- a/src/workerd/api/crypto/x509.h +++ b/src/workerd/api/crypto/x509.h @@ -83,6 +83,10 @@ class X509Certificate: public jsg::Object { private: kj::Own cert_; kj::Maybe> issuerCert_; + + void visitForGc(jsg::GcVisitor& visitor) { + visitor.visit(issuerCert_); + } }; } // namespace workerd::api diff --git a/src/workerd/api/eventsource.c++ b/src/workerd/api/eventsource.c++ index 8ac114dabcb..3b71a9fb460 100644 --- a/src/workerd/api/eventsource.c++ +++ b/src/workerd/api/eventsource.c++ @@ -510,7 +510,7 @@ void EventSource::visitForGc(jsg::GcVisitor& visitor) { KJ_IF_SOME(i, impl) { visitor.visit(i.options.fetcher); } - visitor.visit(abortController); + visitor.visit(abortController, onopenValue, onmessageValue, onerrorValue); } void EventSource::visitForMemoryInfo(jsg::MemoryTracker& tracker) const { @@ -520,6 +520,9 @@ void EventSource::visitForMemoryInfo(jsg::MemoryTracker& tracker) const { } tracker.trackField("abortController", abortController); tracker.trackField("lastEventId", lastEventId); + tracker.trackField("onopen", onopenValue); + tracker.trackField("onmessage", onmessageValue); + tracker.trackField("onerror", onerrorValue); } } // namespace workerd::api diff --git a/src/workerd/api/export-loopback.h b/src/workerd/api/export-loopback.h index 2cb2742866a..b5cb0609fa4 100644 --- a/src/workerd/api/export-loopback.h +++ b/src/workerd/api/export-loopback.h @@ -182,6 +182,10 @@ class LoopbackDurableObjectNamespace: public DurableObjectNamespace { private: jsg::Ref loopbackClass; + + void visitForGc(jsg::GcVisitor& visitor) { + visitor.visit(loopbackClass); + } }; // Like LoopbackDurableObjectNamespace, but for colo-local (ephemeral) actor namespaces. 
@@ -209,6 +213,10 @@ class LoopbackColoLocalActorNamespace: public ColoLocalActorNamespace { private: jsg::Ref loopbackClass; + + void visitForGc(jsg::GcVisitor& visitor) { + visitor.visit(loopbackClass); + } }; #define EW_EXPORT_LOOPBACK_ISOLATE_TYPES \ diff --git a/src/workerd/api/global-scope.c++ b/src/workerd/api/global-scope.c++ index 2f705fe7bbb..21ff2274275 100644 --- a/src/workerd/api/global-scope.c++ +++ b/src/workerd/api/global-scope.c++ @@ -166,8 +166,14 @@ kj::Promise ServiceWorkerGlobalScope::connect(kj::String host, // Has a connect handler! response.accept(200, "OK", headers); - // Using neuterable stream to manage lifetime of stream promises + // Using neuterable stream to manage lifetime of stream promises. We MUST neuter this when the + // promise returned to the caller resolves because `connection` is owned by the caller and will + // be destroyed when that happens, while the JS Socket can outlive us via ctx.waitUntil(). auto ownConnection = newNeuterableIoStream(connection); + auto deferredNeuter = kj::defer([ref = ownConnection.addRef()]() mutable { + ref->neuter(makeNeuterException(NeuterReason::CLIENT_DISCONNECTED)); + }); + KJ_ON_SCOPE_FAILURE(ownConnection->neuter(makeNeuterException(NeuterReason::THREW_EXCEPTION))); auto& ioContext = IoContext::current(); jsg::Lock& js = lock; @@ -180,15 +186,15 @@ kj::Promise ServiceWorkerGlobalScope::connect(kj::String host, // provide a more descriptive error message for HTTP, but this is not relevant on the TCP server // side. jsg::Ref jsSocket = - setupSocket(js, kj::mv(ownConnection), kj::none /* remoteAddress */, kj::mv(host), kj::none, - kj::mv(nullTlsStarter), SecureTransportKind::OFF, kj::none, false, kj::none); + setupSocket(js, ownConnection.addRef().toOwn(), kj::none /* remoteAddress */, kj::mv(host), + kj::none, kj::mv(nullTlsStarter), SecureTransportKind::OFF, kj::none, false, kj::none); // handleProxyStatus() is required to indicate that the socket was opened properly. 
Since the // connection is already open at this point, exception handling is not required. jsSocket->handleProxyStatus(js, kj::Promise>(kj::none)); kj::Maybe span = ioContext.makeTraceSpan("connect_handler"_kjc); auto promise = handler(js, kj::mv(jsSocket), eh.env.addRef(js), eh.getCtx()); - return ioContext.awaitJs(js, kj::mv(promise)).attach(kj::mv(span)); + return ioContext.awaitJs(js, kj::mv(promise)).attach(kj::mv(span), kj::mv(deferredNeuter)); } lock.logWarningOnce("Received a connect event but we lack a handler. " "Did you remember to export a connect() function?"); diff --git a/src/workerd/api/global-scope.h b/src/workerd/api/global-scope.h index 3befcebbe8e..0abc00c27c1 100644 --- a/src/workerd/api/global-scope.h +++ b/src/workerd/api/global-scope.h @@ -377,6 +377,7 @@ class ExecutionContext: public jsg::Object { kj::Maybe> version; void visitForGc(jsg::GcVisitor& visitor) { + visitor.visit(exports); visitor.visit(props); visitor.visit(version); } @@ -1080,6 +1081,10 @@ class ServiceWorkerGlobalScope: public WorkerGlobalScope { kj::Maybe> defaultFetcher; kj::HashMap connectOverrides; + void visitForGc(jsg::GcVisitor& visitor) { + visitor.visit(processValue, bufferValue, defaultFetcher); + } + // Global properties such as scheduler, crypto, caches, self, and origin should // be monkeypatchable / mutable at the global scope. 
}; diff --git a/src/workerd/api/headers.c++ b/src/workerd/api/headers.c++ index 2a362806f37..1efea68ff5f 100644 --- a/src/workerd/api/headers.c++ +++ b/src/workerd/api/headers.c++ @@ -623,7 +623,7 @@ void Headers::setUnguarded(jsg::Lock& js, kj::String name, kj::String value) { void Headers::setCommon(capnp::CommonHeaderName idx, kj::String value) { kj::uint index = static_cast(idx); - KJ_DASSERT(index <= Headers::MAX_COMMON_HEADER_ID); + value = normalizeHeaderValue(getCommonHeaderName(index), kj::mv(value)); KJ_IF_SOME(existing, commonHeaders[index]) { existing->values.resize(1); existing->values[0] = kj::mv(value); diff --git a/src/workerd/api/http.c++ b/src/workerd/api/http.c++ index 2be9a5b8f4f..1f5c3a16476 100644 --- a/src/workerd/api/http.c++ +++ b/src/workerd/api/http.c++ @@ -2141,7 +2141,8 @@ rpc::JsRpcTarget::Client Fetcher::getClientForOneCall( } void Fetcher::serialize(jsg::Lock& js, jsg::Serializer& serializer) { - auto channel = getSubrequestChannel(IoContext::current()); + auto& ioctx = IoContext::current(); + auto channel = getSubrequestChannel(ioctx); channel->requireAllowsTransfer(); KJ_IF_SOME(handler, serializer.getExternalHandler()) { @@ -2153,10 +2154,34 @@ void Fetcher::serialize(jsg::Lock& js, jsg::Serializer& serializer) { JSG_REQUIRE(FeatureFlags::get(js).getWorkerdExperimental(), DOMDataCloneError, "ServiceStub serialization requires the 'experimental' compat flag."); - auto token = channel->getToken(IoChannelFactory::ChannelTokenUsage::RPC); - rpcHandler.write([token = kj::mv(token)](rpc::JsValue::External::Builder builder) { - builder.setSubrequestChannelToken(token); - }); + KJ_SWITCH_ONEOF(channel->getTokenMaybeSync(IoChannelFactory::ChannelTokenUsage::RPC)) { + KJ_CASE_ONEOF(token, kj::Array) { + rpcHandler.write([token = kj::mv(token)](rpc::JsValue::External::Builder builder) { + builder.setSubrequestChannelToken(token); + }); + } + KJ_CASE_ONEOF(promise, kj::Promise>) { + // Token isn't available synchronously, so we have to send a 
promise. + auto paf = kj::newPromiseAndFulfiller< + rpc::JsValue::ExternalPusher::DelayedChannelToken::Client>(); + + // Arrange to send the token when it's ready. + ioctx.addTask(promise + .then([pusher = rpcHandler.getExternalPusher(), fulfiller = kj::mv(paf.fulfiller)] + (kj::Array token) mutable { + auto req = pusher.pushDelayedChannelTokenRequest( + capnp::MessageSize { 4 + token.size() / sizeof(capnp::word), 0 }); + req.setToken(token); + fulfiller->fulfill(req.send().getCap()); + })); + + // Write the promise for now. + rpcHandler.write([promise = kj::mv(paf.promise)] + (rpc::JsValue::External::Builder builder) mutable{ + builder.setDelayedSubrequestChannelToken(kj::mv(promise)); + }); + } + } return; } // TODO(someday): structuredClone() should have special handling that just reproduces the same @@ -2167,7 +2192,16 @@ void Fetcher::serialize(jsg::Lock& js, jsg::Serializer& serializer) { // is temporary, anyone using this will lose their data later. JSG_REQUIRE(FeatureFlags::get(js).getAllowIrrevocableStubStorage(), DOMDataCloneError, "ServiceStub cannot be serialized in this context."); - serializer.writeLengthDelimited(channel->getToken(IoChannelFactory::ChannelTokenUsage::STORAGE)); + KJ_SWITCH_ONEOF(channel->getTokenMaybeSync(IoChannelFactory::ChannelTokenUsage::STORAGE)) { + KJ_CASE_ONEOF(token, kj::Array) { + serializer.writeLengthDelimited(token); + } + KJ_CASE_ONEOF(promise, kj::Promise>) { + // TODO(stub-storage): Eventually we'll serialize by pointing to an external table. 
+ KJ_UNIMPLEMENTED( + "tried to store SubrequestChannel whose token is not synchronously available"); + } + } } jsg::Ref Fetcher::deserialize(jsg::Lock& js, @@ -2194,11 +2228,22 @@ jsg::Ref Fetcher::deserialize(jsg::Lock& js, "ServiceStub serialization requires the 'experimental' compat flag."); auto external = rpcHandler.read(); - KJ_REQUIRE(external.isSubrequestChannelToken()); auto& ioctx = IoContext::current(); - auto channel = ioctx.getIoChannelFactory().subrequestChannelFromToken( - IoChannelFactory::ChannelTokenUsage::RPC, - external.getSubrequestChannelToken()); + kj::Own channel; + + if (external.isDelayedSubrequestChannelToken()) { + auto promise = ioctx.getExternalPusher()->unwrapDelayedChannelToken( + external.getDelayedSubrequestChannelToken()); + channel = ioctx.getIoChannelFactory().subrequestChannelFromToken( + IoChannelFactory::ChannelTokenUsage::RPC, kj::mv(promise)); + } else if (external.isSubrequestChannelToken()) { + channel = ioctx.getIoChannelFactory().subrequestChannelFromToken( + IoChannelFactory::ChannelTokenUsage::RPC, + external.getSubrequestChannelToken()); + } else { + KJ_FAIL_REQUIRE("wrong external type for Fetcher", external.which()); + } + return js.alloc(ioctx.addObject(kj::mv(channel))); } } diff --git a/src/workerd/api/memory-cache-test.c++ b/src/workerd/api/memory-cache-test.c++ new file mode 100644 index 00000000000..6d2947e80fc --- /dev/null +++ b/src/workerd/api/memory-cache-test.c++ @@ -0,0 +1,75 @@ +// Regression test: a FallbackDoneCallback returned by getWithFallback() must +// remain safe to invoke even after the SharedMemoryCache::Use that created it +// has been destroyed. Previously, the callback captured a bare pointer to the +// Use, leading to a use-after-free when the callback outlived the Use. +// +// This is representative of production behavior: MemoryCache::read() on a +// shared cache can queue fallback callbacks across isolates via +// CrossThreadPromiseFulfiller. 
If one worker's fallback fails, +// handleFallbackFailure() ships a new FallbackDoneCallback to the next queued +// worker — which may be on a different thread. If the originating worker's +// isolate is torn down before that callback fires, the Use is destroyed while +// the callback is still live. This test simulates that sequence: obtain a +// callback, destroy the Use, then invoke it. + +#include "memory-cache.h" + +#include + +#include + +namespace workerd::api { +namespace { + +static SharedMemoryCache::Limits testLimits() { + return { + .maxKeys = 100, + .maxValueSize = 1024, + .maxTotalValueSize = 10240, + }; +} + +KJ_TEST("regression: FallbackDoneCallback survives Use destruction") { + kj::EventLoop loop; + kj::WaitScope waitScope(loop); + + const auto& clock = kj::systemCoarseMonotonicClock(); + auto cache = SharedMemoryCache::create(kj::none, "test-cache"_kj, kj::none, clock); + + auto limits = testLimits(); + auto key = kj::str("test-key"); + + SpanBuilder noopSpan(nullptr); + + kj::Maybe savedCallback; + + { + SharedMemoryCache::Use useA(kj::atomicAddRef(*cache), limits); + + // Trigger a cache miss and save the callback. + auto result = useA.getWithFallback(key, noopSpan); + KJ_ASSERT(result.is>()); + auto& promise = result.get>(); + KJ_ASSERT(promise.poll(waitScope)); + auto outcome = promise.wait(waitScope); + KJ_ASSERT(outcome.is()); + savedCallback = kj::mv(outcome.get()); + } + + auto& callback = KJ_ASSERT_NONNULL(savedCallback); + callback(kj::none, noopSpan); + + // If we reach here without crashing, the fix is working. The InProgress + // entry should have been cleaned up since there are no waiters. + + // Verify the cache is still functional after the callback. + { + SharedMemoryCache::Use useC(kj::atomicAddRef(*cache), limits); + auto cached = useC.getWithoutFallback(key, noopSpan); + // Key should not be in cache (fallback failed, no value stored). 
+ KJ_ASSERT(cached == kj::none); + } +} + +} // namespace +} // namespace workerd::api diff --git a/src/workerd/api/memory-cache.c++ b/src/workerd/api/memory-cache.c++ index e69f4e64359..c3b9a58343a 100644 --- a/src/workerd/api/memory-cache.c++ +++ b/src/workerd/api/memory-cache.c++ @@ -212,7 +212,10 @@ void SharedMemoryCache::evictNextWhileLocked( KJ_REQUIRE(data.cache.size() > 0); // Create eviction span - only called from IO context - auto evictionSpan = IoContext::current().makeTraceSpan("memory_cache_eviction"_kjc); + SpanBuilder evictionSpan = nullptr; + KJ_IF_SOME(ctx, IoContext::tryCurrent()) { + evictionSpan = ctx.makeTraceSpan("memory_cache_eviction"_kjc); + } // If there is an entry that has expired already, evict that one. MemoryCacheEntry& maybeExpired = *data.cache.ordered<3>().begin(); @@ -349,28 +352,60 @@ SharedMemoryCache::Use::getWithFallback(const kj::String& key, SpanBuilder& read SharedMemoryCache::Use::FallbackDoneCallback SharedMemoryCache::Use::prepareFallback( InProgress& inProgress) const { - // We need to detect if the Promise that we are about to create ever settles, - // as opposed to being destroyed without either being resolved or rejecting. + return SharedMemoryCache::prepareFallback(*cache, inProgress); +} + +void SharedMemoryCache::Use::handleFallbackFailure(InProgress& inProgress) const { + SharedMemoryCache::handleFallbackFailure(*cache, inProgress); +} + +void SharedMemoryCache::handleFallbackFailure( + const SharedMemoryCache& cache, InProgress& inProgress) { + kj::Own> nextFulfiller; + + // If there is another queued fallback, retrieve it and remove it from the + // queue. Otherwise, just delete the queue entirely. + { + auto data = cache.data.lockExclusive(); + + KJ_IF_SOME(next, inProgress.waiting.pop()) { + nextFulfiller = kj::mv(next.fulfiller); + } else { + data->inProgress.eraseMatch(inProgress.key); + } + } + + // fulfill() might destroy the Promise returned by prepareFallback(). 
In + // particular, that will happen if the I/O context that the fulfiller was + // created for has been canceled or destroyed, in which case the promise + // associated with the fulfiller has been destroyed. When the promise returned + // by prepareFallback() is destroyed without having settled, it will recover + // from that, but it will lock the cache while doing so. That is why it is + // important that the cache is not already locked when we call fulfill(). + if (nextFulfiller) { + nextFulfiller->fulfill(SharedMemoryCache::prepareFallback(cache, inProgress)); + } +} + +SharedMemoryCache::Use::FallbackDoneCallback SharedMemoryCache::prepareFallback( + const SharedMemoryCache& cacheArg, InProgress& inProgress) { struct FallbackStatus { bool hasSettled = false; }; auto status = kj::heap(); auto& statusRef = *status; - auto deferredCancel = kj::defer([this, status = kj::mv(status), &inProgress]() { - // If the callback was destroyed without having run (for example, because - // it was added to an I/O context that has since been canceled), we treat - // it as if the promise had failed. + auto deferredCancel = kj::defer( + [cache = kj::atomicAddRef(cacheArg), status = kj::mv(status), &inProgress]() mutable { if (!status->hasSettled) { - handleFallbackFailure(inProgress); + SharedMemoryCache::handleFallbackFailure(*cache, inProgress); } }); - return [this, &inProgress, &status = statusRef, deferredCancel = kj::mv(deferredCancel)]( - kj::Maybe maybeResult, SpanBuilder& fallbackSpan) mutable { + return [cache = kj::atomicAddRef(cacheArg), &inProgress, &status = statusRef, + deferredCancel = kj::mv(deferredCancel)]( + kj::Maybe maybeResult, SpanBuilder& fallbackSpan) mutable { KJ_IF_SOME(result, maybeResult) { - // The fallback succeeded. Store the value in the cache and propagate it to - // all waiting requests, even if it has expired already. 
status.hasSettled = true; auto data = cache->data.lockExclusive(); @@ -383,45 +418,14 @@ SharedMemoryCache::Use::FallbackDoneCallback SharedMemoryCache::Use::prepareFall [&](auto&& waiter) { waiter.fulfiller->fulfill(kj::atomicAddRef(*result.value)); }); data->inProgress.eraseMatch(inProgress.key); - // Track the completion of fallback and distribution to waiters fallbackSpan.setTag("waiters_notified"_kjc, static_cast(waiterCount)); } else { - // The fallback failed for some reason. We do not care much about why it - // failed. If there are other queued fallbacks, handelFallbackFailure will - // schedule the next one. status.hasSettled = true; - handleFallbackFailure(inProgress); + SharedMemoryCache::handleFallbackFailure(*cache, inProgress); } }; } -void SharedMemoryCache::Use::handleFallbackFailure(InProgress& inProgress) const { - kj::Own> nextFulfiller; - - // If there is another queued fallback, retrieve it and remove it from the - // queue. Otherwise, just delete the queue entirely. - { - auto data = cache->data.lockExclusive(); - - KJ_IF_SOME(next, inProgress.waiting.pop()) { - nextFulfiller = kj::mv(next.fulfiller); - } else { - data->inProgress.eraseMatch(inProgress.key); - } - } - - // fulfill() might destroy the Promise returned by prepareFallback(). In - // particular, that will happen if the I/O context that the fulfiller was - // created for has been canceled or destroyed, in which case the promise - // associated with the fulfiller has been destroyed. When the promise returned - // by prepareFallback() is destroyed without having settled, it will recover - // from that, but it will lock the cache while doing so. That is why it is - // important that the cache is not already locked when we call fulfill(). 
- if (nextFulfiller) { - nextFulfiller->fulfill(prepareFallback(inProgress)); - } -} - void SharedMemoryCache::Use::delete_(const kj::String& key) const { auto data = cache->data.lockExclusive(); cache->removeIfExistsWhileLocked(*data, key); diff --git a/src/workerd/api/memory-cache.h b/src/workerd/api/memory-cache.h index 5be102176e0..fdbef22c1b7 100644 --- a/src/workerd/api/memory-cache.h +++ b/src/workerd/api/memory-cache.h @@ -295,6 +295,10 @@ class SharedMemoryCache: public kj::AtomicRefcounted { // Removes the cache entry with the given key, if it exists. void removeIfExistsWhileLocked(ThreadUnsafeData& data, const kj::String& key) const; + static Use::FallbackDoneCallback prepareFallback( + const SharedMemoryCache& cache, InProgress& inProgress); + static void handleFallbackFailure(const SharedMemoryCache& cache, InProgress& inProgress); + // Callbacks for a HashIndex that allow locating cache entries based on the // cache key, which is a string. This is used for all key-based cache // operations. diff --git a/src/workerd/api/messagechannel.c++ b/src/workerd/api/messagechannel.c++ index 224a69d948b..515c8510773 100644 --- a/src/workerd/api/messagechannel.c++ +++ b/src/workerd/api/messagechannel.c++ @@ -110,6 +110,13 @@ void MessagePort::postMessage(jsg::Lock& js, // If the port is closed, other will be kj::none and we will just drop the message. other->runIfAlive([&](MessagePort& o) { + // Take a strong reference to prevent GC from freeing the target port during + // serialization. Serialization can run arbitrary user code via custom getters + // on the message object. That code could close this port (which also closes + // the entangled port), and then force GC to free the target port — leaving + // the `o` reference dangling for the deliver() call below. 
+ auto ref = o.addRef(); + jsg::Serializer ser(js); KJ_IF_SOME(d, data) { diff --git a/src/workerd/api/messagechannel.h b/src/workerd/api/messagechannel.h index 1cf8a050883..62bdf119be4 100644 --- a/src/workerd/api/messagechannel.h +++ b/src/workerd/api/messagechannel.h @@ -141,6 +141,13 @@ class MessagePort final: public EventTarget { // ports! kj::Own> other; kj::Maybe> onmessageValue; + + void visitForGc(jsg::GcVisitor& visitor) { + KJ_IF_SOME(pending, state.tryGet()) { + visitor.visitAll(pending); + } + visitor.visit(onmessageValue); + } }; // MessageChannel is simple enough... create a couple of MessagePorts @@ -169,6 +176,10 @@ class MessageChannel final: public jsg::Object { private: jsg::Ref port1; jsg::Ref port2; + + void visitForGc(jsg::GcVisitor& visitor) { + visitor.visit(port1, port2); + } }; // Module that exposes MessageChannel and MessagePort for internal use by diff --git a/src/workerd/api/node/async-hooks.h b/src/workerd/api/node/async-hooks.h index d6770bf7948..75e25afb0c1 100644 --- a/src/workerd/api/node/async-hooks.h +++ b/src/workerd/api/node/async-hooks.h @@ -108,6 +108,10 @@ class AsyncLocalStorage final: public jsg::Object { kj::Own key; kj::Maybe> defaultValue; kj::Maybe name; + + void visitForGc(jsg::GcVisitor& visitor) { + visitor.visit(defaultValue); + } }; // Note: The AsyncResource class is provided for Node.js backwards compatibility. 
diff --git a/src/workerd/api/node/crypto.h b/src/workerd/api/node/crypto.h index cb28ba1c97d..fd753c626dc 100644 --- a/src/workerd/api/node/crypto.h +++ b/src/workerd/api/node/crypto.h @@ -363,6 +363,10 @@ class CryptoImpl final: public jsg::Object { kj::Maybe maybeAuthInfo; bool authTagPassed = false; bool pendingAuthFailed = false; + + void visitForGc(jsg::GcVisitor& visitor) { + visitor.visit(key); + } }; /* @@ -442,6 +446,10 @@ class CryptoImpl final: public jsg::Object { kj::Maybe maybeAuthInfo; kj::Maybe> maybeAad; bool updated = false; + + void visitForGc(jsg::GcVisitor& visitor) { + visitor.visit(key); + } }; kj::OneOf, jsg::Ref> newHandle(jsg::Lock& js, diff --git a/src/workerd/api/node/diagnostics-channel.c++ b/src/workerd/api/node/diagnostics-channel.c++ index 617f93620f7..86a5e16ef71 100644 --- a/src/workerd/api/node/diagnostics-channel.c++ +++ b/src/workerd/api/node/diagnostics-channel.c++ @@ -116,6 +116,9 @@ v8::Local Channel::runStores(jsg::Lock& js, } void Channel::visitForGc(jsg::GcVisitor& visitor) { + // `name` (jsg::Name) is intentionally not visited here: jsg::Name's + // visitForGc is private and visitation happens through NameWrapper, not + // through GcVisitor::visit(). for (auto& sub: subscribers) { visitor.visit(sub.key, sub.value); } diff --git a/src/workerd/api/node/diagnostics-channel.h b/src/workerd/api/node/diagnostics-channel.h index a6efec4bbfa..3d3c0e1013d 100644 --- a/src/workerd/api/node/diagnostics-channel.h +++ b/src/workerd/api/node/diagnostics-channel.h @@ -73,7 +73,10 @@ class Channel: public jsg::Object { } }; - jsg::Name name; + // jsg::Name has a private visitForGc and is visited through NameWrapper + // rather than through the GcVisitor::visit() overload set, so we cannot + // and do not visit it from Channel::visitForGc. 
+ jsg::Name name; // NOLINT(jsg-visit-for-gc) kj::HashMap, MessageCallback> subscribers; kj::Table> stores; diff --git a/src/workerd/api/node/tests/BUILD.bazel b/src/workerd/api/node/tests/BUILD.bazel index a2e66e18718..8963d5c8c30 100644 --- a/src/workerd/api/node/tests/BUILD.bazel +++ b/src/workerd/api/node/tests/BUILD.bazel @@ -15,6 +15,13 @@ wd_test( data = ["buffer-nodejs-test.js"], ) +wd_test( + size = "enormous", + src = "buffer-base64-large-test.wd-test", + args = ["--experimental"], + data = ["buffer-base64-large-test.js"], +) + wd_test( src = "cluster-nodejs-test.wd-test", args = ["--experimental"], @@ -222,6 +229,12 @@ wd_test( data = ["zlib-nodejs-test.js"], ) +wd_test( + src = "gc-tracing-nodejs-test.wd-test", + args = ["--experimental"], + data = ["gc-tracing-nodejs-test.js"], +) + wd_test( size = "large", src = "zlib-zstd-nodejs-test.wd-test", @@ -523,9 +536,16 @@ wd_test( "REQUEST_ARGUMENTS_PORT", "HELLO_WORLD_SERVER_PORT", "GZIP_SERVER_PORT", + "HOST_ECHO_SERVER_PORT", ], ) +wd_test( + src = "http-client-path-ssrf-test.wd-test", + args = ["--experimental"], + data = ["http-client-path-ssrf-test.js"], +) + js_binary( name = "http-server-nodejs-server", entry_point = "http-server-nodejs-server.js", diff --git a/src/workerd/api/node/tests/buffer-base64-large-test.js b/src/workerd/api/node/tests/buffer-base64-large-test.js new file mode 100644 index 00000000000..f2ca382af21 --- /dev/null +++ b/src/workerd/api/node/tests/buffer-base64-large-test.js @@ -0,0 +1,64 @@ +// Copyright (c) 2026 Cloudflare, Inc. +// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 +// +// Regression test for a vulnerability where Buffer.toString('base64') on very +// large buffers caused a heap-buffer-overflow. The V8 string creation APIs take +// `int` for the length parameter, but js.str() was passing size_t values that +// could overflow int. 
With base64 output exceeding ~2.1GB (from input > ~1.6GB), +// the truncated length appeared negative, causing V8 to fall back to strlen() +// and read past the end of the buffer. +// +// The fix adds a check against v8::String::kMaxLength in js.str() before the +// implicit narrowing to int. This test verifies that: +// 1. Normal-sized base64 encoding still works correctly. +// 2. Buffers whose base64 output exceeds kMaxLength throw a RangeError +// instead of crashing. + +import { Buffer } from 'node:buffer'; +import { strictEqual, throws } from 'node:assert'; + +export const base64SmallBuffer = { + test() { + // Sanity check: base64 encoding works correctly at normal sizes. + const buf = Buffer.from('Hello, World!'); + strictEqual(buf.toString('base64'), 'SGVsbG8sIFdvcmxkIQ=='); + }, +}; + +export const base64urlSmallBuffer = { + test() { + // Same sanity check for base64url encoding. + const buf = Buffer.from('Hello, World!'); + strictEqual(buf.toString('base64url'), 'SGVsbG8sIFdvcmxkIQ'); + }, +}; + +export const base64LargeBufferThrowsRangeError = { + test() { + // v8::String::kMaxLength is (1 << 29) - 24 = 536,870,888 on 64-bit. + // Base64 expands by 4/3, so a buffer of 403,000,000 bytes produces + // base64 output of ~537,333,336 bytes, just over the limit. + // This must throw a RangeError, not crash. + const size = 403_000_000; + const buf = Buffer.alloc(size); + strictEqual(buf.length, size, 'Buffer allocation must have succeeded'); + throws(() => buf.toString('base64'), { + name: 'RangeError', + message: /String is too long for a V8 string/, + }); + }, +}; + +export const base64urlLargeBufferThrowsRangeError = { + test() { + // Same test for base64url encoding. 
+ const size = 403_000_000; + const buf = Buffer.alloc(size); + strictEqual(buf.length, size, 'Buffer allocation must have succeeded'); + throws(() => buf.toString('base64url'), { + name: 'RangeError', + message: /String is too long for a V8 string/, + }); + }, +}; diff --git a/src/workerd/api/node/tests/buffer-base64-large-test.wd-test b/src/workerd/api/node/tests/buffer-base64-large-test.wd-test new file mode 100644 index 00000000000..8256071cb70 --- /dev/null +++ b/src/workerd/api/node/tests/buffer-base64-large-test.wd-test @@ -0,0 +1,14 @@ +using Workerd = import "/workerd/workerd.capnp"; + +const unitTests :Workerd.Config = ( + services = [ + ( name = "buffer-base64-large-test", + worker = ( + modules = [ + (name = "worker", esModule = embed "buffer-base64-large-test.js") + ], + compatibilityFlags = ["nodejs_compat_v2"], + ) + ), + ], +); diff --git a/src/workerd/api/node/tests/crypto_keys-test.js b/src/workerd/api/node/tests/crypto_keys-test.js index 0772b5c2b47..cc682989238 100644 --- a/src/workerd/api/node/tests/crypto_keys-test.js +++ b/src/workerd/api/node/tests/crypto_keys-test.js @@ -2191,3 +2191,59 @@ export const export_encrypted_ec_private_key = { ); }, }; + +// Regression test for AUTOVULN-CLOUDFLARE-WORKERD-30: +// createSecretKey() must correctly copy DataView key material. +// Previously, Buffer.from(dataView) produced an empty Buffer because +// DataView has no .length property, causing the key material to be lost. 
+export const regression_create_secret_key_dataview = { + test() { + // Create a 16-byte ArrayBuffer filled with 0x41 ('A') + const ab = new ArrayBuffer(16); + new Uint8Array(ab).fill(0x41); + + // Create a secret key from a DataView over the full buffer + const dvFull = new DataView(ab); + const keyFull = createSecretKey(dvFull); + const exported = keyFull.export(); + + // The exported key must contain exactly the 16 bytes, not be empty + strictEqual(exported.length, 16, 'DataView key material must not be empty'); + strictEqual( + exported.toString(), + 'A'.repeat(16), + 'DataView key material must match the original bytes' + ); + strictEqual(keyFull.symmetricKeySize, 16); + + // Also verify a DataView over a sub-range of the buffer works correctly + const abLarge = new ArrayBuffer(32); + const u8 = new Uint8Array(abLarge); + u8.fill(0x42); // fill with 'B' + u8.fill(0x43, 8, 16); // bytes 8..15 = 'C' + + const dvSlice = new DataView(abLarge, 8, 8); + const keySlice = createSecretKey(dvSlice); + const exportedSlice = keySlice.export(); + + strictEqual( + exportedSlice.length, + 8, + 'DataView sub-range key must have correct length' + ); + strictEqual( + exportedSlice.toString(), + 'C'.repeat(8), + 'DataView sub-range key must contain the correct bytes' + ); + strictEqual(keySlice.symmetricKeySize, 8); + + // Verify the DataView-created key matches a Uint8Array-created key + // over the same bytes + const keyFromUint8 = createSecretKey(Buffer.from('A'.repeat(16))); + ok( + keyFull.equals(keyFromUint8), + 'DataView key must equal Buffer key with same bytes' + ); + }, +}; diff --git a/src/workerd/api/node/tests/gc-tracing-nodejs-test.js b/src/workerd/api/node/tests/gc-tracing-nodejs-test.js new file mode 100644 index 00000000000..b0b6acb601e --- /dev/null +++ b/src/workerd/api/node/tests/gc-tracing-nodejs-test.js @@ -0,0 +1,104 @@ +// Copyright (c) 2026 Cloudflare, Inc. 
+// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 + +import { ok } from 'node:assert'; +import { + inflateSync, + deflateSync, + brotliCompressSync, + brotliDecompressSync, + createInflate, +} from 'node:zlib'; + +// Deflate-compressed block of 1024 zero bytes, built once at module load and +// reused as the input of every inflateSync() call in this file. +const COMPRESSED_DEFLATE = deflateSync(new Uint8Array(1024)); + +// Forces four GC passes, yielding to the event loop before each one. +// Needs globalThis.gc, i.e. the --expose-gc V8 flag from the .wd-test config. +async function awaitGc() { + // Multiple GC passes with yields between them; gives the cycle collector + // room to reclaim and avoids the conservative stack scanner pinning the + // most recent allocation. scheduler.wait is a Workers-platform extension. + for (let i = 0; i < 4; i++) { + await scheduler.wait(0); + globalThis.gc(); + } +} + +// Calls fn() 256 times and returns a WeakRef to the `engine` of each result. +// Performing the allocation loop inside a separate function ensures the +// caller's stack frame doesn't keep the last allocated engine rooted +// (V8's conservative stack scanner can otherwise pin the most recent +// value via a register/spill slot). +function collectRefs(fn) { + const refs = []; + for (let i = 0; i < 256; i++) { + const r = fn(); + ok(r.engine, 'engine should be present on info result'); + refs.push(new WeakRef(r.engine)); + } + return refs; +} + +// Runs awaitGc(), then asserts that at most one WeakRef in `refs` still +// resolves. `label` only appears in the failure message. +async function expectAllCollected(refs, label) { + await awaitGc(); + let alive = 0; + for (const ref of refs) { + if (ref.deref() !== undefined) alive++; + } + // Allow at most a single straggler. V8's conservative stack scanner + // can keep the most recently allocated object rooted via a stale + // register/spill slot for one extra cycle. The leak we are testing + // for is uncollectable cycles, which would leave all of them alive. + ok( + alive <= 1, + `expected ${label} engines to be collected, ${alive} of ${refs.length} still alive` + ); +} + +// Regression tests for a memory leak that affected the slow path of the sync +// zlib convenience methods (i.e. `{ info: true }`). 
Each call constructed a +// JSG-bound CompressionStream wrapper that held a `jsg::Function` writeCallback +// capturing the JS handle, forming an uncollectable JS<->C++ cycle. The fix +// adds visitForGc() to CompressionStream so V8 can trace through the C++->JS +// edge and collect the cycle. +// +// We verify the fix by holding WeakRefs to the engines returned by `info: true` +// and asserting they are reclaimed after a GC. Without visitForGc tracing the +// cycle is immortal and the WeakRefs would still resolve. + +// inflateSync() with { info: true } over the shared pre-compressed input. +export const inflateSyncInfoCollects = { + async test() { + const refs = collectRefs(() => + inflateSync(COMPRESSED_DEFLATE, { info: true }) + ); + await expectAllCollected(refs, 'inflate'); + }, +}; + +// deflateSync() with { info: true } over 1024 zero bytes. +export const deflateSyncInfoCollects = { + async test() { + const input = new Uint8Array(1024); + const refs = collectRefs(() => deflateSync(input, { info: true })); + await expectAllCollected(refs, 'deflate'); + }, +}; + +// Brotli: the input is compressed once up front; only brotliDecompressSync() +// is called with { info: true } and checked for collection. +export const brotliSyncInfoCollects = { + async test() { + const input = new Uint8Array(1024); + const compressed = brotliCompressSync(input); + const refs = collectRefs(() => + brotliDecompressSync(compressed, { info: true }) + ); + await expectAllCollected(refs, 'brotli'); + }, +}; + +// Specifically exercises the visitForGc path: createInflate() attaches both +// writeCallback and errorHandler, forming the JS<->C++ cycle. Dropping the +// reference without end()/destroy()/close() bypasses the eager-clear in +// close() and leaves only the GC visitor to break the cycle. 
+export const createInflateAbandonedCollects = { + async test() { + // collectRefs() reads `.engine` from each result, so wrap the bare stream + // in an object of that shape. + const refs = collectRefs(() => ({ engine: createInflate() })); + await expectAllCollected(refs, 'createInflate-abandoned'); + }, +}; diff --git a/src/workerd/api/node/tests/gc-tracing-nodejs-test.wd-test b/src/workerd/api/node/tests/gc-tracing-nodejs-test.wd-test new file mode 100644 index 00000000000..7d1879fccb7 --- /dev/null +++ b/src/workerd/api/node/tests/gc-tracing-nodejs-test.wd-test @@ -0,0 +1,15 @@ +using Workerd = import "/workerd/workerd.capnp"; + +const unitTests :Workerd.Config = ( + v8Flags = ["--expose-gc"], + services = [ + ( name = "gc-tracing-nodejs-test", + worker = ( + modules = [ + (name = "worker", esModule = embed "gc-tracing-nodejs-test.js") + ], + compatibilityFlags = ["experimental", "nodejs_compat", "nodejs_compat_v2", "nodejs_zlib", "enable_weak_ref"], + ) + ), + ], +); diff --git a/src/workerd/api/node/tests/http-client-nodejs-server.js b/src/workerd/api/node/tests/http-client-nodejs-server.js index 06423cbedfc..ab18a10915a 100644 --- a/src/workerd/api/node/tests/http-client-nodejs-server.js +++ b/src/workerd/api/node/tests/http-client-nodejs-server.js @@ -132,3 +132,15 @@ const gzipServer = http.createServer((_req, res) => { }); listenTo(gzipServer, process.env.GZIP_SERVER_PORT); + +// Echoes back the Host header the sidecar received, so the test can verify +// that a user-supplied Host header does not redirect the transport destination. 
+const hostEchoServer = http.createServer((req, res) => { + // Discard any request body so the stream reaches 'end'; a request left + // paused would never emit it. + req.resume(); + req.on('end', () => { + res.writeHead(200, { 'Content-Type': 'text/plain' }); + res.end(req.headers.host || ''); + }); +}); + +listenTo(hostEchoServer, process.env.HOST_ECHO_SERVER_PORT); diff --git a/src/workerd/api/node/tests/http-client-nodejs-test.js b/src/workerd/api/node/tests/http-client-nodejs-test.js index c16efd241c5..fcbdf16cd5b 100644 --- a/src/workerd/api/node/tests/http-client-nodejs-test.js +++ b/src/workerd/api/node/tests/http-client-nodejs-test.js @@ -18,6 +18,7 @@ export const checkPortsSetCorrectly = { 'REQUEST_ARGUMENTS_PORT', 'HELLO_WORLD_SERVER_PORT', 'GZIP_SERVER_PORT', + 'HOST_ECHO_SERVER_PORT', ]; for (const key of keys) { strictEqual(typeof env[key], 'string'); @@ -563,6 +564,53 @@ export const testHttpClientGzipResponseNotAutoDecompressed = { }, }; +// Regression test for AUTOVULN-CLOUDFLARE-WORKERD-15: a user-supplied Host header +// must NOT override the transport destination (options.hostname). The fetch URL +// authority must always come from options.hostname/options.host, matching Node.js +// semantics where Host is an HTTP header, not a routing directive. +export const testHostHeaderDoesNotOverrideTransportDestination = { + async test(_ctrl, env) { + const { promise, resolve, reject } = Promise.withResolvers(); + const attackerHost = '169.254.169.254'; + http + .get( + { + hostname: env.SIDECAR_HOSTNAME, + port: env.HOST_ECHO_SERVER_PORT, + path: '/safe-endpoint', + headers: { Host: attackerHost }, + }, + (res) => { + let body = ''; + res.on('data', (chunk) => (body += chunk)); + res.on('end', () => { + try { + // The request must have reached the sidecar (not 169.254.169.254). + // If the Host header were used as the URL authority (the bug), + // the fetch would go to 169.254.169.254 and either fail or + // return a non-200 response from a different server. + strictEqual(res.statusCode, 200); + // The sidecar echoes back the Host header it received. 
Since + // fetch() derives the Host header from the URL (which now uses + // this.host, the transport destination), the echoed value will + // contain the sidecar's address, NOT the attacker-supplied value. + // NOTE(review): an empty body also passes this check, so it rules + // out the attacker value rather than proving which Host was seen. + ok( + !body.includes(attackerHost), + `Host header must not contain the attacker-supplied value ` + + `"${attackerHost}"; got "${body}"` + ); + resolve(); + } catch (err) { + reject(err); + } + }); + } + ) + .on('error', reject); + await promise; + }, +}; + // Relevant Node.js tests // - [ ] test/parallel/test-http-client-abort-destroy.js // - [ ] test/parallel/test-http-client-abort-event.js diff --git a/src/workerd/api/node/tests/http-client-nodejs-test.wd-test b/src/workerd/api/node/tests/http-client-nodejs-test.wd-test index a6bb541ed4e..38dcda2c290 100644 --- a/src/workerd/api/node/tests/http-client-nodejs-test.wd-test +++ b/src/workerd/api/node/tests/http-client-nodejs-test.wd-test @@ -16,6 +16,7 @@ const unitTests :Workerd.Config = ( (name = "REQUEST_ARGUMENTS_PORT", fromEnvironment = "REQUEST_ARGUMENTS_PORT"), (name = "HELLO_WORLD_SERVER_PORT", fromEnvironment = "HELLO_WORLD_SERVER_PORT"), (name = "GZIP_SERVER_PORT", fromEnvironment = "GZIP_SERVER_PORT"), + (name = "HOST_ECHO_SERVER_PORT", fromEnvironment = "HOST_ECHO_SERVER_PORT"), ], ) ), diff --git a/src/workerd/api/node/tests/http-client-path-ssrf-test.js b/src/workerd/api/node/tests/http-client-path-ssrf-test.js new file mode 100644 index 00000000000..262e92d8475 --- /dev/null +++ b/src/workerd/api/node/tests/http-client-path-ssrf-test.js @@ -0,0 +1,190 @@ +// Copyright (c) 2017-2022 Cloudflare, Inc. +// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 + +// Regression test for AUTOVULN-CLOUDFLARE-WORKERD-16: +// Verify that node:http ClientRequest rejects absolute-form and network-path +// request targets in options.path, preventing SSRF via URL authority override. 
+ +import http from 'node:http'; +import { throws } from 'node:assert'; + +// Absolute URLs in options.path must be rejected. +// Without the fix, new URL('http://evil.test/x', baseUrl) in #onFinish +// would replace the configured host with evil.test. +export const testRejectsAbsoluteUrlPath = { + test() { + throws( + () => { + http.request({ + hostname: 'api.example.test', + port: 80, + path: 'http://evil.test/steal', + }); + }, + { + code: 'ERR_INVALID_ARG_VALUE', + }, + 'http.request must reject absolute URL in options.path' + ); + }, +}; + +// An https:// absolute URL in options.path must be rejected as well, even +// though the request itself is made through node:http on port 80. +export const testRejectsHttpsAbsoluteUrlPath = { + test() { + throws( + () => { + http.request({ + hostname: 'api.example.test', + port: 80, + path: 'https://evil.test/steal', + }); + }, + { + code: 'ERR_INVALID_ARG_VALUE', + }, + 'http.request must reject https:// absolute URL in options.path' + ); + }, +}; + +// Network-path references (//host/path) must be rejected. +// Without the fix, new URL('//evil.test/x', baseUrl) in #onFinish +// would replace the configured host with evil.test. +export const testRejectsNetworkPathReference = { + test() { + throws( + () => { + http.request({ + hostname: 'api.example.test', + port: 80, + path: '//evil.test/steal', + }); + }, + { + code: 'ERR_INVALID_ARG_VALUE', + }, + 'http.request must reject network-path reference in options.path' + ); + }, +}; + +// Cloud metadata SSRF vector: the metadata address 169.254.169.254 written as +// a network-path reference must be rejected like any other //host form. +export const testRejectsMetadataNetworkPath = { + test() { + throws( + () => { + http.request({ + hostname: 'api.example.test', + port: 80, + path: '//169.254.169.254/latest/meta-data/', + }); + }, + { + code: 'ERR_INVALID_ARG_VALUE', + }, + 'http.request must reject metadata endpoint network-path reference' + ); + }, +}; + +// Backslash variants: the WHATWG URL spec normalises \ to / for special +// schemes, which would turn these into authority-overriding forms. 
Our +// validation uses the same URL parser (ada-url) that later constructs the +// fetch URL, so there are two safe outcomes: +// (a) the parser normalises \ → / and our host check catches it, OR +// (b) the parser does NOT normalise \, so it stays in the path and +// cannot override authority. +// Either way the fetch must never reach the attacker host. We verify +// this by checking that the URL parser resolves these against the +// configured host without authority override. +export const testBackslashPathsCannotOverrideAuthority = { + test() { + const backslashPaths = [ + '\\\\evil.test/x', + '\\/evil.test/x', + '/\\evil.test/x', + ]; + for (const path of backslashPaths) { + // If the parser normalises \ to /, our check rejects it (throws). + // If it doesn't normalise, the path is safe. Either way, verify + // that the URL used for the fetch would never reach evil.test. + let rejected = false; + try { + const req = http.request({ + hostname: 'api.example.test', + port: 80, + path, + }); + req.destroy(); + } catch (e) { + if (e.code === 'ERR_INVALID_ARG_VALUE') { + rejected = true; + } else { + throw e; + } + } + + if (!rejected) { + // The request was allowed — verify the URL parser keeps the + // configured host (i.e. backslash was NOT normalised to /). + const resolved = new URL(path, 'http://api.example.test/'); + if (resolved.host !== 'api.example.test') { + throw new Error( + `Backslash path "${path}" was allowed but URL parser resolved ` + + `host to "${resolved.host}" — authority override!` + ); + } + } + } + }, +}; + +// Defense-in-depth: mutating req.path after construction must not bypass +// the SSRF guard. The #onFinish check should catch it and destroy the +// request with an error. +export const testRejectsPathMutationAfterConstruction = { + async test() { + const req = http.request({ + hostname: 'api.example.test', + port: 80, + path: '/safe', + }); + // Mutate the public field to an authority-overriding value. 
+ req.path = '//evil.test/steal'; + + // NOTE(review): this promise settles only on 'error'; if the guard regresses + // and no error is emitted, the test never completes on its own. + const error = await new Promise((resolve) => { + req.on('error', resolve); + req.end(); + }); + + if (error.code !== 'ERR_INVALID_ARG_VALUE') { + throw new Error( + `Expected ERR_INVALID_ARG_VALUE but got ${error.code}: ${error.message}` + ); + } + }, +}; + +// Ordinary origin-form paths (starting with a single '/') must still be accepted. +export const testAllowsNormalPaths = { + test() { + // These should NOT throw — they are valid path-only request targets. + const req1 = http.request({ hostname: 'example.test', path: '/foo/bar' }); + req1.destroy(); + + const req2 = http.request({ hostname: 'example.test', path: '/foo?q=1' }); + req2.destroy(); + + const req3 = http.request({ hostname: 'example.test', path: '/' }); + req3.destroy(); + + const req4 = http.request({ hostname: 'example.test', path: '/foo#hash' }); + req4.destroy(); + + // Path with encoded characters should work. + const req5 = http.request({ hostname: 'example.test', path: '/foo%20bar' }); + req5.destroy(); + }, +}; diff --git a/src/workerd/api/node/tests/http-client-path-ssrf-test.wd-test b/src/workerd/api/node/tests/http-client-path-ssrf-test.wd-test new file mode 100644 index 00000000000..7da22738ac5 --- /dev/null +++ b/src/workerd/api/node/tests/http-client-path-ssrf-test.wd-test @@ -0,0 +1,14 @@ +using Workerd = import "/workerd/workerd.capnp"; + +const unitTests :Workerd.Config = ( + services = [ + ( name = "http-client-path-ssrf-test", + worker = ( + modules = [ + (name = "worker", esModule = embed "http-client-path-ssrf-test.js") + ], + compatibilityFlags = ["nodejs_compat", "nodejs_compat_v2", "experimental", "enable_nodejs_http_modules"], + ) + ), + ], +); diff --git a/src/workerd/api/node/util.h b/src/workerd/api/node/util.h index 68de5d02be8..794d184b25e 100644 --- a/src/workerd/api/node/util.h +++ b/src/workerd/api/node/util.h @@ -113,6 +113,10 @@ class MIMEType final: public jsg::Object { private: workerd::MimeType inner; jsg::Ref params; + + void visitForGc(jsg::GcVisitor& visitor) { 
+ visitor.visit(params); + } }; #define JS_UTIL_IS_TYPES(V) \ diff --git a/src/workerd/api/node/zlib-util.c++ b/src/workerd/api/node/zlib-util.c++ index 26316394b00..c757a639a6c 100644 --- a/src/workerd/api/node/zlib-util.c++ +++ b/src/workerd/api/node/zlib-util.c++ @@ -537,6 +537,11 @@ void ZlibUtil::CompressionStream::close() { } closed = true; JSG_ASSERT(initialized, Error, "Closing before initialized"_kj); + // Drop JS-heap refs eagerly so callers that explicitly close don't have to + // wait for the cycle collector. visitForGc handles the unclosed case. + writeCallback = kj::none; + writeResult = kj::none; + errorHandler = kj::none; // Context is closed on the destructor of the CompressionContext. } diff --git a/src/workerd/api/node/zlib-util.h b/src/workerd/api/node/zlib-util.h index 35121558a60..9248368e7a5 100644 --- a/src/workerd/api/node/zlib-util.h +++ b/src/workerd/api/node/zlib-util.h @@ -447,6 +447,13 @@ class ZlibUtil final: public jsg::Object { JSG_METHOD(setErrorHandler); } + // writeCallback and errorHandler typically capture `this`'s JS wrapper + // (see internal_zlib_base.ts), forming a JS<->C++ cycle that V8 can only + // collect with this tracing. + void visitForGc(jsg::GcVisitor& visitor) { + visitor.visit(writeCallback, writeResult, errorHandler); + } + protected: CompressionContext* context() { return &context_; diff --git a/src/workerd/api/performance.h b/src/workerd/api/performance.h index 8b0fbc5174d..763cc8c7005 100644 --- a/src/workerd/api/performance.h +++ b/src/workerd/api/performance.h @@ -120,6 +120,10 @@ class PerformanceMark: public PerformanceEntry { private: jsg::Optional> detail; + + void visitForGc(jsg::GcVisitor& visitor) { + visitor.visit(detail); + } }; // UvMetricsInfo represents libuv event loop metrics. 
@@ -249,6 +253,10 @@ class PerformanceMeasure: public PerformanceEntry { private: jsg::Optional> detail; + + void visitForGc(jsg::GcVisitor& visitor) { + visitor.visit(detail); + } }; class PerformanceResourceTiming: public PerformanceEntry { @@ -619,6 +627,10 @@ class Performance: public EventTarget { private: const IsolateLimitEnforcer& isolateLimitEnforcer; kj::Vector> entries; + + void visitForGc(jsg::GcVisitor& visitor) { + visitor.visitAll(entries); + } }; #define EW_PERFORMANCE_ISOLATE_TYPES \ diff --git a/src/workerd/api/r2-bucket.h b/src/workerd/api/r2-bucket.h index 3712ba7eb86..6c0fecb80d2 100644 --- a/src/workerd/api/r2-bucket.h +++ b/src/workerd/api/r2-bucket.h @@ -349,6 +349,11 @@ class R2Bucket: public jsg::Object { jsg::Optional range; kj::String storageClass; jsg::Optional ssecKeyMd5; + + void visitForGc(jsg::GcVisitor& visitor) { + visitor.visit(checksums); + } + friend class R2Bucket; }; @@ -415,6 +420,10 @@ class R2Bucket: public jsg::Object { private: jsg::Ref body; + + void visitForGc(jsg::GcVisitor& visitor) { + visitor.visit(body); + } }; struct ListResult { diff --git a/src/workerd/api/sql.c++ b/src/workerd/api/sql.c++ index d30ca4be6f4..602ece8621c 100644 --- a/src/workerd/api/sql.c++ +++ b/src/workerd/api/sql.c++ @@ -82,9 +82,8 @@ jsg::Ref SqlStorage::exec( // // In theory we could try to cache multiple copies of the statement, but as this is probably // exceedingly rare, it is not worth the added code complexity. 
- SqliteDatabase::Regulator& regulator = *this; - return js.alloc( - js, kj::mv(doneCallback), db, regulator, js.toString(querySql), kj::mv(bindings)); + return js.alloc(js, kj::mv(doneCallback), db, + SqliteDatabase::StaticRegulator(regulator), js.toString(querySql), kj::mv(bindings)); } auto result = js.alloc(js, kj::mv(doneCallback), slot.addRef(), kj::mv(bindings)); @@ -104,7 +103,6 @@ jsg::Ref SqlStorage::exec( SqlStorage::IngestResult SqlStorage::ingest(jsg::Lock& js, kj::String querySql) { auto& context = IoContext::current(); TraceContext traceContext = context.makeUserTraceSpan("durable_object_storage_ingest"_kjc); - SqliteDatabase::Regulator& regulator = *this; auto result = getDb(js).ingestSql(regulator, querySql); traceContext.setTag( @@ -142,7 +140,7 @@ double SqlStorage::getDatabaseSize(jsg::Lock& js) { return dbSize; } -bool SqlStorage::isAllowedName(kj::StringPtr name) const { +bool SqlStorageRegulator::isAllowedName(kj::StringPtr name) const { if (util::Autogate::isEnabled(util::AutogateKey::SQL_RESTRICT_RESERVED_NAMES)) { return strncasecmp(name.begin(), "_cf_", 4) != 0; } @@ -152,15 +150,15 @@ bool SqlStorage::isAllowedName(kj::StringPtr name) const { return !name.startsWith("_cf_"); } -bool SqlStorage::isAllowedTrigger(kj::StringPtr name) const { +bool SqlStorageRegulator::isAllowedTrigger(kj::StringPtr name) const { return true; } -void SqlStorage::onError(kj::Maybe sqliteErrorCode, kj::StringPtr message) const { +void SqlStorageRegulator::onError(kj::Maybe sqliteErrorCode, kj::StringPtr message) const { JSG_ASSERT(false, Error, message); } -bool SqlStorage::allowTransactions() const { +bool SqlStorageRegulator::allowTransactions() const { JSG_FAIL_REQUIRE(Error, "To execute a transaction, please use the state.storage.transaction() or " "state.storage.transactionSync() APIs instead of the SQL BEGIN TRANSACTION or SAVEPOINT " @@ -169,7 +167,7 @@ bool SqlStorage::allowTransactions() const { "write coalescing."); } -bool 
SqlStorage::shouldAddQueryStats() const { +bool SqlStorageRegulator::shouldAddQueryStats() const { // Bill for queries executed from JavaScript. return true; } @@ -200,7 +198,7 @@ jsg::JsValue SqlStorage::wrapSqlValue(jsg::Lock& js, SqlValue value) { } SqlStorage::Cursor::State::State(SqliteDatabase& db, - SqliteDatabase::Regulator& regulator, + SqliteDatabase::StaticRegulator regulator, kj::StringPtr sqlCode, kj::Array bindingsParam) : bindings(kj::mv(bindingsParam)), diff --git a/src/workerd/api/sql.h b/src/workerd/api/sql.h index eae97b2104e..edd4df524c8 100644 --- a/src/workerd/api/sql.h +++ b/src/workerd/api/sql.h @@ -12,7 +12,16 @@ namespace workerd::api { -class SqlStorage final: public jsg::Object, private SqliteDatabase::Regulator { +class SqlStorageRegulator: public SqliteDatabase::Regulator { + public: + bool isAllowedName(kj::StringPtr name) const override; + bool isAllowedTrigger(kj::StringPtr name) const override; + void onError(kj::Maybe sqliteErrorCode, kj::StringPtr message) const override; + bool allowTransactions() const override; + bool shouldAddQueryStats() const override; +}; + +class SqlStorage final: public jsg::Object { public: SqlStorage(jsg::Ref storage); ~SqlStorage(); @@ -68,11 +77,7 @@ class SqlStorage final: public jsg::Object, private SqliteDatabase::Regulator { visitor.visit(storage); } - bool isAllowedName(kj::StringPtr name) const override; - bool isAllowedTrigger(kj::StringPtr name) const override; - void onError(kj::Maybe sqliteErrorCode, kj::StringPtr message) const override; - bool allowTransactions() const override; - bool shouldAddQueryStats() const override; + static constexpr SqlStorageRegulator regulator; SqliteDatabase& getDb(jsg::Lock& js) { return storage->getSqliteDb(js); @@ -99,7 +104,7 @@ class SqlStorage final: public jsg::Object, private SqliteDatabase::Regulator { kj::String kjQuery) : query(js.v8Isolate, jsQuery), statementSize(kjQuery.size()), - statement(db.prepareMulti(sqlStorage, kj::mv(kjQuery))) {} + 
statement(db.prepareMulti(regulator, kj::mv(kjQuery))) {} }; class StatementCacheCallbacks { @@ -250,7 +255,7 @@ class SqlStorage::Cursor final: public jsg::Object { SqliteDatabase::Query query; State(SqliteDatabase& db, - SqliteDatabase::Regulator& regulator, + SqliteDatabase::StaticRegulator regulator, kj::StringPtr sqlCode, kj::Array bindings); @@ -333,6 +338,10 @@ class SqlStorage::Statement final: public jsg::Object { jsg::Ref sqlStorage; jsg::V8Ref query; + void visitForGc(jsg::GcVisitor& visitor) { + visitor.visit(sqlStorage, query); + } + friend class Cursor; }; diff --git a/src/workerd/api/streams/identity-transform-stream.c++ b/src/workerd/api/streams/identity-transform-stream.c++ index 115c2621eaf..a5b31ed7ef5 100644 --- a/src/workerd/api/streams/identity-transform-stream.c++ +++ b/src/workerd/api/streams/identity-transform-stream.c++ @@ -29,6 +29,8 @@ struct ReadRequest { struct WriteRequest { static constexpr kj::StringPtr NAME KJ_UNUSED = "write-request"_kj; kj::ArrayPtr bytes; + // WARNING: `bytes` may be invalid if fulfiller->isWaiting() returns false! (This indicates the + // write was canceled, e.g. via removeSink() destroying the Canceler.) kj::Own> fulfiller; }; @@ -225,6 +227,15 @@ class IdentityTransformStreamImpl final: public kj::Refcounted, // Check for pending write request. KJ_IF_SOME(request, state.tryGetUnsafe()) { + if (!request.fulfiller->isWaiting()) { + // The write was canceled (e.g. removeSink() destroyed the Canceler during RPC + // serialization). The non-owning `bytes` pointer is now dangling — we must not + // dereference it. Transition to a disconnected error state and retry, mirroring + // the analogous guard in writeHelper() for canceled ReadRequests. + state.forceTransitionTo(KJ_EXCEPTION(DISCONNECTED, "writer canceled")); + return readHelper(bytes); + } + if (bytes.size() >= request.bytes.size()) { // The write buffer will entirely fit into our read buffer; fulfill both requests. 
memmove(bytes.begin(), request.bytes.begin(), request.bytes.size()); diff --git a/src/workerd/api/streams/internal.c++ b/src/workerd/api/streams/internal.c++ index cd65aa69645..82a355d20b2 100644 --- a/src/workerd/api/streams/internal.c++ +++ b/src/workerd/api/streams/internal.c++ @@ -2172,6 +2172,7 @@ void WritableStreamInternalController::visitForGc(jsg::GcVisitor& visitor) { KJ_IF_SOME(pendingAbort, maybePendingAbort) { visitor.visit(*pendingAbort); } + visitor.visit(maybeClosureWaitable); } void ReadableStreamInternalController::visitForGc(jsg::GcVisitor& visitor) { diff --git a/src/workerd/api/streams/queue.c++ b/src/workerd/api/streams/queue.c++ index 35d03535f8c..6423537ed04 100644 --- a/src/workerd/api/streams/queue.c++ +++ b/src/workerd/api/streams/queue.c++ @@ -383,8 +383,11 @@ size_t ValueQueue::size() const { return impl.size(); } -void ValueQueue::handlePush( - jsg::Lock& js, ConsumerImpl::Ready& state, kj::Maybe queue, kj::Rc entry) { +void ValueQueue::handlePush(jsg::Lock& js, + ConsumerImpl::Ready& state, + ConsumerImpl& consumer, + kj::Maybe queue, + kj::Rc entry) { // If there are no pending reads, just add the entry to the buffer and return, adjusting // the size of the queue in the process. if (state.readRequests.empty()) { @@ -915,10 +918,16 @@ bool ByteQueue::ByobRequest::respond(jsg::Lock& js, size_t amount) { // It is possible that the request was partially filled already. req.pullInto.filled -= unaligned; + // resolveRead calls request->resolve(js) which can synchronously run user + // JavaScript via V8's promise resolution thenable check (Get(resolution, "then")). + // A malicious Object.prototype.then getter can call controller.error() or + // reader.cancel(), which may destroy the ConsumerImpl. We hold a weak ref + // to detect this before accessing consumer again. + auto weak = consumer.selfRef.addRef(); // Fulfill this request! 
consumer.resolveRead(js, req); - if (unaligned > 0) { + if (unaligned > 0 && weak->isValid() && consumer.state.isActive()) { auto start = sourcePtr.slice(amount - unaligned); KJ_IF_SOME(store, jsg::BufferSource::tryAllocUnsafe(js, unaligned)) { @@ -1044,6 +1053,7 @@ size_t ByteQueue::size() const { void ByteQueue::handlePush(jsg::Lock& js, ConsumerImpl::Ready& state, + ConsumerImpl& consumer, kj::Maybe queue, kj::Rc newEntry) { const auto bufferData = [&](size_t offset) { @@ -1068,6 +1078,13 @@ void ByteQueue::handlePush(jsg::Lock& js, auto amountAvailable = state.queueTotalSize + entrySize; size_t entryOffset = 0; + // request->resolve(js) below can synchronously run user JavaScript via V8's + // promise resolution thenable check (Get(resolution, "then")). A malicious + // Object.prototype.then getter can call controller.error(), which transitions + // the ConsumerImpl from Ready to Errored, freeing the Ready storage that + // `state` references. We hold a weak ref to detect this and bail out. + auto weak = consumer.selfRef.addRef(); + while (!state.readRequests.empty() && amountAvailable > 0) { auto& pending = *state.readRequests.front(); @@ -1077,7 +1094,7 @@ void ByteQueue::handlePush(jsg::Lock& js, // is enough data. if (amountAvailable < pending.pullInto.atLeast) { - return bufferData(0); + return bufferData(entryOffset); } // There might be at least some data in the buffer. If there is, it should @@ -1173,6 +1190,16 @@ void ByteQueue::handlePush(jsg::Lock& js, auto request = kj::mv(state.readRequests.front()); state.readRequests.pop_front(); request->resolve(js); + + // resolve(js) can synchronously run user JavaScript via V8's promise resolution + // thenable check. A malicious Object.prototype.then getter can call + // controller.error() or reader.cancel(), which destroys the ConsumerImpl and + // frees the Ready storage that `state` references. We must check liveness + // before continuing the loop. 
+ if (!weak->isValid()) return; + // Also verify the consumer is still in the Ready state — the re-entrant JS + // may have transitioned it to Errored/Closed without fully destroying it. + if (!consumer.state.isActive()) return; } // If the entry was consumed completely by the pending read, then we're done! @@ -1342,6 +1369,13 @@ bool ByteQueue::handleMaybeClose(jsg::Lock& js, // We should also only be here if the consumer is closing. KJ_ASSERT(consumer.isClosing()); + // request->resolve(js) below can synchronously run user JavaScript via V8's + // promise resolution thenable check (Get(resolution, "then")). A malicious + // Object.prototype.then getter can call reader.cancel(), which frees the + // ConsumerImpl that owns `state` while this frame still holds raw references. + // Hold a weak ref so we can detect that and bail out. + auto weak = consumer.selfRef.addRef(); + const auto consume = [&] { // Consume will copy as much of the remaining data in the buffer as possible // to the next pending read. If the remaining data can fit into the remaining @@ -1367,6 +1401,8 @@ bool ByteQueue::handleMaybeClose(jsg::Lock& js, auto request = kj::mv(state.readRequests.front()); state.readRequests.pop_front(); request->resolve(js); + // resolve(js) may have freed the consumer via re-entrant JS. + // Return true; caller must check liveness before touching consumer. return true; } KJ_CASE_ONEOF(entry, QueueEntry) { @@ -1419,6 +1455,10 @@ bool ByteQueue::handleMaybeClose(jsg::Lock& js, state.readRequests.pop_front(); request->resolve(js); + // resolve(js) may have freed the consumer via re-entrant JS. + // Check liveness before accessing state. + if (!weak->isValid()) return true; + if (state.queueTotalSize == 0) { // If the queueTotalSize is zero at this point, the next item in the queue // must be a close and we can return true. All of the data has been consumed. 
@@ -1453,6 +1493,8 @@ bool ByteQueue::handleMaybeClose(jsg::Lock& js, auto request = kj::mv(state.readRequests.front()); state.readRequests.pop_front(); request->resolve(js); + // resolve(js) may have freed the consumer via re-entrant JS. + // Return false; caller must check liveness before continuing. return false; } } @@ -1462,21 +1504,29 @@ bool ByteQueue::handleMaybeClose(jsg::Lock& js, }; // We can only consume here if there are pending reads! - while (!state.readRequests.empty()) { + while (weak->isValid() && !state.readRequests.empty()) { // We ignore the read request atLeast here since we are closing. Our goal is to // consume as much of the data as possible. if (consume()) { // If consume returns true, we reached the end and have no more data to // consume. That's a good thing! It means we can go ahead and close down. + // consume() may also return true when the consumer was freed by re-entrant + // JS — caller must check liveness. return true; } + // consume() may have freed the consumer via re-entrant JS. + if (!weak->isValid()) return true; + // If consume() returns false, there is still data left to consume in the queue. // We will loop around and try again so long as there are still read requests // pending. } + // The consumer may have been freed during the loop above. + if (!weak->isValid()) return true; + // At this point, we shouldn't have any read requests and there should be data // left in the queue. We have to keep waiting for more reads to consume the // remaining data. 
diff --git a/src/workerd/api/streams/queue.h b/src/workerd/api/streams/queue.h index dbd0d4f10e7..0f79efb7e70 100644 --- a/src/workerd/api/streams/queue.h +++ b/src/workerd/api/streams/queue.h @@ -449,7 +449,7 @@ class ConsumerImpl final { } UpdateBackpressureScope scope(*this); - Self::handlePush(js, ready, queue, kj::mv(entry)); + Self::handlePush(js, ready, *this, queue, kj::mv(entry)); } } @@ -467,8 +467,15 @@ class ConsumerImpl final { js.v8Isolate, js.typeError("Cannot call read while there is a pending draining read"_kj)); return request.reject(js, error); } + // handleRead may trigger the pull callback (via onConsumerWantsData), which + // may synchronously call reader.cancel(). Cancel can destroy this ConsumerImpl + // (ByteReadable::cancel sets state = kj::none). We must guard the subsequent + // maybeDrainAndSetState call against use-after-free by taking a weak ref before + // handleRead and checking if we're still alive after it returns. + auto weak = selfRef.addRef(); Self::handleRead(js, ready, *this, queue, kj::mv(request)); - return maybeDrainAndSetState(js); + // Both read() and maybeDrainAndSetState() are void — no return value is lost. + weak->runIfAlive([&](ConsumerImpl& self) { self.maybeDrainAndSetState(js); }); } void reset() { @@ -676,7 +683,17 @@ class ConsumerImpl final { } else { // Otherwise, if isClosing() is true... if (isClosing()) { + // handleMaybeClose calls request->resolve(js) which can synchronously + // run user JavaScript via V8's promise resolution thenable check + // (Get(resolution, "then")). A malicious Object.prototype.then getter + // can call reader.cancel(), which frees *this (the ConsumerImpl) while + // handleMaybeClose / this frame still hold raw Ready& / ConsumerImpl& + // references. We must take a selfRef before calling handleMaybeClose + // and check liveness after it returns. 
+ auto weak = selfRef.addRef(); if (!empty() && !Self::handleMaybeClose(js, ready, *this, queue)) { + // handleMaybeClose may have freed *this via re-entrant JS. + if (!weak->isValid()) return; // If the queue is not empty, we'll have the implementation see // if it can drain the remaining data into pending reads. If handleMaybeClose // returns false, then it could not and we can't yet close. If it returns true, @@ -685,13 +702,16 @@ class ConsumerImpl final { return; } + // handleMaybeClose may have freed *this via re-entrant JS during + // request->resolve(js). Re-check before touching any members. + if (!weak->isValid()) return; + KJ_ASSERT(empty()); KJ_REQUIRE(ready.buffer.size() == 1); // The close should be the only item remaining. // Extract pending reads and resolve them as done. Same GC safety concern // as the error path above — see detailed comment there. auto pendingReads = extractPendingReads(ready); - auto weak = selfRef.addRef(); for (auto& request: pendingReads) { request->resolveAsDone(js); } @@ -855,8 +875,11 @@ class ValueQueue final { private: QueueImpl impl; - static void handlePush( - jsg::Lock& js, ConsumerImpl::Ready& state, kj::Maybe queue, kj::Rc entry); + static void handlePush(jsg::Lock& js, + ConsumerImpl::Ready& state, + ConsumerImpl& consumer, + kj::Maybe queue, + kj::Rc entry); static void handleRead(jsg::Lock& js, ConsumerImpl::Ready& state, ConsumerImpl& consumer, @@ -999,7 +1022,11 @@ class ByteQueue final { } private: - jsg::BufferSource store; + // Intentionally not visited by visitForGc: Entry is not reachable from JS; + // it is owned via kj::Rc (C++ refcount), so the BufferSource cannot be + // part of a JS→C++→JS reference cycle and a strong v8::Global suffices + // to keep it alive. See queue.c++:562 for the empty visitForGc body. 
+ jsg::BufferSource store; // NOLINT(jsg-visit-for-gc) }; struct QueueEntry { @@ -1103,8 +1130,11 @@ class ByteQueue final { private: QueueImpl impl; - static void handlePush( - jsg::Lock& js, ConsumerImpl::Ready& state, kj::Maybe queue, kj::Rc entry); + static void handlePush(jsg::Lock& js, + ConsumerImpl::Ready& state, + ConsumerImpl& consumer, + kj::Maybe queue, + kj::Rc entry); static void handleRead(jsg::Lock& js, ConsumerImpl::Ready& state, ConsumerImpl& consumer, diff --git a/src/workerd/api/streams/readable.c++ b/src/workerd/api/streams/readable.c++ index 774aed242fa..aa965262fe4 100644 --- a/src/workerd/api/streams/readable.c++ +++ b/src/workerd/api/streams/readable.c++ @@ -630,93 +630,6 @@ jsg::Optional ByteLengthQueuingStrategy::size( namespace { -// TODO(cleanup): These classes have been copied to external-pusher.c++. The copies here can be -// deleted as soon as we've switched from StreamSink to ExternalPusher and can delete all the -// StreamSink-related code. For now I'm not trying to avoid duplication. - -// HACK: We need as async pipe, like kj::newOneWayPipe(), except supporting explicit end(). So we -// wrap the two ends of the pipe in special adapters that track whether end() was called. 
-class ExplicitEndOutputPipeAdapter final: public capnp::ExplicitEndOutputStream { - public: - ExplicitEndOutputPipeAdapter( - kj::Own inner, kj::Own> ended) - : inner(kj::mv(inner)), - ended(kj::mv(ended)) {} - - kj::Promise write(kj::ArrayPtr buffer) override { - return KJ_REQUIRE_NONNULL(inner)->write(buffer); - } - kj::Promise write(kj::ArrayPtr> pieces) override { - return KJ_REQUIRE_NONNULL(inner)->write(pieces); - } - - kj::Maybe> tryPumpFrom( - kj::AsyncInputStream& input, uint64_t amount) override { - return KJ_REQUIRE_NONNULL(inner)->tryPumpFrom(input, amount); - } - - kj::Promise whenWriteDisconnected() override { - return KJ_REQUIRE_NONNULL(inner)->whenWriteDisconnected(); - } - - kj::Promise end() override { - // Signal to the other side that end() was actually called. - ended->getWrapped() = true; - inner = kj::none; - return kj::READY_NOW; - } - - private: - kj::Maybe> inner; - kj::Own> ended; -}; - -class ExplicitEndInputPipeAdapter final: public kj::AsyncInputStream { - public: - ExplicitEndInputPipeAdapter(kj::Own inner, - kj::Own> ended, - kj::Maybe expectedLength) - : inner(kj::mv(inner)), - ended(kj::mv(ended)), - expectedLength(expectedLength) {} - - kj::Promise tryRead(void* buffer, size_t minBytes, size_t maxBytes) override { - size_t result = co_await inner->tryRead(buffer, minBytes, maxBytes); - - KJ_IF_SOME(l, expectedLength) { - KJ_ASSERT(result <= l); - l -= result; - if (l == 0) { - // If we got all the bytes we expected, we treat this as a successful end, because the - // underlying KJ pipe is not actually going to wait for the other side to drop. This is - // consistent with the behavior of Content-Length in HTTP anyway. - ended->getWrapped() = true; - } - } - - if (result < minBytes) { - // Verify that end() was called. 
- if (!ended->getWrapped()) { - JSG_FAIL_REQUIRE(Error, "ReadableStream received over RPC disconnected prematurely."); - } - } - co_return result; - } - - kj::Maybe tryGetLength() override { - return inner->tryGetLength(); - } - - kj::Promise pumpTo(kj::AsyncOutputStream& output, uint64_t amount) override { - return inner->pumpTo(output, amount); - } - - private: - kj::Own inner; - kj::Own> ended; - kj::Maybe expectedLength; -}; - // Wrapper around ReadableStreamSource that prevents deferred proxying. We need this for RPC // streams because although they are "system streams", they become disconnected when the IoContext // is destroyed, due to the JsRpcCustomEvent being canceled. @@ -792,32 +705,20 @@ void ReadableStream::serialize(jsg::Lock& js, jsg::Serializer& serializer) { auto expectedLength = controller.tryGetLength(encoding); capnp::ByteStream::Client streamCap = [&]() { - KJ_IF_SOME(pusher, externalHandler->getExternalPusher()) { - auto req = pusher.pushByteStreamRequest(capnp::MessageSize{2, 0}); - KJ_IF_SOME(el, expectedLength) { - req.setLengthPlusOne(el + 1); - } - auto pipeline = req.sendForPipeline(); + auto req = externalHandler->getExternalPusher().pushByteStreamRequest(capnp::MessageSize{2, 0}); + KJ_IF_SOME(el, expectedLength) { + req.setLengthPlusOne(el + 1); + } + auto pipeline = req.sendForPipeline(); - externalHandler->write([encoding, expectedLength, source = pipeline.getSource()]( - rpc::JsValue::External::Builder builder) mutable { - auto rs = builder.initReadableStream(); - rs.setStream(kj::mv(source)); - rs.setEncoding(encoding); - }); + externalHandler->write([encoding, expectedLength, source = pipeline.getSource()]( + rpc::JsValue::External::Builder builder) mutable { + auto rs = builder.initReadableStream(); + rs.setStream(kj::mv(source)); + rs.setEncoding(encoding); + }); - return pipeline.getSink(); - } else { - return externalHandler - ->writeStream( - [encoding, expectedLength](rpc::JsValue::External::Builder builder) mutable { - 
auto rs = builder.initReadableStream(); - rs.setEncoding(encoding); - KJ_IF_SOME(l, expectedLength) { - rs.getExpectedLength().setKnown(l); - } - }).castAs(); - } + return pipeline.getSink(); }(); kj::Own kjStream = @@ -852,26 +753,7 @@ jsg::Ref ReadableStream::deserialize( auto& ioctx = IoContext::current(); - kj::Own in; - if (rs.hasStream()) { - in = - ioctx.getExternalPusher()->unwrapStream(rs.getStream(), externalHandler->getDebugContext()); - } else { - kj::Maybe expectedLength; - auto el = rs.getExpectedLength(); - if (el.isKnown()) { - expectedLength = el.getKnown(); - } - - auto pipe = kj::newOneWayPipe(expectedLength); - - auto endedFlag = kj::refcounted>(false); - - auto out = kj::heap(kj::mv(pipe.out), kj::addRef(*endedFlag)); - in = kj::heap(kj::mv(pipe.in), kj::mv(endedFlag), expectedLength); - - externalHandler->setLastStream(ioctx.getByteStreamFactory().kjToCapnp(kj::mv(out))); - } + kj::Own in = ioctx.getExternalPusher()->unwrapStream(rs.getStream()); return js.alloc(ioctx, kj::heap(newSystemStream(kj::mv(in), encoding, ioctx), ioctx)); diff --git a/src/workerd/api/streams/standard.c++ b/src/workerd/api/streams/standard.c++ index 2a4b4b5e136..a3154877059 100644 --- a/src/workerd/api/streams/standard.c++ +++ b/src/workerd/api/streams/standard.c++ @@ -1847,11 +1847,12 @@ struct ValueReadable final: private api::ValueQueue::ConsumerImpl::StateListener KJ_IF_SOME(s, state) { auto prp = js.newPromiseAndResolver(); reading = true; + KJ_DEFER(reading = false); s.consumer->read(js, ValueQueue::ReadRequest{ .resolver = kj::mv(prp.resolver), }); - reading = false; + // reading is reset by KJ_DEFER above. if (pendingCancel) { // If we were canceled while reading, we need to drop our state now. 
state = kj::none; @@ -1998,6 +1999,7 @@ struct ByteReadable final: private api::ByteQueue::ConsumerImpl::StateListener { using State = ReadableState; kj::Maybe state; kj::Maybe autoAllocateChunkSize; + bool reading = false; bool pendingCancel = false; JSG_MEMORY_INFO(ByteReadable) { @@ -2045,6 +2047,13 @@ struct ByteReadable final: private api::ByteQueue::ConsumerImpl::StateListener { KJ_IF_SOME(s, state) { auto prp = js.newPromiseAndResolver(); + // Set reading = true to prevent cancel() from destroying the consumer + // while we're in the middle of a synchronous read operation. The pull() + // callback triggered by consumer->read() may call reader.cancel(), which + // would otherwise immediately set state = kj::none and free the consumer. + // KJ_DEFER ensures the flag is cleared even if an operation throws. + reading = true; + KJ_DEFER(reading = false); KJ_IF_SOME(byob, byobOptions) { jsg::BufferSource source(js, byob.bufferView.getHandle(js)); // If atLeast is not given, then by default it is the element size of the view @@ -2090,6 +2099,12 @@ struct ByteReadable final: private api::ByteQueue::ConsumerImpl::StateListener { prp.resolver.reject(js, js.v8Error("Failed to allocate buffer for read.")); } } + // reading is reset by KJ_DEFER above. + if (pendingCancel) { + // If we were canceled while reading, we need to drop our state now. + state = kj::none; + pendingCancel = false; + } return kj::mv(prp.promise); } @@ -2141,11 +2156,12 @@ struct ByteReadable final: private api::ByteQueue::ConsumerImpl::StateListener { bool hasPendingDrainingRead = s.consumer->hasPendingDrainingRead(); s.consumer->cancel(js, maybeReason); auto promise = s.controller->cancel(js, kj::mv(maybeReason)); - // If there's a pending draining read, we need to wait for it to finish before - // dropping our state. The draining read's promise callbacks capture 'this' (the - // Consumer) to clear hasPendingDrainingRead. If we destroy the state now, those - // callbacks will UAF. 
- if (hasPendingDrainingRead) { + // If we're currently in a read (sync or draining), we need to wait for that to + // finish before dropping our state. For sync reads, consumer->read() is still on + // the call stack and will access the consumer after we return. For draining reads, + // the promise callbacks capture 'this' (the Consumer) to clear hasPendingDrainingRead. + // In either case, destroying state now would UAF. + if (reading || hasPendingDrainingRead) { pendingCancel = true; } else { state = kj::none; @@ -3253,6 +3269,9 @@ class AllReader { void visitForGc(jsg::GcVisitor& visitor) { state.visitForGc(visitor); + for (auto& part: parts) { + visitor.visit(part); + } } private: diff --git a/src/workerd/api/streams/writable.c++ b/src/workerd/api/streams/writable.c++ index d0d8eaa4d7b..22e9849501d 100644 --- a/src/workerd/api/streams/writable.c++ +++ b/src/workerd/api/streams/writable.c++ @@ -168,7 +168,7 @@ void WritableStreamDefaultWriter::visitForGc(jsg::GcVisitor& visitor) { KJ_IF_SOME(attached, state.tryGetActiveUnsafe()) { visitor.visit(attached.stream); } - visitor.visit(closedPromise, readyPromise); + visitor.visit(closedPromise, readyPromise, readyPromisePending); } // ====================================================================================== diff --git a/src/workerd/api/sync-kv.h b/src/workerd/api/sync-kv.h index 3c82bd7d681..38a86131ff4 100644 --- a/src/workerd/api/sync-kv.h +++ b/src/workerd/api/sync-kv.h @@ -59,6 +59,10 @@ class SyncKvStorage: public jsg::Object { private: jsg::Ref storage; + void visitForGc(jsg::GcVisitor& visitor) { + visitor.visit(storage); + } + SqliteKv& getSqliteKv(jsg::Lock& js) { return storage->getSqliteKv(js); } diff --git a/src/workerd/api/tests/BUILD.bazel b/src/workerd/api/tests/BUILD.bazel index 69f3177f816..8a2ee6dfb03 100644 --- a/src/workerd/api/tests/BUILD.bazel +++ b/src/workerd/api/tests/BUILD.bazel @@ -2,6 +2,24 @@ load("@aspect_rules_js//js:defs.bzl", "js_binary") 
load("@rules_shell//shell:sh_test.bzl", "sh_test") load("//:build/wd_test.bzl", "wd_test") +wd_test( + src = "messageport-postmessage-uaf-test.wd-test", + args = ["--experimental"], + data = ["messageport-postmessage-uaf-test.js"], +) + +wd_test( + src = "streams-byte-cancel-uaf-test.wd-test", + args = ["--experimental"], + data = ["streams-byte-cancel-uaf-test.js"], +) + +wd_test( + src = "streams-byte-handlePush-uaf-test.wd-test", + args = ["--experimental"], + data = ["streams-byte-handlePush-uaf-test.js"], +) + wd_test( src = "structuredclone-error-serialize-test.wd-test", args = ["--experimental"], @@ -46,6 +64,14 @@ wd_test( tags = ["resources:socket:1"], ) +# Regression test for AUTOVULN-CLOUDFLARE-WORKERD-334: connect() handler must neuter the +# NeuterableIoStream when the handler promise resolves, preventing use-after-free. +wd_test( + src = "connect-neuter-test.wd-test", + args = ["--experimental"], + data = ["connect-neuter-test.js"], +) + wd_test( src = "actor-alarms-test.wd-test", args = ["--experimental"], @@ -505,6 +531,20 @@ wd_test( predictable = False, ) +wd_test( + src = "streams-byob-close-reentry-test.wd-test", + args = ["--experimental"], + data = ["streams-byob-close-reentry-test.js"], + # This test exercises a SIGSEGV regression (EDGEWORKER-RUNTIME-H40) by + # forcing reentrant controller.error() during the ReadableStream close + # drain. Predictable mode triggers an extra GC pass after every worker + # entrypoint in KJ_DEBUG builds (see maybeAddGcPassForTest), which + # surfaces a separate latent GC-traceability issue in the stream's + # state machine. Opt out of predictable mode here so this test only + # covers the regression it was written for. 
+ predictable = False, +) + wd_test( src = "pipe-streams-test.wd-test", args = ["--experimental"], @@ -559,6 +599,12 @@ wd_test( data = ["streams-byob-edge-cases-test.js"], ) +wd_test( + src = "streams-byob-concurrent-readatleast-test.wd-test", + args = ["--experimental"], + data = ["streams-byob-concurrent-readatleast-test.js"], +) + wd_test( src = "streams-tee-edge-cases-test.wd-test", args = ["--experimental"], @@ -819,6 +865,18 @@ wd_test( tags = ["requires-network"], ) +wd_test( + src = "worker-loader-unnamed-gc-test.wd-test", + args = ["--experimental"], + data = ["worker-loader-unnamed-gc-test.js"], +) + +wd_test( + src = "worker-loader-rab-test.wd-test", + args = ["--experimental"], + data = ["worker-loader-rab-test.js"], +) + wd_test( src = "leak-fetch-test.wd-test", args = ["--experimental"], @@ -892,12 +950,24 @@ wd_test( data = ["headers-immutable-prototype-test.js"], ) +wd_test( + src = "r2-write-http-metadata-validation-test.wd-test", + args = ["--experimental"], + data = ["r2-write-http-metadata-validation-test.js"], +) + wd_test( src = "identity-transform-stream-state-machine-test.wd-test", args = ["--experimental"], data = ["identity-transform-stream-state-machine-test.js"], ) +wd_test( + src = "identity-transform-stream-uaf-test.wd-test", + args = ["--experimental"], + data = ["identity-transform-stream-uaf-test.js"], +) + wd_test( src = "response-used-body-test.wd-test", args = ["--experimental"], diff --git a/src/workerd/api/tests/connect-neuter-test.js b/src/workerd/api/tests/connect-neuter-test.js new file mode 100644 index 00000000000..a9f79a409de --- /dev/null +++ b/src/workerd/api/tests/connect-neuter-test.js @@ -0,0 +1,52 @@ +// Copyright (c) 2026 Cloudflare, Inc. 
+// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 +// +// Regression test for AUTOVULN-CLOUDFLARE-WORKERD-334: +// connect() handler must neuter the NeuterableIoStream when +// the handler promise resolves, preventing use-after-free. + +import { strictEqual, rejects } from 'assert'; + +let writeRejected = false; + +export default { + async connect(socket, env, ctx) { + const writer = socket.writable.getWriter(); + + ctx.waitUntil( + (async () => { + // Allow connect to return before attempting the write. This should result in the stream + // being neutered. + await scheduler.wait(0); + + await rejects( + async () => await writer.write(new Uint8Array([0x41, 0x42])), + { + name: 'TypeError', + message: + "Can't read from request stream because client disconnected.", + } + ); + + writeRejected = true; + })() + ); + + return; + }, +}; + +export const connectNeuterRegression = { + async test(ctrl, env) { + const socket = env.SELF.connect('example.com:1234'); + + // The destination will close the socket when its `connect` returns. + await socket.closed; + + // Give time for the late-write to be attempted. 
+ await scheduler.wait(10); + + strictEqual(writeRejected, true, 'write must throw on a neutered stream'); + }, +}; diff --git a/src/workerd/api/tests/connect-neuter-test.wd-test b/src/workerd/api/tests/connect-neuter-test.wd-test new file mode 100644 index 00000000000..cbb217e45e4 --- /dev/null +++ b/src/workerd/api/tests/connect-neuter-test.wd-test @@ -0,0 +1,17 @@ +using Workerd = import "/workerd/workerd.capnp"; + +const unitTests :Workerd.Config = ( + services = [ + ( name = "connect-neuter-test", + worker = ( + modules = [ + (name = "worker", esModule = embed "connect-neuter-test.js"), + ], + compatibilityFlags = ["nodejs_compat_v2", "experimental"], + bindings = [ + (name = "SELF", service = "connect-neuter-test"), + ], + ) + ), + ], +); diff --git a/src/workerd/api/tests/identity-transform-stream-uaf-test.js b/src/workerd/api/tests/identity-transform-stream-uaf-test.js new file mode 100644 index 00000000000..1bd702c9878 --- /dev/null +++ b/src/workerd/api/tests/identity-transform-stream-uaf-test.js @@ -0,0 +1,71 @@ +// Copyright (c) 2026 Cloudflare, Inc. +// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 + +// Regression test for AUTOVULN-CLOUDFLARE-WORKERD-156: +// Heap use-after-free read in readHelper(). +// +// When a write is pending on an IdentityTransformStream and the +// write promise is canceled (via Canceler destruction in +// removeSink(), or via AbortSignal-triggered pipeTo cancellation), +// the WriteRequest's non-owning bytes pointer becomes dangling. +// readHelper() must detect that the write fulfiller is no longer +// waiting and transition to an error state instead of dereferencing +// the dangling pointer. +// +// This test uses a CompressionStream piped into an +// IdentityTransformStream with an AbortSignal to trigger the +// cancellation path. Post-fix, the read must reject with a +// disconnected error. 
Pre-fix, the read would succeed by reading +// from freed memory (a heap-use-after-free detectable under ASAN). + +import { strictEqual, rejects } from 'node:assert'; + +export const regressionWriteCancelThenRead = { + async test() { + const its = new IdentityTransformStream(); + const reader = its.readable.getReader({ mode: 'byob' }); + + const cs = new CompressionStream('gzip'); + const csWriter = cs.writable.getWriter(); + const ac = new AbortController(); + + // Pipe compressed output into the identity transform stream. + // preventAbort:true means the abort won't call sink->abort(), + // leaving the WriteRequest with a canceled fulfiller. + const pipePromise = cs.readable + .pipeTo(its.writable, { + signal: ac.signal, + preventAbort: true, + }) + .catch(() => {}); + + // Write data to generate output that parks a WriteRequest in + // the IdentityTransformStreamImpl. + const SIZE = 65536; + await csWriter.write(new Uint8Array(SIZE).fill(0x41)); + + // Let the compressed data flow through. + await scheduler.wait(10); + + // Abort the pipe. This cancels the pumpTo coroutine via + // kj::Canceler, freeing the coroutine frame that backs the + // WriteRequest.bytes pointer. + ac.abort(); + await scheduler.wait(10); + + // Post-fix, readHelper() checks fulfiller->isWaiting() and + // transitions to DISCONNECTED, causing the read to reject. + // Pre-fix, readHelper() would memmove from freed memory. 
+ await rejects( + reader.read(new Uint8Array(SIZE)), + (err) => { + strictEqual(err instanceof Error, true); + return true; + }, + 'read() should reject after write cancellation (UAF guard)' + ); + + await pipePromise; + }, +}; diff --git a/src/workerd/api/tests/identity-transform-stream-uaf-test.wd-test b/src/workerd/api/tests/identity-transform-stream-uaf-test.wd-test new file mode 100644 index 00000000000..f938201f2f1 --- /dev/null +++ b/src/workerd/api/tests/identity-transform-stream-uaf-test.wd-test @@ -0,0 +1,14 @@ +using Workerd = import "/workerd/workerd.capnp"; + +const unitTests :Workerd.Config = ( + services = [ + ( name = "identity-transform-stream-uaf-test", + worker = ( + modules = [ + (name = "worker", esModule = embed "identity-transform-stream-uaf-test.js") + ], + compatibilityFlags = ["nodejs_compat"], + ) + ), + ], +); diff --git a/src/workerd/api/tests/messageport-postmessage-uaf-test.js b/src/workerd/api/tests/messageport-postmessage-uaf-test.js new file mode 100644 index 00000000000..a3a9be51938 --- /dev/null +++ b/src/workerd/api/tests/messageport-postmessage-uaf-test.js @@ -0,0 +1,30 @@ +// Copyright (c) 2026 Cloudflare, Inc. +// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 + +// Regression test for heap-use-after-free in MessagePort.postMessage(). +// A custom getter during serialization can close + GC the target port, +// leaving a dangling reference inside the runIfAlive lambda. +export const closeAndGcDuringPostMessage = { + test() { + let port1; + (() => { + const { port1: p1, port2: _p2 } = new MessageChannel(); + port1 = p1; + // port2 (_p2) goes out of scope here — only reachable via port1's weak ref. 
+ })(); + + const maliciousObject = {}; + Object.defineProperty(maliciousObject, 'value', { + get() { + port1.close(); + for (let i = 0; i < 50; i++) gc(); + return 42; + }, + enumerable: true, + }); + + // Should not crash even though the getter closes the port and forces GC. + port1.postMessage(maliciousObject); + }, +}; diff --git a/src/workerd/api/tests/messageport-postmessage-uaf-test.wd-test b/src/workerd/api/tests/messageport-postmessage-uaf-test.wd-test new file mode 100644 index 00000000000..b1d8a2b94fc --- /dev/null +++ b/src/workerd/api/tests/messageport-postmessage-uaf-test.wd-test @@ -0,0 +1,14 @@ +using Workerd = import "/workerd/workerd.capnp"; + +const unitTests :Workerd.Config = ( + v8Flags = ["--expose-gc"], + services = [( + name = "messageport-postmessage-uaf-test", + worker = ( + modules = [ + (name = "worker", esModule = embed "messageport-postmessage-uaf-test.js"), + ], + compatibilityFlags = ["nodejs_compat_v2", "expose_global_message_channel"], + ), + )], +); diff --git a/src/workerd/api/tests/r2-write-http-metadata-validation-test.js b/src/workerd/api/tests/r2-write-http-metadata-validation-test.js new file mode 100644 index 00000000000..629697bcc28 --- /dev/null +++ b/src/workerd/api/tests/r2-write-http-metadata-validation-test.js @@ -0,0 +1,170 @@ +// Copyright (c) 2026 Cloudflare, Inc. +// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 + +// Regression test for AUTOVULN-CLOUDFLARE-WORKERD-21: +// R2 writeHttpMetadata must reject metadata values containing invalid header +// bytes (NUL, CR, LF) instead of silently inserting them into the Headers +// object via the unvalidated Headers::setCommon path. 
+ +import assert from 'node:assert'; + +const objResponse = { + name: 'test-key', + version: 'objectVersion', + size: '7', + etag: 'objectEtag', + uploaded: '1724767257918', + storageClass: 'Standard', +}; + +// The httpFields that the mock R2 backend returns — includes a CRLF injection +// in contentType. This simulates an attacker who stored malicious metadata. +const maliciousHttpFields = { + contentType: 'text/plain\r\nX-Injected: yes', +}; + +const nulHttpFields = { + contentDisposition: 'attachment; filename="evil\x00.txt"', +}; + +const validHttpFields = { + contentType: 'text/html', + cacheControl: 'no-store', +}; + +function buildGetResponse(httpFields) { + const encoder = new TextEncoder(); + const meta = { + ...objResponse, + httpFields, + }; + const metadata = encoder.encode(JSON.stringify(meta)); + const body = encoder.encode('payload'); + const responseBody = new ReadableStream({ + start(controller) { + controller.enqueue(metadata); + controller.enqueue(body); + controller.close(); + }, + }); + return new Response(responseBody, { + headers: { + 'cf-r2-metadata-size': metadata.length.toString(), + 'content-length': (metadata.length + body.length).toString(), + }, + }); +} + +// Track which httpFields were stored per object name +const storedHttpFields = {}; + +export default { + // Mock R2 backend: handles the HTTP requests that the R2 bucket binding makes + async fetch(request) { + assert(['GET', 'PUT'].includes(request.method)); + + if (request.method === 'PUT') { + const metadataSizeString = request.headers.get('cf-r2-metadata-size'); + assert.notStrictEqual(metadataSizeString, null); + + const metadataSize = parseInt(metadataSizeString); + assert(!Number.isNaN(metadataSize)); + + const reader = request.body.getReader({ mode: 'byob' }); + const jsonArray = new Uint8Array(metadataSize); + const { value } = await reader.readAtLeast(metadataSize, jsonArray); + reader.releaseLock(); + + const jsonRequest = JSON.parse(new TextDecoder().decode(value)); + 
+ // Consume remaining body + for await (const _ of request.body) { + // intentionally empty + } + + // Store the httpFields for later retrieval + storedHttpFields[jsonRequest.object] = jsonRequest.httpFields || {}; + + return Response.json({ + ...objResponse, + name: jsonRequest.object, + httpFields: jsonRequest.httpFields, + }); + } + + if (request.method === 'GET') { + // GET requests carry the R2 request metadata in a header, not the body + const rawHeader = request.headers.get('cf-r2-request'); + const jsonRequest = JSON.parse(rawHeader); + + // Return the stored httpFields for the requested object + const httpFields = storedHttpFields[jsonRequest.object] || {}; + + return buildGetResponse(httpFields); + } + + return new Response('Not found', { status: 404 }); + }, +}; + +export const writeHttpMetadataValidation = { + async test(ctrl, env) { + // 1. Store an R2 object with a contentType containing CRLF (header injection payload). + await env.BUCKET.put('crlf-test', 'payload', { + httpMetadata: maliciousHttpFields, + }); + + const obj = await env.BUCKET.get('crlf-test'); + assert.ok(obj !== null, 'R2 object should exist'); + + // After the fix, writeHttpMetadata must throw a TypeError because the + // stored contentType value contains \r\n which fails header value validation. + const headers = new Headers(); + assert.throws( + () => obj.writeHttpMetadata(headers), + (err) => { + assert.ok( + err instanceof TypeError, + `Expected TypeError, got ${err.constructor.name}` + ); + return true; + }, + 'writeHttpMetadata should throw TypeError for CRLF in metadata value' + ); + + // 2. 
Also test NUL byte in contentDisposition + await env.BUCKET.put('nul-test', 'payload', { + httpMetadata: nulHttpFields, + }); + + const obj2 = await env.BUCKET.get('nul-test'); + assert.ok(obj2 !== null, 'R2 object should exist'); + + const headers2 = new Headers(); + assert.throws( + () => obj2.writeHttpMetadata(headers2), + (err) => { + assert.ok( + err instanceof TypeError, + `Expected TypeError, got ${err.constructor.name}` + ); + return true; + }, + 'writeHttpMetadata should throw TypeError for NUL in metadata value' + ); + + // 3. Verify that valid metadata still works correctly + await env.BUCKET.put('valid-test', 'payload', { + httpMetadata: validHttpFields, + }); + + const obj3 = await env.BUCKET.get('valid-test'); + assert.ok(obj3 !== null, 'R2 object should exist'); + + const headers3 = new Headers(); + obj3.writeHttpMetadata(headers3); + assert.strictEqual(headers3.get('content-type'), 'text/html'); + assert.strictEqual(headers3.get('cache-control'), 'no-store'); + }, +}; diff --git a/src/workerd/api/tests/r2-write-http-metadata-validation-test.wd-test b/src/workerd/api/tests/r2-write-http-metadata-validation-test.wd-test new file mode 100644 index 00000000000..6f0c30f01fa --- /dev/null +++ b/src/workerd/api/tests/r2-write-http-metadata-validation-test.wd-test @@ -0,0 +1,17 @@ +using Workerd = import "/workerd/workerd.capnp"; + +const unitTests :Workerd.Config = ( + services = [ + ( name = "r2-write-http-metadata-validation-test", + worker = ( + modules = [ + (name = "worker", esModule = embed "r2-write-http-metadata-validation-test.js") + ], + bindings = [ + ( name = "BUCKET", r2Bucket = "r2-write-http-metadata-validation-test" ), + ], + compatibilityFlags = ["nodejs_compat", "streams_enable_constructors"], + ) + ), + ], +); diff --git a/src/workerd/api/tests/sql-test-tail.js b/src/workerd/api/tests/sql-test-tail.js index 3d1a3edf021..323441315d6 100644 --- a/src/workerd/api/tests/sql-test-tail.js +++ b/src/workerd/api/tests/sql-test-tail.js @@ -22,13 
+22,13 @@ export const test = { return acc; }, {}); assert.deepStrictEqual(reduced, { - durable_object_storage_exec: 268, + durable_object_storage_exec: 269, durable_object_storage_ingest: 1030, durable_object_storage_getDatabaseSize: 3, durable_object_storage_put: 18, durable_object_storage_get: 18, durable_object_storage_transaction: 8, - durable_object_subrequest: 47, + durable_object_subrequest: 48, durable_object_storage_deleteAll: 1, createStringTable: 4, runActorFunc: 4, @@ -37,6 +37,7 @@ export const test = { testMultiStatement: 1, testRollbackKvInit: 1, testRollbackAlarmInit: 1, + testCursorUaf: 1, durable_object_storage_setAlarm: 2, durable_object_storage_getAlarm: 1, testSessionsAPIBookmark: 20, diff --git a/src/workerd/api/tests/sql-test.js b/src/workerd/api/tests/sql-test.js index 398894d840b..8c4fffdd7b0 100644 --- a/src/workerd/api/tests/sql-test.js +++ b/src/workerd/api/tests/sql-test.js @@ -1442,6 +1442,38 @@ export class DurableObjectExample extends DurableObject { async runActorFunc(name) { return actorFuncs[name](this.state); } + + // Regression test for SQL cursor use-after-free (VULN-130998). + // If GC collects the SqlStorage handle while a cursor is still live, consuming + // the cursor dereferences a dangling Regulator& — a UAF that ASAN detects. + async testCursorUaf() { + const storage = this.state.storage; + let sql = storage.sql; + + let cursor = sql.exec(` + SELECT 1 AS value + UNION ALL SELECT 2 AS value + UNION ALL SELECT 3 AS value + `); + + // JSG_LAZY_INSTANCE_PROPERTY stores `sql` as a writable own property after first access. + // Replacing it drops the parent-side JS root; the cursor itself does not visit SqlStorage. + storage.sql = null; + sql = null; + + for (let i = 0; i < 64; i++) { + gc(); + const junk = []; + for (let j = 0; j < 1024; j++) junk.push({ i, j, data: 'x'.repeat(64) }); + await scheduler.wait(0); + } + + // Consuming the cursor to completion destroys Cursor::State and SqliteDatabase::Query. 
+ // If SqlStorage was collected, we want to make sure that the below still works without tripping + // an ASan use-after-free. + const rows = cursor.toArray(); + assert.deepEqual(rows, [{ value: 1 }, { value: 2 }, { value: 3 }]); + } } export default { @@ -1750,3 +1782,10 @@ actorFuncs.doCriticalErrorOnTransactionRollback = async (state) => { }); }, /^Error: database or disk is full: SQLITE_FULL/); }; + +export let testCursorUaf = { + async test(ctrl, env, ctx) { + let stub = env.ns.get(env.ns.idFromName('cursor-uaf-test')); + await stub.testCursorUaf(); + }, +}; diff --git a/src/workerd/api/tests/streams-byob-close-reentry-test.js b/src/workerd/api/tests/streams-byob-close-reentry-test.js new file mode 100644 index 00000000000..537e58214bd --- /dev/null +++ b/src/workerd/api/tests/streams-byob-close-reentry-test.js @@ -0,0 +1,95 @@ +// Copyright (c) 2026 Cloudflare, Inc. +// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 + +// Regression test for AUTOVULN-CLOUDFLARE-WORKERD-198: Heap UAF in +// ByteQueue ConsumerImpl via Object.prototype.then re-entrancy during +// controller.close(). +// +// When a byte ReadableStream with a pending BYOB read ({min: N}) has +// buffered data (fewer than min bytes) and controller.close() is called, +// ByteQueue::handleMaybeClose() flushes the buffered bytes into the +// pending BYOB view and calls request->resolve(js). V8's promise +// resolution performs Get(resolution, "then"), which invokes an +// attacker-installed Object.prototype.then getter. Inside the getter, +// reader.cancel() frees the ByteQueue::Consumer while +// ConsumerImpl::maybeDrainAndSetState() is still on the stack. +// +// The fix adds selfRef.addRef() liveness guards around handleMaybeClose +// and after each request->resolve(js) call inside it. 
+ +import { strictEqual } from 'node:assert'; + +export const byobCloseReentryViaThen = { + async test() { + let controller; + const rs = new ReadableStream({ + type: 'bytes', + start(c) { + controller = c; + }, + }); + + const reader = rs.getReader({ mode: 'byob' }); + + // Issue a BYOB read with min:10 into a 10-byte buffer. + // Pending read sits in ConsumerImpl::Ready::readRequests. + reader.read(new Uint8Array(10), { + min: 10, + }); + + // Enqueue 5 bytes — fewer than min, so the read stays pending. + controller.enqueue(new Uint8Array([1, 2, 3, 4, 5])); + + let armed = true; + const noopThen = function (resolve, reject) { + /* never settle — prevents further thenable chaining */ + }; + + // Install a trap on Object.prototype.then. When V8 resolves + // the pending read inside handleMaybeClose, it checks for a + // "then" property on the ReadResult wrapper. Our getter calls + // reader.cancel() to free the ConsumerImpl while + // handleMaybeClose / maybeDrainAndSetState still hold raw + // references to it. + Object.defineProperty(Object.prototype, 'then', { + configurable: true, + get() { + if (armed) { + armed = false; + try { + reader.cancel(); + } catch { + // cancel may throw — that's fine + } + return noopThen; + } + return undefined; + }, + }); + + try { + // controller.close() enters the close path: + // ReadableByteStreamController::close → + // ReadableImpl::close → ByteQueue::close → + // QueueImpl::close → ConsumerImpl::close → + // maybeDrainAndSetState → handleMaybeClose + // handleMaybeClose flushes the 5 buffered bytes into the + // BYOB view and calls request->resolve(js), triggering + // the then getter. + controller.close(); + } catch { + // close may throw due to re-entrant cancel — expected + } + + armed = false; + delete Object.prototype.then; + + // If we get here without SIGSEGV / UAF, the fix works. 
+ strictEqual( + true, + true, + 'survived re-entrant cancel during BYOB close drain' + ); + }, +}; diff --git a/src/workerd/api/tests/streams-byob-close-reentry-test.wd-test b/src/workerd/api/tests/streams-byob-close-reentry-test.wd-test new file mode 100644 index 00000000000..904d5f2ed2d --- /dev/null +++ b/src/workerd/api/tests/streams-byob-close-reentry-test.wd-test @@ -0,0 +1,17 @@ +using Workerd = import "/workerd/workerd.capnp"; + +const unitTests :Workerd.Config = ( + services = [ + ( name = "streams-byob-close-reentry-test", + worker = ( + modules = [ + (name = "worker", esModule = embed "streams-byob-close-reentry-test.js") + ], + compatibilityFlags = [ + "nodejs_compat", + "streams_enable_constructors", + ] + ) + ), + ], +); diff --git a/src/workerd/api/tests/streams-byob-concurrent-readatleast-test.js b/src/workerd/api/tests/streams-byob-concurrent-readatleast-test.js new file mode 100644 index 00000000000..414a0e6fa3b --- /dev/null +++ b/src/workerd/api/tests/streams-byob-concurrent-readatleast-test.js @@ -0,0 +1,79 @@ +// Copyright (c) 2026 Cloudflare, Inc. +// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 + +// Regression test for AUTOVULN-CLOUDFLARE-WORKERD-18: +// Concurrent BYOB readAtLeast() calls with partial enqueues must +// not trigger an internal ByteQueue invariant failure. The bug was +// that handlePush() in queue.c++ re-buffered a partially consumed +// entry from offset 0 instead of from the unread tail +// (entryOffset), causing duplicated bytes that violated the +// queueTotalSize < atLeast assertion on the next enqueue. 
+ +import { strictEqual, deepStrictEqual } from 'node:assert'; + +export const concurrentByobReadAtLeastPartialEnqueue = { + async test() { + let ctrl; + const rs = new ReadableStream({ + type: 'bytes', + start(controller) { + ctrl = controller; + }, + }); + + const reader = rs.getReader({ mode: 'byob' }); + + // Issue two concurrent readAtLeast(5) calls with 5-byte views. + // Both are pending since no data has been enqueued yet. + const p1 = reader.readAtLeast(5, new Uint8Array(5)); + const p2 = reader.readAtLeast(5, new Uint8Array(5)); + + // Enqueue 7 bytes. handlePush processes pending reads: + // Read #1 (atLeast=5, view=5): copies 5 bytes, + // amountAvailable=2, entryOffset=5 + // Read #2 (atLeast=5): amountAvailable(2) < atLeast(5), + // so buffer the remainder. + // BUG: bufferData(0) → queueTotalSize = 7 (wrong!) + // FIX: bufferData(5) → queueTotalSize = 2 (correct) + ctrl.enqueue(new Uint8Array([1, 2, 3, 4, 5, 6, 7])); + + // Enqueue 4 more bytes. With the bug, queueTotalSize=7 and + // the KJ_REQUIRE (state.queueTotalSize < atLeast → 7 < 5) + // fails. With the fix, queueTotalSize=2, + // amountAvailable=2+4=6 >= 5, so read #2 is fulfilled. + ctrl.enqueue(new Uint8Array([8, 9, 10, 11])); + + ctrl.close(); + + const r1 = await p1; + const r2 = await p2; + + strictEqual(r1.done, false); + strictEqual(r2.done, false); + + // Read #1: first 5 bytes from the 7-byte enqueue. + const r1Bytes = new Uint8Array( + r1.value.buffer, + r1.value.byteOffset, + r1.value.byteLength + ); + deepStrictEqual( + Array.from(r1Bytes), + [1, 2, 3, 4, 5], + 'read #1 should get bytes [1..5]' + ); + + // Read #2: remaining 2 from first enqueue + 3 from second. 
+ const r2Bytes = new Uint8Array( + r2.value.buffer, + r2.value.byteOffset, + r2.value.byteLength + ); + deepStrictEqual( + Array.from(r2Bytes), + [6, 7, 8, 9, 10], + 'read #2 should get bytes [6..10]' + ); + }, +}; diff --git a/src/workerd/api/tests/streams-byob-concurrent-readatleast-test.wd-test b/src/workerd/api/tests/streams-byob-concurrent-readatleast-test.wd-test new file mode 100644 index 00000000000..3ed3f77e6ab --- /dev/null +++ b/src/workerd/api/tests/streams-byob-concurrent-readatleast-test.wd-test @@ -0,0 +1,14 @@ +using Workerd = import "/workerd/workerd.capnp"; + +const unitTests :Workerd.Config = ( + services = [ + ( name = "streams-byob-concurrent-readatleast-test", + worker = ( + modules = [ + (name = "worker", esModule = embed "streams-byob-concurrent-readatleast-test.js") + ], + compatibilityFlags = ["nodejs_compat", "streams_enable_constructors"], + ) + ), + ], +); diff --git a/src/workerd/api/tests/streams-byte-cancel-uaf-test.js b/src/workerd/api/tests/streams-byte-cancel-uaf-test.js new file mode 100644 index 00000000000..670588db404 --- /dev/null +++ b/src/workerd/api/tests/streams-byte-cancel-uaf-test.js @@ -0,0 +1,32 @@ +// Copyright (c) 2026 Cloudflare, Inc. +// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 + +// Regression test for heap-use-after-free in ReadableStream byte queue. +// Calling reader.cancel() inside the pull() callback destroys the consumer +// while ConsumerImpl::read() is still on the call stack. The fix guards the +// maybeDrainAndSetState() call with a weak-ref check. 
+import { strictEqual } from 'node:assert'; + +export const cancelInsidePull = { + async test() { + let pullCalled = false; + let reader; + const stream = new ReadableStream({ + type: 'bytes', + autoAllocateChunkSize: 1024, + pull(controller) { + pullCalled = true; + reader.cancel('canceled from pull'); + return new Promise(() => {}); + }, + }); + reader = stream.getReader(); + const result = await reader.read(); + // After cancel, the read resolves as done. + strictEqual(result.done, true); + strictEqual(pullCalled, true); + // Force GC to shake out any dangling pointers from the freed consumer. + gc(); + }, +}; diff --git a/src/workerd/api/tests/streams-byte-cancel-uaf-test.wd-test b/src/workerd/api/tests/streams-byte-cancel-uaf-test.wd-test new file mode 100644 index 00000000000..45f30bab435 --- /dev/null +++ b/src/workerd/api/tests/streams-byte-cancel-uaf-test.wd-test @@ -0,0 +1,14 @@ +using Workerd = import "/workerd/workerd.capnp"; + +const unitTests :Workerd.Config = ( + v8Flags = ["--expose-gc"], + services = [( + name = "streams-byte-cancel-uaf-test", + worker = ( + modules = [ + (name = "worker", esModule = embed "streams-byte-cancel-uaf-test.js"), + ], + compatibilityFlags = ["nodejs_compat_v2", "streams_enable_constructors"], + ), + )], +); diff --git a/src/workerd/api/tests/streams-byte-handlePush-uaf-test.js b/src/workerd/api/tests/streams-byte-handlePush-uaf-test.js new file mode 100644 index 00000000000..879a4970da8 --- /dev/null +++ b/src/workerd/api/tests/streams-byte-handlePush-uaf-test.js @@ -0,0 +1,86 @@ +// Copyright (c) 2026 Cloudflare, Inc. +// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 + +// Regression test for AUTOVULN-CLOUDFLARE-WORKERD-95: +// Heap use-after-free in ByteQueue::handlePush via re-entrant +// ReadableByteStreamController.error() during BYOB read resolution. +// +// The attack: a BYOB reader issues a read, then the controller enqueues data. 
+// handlePush resolves the pending read via request->resolve(js), which triggers +// V8's promise resolution thenable check (Get(resolution, "then")). A malicious +// Object.prototype.then getter calls controller.error(), which transitions the +// ConsumerImpl from Ready to Errored, freeing the Ready storage. After resolve() +// returns, handlePush's while loop checks state.readRequests.empty() — a +// use-after-free on the freed Ready storage. +// +// Under ASAN this crashes immediately. Without ASAN the test verifies behavioral +// correctness: the runtime does not assert/crash and the read resolves with data. +import { strictEqual } from 'node:assert'; + +export const handlePushReentrantError = { + async test() { + let ctrl; + const stream = new ReadableStream({ + type: 'bytes', + start(controller) { + ctrl = controller; + }, + }); + + const reader = stream.getReader({ mode: 'byob' }); + + // Issue a BYOB read that will be pending until data is enqueued. + const readPromise = reader.read(new Uint8Array(16)); + + // Install a malicious Object.prototype.then getter that calls + // controller.error() during promise resolution, triggering re-entrant + // state destruction while handlePush still holds a Ready& reference. + let thenCalled = false; + Object.defineProperty(Object.prototype, 'then', { + get() { + // Only trigger once to avoid infinite recursion. + delete Object.prototype.then; + thenCalled = true; + try { + ctrl.error(new Error('re-entrant error from then getter')); + } catch { + // controller.error() may throw if the controller state has + // already changed. That's fine. + } + return undefined; + }, + configurable: true, + }); + + try { + // Enqueue data — this calls handlePush which resolves the pending + // BYOB read, triggering the Object.prototype.then getter above. + // Pre-fix, this would cause a heap use-after-free when the while + // loop continued after resolve() returned. 
+ ctrl.enqueue(new Uint8Array([1, 2, 3, 4])); + } catch { + // The enqueue may throw because the stream was errored re-entrantly. + } + + // Clean up the then getter in case it wasn't triggered. + delete Object.prototype.then; + + // The read was resolved with data before the error was triggered + // (handlePush resolves the read, then V8's thenable check fires). + const result = await readPromise; + strictEqual(result.done, false); + strictEqual(result.value.byteLength, 4); + strictEqual(result.value[0], 1); + strictEqual(thenCalled, true); + + // Allocate objects to pressure the allocator into reclaiming freed memory, + // making the UAF more likely to manifest under ASAN. + for (let i = 0; i < 100; i++) { + new ReadableStream({ type: 'bytes', start() {} }); + } + + // Force GC to shake out any dangling pointers from the freed consumer. + gc(); + }, +}; diff --git a/src/workerd/api/tests/streams-byte-handlePush-uaf-test.wd-test b/src/workerd/api/tests/streams-byte-handlePush-uaf-test.wd-test new file mode 100644 index 00000000000..e47d4e60396 --- /dev/null +++ b/src/workerd/api/tests/streams-byte-handlePush-uaf-test.wd-test @@ -0,0 +1,14 @@ +using Workerd = import "/workerd/workerd.capnp"; + +const unitTests :Workerd.Config = ( + v8Flags = ["--expose-gc"], + services = [( + name = "streams-byte-handlePush-uaf-test", + worker = ( + modules = [ + (name = "worker", esModule = embed "streams-byte-handlePush-uaf-test.js"), + ], + compatibilityFlags = ["nodejs_compat_v2", "streams_enable_constructors"], + ), + )], +); diff --git a/src/workerd/api/tests/streams-js-test.js b/src/workerd/api/tests/streams-js-test.js index 397e4762ff8..d76810529db 100644 --- a/src/workerd/api/tests/streams-js-test.js +++ b/src/workerd/api/tests/streams-js-test.js @@ -197,13 +197,20 @@ export const newReadableStreamSyncAlgorithmErrorsHandled = { // Pull error { + let thrown = false; const rs = new ReadableStream({ pull() { - throw new Error('boom'); + if (!thrown) { + thrown = true; + throw 
new Error('boom'); + } }, }); - await rejects(rs.getReader().read(), { message: 'boom' }); + const reader = rs.getReader(); + await rejects(reader.read(), { message: 'boom' }); + // Verify the stream is persistently errored, not just pull throwing again. + await rejects(reader.read(), { message: 'boom' }); } // Cancel error @@ -234,13 +241,20 @@ export const newReadableStreamAsyncAlgorithmErrorsHandled = { // Async pull error { + let thrown = false; const rs = new ReadableStream({ async pull() { - throw new Error('boom'); + if (!thrown) { + thrown = true; + throw new Error('boom'); + } }, }); - await rejects(rs.getReader().read(), { message: 'boom' }); + const reader = rs.getReader(); + await rejects(reader.read(), { message: 'boom' }); + // Verify the stream is persistently errored, not just pull throwing again. + await rejects(reader.read(), { message: 'boom' }); } // Async cancel error diff --git a/src/workerd/api/tests/tail-worker-test.js b/src/workerd/api/tests/tail-worker-test.js index bace4473cf4..7a380cf5357 100644 --- a/src/workerd/api/tests/tail-worker-test.js +++ b/src/workerd/api/tests/tail-worker-test.js @@ -287,9 +287,9 @@ const expectedWithPropagation = [ // websocket/hibernation: independent roots n(E.wsUpgrade), - n(E.wsHibernation), - n(E.wsMessage), - n(E.wsClose), + // wsMessage and wsClose are children of wsHibernation because the trace context + // was captured at acceptWebSocket() time and restored when the DO was woken up. + n(E.wsHibernation, [n(E.wsMessage), n(E.wsClose)]), // cacheMode: standalone n(E.cacheMode), diff --git a/src/workerd/api/tests/worker-loader-rab-test.js b/src/workerd/api/tests/worker-loader-rab-test.js new file mode 100644 index 00000000000..9d5993c3134 --- /dev/null +++ b/src/workerd/api/tests/worker-loader-rab-test.js @@ -0,0 +1,104 @@ +// Copyright (c) 2025 Cloudflare, Inc. 
+// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 + +// Regression test for AUTOVULN-CLOUDFLARE-WORKERD-70: +// WorkerLoader::extractSource() must copy data/wasm module bytes out of +// V8's BackingStore before going async, because a resizable ArrayBuffer +// can have its committed pages revoked via resize(0) between load() +// returning and the deferred compileDataGlobal() memcpy. + +import assert from 'node:assert'; + +// Minimal valid WASM module that exports an add(i32, i32) -> i32 function. +const WASM_ADD_BYTES = new Uint8Array([ + 0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x01, 0x07, 0x01, 0x60, 0x02, + 0x7f, 0x7f, 0x01, 0x7f, 0x03, 0x02, 0x01, 0x00, 0x07, 0x07, 0x01, 0x03, 0x61, + 0x64, 0x64, 0x00, 0x00, 0x0a, 0x09, 0x01, 0x07, 0x00, 0x20, 0x00, 0x20, 0x01, + 0x6a, 0x0b, +]); + +// Test that a resizable ArrayBuffer used as a data module body can be +// resized to zero after load() without crashing the process. Pre-fix +// this would SIGSEGV in compileDataGlobal(); post-fix the bytes are +// copied eagerly so the child worker compiles and runs normally. +export let resizableArrayBufferDataModule = { + async test(ctrl, env, ctx) { + // Create a resizable ArrayBuffer and fill it with known content. + const rab = new ArrayBuffer(64, { maxByteLength: 128 }); + const view = new Uint8Array(rab); + const expected = 'Hello from resizable ArrayBuffer!'; + new TextEncoder().encodeInto(expected, view); + + // load() synchronously captures the bytes via jsg::asBytes(). 
+ let worker = env.loader.load({ + compatibilityDate: '2025-01-01', + mainModule: 'main.js', + modules: { + 'main.js': { + js: ` + import {WorkerEntrypoint} from "cloudflare:workers"; + import dataModule from "./data.bin"; + export default class extends WorkerEntrypoint { + getData() { + return new TextDecoder().decode(dataModule.slice(0, ${expected.length})); + } + } + `, + }, + 'data.bin': { + data: rab, + }, + }, + }); + + // Shrink the resizable ArrayBuffer to zero. This mprotect()s the + // previously-committed pages to PROT_NONE. If extractSource() did + // not copy, the deferred compilation will SIGSEGV. + rab.resize(0); + + // Force compilation and exercise the child worker. + let result = await worker.getEntrypoint().getData(); + assert.strictEqual(result, expected); + }, +}; + +// Same test but for wasm modules -- the fix must also copy wasm bytes. +export let resizableArrayBufferWasmModule = { + async test(ctrl, env, ctx) { + // Copy the WASM bytes into a resizable ArrayBuffer. + const rab = new ArrayBuffer(WASM_ADD_BYTES.byteLength, { + maxByteLength: WASM_ADD_BYTES.byteLength * 2, + }); + new Uint8Array(rab).set(WASM_ADD_BYTES); + + let worker = env.loader.load({ + compatibilityDate: '2025-01-01', + mainModule: 'main.js', + modules: { + 'main.js': { + js: ` + import {WorkerEntrypoint} from "cloudflare:workers"; + import wasmModule from "./math.wasm"; + export default class extends WorkerEntrypoint { + async add(a, b) { + const instance = await WebAssembly.instantiate(wasmModule); + return instance.exports.add(a, b); + } + } + `, + }, + 'math.wasm': { + wasm: rab, + }, + }, + }); + + // Shrink to zero after load() captured the bytes. + rab.resize(0); + + // Force compilation -- pre-fix this SIGSEGVs. 
+ let result = await worker.getEntrypoint().add(3, 4); + assert.strictEqual(result, 7); + }, +}; diff --git a/src/workerd/api/tests/worker-loader-rab-test.wd-test b/src/workerd/api/tests/worker-loader-rab-test.wd-test new file mode 100644 index 00000000000..99ec0af80c7 --- /dev/null +++ b/src/workerd/api/tests/worker-loader-rab-test.wd-test @@ -0,0 +1,17 @@ +using Workerd = import "/workerd/workerd.capnp"; + +const unitTests :Workerd.Config = ( + services = [ + ( name = "worker-loader-rab-test", + worker = ( + modules = [ + (name = "worker", esModule = embed "worker-loader-rab-test.js") + ], + compatibilityFlags = ["nodejs_compat","experimental"], + bindings = [ + (name = "loader", workerLoader = ()), + ], + ) + ), + ], +); diff --git a/src/workerd/api/tests/worker-loader-test.js b/src/workerd/api/tests/worker-loader-test.js index 9dcd7196c14..5243928491a 100644 --- a/src/workerd/api/tests/worker-loader-test.js +++ b/src/workerd/api/tests/worker-loader-test.js @@ -1019,6 +1019,77 @@ export let abortIsolateDynamic = { }, }; +// Regression test for AUTOVULN-CLOUDFLARE-WORKERD-256: the inner .then() +// continuation in WorkerLoader::get() previously captured IoContext by raw C++ +// reference (&ioctx). If the originating IoContext was destroyed before the +// user's getCode() promise resolved, and the promise was later resolved from a +// different IoContext, the lambda would dereference freed memory (heap UAF). +// +// The fix replaces the raw reference with a WeakRef. This test exercises the +// patched code path by: +// 1. Making a sub-request (IoContext B) that calls env.loader.get() with a +// pending getCode promise, saving the resolve function globally. +// 2. Returning from the sub-request so IoContext B drains and is destroyed. +// 3. Resolving the saved promise from the test's IoContext A. +// +// Pre-patch: the .then() continuation dereferences freed IoContext B → UAF/crash. 
+// Post-patch: the WeakRef check detects the dead IoContext and throws a clean +// JS error ("The request which initiated this dynamic worker load has already +// completed."), which surfaces as a rejected promise on the WorkerStub. + +// Entrypoint for the sub-request that sets up the pending loader promise. +export class SetupLoaderUaf extends WorkerEntrypoint { + async fetch() { + let { promise, resolve } = Promise.withResolvers(); + globalThis.savedLoaderResolve = resolve; + + // Call env.loader.get() with a getCode that returns the pending promise. + // This installs a .then() continuation capturing the IoContext reference. + this.env.loader.get(null, () => promise); + + // Let the reentry callback run and chain .then() onto the promise. + await scheduler.wait(10); + + // Return: IoContext B will drain and be destroyed, but the V8 promise + // reaction (with the captured IoContext ref) survives on the JS heap. + return new Response('setup-done'); + } +} + +export let regressionDeadIoContextGetCode = { + async test(ctrl, env, ctx) { + // Step 1: sub-request sets up the pending promise via a separate entrypoint. + let setupEp = ctx.exports.SetupLoaderUaf; + let resp = await setupEp.fetch('http://x/setup-loader-uaf'); + assert.strictEqual(await resp.text(), 'setup-done'); + + // Step 2: IoContext B has drained. Give the runtime a moment to clean up. + await scheduler.wait(50); + + // Step 3: resolve the saved promise from IoContext A. Post-patch, the inner + // .then() continuation detects the dead IoContext via WeakRef and throws. + assert.notStrictEqual( + globalThis.savedLoaderResolve, + undefined, + 'savedLoaderResolve should have been set by the sub-request' + ); + globalThis.savedLoaderResolve({ + compatibilityDate: '2025-01-01', + mainModule: 'm.js', + modules: { 'm.js': 'export default {}' }, + }); + + // Give the promise reaction time to fire. + await scheduler.wait(50); + + // The WorkerStub was created in IoContext B which is now dead. 
Attempting to + // use it from IoContext A should fail. The exact error depends on the runtime + // path, but the critical thing is that we reach this point without crashing + // (pre-patch, the process would have crashed from the UAF). + assert.ok(true, 'Reached end of test without UAF crash'); + }, +}; + // Test that abortIsolate() works correctly for anonymous dynamic workers. // Anonymous workers don't have a name and therefore aren't stored in the loader's map. export let abortIsolateDynamicAnonymous = { diff --git a/src/workerd/api/tests/worker-loader-unnamed-gc-test.js b/src/workerd/api/tests/worker-loader-unnamed-gc-test.js new file mode 100644 index 00000000000..d692772c16c --- /dev/null +++ b/src/workerd/api/tests/worker-loader-unnamed-gc-test.js @@ -0,0 +1,47 @@ +// Copyright (c) 2025 Cloudflare, Inc. +// Licensed under the Apache 2.0 license found in the LICENSE file or at: +// https://opensource.org/licenses/Apache-2.0 + +// Regression test for AUTOVULN-CLOUDFLARE-WORKERD-110: +// Dropping the only JS reference to an unnamed WorkerStub during the getCode +// callback and forcing GC must not crash the process. Before the fix, +// synchronous destruction of WorkerStubImpl via DeleteQueue's fast path would +// destroy the start() coroutine's ChainPromiseNode while it was still firing, +// tripping KJ_REQUIRE(!firing) in Event::~Event() and aborting the process. +import assert from 'node:assert'; + +export let unnamedStubGcDuringGetCode = { + async test(ctrl, env, ctx) { + let getCodeCalled = false; + let _stub; + _stub = env.loader.get(null, async () => { + getCodeCalled = true; + // Drop the only JS reference to the unnamed stub. + _stub = null; + // Force V8 garbage collection so the CppgcShim destructor runs + // synchronously on this turn, which would trigger the bug pre-fix. 
+ gc(); + gc(); + return { + compatibilityDate: '2025-01-01', + mainModule: 'main.js', + modules: { + 'main.js': ` + import {WorkerEntrypoint} from "cloudflare:workers"; + export default class extends WorkerEntrypoint { + ping() { return 'pong'; } + } + `, + }, + }; + }); + + // Yield to the event loop so the reentry callback (getCode) fires. + // Before the fix, the process would abort here with: + // "Promise callback destroyed itself." + await scheduler.wait(100); + + // If we reach this line, the process did not crash — the fix is working. + assert.ok(getCodeCalled, 'getCode callback should have been invoked'); + }, +}; diff --git a/src/workerd/api/tests/worker-loader-unnamed-gc-test.wd-test b/src/workerd/api/tests/worker-loader-unnamed-gc-test.wd-test new file mode 100644 index 00000000000..cf8a4cfdbac --- /dev/null +++ b/src/workerd/api/tests/worker-loader-unnamed-gc-test.wd-test @@ -0,0 +1,18 @@ +using Workerd = import "/workerd/workerd.capnp"; + +const unitTests :Workerd.Config = ( + v8Flags = ["--expose-gc"], + services = [ + ( name = "worker-loader-unnamed-gc-test", + worker = ( + modules = [ + (name = "worker", esModule = embed "worker-loader-unnamed-gc-test.js") + ], + compatibilityFlags = ["nodejs_compat", "experimental"], + bindings = [ + (name = "loader", workerLoader = ()), + ], + ) + ), + ], +); diff --git a/src/workerd/api/trace.h b/src/workerd/api/trace.h index a8258e72e70..97f4594f300 100644 --- a/src/workerd/api/trace.h +++ b/src/workerd/api/trace.h @@ -174,6 +174,46 @@ class TraceItem final: public jsg::Object { uint cpuTime; uint wallTime; bool truncated; + + void visitForGc(jsg::GcVisitor& visitor) { + KJ_IF_SOME(info, eventInfo) { + KJ_SWITCH_ONEOF(info) { + KJ_CASE_ONEOF(fetch, jsg::Ref) { + visitor.visit(fetch); + } + KJ_CASE_ONEOF(rpc, jsg::Ref) { + visitor.visit(rpc); + } + KJ_CASE_ONEOF(conn, jsg::Ref) { + visitor.visit(conn); + } + KJ_CASE_ONEOF(sched, jsg::Ref) { + visitor.visit(sched); + } + KJ_CASE_ONEOF(alarm, jsg::Ref) { + 
visitor.visit(alarm); + } + KJ_CASE_ONEOF(queue, jsg::Ref) { + visitor.visit(queue); + } + KJ_CASE_ONEOF(email, jsg::Ref) { + visitor.visit(email); + } + KJ_CASE_ONEOF(tail, jsg::Ref) { + visitor.visit(tail); + } + KJ_CASE_ONEOF(custom, jsg::Ref) { + visitor.visit(custom); + } + KJ_CASE_ONEOF(ws, jsg::Ref) { + visitor.visit(ws); + } + } + } + visitor.visitAll(logs); + visitor.visitAll(exceptions); + visitor.visitAll(diagnosticChannelEvents); + } }; // When adding a new TraceItem eventInfo type, it is important not to @@ -217,6 +257,10 @@ class TraceItem::FetchEventInfo final: public jsg::Object { private: jsg::Ref request; jsg::Optional> response; + + void visitForGc(jsg::GcVisitor& visitor) { + visitor.visit(request, response); + } }; class TraceItem::FetchEventInfo::Request final: public jsg::Object { @@ -416,6 +460,10 @@ class TraceItem::TailEventInfo final: public jsg::Object { private: kj::Array> consumedEvents; + + void visitForGc(jsg::GcVisitor& visitor) { + visitor.visitAll(consumedEvents); + } }; class TraceItem::TailEventInfo::TailItem final: public jsg::Object { @@ -462,6 +510,20 @@ class TraceItem::HibernatableWebSocketEventInfo final: public jsg::Object { private: Type eventType; + + void visitForGc(jsg::GcVisitor& visitor) { + KJ_SWITCH_ONEOF(eventType) { + KJ_CASE_ONEOF(msg, jsg::Ref) { + visitor.visit(msg); + } + KJ_CASE_ONEOF(close, jsg::Ref) { + visitor.visit(close); + } + KJ_CASE_ONEOF(err, jsg::Ref) { + visitor.visit(err); + } + } + } }; class TraceItem::HibernatableWebSocketEventInfo::Message final: public jsg::Object { @@ -581,6 +643,10 @@ class TraceLog final: public jsg::Object { double timestamp; kj::LiteralStringConst level; jsg::V8Ref message; + + void visitForGc(jsg::GcVisitor& visitor) { + visitor.visit(message); + } }; class TraceException final: public jsg::Object { diff --git a/src/workerd/api/worker-loader.c++ b/src/workerd/api/worker-loader.c++ index 09324b5587a..89396d5768e 100644 --- a/src/workerd/api/worker-loader.c++ +++ 
b/src/workerd/api/worker-loader.c++ @@ -65,10 +65,13 @@ jsg::Ref WorkerLoader::get( auto& ioctx = IoContext::current(); auto reenterAndGetCode = ioctx.makeReentryCallback( - [&ioctx, getCode = kj::mv(getCode), compatDateValidation = compatDateValidation]( - jsg::Lock& js) mutable { - return getCode(js).then( - js, [&ioctx, compatDateValidation](jsg::Lock& js, WorkerCode code) -> DynamicWorkerSource { + [weakIoctx = ioctx.getWeakRef(), getCode = kj::mv(getCode), + compatDateValidation = compatDateValidation](jsg::Lock& js) mutable { + return getCode(js).then(js, + [weakIoctx = kj::addRef(*weakIoctx), compatDateValidation]( + jsg::Lock& js, WorkerCode code) -> DynamicWorkerSource { + auto& ioctx = JSG_REQUIRE_NONNULL(weakIoctx->tryGet(), Error, + "The request which initiated this dynamic worker load has already completed."); return toDynamicWorkerSource(js, ioctx, compatDateValidation, kj::mv(code)); }); }); @@ -220,6 +223,12 @@ Worker::Script::Source WorkerLoader::extractSource(jsg::Lock& js, WorkerCode& co } else KJ_IF_SOME(text, module.text) { return Worker::Script::TextModule{.body = text}; } else KJ_IF_SOME(data, module.data) { + // The kj::Array produced by jsg::asBytes() points into a V8 + // BackingStore. If the user passed a *resizable* ArrayBuffer they can call + // resize(0) (or transfer/detach) after load() returns but before the child + // isolate is compiled asynchronously, leaving us with a (ptr,len) into + // PROT_NONE pages. Copy now so the bytes survive until compileDataGlobal(). + data = kj::heapArray(data.asPtr()); return Worker::Script::DataModule{.body = data}; } else KJ_IF_SOME(json, module.json) { kj::StringPtr serialized = @@ -231,6 +240,8 @@ Worker::Script::Source WorkerLoader::extractSource(jsg::Lock& js, WorkerCode& co } else KJ_IF_SOME(py, module.py) { return Worker::Script::PythonModule{.body = py}; } else KJ_IF_SOME(wasm, module.wasm) { + // Same as `data` above: copy out of the V8 BackingStore before going async. 
+ wasm = kj::heapArray(wasm.asPtr()); return Worker::Script::WasmModule{.body = wasm}; } else { KJ_UNREACHABLE; diff --git a/src/workerd/api/worker-rpc.c++ b/src/workerd/api/worker-rpc.c++ index e350dbd9c52..7830011b088 100644 --- a/src/workerd/api/worker-rpc.c++ +++ b/src/workerd/api/worker-rpc.c++ @@ -15,125 +15,6 @@ namespace workerd::api { -namespace { - -using StreamSinkFulfiller = kj::Own>; - -} // namespace - -// Implementation of StreamSink RPC interface. The stream sender calls `startStream()` when -// serializing each stream, and the recipient calls `setSlot()` when deserializing streams to -// provide the appropriate destination capability. This class is designed to allow these two -// calls to happen in either order for each slot. -class StreamSinkImpl final: public rpc::JsValue::StreamSink::Server, public kj::Refcounted { - public: - ~StreamSinkImpl() noexcept(false) { - for (auto& slot: table) { - KJ_IF_SOME(f, slot.tryGet()) { - f->reject(KJ_EXCEPTION(FAILED, "expected startStream() was never received")); - } - } - } - - void setSlot(uint i, capnp::Capability::Client stream) { - if (table.size() <= i) table.resize(i + 1); - - if (table[i] == nullptr) { - table[i] = kj::mv(stream); - } else KJ_SWITCH_ONEOF(table[i]) { - KJ_CASE_ONEOF(stream, capnp::Capability::Client) { - KJ_FAIL_REQUIRE("setSlot() tried to set the same slot twice", i); - } - KJ_CASE_ONEOF(fulfiller, StreamFulfiller) { - fulfiller->fulfill(kj::mv(stream)); - table[i] = Consumed(); - } - KJ_CASE_ONEOF(_, Consumed) { - KJ_FAIL_REQUIRE("setSlot() tried to set the same slot twice", i); - } - } - } - - kj::Promise startStream(StartStreamContext context) override { - uint i = context.getParams().getExternalIndex(); - - if (table.size() <= i) { - // guard against ridiculous table allocation - JSG_REQUIRE(i < 1024, Error, "Too many streams in one message."); - table.resize(i + 1); - } - - if (table[i] == nullptr) { - auto paf = kj::newPromiseAndFulfiller(); - table[i] = kj::mv(paf.fulfiller); 
- context.getResults(capnp::MessageSize{4, 1}).setStream(kj::mv(paf.promise)); - } else KJ_SWITCH_ONEOF(table[i]) { - KJ_CASE_ONEOF(stream, capnp::Capability::Client) { - context.getResults(capnp::MessageSize{4, 1}).setStream(kj::mv(stream)); - table[i] = Consumed(); - } - KJ_CASE_ONEOF(fulfiller, StreamFulfiller) { - KJ_FAIL_REQUIRE("startStream() tried to start the same stream twice", i); - } - KJ_CASE_ONEOF(_, Consumed) { - KJ_FAIL_REQUIRE("startStream() tried to start the same stream twice", i); - } - } - - return kj::READY_NOW; - } - - private: - using StreamFulfiller = kj::Own>; - struct Consumed {}; - - // Each slot starts out null (uninitialized). It becomes a Capability::Client if setSlot() is - // called first, or a StreamFulfiller if startStream() is called first. It becomes `Consumed` - // when the other method is called. - // HACK: Slots in the table take advantage of the little-known fact that OneOf has a "null" - // value, which is the value a OneOf has when default-initialized. This is useful because we - // don't want to explicitly initialize skipped slots. Maybe would be another option - // here, but would add 8 bytes to every slot just to store a boolean... feels bloated. There - // are only two methods in this class so I think it's OK. - using Slot = kj::OneOf; - - kj::Vector table; -}; - -kj::Maybe RpcSerializerExternalHandler::getExternalPusher() { - KJ_IF_SOME(ep, externalPusher) { - return ep; - } else KJ_IF_SOME(func, getStreamHandlerFunc.tryGet()) { - // First call, set up ExternalPusher. - return externalPusher.emplace(func()); - } else { - // Using StreamSink. - return kj::none; - } -} - -capnp::Capability::Client RpcSerializerExternalHandler::writeStream(BuilderCallback callback) { - rpc::JsValue::StreamSink::Client* streamSinkPtr; - KJ_IF_SOME(ss, streamSink) { - streamSinkPtr = &ss; - } else { - // First stream written, set up the StreamSink. 
- auto& func = KJ_REQUIRE_NONNULL(getStreamHandlerFunc.tryGet(), - "this serialization is not using StreamSink; use getExternalPusher() instead"); - streamSinkPtr = &streamSink.emplace(func()); - } - - auto result = ({ - auto req = streamSinkPtr->startStreamRequest(capnp::MessageSize{4, 0}); - req.setExternalIndex(externals.size()); - req.send().getStream(); - }); - - write(kj::mv(callback)); - - return result; -} - capnp::Orphan> RpcSerializerExternalHandler::build( capnp::Orphanage orphanage) { auto result = orphanage.newOrphan>(externals.size()); @@ -155,17 +36,6 @@ rpc::JsValue::External::Reader RpcDeserializerExternalHandler::read() { return externals[i++]; } -void RpcDeserializerExternalHandler::setLastStream(capnp::Capability::Client stream) { - KJ_IF_SOME(ss, streamSink) { - ss.setSlot(i - 1, kj::mv(stream)); - } else { - auto ss = kj::refcounted(); - ss->setSlot(i - 1, kj::mv(stream)); - streamSink = *ss; - streamSinkCap = rpc::JsValue::StreamSink::Client(kj::mv(ss)); - } -} - namespace { // Call to construct an `rpc::JsValue` from a JS value. @@ -213,18 +83,13 @@ void serializeJsValue(jsg::Lock& js, struct DeserializeResult { jsg::JsValue value; kj::Own disposalGroup; - kj::Maybe streamSink; }; // Call to construct a JS value from an `rpc::JsValue`. 
-DeserializeResult deserializeJsValue(jsg::Lock& js, - rpc::JsValue::Reader reader, - kj::LiteralStringConst debugContext, - kj::Maybe streamSink = kj::none) { +DeserializeResult deserializeJsValue(jsg::Lock& js, rpc::JsValue::Reader reader) { auto disposalGroup = kj::heap(); - RpcDeserializerExternalHandler externalHandler( - reader.getExternals(), *disposalGroup, streamSink, debugContext); + RpcDeserializerExternalHandler externalHandler(reader.getExternals(), *disposalGroup); jsg::Deserializer deserializer(js, reader.getV8Serialized(), kj::none, kj::none, jsg::Deserializer::Options{ @@ -244,22 +109,14 @@ DeserializeResult deserializeJsValue(jsg::Lock& js, return { .value = deserializer.readValue(js), .disposalGroup = kj::mv(disposalGroup), - .streamSink = externalHandler.getStreamSink(), }; } // Does deserializeJsValue() and then adds a `dispose()` method to the returned object (if it is // an object) which disposes all stubs therein. -jsg::JsValue deserializeRpcReturnValue(jsg::Lock& js, - rpc::JsRpcTarget::CallResults::Reader callResults, - kj::Maybe streamSink) { - auto [value, disposalGroup, ss] = - deserializeJsValue(js, callResults.getResult(), "return"_kjc, streamSink); - - if (streamSink == kj::none) { - KJ_REQUIRE(ss == kj::none, - "RPC returned result using StreamSink even though ExternalPusher was provided"); - } +jsg::JsValue deserializeRpcReturnValue( + jsg::Lock& js, rpc::JsRpcTarget::CallResults::Reader callResults) { + auto [value, disposalGroup] = deserializeJsValue(js, callResults.getResult()); // If the object had a disposer on the callee side, it will run when we discard the callPipeline, // so attach that to the disposal group on the caller side. 
If the returned object did NOT have @@ -502,11 +359,6 @@ JsRpcPromiseAndPipeline callImpl(jsg::Lock& js, } } - kj::Maybe paramsStreamSinkFulfiller; - - bool useExternalPusher = - util::Autogate::isEnabled(util::AutogateKey::RPC_USE_EXTERNAL_PUSHER); - KJ_IF_SOME(args, maybeArgs) { // If we have arguments, serialize them. // Note that we may fail to serialize some element, in which case this will throw back to @@ -523,22 +375,7 @@ JsRpcPromiseAndPipeline callImpl(jsg::Lock& js, ? RpcSerializerExternalHandler::DUPLICATE : RpcSerializerExternalHandler::TRANSFER; - RpcSerializerExternalHandler::GetStreamHandlerFunc getStreamHandlerFunc; - if (useExternalPusher) { - getStreamHandlerFunc.init( - [&]() -> rpc::JsValue::ExternalPusher::Client { return client; }); - } else { - getStreamHandlerFunc.init([&]() { - // A stream was encountered in the params, so we must expect the response to contain - // paramsStreamSink. But we don't have the response yet. So, we need to set up a - // temporary promise client, which we hook to the response a little bit later. - auto paf = kj::newPromiseAndFulfiller(); - paramsStreamSinkFulfiller = kj::mv(paf.fulfiller); - return kj::mv(paf.promise); - }); - } - - RpcSerializerExternalHandler externalHandler(stubOwnership, kj::mv(getStreamHandlerFunc)); + RpcSerializerExternalHandler externalHandler(stubOwnership, client); serializeJsValue(js, jsg::JsValue(arr), externalHandler, [&](capnp::MessageSize hint) { // TODO(perf): Actually use the size hint. return builder.getOperation().initCallWithArgs(); @@ -549,27 +386,14 @@ JsRpcPromiseAndPipeline callImpl(jsg::Lock& js, builder.getOperation().setGetProperty(); } - kj::Maybe> resultStreamSink; - if (useExternalPusher) { - // Unfortunately, we always have to send the ExternalPusher since we don't know whether the - // call will return any streams (or other pushed externals). Luckily, it's a - // one-per-IoContext object, not a big deal. (It'll take a slot on the capnp export table - // though.) 
- builder.getResultsStreamHandler().setExternalPusher(ioContext.getExternalPusher()); - } else { - // Unfortunately, we always have to send a `resultsStreamSink` because we don't know until - // after the call completes whether or not it will return any streams. If it's unused, - // though, it should only be a couple allocations. - builder.getResultsStreamHandler().setStreamSink( - kj::addRef(*resultStreamSink.emplace(kj::refcounted()))); - } + // Unfortunately, we always have to send the ExternalPusher since we don't know whether the + // call will return any streams (or other pushed externals). Luckily, it's a + // one-per-IoContext object, not a big deal. (It'll take a slot on the capnp export table + // though.) + builder.getResultsStreamHandler().setExternalPusher(ioContext.getExternalPusher()); auto callResult = builder.send(); - KJ_IF_SOME(ssf, paramsStreamSinkFulfiller) { - ssf->fulfill(callResult.getParamsStreamSink()); - } - // We need to arrange that our JsRpcPromise will updated in-place with the final settlement // of this RPC promise. However, we can't actually construct the JsRpcPromise until we have // the final promise to give it. To resolve the cycle, we only create a JsRpcPromise::WeakRef @@ -579,10 +403,9 @@ JsRpcPromiseAndPipeline callImpl(jsg::Lock& js, // RemotePromise lets us consume its pipeline and promise portions independently; we consume // the promise here and we consume the pipeline below, both via kj::mv(). auto jsPromise = ioContext.awaitIo(js, kj::mv(callResult), - [weakRef = kj::atomicAddRef(*weakRef), resultStreamSink = kj::mv(resultStreamSink)]( - jsg::Lock& js, + [weakRef = kj::atomicAddRef(*weakRef)](jsg::Lock& js, capnp::Response response) mutable -> jsg::Value { - auto jsResult = deserializeRpcReturnValue(js, response, resultStreamSink); + auto jsResult = deserializeRpcReturnValue(js, response); if (weakRef->disposed) { // The promise was explicitly disposed before it even resolved. 
This means we must dispose @@ -965,7 +788,7 @@ template MakeCallPipeline::Result serializeJsValueWithPipeline(jsg::Lock& js, jsg::JsValue value, Func makeBuilder, - RpcSerializerExternalHandler::GetStreamHandlerFunc getStreamSinkFunc); + rpc::JsValue::ExternalPusher::Client externalPusher); // Callee-side implementation of JsRpcTarget. // @@ -1049,6 +872,9 @@ class JsRpcTargetBase: public rpc::JsRpcTarget::Server { kj::Promise pushAbortSignal(PushAbortSignalContext context) override { return externalPusher->pushAbortSignal(context); } + kj::Promise pushDelayedChannelToken(PushDelayedChannelTokenContext context) override { + return externalPusher->pushDelayedChannelToken(context); + } KJ_DISALLOW_COPY_AND_MOVE(JsRpcTargetBase); @@ -1105,18 +931,22 @@ class JsRpcTargetBase: public rpc::JsRpcTarget::Server { // Given a handle for the result, if it's a promise, await the promise, then serialize the // final result for return. - RpcSerializerExternalHandler::GetStreamHandlerFunc getResultsStreamHandlerFunc; - auto resultStreamHandler = params.getResultsStreamHandler(); - switch (resultStreamHandler.which()) { - case rpc::JsRpcTarget::CallParams::ResultsStreamHandler::EXTERNAL_PUSHER: - getResultsStreamHandlerFunc.init( - [cap = resultStreamHandler.getExternalPusher()]() mutable { return kj::mv(cap); }); - break; - case rpc::JsRpcTarget::CallParams::ResultsStreamHandler::STREAM_SINK: - getResultsStreamHandlerFunc.init( - [cap = resultStreamHandler.getStreamSink()]() mutable { return kj::mv(cap); }); - break; - } + auto externalPusher = [&]() -> rpc::JsValue::ExternalPusher::Client { + auto resultStreamHandler = params.getResultsStreamHandler(); + if (resultStreamHandler.hasExternalPusher()) { + return resultStreamHandler.getExternalPusher(); + } else if (resultStreamHandler.hasObsolete4()) { + // A StreamSink was provided -- that's the old approach, which should have been + // eliminated from prod before this rolled out. 
+ return KJ_EXCEPTION(FAILED, "Caller using obsolete StreamSink API?"); + } else { + // The caller simply failed to provide an ExternalPusher. This shouldn't happen in prod but + // there are some tests that don't set anything. If we return an exception here, it'll + // only show up if we actually try to use the ExternalPusher to encode our return value. + return KJ_EXCEPTION( + FAILED, "Couldn't return stream because caller didn't provide an ExternalPusher."); + } + }(); kj::Maybe>> callPipelineFulfiller; @@ -1124,27 +954,6 @@ class JsRpcTargetBase: public rpc::JsRpcTarget::Server { // destroyed at the same time as the success callback. kj::Maybe&> callPipelineFulfillerRef; - KJ_IF_SOME(ss, invocationResult.streamSink) { - // Since we have a StreamSink, it's important that we hook up the pipeline for that - // immediately. Annoyingly, that also means we need to hook up a pipeline for - // callPipeline, which we don't actually have yet, so we need to promise-ify it. - - // If the caller requested using ExternalPusher for the results, then it should also use - // ExternalPusher for the params. (Theoretically we could support mix-and-match but... - // let's keep it simple.) - KJ_REQUIRE(resultStreamHandler.isStreamSink(), - "RPC params used StreamSink when result is supposed to use ExternalPusher"); - - auto paf = kj::newPromiseAndFulfiller(); - callPipelineFulfillerRef = *paf.fulfiller; - callPipelineFulfiller = kj::mv(paf.fulfiller); - - capnp::PipelineBuilder builder(16); - builder.setCallPipeline(kj::mv(paf.promise)); - builder.setParamsStreamSink(ss); - callContext.setPipeline(builder.build()); - } - // HACK: Cap'n Proto call contexts are documented as being pointer-like types where the // backing object's lifetime is that of the RPC call, but in reality they are refcounted // under the hood.
Since we'll be executing the call in the JS microtask queue, we have no @@ -1165,8 +974,7 @@ class JsRpcTargetBase: public rpc::JsRpcTarget::Server { // must take full ownership. [callContext, ownCallContext = kj::mv(ownCallContext), paramDisposalGroup = kj::mv(invocationResult.paramDisposalGroup), - paramsStreamSink = kj::mv(invocationResult.streamSink), - getResultsStreamHandlerFunc = kj::mv(getResultsStreamHandlerFunc), + externalPusher = kj::mv(externalPusher), callPipelineFulfiller = kj::mv(callPipelineFulfiller)]( jsg::Lock& js, jsg::Value value) mutable { jsg::JsValue resultValue(value.getHandle(js)); @@ -1178,7 +986,7 @@ class JsRpcTargetBase: public rpc::JsRpcTarget::Server { hint.capCount += 1; // for callPipeline results = callContext.initResults(hint); return results.initResult(); - }, kj::mv(getResultsStreamHandlerFunc)); + }, kj::mv(externalPusher)); KJ_SWITCH_ONEOF(maybePipeline) { KJ_CASE_ONEOF(obj, MakeCallPipeline::Object) { @@ -1207,10 +1015,6 @@ class JsRpcTargetBase: public rpc::JsRpcTarget::Server { cpf->fulfill(results.getCallPipeline()); } - KJ_IF_SOME(ss, paramsStreamSink) { - results.setParamsStreamSink(kj::mv(ss)); - } - // paramDisposalGroup will be destroyed when we return (or when this lambda is destroyed // as a result of the promise being rejected). This will implicitly dispose the param // stubs. @@ -1417,7 +1221,6 @@ class JsRpcTargetBase: public rpc::JsRpcTarget::Server { struct InvocationResult { v8::Local returnValue; kj::Maybe> paramDisposalGroup; - kj::Maybe streamSink; }; // Deserializes the arguments and passes them to the given function. @@ -1427,7 +1230,7 @@ class JsRpcTargetBase: public rpc::JsRpcTarget::Server { kj::Maybe args) { // We received arguments from the client, deserialize them back to JS. 
KJ_IF_SOME(a, args) { - auto [value, disposalGroup, streamSink] = deserializeJsValue(js, a, "params"_kjc); + auto [value, disposalGroup] = deserializeJsValue(js, a); auto args = KJ_REQUIRE_NONNULL( value.tryCast(), "expected JsArray when deserializing arguments."); // Call() expects a `Local []`... so we populate an array. @@ -1440,7 +1243,6 @@ class JsRpcTargetBase: public rpc::JsRpcTarget::Server { InvocationResult result{ .returnValue = jsg::check(fn->Call(js.v8Context(), thisArg, arguments.size(), arguments.data())), - .streamSink = kj::mv(streamSink), }; if (!disposalGroup->empty()) { result.paramDisposalGroup = kj::mv(disposalGroup); @@ -1479,7 +1281,6 @@ class JsRpcTargetBase: public rpc::JsRpcTarget::Server { } kj::Maybe> paramDisposalGroup; - kj::Maybe streamSink; // We're going to pass all the arguments from the client to the function, but we are going to // insert `env` and `ctx`. We assume the last two arguments that the function declared are @@ -1487,8 +1288,7 @@ class JsRpcTargetBase: public rpc::JsRpcTarget::Server { kj::Maybe argsArrayFromClient; size_t argCountFromClient = 0; KJ_IF_SOME(a, args) { - auto [value, disposalGroup, ss] = deserializeJsValue(js, a, "paramsNonClass"_kjc); - streamSink = kj::mv(ss); + auto [value, disposalGroup] = deserializeJsValue(js, a); auto array = KJ_REQUIRE_NONNULL( value.tryCast(), "expected JsArray when deserializing arguments."); @@ -1541,7 +1341,6 @@ class JsRpcTargetBase: public rpc::JsRpcTarget::Server { .returnValue = jsg::check(fn->Call(js.v8Context(), thisArg, arguments.size(), arguments.data())), .paramDisposalGroup = kj::mv(paramDisposalGroup), - .streamSink = kj::mv(streamSink), }; }; }; @@ -1660,7 +1459,7 @@ template MakeCallPipeline::Result serializeJsValueWithPipeline(jsg::Lock& js, jsg::JsValue value, Func makeBuilder, - RpcSerializerExternalHandler::GetStreamHandlerFunc getStreamHandlerFunc) { + rpc::JsValue::ExternalPusher::Client externalPusher) { auto maybeDispose = js.withinHandleScope([&]() -> 
kj::Maybe> { jsg::JsObject obj = KJ_UNWRAP_OR(value.tryCast(), { return kj::none; }); @@ -1684,7 +1483,7 @@ MakeCallPipeline::Result serializeJsValueWithPipeline(jsg::Lock& js, // Now that we've extracted our dispose function, we can serialize our value. RpcSerializerExternalHandler externalHandler( - RpcSerializerExternalHandler::TRANSFER, kj::mv(getStreamHandlerFunc)); + RpcSerializerExternalHandler::TRANSFER, kj::mv(externalPusher)); serializeJsValue(js, value, externalHandler, kj::mv(makeBuilder)); auto stubDisposers = externalHandler.releaseStubDisposers(); @@ -2101,61 +1900,6 @@ class EntrypointJsRpcTarget final: public JsRpcTargetBase { } }; -// A membrane which wraps the top-level JsRpcTarget of an RPC session on the server side. The -// purpose of this membrane is to allow only a single top-level call, which then gets a -// `CompletionMembrane` wrapped around it. Note that we can't just wrap `CompletionMembrane` around -// the top-level object directly because that capability will not be dropped until the RPC session -// completes, since it is actually returned as the result of the top-level RPC call, but that -// call doesn't return until the `CompletionMembrane` says all capabilities were dropped, so this -// would create a cycle. 
-class JsRpcSessionCustomEvent::ServerTopLevelMembrane final: public capnp::MembranePolicy, - public kj::Refcounted { - public: - explicit ServerTopLevelMembrane(kj::Own> doneFulfiller) - : completionMembrane(kj::refcounted(kj::mv(doneFulfiller))) {} - - ~ServerTopLevelMembrane() noexcept(false) { - KJ_IF_SOME(cm, completionMembrane) { - cm->reject( - KJ_EXCEPTION(DISCONNECTED, "JS RPC session canceled without calling an RPC method.")); - } - } - - kj::Maybe inboundCall( - uint64_t interfaceId, uint16_t methodId, capnp::Capability::Client target) override { - if (interfaceId == capnp::typeId()) { - // JsRpcTarget::call() - auto cm = kj::mv(JSG_REQUIRE_NONNULL( - completionMembrane, Error, "Only one RPC method call is allowed on this object.")); - completionMembrane = kj::none; - return capnp::membrane(kj::mv(target), kj::mv(cm)); - } else if (interfaceId == capnp::typeId()) { - // ExternalPusher methods - // - // It's important that we use the same membrane that we'll use for call(), so that - // capabilities returned by the ExternalPusher will be wrapped in the membrane, hence they - // will be unwrapped when passed back through the membrane again to call(). 
- auto& cm = *JSG_REQUIRE_NONNULL( - completionMembrane, Error, "getExternalPusher() must be called before call()"); - return capnp::membrane(kj::mv(target), kj::addRef(cm)); - } else { - KJ_FAIL_ASSERT("unkown interface ID for JsRpcTarget"); - } - } - - kj::Maybe outboundCall( - uint64_t interfaceId, uint16_t methodId, capnp::Capability::Client target) override { - KJ_FAIL_ASSERT("ServerTopLevelMembrane shouldn't have outgoing capabilities"); - } - - kj::Own addRef() override { - return kj::addRef(*this); - } - - private: - kj::Maybe> completionMembrane; -}; - kj::Promise JsRpcSessionCustomEvent::run( kj::Own incomingRequest, kj::Maybe entrypointName, @@ -2180,17 +1924,8 @@ kj::Promise JsRpcSessionCustomEvent::run( try { auto [donePromise, doneFulfiller] = kj::newPromiseAndFulfiller(); - kj::Own topMembrane; - if (util::Autogate::isEnabled(util::AutogateKey::JSRPC_SESSION_HANDLE)) { - // When using the session handle approach, we don't need the convoluted - // `ServerTopLevelMembrane` because the the top-level `JsRpcTarget` is not unnaturally held - // open, so it can be treated the same as any other capability in the session. - topMembrane = kj::refcounted(kj::mv(doneFulfiller)); - } else { - topMembrane = kj::refcounted(kj::mv(doneFulfiller)); - } - - capFulfiller->fulfill(capnp::membrane(revcableTarget.getClient(), kj::mv(topMembrane))); + capFulfiller->fulfill(capnp::membrane( + revcableTarget.getClient(), kj::refcounted(kj::mv(doneFulfiller)))); // `donePromise` resolves once there are no longer any capabilities pointing between the client // and server as part of this session. 
@@ -2287,26 +2022,19 @@ kj::Promise JsRpcSessionCustomEvent::receiveRpc(JsRpcSessionContext contex auto cap = customEvent->getCap(); - if (util::Autogate::isEnabled(util::AutogateKey::JSRPC_SESSION_HANDLE)) { - auto promise = worker.customEvent(kj::mv(customEvent)); + auto promise = worker.customEvent(kj::mv(customEvent)); - auto results = context.getResults(capnp::MessageSize{4, 2}); - results.setTopLevel(kj::mv(cap)); + auto results = context.getResults(capnp::MessageSize{4, 2}); + results.setTopLevel(kj::mv(cap)); - // Set the returned session capability to resolve to a null capability when the event is - // complete. This also neatly arranges that if the session is dropped early, the - // `customEvent()` promise is canceled, thus canceling the session. - results.setSession(promise.then([ownWorker = kj::mv(ownWorker)](auto outcome) { - return rpc::JsRpcSession::Client(nullptr); - })); - } else { - capnp::PipelineBuilder pipelineBuilder; - pipelineBuilder.setTopLevel(cap); - context.setPipeline(pipelineBuilder.build()); - context.getResults().setTopLevel(kj::mv(cap)); + // Set the returned session capability to resolve to a null capability when the event is + // complete. This also neatly arranges that if the session is dropped early, the + // `customEvent()` promise is canceled, thus canceling the session. + results.setSession(promise.then([ownWorker = kj::mv(ownWorker)](auto outcome) { + return rpc::JsRpcSession::Client(nullptr); + })); - co_await worker.customEvent(kj::mv(customEvent)); - } + return kj::READY_NOW; } }; // namespace workerd::api diff --git a/src/workerd/api/worker-rpc.h b/src/workerd/api/worker-rpc.h index 47d8346b98a..faddc717d38 100644 --- a/src/workerd/api/worker-rpc.h +++ b/src/workerd/api/worker-rpc.h @@ -36,18 +36,14 @@ constexpr size_t MAX_JS_RPC_MESSAGE_SIZE = 1u << 25; // handle RPC specially should use this. 
class RpcSerializerExternalHandler final: public jsg::Serializer::ExternalHandler { public: - using GetStreamSinkFunc = kj::Function; - using GetExternalPusherFunc = kj::Function; - using GetStreamHandlerFunc = kj::OneOf; - enum StubOwnership { TRANSFER, DUPLICATE }; - // `getStreamSinkFunc` will be called at most once, the first time a stream is encountered in - // serialization, to get the StreamSink that should be used. + // `externalPusher` is the ExternalPusher for the remote side, to be used for any streams (or + // other pushed externals) encountered in serialization. RpcSerializerExternalHandler( - StubOwnership stubOwnership, GetStreamHandlerFunc getStreamHandlerFunc) + StubOwnership stubOwnership, rpc::JsValue::ExternalPusher::Client externalPusher) : stubOwnership(stubOwnership), - getStreamHandlerFunc(kj::mv(getStreamHandlerFunc)) {} + externalPusher(kj::mv(externalPusher)) {} inline StubOwnership getStubOwnership() { return stubOwnership; @@ -55,9 +51,10 @@ class RpcSerializerExternalHandler final: public jsg::Serializer::ExternalHandle using BuilderCallback = kj::Function; - // Returns the ExternalPusher for the remote side. Returns kj::none if this serialization is - // using the older StreamSink approach, in which case you need to call `writeStream()` instead. - kj::Maybe getExternalPusher(); + // Returns the ExternalPusher for the remote side. + rpc::JsValue::ExternalPusher::Client getExternalPusher() { + return externalPusher; + } // Add an external. The value is a callback which will be invoked later to fill in the // JsValue::External in the Cap'n Proto structure. The external array cannot be allocated until @@ -67,13 +64,6 @@ class RpcSerializerExternalHandler final: public jsg::Serializer::ExternalHandle externals.add(kj::mv(callback)); } - // Like write(), but use this when there is also a stream associated with the external, i.e. - // using StreamSink. This returns a capability which will eventually resolve to the stream.
- // - // StreamSink is being replaced by ExternalPusher. You should only call writeStream() if - // getExternalPusher() returns kj::none. If ExternalPusher is available, this method will throw. - capnp::Capability::Client writeStream(BuilderCallback callback); - // Build the final list. capnp::Orphan> build(capnp::Orphanage orphanage); @@ -108,71 +98,39 @@ class RpcSerializerExternalHandler final: public jsg::Serializer::ExternalHandle private: StubOwnership stubOwnership; - GetStreamHandlerFunc getStreamHandlerFunc; + rpc::JsValue::ExternalPusher::Client externalPusher; kj::Vector externals; kj::Vector> stubDisposers; - - kj::Maybe streamSink; - kj::Maybe externalPusher; }; class RpcStubDisposalGroup; -class StreamSinkImpl; // ExternalHandler used when deserializing RPC messages. Deserialization functions with which to // handle RPC specially should use this. class RpcDeserializerExternalHandler final: public jsg::Deserializer::ExternalHandler { public: - // The `streamSink` parameter should be provided if a StreamSink already exists, e.g. when - // deserializing results. If omitted, it will be constructed on-demand. - RpcDeserializerExternalHandler(capnp::List::Reader externals, - RpcStubDisposalGroup& disposalGroup, - kj::Maybe streamSink, - kj::LiteralStringConst debugContext) + RpcDeserializerExternalHandler( + capnp::List::Reader externals, RpcStubDisposalGroup& disposalGroup) : externals(externals), - disposalGroup(disposalGroup), - streamSink(streamSink), - debugContext(debugContext) {} + disposalGroup(disposalGroup) {} ~RpcDeserializerExternalHandler() noexcept(false); // Read and return the next external. rpc::JsValue::External::Reader read(); - // Call immediately after `read()` when reading an external that is associated with a stream. - // `stream` is published back to the sender via StreamSink. 
- void setLastStream(capnp::Capability::Client stream); - // All stubs deserialized as part of a particular parameter or result set are placed in a // common disposal group so that they can be disposed together. RpcStubDisposalGroup& getDisposalGroup() { return disposalGroup; } - // Call after serialization is complete to get the StreamSink that should handle streams found - // while deserializing. Returns none if there were no streams. This should only be called if - // a `streamSink` was NOT passed to the constructor. - kj::Maybe getStreamSink() { - return kj::mv(streamSinkCap); - } - - // Return a string literal to include in deserialization errors for debugging. (In particular - // this specifies if it's params or return.) - kj::LiteralStringConst getDebugContext() { - return debugContext; - } - private: capnp::List::Reader externals; uint i = 0; kj::UnwindDetector unwindDetector; RpcStubDisposalGroup& disposalGroup; - - kj::Maybe streamSink; - kj::Maybe streamSinkCap; - - kj::LiteralStringConst debugContext; }; // Base class for objects which can be sent over RPC, but doing so actually sends a stub which @@ -549,8 +507,6 @@ class JsRpcSessionCustomEvent final: public WorkerInterface::CustomEvent { uint16_t typeId; kj::Maybe wrapperModule; - - class ServerTopLevelMembrane; }; #define EW_WORKER_RPC_ISOLATE_TYPES \ diff --git a/src/workerd/io/external-pusher.c++ b/src/workerd/io/external-pusher.c++ index 6bb3b76d00b..082fb9dc0d6 100644 --- a/src/workerd/io/external-pusher.c++ +++ b/src/workerd/io/external-pusher.c++ @@ -12,10 +12,6 @@ namespace workerd { namespace { -// TODO(cleanup): These classes have been copied from streams/readable.c++. The copies there can be -// deleted as soon as we've switched from StreamSink to ExternalPusher and can delete all the -// StreamSink-related code. For now I'm not trying to avoid duplication. - // HACK: We need as async pipe, like kj::newOneWayPipe(), except supporting explicit end(). 
So we // wrap the two ends of the pipe in special adapters that track whether end() was called. class ExplicitEndOutputPipeAdapter final: public capnp::ExplicitEndOutputStream { @@ -131,14 +127,14 @@ kj::Promise ExternalPusherImpl::pushByteStream(PushByteStreamContext conte } kj::Own ExternalPusherImpl::unwrapStream( - ExternalPusher::InputStream::Client cap, kj::LiteralStringConst debugContext) { - return kj::newPromisedStream(unwrapStreamImpl(kj::mv(cap), debugContext)); + ExternalPusher::InputStream::Client cap) { + return kj::newPromisedStream(unwrapStreamImpl(kj::mv(cap))); } kj::Promise> ExternalPusherImpl::unwrapStreamImpl( - ExternalPusher::InputStream::Client cap, kj::LiteralStringConst debugContext) { + ExternalPusher::InputStream::Client cap) { auto& unwrapped = KJ_REQUIRE_NONNULL(co_await inputStreamSet.getLocalServer(cap), - "pushed external is not a byte stream", debugContext, cap.debugInfo()); + "pushed external is not a byte stream", cap.debugInfo()); co_return KJ_REQUIRE_NONNULL(kj::mv(kj::downcast(unwrapped).stream), "pushed byte stream has already been consumed"); @@ -149,10 +145,6 @@ kj::Promise> ExternalPusherImpl::unwrapStreamImpl( namespace { // The jsrpc handler that receives aborts from the remote and triggers them locally -// -// TODO(cleanup): This class has been copied from basics.c++. The copy there can be -// deleted as soon as we've switched from StreamSink to ExternalPusher and can delete all the -// StreamSink-related code. For now I'm not trying to avoid duplication. 
class AbortTriggerRpcServer final: public rpc::AbortTrigger::Server { public: AbortTriggerRpcServer(kj::Own> fulfiller, @@ -258,4 +250,30 @@ kj::Promise ExternalPusherImpl::unwrapAbortSignalImpl( co_await paf.promise; } +// ======================================================================================= +// DelayedChannelToken handling + +class ExternalPusherImpl::DelayedChannelTokenImpl final + : public ExternalPusher::DelayedChannelToken::Server { + public: + DelayedChannelTokenImpl(kj::Array token): token(kj::mv(token)) {} + + kj::Array token; +}; + +kj::Promise ExternalPusherImpl::pushDelayedChannelToken( + PushDelayedChannelTokenContext context) { + auto token = kj::heapArray(context.getParams().getToken()); + auto cap = delayedChannelTokenSet.add(kj::heap(kj::mv(token))); + context.getResults(capnp::MessageSize{2, 1}).setCap(kj::mv(cap)); + return kj::READY_NOW; +} + +kj::Promise> ExternalPusherImpl::unwrapDelayedChannelToken( + rpc::JsValue::ExternalPusher::DelayedChannelToken::Client cap) { + auto& unwrapped = KJ_REQUIRE_NONNULL(co_await delayedChannelTokenSet.getLocalServer(cap), + "pushed external is not a DelayedChannelToken"); + co_return kj::mv(kj::downcast(unwrapped).token); +} + } // namespace workerd diff --git a/src/workerd/io/external-pusher.h b/src/workerd/io/external-pusher.h index 02945ff7bf8..465b8c9fd92 100644 --- a/src/workerd/io/external-pusher.h +++ b/src/workerd/io/external-pusher.h @@ -1,3 +1,4 @@ + // Copyright (c) 2025 Cloudflare, Inc. 
// Licensed under the Apache 2.0 license found in the LICENSE file or at: // https://opensource.org/licenses/Apache-2.0 @@ -24,8 +25,7 @@ class ExternalPusherImpl: public rpc::JsValue::ExternalPusher::Server, public kj using ExternalPusher = rpc::JsValue::ExternalPusher; - kj::Own unwrapStream( - ExternalPusher::InputStream::Client cap, kj::LiteralStringConst debugContext); + kj::Own unwrapStream(ExternalPusher::InputStream::Client cap); // Box which holds the reason why an AbortSignal was aborted. May be either: // - A serialized V8 value if the signal was aborted from JavaScript. @@ -42,23 +42,29 @@ class ExternalPusherImpl: public rpc::JsValue::ExternalPusher::Server, public kj AbortSignal unwrapAbortSignal(ExternalPusher::AbortSignal::Client cap); + kj::Promise> unwrapDelayedChannelToken( + rpc::JsValue::ExternalPusher::DelayedChannelToken::Client cap); + kj::Promise pushByteStream(PushByteStreamContext context) override; kj::Promise pushAbortSignal(PushAbortSignalContext context) override; + kj::Promise pushDelayedChannelToken(PushDelayedChannelTokenContext context) override; private: capnp::ByteStreamFactory& byteStreamFactory; capnp::CapabilityServerSet inputStreamSet; capnp::CapabilityServerSet abortSignalSet; + capnp::CapabilityServerSet delayedChannelTokenSet; kj::Promise> unwrapStreamImpl( - ExternalPusher::InputStream::Client cap, kj::LiteralStringConst debugContext); + ExternalPusher::InputStream::Client cap); kj::Promise unwrapAbortSignalImpl( ExternalPusher::AbortSignal::Client cap, kj::Own pendingReason); class InputStreamImpl; class AbortSignalImpl; + class DelayedChannelTokenImpl; }; } // namespace workerd diff --git a/src/workerd/io/frankenvalue.h b/src/workerd/io/frankenvalue.h index 78e9e3fef8c..84c519132ae 100644 --- a/src/workerd/io/frankenvalue.h +++ b/src/workerd/io/frankenvalue.h @@ -104,6 +104,32 @@ class Frankenvalue { } } + // Kind of like `rewriteCaps()`, but the callback returns + // kj::OneOf, kj::Promise>>, i.e. 
it may optionally + // decide to be async. If any of the calls return a promise, then `resolveCaps()` returns a + // promise created by joining the inner promises -- the Frankenvalue MUST NOT be used until that + // promise resolves. (If the promise fails or is canceled, the Frankenvalue must be discarded.) + template + kj::Maybe> resolveCaps(Func&& resolve) { + kj::Vector> promises; + for (auto& slot: capTable) { + KJ_SWITCH_ONEOF(resolve(kj::mv(slot))) { + KJ_CASE_ONEOF(replacement, kj::Own) { + slot = kj::mv(replacement); + } + KJ_CASE_ONEOF(promise, kj::Promise>) { + promises.add(promise.then( + [&slot](kj::Own replacement) { slot = kj::mv(replacement); })); + } + } + } + if (promises.empty()) { + return kj::none; + } else { + return kj::joinPromisesFailFast(promises.releaseAsArray()); + } + } + // When deserializing a JS value, the jsg::Deserializer's ExternalHandler will have this type. class CapTableReader final: public jsg::Deserializer::ExternalHandler { public: diff --git a/src/workerd/io/hibernation-manager.c++ b/src/workerd/io/hibernation-manager.c++ index 0e1d5fb13d3..e98a266faeb 100644 --- a/src/workerd/io/hibernation-manager.c++ +++ b/src/workerd/io/hibernation-manager.c++ @@ -5,7 +5,9 @@ #include "hibernation-manager.h" #include "io-channels.h" +#include "io-context.h" +#include #include namespace workerd { @@ -110,6 +112,15 @@ void HibernationManagerImpl::acceptWebSocket( auto hib = kj::heap(kj::mv(ws), tags, *this); HibernatableWebSocket& refToHibernatable = *hib.get(); + + // TODO(mar): Improve accept span context capturing — route snapshotted user span context + // to serialization point instead of capturing only the invocation root span here. 
+ if (util::Autogate::isEnabled(util::AutogateKey::USER_SPAN_CONTEXT_PROPAGATION)) { + auto invCtx = IoContext::current().getInvocationSpanContext(); + refToHibernatable.userSpanContext = + tracing::SpanContext(invCtx.getTraceId(), invCtx.getSpanId()); + } + allWs.push_front(kj::mv(hib)); refToHibernatable.node = allWs.begin(); @@ -266,8 +277,14 @@ kj::Promise HibernationManagerImpl::handleSocketTermination( } KJ_REQUIRE_NONNULL(params).setTimeout(eventTimeoutMs); - // Dispatch the event. - auto workerInterface = loopback->getWorker(IoChannelFactory::SubrequestMetadata{}); + // Dispatch the event, restoring the trace context captured at acceptWebSocket time. + SpanParent userSpanParent = SpanParent(nullptr); + KJ_IF_SOME(ctx, hib.userSpanContext) { + userSpanParent = SpanParent::fromSpanContext(tracing::SpanContext::clone(ctx)); + } + auto workerInterface = loopback->getWorker({ + .userSpanParent = kj::mv(userSpanParent), + }); event = workerInterface ->customEvent(kj::heap( hibernationEventType, kj::mv(KJ_REQUIRE_NONNULL(params)), *this)) @@ -372,8 +389,14 @@ kj::Promise HibernationManagerImpl::readLoop(HibernatableWebSocket& hib) { auto params = kj::mv(KJ_REQUIRE_NONNULL(maybeParams)); params.setTimeout(eventTimeoutMs); auto isClose = params.isCloseEvent(); - // Dispatch the event. - auto workerInterface = loopback->getWorker(IoChannelFactory::SubrequestMetadata{}); + // Dispatch the event, restoring the trace context captured at acceptWebSocket time. 
+ SpanParent userSpanParent = SpanParent(nullptr); + KJ_IF_SOME(ctx, hib.userSpanContext) { + userSpanParent = SpanParent::fromSpanContext(tracing::SpanContext::clone(ctx)); + } + auto workerInterface = loopback->getWorker({ + .userSpanParent = kj::mv(userSpanParent), + }); co_await workerInterface->customEvent(kj::heap( hibernationEventType, kj::mv(params), *this)); if (isClose) { diff --git a/src/workerd/io/hibernation-manager.h b/src/workerd/io/hibernation-manager.h index a81f734bea8..019e040491c 100644 --- a/src/workerd/io/hibernation-manager.h +++ b/src/workerd/io/hibernation-manager.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -130,6 +131,10 @@ class HibernationManagerImpl final: public Worker::Actor::HibernationManager { // This prevents us from dispatching it if we have already done so. bool hasDispatchedClose = false; + // Trace context captured at acceptWebSocket() time, restored when the DO is woken up + // so that hibernation events are linked to the original trace. + kj::Maybe userSpanContext; + // Stores the last received autoResponseRequest timestamp. 
kj::Maybe autoResponseTimestamp; diff --git a/src/workerd/io/io-channels.c++ b/src/workerd/io/io-channels.c++ index bbe92efe9d5..a4015d2642d 100644 --- a/src/workerd/io/io-channels.c++ +++ b/src/workerd/io/io-channels.c++ @@ -1,13 +1,34 @@ #include "io-channels.h" +#include +#include + namespace workerd { -kj::Array IoChannelFactory::SubrequestChannel::getToken(ChannelTokenUsage usage) { - JSG_FAIL_REQUIRE(DOMDataCloneError, "This ServiceStub cannot be serialized."); +kj::Promise> IoChannelFactory::SubrequestChannel::getToken( + ChannelTokenUsage usage) { + KJ_SWITCH_ONEOF(getTokenMaybeSync(usage)) { + KJ_CASE_ONEOF(token, kj::Array) { + return kj::mv(token); + } + KJ_CASE_ONEOF(promise, kj::Promise>) { + return kj::mv(promise); + } + } + KJ_UNREACHABLE; } -kj::Array IoChannelFactory::ActorClassChannel::getToken(ChannelTokenUsage usage) { - JSG_FAIL_REQUIRE(DOMDataCloneError, "This Durable Object class cannot be serialized."); +kj::Promise> IoChannelFactory::ActorClassChannel::getToken( + ChannelTokenUsage usage) { + KJ_SWITCH_ONEOF(getTokenMaybeSync(usage)) { + KJ_CASE_ONEOF(token, kj::Array) { + return kj::mv(token); + } + KJ_CASE_ONEOF(promise, kj::Promise>) { + return kj::mv(promise); + } + } + KJ_UNREACHABLE; } kj::Own IoChannelFactory::subrequestChannelFromToken( @@ -21,12 +42,300 @@ kj::Own IoChannelFactory::actorClassFromTok DOMDataCloneError, "This Worker is not able to deserialize Durable Object class stubs."); } +namespace { + +class PromisedSubrequestChannel final: public IoChannelFactory::SubrequestChannel { + public: + PromisedSubrequestChannel(kj::Promise> promise) + : readyPromise(waitForResolution(kj::mv(promise)).fork()) {} + + kj::Own startRequest(IoChannelFactory::SubrequestMetadata metadata) override { + KJ_IF_SOME(channel, inner) { + return channel->startRequest(kj::mv(metadata)); + } else { + return newPromisedWorkerInterface(readyPromise.addBranch().then( + [self = addRefToThis(), metadata = kj::mv(metadata)]() mutable { + return 
KJ_ASSERT_NONNULL(self->inner)->startRequest(kj::mv(metadata)); + })); + } + } + + void requireAllowsTransfer() override { + // PromisedSubrequestChannel is used for channels initialized from a promised channel token. + // A SubrequestChannel created from a channel token should always support transfer, via channel + // tokens. + } + + kj::OneOf, kj::Promise>> getTokenMaybeSync( + IoChannelFactory::ChannelTokenUsage usage) override { + KJ_IF_SOME(channel, inner) { + return channel->getTokenMaybeSync(usage); + } else { + return readyPromise.addBranch().then([this, usage]() -> kj::Promise> { + KJ_SWITCH_ONEOF(KJ_ASSERT_NONNULL(inner)->getTokenMaybeSync(usage)) { + KJ_CASE_ONEOF(token, kj::Array) { + return kj::mv(token); + } + KJ_CASE_ONEOF(promise, kj::Promise>) { + return kj::mv(promise); + } + } + KJ_UNREACHABLE; + }); + } + } + + kj::OneOf, kj::Promise>> getResolved() + override { + KJ_IF_SOME(channel, inner) { + return kj::addRef(*channel); + } else { + return readyPromise.addBranch().then( + [this]() mutable { return kj::addRef(*KJ_ASSERT_NONNULL(inner)); }); + } + } + + private: + kj::Maybe> inner; + kj::ForkedPromise readyPromise; + + kj::Promise waitForResolution(kj::Promise> promise) { + for (;;) { + auto resolution = co_await promise; + KJ_SWITCH_ONEOF(resolution->getResolved()) { + KJ_CASE_ONEOF(channel, kj::Own) { + inner = kj::mv(channel); + co_return; + } + KJ_CASE_ONEOF(deeperPromise, kj::Promise>) { + // Promise resolved to another promise, wait for it too. + promise = kj::mv(deeperPromise); + } + } + } + } +}; + +class PromisedActorClassChannel final: public IoChannelFactory::ActorClassChannel { + public: + PromisedActorClassChannel(kj::Promise> promise) + : readyPromise(waitForResolution(kj::mv(promise)).fork()) {} + + void requireAllowsTransfer() override { + // PromisedActorClassChannel is used for channels initialized from a promised channel token. 
+ // An ActorClassChannel created from a channel token should always support transfer, via channel + // tokens. + } + + kj::OneOf, kj::Promise>> getTokenMaybeSync( + IoChannelFactory::ChannelTokenUsage usage) override { + KJ_IF_SOME(channel, inner) { + return channel->getTokenMaybeSync(usage); + } else { + return readyPromise.addBranch().then([this, usage]() -> kj::Promise> { + KJ_SWITCH_ONEOF(KJ_ASSERT_NONNULL(inner)->getTokenMaybeSync(usage)) { + KJ_CASE_ONEOF(token, kj::Array) { + return kj::mv(token); + } + KJ_CASE_ONEOF(promise, kj::Promise>) { + return kj::mv(promise); + } + } + KJ_UNREACHABLE; + }); + } + } + + kj::OneOf, kj::Promise>> getResolved() + override { + KJ_IF_SOME(channel, inner) { + return kj::addRef(*channel); + } else { + return readyPromise.addBranch().then( + [this]() mutable { return kj::addRef(*KJ_ASSERT_NONNULL(inner)); }); + } + } + + private: + kj::Maybe> inner; + kj::ForkedPromise readyPromise; + + kj::Promise waitForResolution(kj::Promise> promise) { + for (;;) { + auto resolution = co_await promise; + KJ_SWITCH_ONEOF(resolution->getResolved()) { + KJ_CASE_ONEOF(channel, kj::Own) { + inner = kj::mv(channel); + co_return; + } + KJ_CASE_ONEOF(deeperPromise, kj::Promise>) { + promise = kj::mv(deeperPromise); + } + } + } + } +}; + +kj::OneOf, kj::Promise>> +resolveCap(kj::Own cap) { + KJ_IF_SOME(typed, kj::tryDowncast(*cap)) { + KJ_SWITCH_ONEOF(typed.getResolved()) { + KJ_CASE_ONEOF(channel, kj::Own) { + return kj::implicitCast>(kj::mv(channel)); + } + KJ_CASE_ONEOF(promise, kj::Promise>) { + return promise.then([](kj::Own channel) { + return kj::implicitCast>(kj::mv(channel)); + }); + } + } + KJ_UNREACHABLE; + } else KJ_IF_SOME(typed, kj::tryDowncast(*cap)) { + KJ_SWITCH_ONEOF(typed.getResolved()) { + KJ_CASE_ONEOF(channel, kj::Own) { + return kj::implicitCast>(kj::mv(channel)); + } + KJ_CASE_ONEOF(promise, kj::Promise>) { + return promise.then([](kj::Own channel) { + return kj::implicitCast>(kj::mv(channel)); + }); + } + } +
KJ_UNREACHABLE; + } else { + auto& ref = *cap; + KJ_FAIL_ASSERT("unknown type in Frankenvalue", typeid(ref).name()); + } +} + +} // namespace + +kj::Own IoChannelFactory::getSubrequestChannel( + uint channel, kj::Maybe props, kj::Maybe versionRequest) { + KJ_IF_SOME(p, props) { + KJ_IF_SOME(promise, p.resolveCaps(resolveCap)) { + return kj::refcounted( + promise.then([this, self = addRef(), channel, props = kj::mv(p), + versionRequest = kj::mv(versionRequest)]() mutable { + return getSubrequestChannelResolved(channel, kj::mv(props), kj::mv(versionRequest)); + })); + } + } + return getSubrequestChannelResolved(channel, kj::mv(props), kj::mv(versionRequest)); +} + +kj::Own IoChannelFactory::getActorClass( + uint channel, kj::Maybe props) { + KJ_IF_SOME(p, props) { + KJ_IF_SOME(promise, p.resolveCaps(resolveCap)) { + return kj::refcounted( + promise.then([this, self = addRef(), channel, props = kj::mv(p)]() mutable { + return getActorClassResolved(channel, kj::mv(props)); + })); + } + } + return getActorClassResolved(channel, kj::mv(props)); +} + +kj::Own WorkerStubChannel::getEntrypoint( + kj::Maybe name, Frankenvalue props, kj::Maybe limits) { + KJ_IF_SOME(promise, props.resolveCaps(resolveCap)) { + return kj::refcounted( + promise.then([self = addRefToThis(), name = kj::mv(name), props = kj::mv(props), + limits = kj::mv(limits)]() mutable { + return self->getEntrypointResolved(kj::mv(name), kj::mv(props), kj::mv(limits)); + })); + } else { + return getEntrypointResolved(kj::mv(name), kj::mv(props), kj::mv(limits)); + } +} + +kj::Own WorkerStubChannel::getActorClass( + kj::Maybe name, Frankenvalue props, kj::Maybe limits) { + KJ_IF_SOME(promise, props.resolveCaps(resolveCap)) { + return kj::refcounted( + promise.then([self = addRefToThis(), name = kj::mv(name), props = kj::mv(props), + limits = kj::mv(limits)]() mutable { + return self->getActorClassResolved(kj::mv(name), kj::mv(props), kj::mv(limits)); + })); + } else { + return getActorClassResolved(kj::mv(name), 
kj::mv(props), kj::mv(limits)); + } +} + +kj::Own IoChannelFactory::subrequestChannelFromToken( + ChannelTokenUsage usage, kj::Promise> token) { + return kj::refcounted(token.then([this, usage](kj::Array token) { + return subrequestChannelFromToken(usage, token.asPtr()); + })); +} + +kj::Own IoChannelFactory::actorClassFromToken( + ChannelTokenUsage usage, kj::Promise> token) { + return kj::refcounted(token.then( + [this, usage](kj::Array token) { return actorClassFromToken(usage, token.asPtr()); })); +} + void IoChannelFactory::ActorChannel::requireAllowsTransfer() { JSG_FAIL_REQUIRE(DOMDataCloneError, "Durable Object stubs cannot (yet) be transferred between Workers. This will change in " "a future version."); } +kj::OneOf, kj::Promise>> IoChannelFactory::ActorChannel:: + getTokenMaybeSync(ChannelTokenUsage usage) { + JSG_FAIL_REQUIRE(DOMDataCloneError, + "Durable Object stubs cannot (yet) be transferred between Workers. This will change in " + "a future version."); +} + +kj::Promise DynamicWorkerSource::ensureAllResolved() { + kj::Vector> promises; + + KJ_IF_SOME(promise, env.resolveCaps(resolveCap)) { + promises.add(kj::mv(promise)); + } + + auto resolveChannelSlot = [&](kj::Own& slot) { + KJ_SWITCH_ONEOF(slot->getResolved()) { + KJ_CASE_ONEOF(channel, kj::Own) { + slot = kj::mv(channel); + } + KJ_CASE_ONEOF(promise, kj::Promise>) { + promises.add(promise.then([&slot](kj::Own channel) { + slot = kj::mv(channel); + })); + } + } + }; + + KJ_IF_SOME(slot, globalOutbound) { + resolveChannelSlot(slot); + } + + for (auto& slot: tails) { + resolveChannelSlot(slot); + } + for (auto& slot: streamingTails) { + resolveChannelSlot(slot); + } + + if (!promises.empty()) { + co_await kj::joinPromisesFailFast(promises.releaseAsArray()); + } +} + +kj::Promise Worker::Actor::FacetManager::StartInfo::ensureAllResolved() { + KJ_SWITCH_ONEOF(actorClass->getResolved()) { + KJ_CASE_ONEOF(channel, kj::Own) { + actorClass = kj::mv(channel); + } + KJ_CASE_ONEOF(promise, kj::Promise>) { 
+ actorClass = co_await promise; + } + } +} + uint IoChannelCapTableEntry::getChannelNumber(Type expectedType) { // A type mismatch shouldn't be possible as long as attackers cannot tamper with the // serialization, but we do the check to catch bugs. diff --git a/src/workerd/io/io-channels.h b/src/workerd/io/io-channels.h index f68752a50c2..a45b9693f39 100644 --- a/src/workerd/io/io-channels.h +++ b/src/workerd/io/io-channels.h @@ -198,9 +198,32 @@ class IoChannelFactory { virtual void requireAllowsTransfer() = 0; // Get a token representing this SubrequestChannel which can be converted back into a - // SubrequestChannel using subrequestChannelFromToken(). Default implementation throws a - // TypeError. - virtual kj::Array getToken(ChannelTokenUsage usage); + // SubrequestChannel using subrequestChannelFromToken(). This is a convenience wrapper around + // getTokenMaybeSync() for callers that don't care about the synchronous optimization. + kj::Promise> getToken(ChannelTokenUsage usage); + + // Like getToken() but may return the token synchronously. This is what subclasses must + // implement. The synchronous optimization is important because there is significant additional + // overhead in the RPC system when the token cannot be created synchronously (need to use + // ExternalPusher to send a DelayedChannelToken). + virtual kj::OneOf, kj::Promise>> getTokenMaybeSync( + ChannelTokenUsage usage) = 0; + + // If this SubrequestChannel is just a wrapper around a promise for some later + // SubrequestChannel, return the inner channel -- synchronously if the promise has resolved + // already, otherwise asynchronously. + // + // Note that the various `IoChannelFactory` methods that take `props` or `env` objects all + // automatically resolve all channel objects *before* passing off to the underlying + // implementation. 
In the internal codebase, implementations end up needing to downcast these + // objects to implementation-specific types, and handling the need to call getResolved() + // in every use case would be painful, so it is taken care of in this layer. + // + // Default implementation returns self. + virtual kj::OneOf, kj::Promise>> + getResolved() { + return kj::addRef(*this); + } }; // Obtain an object representing a particular subrequest channel. @@ -212,10 +235,19 @@ class IoChannelFactory { // `props` and `versionRequest` can only be specified if this is a loopback channel (i.e. from // ctx.exports). For any other channel, they will throw. // + // The non-virtual method dispatches to getSubrequestChannelResolved(), but only after resolving + // all channels embedded in `props` (that is, calling `getResolved()` on all of them, waiting + // for the resolutions if necessary, and replacing the caps with the resolutions). + // // TODO(cleanup): Consider getting rid of `startSubrequest()` in favor of this. - virtual kj::Own getSubrequestChannel(uint channel, + kj::Own getSubrequestChannel(uint channel, kj::Maybe props = kj::none, - kj::Maybe versionRequest = kj::none) = 0; + kj::Maybe versionRequest = kj::none); + + // Underlying implementation of getSubrequestChannel(). The implementation can assume that `props` + // contains strictly resolved channels. + virtual kj::Own getSubrequestChannelResolved( + uint channel, kj::Maybe props, kj::Maybe versionRequest) = 0; // Stub for a remote actor. Allows sending requests to the actor. class ActorChannel: public SubrequestChannel { @@ -226,6 +258,8 @@ class IoChannelFactory { // For now, actor stubs are not transferrable -- but we do intend to change that at some point. void requireAllowsTransfer() override final; + kj::OneOf, kj::Promise>> getTokenMaybeSync( + ChannelTokenUsage usage) override final; }; // Get an actor stub from the given namespace for the actor with the given ID. 
@@ -258,7 +292,13 @@ class IoChannelFactory { // Same as the corresponding methods on SubrequestChannel. virtual void requireAllowsTransfer() = 0; - virtual kj::Array getToken(ChannelTokenUsage usage); + kj::Promise> getToken(ChannelTokenUsage usage); + virtual kj::OneOf, kj::Promise>> getTokenMaybeSync( + ChannelTokenUsage usage) = 0; + virtual kj::OneOf, kj::Promise>> + getResolved() { + return kj::addRef(*this); + } // This class has no functional methods, since it serves as a token to be passed to other // interfaces (namely the facets API). @@ -268,11 +308,16 @@ class IoChannelFactory { // // `props` can only be specified if this is a loopback channel (i.e. from ctx.exports). For any // other channel, it will throw. - virtual kj::Own getActorClass( - uint channel, kj::Maybe props = kj::none) { - // TODO(cleanup): Remove this once the production runtime has implemented this. - KJ_UNIMPLEMENTED("This runtime doesn't support actor class channels."); - } + // + // The non-virtual method dispatches to getActorClassResolved(), but only after resolving + // all channels embedded in `props` (that is, calling `getResolved()` on all of them, waiting + // for the resolutions if necessary, and replacing the caps with the resolutions). + kj::Own getActorClass(uint channel, kj::Maybe props = kj::none); + + // Underlying implementation of getActorClass(). The implementation can assume that `props` + // contains strictly resolved channels. + virtual kj::Own getActorClassResolved( + uint channel, kj::Maybe props) = 0; // Aborts all actors except those in namespaces marked with `preventEviction`. virtual void abortAllActors(kj::Maybe reason) { @@ -309,6 +354,22 @@ class IoChannelFactory { ChannelTokenUsage usage, kj::ArrayPtr token); virtual kj::Own actorClassFromToken( ChannelTokenUsage usage, kj::ArrayPtr token); + + // Overloads which accept a promise. Any attempts to use the channel will have to wait for the + // token to arrive first, but this should be transparent. 
+ kj::Own subrequestChannelFromToken( + ChannelTokenUsage usage, kj::Promise> token); + kj::Own actorClassFromToken( + ChannelTokenUsage usage, kj::Promise> token); + + // Return a strong reference to this same factory. Used in the implementations of + // getSubrequestChannel() and getActorClass() when delayed resolution is needed. + // + // TODO(cleanup): This is hacky. IoChannelFactory isn't declared to simply extend kj::Refcounted + // because the workerd implementation is privately implemented by Server::WorkerService, which + // inherits kj::Refcounted a different way. But maybe it's time for Server::WorkerService to + // stop working that way? + virtual kj::Own addRef() = 0; }; // ResourceLimits provides a means to control the resource allocation for a worker stage via a @@ -328,12 +389,20 @@ struct ResourceLimits { // // This object is returned before the Worker actually loads, so if any errors occur while loading, // any requests sent to the Worker will fail, propagating the exception. -class WorkerStubChannel { +class WorkerStubChannel: public kj::Refcounted { public: - virtual kj::Own getEntrypoint( + // As with IoChannelFactory::getSubrequestChannel(), the non-virtual method waits for `props` to + // resolve first, then calls the virtual method. + kj::Own getEntrypoint( + kj::Maybe name, Frankenvalue props, kj::Maybe limits); + virtual kj::Own getEntrypointResolved( kj::Maybe name, Frankenvalue props, kj::Maybe limits) = 0; - virtual kj::Own getActorClass( + // As with IoChannelFactory::getActorClass(), the non-virtual method waits for `props` to + // resolve first, then calls the virtual method. + kj::Own getActorClass( + kj::Maybe name, Frankenvalue props, kj::Maybe limits); + virtual kj::Own getActorClassResolved( kj::Maybe name, Frankenvalue props, kj::Maybe limits) = 0; // TODO(someday): Allow caller to enumerate entrypoints? 
@@ -385,6 +454,10 @@ struct DynamicWorkerSource { .ownContentIsRpcResponse = ownContentIsRpcResponse, }; } + + // Walks through all channels in `env` and other properties and ensures that they point at + // resolved objects by calling their `getResolved()` methods. + kj::Promise ensureAllResolved(); }; // A Frankenvalue::CapTableEntry which directly references a numbered I/O channel. This is ONLY diff --git a/src/workerd/io/trace.h b/src/workerd/io/trace.h index 2ff7b941ef3..0bb75c5334a 100644 --- a/src/workerd/io/trace.h +++ b/src/workerd/io/trace.h @@ -1105,6 +1105,10 @@ class SpanParent { // Returns the observer's spanId, or SpanId::nullId if there is none. tracing::SpanId getSpanId(); + // Create a SpanParent from a pre-serialized SpanContext. The resulting SpanParent + // carries identity for toSpanContext() but does not record spans. + static SpanParent fromSpanContext(tracing::SpanContext context); + private: kj::Maybe> observer; }; @@ -1254,6 +1258,26 @@ class SpanObserver: public kj::Refcounted { } }; +// A non-recording SpanObserver that carries a pre-serialized SpanContext for propagation. +// Used to create a SpanParent from stored identity when no live observer exists +// (e.g. rehydrating trace context after hibernation). 
+class NonRecordingSpanObserver final: public SpanObserver { + public: + explicit NonRecordingSpanObserver(tracing::SpanContext context): context(kj::mv(context)) {} + + kj::Own newChild() override { + return {}; + } + void onOpen(kj::ConstString, kj::Date) override {} + void onClose(kj::Date, Span::TagMap&&, kj::Vector&&) override {} + kj::Maybe toSpanContext() override { + return tracing::SpanContext::clone(context); + } + + private: + tracing::SpanContext context; +}; + inline kj::Maybe SpanParent::toSpanContext() { KJ_IF_SOME(obs, observer) { return obs->toSpanContext(); @@ -1268,6 +1292,10 @@ inline tracing::SpanId SpanParent::getSpanId() { return tracing::SpanId::nullId; } +inline SpanParent SpanParent::fromSpanContext(tracing::SpanContext context) { + return SpanParent(kj::refcounted(kj::mv(context))); +} + inline SpanParent::SpanParent(SpanBuilder& builder): observer(mapAddRef(builder.observer)) {} inline SpanParent SpanParent::addRef() { diff --git a/src/workerd/io/worker-interface.c++ b/src/workerd/io/worker-interface.c++ index 06f124c0946..fc070fe3d4d 100644 --- a/src/workerd/io/worker-interface.c++ +++ b/src/workerd/io/worker-interface.c++ @@ -288,7 +288,7 @@ kj::Promise RevocableWebSocketWorkerInterface::connect(kj::StringPtr host, return kj::READY_NOW; }).eagerlyEvaluate(nullptr); - return worker.connect(host, headers, *wrappedConnection, response, kj::mv(settings)) + return worker.connect(host, headers, *wrappedConnection.get(), response, kj::mv(settings)) .attach(kj::mv(wrappedConnection), kj::mv(revokeTask)); } diff --git a/src/workerd/io/worker-interface.capnp b/src/workerd/io/worker-interface.capnp index 4a29ca2e9fe..0029fb00766 100644 --- a/src/workerd/io/worker-interface.capnp +++ b/src/workerd/io/worker-interface.capnp @@ -530,14 +530,10 @@ struct JsValue { } readableStream :group { - # A ReadableStream. The sender of the JsValue will use the associated StreamSink to open a - # stream of type `ByteStream`. + # A ReadableStream. 
stream @10 :ExternalPusher.InputStream; - # If present, a stream pushed using the destination isolate's ExternalPusher. - # - # If null (deprecated), then the sender will use the associated StreamSink to open a stream - # of type `ByteStream`. StreamSink is in the process of being replaced by ExternalPusher. + # A stream pushed using the destination isolate's ExternalPusher. encoding @4 :StreamEncoding; # Bytes read from the stream have this encoding. @@ -551,14 +547,7 @@ struct JsValue { } } - abortTrigger @7 :Void; - # Indicates that an `AbortTrigger` is being passed, see the `AbortTrigger` interface for the - # mechanism used to trigger the abort later. This is modeled as a stream, since the sender is - # the one that will later on send the abort signal. This external will have an associated - # stream in the corresponding `StreamSink` with type `AbortTrigger`. - # - # TODO(soon): This will be obsolete when we stop using `StreamSink`; `abortSignal` will - # replace it. (The name is wrong anyway -- this is the signal end, not the trigger end.) + obsolete7 @7 :Void; abortSignal @11 :ExternalPusher.AbortSignal; # Indicates that an `AbortSignal` is being passed. @@ -567,33 +556,18 @@ struct JsValue { actorClassChannelToken @9 :Data; # Encoded ChannelTokens. See channel-token.capnp. + delayedSubrequestChannelToken @12 :ExternalPusher.DelayedChannelToken; + delayedActorClassChannelToken @13 :ExternalPusher.DelayedChannelToken; + # Channel tokens which will be delivered asynchronously. This is sometimes needed in cases + # where the calling worker needs to invoke an asynchronous task to construct the channel + # token. We do not want to delay sending the RPC (especially as this could violate ordering + # guarantees), so instead we send it with a placeholder representing the token to be provided + # later. + # TODO(soon): WebSocket, Request, Response } } - interface StreamSink { - # A JsValue may contain streams that flow from the sender to the receiver. 
We don't want such - # streams to require a network round trip before the stream can begin pumping. So, we need a - # place to start sending bytes right away. - # - # To that end, JsRpcTarget::call() returns a `paramsStreamSink`. Immediately upon sending the - # request, the client can use promise pipelining to begin pushing bytes to this object. - # - # Similarly, the caller passes a `resultsStreamSink` to the callee. If the response contains - # any streams, it can start pushing to this immediately after responding. - # - # TODO(soon): This design is overcomplicated since it requires allocating StreamSinks for every - # request, even when not used, and requires a lot of weird promise magic. The newer - # ExternalPusher design is simpler, and only incurs overhead when used. Once all of - # production has been updated to understand ExternalPusher, then we can flip an autogate to - # use it by default. Once that has rolled out globally, we can remove StreamSink. - - startStream @0 (externalIndex :UInt32) -> (stream :Capability); - # Opens a stream corresponding to the given index in the JsValue's `externals` array. The type - # of capability returned depends on the type of external. E.g. for `readableStream`, it is a - # `ByteStream`. - } - interface ExternalPusher { # This object allows "pushing" external objects to a target isolate, so that they can # sublequently be referenced by a `JsValue.External`. This allows implementing externals where @@ -637,6 +611,19 @@ struct JsValue { # rejects when the signal is aborted. } + pushDelayedChannelToken @2 (token :Data) -> (cap :DelayedChannelToken); + # Use with `delayed*ChannelToken` members of `External`. + # + # Generally, this `push` method is actually called some time *after* the initial RPC is sent. + # In the initial RPC, the caller fills in the `DelayedChannelToken` with a promise capability. 
+ # Later, when it has the final channel token, it calls `pushDelayedChannelToken()`, then + # resolves the earlier promise to the result. + + interface DelayedChannelToken { + # No methods. This can be unwrapped by the recipient to obtain the channel token passed to + # `pushDelayedChannelToken()`. + } + # TODO(soon): # - Promises } @@ -714,13 +701,10 @@ interface JsRpcTarget extends(JsValue.ExternalPusher) $Cxx.allowCancellation { } resultsStreamHandler :union { - # We're in the process of switching from `StreamSink` to `ExternalPusher`. A caller will only - # offer one or the other, and expect the callee to use that. (Initially, callers will still - # send StreamSink for backwards-compatibility, but once all recipients are able to understand - # ExternalPusher, we'll flip an autogate to make callers send it.) + # This union is now always of type `externalPusher`. - streamSink @4 :JsValue.StreamSink; - # StreamSink used for ReadableStreams found in the results. + obsolete4 @4 :Capability; + # From old StreamSink approach, replaced by ExternalPusher. externalPusher @5 :JsValue.ExternalPusher; # ExternalPusher object which will push into the caller's isolate. Use this to push externals @@ -749,9 +733,8 @@ interface JsRpcTarget extends(JsValue.ExternalPusher) $Cxx.allowCancellation { # `callPipeline` until the disposer is invoked. If `hasDisposer` is false, `callPipeline` can # safely be dropped immediately. - paramsStreamSink @3 :JsValue.StreamSink; - # StreamSink used for ReadableStreams found in the params. The caller begins sending bytes for - # these streams immediately using promise pipelining. + obsolete3 @3 :Capability; + # From old StreamSink approach, replaced by ExternalPusher. } call @0 CallParams -> CallResults; @@ -825,24 +808,12 @@ interface EventDispatcher @0xf20697475ec1752d { # Opens a JS rpc "session". The call does not return until the session is complete. # # `topLevel` is the top-level RPC target, on which exactly one method call can be made. 
This - # call should be made using pipelining to avoid a round trip at startup, and to properly handle - # the old semantics while they still exist in production (see below). - # - # The exact return semantics of this method are currently in flux. Both an old approach and a - # new approach may be live in production: - # * Old approach: `jsRpcSession()` does not return until (1) exactly one call has been made on - # `topLevel`, and (2) any stubs passed over that call (in either direction) have been dropped. - # The session can be canceled by cancelling the call. When the call returns, `session` is null, - # which is consistent with the session being complete. - # * New approach: `jsRpcSession()` returns immediately. The returned `session` capability keeps - # the session alive. Dropping `session` cancels the session. `session` resolves itself to a - # null capability when `topLevel` and all stubs introduced through it have been dropped; the - # caller may await `whenResolved()` to find out when this happens. + # call should be made using pipelining to avoid a round trip at startup. # - # The transition will take place in three phases: - # 1. Caller is adjusted to support both approaches. - # 2. Automate is rolled out to switch the callee to the new approach. - # 3. Remove code to support old approach. + # `jsRpcSession()` returns immediately. The returned `session` capability keeps the session + # alive. Dropping `session` cancels the session. `session` resolves itself to a null capability + # when `topLevel` and all stubs introduced through it have been dropped; the caller may await + # `whenResolved()` to find out when this happens. # # In C++, we use `WorkerInterface::customEvent()` to dispatch this event. 
diff --git a/src/workerd/io/worker.h b/src/workerd/io/worker.h index 8c3c6d1f376..626f6e87f29 100644 --- a/src/workerd/io/worker.h +++ b/src/workerd/io/worker.h @@ -911,6 +911,12 @@ class Worker::Actor final: public kj::Refcounted { // ctx.id for the child object. Worker::Actor::Id id; + + // Ensures `actorClass` is a fully-resolved channel. + // + // This is implemented in io-channels.c++ next to DynamicWorkerSource::ensureAllResolved() + // since they are very similar. + kj::Promise ensureAllResolved(); }; // Returns the nesting depth of this facet. Root = 0, direct child of root = 1, etc. diff --git a/src/workerd/jsg/AGENTS.md b/src/workerd/jsg/AGENTS.md index 49e3bd6791d..645b9bdc5ec 100644 --- a/src/workerd/jsg/AGENTS.md +++ b/src/workerd/jsg/AGENTS.md @@ -92,6 +92,15 @@ These rules MUST be followed when writing or modifying JSG code: `kj::Maybe` 10. **Prefer `JSG_PROTOTYPE_PROPERTY`** over `JSG_INSTANCE_PROPERTY` unless there's a specific reason — instance properties break GC optimization +11. **Use `// NOLINT(jsg-visit-for-gc)` to document intentional non-visits.** When a + GC-visitable field intentionally is not visited (e.g., a `kj::Rc`-owned object + unreachable from JS, or a type visited via a different mechanism), suppress the + `jsg-visit-for-gc` clang-tidy diagnostic with a `// NOLINT(jsg-visit-for-gc)` + comment and a brief explanation of *why* it's safe to skip. + +The `jsg-visit-for-gc` clang-tidy check (`//tools/clang-tidy:jsg-lint`) +automatically detects missing `visitForGc` implementations and unvisited fields +across the codebase, enforcing invariants 1 and 2 at build time. 
## CODE REVIEW RULE diff --git a/src/workerd/jsg/buffersource-test.c++ b/src/workerd/jsg/buffersource-test.c++ index d5a002220dd..4505ef362b9 100644 --- a/src/workerd/jsg/buffersource-test.c++ +++ b/src/workerd/jsg/buffersource-test.c++ @@ -72,12 +72,50 @@ struct BufferSourceContext: public jsg::Object, public jsg::ContextGlobal { return true; } + // Regression test for AUTOVULN-CLOUDFLARE-WORKERD-17: verify that the const + // overload of BackingStore::asArrayPtr() correctly handles byteOffset as + // bytes (not elements) for multi-byte T. + bool testConstAsArrayPtrByteOffset(jsg::Lock& js, jsg::BufferSource buf) { + // Write known bytes into the buffer: 12 bytes total. + // We expect the caller to pass a Uint8Array view with byteOffset=4 and + // byteLength=8 over a 12-byte ArrayBuffer. + auto bytes = buf.asArrayPtr(); + KJ_ASSERT(bytes.size() == 8); + bytes[0] = 0x01; + bytes[1] = 0x02; + bytes[2] = 0x03; + bytes[3] = 0x04; + bytes[4] = 0x05; + bytes[5] = 0x06; + bytes[6] = 0x07; + bytes[7] = 0x08; + + // Now obtain a const reference and call asArrayPtr() on it. + // The view has byteOffset=4 (from the underlying ArrayBuffer) and + // byteLength=8. The const overload must add byteOffset as bytes, not + // as uint32_t elements, so we should get 2 uint32_t elements starting + // at the view's data (not 4*sizeof(uint32_t)=16 bytes into the + // backing store). + const auto& constBuf = buf; + auto constPtr = constBuf.asArrayPtr(); + KJ_ASSERT(constPtr.size() == 2); + KJ_ASSERT(constPtr.asBytes() == bytes.asConst()); + + // Also verify the non-const overload produces the same result. 
+ auto mutablePtr = buf.asArrayPtr(); + KJ_ASSERT(mutablePtr.size() == 2); + KJ_ASSERT(mutablePtr.asBytes() == constPtr.asBytes()); + + return true; + } + JSG_RESOURCE_TYPE(BufferSourceContext) { JSG_METHOD(takeBufferSource); JSG_METHOD(takeUint8Array); JSG_METHOD(makeBufferSource); JSG_METHOD(makeArrayBuffer); JSG_METHOD(doTest); + JSG_METHOD(testConstAsArrayPtrByteOffset); } }; JSG_DECLARE_ISOLATE_TYPE(BufferSourceIsolate, BufferSourceContext); @@ -121,5 +159,23 @@ KJ_TEST("BufferSource works") { e.expectEval("const buf = new Uint8Array(12); doTest(buf.subarray(4))", "boolean", "true"); } +// Regression test for AUTOVULN-CLOUDFLARE-WORKERD-17: +// The const overload of BackingStore::asArrayPtr() must treat byteOffset as +// a byte count, not an element count. Before the fix, for multi-byte T with a +// nonzero byteOffset the const overload advanced by byteOffset * sizeof(T) bytes +// instead of byteOffset bytes, producing an out-of-bounds pointer. +KJ_TEST("BackingStore const asArrayPtr handles byteOffset correctly") { + Evaluator e(v8System); + + // Create a Uint8Array view at byteOffset=4 over a 12-byte ArrayBuffer. + // The view has byteLength=8. testConstAsArrayPtrByteOffset() will call + // the const overload of asArrayPtr() and verify the pointer + // arithmetic is correct. 
+ e.expectEval("const ab = new ArrayBuffer(12);" + "const view = new Uint8Array(ab, 4, 8);" + "testConstAsArrayPtrByteOffset(view)", + "boolean", "true"); +} + } // namespace } // namespace workerd::jsg::test diff --git a/src/workerd/jsg/buffersource.h b/src/workerd/jsg/buffersource.h index 540502a8a21..aa7bf9d61a7 100644 --- a/src/workerd/jsg/buffersource.h +++ b/src/workerd/jsg/buffersource.h @@ -133,6 +133,7 @@ class BackingStore { inline kj::ArrayPtr asArrayPtr() KJ_LIFETIMEBOUND { KJ_ASSERT(backingStore != nullptr, "Invalid access after move."); KJ_ASSERT(byteLength % sizeof(T) == 0); + KJ_ASSERT(byteOffset % sizeof(T) == 0); kj::byte* data = static_cast(backingStore->Data()); return kj::ArrayPtr(reinterpret_cast(data + byteOffset), byteLength / sizeof(T)); } @@ -148,8 +149,10 @@ class BackingStore { inline const kj::ArrayPtr asArrayPtr() const KJ_LIFETIMEBOUND { KJ_ASSERT(backingStore != nullptr, "Invalid access after move."); KJ_ASSERT(byteLength % sizeof(T) == 0); - return kj::ArrayPtr( - static_cast(backingStore->Data()) + byteOffset, byteLength / sizeof(T)); + KJ_ASSERT(byteOffset % sizeof(T) == 0); + const kj::byte* data = static_cast(backingStore->Data()); + return kj::ArrayPtr( + reinterpret_cast(data + byteOffset), byteLength / sizeof(T)); } template diff --git a/src/workerd/jsg/jsvalue-test.c++ b/src/workerd/jsg/jsvalue-test.c++ index d19e965cee0..2fcf325242d 100644 --- a/src/workerd/jsg/jsvalue-test.c++ +++ b/src/workerd/jsg/jsvalue-test.c++ @@ -152,5 +152,17 @@ KJ_TEST("simple") { "boolean", "true"); } +KJ_TEST("regression_AUTOVULN_CLOUDFLARE_WORKERD_143: deep proxy chain in getPrototype") { + // Regression test for AUTOVULN-CLOUDFLARE-WORKERD-143: a deeply nested chain of + // Proxy objects with no getPrototypeOf trap caused unbounded native recursion in + // JsObject::getPrototype(), leading to SIGSEGV from stack overflow. 
+ // After the fix, getPrototype() iterates instead of recursing and throws a + // RangeError when the depth limit is exceeded. + Evaluator e(v8System); + e.expectEval( + "let p = function(){}; for (let i = 0; i < 200000; i++) { p = new Proxy(p, {}); } try { checkProxyPrototype(p); 'no error'; } catch (e) { e.constructor.name + ': ' + e.message; }", + "string", "RangeError: Maximum proxy chain length exceeded in getPrototype"); +} + } // namespace } // namespace workerd::jsg::test diff --git a/src/workerd/jsg/jsvalue.c++ b/src/workerd/jsg/jsvalue.c++ index 08cd1bc57db..0b8d744cf8d 100644 --- a/src/workerd/jsg/jsvalue.c++ +++ b/src/workerd/jsg/jsvalue.c++ @@ -134,14 +134,15 @@ JsObject JsObject::jsonClone(Lock& js) { } JsValue JsObject::getPrototype(Lock& js) { - if (inner->IsProxy()) { - // Here we emulate the behavior of v8's GetPrototype() function for proxies. - // If the proxy has a getPrototypeOf trap, we call it and return the result. - // Otherwise we return the prototype of the target object. - // Note that we do not check if the target object is extensible or not, or - // if the returned prototype is consistent with the target's prototype if - // the target is not extensible. See the comment below for more details. - auto proxy = inner.As(); + // Iteratively unwrap nested Proxy targets so that an attacker-controlled + // chain of `new Proxy(prev, {})` cannot drive unbounded native recursion. + // V8's own JSProxy::GetPrototype does the same and caps at kMaxIterationLimit. 
+ static constexpr int kMaxProxyDepth = 100'000; + v8::Local current = inner; + for (int depth = 0; current->IsProxy(); ++depth) { + JSG_REQUIRE( + depth < kMaxProxyDepth, RangeError, "Maximum proxy chain length exceeded in getPrototype"); + auto proxy = current.As(); JSG_REQUIRE(!proxy->IsRevoked(), TypeError, "Proxy is revoked"); auto handler = proxy->GetHandler(); JSG_REQUIRE(handler->IsObject(), TypeError, "Proxy handler is not an object"); @@ -150,8 +151,8 @@ JsValue JsObject::getPrototype(Lock& js) { auto target = proxy->GetTarget(); if (trap.isUndefined()) { JSG_REQUIRE(target->IsObject(), TypeError, "Proxy target is not an object"); - // Run this through getPrototype to handle the case where the target is also a proxy. - return JsObject(target.As()).getPrototype(js); + current = target.As(); + continue; // unwrap one layer iteratively, no native recursion } JSG_REQUIRE(trap.isFunction(), TypeError, "Proxy getPrototypeOf trap is not a function"); v8::Local fn = ((v8::Local)trap).As(); @@ -169,10 +170,10 @@ JsValue JsObject::getPrototype(Lock& js) { return ret; } #if V8_MAJOR_VERSION >= 15 || (V8_MAJOR_VERSION == 14 && V8_MINOR_VERSION >= 7) - return JsValue(inner->GetPrototype()); + return JsValue(current->GetPrototype()); #else // TODO(cleanup): Remove when unnecessary. - return JsValue(inner->GetPrototypeV2()); + return JsValue(current->GetPrototypeV2()); #endif } diff --git a/src/workerd/jsg/jsvalue.h b/src/workerd/jsg/jsvalue.h index bee149d27e5..f6d6647e733 100644 --- a/src/workerd/jsg/jsvalue.h +++ b/src/workerd/jsg/jsvalue.h @@ -975,26 +975,39 @@ inline JsString Lock::str() { } inline JsString Lock::str(kj::ArrayPtr str) { + // The V8 string creation APIs take int for the length parameter. Guard against + // size_t values that would overflow int and be misinterpreted (negative values + // cause V8 to fall back to strlen, leading to heap-buffer-overflow reads). 
+ JSG_REQUIRE( + str.size() <= v8::String::kMaxLength, RangeError, "String is too long for a V8 string"); return JsString(check(v8::String::NewFromTwoByte(v8Isolate, reinterpret_cast(str.begin()), v8::NewStringType::kNormal, str.size()))); } inline JsString Lock::str(kj::ArrayPtr str) { + JSG_REQUIRE( + str.size() <= v8::String::kMaxLength, RangeError, "String is too long for a V8 string"); return JsString(check( v8::String::NewFromTwoByte(v8Isolate, str.begin(), v8::NewStringType::kNormal, str.size()))); } inline JsString Lock::str(kj::ArrayPtr str) { + JSG_REQUIRE( + str.size() <= v8::String::kMaxLength, RangeError, "String is too long for a V8 string"); return JsString(check( v8::String::NewFromUtf8(v8Isolate, str.begin(), v8::NewStringType::kNormal, str.size()))); } inline JsString Lock::str(kj::ArrayPtr str) { + JSG_REQUIRE( + str.size() <= v8::String::kMaxLength, RangeError, "String is too long for a V8 string"); return JsString(check( v8::String::NewFromOneByte(v8Isolate, str.begin(), v8::NewStringType::kNormal, str.size()))); } inline JsString Lock::strIntern(kj::StringPtr str) { + JSG_REQUIRE( + str.size() <= v8::String::kMaxLength, RangeError, "String is too long for a V8 string"); return JsString(check(v8::String::NewFromUtf8( v8Isolate, str.begin(), v8::NewStringType::kInternalized, str.size()))); } diff --git a/src/workerd/jsg/meta.h b/src/workerd/jsg/meta.h index 8ab03b0fa3f..15c056ed4ed 100644 --- a/src/workerd/jsg/meta.h +++ b/src/workerd/jsg/meta.h @@ -129,6 +129,10 @@ struct RequiredArgCount_; // The actual counting logic lives in type-wrapper.h (needs the ValueLessParameter concept). template inline constexpr int requiredArgumentCount = - detail::RequiredArgCount_::Args>::value; + // `typename` is required: MethodArgs is an alias template whose target + // StripMagicParam_<...> is a dependent type whose ::Args member is itself + // dependent. The readability-redundant-typename check misses this path. 
+ detail::RequiredArgCount_::Args>::value; // NOLINT(readability-redundant-typename) } // namespace workerd::jsg diff --git a/src/workerd/jsg/util.h b/src/workerd/jsg/util.h index 4590b2751c9..57b3e1558c9 100644 --- a/src/workerd/jsg/util.h +++ b/src/workerd/jsg/util.h @@ -252,6 +252,10 @@ template v8::Local v8Str(v8::Isolate* isolate, kj::ArrayPtr ptr, v8::NewStringType newType = v8::NewStringType::kNormal) { + // The V8 string creation APIs take int for the length parameter. Guard against + // size_t values that would overflow int and be misinterpreted (negative values + // cause V8 to fall back to strlen, leading to heap-buffer-overflow reads). + KJ_REQUIRE(ptr.size() <= v8::String::kMaxLength, "String is too long for a V8 string"); if constexpr (kj::isSameType()) { return check(v8::String::NewFromTwoByte( isolate, reinterpret_cast(ptr.begin()), newType, ptr.size())); @@ -280,6 +284,7 @@ inline v8::Local v8Str(v8::Isolate* isolate, inline v8::Local v8StrFromLatin1(v8::Isolate* isolate, kj::ArrayPtr ptr, v8::NewStringType newType = v8::NewStringType::kNormal) { + KJ_REQUIRE(ptr.size() <= v8::String::kMaxLength, "String is too long for a V8 string"); return check(v8::String::NewFromOneByte(isolate, ptr.begin(), newType, ptr.size())); } diff --git a/src/workerd/server/channel-token-test.c++ b/src/workerd/server/channel-token-test.c++ index 7dced325b89..97e28a66138 100644 --- a/src/workerd/server/channel-token-test.c++ +++ b/src/workerd/server/channel-token-test.c++ @@ -5,6 +5,7 @@ #include "channel-token.h" #include +#include #include namespace workerd::server { @@ -38,10 +39,30 @@ struct ServiceTriplet { } }; +kj::Array expectSync(kj::OneOf, kj::Promise>> variant) { + return KJ_ASSERT_NONNULL( + kj::mv(variant).tryGet>(), "expected token to be rendered synchronously"); +} + class MockSubrequestChannel: public IoChannelFactory::SubrequestChannel { public: + // Simple mock used by the resolver when decoding tokens. 
Its getTokenMaybeSync() is never + // called in that context. MockSubrequestChannel(ServiceTriplet triplet): triplet(kj::mv(triplet)) {} + + // Mock used as a nested cap inside a parent channel's props. It generates its own token by + // calling back into the ChannelTokenHandler. If `readyPromise` is provided, the token is only + // produced asynchronously after `readyPromise` resolves. + MockSubrequestChannel(ChannelTokenHandler& handler, + ServiceTriplet triplet, + kj::Maybe> readyPromise = kj::none) + : handler(handler), + triplet(kj::mv(triplet)), + readyPromise(kj::mv(readyPromise)) {} + + kj::Maybe handler; ServiceTriplet triplet; + kj::Maybe> readyPromise; kj::Own startRequest(IoChannelFactory::SubrequestMetadata metadata) override { KJ_UNREACHABLE; @@ -49,16 +70,58 @@ class MockSubrequestChannel: public IoChannelFactory::SubrequestChannel { void requireAllowsTransfer() override { KJ_UNREACHABLE; } + kj::OneOf, kj::Promise>> getTokenMaybeSync( + IoChannelFactory::ChannelTokenUsage usage) override { + auto& h = KJ_ASSERT_NONNULL(handler, "this mock was not constructed with a handler ref"); + KJ_IF_SOME(p, readyPromise) { + auto promise = kj::mv(p); + readyPromise = kj::none; + return promise.then([&h, usage, this]() mutable -> kj::Array { + return expectSync(h.encodeSubrequestChannelToken(usage, triplet.serviceName, + triplet.entrypoint.map([](kj::String& s) -> kj::StringPtr { return s; }), + triplet.props)); + }); + } else { + return expectSync(h.encodeSubrequestChannelToken(usage, triplet.serviceName, + triplet.entrypoint.map([](kj::String& s) -> kj::StringPtr { return s; }), triplet.props)); + } + } }; class MockActorClassChannel: public IoChannelFactory::ActorClassChannel { public: MockActorClassChannel(ServiceTriplet triplet): triplet(kj::mv(triplet)) {} + + MockActorClassChannel(ChannelTokenHandler& handler, + ServiceTriplet triplet, + kj::Maybe> readyPromise = kj::none) + : handler(handler), + triplet(kj::mv(triplet)), + 
readyPromise(kj::mv(readyPromise)) {} + + kj::Maybe handler; ServiceTriplet triplet; + kj::Maybe> readyPromise; void requireAllowsTransfer() override { KJ_UNREACHABLE; } + kj::OneOf, kj::Promise>> getTokenMaybeSync( + IoChannelFactory::ChannelTokenUsage usage) override { + auto& h = KJ_ASSERT_NONNULL(handler, "this mock was not constructed with a handler ref"); + KJ_IF_SOME(p, readyPromise) { + auto promise = kj::mv(p); + readyPromise = kj::none; + return promise.then([&h, usage, this]() mutable -> kj::Array { + return expectSync(h.encodeActorClassChannelToken(usage, triplet.serviceName, + triplet.entrypoint.map([](kj::String& s) -> kj::StringPtr { return s; }), + triplet.props)); + }); + } else { + return expectSync(h.encodeActorClassChannelToken(usage, triplet.serviceName, + triplet.entrypoint.map([](kj::String& s) -> kj::StringPtr { return s; }), triplet.props)); + } + } }; class MockResolver: public ChannelTokenHandler::Resolver { @@ -78,12 +141,24 @@ class MockResolver: public ChannelTokenHandler::Resolver { using Usage = IoChannelFactory::ChannelTokenUsage; +// Build a Frankenvalue whose capTable contains the given entries. The base value is an empty +// object. This goes through the capnp serialization path since Frankenvalue doesn't otherwise +// expose a way to construct a cap table directly. +Frankenvalue propsWithCaps(kj::Vector> caps) { + capnp::MallocMessageBuilder message; + auto builder = message.getRoot(); + builder.setEmptyObject(); + builder.setCapTableSize(caps.size()); + return Frankenvalue::fromCapnp(builder.asReader(), kj::mv(caps)); +} + KJ_TEST("channel token basics") { MockResolver resolver; ChannelTokenHandler handler(resolver); auto props = Frankenvalue::fromJson(kj::str("{\"foo\": 123}")); - auto token = handler.encodeSubrequestChannelToken(Usage::RPC, "foo", "MyEntry"_kj, props); + auto token = + expectSync(handler.encodeSubrequestChannelToken(Usage::RPC, "foo", "MyEntry"_kj, props)); // Decoding works. 
{ @@ -135,7 +210,8 @@ KJ_TEST("channel tokens for storage") { ChannelTokenHandler handler(resolver); auto props = Frankenvalue::fromJson(kj::str("{\"foo\": 123}")); - auto token = handler.encodeSubrequestChannelToken(Usage::STORAGE, "foo", "MyEntry"_kj, props); + auto token = + expectSync(handler.encodeSubrequestChannelToken(Usage::STORAGE, "foo", "MyEntry"_kj, props)); // Decoding works. { @@ -168,7 +244,8 @@ KJ_TEST("actor class channel tokens") { ChannelTokenHandler handler(resolver); auto props = Frankenvalue::fromJson(kj::str("{\"foo\": 123}")); - auto token = handler.encodeActorClassChannelToken(Usage::RPC, "foo", "MyEntry"_kj, props); + auto token = + expectSync(handler.encodeActorClassChannelToken(Usage::RPC, "foo", "MyEntry"_kj, props)); // Decoding works. { @@ -182,5 +259,123 @@ KJ_TEST("actor class channel tokens") { "channel token type mismatch", handler.decodeSubrequestChannelToken(Usage::RPC, token)); } +KJ_TEST("channel token with nested channels (all synchronous)") { + MockResolver resolver; + ChannelTokenHandler handler(resolver); + + // Build a props cap table containing a SubrequestChannel and an ActorClassChannel, both of + // which produce their tokens synchronously. + kj::Vector> caps; + caps.add(kj::refcounted(handler, + ServiceTriplet( + "nested-subreq", "NestedEntry"_kj, Frankenvalue::fromJson(kj::str("{\"inner\": 1}"))))); + caps.add(kj::refcounted(handler, + ServiceTriplet("nested-actor", kj::Maybe(kj::none), + Frankenvalue::fromJson(kj::str("{\"inner\": 2}"))))); + auto props = propsWithCaps(kj::mv(caps)); + + // Encoding is synchronous. + auto token = + expectSync(handler.encodeSubrequestChannelToken(Usage::RPC, "outer", "OuterEntry"_kj, props)); + + // Decoding works and restores the nested channels. 
+ { + auto channel = + handler.decodeSubrequestChannelToken(Usage::RPC, token).downcast(); + KJ_EXPECT(channel->triplet.serviceName == "outer"); + KJ_EXPECT(channel->triplet.entrypoint.map([](kj::String& s) -> kj::StringPtr { return s; }) == + "OuterEntry"_kj); + + auto capTable = channel->triplet.props.getCapTable(); + KJ_ASSERT(capTable.size() == 2); + + auto& nestedSub = KJ_ASSERT_NONNULL(kj::tryDowncast(*capTable[0]), + "expected nested cap 0 to be a SubrequestChannel"); + KJ_EXPECT(nestedSub.triplet == + ServiceTriplet( + "nested-subreq", "NestedEntry"_kj, Frankenvalue::fromJson(kj::str("{\"inner\": 1}")))); + + auto& nestedActor = KJ_ASSERT_NONNULL(kj::tryDowncast(*capTable[1]), + "expected nested cap 1 to be an ActorClassChannel"); + KJ_EXPECT(nestedActor.triplet == + ServiceTriplet("nested-actor", kj::Maybe(kj::none), + Frankenvalue::fromJson(kj::str("{\"inner\": 2}")))); + } + + // Also works with STORAGE usage. + auto storageToken = expectSync( + handler.encodeSubrequestChannelToken(Usage::STORAGE, "outer", "OuterEntry"_kj, props)); + { + auto channel = handler.decodeSubrequestChannelToken(Usage::STORAGE, storageToken) + .downcast(); + KJ_EXPECT(channel->triplet.props.getCapTable().size() == 2); + } + + // And the outer channel can itself be an ActorClassChannel. + auto actorToken = + expectSync(handler.encodeActorClassChannelToken(Usage::RPC, "outer", "OuterEntry"_kj, props)); + { + auto channel = handler.decodeActorClassChannelToken(Usage::RPC, actorToken) + .downcast(); + KJ_EXPECT(channel->triplet.props.getCapTable().size() == 2); + } +} + +KJ_TEST("channel token with nested channel that generates token asynchronously") { + kj::EventLoop loop; + kj::WaitScope waitScope(loop); + + MockResolver resolver; + ChannelTokenHandler handler(resolver); + + // One nested channel is ready synchronously, the other only becomes ready when we fulfill a + // paf. The whole encoding therefore cannot complete until we fulfill the paf. 
+ auto paf = kj::newPromiseAndFulfiller(); + + kj::Vector> caps; + caps.add(kj::refcounted(handler, + ServiceTriplet("sync-subreq", "SyncEntry"_kj, + Frankenvalue::fromJson(kj::str("{\"inner\": \"sync\"}"))))); + caps.add(kj::refcounted(handler, + ServiceTriplet("async-actor", "AsyncEntry"_kj, + Frankenvalue::fromJson(kj::str("{\"inner\": \"async\"}"))), + kj::mv(paf.promise))); + auto props = propsWithCaps(kj::mv(caps)); + + // Encoding returns a promise rather than a synchronous result, because one of the nested + // channels needs to wait before generating its token. + auto tokenOneOf = + handler.encodeSubrequestChannelToken(Usage::RPC, "outer", "OuterEntry"_kj, props); + auto tokenPromise = KJ_ASSERT_NONNULL(kj::mv(tokenOneOf).tryGet>>(), + "expected token to be rendered asynchronously when a nested channel is pending"); + + // The promise should not be ready yet. + KJ_EXPECT(!tokenPromise.poll(waitScope)); + + // Once we fulfill the pending promise, the token is produced. + paf.fulfiller->fulfill(); + auto token = tokenPromise.wait(waitScope); + + // Decoding works and restores the nested channels. 
+ auto channel = + handler.decodeSubrequestChannelToken(Usage::RPC, token).downcast(); + KJ_EXPECT(channel->triplet.serviceName == "outer"); + + auto capTable = channel->triplet.props.getCapTable(); + KJ_ASSERT(capTable.size() == 2); + + auto& nestedSub = KJ_ASSERT_NONNULL(kj::tryDowncast(*capTable[0]), + "expected nested cap 0 to be a SubrequestChannel"); + KJ_EXPECT(nestedSub.triplet == + ServiceTriplet( + "sync-subreq", "SyncEntry"_kj, Frankenvalue::fromJson(kj::str("{\"inner\": \"sync\"}")))); + + auto& nestedActor = KJ_ASSERT_NONNULL(kj::tryDowncast(*capTable[1]), + "expected nested cap 1 to be an ActorClassChannel"); + KJ_EXPECT(nestedActor.triplet == + ServiceTriplet("async-actor", "AsyncEntry"_kj, + Frankenvalue::fromJson(kj::str("{\"inner\": \"async\"}")))); +} + } // namespace } // namespace workerd::server diff --git a/src/workerd/server/channel-token.c++ b/src/workerd/server/channel-token.c++ index da16b81bd5b..58620ba8405 100644 --- a/src/workerd/server/channel-token.c++ +++ b/src/workerd/server/channel-token.c++ @@ -35,14 +35,15 @@ ChannelTokenHandler::ChannelTokenHandler(Resolver& resolver): resolver(resolver) kj::arrayPtr(keyId).copyFrom(kj::arrayPtr(hash).first(KEY_ID_SIZE)); } -kj::Array ChannelTokenHandler::encodeChannelTokenImpl(ChannelToken::Type type, - IoChannelFactory::ChannelTokenUsage usage, - kj::StringPtr serviceName, - kj::Maybe entrypoint, - Frankenvalue& props) { - capnp::word scratch[128]{}; - capnp::MallocMessageBuilder message(scratch); - auto builder = message.getRoot(); +kj::OneOf, kj::Promise>> ChannelTokenHandler:: + encodeChannelTokenImpl(ChannelToken::Type type, + IoChannelFactory::ChannelTokenUsage usage, + kj::StringPtr serviceName, + kj::Maybe entrypoint, + Frankenvalue& props) { + auto message = kj::heap(128); + auto builder = message->getRoot(); + kj::Vector> promises; builder.setType(type); @@ -64,10 +65,28 @@ kj::Array ChannelTokenHandler::encodeChannelTokenImpl(ChannelToken::Type t for (auto i: kj::indices(capTable)) 
{ KJ_IF_SOME(subreq, kj::tryDowncast(*capTable[i])) { - caps[i].setSubrequestChannel(subreq.getToken(usage)); + KJ_SWITCH_ONEOF(subreq.getTokenMaybeSync(usage)) { + KJ_CASE_ONEOF(token, kj::Array) { + caps[i].setSubrequestChannel(token); + } + KJ_CASE_ONEOF(promise, kj::Promise>) { + promises.add(promise.then([slot = caps[i]](kj::Array token) mutable { + slot.setSubrequestChannel(token); + })); + } + } } else KJ_IF_SOME(actorClass, kj::tryDowncast(*capTable[i])) { - caps[i].setActorClassChannel(actorClass.getToken(usage)); + KJ_SWITCH_ONEOF(actorClass.getTokenMaybeSync(usage)) { + KJ_CASE_ONEOF(token, kj::Array) { + caps[i].setActorClassChannel(token); + } + KJ_CASE_ONEOF(promise, kj::Promise>) { + promises.add(promise.then([slot = caps[i]](kj::Array token) mutable { + slot.setActorClassChannel(token); + })); + } + } } else { KJ_FAIL_REQUIRE("unknown type in props"); } @@ -75,6 +94,18 @@ kj::Array ChannelTokenHandler::encodeChannelTokenImpl(ChannelToken::Type t } } + if (promises.empty()) { + return serializeTokenImpl(usage, *message); + } else { + return kj::joinPromisesFailFast(promises.releaseAsArray()) + .then([this, usage, message = kj::mv(message)]() mutable { + return serializeTokenImpl(usage, *message); + }); + } +} + +kj::Array ChannelTokenHandler::serializeTokenImpl( + IoChannelFactory::ChannelTokenUsage usage, capnp::MessageBuilder& message) { kj::VectorOutputStream out; capnp::writePackedMessage(out, message); @@ -135,20 +166,20 @@ kj::Array ChannelTokenHandler::encodeChannelTokenImpl(ChannelToken::Type t KJ_UNREACHABLE; } -kj::Array ChannelTokenHandler::encodeSubrequestChannelToken( - IoChannelFactory::ChannelTokenUsage usage, - kj::StringPtr serviceName, - kj::Maybe entrypoint, - Frankenvalue& props) { +kj::OneOf, kj::Promise>> ChannelTokenHandler:: + encodeSubrequestChannelToken(IoChannelFactory::ChannelTokenUsage usage, + kj::StringPtr serviceName, + kj::Maybe entrypoint, + Frankenvalue& props) { return encodeChannelTokenImpl( 
ChannelToken::Type::SUBREQUEST, usage, serviceName, entrypoint, props); } -kj::Array ChannelTokenHandler::encodeActorClassChannelToken( - IoChannelFactory::ChannelTokenUsage usage, - kj::StringPtr serviceName, - kj::Maybe entrypoint, - Frankenvalue& props) { +kj::OneOf, kj::Promise>> ChannelTokenHandler:: + encodeActorClassChannelToken(IoChannelFactory::ChannelTokenUsage usage, + kj::StringPtr serviceName, + kj::Maybe entrypoint, + Frankenvalue& props) { return encodeChannelTokenImpl( ChannelToken::Type::ACTOR_CLASS, usage, serviceName, entrypoint, props); } diff --git a/src/workerd/server/channel-token.h b/src/workerd/server/channel-token.h index ef075e3c87a..43ee0b18271 100644 --- a/src/workerd/server/channel-token.h +++ b/src/workerd/server/channel-token.h @@ -37,11 +37,13 @@ class ChannelTokenHandler { explicit ChannelTokenHandler(Resolver& resolver); // Helpers to implement `IoChannelFactory::{SubrequestChannel,ActorClassChannel}::getToken()`. - kj::Array encodeSubrequestChannelToken(IoChannelFactory::ChannelTokenUsage usage, + kj::OneOf, kj::Promise>> encodeSubrequestChannelToken( + IoChannelFactory::ChannelTokenUsage usage, kj::StringPtr serviceName, kj::Maybe entrypoint, Frankenvalue& props); - kj::Array encodeActorClassChannelToken(IoChannelFactory::ChannelTokenUsage usage, + kj::OneOf, kj::Promise>> encodeActorClassChannelToken( + IoChannelFactory::ChannelTokenUsage usage, kj::StringPtr serviceName, kj::Maybe entrypoint, Frankenvalue& props); @@ -73,11 +75,14 @@ class ChannelTokenHandler { static_assert(sizeof(TokenHeader) == 32); // Implementation for both `encode` methods. 
- kj::Array encodeChannelTokenImpl(ChannelToken::Type type, + kj::OneOf, kj::Promise>> encodeChannelTokenImpl( + ChannelToken::Type type, IoChannelFactory::ChannelTokenUsage usage, kj::StringPtr serviceName, kj::Maybe entrypoint, Frankenvalue& props); + kj::Array serializeTokenImpl( + IoChannelFactory::ChannelTokenUsage usage, capnp::MessageBuilder& message); // Implementation that dynamically returns either SubrequestChannel or ActorClassChannel, which // both happen to inherit CapTableEntry. The caller will immediately downcast to the right type. diff --git a/src/workerd/server/server-test.c++ b/src/workerd/server/server-test.c++ index 762abbc7260..33a4f889a1c 100644 --- a/src/workerd/server/server-test.c++ +++ b/src/workerd/server/server-test.c++ @@ -6398,5 +6398,45 @@ KJ_TEST("Server: workerdDebugPort WebSocket passthrough via WorkerEntrypoint") { wsConn.send(kj::str("\x81\x05", testMessage2)); wsConn.recvWebSocket("echo:world"); } +// Regression test for AUTOVULN-CLOUDFLARE-WORKERD-9: a wrapped binding whose moduleName +// does not resolve to any internal module must produce a config error, not a fatal assertion. +// Before the fix, this config would hit KJ_ASSERT(!value.IsEmpty()) in compileGlobals() +// and abort. After the fix, the unresolved module is rejected with KJ_FAIL_REQUIRE which +// produces a recoverable config error containing the module name. +KJ_TEST("Server: wrapped binding with unresolvable module produces config error") { + // Enable predictable mode so the internal error reference ID is deterministic. 
+ setPredictableModeForTest(); + + TestServer test(singleWorker(R"(( + compatibilityDate = "2024-01-01", + modules = [ + ( name = "main.js", + esModule = + `export default { + ` async fetch(request) { + ` return new Response("should not reach here"); + ` } + `} + ) + ], + bindings = [ + ( name = "brokenBinding", + wrapped = ( + moduleName = "nonexistent:missing-module", + innerBindings = [ (name = "inner", text = "value") ] + ) + ) + ] + ))"_kj)); + + // The KJ_FAIL_REQUIRE exception propagates through compileGlobals, gets caught by the + // worker constructor, converted to a JS "internal error" with a predictable reference ID, + // and reported as a config error. The jsg layer logs the original exception at ERROR level + // (a single log line containing both the exception description and "jsgInternalError"). + KJ_EXPECT_LOG(ERROR, "jsgInternalError"); + test.expectErrors("service hello: Uncaught Error: internal error;" + " reference = 0123456789abcdefghijklmn\n"_kj); +} + } // namespace } // namespace workerd::server diff --git a/src/workerd/server/server.c++ b/src/workerd/server/server.c++ index 95dafe1a09b..a298e80cad4 100644 --- a/src/workerd/server/server.c++ +++ b/src/workerd/server/server.c++ @@ -164,7 +164,7 @@ static inline kj::Own fakeOwn(T& ref) { return kj::Own(&ref, kj::NullDisposer::instance); } -void throwDynamicEntrypointTransferError() { +[[noreturn]] void throwDynamicEntrypointTransferError() { JSG_FAIL_REQUIRE(DOMDataCloneError, "Entrypoints to dynamically-loaded workers cannot be transferred to other Workers, " "because the system does not know how to reload this Worker from scratch. Instead, " @@ -551,6 +551,12 @@ class Server::InvalidConfigService final: public Service { bool hasHandler(kj::StringPtr handlerName) override { return false; } + + kj::OneOf, kj::Promise>> getTokenMaybeSync( + IoChannelFactory::ChannelTokenUsage usage) override { + // Can't get here because workerd would have failed to start. 
+ KJ_UNREACHABLE; + } }; class Server::InvalidConfigActorClass final: public ActorClass { @@ -559,6 +565,11 @@ class Server::InvalidConfigActorClass final: public ActorClass { // Can't get here because workerd would have failed to start. KJ_UNREACHABLE; } + kj::OneOf, kj::Promise>> getTokenMaybeSync( + IoChannelFactory::ChannelTokenUsage usage) override { + // Can't get here because workerd would have failed to start. + KJ_UNREACHABLE; + } kj::Own newActor(kj::Maybe tracker, Worker::Actor::Id actorId, @@ -643,6 +654,11 @@ class Server::ExternalTcpService final: public Service, private WorkerInterface return handlerName == "fetch"_kj || handlerName == "connect"_kj; } + kj::OneOf, kj::Promise>> getTokenMaybeSync( + IoChannelFactory::ChannelTokenUsage usage) override { + JSG_FAIL_REQUIRE(DOMDataCloneError, "ExternalService can't be passed over RPC."); + } + private: kj::Own addr; @@ -727,6 +743,11 @@ class Server::ExternalHttpService final: public Service { return handlerName == "fetch"_kj || handlerName == "connect"_kj; } + kj::OneOf, kj::Promise>> getTokenMaybeSync( + IoChannelFactory::ChannelTokenUsage usage) override { + JSG_FAIL_REQUIRE(DOMDataCloneError, "ExternalService can't be passed over RPC."); + } + private: kj::Own addr; @@ -963,6 +984,11 @@ class Server::NetworkService final: public Service, private WorkerInterface { return handlerName == "fetch"_kj || handlerName == "connect"_kj; } + kj::OneOf, kj::Promise>> getTokenMaybeSync( + IoChannelFactory::ChannelTokenUsage usage) override { + JSG_FAIL_REQUIRE(DOMDataCloneError, "NetworkService can't be passed over RPC."); + } + private: kj::Own network; kj::Maybe> tlsNetwork; @@ -1059,6 +1085,11 @@ class Server::DiskDirectoryService final: public Service, private WorkerInterfac return handlerName == "fetch"_kj; } + kj::OneOf, kj::Promise>> getTokenMaybeSync( + IoChannelFactory::ChannelTokenUsage usage) override { + JSG_FAIL_REQUIRE(DOMDataCloneError, "DiskDirectoryService can't be passed over RPC."); + } + 
private: kj::Maybe writable; kj::Own readable; @@ -1984,6 +2015,20 @@ class Server::WorkerService final: public Service, if (isDynamic) throwDynamicEntrypointTransferError(); } + kj::OneOf, kj::Promise>> getTokenMaybeSync( + IoChannelFactory::ChannelTokenUsage usage) override { + requireAllowsTransfer(); + + // encodeSubrequestChannelToken wants a reference to the props. It needs this reference to + // be non-const because it might refcount things. But if it's an empty object then there's + // nothing to refcount. So we can just declare this statically... + static Frankenvalue EMPTY_PROPS; + + // If requireAllowsTransfer() passed, then we are not dynamic so should have a service name. + return channelTokenHandler.encodeSubrequestChannelToken( + usage, KJ_ASSERT_NONNULL(serviceName), kj::none, EMPTY_PROPS); + } + kj::Maybe> getEntrypoint(kj::Maybe name, Frankenvalue props) { const kj::HashSet* handlers; KJ_IF_SOME(n, name) { @@ -2917,6 +2962,7 @@ class Server::WorkerService final: public Service, static kj::Promise callFacetStartCallback( kj::Function()> getStartInfo) { auto info = co_await getStartInfo(); + co_await info.ensureAllResolved(); co_return ClassAndId(info.actorClass.downcast(), kj::mv(info.id)); } }; @@ -3223,7 +3269,8 @@ class Server::WorkerService final: public Service, worker->requireAllowsTransfer(); } - kj::Array getToken(ChannelTokenUsage usage) override { + kj::OneOf, kj::Promise>> getTokenMaybeSync( + ChannelTokenUsage usage) override { worker->requireAllowsTransfer(); // If requireAllowsTransfer() passed, then we are not dynamic so should have a service name. 
@@ -3295,7 +3342,8 @@ class Server::WorkerService final: public Service, return kj::refcounted(*service, className, kj::mv(props)); } - kj::Array getToken(ChannelTokenUsage usage) override { + kj::OneOf, kj::Promise>> getTokenMaybeSync( + ChannelTokenUsage usage) override { service->requireAllowsTransfer(); // If requireAllowsTransfer() passed, then we are not dynamic so should have a service name. @@ -3474,7 +3522,7 @@ class Server::WorkerService final: public Service, co_return; } - kj::Own getSubrequestChannel(uint channel, + kj::Own getSubrequestChannelResolved(uint channel, kj::Maybe props, kj::Maybe versionRequest) override { auto& channels = @@ -3538,7 +3586,8 @@ class Server::WorkerService final: public Service, return ns.getActorChannel(kj::str(id)); } - kj::Own getActorClass(uint channel, kj::Maybe props) override { + kj::Own getActorClassResolved( + uint channel, kj::Maybe props) override { auto& channels = KJ_REQUIRE_NONNULL(ioChannels.tryGet(), "link() has not been called"); @@ -3601,6 +3650,10 @@ class Server::WorkerService final: public Service, return channelTokenHandler.decodeActorClassChannelToken(usage, token); } + kj::Own addRef() override { + return kj::addRef(*this); + } + // --------------------------------------------------------------------------- // implements TimerChannel @@ -4174,11 +4227,12 @@ struct Server::WorkerDef { kj::Maybe> abortIsolateCallback; }; -class Server::WorkerLoaderNamespace: public kj::Refcounted { +class Server::WorkerLoaderNamespace: public kj::Refcounted, private kj::TaskSet::ErrorHandler { public: WorkerLoaderNamespace(Server& server, kj::String namespaceName) : server(server), - namespaceName(kj::mv(namespaceName)) {} + namespaceName(kj::mv(namespaceName)), + startupTasks(*this) {} void unlink() { for (auto& isolate: isolates) { @@ -4209,8 +4263,21 @@ class Server::WorkerLoaderNamespace: public kj::Refcounted { .toOwn(); } else { auto isolateName = kj::str(namespaceName, ":dynamic:", 
randomUUID(server.entropySource)); - return kj::rc(server, kj::mv(isolateName), kj::none, kj::mv(fetchSource)) - .toOwn(); + auto stub = + kj::rc(server, kj::mv(isolateName), kj::none, kj::mv(fetchSource)); + // Unnamed workers have no entry in the isolates map, so the JS-side + // IoOwn would be the sole owner. Retain an extra ref so that GC of the + // JS handle during the getCode re-entry callback cannot destroy the + // object while its start() coroutine is still running. The extra ref + // is held in a task on the namespace (NOT on the WorkerStubImpl itself) + // so that when the task completes and drops the ref, the destruction + // does not re-enter a firing Event. The named-load path is safe because + // the isolates map already holds an additional kj::Rc. + auto selfRef = stub.addRef(); + startupTasks.add( + stub->whenStartupDone().then([prevent = kj::mv(selfRef)]() { /* prevent dropped here */ }, + [](kj::Exception&&) { /* startup failed; prevent dropped here */ })); + return kj::mv(stub).toOwn(); } } @@ -4226,7 +4293,17 @@ class Server::WorkerLoaderNamespace: public kj::Refcounted { class WorkerStubImpl; kj::HashMap> isolates; - class NullGlobalOutboundChannel: public IoChannelFactory::SubrequestChannel { + // Holds tasks that keep unnamed WorkerStubImpl instances alive while their + // start() coroutines are running. See the unnamed branch of loadIsolate(). + kj::TaskSet startupTasks; + + void taskFailed(kj::Exception&& exception) override { + // Startup failures are already handled by the WorkerStubImpl's + // startupTask (callers get the exception when they await the stub). + // Nothing to do here. + } + + class NullGlobalOutboundChannel final: public IoChannelFactory::SubrequestChannel { public: kj::Own startRequest(IoChannelFactory::SubrequestMetadata metadata) override { JSG_FAIL_REQUIRE(Error, @@ -4246,9 +4323,14 @@ class Server::WorkerLoaderNamespace: public kj::Refcounted { // misleading after the channel has been transferred. 
JSG_FAIL_REQUIRE(DOMDataCloneError, "The null global outbound is not transferrable."); } + + kj::OneOf, kj::Promise>> getTokenMaybeSync( + IoChannelFactory::ChannelTokenUsage usage) override { + JSG_FAIL_REQUIRE(DOMDataCloneError, "The null global outbound is not transferrable."); + } }; - class WorkerStubImpl final: public WorkerStubChannel, public kj::Refcounted { + class WorkerStubImpl final: public WorkerStubChannel { public: WorkerStubImpl(Server& server, kj::String isolateName, @@ -4257,6 +4339,12 @@ class Server::WorkerLoaderNamespace: public kj::Refcounted { : onAborted(kj::mv(onAborted)), startupTask(start(server, kj::mv(isolateName), kj::mv(fetchSource)).fork()) {} + // Returns a branch of the startup task promise. Used by the namespace to + // hold an extra reference to unnamed stubs until startup completes. + kj::Promise whenStartupDone() { + return startupTask.addBranch(); + } + ~WorkerStubImpl() { unlink(); } @@ -4267,12 +4355,12 @@ class Server::WorkerLoaderNamespace: public kj::Refcounted { } } - kj::Own getEntrypoint( + kj::Own getEntrypointResolved( kj::Maybe name, Frankenvalue props, kj::Maybe limits) override { return kj::refcounted(addRefToThis(), kj::mv(name), kj::mv(props)); } - kj::Own getActorClass( + kj::Own getActorClassResolved( kj::Maybe name, Frankenvalue props, kj::Maybe limits) override { return kj::refcounted(addRefToThis(), kj::mv(name), kj::mv(props)); } @@ -4297,6 +4385,7 @@ class Server::WorkerLoaderNamespace: public kj::Refcounted { kj::String isolateName, kj::Function()> fetchSource) { auto source = co_await fetchSource(); + co_await source.ensureAllResolved(); static const kj::HashMap EMPTY_ACTOR_CONFIGS; // Rewrite the capabilities in `env` in order to build the I/O channel table. 
@@ -4414,6 +4503,11 @@ class Server::WorkerLoaderNamespace: public kj::Refcounted { throwDynamicEntrypointTransferError(); } + kj::OneOf, kj::Promise>> getTokenMaybeSync( + IoChannelFactory::ChannelTokenUsage usage) override { + throwDynamicEntrypointTransferError(); + } + private: kj::Rc isolate; kj::Maybe entrypointName; @@ -4460,6 +4554,11 @@ class Server::WorkerLoaderNamespace: public kj::Refcounted { throwDynamicEntrypointTransferError(); } + kj::OneOf, kj::Promise>> getTokenMaybeSync( + IoChannelFactory::ChannelTokenUsage usage) override { + throwDynamicEntrypointTransferError(); + } + kj::Maybe> whenReady() override { if (inner != kj::none) return kj::none; diff --git a/src/workerd/server/workerd-api.c++ b/src/workerd/server/workerd-api.c++ index 44c9840862d..c48b01819a9 100644 --- a/src/workerd/server/workerd-api.c++ +++ b/src/workerd/server/workerd-api.c++ @@ -724,8 +724,8 @@ static v8::Local createBindingValue(JsgWorkerdIsolate::Lock& lock, v8::Local arg = env.As(); value = jsg::check(v8::Function::Cast(*fn)->Call(context, context->Global(), 1, &arg)); } else { - KJ_LOG( - ERROR, "wrapped binding module can't be resolved (internal modules only)", moduleName); + KJ_FAIL_REQUIRE( + "wrapped binding module can't be resolved (internal modules only)", moduleName); } } KJ_CASE_ONEOF(hyperdrive, Global::Hyperdrive) { diff --git a/src/workerd/server/workerd-debug-port-client.c++ b/src/workerd/server/workerd-debug-port-client.c++ index d164195fd44..3700b1c8ce3 100644 --- a/src/workerd/server/workerd-debug-port-client.c++ +++ b/src/workerd/server/workerd-debug-port-client.c++ @@ -48,6 +48,11 @@ class WorkerdBootstrapSubrequestChannel final: public IoChannelFactory::Subreque JSG_FAIL_REQUIRE(Error, "WorkerdDebugPort bindings cannot be transferred to other workers"); } + kj::OneOf, kj::Promise>> getTokenMaybeSync( + IoChannelFactory::ChannelTokenUsage usage) override { + JSG_FAIL_REQUIRE(Error, "WorkerdDebugPort bindings cannot be transferred to other workers"); + } 
+ private: rpc::WorkerdBootstrap::Client bootstrap; capnp::HttpOverCapnpFactory& httpOverCapnpFactory; diff --git a/src/workerd/tests/test-fixture.h b/src/workerd/tests/test-fixture.h index 649709ffb0d..6086ed40e13 100644 --- a/src/workerd/tests/test-fixture.h +++ b/src/workerd/tests/test-fixture.h @@ -137,11 +137,15 @@ struct TestFixture { kj::Own startSubrequest(uint channel, SubrequestMetadata metadata) override { KJ_FAIL_ASSERT("no subrequests"); } - kj::Own getSubrequestChannel(uint channel, + kj::Own getSubrequestChannelResolved(uint channel, kj::Maybe props, kj::Maybe versionRequest) override { KJ_FAIL_ASSERT("no subrequests"); } + kj::Own getActorClassResolved( + uint channel, kj::Maybe props) override { + KJ_FAIL_ASSERT("no actor classes"); + } capnp::Capability::Client getCapability(uint channel) override { KJ_FAIL_ASSERT("no capabilities"); } @@ -169,6 +173,10 @@ struct TestFixture { KJ_FAIL_REQUIRE("no actor channels"); } + kj::Own addRef() override { + KJ_FAIL_REQUIRE("not used"); + } + TimerChannel& timer; }; }; diff --git a/src/workerd/util/autogate.c++ b/src/workerd/util/autogate.c++ index 33900382587..a0749b93b0d 100644 --- a/src/workerd/util/autogate.c++ +++ b/src/workerd/util/autogate.c++ @@ -29,8 +29,6 @@ kj::StringPtr KJ_STRINGIFY(AutogateKey key) { return "tail-stream-refactor"_kj; case AutogateKey::RUST_BACKED_NODE_DNS: return "rust-backed-node-dns"_kj; - case AutogateKey::RPC_USE_EXTERNAL_PUSHER: - return "rpc-use-external-pusher"_kj; case AutogateKey::WASM_SHUTDOWN_SIGNAL_SHIM: return "wasm-shutdown-signal-shim"_kj; case AutogateKey::ENABLE_FAST_TEXTENCODER: @@ -47,8 +45,6 @@ kj::StringPtr KJ_STRINGIFY(AutogateKey key) { return "updated-auto-allocate-chunk-size"_kj; case AutogateKey::PYTHON_ABORT_ISOLATE_ON_FATAL_ERROR: return "python-abort-isolate-on-fatal-error"_kj; - case AutogateKey::JSRPC_SESSION_HANDLE: - return "jsrpc-session-handle"_kj; case AutogateKey::NumOfKeys: KJ_FAIL_ASSERT("NumOfKeys should not be used in getName"); } diff 
--git a/src/workerd/util/autogate.h b/src/workerd/util/autogate.h index 7dfcadcb69b..9494877938a 100644 --- a/src/workerd/util/autogate.h +++ b/src/workerd/util/autogate.h @@ -33,8 +33,6 @@ enum class AutogateKey { TAIL_STREAM_REFACTOR, // Enable Rust-backed Node.js DNS implementation RUST_BACKED_NODE_DNS, - // Use ExternalPusher instead of StreamSink to handle streams in RPC. - RPC_USE_EXTERNAL_PUSHER, // Enable the WebAssembly.instantiate shim that detects modules exporting __instance_signal / // __instance_terminated and registers them for receiving the CPU-limit shutdown signal. WASM_SHUTDOWN_SIGNAL_SHIM, @@ -52,9 +50,6 @@ enum class AutogateKey { UPDATED_AUTO_ALLOCATE_CHUNK_SIZE, // Call abortIsolate() when a Python worker encounters a fatal error. PYTHON_ABORT_ISOLATE_ON_FATAL_ERROR, - // `jsRpcSession()` returns a session handle instead of having the call itself hang until the - // session is complete. - JSRPC_SESSION_HANDLE, NumOfKeys // Reserved for iteration. }; diff --git a/src/workerd/util/sqlite-kv.c++ b/src/workerd/util/sqlite-kv.c++ index 23947e82e55..031967f9f03 100644 --- a/src/workerd/util/sqlite-kv.c++ +++ b/src/workerd/util/sqlite-kv.c++ @@ -174,7 +174,8 @@ void SqliteKv::beforeSqliteReset() { void SqliteKv::rollbackMultiPut(Initialized& stmts, WriteOptions options) { KJ_IF_SOME(e, kj::runCatchingExceptions([&]() { // This should be rare, so we don't prepare a statement for it. 
- stmts.db.run({.regulator = stmts.regulator, .allowUnconfirmed = options.allowUnconfirmed}, + stmts.db.run( + {.regulator = Initialized::regulator, .allowUnconfirmed = options.allowUnconfirmed}, kj::str("ROLLBACK TO _cf_put_multiple_savepoint")); stmts.stmtMultiPutRelease.run({.allowUnconfirmed = options.allowUnconfirmed}); })) { diff --git a/src/workerd/util/sqlite-kv.h b/src/workerd/util/sqlite-kv.h index 75684734fd8..b3e7f64f5a2 100644 --- a/src/workerd/util/sqlite-kv.h +++ b/src/workerd/util/sqlite-kv.h @@ -93,7 +93,7 @@ class SqliteKv: private SqliteDatabase::ResetListener { // easier to manage. SqliteDatabase& db; - SqliteKvRegulator regulator; + static constexpr SqliteKvRegulator regulator; SqliteDatabase::Statement stmtGet = db.prepare(regulator, R"( SELECT value FROM _cf_KV WHERE key = ? diff --git a/src/workerd/util/sqlite-test.c++ b/src/workerd/util/sqlite-test.c++ index 55ea988eeba..9fb039cb998 100644 --- a/src/workerd/util/sqlite-test.c++ +++ b/src/workerd/util/sqlite-test.c++ @@ -434,8 +434,8 @@ KJ_TEST("SQLite Regulator") { INSERT INTO bar VALUES (456); )"); - RegulatorImpl noFoo("foo"); - RegulatorImpl noBar("bar"); + static RegulatorImpl noFoo("foo"); + static RegulatorImpl noBar("bar"); // We can prepare and run statements that comply with the regulator. auto getFoo = db.prepare(noBar, "SELECT value FROM foo"); @@ -493,7 +493,7 @@ struct RowCounts { template RowCounts countRowsTouched(SqliteDatabase& db, - const SqliteDatabase::Regulator& regulator, + SqliteDatabase::StaticRegulator regulator, kj::StringPtr sqlCode, Params... 
bindParams) { uint64_t rowsFound = 0; @@ -734,7 +734,7 @@ KJ_TEST("SQLite row counters with triggers") { } }; - RegulatorImpl regulator; + static RegulatorImpl regulator; db.run(R"( CREATE TABLE things ( @@ -860,7 +860,7 @@ KJ_TEST("SQLite observer addQueryStats") { TempDirOnDisk dir; SqliteDatabase::Vfs vfs(*dir); TestSqliteObserver sqliteObserver = TestSqliteObserver(); - TestQueryStatsRegulator regulator; + static TestQueryStatsRegulator regulator; SqliteDatabase db(vfs, kj::Path({"foo"}), kj::WriteMode::CREATE | kj::WriteMode::MODIFY, /*sqliteMaxMemoryBytes=*/kj::maxValue, sqliteObserver); diff --git a/src/workerd/util/sqlite.c++ b/src/workerd/util/sqlite.c++ index a65a7db84bb..1a090cd0f48 100644 --- a/src/workerd/util/sqlite.c++ +++ b/src/workerd/util/sqlite.c++ @@ -252,7 +252,7 @@ class SqliteCallScope { // sqliteErrorCode is a kj::Maybe and represents the error code from sqlite. #define SQLITE_REQUIRE(condition, sqliteErrorCode, errorMessage, ...) \ if (!(condition)) { \ - regulator.onError(sqliteErrorCode, errorMessage); \ + regulator->onError(sqliteErrorCode, errorMessage); \ KJ_FAIL_REQUIRE("SENTRY_DO SQLite failed", errorMessage, ##__VA_ARGS__); \ } @@ -785,7 +785,7 @@ void SqliteDatabase::applyChange(const StateChange& change) { // Set up the regulator that will be used for authorizer callbacks while preparing this // statement. 
-SqliteDatabase::StatementAndEffect SqliteDatabase::prepareSql(const Regulator& regulator, +SqliteDatabase::StatementAndEffect SqliteDatabase::prepareSql(StaticRegulator regulator, kj::StringPtr sqlCode, uint prepFlags, Multi multi, @@ -904,8 +904,8 @@ SqliteDatabase::StatementAndEffect SqliteDatabase::prepareSql(const Regulator& r // Report queryEvent for this statement sqliteObserver.reportQueryEvent(kj::mv(queryStatement), rowsRead, rowsWritten, - queryLatency, dbWalBytesWritten, err, extendedCode, regulator.shouldAddQueryStats(), - kj::mv(queryErrorDescription)); + queryLatency, dbWalBytesWritten, err, extendedCode, + regulator->shouldAddQueryStats(), kj::mv(queryErrorDescription)); if (err == SQLITE_DONE) { // good @@ -941,7 +941,7 @@ SqliteDatabase::StatementAndEffect SqliteDatabase::prepareSql(const Regulator& r } SqliteDatabase::IngestResult SqliteDatabase::ingestSql( - const Regulator& regulator, kj::StringPtr sqlCode) { + StaticRegulator regulator, kj::StringPtr sqlCode) { uint64_t rowsRead = 0; uint64_t rowsWritten = 0; uint64_t statementCount = 0; @@ -971,7 +971,7 @@ SqliteDatabase::IngestResult SqliteDatabase::ingestSql( } void SqliteDatabase::executeWithRegulator( - const Regulator& regulator, kj::FunctionParam func) { + StaticRegulator regulator, kj::FunctionParam func) { // currentRegulator would only be set if we're running this method while running something else // with a regulator. I'm not sure what the ramifications are, so for now, we'll just assume that // we can only call executeWithRegulator when no regulator is currently set. @@ -1023,7 +1023,7 @@ bool SqliteDatabase::isAuthorized(int actionCode, kj::Maybe param2, kj::Maybe dbName, kj::Maybe triggerName) { - const Regulator& regulator = KJ_UNWRAP_OR(currentRegulator, { + StaticRegulator regulator = KJ_UNWRAP_OR(currentRegulator, { // We're not currently preparing a statement, so we didn't expect the authorizer callback to // run. We blanket-deny in this case as a precaution. 
KJ_LOG(ERROR, "SQLite authorizer callback invoked at unexpected time", kj::getStackTrace()); @@ -1031,7 +1031,7 @@ bool SqliteDatabase::isAuthorized(int actionCode, }); KJ_IF_SOME(t, triggerName) { - if (!regulator.isAllowedTrigger(t)) { + if (!regulator->isAllowedTrigger(t)) { // Log an error because it seems really suspicious if a trigger runs when it's not allowed. // I want to understand if this can even happen. KJ_LOG(ERROR, "disallowed trigger somehow ran in trusted scope?", t, kj::getStackTrace()); @@ -1071,7 +1071,7 @@ bool SqliteDatabase::isAuthorized(int actionCode, } } - if (&regulator == &TRUSTED && actionCode != SQLITE_TRANSACTION && + if (regulator.get() == &TRUSTED && actionCode != SQLITE_TRANSACTION && actionCode != SQLITE_SAVEPOINT) { // Everything is allowed for trusted queries. (But transactions and savepoints need special // handling below.) @@ -1097,7 +1097,7 @@ bool SqliteDatabase::isAuthorized(int actionCode, case SQLITE_DROP_VIEW: /* View Name NULL */ case SQLITE_REINDEX: /* Index Name NULL */ KJ_ASSERT(param2 == kj::none); - return regulator.isAllowedName(KJ_ASSERT_NONNULL(param1)); + return regulator->isAllowedName(KJ_ASSERT_NONNULL(param1)); case SQLITE_ANALYZE: /* Table Name NULL */ KJ_ASSERT(param2 == kj::none); @@ -1119,22 +1119,22 @@ bool SqliteDatabase::isAuthorized(int actionCode, return true; case SQLITE_ALTER_TABLE: /* Table Name NULL (modified) */ - return regulator.isAllowedName(KJ_ASSERT_NONNULL(param1)); + return regulator->isAllowedName(KJ_ASSERT_NONNULL(param1)); case SQLITE_READ: /* Table Name Column Name */ case SQLITE_UPDATE: /* Table Name Column Name */ - return regulator.isAllowedName(KJ_ASSERT_NONNULL(param1)); + return regulator->isAllowedName(KJ_ASSERT_NONNULL(param1)); case SQLITE_CREATE_INDEX: /* Index Name Table Name */ case SQLITE_DROP_INDEX: /* Index Name Table Name */ case SQLITE_CREATE_TRIGGER: /* Trigger Name Table Name */ case SQLITE_DROP_TRIGGER: /* Trigger Name Table Name */ - return
regulator.isAllowedName(KJ_ASSERT_NONNULL(param1)) && - regulator.isAllowedName(KJ_ASSERT_NONNULL(param2)); + return regulator->isAllowedName(KJ_ASSERT_NONNULL(param1)) && + regulator->isAllowedName(KJ_ASSERT_NONNULL(param2)); case SQLITE_TRANSACTION: /* Operation NULL */ { - if (!regulator.allowTransactions()) { + if (!regulator->allowTransactions()) { return false; } @@ -1160,7 +1160,7 @@ bool SqliteDatabase::isAuthorized(int actionCode, case SQLITE_SAVEPOINT: /* Operation Savepoint Name */ { kj::String name = kj::str(KJ_ASSERT_NONNULL(param2)); - if (!regulator.allowTransactions() || !regulator.isAllowedName(name)) { + if (!regulator->allowTransactions() || !regulator->isAllowedName(name)) { return false; } @@ -1197,7 +1197,7 @@ bool SqliteDatabase::isAuthorized(int actionCode, } else if (pragma == "table_info" || pragma == "table_xinfo") { // Allow if the specific named table is not protected. KJ_IF_SOME(name, param2) { - return regulator.isAllowedName(name); + return regulator->isAllowedName(name); } else { return false; // shouldn't happen? } @@ -1237,11 +1237,11 @@ bool SqliteDatabase::isAuthorized(int actionCode, case PragmaSignature::OBJECT_NAME: { // Argument is required. 
auto val = KJ_UNWRAP_OR(param2, return false); - return regulator.isAllowedName(val); + return regulator->isAllowedName(val); } case PragmaSignature::OPTIONAL_OBJECT_NAME: { auto val = KJ_UNWRAP_OR(param2, return true); - return regulator.isAllowedName(val); + return regulator->isAllowedName(val); } case PragmaSignature::NULL_OR_NUMBER: { // Argument is not required @@ -1255,7 +1255,7 @@ bool SqliteDatabase::isAuthorized(int actionCode, // val is allowed if it parses to an integer if (val.tryParseAs() != kj::none) return true; // Otherwise, val must be the name of an object the user has access to - return regulator.isAllowedName(val); + return regulator->isAllowedName(val); } } KJ_UNREACHABLE; @@ -1288,7 +1288,7 @@ bool SqliteDatabase::isAuthorized(int actionCode, if (strcasecmp(moduleName.begin(), "fts5") == 0 || strcasecmp(moduleName.begin(), "fts5vocab") == 0) { if (util::Autogate::isEnabled(util::AutogateKey::SQL_RESTRICT_RESERVED_NAMES)) { - return regulator.isAllowedName(KJ_ASSERT_NONNULL(param1)); + return regulator->isAllowedName(KJ_ASSERT_NONNULL(param1)); } auto& tableName = KJ_ASSERT_NONNULL(param1); if (tableName.size() >= 4 && strncasecmp(tableName.begin(), "_cf_", 4) == 0) { @@ -1336,12 +1336,12 @@ bool SqliteDatabase::isAuthorized(int actionCode, bool SqliteDatabase::isAuthorizedTemp(int actionCode, const kj::Maybe& param1, const kj::Maybe& param2, - const Regulator& regulator) { + StaticRegulator regulator) { switch (actionCode) { case SQLITE_READ: /* Table Name Column Name */ case SQLITE_UPDATE: /* Table Name Column Name */ - return regulator.isAllowedName(KJ_ASSERT_NONNULL(param1)); + return regulator->isAllowedName(KJ_ASSERT_NONNULL(param1)); default: return false; } @@ -1430,7 +1430,7 @@ void SqliteDatabase::setupSecurity(sqlite3* db) { } SqliteDatabase::Statement SqliteDatabase::prepare( - const Regulator& regulator, kj::StringPtr sqlCode) { + StaticRegulator regulator, kj::StringPtr sqlCode) { return Statement( *this, regulator, 
prepareSql(regulator, sqlCode, SQLITE_PREPARE_PERSISTENT, SINGLE)); } @@ -1504,12 +1504,12 @@ void SqliteDatabase::Query::destroy() { // active from the caller. auto memoryScope = db.enterMemoryScope(); - if (regulator.shouldAddQueryStats()) { + if (regulator->shouldAddQueryStats()) { // Update the db stats that we have collected for the query. db.sqliteObserver.addQueryStats(rowsRead, rowsWritten); } - queryEvent.setQueryEventStats(rowsRead, rowsWritten, !(regulator.shouldAddQueryStats())); + queryEvent.setQueryEventStats(rowsRead, rowsWritten, !(regulator->shouldAddQueryStats())); try { kj::StringPtr statement = sqlite3_sql(getStatementAndEffect().statement); @@ -1546,7 +1546,7 @@ void SqliteDatabase::Query::destroy() { } void SqliteDatabase::Query::checkRequirements(size_t size) { - if (regulator.shouldAddQueryStats()) { + if (regulator->shouldAddQueryStats()) { KJ_IF_SOME(actorAccountLimits, db.actorAccountLimits) { actorAccountLimits.requireActorCanExecuteQueries(); } @@ -1756,7 +1756,7 @@ bool SqliteDatabase::Query::isNull(uint column) { SqliteDatabase::StatementAndEffect& SqliteDatabase::Query::getStatementAndEffect() { return KJ_UNWRAP_OR(maybeStatement, { - regulator.onError(kj::none, "SQLite query was canceled because the database was deleted."); + regulator->onError(kj::none, "SQLite query was canceled because the database was deleted."); KJ_FAIL_REQUIRE("query canceled because reset() was called on the database"); }); } diff --git a/src/workerd/util/sqlite.h b/src/workerd/util/sqlite.h index 64623d5dbd0..4c178ae0e17 100644 --- a/src/workerd/util/sqlite.h +++ b/src/workerd/util/sqlite.h @@ -76,8 +76,31 @@ class SqliteDatabase { struct VfsOptions; class Regulator; - struct QueryOptions { + // StaticRegulator is a wrapper type that asserts (using consteval) that the Regulator is + // defined statically, and thus its lifetime is process-long. Regulators are static bundles of + // logic that parameterize how a SqliteDatabase behaves. 
There are a couple of challenges to + // making a SqliteDatabase template-parameterized with these behaviors, but we do want to assert + // that no one accidentally provides a relatively short-lived Regulator to logic that outlives + // that Regulator. For now, requiring that regulators are statically defined fits this goal. + class StaticRegulator { + public: + // consteval implicitly asserts that the const Regulator& reference is known at compile time, + // which means it must have a static memory address. + consteval StaticRegulator(const Regulator& regulator): regulator(regulator) {} + + const Regulator* operator->() const { + return &regulator; + } + const Regulator* get() const { + return &regulator; + } + + private: const Regulator& regulator; + }; + + struct QueryOptions { + StaticRegulator regulator; bool allowUnconfirmed = false; }; @@ -174,7 +197,7 @@ class SqliteDatabase { // Prepares the given SQL code as a persistent statement that can be used across several queries. // Don't use this for one-off queries; use run() instead. - Statement prepare(const Regulator& regulator, kj::StringPtr sqlCode); + Statement prepare(StaticRegulator regulator, kj::StringPtr sqlCode); // Prepares a statement that may actually be multiple statements (separated by semicolons). // In this case, the code is not actually parsed until first executed (this implies @@ -186,7 +209,7 @@ class SqliteDatabase { // // As with exec(), the result of executing a batch of multiple statements is always the result // of the last statement. The results of all other statements are discarded. - Statement prepareMulti(const Regulator& regulator, kj::String sqlCode); + Statement prepareMulti(StaticRegulator regulator, kj::String sqlCode); // Convenience method to start a query.
This is equivalent to `prepare(sqlCode).run(bindings...)` // except: @@ -201,7 +224,7 @@ class SqliteDatabase { Statement prepare(const char (&sqlCode)[size]); template - Statement prepare(const Regulator& regulator, const char (&sqlCode)[size]); + Statement prepare(StaticRegulator regulator, const char (&sqlCode)[size]); // When the input is a string literal, we automatically use the TRUSTED regulator. template @@ -251,10 +274,10 @@ class SqliteDatabase { // Helper to execute a chunk of SQL that may not be complete. // Executes every valid statement provided, and returns the remaining portion of the input // that was not processed. This is used for streaming SQL ingestion. - IngestResult ingestSql(const Regulator& regulator, kj::StringPtr sqlCode); + IngestResult ingestSql(StaticRegulator regulator, kj::StringPtr sqlCode); // Execute a function with the given regulator. - void executeWithRegulator(const Regulator& regulator, kj::FunctionParam func); + void executeWithRegulator(StaticRegulator regulator, kj::FunctionParam func); // Resets the database to an empty state by deleting the underlying database file and creating // a new one in its place. This is the recommended way to "drop database" in SQLite, and is used @@ -341,7 +364,7 @@ class SqliteDatabase { kj::Maybe maybeDb; // Set while a query is compiling. - kj::Maybe currentRegulator; + kj::Maybe currentRegulator; // Set during the *first* time a statement is being compiled, to capture information about it // from the authorizer callback. It is assumed that if the statement must be re-parsed later, @@ -418,7 +441,7 @@ class SqliteDatabase { // // If `prelude` is provided, then, in MULTI mode, all statements which are executed immediately // are also appended to `prelude`. 
- StatementAndEffect prepareSql(const Regulator& regulator, + StatementAndEffect prepareSql(StaticRegulator regulator, kj::StringPtr sqlCode, uint prepFlags, Multi multi, @@ -435,7 +458,7 @@ class SqliteDatabase { bool isAuthorizedTemp(int actionCode, const kj::Maybe& param1, const kj::Maybe& param2, - const Regulator& regulator); + StaticRegulator regulator); void setupSecurity(sqlite3* db); @@ -484,20 +507,20 @@ class SqliteDatabase::Statement final: private ResetListener { Query run(StatementOptions options, Params&&... bindings); private: - const Regulator& regulator; + StaticRegulator regulator; kj::OneOf stmt; // List of statements to execute before this one. Only non-empty if this Statement was created // by prepareMulti(). kj::Vector prelude; - Statement(SqliteDatabase& db, const Regulator& regulator, StatementAndEffect stmt) + Statement(SqliteDatabase& db, StaticRegulator regulator, StatementAndEffect stmt) : ResetListener(db), regulator(regulator), stmt(kj::mv(stmt)) {} // Lazily-parsed statement -- used by `prepareMulti()`. 
- Statement(SqliteDatabase& db, const Regulator& regulator, kj::String sqlCode) + Statement(SqliteDatabase& db, StaticRegulator regulator, kj::String sqlCode) : ResetListener(db), regulator(regulator), stmt(kj::mv(sqlCode)) {} @@ -663,7 +686,7 @@ class SqliteDatabase::Query final: private ResetListener { kj::Maybe queryErrorDescription = kj::none; }; - const Regulator& regulator; + StaticRegulator regulator; StatementAndEffect ownStatement; // for one-off queries kj::Maybe maybeStatement; // null if database was reset bool done = false; @@ -1031,12 +1054,12 @@ SqliteDatabase::Statement SqliteDatabase::prepare(const char (&sqlCode)[size]) { } template SqliteDatabase::Statement SqliteDatabase::prepare( - const Regulator& regulator, const char (&sqlCode)[size]) { + StaticRegulator regulator, const char (&sqlCode)[size]) { return prepare(regulator, kj::StringPtr(sqlCode, size - 1)); } inline SqliteDatabase::Statement SqliteDatabase::prepareMulti( - const Regulator& regulator, kj::String sqlCode) { + StaticRegulator regulator, kj::String sqlCode) { return Statement(*this, regulator, kj::mv(sqlCode)); } diff --git a/src/workerd/util/stream-utils.c++ b/src/workerd/util/stream-utils.c++ index 33b33ddc54c..afb32614884 100644 --- a/src/workerd/util/stream-utils.c++ +++ b/src/workerd/util/stream-utils.c++ @@ -241,8 +241,8 @@ kj::Own newNeuterableInputStream(kj::AsyncInputStream& in return kj::refcounted(inner); } -kj::Own newNeuterableIoStream(kj::AsyncIoStream& inner) { - return kj::heap(inner); +kj::Rc newNeuterableIoStream(kj::AsyncIoStream& inner) { + return kj::rc(inner); } } // namespace workerd diff --git a/src/workerd/util/stream-utils.h b/src/workerd/util/stream-utils.h index 3e0c23f0458..1dba4e48f8d 100644 --- a/src/workerd/util/stream-utils.h +++ b/src/workerd/util/stream-utils.h @@ -28,7 +28,7 @@ class NeuterableInputStream: public kj::AsyncInputStream, public kj::Refcounted virtual void neuter(kj::Exception ex) = 0; }; -class NeuterableIoStream: public 
kj::AsyncIoStream { +class NeuterableIoStream: public kj::AsyncIoStream, public kj::Refcounted { public: virtual void neuter(kj::Exception ex) = 0; }; @@ -44,6 +44,6 @@ class EndableAsyncOutputStream: public kj::AsyncOutputStream { }; kj::Own newNeuterableInputStream(kj::AsyncInputStream&); -kj::Own newNeuterableIoStream(kj::AsyncIoStream&); +kj::Rc newNeuterableIoStream(kj::AsyncIoStream&); } // namespace workerd diff --git a/tools/BUILD.bazel b/tools/BUILD.bazel index 6dd0c7ade0f..8882f7850dc 100644 --- a/tools/BUILD.bazel +++ b/tools/BUILD.bazel @@ -64,7 +64,6 @@ native_binary( "@bazel_tools//src/conditions:linux_x86_64": "@clang_tidy_linux_amd64//file:downloaded", "@bazel_tools//src/conditions:linux_aarch64": "@clang_tidy_linux_arm64//file:downloaded", "@bazel_tools//src/conditions:darwin_arm64": "@clang_tidy_darwin_arm64//file:downloaded", - "@bazel_tools//src/conditions:windows_x64": "@clang_tidy_windows_amd64//file:downloaded", }, ), out = "clang_tidy", diff --git a/tools/clang-tidy/BUILD.bazel b/tools/clang-tidy/BUILD.bazel new file mode 100644 index 00000000000..4ea62f706c2 --- /dev/null +++ b/tools/clang-tidy/BUILD.bazel @@ -0,0 +1,65 @@ +"""Workerd JSG clang-tidy plugin. + +`jsg-lint` is a clang-tidy module providing workerd-specific static checks +(currently the `jsg-visit-for-gc` check that validates JSG resource types +correctly visit their GC roots). + +The plugin is loaded into clang-tidy via `--load=`. Symbols defined +in the host clang-tidy binary (ClangTidyCheck vtable, ClangTidyModuleRegistry, +etc.) are resolved at dlopen() time. This requires the host clang-tidy to +have been built with LLVM_ENABLE_PLUGINS=ON, CLANG_PLUGIN_SUPPORT=ON, and +LLVM_ENABLE_RTTI=ON; the binary published by cloudflare/workerd-tools +(version >= clang-tidy-22.1.5) meets all three. + +The source file is exported via `exports_files` so downstream projects can +rebuild the plugin against their own clang/LLVM headers (e.g. 
linking +against a system libclang rather than the workerd-tools dev archive). +""" + +load("@rules_cc//cc:cc_library.bzl", "cc_library") +load("@rules_cc//cc:cc_shared_library.bzl", "cc_shared_library") + +exports_files(["jsg-lint.c++"]) + +cc_library( + name = "jsg-lint-static", + srcs = ["jsg-lint.c++"], + copts = select({ + # workerd-tools clang-tidy links against libstdc++ on Linux; the + # plugin must match. macOS uses libc++ (Apple default). + "@platforms//os:linux": ["-stdlib=libstdc++"], + "//conditions:default": [], + }), + tags = ["manual"], + # No clang-tidy is published by workerd-tools for Windows. + target_compatible_with = select({ + "@platforms//os:linux": [], + "@platforms//os:macos": [], + "//conditions:default": ["@platforms//:incompatible"], + }), + deps = ["@clang_tidy_dev_headers"], +) + +cc_shared_library( + name = "jsg-lint", + tags = ["manual"], + target_compatible_with = select({ + "@platforms//os:linux": [], + "@platforms//os:macos": [], + "//conditions:default": ["@platforms//:incompatible"], + }), + user_link_flags = select({ + # On macOS, a host-loaded plugin must defer resolution of clang-tidy + # symbols (ClangTidyCheck vtable etc.) to dlopen() time. ld64 still + # supports -undefined dynamic_lookup for this case. + "@platforms//os:macos": [ + "-undefined", + "dynamic_lookup", + ], + "//conditions:default": [ + "-stdlib=libstdc++", + ], + }), + visibility = ["//visibility:public"], + deps = [":jsg-lint-static"], +) diff --git a/tools/clang-tidy/BUILD.headers b/tools/clang-tidy/BUILD.headers new file mode 100644 index 00000000000..d051de3a790 --- /dev/null +++ b/tools/clang-tidy/BUILD.headers @@ -0,0 +1,47 @@ +load("@rules_cc//cc:cc_library.bzl", "cc_library") + +package(default_visibility = ["//visibility:public"]) + +# Headers needed to compile the workerd jsg-lint clang-tidy plugin +# out-of-tree against the clang-tidy binary published by +# cloudflare/workerd-tools. 
jsg-lint.c++ uses the C++ API directly but +# transitively pulls in llvm-c headers (e.g. llvm-c/DataTypes.h via +# llvm/Support/DataTypes.h), so the C-API directories are kept. +# clang-tools-extra is narrowed to the top-level clang-tidy plugin +# headers (ClangTidyCheck.h, ClangTidyModule.h); the ~400 files under +# per-check subdirectories are not needed. +# +# `build/` holds tablegen-generated `.inc` files (Attrs.inc, +# DiagnosticGroups.inc, OMP.inc, etc.) and the Config headers +# (llvm/Config/llvm-config.h, abi-breaking.h) that are transitively +# included by every LLVM Support header, so it is not optional. +cc_library( + name = "clang_tidy_dev_headers", + hdrs = glob( + [ + "clang/include/clang/**/*.h", + "clang/include/clang/**/*.def", + "clang/include/clang/**/*.inc", + "clang/include/clang-c/**/*.h", + "llvm/include/llvm/**/*.h", + "llvm/include/llvm/**/*.def", + "llvm/include/llvm/**/*.inc", + "llvm/include/llvm-c/**/*.h", + "build/include/**/*.h", + "build/include/**/*.def", + "build/include/**/*.inc", + "build/tools/clang/include/**/*.h", + "build/tools/clang/include/**/*.def", + "build/tools/clang/include/**/*.inc", + "clang-tools-extra/clang-tidy/*.h", + ], + allow_empty = True, + ), + includes = [ + "build/include", + "build/tools/clang/include", + "clang-tools-extra", + "clang/include", + "llvm/include", + ], +) diff --git a/tools/clang-tidy/jsg-lint.c++ b/tools/clang-tidy/jsg-lint.c++ new file mode 100644 index 00000000000..78bb3154cd0 --- /dev/null +++ b/tools/clang-tidy/jsg-lint.c++ @@ -0,0 +1,430 @@ +#include "clang-tidy/ClangTidyCheck.h" +#include "clang-tidy/ClangTidyModule.h" +#include "clang/AST/DeclTemplate.h" +#include "clang/AST/Type.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/Basic/SourceManager.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/StringRef.h" + +#include + +namespace workerd { +namespace jsglint { + +// Anchored suffix match: returns true iff `qualifiedName` equals `suffix` or +// is of the 
form `::`. Avoids the substring trap that would +// otherwise match `foo::jsg::Refrigerator` against `jsg::Ref`. +// +// TODO: Replace this with proper scope analysis: resolve each known visitable +// template by qualified name once via Sema::LookupQualifiedName at the first +// MatchFinder callback, cache the TemplateDecl* pointers, and compare those +// directly. Pointer-identity is the correct primitive for AST scope queries; +// suffix matching is a pragmatic shortcut that depends on jsg/kj never having +// `using namespace` aliases that introduce a name into an unrelated namespace. +static bool endsWithQualified(llvm::StringRef qualifiedName, + llvm::StringRef suffix) { + if (qualifiedName == suffix) return true; + if (qualifiedName.size() < suffix.size() + 2) return false; + if (!qualifiedName.ends_with(suffix)) return false; + auto sep = qualifiedName.size() - suffix.size(); + return qualifiedName[sep - 1] == ':' && qualifiedName[sep - 2] == ':'; +} + +// Visitable leaf templates: each holds a GC root and must be visited. +static const llvm::StringRef kVisitableLeafTemplates[] = { + "jsg::Ref", "jsg::V8Ref", "jsg::JsRef", + "jsg::Function", "jsg::Promise", "jsg::HashableV8Ref", + "jsg::MemoizedIdentity", +}; + +// Non-template visitable leaf types. +static const llvm::StringRef kVisitableLeafTypes[] = { + "jsg::BufferSource", + "jsg::Name", + "jsg::Value", + "jsg::Data", +}; + +// Container templates whose visitability is determined by their type +// arguments. `FirstArg` containers visit one element; `AnyArg` containers +// (variants) are visitable if any element type is. 
+enum class ContainerKind { None, FirstArg, AnyArg }; + +static const llvm::StringRef kFirstArgContainers[] = { + "kj::Maybe", "kj::Array", "kj::Vector", + "jsg::Optional", "jsg::LenientOptional", +}; + +static const llvm::StringRef kAnyArgContainers[] = { + "kj::OneOf", +}; + +static ContainerKind getContainerKind(llvm::StringRef qualifiedName) { + for (auto suffix : kFirstArgContainers) { + if (endsWithQualified(qualifiedName, suffix)) return ContainerKind::FirstArg; + } + for (auto suffix : kAnyArgContainers) { + if (endsWithQualified(qualifiedName, suffix)) return ContainerKind::AnyArg; + } + return ContainerKind::None; +} + +// Returns the qualified name of the template (e.g. "workerd::jsg::Ref") if +// `qt` is a template specialization; otherwise an empty string. +static std::string getTemplateQualifiedName(clang::QualType qt) { + const auto *t = qt.getTypePtr()->getAs(); + if (!t) return ""; + auto *td = t->getTemplateName().getAsTemplateDecl(); + if (!td) return ""; + return td->getQualifiedNameAsString(); +} + +static bool isVisitableType(clang::QualType qt) { + if (qt.isNull()) return false; + qt = qt.getNonReferenceType().getUnqualifiedType(); + + // Direct named record type, e.g. `jsg::BufferSource`. + if (const auto *rt = qt.getTypePtr()->getAs()) { + auto fqn = rt->getDecl()->getQualifiedNameAsString(); + for (auto suffix : kVisitableLeafTypes) { + if (endsWithQualified(fqn, suffix)) return true; + } + } + + // Template specialization: dispatch on outer template name. 
+ std::string tmpl = getTemplateQualifiedName(qt); + if (tmpl.empty()) return false; + + for (auto suffix : kVisitableLeafTemplates) { + if (endsWithQualified(tmpl, suffix)) return true; + } + + auto kind = getContainerKind(tmpl); + if (kind == ContainerKind::None) return false; + + const auto *t = qt.getTypePtr()->getAs(); + if (!t) return false; + auto args = t->template_arguments(); + + if (kind == ContainerKind::FirstArg) { + if (args.empty()) return false; + if (args[0].getKind() != clang::TemplateArgument::Type) return false; + return isVisitableType(args[0].getAsType()); + } + // AnyArg + for (const auto &arg : args) { + if (arg.getKind() == clang::TemplateArgument::Type) { + if (isVisitableType(arg.getAsType())) return true; + } + } + return false; +} + +// Returns true if `filename` looks like a C++ implementation file (.c++ / +// .cpp / .cc / .c). Implementation files have full visibility into the +// out-of-line bodies of methods declared in the headers they include, so they +// are the correct place to validate visitForGc; running against headers alone +// would yield false "no body" diagnostics when the definition lives in a +// sibling .c++ file. +static bool isImplFile(llvm::StringRef filename) { + return filename.ends_with(".c++") || filename.ends_with(".cpp") || + filename.ends_with(".cc") || filename.ends_with(".c"); +} + +// Returns true if `decl` is lexically nested inside a `namespace jsg` (whose +// fully-qualified name suffix is `::jsg` or which is the top-level `jsg`). +// Used to skip JSG framework internals; the check targets user resource types, +// not the GC primitives the framework itself defines. 
+static bool isInJsgNamespace(const clang::Decl *decl) { + for (const auto *ctx = decl->getDeclContext(); ctx; ctx = ctx->getParent()) { + if (const auto *ns = llvm::dyn_cast(ctx)) { + if (ns->getName() == "jsg") return true; + } + } + return false; +} + +class VisitForGcCheck : public clang::tidy::ClangTidyCheck { + public: + VisitForGcCheck(clang::StringRef Name, clang::tidy::ClangTidyContext *Context) + : ClangTidyCheck(Name, Context) {} + + void registerMatchers(clang::ast_matchers::MatchFinder *Finder) override { + using namespace clang::ast_matchers; + // Match every concrete record definition in the TU; we filter inside + // check(). We also match every FieldDecl so we can compute the set of + // record types that appear as fields of other records (the "used as + // member" set), which gates the Option B diagnostic. + Finder->addMatcher( + cxxRecordDecl(isDefinition(), unless(isImplicit())).bind("record"), + this); + Finder->addMatcher(fieldDecl().bind("field"), this); + } + + void onStartOfTranslationUnit() override { + records_.clear(); + usedAsField_.clear(); + holderVisitForGcVisible_.clear(); + transitivelyVisitedFields_.clear(); + sourceManager_ = nullptr; + } + + void check(const clang::ast_matchers::MatchFinder::MatchResult &Result) override { + sourceManager_ = Result.SourceManager; + + if (const auto *field = Result.Nodes.getNodeAs("field")) { + recordUsedAsField(field->getType()); + return; + } + + const auto *record = Result.Nodes.getNodeAs("record"); + if (!record) return; + + records_.push_back(record); + } + + void onEndOfTranslationUnit() override { + if (sourceManager_ == nullptr) return; + + // Only validate when the primary translation unit is an implementation + // file; .h passes can only see declarations and would yield false "no + // body" diagnostics when visitForGc is defined out-of-line in a sibling + // .c++ that this header pass cannot observe. 
Each header gets walked from + // every .c++ that includes it, so coverage is preserved. + auto mainFile = sourceManager_->getFileEntryRefForID(sourceManager_->getMainFileID()); + if (!mainFile || !isImplFile(mainFile->getName())) return; + + // First pass: walk every visible visitForGc body to populate + // transitivelyVisitedFields_ and holderVisitForGcVisible_. This lets us + // (a) recognize when a parent's visitForGc reaches into a nested struct + // field, and (b) restrict "used-as-field" diagnostics to TUs where some + // holder's body is actually parseable here. + for (const auto *record : records_) { + if (isInJsgNamespace(record)) continue; + if (record->getDescribedClassTemplate() != nullptr) continue; + if (llvm::isa(record)) continue; + if (record->isDependentContext()) continue; + for (const auto *method : record->methods()) { + if (method->getNameAsString() != "visitForGc") continue; + const clang::FunctionDecl *defn = nullptr; + if (!method->isDefined(defn) || defn == nullptr) continue; + llvm::DenseSet unused; + collectVisitedFields(defn->getBody(), unused); + } + } + + // Second pass: emit diagnostics for records that need them. + for (const auto *record : records_) { + checkRecord(record); + } + } + + private: + // Records we encountered in this TU and need to evaluate after the field- + // collection pass completes. + llvm::SmallVector records_; + + // CanonicalDecl* of records that appear as a field type somewhere in this + // TU. We use canonical decls so forward-declared types and their definitions + // alias; declaration redeclarations point at the same canonical decl. + llvm::DenseSet usedAsField_; + + // Records whose holder's visitForGc body is visible in this TU. A struct + // qualifies for the "used as field" diagnostic only when its holder is + // analyzable here; otherwise we'd false-positive in every TU that includes + // the header but not the holder's defining .c++. CanonicalDecl* keys. 
+ llvm::DenseSet holderVisitForGcVisible_; + + // (FieldDecl canonical, ...) pairs marking that some outer holder's + // visitForGc body transitively reaches the given field via a member-access + // chain (e.g., `visitor.visit(s.func)` where `s` is a struct field). + // Used to suppress diagnostics on nested structs whose visitable fields are + // already covered by an enclosing record's visitForGc. + llvm::DenseSet transitivelyVisitedFields_; + + const clang::SourceManager *sourceManager_ = nullptr; + + void recordUsedAsField(clang::QualType qt) { + if (qt.isNull()) return; + qt = qt.getNonReferenceType().getUnqualifiedType(); + + if (const auto *rt = qt.getTypePtr()->getAs()) { + if (const auto *rd = llvm::dyn_cast(rt->getDecl())) { + usedAsField_.insert(rd->getCanonicalDecl()); + } + } + + // Recurse into template arguments so kj::Maybe, kj::Vector, + // etc. mark Impl as used-as-field. + if (const auto *t = qt.getTypePtr()->getAs()) { + for (const auto &arg : t->template_arguments()) { + if (arg.getKind() == clang::TemplateArgument::Type) { + recordUsedAsField(arg.getAsType()); + } + } + } + } + + void checkRecord(const clang::CXXRecordDecl *record) { + // Skip uninstantiated template definitions: primary class templates, + // partial specializations, and anything dependent. Field types in these + // are unresolved. + if (record->getDescribedClassTemplate() != nullptr) return; + if (llvm::isa(record)) return; + if (record->isDependentContext()) return; + + // Skip JSG framework internals; the check targets user resource types. 
+ if (isInJsgNamespace(record)) return; + + llvm::SmallVector visitableFields; + for (const auto *field : record->fields()) { + if (isVisitableType(field->getType())) { + visitableFields.push_back(field); + } + } + if (visitableFields.empty()) return; + + const clang::CXXMethodDecl *visitMethod = nullptr; + for (const auto *method : record->methods()) { + if (method->getNameAsString() == "visitForGc") { + visitMethod = method; + break; + } + } + + if (!visitMethod) { + // No visitForGc on the record itself. Decide whether to diagnose: + // - If the record participates in JSG visitation (has visitForGc in + // a base class, e.g., jsg::Object's empty default), diagnose: the + // framework will dispatch to the empty default and miss the + // visitable fields. This is local-TU-decidable. + // - If the record is used as a field of another record AND some + // holder's visitForGc body in this TU reaches into it, diagnose: + // we have an authoritative view here, and any field the holder + // didn't visit is a real gap. Other TUs that include this header + // but not the holder's defining .c++ stay silent for this struct + // (deferred to whichever TU is authoritative). + // - If used as a field but no holder visitForGc is visible in this + // TU, defer — some other TU will be authoritative. + // - Standalone struct not held anywhere, no diagnostic. 
+ bool hasBaseVisitForGc = false; + for (const auto &base : record->bases()) { + if (const auto *baseRecord = base.getType()->getAsCXXRecordDecl()) { + if (baseHasVisitForGc(baseRecord)) { + hasBaseVisitForGc = true; + break; + } + } + } + bool usedAsField = usedAsField_.count(record->getCanonicalDecl()) != 0; + bool holderVisible = + holderVisitForGcVisible_.count(record->getCanonicalDecl()) != 0; + if (!hasBaseVisitForGc && !(usedAsField && holderVisible)) return; + + for (const auto *field : visitableFields) { + // Suppress when an enclosing record's visitForGc body reaches this + // field via a member-access chain (e.g., visitor.visit(state.func) + // covers State::func from NativeHandler's body). + if (transitivelyVisitedFields_.count(field->getCanonicalDecl())) continue; + + diag(field->getLocation(), + "field '%0' of visitable type '%1' is not visited in visitForGc " + "(class has no visitForGc method)") + << field->getName() << field->getType().getAsString(); + } + return; + } + + // The class has visitForGc declared but no body visible in this TU — the + // out-of-line definition lives in a sibling .c++ that this pass cannot + // observe. Skip silently; the defining TU will check it. + const clang::FunctionDecl *defn = nullptr; + if (!visitMethod->isDefined(defn) || defn == nullptr) return; + const auto *body = defn->getBody(); + if (!body) return; + + llvm::DenseSet visitedFields; + collectVisitedFields(body, visitedFields); + + for (const auto *field : visitableFields) { + if (!visitedFields.count(field->getCanonicalDecl())) { + diag(field->getLocation(), + "field '%0' of visitable type '%1' is not visited in visitForGc") + << field->getName() << field->getType().getAsString(); + } + } + } + + // True if `record` declares its own visitForGc method or transitively + // inherits one. Uses a visited set to avoid revisiting shared bases in + // diamond hierarchies (and to defend against malformed cycles). 
+ static bool baseHasVisitForGc(const clang::CXXRecordDecl *record) { + llvm::DenseSet visited; + return baseHasVisitForGcImpl(record, visited); + } + + static bool baseHasVisitForGcImpl( + const clang::CXXRecordDecl *record, + llvm::DenseSet &visited) { + if (record == nullptr) return false; + record = record->getDefinition(); + if (record == nullptr) return false; + if (!visited.insert(record->getCanonicalDecl()).second) return false; + for (const auto *method : record->methods()) { + if (method->getNameAsString() == "visitForGc") return true; + } + for (const auto &base : record->bases()) { + if (const auto *baseRecord = base.getType()->getAsCXXRecordDecl()) { + if (baseHasVisitForGcImpl(baseRecord, visited)) return true; + } + } + return false; + } + + void collectVisitedFields(const clang::Stmt *stmt, + llvm::DenseSet &visitedFields) { + if (!stmt) return; + + if (auto *memberExpr = llvm::dyn_cast(stmt)) { + auto *memberDecl = memberExpr->getMemberDecl(); + if (auto *fieldDecl = llvm::dyn_cast(memberDecl)) { + if (isVisitableType(fieldDecl->getType())) { + visitedFields.insert(fieldDecl->getCanonicalDecl()); + } + // Record this field as "transitively visited" from an enclosing + // record's perspective. This lets a parent's visitForGc body cover + // a nested struct's visitable fields without that nested struct + // having to declare its own visitForGc (the `NativeHandler::State` + // pattern, where `visitor.visit(state.func)` reaches `State::func` + // from NativeHandler's body). + transitivelyVisitedFields_.insert(fieldDecl->getCanonicalDecl()); + // Mark the record this field belongs to as "holder visible here", + // i.e., we have evidence about whether its fields are visited in + // this TU. Used to gate the "used-as-field" diagnostic so it only + // fires in TUs that can authoritatively answer. 
+ if (const auto *parent = llvm::dyn_cast( + fieldDecl->getParent())) { + holderVisitForGcVisible_.insert(parent->getCanonicalDecl()); + } + } + } + + for (const auto *child : stmt->children()) { + collectVisitedFields(child, visitedFields); + } + } +}; + +class JsgLintModule : public clang::tidy::ClangTidyModule { + public: + void addCheckFactories(clang::tidy::ClangTidyCheckFactories &CheckFactories) override { + CheckFactories.registerCheck("jsg-visit-for-gc"); + } +}; + +static clang::tidy::ClangTidyModuleRegistry::Add + X("jsg-lint", "Workerd JSG static checks."); + +} // namespace jsglint +} // namespace workerd diff --git a/tools/cross/internal_build.py b/tools/cross/internal_build.py index 5dd58271c8b..c7b8f20e64b 100644 --- a/tools/cross/internal_build.py +++ b/tools/cross/internal_build.py @@ -11,7 +11,7 @@ def parse_args(): parser.add_argument("pr_id", help="Pull Request ID") parser.add_argument("merge_sha", help="Merge Commit SHA") parser.add_argument("head_sha", help="HEAD Commit SHA") - parser.add_argument("run_attempt", help="# of Run Attempt") + parser.add_argument("run_id", help="Unique ID for this CI job attempt") parser.add_argument("branch_name", help="PR's Branch Name") parser.add_argument("URL", help="URL to submit build task") parser.add_argument("client_id", help="CF Access client id") @@ -33,7 +33,7 @@ def parse_args(): "pr_id": args.pr_id, "merge_commit_sha": args.merge_sha, "head_commit_sha": args.head_sha, - "run_attempt": args.run_attempt, + "run_id": args.run_id, "branch_name": args.branch_name, } diff --git a/types/defines/cf.d.ts b/types/defines/cf.d.ts index d7784c0bcb1..ff4d6626f37 100644 --- a/types/defines/cf.d.ts +++ b/types/defines/cf.d.ts @@ -763,6 +763,32 @@ interface IncomingRequestCfPropertiesTLSClientAuth { * @example "Dec 22 19:39:00 2018 GMT" */ certNotAfter: string; + /** + * The client leaf certificate in [RFC 9440](https://www.rfc-editor.org/rfc/rfc9440) + * format (`:base64-DER:`). 
Empty if no client certificate was presented or if + * the leaf certificate exceeded 10 KB (see {@link certRFC9440TooLarge}). + * + * Suitable for forwarding to an origin via the `Client-Cert` HTTP header. + */ + certRFC9440: string; + /** + * `true` if the leaf certificate exceeded 10 KB and was omitted from + * {@link certRFC9440}. + */ + certRFC9440TooLarge: boolean; + /** + * The intermediate certificate chain in [RFC 9440](https://www.rfc-editor.org/rfc/rfc9440) + * format as a comma-separated list. Empty if no intermediates were sent or + * if the chain exceeded 16 KB (see {@link certChainRFC9440TooLarge}). + * + * Suitable for forwarding to an origin via the `Client-Cert-Chain` HTTP header. + */ + certChainRFC9440: string; + /** + * `true` if the intermediate chain exceeded 16 KB and was omitted from + * {@link certChainRFC9440}. + */ + certChainRFC9440TooLarge: boolean; } /** Placeholder values for TLS Client Authorization */ @@ -784,6 +810,10 @@ interface IncomingRequestCfPropertiesTLSClientAuthPlaceholder { certFingerprintSHA256: ""; certNotBefore: ""; certNotAfter: ""; + certRFC9440: ""; + certRFC9440TooLarge: false; + certChainRFC9440: ""; + certChainRFC9440TooLarge: false; } /** Possible outcomes of TLS verification */ diff --git a/types/generated-snapshot/experimental/index.d.ts b/types/generated-snapshot/experimental/index.d.ts index 9ed67679cd2..5b8135e21b7 100755 --- a/types/generated-snapshot/experimental/index.d.ts +++ b/types/generated-snapshot/experimental/index.d.ts @@ -12898,6 +12898,32 @@ interface IncomingRequestCfPropertiesTLSClientAuth { * @example "Dec 22 19:39:00 2018 GMT" */ certNotAfter: string; + /** + * The client leaf certificate in [RFC 9440](https://www.rfc-editor.org/rfc/rfc9440) + * format (`:base64-DER:`). Empty if no client certificate was presented or if + * the leaf certificate exceeded 10 KB (see {@link certRFC9440TooLarge}). + * + * Suitable for forwarding to an origin via the `Client-Cert` HTTP header. 
+ */ + certRFC9440: string; + /** + * `true` if the leaf certificate exceeded 10 KB and was omitted from + * {@link certRFC9440}. + */ + certRFC9440TooLarge: boolean; + /** + * The intermediate certificate chain in [RFC 9440](https://www.rfc-editor.org/rfc/rfc9440) + * format as a comma-separated list. Empty if no intermediates were sent or + * if the chain exceeded 16 KB (see {@link certChainRFC9440TooLarge}). + * + * Suitable for forwarding to an origin via the `Client-Cert-Chain` HTTP header. + */ + certChainRFC9440: string; + /** + * `true` if the intermediate chain exceeded 16 KB and was omitted from + * {@link certChainRFC9440}. + */ + certChainRFC9440TooLarge: boolean; } /** Placeholder values for TLS Client Authorization */ interface IncomingRequestCfPropertiesTLSClientAuthPlaceholder { @@ -12918,6 +12944,10 @@ interface IncomingRequestCfPropertiesTLSClientAuthPlaceholder { certFingerprintSHA256: ""; certNotBefore: ""; certNotAfter: ""; + certRFC9440: ""; + certRFC9440TooLarge: false; + certChainRFC9440: ""; + certChainRFC9440TooLarge: false; } /** Possible outcomes of TLS verification */ declare type CertVerificationStatus = diff --git a/types/generated-snapshot/experimental/index.ts b/types/generated-snapshot/experimental/index.ts index 2c9e15fdefa..91373d9af55 100755 --- a/types/generated-snapshot/experimental/index.ts +++ b/types/generated-snapshot/experimental/index.ts @@ -12915,6 +12915,32 @@ export interface IncomingRequestCfPropertiesTLSClientAuth { * @example "Dec 22 19:39:00 2018 GMT" */ certNotAfter: string; + /** + * The client leaf certificate in [RFC 9440](https://www.rfc-editor.org/rfc/rfc9440) + * format (`:base64-DER:`). Empty if no client certificate was presented or if + * the leaf certificate exceeded 10 KB (see {@link certRFC9440TooLarge}). + * + * Suitable for forwarding to an origin via the `Client-Cert` HTTP header. 
+ */ + certRFC9440: string; + /** + * `true` if the leaf certificate exceeded 10 KB and was omitted from + * {@link certRFC9440}. + */ + certRFC9440TooLarge: boolean; + /** + * The intermediate certificate chain in [RFC 9440](https://www.rfc-editor.org/rfc/rfc9440) + * format as a comma-separated list. Empty if no intermediates were sent or + * if the chain exceeded 16 KB (see {@link certChainRFC9440TooLarge}). + * + * Suitable for forwarding to an origin via the `Client-Cert-Chain` HTTP header. + */ + certChainRFC9440: string; + /** + * `true` if the intermediate chain exceeded 16 KB and was omitted from + * {@link certChainRFC9440}. + */ + certChainRFC9440TooLarge: boolean; } /** Placeholder values for TLS Client Authorization */ export interface IncomingRequestCfPropertiesTLSClientAuthPlaceholder { @@ -12935,6 +12961,10 @@ export interface IncomingRequestCfPropertiesTLSClientAuthPlaceholder { certFingerprintSHA256: ""; certNotBefore: ""; certNotAfter: ""; + certRFC9440: ""; + certRFC9440TooLarge: false; + certChainRFC9440: ""; + certChainRFC9440TooLarge: false; } /** Possible outcomes of TLS verification */ export declare type CertVerificationStatus = diff --git a/types/generated-snapshot/latest/index.d.ts b/types/generated-snapshot/latest/index.d.ts index 2a7e1831347..156c2444d87 100755 --- a/types/generated-snapshot/latest/index.d.ts +++ b/types/generated-snapshot/latest/index.d.ts @@ -12230,6 +12230,32 @@ interface IncomingRequestCfPropertiesTLSClientAuth { * @example "Dec 22 19:39:00 2018 GMT" */ certNotAfter: string; + /** + * The client leaf certificate in [RFC 9440](https://www.rfc-editor.org/rfc/rfc9440) + * format (`:base64-DER:`). Empty if no client certificate was presented or if + * the leaf certificate exceeded 10 KB (see {@link certRFC9440TooLarge}). + * + * Suitable for forwarding to an origin via the `Client-Cert` HTTP header. 
+ */ + certRFC9440: string; + /** + * `true` if the leaf certificate exceeded 10 KB and was omitted from + * {@link certRFC9440}. + */ + certRFC9440TooLarge: boolean; + /** + * The intermediate certificate chain in [RFC 9440](https://www.rfc-editor.org/rfc/rfc9440) + * format as a comma-separated list. Empty if no intermediates were sent or + * if the chain exceeded 16 KB (see {@link certChainRFC9440TooLarge}). + * + * Suitable for forwarding to an origin via the `Client-Cert-Chain` HTTP header. + */ + certChainRFC9440: string; + /** + * `true` if the intermediate chain exceeded 16 KB and was omitted from + * {@link certChainRFC9440}. + */ + certChainRFC9440TooLarge: boolean; } /** Placeholder values for TLS Client Authorization */ interface IncomingRequestCfPropertiesTLSClientAuthPlaceholder { @@ -12250,6 +12276,10 @@ interface IncomingRequestCfPropertiesTLSClientAuthPlaceholder { certFingerprintSHA256: ""; certNotBefore: ""; certNotAfter: ""; + certRFC9440: ""; + certRFC9440TooLarge: false; + certChainRFC9440: ""; + certChainRFC9440TooLarge: false; } /** Possible outcomes of TLS verification */ declare type CertVerificationStatus = diff --git a/types/generated-snapshot/latest/index.ts b/types/generated-snapshot/latest/index.ts index 5af1c0dc329..062c4f1d408 100755 --- a/types/generated-snapshot/latest/index.ts +++ b/types/generated-snapshot/latest/index.ts @@ -12247,6 +12247,32 @@ export interface IncomingRequestCfPropertiesTLSClientAuth { * @example "Dec 22 19:39:00 2018 GMT" */ certNotAfter: string; + /** + * The client leaf certificate in [RFC 9440](https://www.rfc-editor.org/rfc/rfc9440) + * format (`:base64-DER:`). Empty if no client certificate was presented or if + * the leaf certificate exceeded 10 KB (see {@link certRFC9440TooLarge}). + * + * Suitable for forwarding to an origin via the `Client-Cert` HTTP header. + */ + certRFC9440: string; + /** + * `true` if the leaf certificate exceeded 10 KB and was omitted from + * {@link certRFC9440}. 
+ */ + certRFC9440TooLarge: boolean; + /** + * The intermediate certificate chain in [RFC 9440](https://www.rfc-editor.org/rfc/rfc9440) + * format as a comma-separated list. Empty if no intermediates were sent or + * if the chain exceeded 16 KB (see {@link certChainRFC9440TooLarge}). + * + * Suitable for forwarding to an origin via the `Client-Cert-Chain` HTTP header. + */ + certChainRFC9440: string; + /** + * `true` if the intermediate chain exceeded 16 KB and was omitted from + * {@link certChainRFC9440}. + */ + certChainRFC9440TooLarge: boolean; } /** Placeholder values for TLS Client Authorization */ export interface IncomingRequestCfPropertiesTLSClientAuthPlaceholder { @@ -12267,6 +12293,10 @@ export interface IncomingRequestCfPropertiesTLSClientAuthPlaceholder { certFingerprintSHA256: ""; certNotBefore: ""; certNotAfter: ""; + certRFC9440: ""; + certRFC9440TooLarge: false; + certChainRFC9440: ""; + certChainRFC9440TooLarge: false; } /** Possible outcomes of TLS verification */ export declare type CertVerificationStatus =