diff --git a/config.example.yaml b/config.example.yaml index 7d4f3c5..e8a1c01 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -108,3 +108,29 @@ storage: log: # Log level: debug, info, warn, error level: "info" + +# ms3t — embedded S3-compatible HTTP server backed by an MST. When +# enabled, sprue exposes an S3 listener alongside its UCAN endpoint; +# objects are stored as content-addressed blocks in a local SQLite +# blockstore and (optionally) shipped to a Forge stack via guppy. +ms3t: + enabled: false + # host:port for the S3 listener. Path-style addressing only; clients + # must set forcePathStyle=true and disable streaming uploads + # (request_checksum_calculation=when_required for AWS CLI v2.23+). + addr: ":9000" + # ms3t persists its log segments and space keypair under this + # directory. Created if missing. + data_dir: "./ms3t-data" + # Body chunk size in bytes for new objects. Default 1 MiB. + chunk_size: 1048576 + # Log: seal the open segment when its on-disk size reaches this + # threshold. Default 64 MiB. + seal_bytes: 67108864 + # Log: seal the open segment when it has been open this long, even + # if size has not been reached. Default 5s. + seal_age: "5s" + # Log: number of most-recent sealed segments to keep on disk after + # they have been shipped to Forge. Older flushed segments are + # unlinked. Higher values trade disk for read locality. Default 6. + retain: 6 diff --git a/go.mod b/go.mod index 6da5e59..83a2619 100644 --- a/go.mod +++ b/go.mod @@ -3,18 +3,19 @@ module github.com/storacha/sprue go 1.25.3 require ( - github.com/aws/aws-sdk-go-v2 v1.41.3 - github.com/aws/aws-sdk-go-v2/config v1.32.11 - github.com/aws/aws-sdk-go-v2/credentials v1.19.11 + github.com/aws/aws-sdk-go-v2 v1.41.6 + github.com/aws/aws-sdk-go-v2/config v1.32.16 + github.com/aws/aws-sdk-go-v2/credentials v1.19.15 github.com/aws/aws-sdk-go-v2/feature/dynamodb/attributevalue v1.20.34 github.com/aws/aws-sdk-go-v2/service/dynamodb v1.56.1 - github.com/aws/aws-sdk-go-v2/service/s3 v1.96.4 + github.com/aws/aws-sdk-go-v2/service/s3 v1.99.1 github.com/google/uuid v1.6.0 + github.com/hashicorp/golang-lru/arc/v2 v2.0.7 github.com/ipfs/go-cid v0.6.0 github.com/ipfs/go-log/v2 v2.9.0 github.com/ipld/go-ipld-prime v0.21.1-0.20240917223228-6148356a4c2e github.com/jackc/pgx/v5 v5.9.1 - github.com/labstack/echo/v4 v4.14.0 + github.com/labstack/echo/v4 v4.15.0 github.com/multiformats/go-multiaddr v0.16.0 github.com/multiformats/go-multibase v0.2.0 github.com/multiformats/go-multihash v0.2.3 @@ -23,25 +24,54 @@ require ( github.com/spf13/cobra v1.10.2 github.com/spf13/viper v1.21.0 github.com/storacha/go-libstoracha v0.7.5 - github.com/storacha/go-ucanto v0.7.2 + github.com/storacha/go-ucanto v0.8.2 + github.com/storacha/guppy v0.7.0 + github.com/storacha/indexing-service v1.12.2 github.com/stretchr/testify v1.11.1 github.com/testcontainers/testcontainers-go v0.42.0 github.com/testcontainers/testcontainers-go/modules/dynamodb v0.41.0 github.com/testcontainers/testcontainers-go/modules/minio v0.40.0 github.com/testcontainers/testcontainers-go/modules/postgres v0.42.0 + github.com/versity/versitygw v1.4.1 go.uber.org/fx v1.24.0 go.uber.org/zap v1.27.0 ) require ( + github.com/Azure/go-ntlmssp v0.1.0 // indirect + github.com/DataDog/datadog-go/v5 v5.8.3 // indirect + github.com/andybalholm/brotli v1.2.1 // indirect + github.com/aws/aws-sdk-go-v2/feature/s3/transfermanager v0.1.17 // indirect + github.com/cenkalti/backoff/v5 v5.0.3 // indirect + github.com/go-asn1-ber/asn1-ber v1.5.8-0.20250403174932-29230038a667 // indirect + github.com/go-ldap/ldap/v3 v3.4.13 // indirect + github.com/gofiber/fiber/v2 v2.52.12 // indirect + github.com/hashicorp/go-cleanhttp v0.5.2 // indirect + github.com/hashicorp/go-retryablehttp v0.7.8 // indirect + github.com/hashicorp/go-rootcerts v1.0.2 // indirect + github.com/hashicorp/go-secure-stdlib/strutil v0.1.2 // indirect + github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect + github.com/hashicorp/vault-client-go v0.4.3 // indirect + github.com/ipfs/boxo v0.30.0 // indirect github.com/jackc/pgpassfile v1.0.0 // indirect github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect github.com/jackc/puddle/v2 v2.2.2 // indirect github.com/mfridman/interpolate v0.0.2 // indirect + github.com/mitchellh/go-homedir v1.1.0 // indirect github.com/moby/moby/api v1.54.1 // indirect github.com/moby/moby/client v0.4.0 // indirect github.com/moby/sys/atomicwriter v0.1.0 // indirect + github.com/nats-io/nats.go v1.51.0 // indirect + github.com/nats-io/nkeys v0.4.15 // indirect + github.com/nats-io/nuid v1.0.1 // indirect + github.com/pierrec/lz4/v4 v4.1.26 // indirect + github.com/rabbitmq/amqp091-go v1.10.0 // indirect + github.com/ryanuber/go-glob v1.0.0 // indirect + github.com/segmentio/kafka-go v0.4.50 // indirect github.com/sethvargo/go-retry v0.3.0 // indirect + github.com/smira/go-statsd v1.3.4 // indirect + github.com/stretchr/objx v0.5.3 // indirect + github.com/valyala/fasthttp v1.70.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.43.0 // indirect go.opentelemetry.io/otel/sdk v1.43.0 // indirect go.opentelemetry.io/otel/sdk/metric v1.43.0 // indirect @@ -52,26 +82,25 @@ require ( dario.cat/mergo v1.0.2 // indirect github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c // indirect github.com/Microsoft/go-winio v0.6.2 // indirect - github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.6 // indirect - github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.19 // indirect - github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.19 // indirect - github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.19 // indirect - github.com/aws/aws-sdk-go-v2/internal/ini v1.8.5 // indirect - github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.20 // indirect + github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.9 // indirect + github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.22 // indirect + github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.22 // indirect + github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.22 // indirect + github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.23 // indirect github.com/aws/aws-sdk-go-v2/service/dynamodbstreams v1.32.12 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.6 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.11 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.8 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.14 // indirect github.com/aws/aws-sdk-go-v2/service/internal/endpoint-discovery v1.11.19 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.19 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.19 // indirect - github.com/aws/aws-sdk-go-v2/service/signin v1.0.7 // indirect - github.com/aws/aws-sdk-go-v2/service/sso v1.30.12 // indirect - github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.16 // indirect - github.com/aws/aws-sdk-go-v2/service/sts v1.41.8 // indirect - github.com/aws/smithy-go v1.24.2 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.22 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.22 // indirect + github.com/aws/aws-sdk-go-v2/service/signin v1.0.10 // indirect + github.com/aws/aws-sdk-go-v2/service/sso v1.30.16 // indirect + github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.20 // indirect + github.com/aws/aws-sdk-go-v2/service/sts v1.42.0 // indirect + github.com/aws/smithy-go v1.25.0 // indirect github.com/cenkalti/backoff/v4 v4.3.0 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect - github.com/clipperhouse/uax29/v2 v2.6.0 // indirect + github.com/clipperhouse/uax29/v2 v2.7.0 // indirect github.com/containerd/errdefs v1.0.0 // indirect github.com/containerd/errdefs/pkg v0.3.0 // indirect github.com/containerd/log v0.1.0 // indirect @@ -86,45 +115,45 @@ require ( github.com/ebitengine/purego v0.10.0 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect github.com/filecoin-project/go-data-segment v0.0.1 // indirect - github.com/filecoin-project/go-fil-commcid v0.2.0 // indirect + github.com/filecoin-project/go-fil-commcid v0.3.1 // indirect github.com/filecoin-project/go-fil-commp-hashhash v0.2.0 // indirect github.com/fsnotify/fsnotify v1.9.0 // indirect github.com/go-logr/logr v1.4.3 // indirect github.com/go-logr/stdr v1.2.2 // indirect - github.com/go-ole/go-ole v1.2.6 // indirect + github.com/go-ole/go-ole v1.3.0 // indirect github.com/go-viper/mapstructure/v2 v2.4.0 // indirect github.com/gobwas/glob v0.2.3 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/hashicorp/golang-lru v1.0.2 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/ipfs/bbloom v0.0.4 // indirect - github.com/ipfs/go-block-format v0.2.0 // indirect + github.com/ipfs/go-block-format v0.2.1 github.com/ipfs/go-blockservice v0.5.2 // indirect github.com/ipfs/go-datastore v0.9.0 // indirect github.com/ipfs/go-ipfs-blockstore v1.3.1 // indirect github.com/ipfs/go-ipfs-ds-help v1.1.1 // indirect github.com/ipfs/go-ipfs-exchange-interface v0.2.1 // indirect github.com/ipfs/go-ipfs-util v0.0.3 // indirect - github.com/ipfs/go-ipld-cbor v0.1.0 // indirect - github.com/ipfs/go-ipld-format v0.6.0 // indirect + github.com/ipfs/go-ipld-cbor v0.2.0 + github.com/ipfs/go-ipld-format v0.6.1 // indirect github.com/ipfs/go-ipld-legacy v0.2.1 // indirect github.com/ipfs/go-log v1.0.5 // indirect github.com/ipfs/go-merkledag v0.11.0 // indirect - github.com/ipfs/go-metrics-interface v0.0.1 // indirect + github.com/ipfs/go-metrics-interface v0.3.0 // indirect github.com/ipfs/go-verifcid v0.0.3 // indirect github.com/ipld/go-car v0.6.2 // indirect - github.com/ipld/go-codec-dagpb v1.6.0 // indirect + github.com/ipld/go-codec-dagpb v1.7.0 // indirect github.com/ipni/go-libipni v0.6.18 // indirect github.com/klauspost/compress v1.18.5 // indirect - github.com/klauspost/cpuid/v2 v2.2.10 // indirect + github.com/klauspost/cpuid/v2 v2.3.0 // indirect github.com/labstack/gommon v0.4.2 // indirect github.com/libp2p/go-buffer-pool v0.1.0 // indirect github.com/libp2p/go-libp2p v0.41.1 // indirect - github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect + github.com/lufia/plan9stats v0.0.0-20240513124658-fba389f38bae // indirect github.com/magiconair/properties v1.8.10 // indirect github.com/mattn/go-colorable v0.1.14 // indirect - github.com/mattn/go-isatty v0.0.20 // indirect - github.com/mattn/go-runewidth v0.0.19 // indirect + github.com/mattn/go-isatty v0.0.21 // indirect + github.com/mattn/go-runewidth v0.0.23 // indirect github.com/minio/sha256-simd v1.0.1 // indirect github.com/moby/docker-image-spec v1.3.1 // indirect github.com/moby/go-archive v0.2.0 // indirect @@ -136,14 +165,13 @@ require ( github.com/mr-tron/base58 v1.2.0 // indirect github.com/multiformats/go-base32 v0.1.0 // indirect github.com/multiformats/go-base36 v0.2.0 // indirect - github.com/multiformats/go-multicodec v0.9.1 // indirect + github.com/multiformats/go-multicodec v0.9.2 github.com/multiformats/go-varint v0.1.0 // indirect github.com/opencontainers/go-digest v1.0.0 // indirect github.com/opencontainers/image-spec v1.1.1 // indirect github.com/opentracing/opentracing-go v1.2.0 // indirect github.com/pelletier/go-toml/v2 v2.2.4 // indirect github.com/pion/datachannel v1.6.0 // indirect - github.com/pion/logging v0.2.4 // indirect github.com/pion/sctp v1.9.2 // indirect github.com/pion/webrtc/v4 v4.2.9 // indirect github.com/pkg/errors v0.9.1 // indirect @@ -164,7 +192,7 @@ require ( github.com/ucan-wg/go-ucan v0.0.0-20240916120445-37f52863156c // indirect github.com/valyala/bytebufferpool v1.0.0 // indirect github.com/valyala/fasttemplate v1.2.2 // indirect - github.com/whyrusleeping/cbor-gen v0.3.1 // indirect + github.com/whyrusleeping/cbor-gen v0.3.1 github.com/yusufpapurcu/wmi v1.2.4 // indirect go.opentelemetry.io/auto/sdk v1.2.1 // indirect go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.67.0 // indirect @@ -175,13 +203,13 @@ require ( go.uber.org/dig v1.19.0 // indirect go.uber.org/multierr v1.11.0 // indirect go.yaml.in/yaml/v3 v3.0.4 // indirect - golang.org/x/crypto v0.49.0 // indirect - golang.org/x/exp v0.0.0-20260218203240-3dfff04db8fa // indirect - golang.org/x/net v0.52.0 // indirect - golang.org/x/sys v0.42.0 // indirect - golang.org/x/text v0.35.0 // indirect - golang.org/x/time v0.14.0 // indirect - golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da // indirect + golang.org/x/crypto v0.50.0 // indirect + golang.org/x/exp v0.0.0-20260312153236-7ab1446f8b90 // indirect + golang.org/x/net v0.53.0 // indirect + golang.org/x/sys v0.43.0 // indirect + golang.org/x/text v0.36.0 // indirect + golang.org/x/time v0.15.0 // indirect + golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20260401024825-9d38bb4040a9 // indirect google.golang.org/protobuf v1.36.11 // indirect diff --git a/go.sum b/go.sum index 7dc4c21..ace4b18 100644 --- a/go.sum +++ b/go.sum @@ -43,60 +43,71 @@ github.com/AdaLogics/go-fuzz-headers v0.0.0-20240806141605-e8a1dd7889d6 h1:He8af github.com/AdaLogics/go-fuzz-headers v0.0.0-20240806141605-e8a1dd7889d6/go.mod h1:8o94RPi1/7XTJvwPpRSzSUedZrtlirdB3r9Z20bi2f8= github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c h1:udKWzYgxTojEKWjV8V+WSxDXJ4NFATAsZjh8iIbsQIg= github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= +github.com/Azure/go-ntlmssp v0.1.0 h1:DjFo6YtWzNqNvQdrwEyr/e4nhU3vRiwenz5QX7sFz+A= +github.com/Azure/go-ntlmssp v0.1.0/go.mod h1:NYqdhxd/8aAct/s4qSYZEerdPuH1liG2/X9DiVTbhpk= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= +github.com/DataDog/datadog-go/v5 v5.8.3 h1:s58CUJ9s8lezjhTNJO/SxkPBv2qZjS3ktpRSqGF5n0s= +github.com/DataDog/datadog-go/v5 v5.8.3/go.mod h1:K9kcYBlxkcPP8tvvjZZKs/m1edNAUFzBbdpTUKfCsuw= +github.com/Microsoft/go-winio v0.5.0/go.mod h1:JPGBdM1cNvN/6ISo+n8V5iA4v8pBzdOpzfwIujj1a84= github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY= github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU= +github.com/alexbrainman/sspi v0.0.0-20250919150558-7d374ff0d59e h1:4dAU9FXIyQktpoUAgOJK3OTFc/xug0PCXYCqU0FgDKI= +github.com/alexbrainman/sspi v0.0.0-20250919150558-7d374ff0d59e/go.mod h1:cEWa1LVoE5KvSD9ONXsZrj0z6KqySlCCNKHlLzbqAt4= +github.com/andybalholm/brotli v1.2.1 h1:R+f5xP285VArJDRgowrfb9DqL18yVK0gKAW/F+eTWro= +github.com/andybalholm/brotli v1.2.1/go.mod h1:rzTDkvFWvIrjDXZHkuS16NPggd91W3kUSvPlQ1pLaKY= github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o= github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY= github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= -github.com/aws/aws-sdk-go-v2 v1.41.3 h1:4kQ/fa22KjDt13QCy1+bYADvdgcxpfH18f0zP542kZA= -github.com/aws/aws-sdk-go-v2 v1.41.3/go.mod h1:mwsPRE8ceUUpiTgF7QmQIJ7lgsKUPQOUl3o72QBrE1o= -github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.6 h1:N4lRUXZpZ1KVEUn6hxtco/1d2lgYhNn1fHkkl8WhlyQ= -github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.6/go.mod h1:lyw7GFp3qENLh7kwzf7iMzAxDn+NzjXEAGjKS2UOKqI= -github.com/aws/aws-sdk-go-v2/config v1.32.11 h1:ftxI5sgz8jZkckuUHXfC/wMUc8u3fG1vQS0plr2F2Zs= -github.com/aws/aws-sdk-go-v2/config v1.32.11/go.mod h1:twF11+6ps9aNRKEDimksp923o44w/Thk9+8YIlzWMmo= -github.com/aws/aws-sdk-go-v2/credentials v1.19.11 h1:NdV8cwCcAXrCWyxArt58BrvZJ9pZ9Fhf9w6Uh5W3Uyc= -github.com/aws/aws-sdk-go-v2/credentials v1.19.11/go.mod h1:30yY2zqkMPdrvxBqzI9xQCM+WrlrZKSOpSJEsylVU+8= +github.com/asaskevich/EventBus v0.0.0-20200907212545-49d423059eef h1:2JGTg6JapxP9/R33ZaagQtAM4EkkSYnIAlOG5EI8gkM= +github.com/asaskevich/EventBus v0.0.0-20200907212545-49d423059eef/go.mod h1:JS7hed4L1fj0hXcyEejnW57/7LCetXggd+vwrRnYeII= +github.com/aws/aws-sdk-go-v2 v1.41.6 h1:1AX0AthnBQzMx1vbmir3Y4WsnJgiydmnJjiLu+LvXOg= +github.com/aws/aws-sdk-go-v2 v1.41.6/go.mod h1:dy0UzBIfwSeot4grGvY1AqFWN5zgziMmWGzysDnHFcQ= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.9 h1:adBsCIIpLbLmYnkQU+nAChU5yhVTvu5PerROm+/Kq2A= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.9/go.mod h1:uOYhgfgThm/ZyAuJGNQ5YgNyOlYfqnGpTHXvk3cpykg= +github.com/aws/aws-sdk-go-v2/config v1.32.16 h1:Q0iQ7quUgJP0F/SCRTieScnaMdXr9h/2+wze1u3cNeM= +github.com/aws/aws-sdk-go-v2/config v1.32.16/go.mod h1:duCCnJEFqpt2RC6no1iK6q+8HpwOAkiUua0pY507dQc= +github.com/aws/aws-sdk-go-v2/credentials v1.19.15 h1:fyvgWTszojq8hEnMi8PPBTvZdTtEVmAVyo+NFLHBhH4= +github.com/aws/aws-sdk-go-v2/credentials v1.19.15/go.mod h1:gJiYyMOjNg8OEdRWOf3CrFQxM2a98qmrtjx1zuiQfB8= github.com/aws/aws-sdk-go-v2/feature/dynamodb/attributevalue v1.20.34 h1:gBoK/UF+CltS2dkNgpUwEROtNBtAsVCfWqIi+0qRDVA= github.com/aws/aws-sdk-go-v2/feature/dynamodb/attributevalue v1.20.34/go.mod h1:B4x2ogC2wSey/swvEainiBzLXiY89+xJaa85vcJFvD8= -github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.19 h1:INUvJxmhdEbVulJYHI061k4TVuS3jzzthNvjqvVvTKM= -github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.19/go.mod h1:FpZN2QISLdEBWkayloda+sZjVJL+e9Gl0k1SyTgcswU= -github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.19 h1:/sECfyq2JTifMI2JPyZ4bdRN77zJmr6SrS1eL3augIA= -github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.19/go.mod h1:dMf8A5oAqr9/oxOfLkC/c2LU/uMcALP0Rgn2BD5LWn0= -github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.19 h1:AWeJMk33GTBf6J20XJe6qZoRSJo0WfUhsMdUKhoODXE= -github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.19/go.mod h1:+GWrYoaAsV7/4pNHpwh1kiNLXkKaSoppxQq9lbH8Ejw= -github.com/aws/aws-sdk-go-v2/internal/ini v1.8.5 h1:clHU5fm//kWS1C2HgtgWxfQbFbx4b6rx+5jzhgX9HrI= -github.com/aws/aws-sdk-go-v2/internal/ini v1.8.5/go.mod h1:O3h0IK87yXci+kg6flUKzJnWeziQUKciKrLjcatSNcY= -github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.20 h1:qi3e/dmpdONhj1RyIZdi6DKKpDXS5Lb8ftr3p7cyHJc= -github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.20/go.mod h1:V1K+TeJVD5JOk3D9e5tsX2KUdL7BlB+FV6cBhdobN8c= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.22 h1:IOGsJ1xVWhsi+ZO7/NW8OuZZBtMJLZbk4P5HDjJO0jQ= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.22/go.mod h1:b+hYdbU+jGKfXE8kKM6g1+h+L/Go3vMvzlxBsiuGsxg= +github.com/aws/aws-sdk-go-v2/feature/s3/transfermanager v0.1.17 h1:95y7/EqethAhFwMKJ9cDutzBhsS1h8uBwkJ5rp8pNTU= +github.com/aws/aws-sdk-go-v2/feature/s3/transfermanager v0.1.17/go.mod h1:77baheqr62SkTw77HWH8qpdWTd2gXKN0xg0qLvDSkpk= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.22 h1:GmLa5Kw1ESqtFpXsx5MmC84QWa/ZrLZvlJGa2y+4kcQ= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.22/go.mod h1:6sW9iWm9DK9YRpRGga/qzrzNLgKpT2cIxb7Vo2eNOp0= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.22 h1:dY4kWZiSaXIzxnKlj17nHnBcXXBfac6UlsAx2qL6XrU= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.22/go.mod h1:KIpEUx0JuRZLO7U6cbV204cWAEco2iC3l061IxlwLtI= +github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.23 h1:FPXsW9+gMuIeKmz7j6ENWcWtBGTe1kH8r9thNt5Uxx4= +github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.23/go.mod h1:7J8iGMdRKk6lw2C+cMIphgAnT8uTwBwNOsGkyOCm80U= github.com/aws/aws-sdk-go-v2/service/dynamodb v1.56.1 h1:EkW4NqA2mwCkL7YCDYh6OpA/bCMhKYbZgpRHt2FD2Ow= github.com/aws/aws-sdk-go-v2/service/dynamodb v1.56.1/go.mod h1:OQp5333OH1IjmJmJpTU4IwoaOoCMnDrThg0zIx169rE= github.com/aws/aws-sdk-go-v2/service/dynamodbstreams v1.32.12 h1:EhZjf2GKn/V3yPfYmUGdYmrcbxaGu2LO0M6ZrOt/qu8= github.com/aws/aws-sdk-go-v2/service/dynamodbstreams v1.32.12/go.mod h1:KPi0H5VdX4011P0gF806TZt8EiP3FkeRkt6+lzMUvxQ= -github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.6 h1:XAq62tBTJP/85lFD5oqOOe7YYgWxY9LvWq8plyDvDVg= -github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.6/go.mod h1:x0nZssQ3qZSnIcePWLvcoFisRXJzcTVvYpAAdYX8+GI= -github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.11 h1:BYf7XNsJMzl4mObARUBUib+j2tf0U//JAAtTnYqvqCw= -github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.11/go.mod h1:aEUS4WrNk/+FxkBZZa7tVgp4pGH+kFGW40Y8rCPqt5g= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.8 h1:HtOTYcbVcGABLOVuPYaIihj6IlkqubBwFj10K5fxRek= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.8/go.mod h1:VsK9abqQeGlzPgUr+isNWzPlK2vKe9INMLWnY65f5Xs= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.14 h1:xnvDEnw+pnj5mctWiYuFbigrEzSm35x7k4KS/ZkCANg= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.14/go.mod h1:yS5rNogD8e0Wu9+l3MUwr6eENBzEeGejvINpN5PAYfY= github.com/aws/aws-sdk-go-v2/service/internal/endpoint-discovery v1.11.19 h1:jdCj9vbCXwzTcIJX+MVd2UdssFhRJFTrWlPZwZB8Hpk= github.com/aws/aws-sdk-go-v2/service/internal/endpoint-discovery v1.11.19/go.mod h1:Dgg2d5WGRr7YB8JJsELskBxLUhgwWppXPwlvmuQKhbc= -github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.19 h1:X1Tow7suZk9UCJHE1Iw9GMZJJl0dAnKXXP1NaSDHwmw= -github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.19/go.mod h1:/rARO8psX+4sfjUQXp5LLifjUt8DuATZ31WptNJTyQA= -github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.19 h1:JnQeStZvPHFHeyky/7LbMlyQjUa+jIBj36OlWm0pzIk= -github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.19/go.mod h1:HGyasyHvYdFQeJhvDHfH7HXkHh57htcJGKDZ+7z+I24= -github.com/aws/aws-sdk-go-v2/service/s3 v1.96.4 h1:4ExZyubQ6LQQVuF2Qp9OsfEvsTdAWh5Gfwf6PgIdLdk= -github.com/aws/aws-sdk-go-v2/service/s3 v1.96.4/go.mod h1:NF3JcMGOiARAss1ld3WGORCw71+4ExDD2cbbdKS5PpA= -github.com/aws/aws-sdk-go-v2/service/signin v1.0.7 h1:Y2cAXlClHsXkkOvWZFXATr34b0hxxloeQu/pAZz2row= -github.com/aws/aws-sdk-go-v2/service/signin v1.0.7/go.mod h1:idzZ7gmDeqeNrSPkdbtMp9qWMgcBwykA7P7Rzh5DXVU= -github.com/aws/aws-sdk-go-v2/service/sso v1.30.12 h1:iSsvB9EtQ09YrsmIc44Heqlx5ByGErqhPK1ZQLppias= -github.com/aws/aws-sdk-go-v2/service/sso v1.30.12/go.mod h1:fEWYKTRGoZNl8tZ77i61/ccwOMJdGxwOhWCkp6TXAr0= -github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.16 h1:EnUdUqRP1CNzt2DkV67tJx6XDN4xlfBFm+bzeNOQVb0= -github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.16/go.mod h1:Jic/xv0Rq/pFNCh3WwpH4BEqdbSAl+IyHro8LbibHD8= -github.com/aws/aws-sdk-go-v2/service/sts v1.41.8 h1:XQTQTF75vnug2TXS8m7CVJfC2nniYPZnO1D4Np761Oo= -github.com/aws/aws-sdk-go-v2/service/sts v1.41.8/go.mod h1:Xgx+PR1NUOjNmQY+tRMnouRp83JRM8pRMw/vCaVhPkI= -github.com/aws/smithy-go v1.24.2 h1:FzA3bu/nt/vDvmnkg+R8Xl46gmzEDam6mZ1hzmwXFng= -github.com/aws/smithy-go v1.24.2/go.mod h1:YE2RhdIuDbA5E5bTdciG9KrW3+TiEONeUWCqxX9i1Fc= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.22 h1:PUmZeJU6Y1Lbvt9WFuJ0ugUK2xn6hIWUBBbKuOWF30s= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.22/go.mod h1:nO6egFBoAaoXze24a2C0NjQCvdpk8OueRoYimvEB9jo= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.22 h1:SE+aQ4DEqG53RRCAIHlCf//B2ycxGH7jFkpnAh/kKPM= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.22/go.mod h1:ES3ynECd7fYeJIL6+oax+uIEljmfps0S70BaQzbMd/o= +github.com/aws/aws-sdk-go-v2/service/s3 v1.99.1 h1:kU/eBN5+MWNo/LcbNa4hWDdN76hdcd7hocU5kvu7IsU= +github.com/aws/aws-sdk-go-v2/service/s3 v1.99.1/go.mod h1:Fw9aqhJicIVee1VytBBjH+l+5ov6/PhbtIK/u3rt/ls= +github.com/aws/aws-sdk-go-v2/service/signin v1.0.10 h1:a1Fq/KXn75wSzoJaPQTgZO0wHGqE9mjFnylnqEPTchA= +github.com/aws/aws-sdk-go-v2/service/signin v1.0.10/go.mod h1:p6+MXNxW7IA6dMgHfTAzljuwSKD0NCm/4lbS4t6+7vI= +github.com/aws/aws-sdk-go-v2/service/sso v1.30.16 h1:x6bKbmDhsgSZwv6q19wY/u3rLk/3FGjJWyqKcIRufpE= +github.com/aws/aws-sdk-go-v2/service/sso v1.30.16/go.mod h1:CudnEVKRtLn0+3uMV0yEXZ+YZOKnAtUJ5DmDhilVnIw= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.20 h1:oK/njaL8GtyEihkWMD4k3VgHCT64RQKkZwh0DG5j8ak= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.20/go.mod h1:JHs8/y1f3zY7U5WcuzoJ/yAYGYtNIVPKLIbp61euvmg= +github.com/aws/aws-sdk-go-v2/service/sts v1.42.0 h1:ks8KBcZPh3PYISr5dAiXCM5/Thcuxk8l+PG4+A0exds= +github.com/aws/aws-sdk-go-v2/service/sts v1.42.0/go.mod h1:pFw33T0WLvXU3rw1WBkpMlkgIn54eCB5FYLhjDc9Foo= +github.com/aws/smithy-go v1.25.0 h1:Sz/XJ64rwuiKtB6j98nDIPyYrV1nVNJ4YU74gttcl5U= +github.com/aws/smithy-go v1.25.0/go.mod h1:YE2RhdIuDbA5E5bTdciG9KrW3+TiEONeUWCqxX9i1Fc= github.com/benbjohnson/clock v1.3.5 h1:VvXlSJBzZpA/zum6Sj74hxwYI2DIxRWuNIoXAzHZz5o= github.com/benbjohnson/clock v1.3.5/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= @@ -114,8 +125,8 @@ github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWR github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= -github.com/clipperhouse/uax29/v2 v2.6.0 h1:z0cDbUV+aPASdFb2/ndFnS9ts/WNXgTNNGFoKXuhpos= -github.com/clipperhouse/uax29/v2 v2.6.0/go.mod h1:Wn1g7MK6OoeDT0vL+Q0SQLDz/KpfsVRgg6W7ihQeh4g= +github.com/clipperhouse/uax29/v2 v2.7.0 h1:+gs4oBZ2gPfVrKPthwbMzWZDaAFPGYK72F0NJv2v7Vk= +github.com/clipperhouse/uax29/v2 v2.7.0/go.mod h1:EFJ2TJMRUaplDxHKj1qAEhCtQPW2tJSwu5BF98AuoVM= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= github.com/cncf/udpa/go v0.0.0-20200629203442-efcf912fb354/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk= github.com/cncf/udpa/go v0.0.0-20201120205902-5459f2c99403/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk= @@ -167,12 +178,16 @@ github.com/envoyproxy/go-control-plane v0.9.9-0.20201210154907-fd9021fe5dad/go.m github.com/envoyproxy/go-control-plane v0.9.9-0.20210217033140-668b12f5399d/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= +github.com/fatih/color v1.16.0 h1:zmkK9Ngbjj+K0yRhTVONQh1p/HknKYSlNT+vZCzyokM= +github.com/fatih/color v1.16.0/go.mod h1:fL2Sau1YI5c0pdGEVCbKQbLXB6edEj1ZgiY4NijnWvE= github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= +github.com/filecoin-project/go-clock v0.1.0 h1:SFbYIM75M8NnFm1yMHhN9Ahy3W5bEZV9gd6MPfXbKVU= +github.com/filecoin-project/go-clock v0.1.0/go.mod h1:4uB/O4PvOjlx1VCMdZ9MyDZXRm//gkj1ELEbxfI1AZs= github.com/filecoin-project/go-data-segment v0.0.1 h1:1wmDxOG4ubWQm3ZC1XI5nCon5qgSq7Ra3Rb6Dbu10Gs= github.com/filecoin-project/go-data-segment v0.0.1/go.mod h1:H0/NKbsRxmRFBcLibmABv+yFNHdmtl5AyplYLnb0Zv4= -github.com/filecoin-project/go-fil-commcid v0.2.0 h1:B+5UX8XGgdg/XsdUpST4pEBviKkFOw+Fvl2bLhSKGpI= -github.com/filecoin-project/go-fil-commcid v0.2.0/go.mod h1:8yigf3JDIil+/WpqR5zoKyP0jBPCOGtEqq/K1CcMy9Q= +github.com/filecoin-project/go-fil-commcid v0.3.1 h1:4EfxpHSlvtkOqa9weG2Yt5kxFmPib2xU7Uc9Lbqk7fs= +github.com/filecoin-project/go-fil-commcid v0.3.1/go.mod h1:z7Ssf8d7kspF9QRAVHDbZ+43JK4mkhbGH5lyph1TnKY= github.com/filecoin-project/go-fil-commp-hashhash v0.2.0 h1:HYIUugzjq78YvV3vC6rL95+SfC/aSTVSnZSZiDV5pCk= github.com/filecoin-project/go-fil-commp-hashhash v0.2.0/go.mod h1:VH3fAFOru4yyWar4626IoS5+VGE8SfZiBODJLUigEo4= github.com/flynn/noise v1.1.0 h1:KjPQoQCEFdZDiP03phOvGi11+SVVhBG2wOWAorLsstg= @@ -185,16 +200,21 @@ github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4 github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k= github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= +github.com/go-asn1-ber/asn1-ber v1.5.8-0.20250403174932-29230038a667 h1:BP4M0CvQ4S3TGls2FvczZtj5Re/2ZzkV9VwqPHH/3Bo= +github.com/go-asn1-ber/asn1-ber v1.5.8-0.20250403174932-29230038a667/go.mod h1:hEBeB/ic+5LoWskz+yKT7vGhhPYkProFKoKdwZRWMe0= github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= +github.com/go-ldap/ldap/v3 v3.4.13 h1:+x1nG9h+MZN7h/lUi5Q3UZ0fJ1GyDQYbPvbuH38baDQ= +github.com/go-ldap/ldap/v3 v3.4.13/go.mod h1:LxsGZV6vbaK0sIvYfsv47rfh4ca0JXokCoKjZxsszv0= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= -github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY= github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= +github.com/go-ole/go-ole v1.3.0 h1:Dt6ye7+vXGIKZ7Xtk4s6/xVdGDQynvom7xCFEdWr6uE= +github.com/go-ole/go-ole v1.3.0/go.mod h1:5LS6F96DhAwUc7C+1HLexzMXY1xGRSryjyPPKW6zv78= github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= github.com/go-viper/mapstructure/v2 v2.4.0 h1:EBsztssimR/CONLSZZ04E8qAkxNYq4Qp9LvH92wZUgs= @@ -203,6 +223,8 @@ github.com/go-yaml/yaml v2.1.0+incompatible/go.mod h1:w2MrLa16VYP0jy6N7M5kHaCkaL github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y= github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= +github.com/gofiber/fiber/v2 v2.52.12 h1:0LdToKclcPOj8PktUdIKo9BUohjjwfnQl42Dhw8/WUw= +github.com/gofiber/fiber/v2 v2.52.12/go.mod h1:YEcBbO/FB+5M1IZNBP9FO3J9281zgPAreiI1oqg8nDw= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= @@ -217,6 +239,7 @@ github.com/golang/mock v1.4.1/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt github.com/golang/mock v1.4.3/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= github.com/golang/mock v1.4.4/go.mod h1:l3mdAwkq5BuhzHwde/uurv3sEJeZMXNpwsxVWU71h+4= github.com/golang/mock v1.5.0/go.mod h1:CWnOUgYIOo4TcNZ0wHX3YZCqsaM1I1Jvs6v3mP3KVu8= +github.com/golang/mock v1.6.0/go.mod h1:p6yTPP+5HYm5mzsMV8JkE6ZKdX+/wYM6Hr+LicevLPs= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= @@ -248,6 +271,7 @@ github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= @@ -267,8 +291,8 @@ github.com/google/pprof v0.0.0-20201023163331-3e6fc7fc9c4c/go.mod h1:kpwsk12EmLe github.com/google/pprof v0.0.0-20201203190320-1bf35d6f28c2/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/pprof v0.0.0-20210122040257-d980be63207e/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/pprof v0.0.0-20210226084205-cbba55b83ad5/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= -github.com/google/pprof v0.0.0-20250208200701-d0013a598941 h1:43XjGa6toxLpeksjcxs1jIoIyr+vUfOqY2c6HB4bpoc= -github.com/google/pprof v0.0.0-20250208200701-d0013a598941/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144= +github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 h1:BHT72Gu3keYf3ZEu2J0b1vyeLSOYI8bm5wbJM/8yDe8= +github.com/google/pprof v0.0.0-20250403155104-27863c87afa6/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA= github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= @@ -286,24 +310,42 @@ github.com/hashicorp/consul/api v1.1.0/go.mod h1:VmuI/Lkw1nC05EYQWNKwWGbkg+FbDBt github.com/hashicorp/consul/sdk v0.1.1/go.mod h1:VKf9jXwCTEY1QZP2MOLRhb5i/I/ssyNV1vwHyQBF0x8= github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= github.com/hashicorp/go-cleanhttp v0.5.1/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= +github.com/hashicorp/go-cleanhttp v0.5.2 h1:035FKYIWjmULyFRBKPs8TBQoi0x6d9G4xc9neXJWAZQ= +github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48= +github.com/hashicorp/go-hclog v1.6.3 h1:Qr2kF+eVWjTiYmU7Y31tYlP1h0q/X3Nl3tPGdaB11/k= +github.com/hashicorp/go-hclog v1.6.3/go.mod h1:W4Qnvbt70Wk/zYJryRzDRU/4r0kIg0PVHBcfoyhpF5M= github.com/hashicorp/go-immutable-radix v1.0.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60= github.com/hashicorp/go-msgpack v0.5.3/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM= github.com/hashicorp/go-multierror v1.0.0/go.mod h1:dHtQlpGsu+cZNNAkkCN/P3hoUDHhCYQXV3UM06sGGrk= +github.com/hashicorp/go-retryablehttp v0.7.8 h1:ylXZWnqa7Lhqpk0L1P1LzDtGcCR0rPVUrx/c8Unxc48= +github.com/hashicorp/go-retryablehttp v0.7.8/go.mod h1:rjiScheydd+CxvumBsIrFKlx3iS0jrZ7LvzFGFmuKbw= github.com/hashicorp/go-rootcerts v1.0.0/go.mod h1:K6zTfqpRlCUIjkwsN4Z+hiSfzSTQa6eBIzfwKfwNnHU= +github.com/hashicorp/go-rootcerts v1.0.2 h1:jzhAVGtqPKbwpyCPELlgNWhE1znq+qwJtW5Oi2viEzc= +github.com/hashicorp/go-rootcerts v1.0.2/go.mod h1:pqUvnprVnM5bf7AOirdbb01K4ccR319Vf4pU3K5EGc8= +github.com/hashicorp/go-secure-stdlib/strutil v0.1.2 h1:kes8mmyCpxJsI7FTwtzRqEy9CdjCtrXrXGuOpxEA7Ts= +github.com/hashicorp/go-secure-stdlib/strutil v0.1.2/go.mod h1:Gou2R9+il93BqX25LAKCLuM+y9U2T4hlwvT1yprcna4= github.com/hashicorp/go-sockaddr v1.0.0/go.mod h1:7Xibr9yA9JjQq1JpNB2Vw7kxv8xerXegt+ozgdvDeDU= github.com/hashicorp/go-syslog v1.0.0/go.mod h1:qPfqrKkXGihmCqbJM2mZgkZGvKG1dFdvsLplgctolz4= github.com/hashicorp/go-uuid v1.0.0/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= github.com/hashicorp/go-uuid v1.0.1/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= +github.com/hashicorp/go-uuid v1.0.3 h1:2gKiV6YVmrJ1i2CKKa9obLvRieoRGviZFL26PcT/Co8= +github.com/hashicorp/go-uuid v1.0.3/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= github.com/hashicorp/go.net v0.0.1/go.mod h1:hjKkEWcCURg++eb33jQU7oqQcI9XDCnUzHA0oac0k90= github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/golang-lru v1.0.2 h1:dV3g9Z/unq5DpblPpw+Oqcv4dU/1omnb4Ok8iPY6p1c= github.com/hashicorp/golang-lru v1.0.2/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4= +github.com/hashicorp/golang-lru/arc/v2 v2.0.7 h1:QxkVTxwColcduO+LP7eJO56r2hFiG8zEbfAAzRv52KQ= +github.com/hashicorp/golang-lru/arc/v2 v2.0.7/go.mod h1:Pe7gBlGdc8clY5LJ0LpJXMt5AmgmWNH1g+oFFVUHOEc= +github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= +github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= github.com/hashicorp/logutils v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO+LraFDTW64= github.com/hashicorp/mdns v1.0.0/go.mod h1:tL+uN++7HEJ6SQLQ2/p+z2pH24WQKWjBPkE0mNTz8vQ= github.com/hashicorp/memberlist v0.1.3/go.mod h1:ajVTdAv/9Im8oMAAj5G31PhhMCZJV2pPBoIllUwCN7I= github.com/hashicorp/serf v0.8.2/go.mod h1:6hOLApaqBFA1NXqRQAsxw9QxuDEvNxSQRwA/JwenrHc= +github.com/hashicorp/vault-client-go v0.4.3 h1:zG7STGVgn/VK6rnZc0k8PGbfv2x/sJExRKHSUg3ljWc= +github.com/hashicorp/vault-client-go v0.4.3/go.mod h1:4tDw7Uhq5XOxS1fO+oMtotHL7j4sB9cp0T7U6m4FzDY= github.com/huin/goupnp v1.3.0 h1:UvLUlWDNpoUdYzb2TCn+MuTWtcjXKSza2n6CBdQ0xXc= github.com/huin/goupnp v1.3.0/go.mod h1:gnGPsThkYa7bFi/KWmEysQRf48l2dvR5bxr2OFckNX8= github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= @@ -313,10 +355,12 @@ github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2 github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/ipfs/bbloom v0.0.4 h1:Gi+8EGJ2y5qiD5FbsbpX/TMNcJw8gSqr7eyjHa4Fhvs= github.com/ipfs/bbloom v0.0.4/go.mod h1:cS9YprKXpoZ9lT0n/Mw/a6/aFV6DTjTLYHeA+gyqMG0= +github.com/ipfs/boxo v0.30.0 h1:7afsoxPGGqfoH7Dum/wOTGUB9M5fb8HyKPMlLfBvIEQ= +github.com/ipfs/boxo v0.30.0/go.mod h1:BPqgGGyHB9rZZcPSzah2Dc9C+5Or3U1aQe7EH1H7370= github.com/ipfs/go-bitswap v0.11.0 h1:j1WVvhDX1yhG32NTC9xfxnqycqYIlhzEzLXG/cU1HyQ= github.com/ipfs/go-bitswap v0.11.0/go.mod h1:05aE8H3XOU+LXpTedeAS0OZpcO1WFsj5niYQH9a1Tmk= -github.com/ipfs/go-block-format v0.2.0 h1:ZqrkxBA2ICbDRbK8KJs/u0O3dlp6gmAuuXUJNiW1Ycs= -github.com/ipfs/go-block-format v0.2.0/go.mod h1:+jpL11nFx5A/SPpsoBn6Bzkra/zaArfSmsknbPMYgzM= +github.com/ipfs/go-block-format v0.2.1 h1:96kW71XGNNa+mZw/MTzJrCpMhBWCrd9kBLoKm9Iip/Q= +github.com/ipfs/go-block-format v0.2.1/go.mod h1:frtvXHMQhM6zn7HvEQu+Qz5wSTj+04oEH/I+NjDgEjk= github.com/ipfs/go-blockservice v0.5.2 h1:in9Bc+QcXwd1apOVM7Un9t8tixPKdaHQFdLSUM1Xgk8= github.com/ipfs/go-blockservice v0.5.2/go.mod h1:VpMblFEqG67A/H2sHKAemeH9vlURVavlysbdUI632yk= github.com/ipfs/go-cid v0.6.0 h1:DlOReBV1xhHBhhfy/gBNNTSyfOM6rLiIx9J7A4DGf30= @@ -337,16 +381,16 @@ github.com/ipfs/go-ipfs-exchange-interface v0.2.1 h1:jMzo2VhLKSHbVe+mHNzYgs95n0+ github.com/ipfs/go-ipfs-exchange-interface v0.2.1/go.mod h1:MUsYn6rKbG6CTtsDp+lKJPmVt3ZrCViNyH3rfPGsZ2E= github.com/ipfs/go-ipfs-exchange-offline v0.3.0 h1:c/Dg8GDPzixGd0MC8Jh6mjOwU57uYokgWRFidfvEkuA= github.com/ipfs/go-ipfs-exchange-offline v0.3.0/go.mod h1:MOdJ9DChbb5u37M1IcbrRB02e++Z7521fMxqCNRrz9s= -github.com/ipfs/go-ipfs-pq v0.0.2 h1:e1vOOW6MuOwG2lqxcLA+wEn93i/9laCY8sXAw76jFOY= -github.com/ipfs/go-ipfs-pq v0.0.2/go.mod h1:LWIqQpqfRG3fNc5XsnIhz/wQ2XXGyugQwls7BgUmUfY= +github.com/ipfs/go-ipfs-pq v0.0.3 h1:YpoHVJB+jzK15mr/xsWC574tyDLkezVrDNeaalQBsTE= +github.com/ipfs/go-ipfs-pq v0.0.3/go.mod h1:btNw5hsHBpRcSSgZtiNm/SLj5gYIZ18AKtv3kERkRb4= github.com/ipfs/go-ipfs-routing v0.3.0 h1:9W/W3N+g+y4ZDeffSgqhgo7BsBSJwPMcyssET9OWevc= github.com/ipfs/go-ipfs-routing v0.3.0/go.mod h1:dKqtTFIql7e1zYsEuWLyuOU+E0WJWW8JjbTPLParDWo= github.com/ipfs/go-ipfs-util v0.0.3 h1:2RFdGez6bu2ZlZdI+rWfIdbQb1KudQp3VGwPtdNCmE0= github.com/ipfs/go-ipfs-util v0.0.3/go.mod h1:LHzG1a0Ig4G+iZ26UUOMjHd+lfM84LZCrn17xAKWBvs= -github.com/ipfs/go-ipld-cbor v0.1.0 h1:dx0nS0kILVivGhfWuB6dUpMa/LAwElHPw1yOGYopoYs= -github.com/ipfs/go-ipld-cbor v0.1.0/go.mod h1:U2aYlmVrJr2wsUBU67K4KgepApSZddGRDWBYR0H4sCk= -github.com/ipfs/go-ipld-format v0.6.0 h1:VEJlA2kQ3LqFSIm5Vu6eIlSxD/Ze90xtc4Meten1F5U= -github.com/ipfs/go-ipld-format v0.6.0/go.mod h1:g4QVMTn3marU3qXchwjpKPKgJv+zF+OlaKMyhJ4LHPg= +github.com/ipfs/go-ipld-cbor v0.2.0 h1:VHIW3HVIjcMd8m4ZLZbrYpwjzqlVUfjLM7oK4T5/YF0= +github.com/ipfs/go-ipld-cbor v0.2.0/go.mod h1:Cp8T7w1NKcu4AQJLqK0tWpd1nkgTxEVB5C6kVpLW6/0= +github.com/ipfs/go-ipld-format v0.6.1 h1:lQLmBM/HHbrXvjIkrydRXkn+gc0DE5xO5fqelsCKYOQ= +github.com/ipfs/go-ipld-format v0.6.1/go.mod h1:8TOH1Hj+LFyqM2PjSqI2/ZnyO0KlfhHbJLkbxFa61hs= github.com/ipfs/go-ipld-legacy v0.2.1 h1:mDFtrBpmU7b//LzLSypVrXsD8QxkEWxu5qVxN99/+tk= github.com/ipfs/go-ipld-legacy v0.2.1/go.mod h1:782MOUghNzMO2DER0FlBR94mllfdCJCkTtDtPM51otM= github.com/ipfs/go-log v1.0.5 h1:2dOuUCB1Z7uoczMWgAyDck5JLb72zHzrMnGnCNNbvY8= @@ -356,18 +400,18 @@ github.com/ipfs/go-log/v2 v2.9.0 h1:l4b06AwVXwldIzbVPZy5z7sKp9lHFTX0KWfTBCtHaOk= github.com/ipfs/go-log/v2 v2.9.0/go.mod h1:UhIYAwMV7Nb4ZmihUxfIRM2Istw/y9cAk3xaK+4Zs2c= github.com/ipfs/go-merkledag v0.11.0 h1:DgzwK5hprESOzS4O1t/wi6JDpyVQdvm9Bs59N/jqfBY= github.com/ipfs/go-merkledag v0.11.0/go.mod h1:Q4f/1ezvBiJV0YCIXvt51W/9/kqJGH4I1LsA7+djsM4= -github.com/ipfs/go-metrics-interface v0.0.1 h1:j+cpbjYvu4R8zbleSs36gvB7jR+wsL2fGD6n0jO4kdg= -github.com/ipfs/go-metrics-interface v0.0.1/go.mod h1:6s6euYU4zowdslK0GKHmqaIZ3j/b/tL7HTWtJ4VPgWY= -github.com/ipfs/go-peertaskqueue v0.8.0 h1:JyNO144tfu9bx6Hpo119zvbEL9iQ760FHOiJYsUjqaU= -github.com/ipfs/go-peertaskqueue v0.8.0/go.mod h1:cz8hEnnARq4Du5TGqiWKgMr/BOSQ5XOgMOh1K5YYKKM= +github.com/ipfs/go-metrics-interface v0.3.0 h1:YwG7/Cy4R94mYDUuwsBfeziJCVm9pBMJ6q/JR9V40TU= +github.com/ipfs/go-metrics-interface v0.3.0/go.mod h1:OxxQjZDGocXVdyTPocns6cOLwHieqej/jos7H4POwoY= +github.com/ipfs/go-peertaskqueue v0.8.2 h1:PaHFRaVFdxQk1Qo3OKiHPYjmmusQy7gKQUaL8JDszAU= +github.com/ipfs/go-peertaskqueue v0.8.2/go.mod h1:L6QPvou0346c2qPJNiJa6BvOibxDfaiPlqHInmzg0FA= github.com/ipfs/go-test v0.2.1 h1:/D/a8xZ2JzkYqcVcV/7HYlCnc7bv/pKHQiX5TdClkPE= github.com/ipfs/go-test v0.2.1/go.mod h1:dzu+KB9cmWjuJnXFDYJwC25T3j1GcN57byN+ixmK39M= github.com/ipfs/go-verifcid v0.0.3 h1:gmRKccqhWDocCRkC+a59g5QW7uJw5bpX9HWBevXa0zs= github.com/ipfs/go-verifcid v0.0.3/go.mod h1:gcCtGniVzelKrbk9ooUSX/pM3xlH73fZZJDzQJRvOUw= github.com/ipld/go-car v0.6.2 h1:Hlnl3Awgnq8icK+ze3iRghk805lu8YNq3wlREDTF2qc= github.com/ipld/go-car v0.6.2/go.mod h1:oEGXdwp6bmxJCZ+rARSkDliTeYnVzv3++eXajZ+Bmr8= -github.com/ipld/go-codec-dagpb v1.6.0 h1:9nYazfyu9B1p3NAgfVdpRco3Fs2nFC72DqVsMj6rOcc= -github.com/ipld/go-codec-dagpb v1.6.0/go.mod h1:ANzFhfP2uMJxRBr8CE+WQWs5UsNa0pYtmKZ+agnUw9s= +github.com/ipld/go-codec-dagpb v1.7.0 h1:hpuvQjCSVSLnTnHXn+QAMR0mLmb1gA6wl10LExo2Ts0= +github.com/ipld/go-codec-dagpb v1.7.0/go.mod h1:rD3Zg+zub9ZnxcLwfol/OTQRVjaLzXypgy4UqHQvilM= github.com/ipld/go-ipld-prime v0.21.1-0.20240917223228-6148356a4c2e h1:0Anxx6pMS8U/qjTLVxPhpTYuuDMssHDtUEvzIz2Skw4= github.com/ipld/go-ipld-prime v0.21.1-0.20240917223228-6148356a4c2e/go.mod h1:LN+1Tx6867lbDCmf8bErp1TNw3Kh9eY2n0eJ+whRx38= github.com/ipni/go-libipni v0.6.18 h1:x8X6y0QoMmSKtwRlczWdWEYedoLUGCEek2TttfDKPk4= @@ -386,6 +430,18 @@ github.com/jbenet/go-temp-err-catcher v0.1.0 h1:zpb3ZH6wIE8Shj2sKS+khgRvf7T7RABo github.com/jbenet/go-temp-err-catcher v0.1.0/go.mod h1:0kJRvmDZXNMIiJirNPEYfhpPwbGVtZVWC34vc5WLsDk= github.com/jbenet/goprocess v0.1.4 h1:DRGOFReOMqqDNXwW70QkacFW0YN9QnwLV0Vqk+3oU0o= github.com/jbenet/goprocess v0.1.4/go.mod h1:5yspPrukOVuOLORacaBi858NqyClJPQxYZlqdZVfqY4= +github.com/jcmturner/aescts/v2 v2.0.0 h1:9YKLH6ey7H4eDBXW8khjYslgyqG2xZikXP0EQFKrle8= +github.com/jcmturner/aescts/v2 v2.0.0/go.mod h1:AiaICIRyfYg35RUkr8yESTqvSy7csK90qZ5xfvvsoNs= +github.com/jcmturner/dnsutils/v2 v2.0.0 h1:lltnkeZGL0wILNvrNiVCR6Ro5PGU/SeBvVO/8c/iPbo= +github.com/jcmturner/dnsutils/v2 v2.0.0/go.mod h1:b0TnjGOvI/n42bZa+hmXL+kFJZsFT7G4t3HTlQ184QM= +github.com/jcmturner/gofork v1.7.6 h1:QH0l3hzAU1tfT3rZCnW5zXl+orbkNMMRGJfdJjHVETg= +github.com/jcmturner/gofork v1.7.6/go.mod h1:1622LH6i/EZqLloHfE7IeZ0uEJwMSUyQ/nDd82IeqRo= +github.com/jcmturner/goidentity/v6 v6.0.1 h1:VKnZd2oEIMorCTsFBnJWbExfNN7yZr3EhJAxwOkZg6o= +github.com/jcmturner/goidentity/v6 v6.0.1/go.mod h1:X1YW3bgtvwAXju7V3LCIMpY0Gbxyjn/mY9zx4tFonSg= +github.com/jcmturner/gokrb5/v8 v8.4.4 h1:x1Sv4HaTpepFkXbt2IkL29DXRf8sOfZXo8eRKh687T8= +github.com/jcmturner/gokrb5/v8 v8.4.4/go.mod h1:1btQEpgT6k+unzCwX1KdWMEwPPkkgBtP+F6aCACiMrs= +github.com/jcmturner/rpc/v2 v2.0.3 h1:7FXXj8Ti1IaVFpSAziCZWNzbNuZmnvw/i6CqLNdWfZY= +github.com/jcmturner/rpc/v2 v2.0.3/go.mod h1:VUJYCIDm3PVOEHw8sgt091/20OJjskO/YJki3ELg/Hc= github.com/json-iterator/go v1.1.11/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= @@ -397,8 +453,8 @@ github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/klauspost/compress v1.18.5 h1:/h1gH5Ce+VWNLSWqPzOVn6XBO+vJbCNGvjoaGBFW2IE= github.com/klauspost/compress v1.18.5/go.mod h1:cwPg85FWrGar70rWktvGQj8/hthj3wpl0PGDogxkrSQ= -github.com/klauspost/cpuid/v2 v2.2.10 h1:tBs3QSyvjDyFTq3uoc/9xFpCuOsJQFNPiAhYdw2skhE= -github.com/klauspost/cpuid/v2 v2.2.10/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0= +github.com/klauspost/cpuid/v2 v2.3.0 h1:S4CRMLnYUhGeDFDqkGriYKdfoFlDnMtqTiI/sFzhA9Y= +github.com/klauspost/cpuid/v2 v2.3.0/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0= github.com/koron/go-ssdp v0.0.5 h1:E1iSMxIs4WqxTbIBLtmNBeOOC+1sCIXQeqTWVnpmwhk= github.com/koron/go-ssdp v0.0.5/go.mod h1:Qm59B7hpKpDqfyRNWRNr00jGwLdXjDyZh6y7rH6VS0w= github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg= @@ -409,28 +465,28 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= -github.com/labstack/echo/v4 v4.14.0 h1:+tiMrDLxwv6u0oKtD03mv+V1vXXB3wCqPHJqPuIe+7M= -github.com/labstack/echo/v4 v4.14.0/go.mod h1:xmw1clThob0BSVRX1CRQkGQ/vjwcpOMjQZSZa9fKA/c= +github.com/labstack/echo/v4 v4.15.0 h1:hoRTKWcnR5STXZFe9BmYun9AMTNeSbjHi2vtDuADJ24= +github.com/labstack/echo/v4 v4.15.0/go.mod h1:xmw1clThob0BSVRX1CRQkGQ/vjwcpOMjQZSZa9fKA/c= github.com/labstack/gommon v0.4.2 h1:F8qTUNXgG1+6WQmqoUWnz8WiEU60mXVVw0P4ht1WRA0= github.com/labstack/gommon v0.4.2/go.mod h1:QlUFxVM+SNXhDL/Z7YhocGIBYOiwB0mXm1+1bAPHPyU= -github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw= -github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= +github.com/lib/pq v1.11.1 h1:wuChtj2hfsGmmx3nf1m7xC2XpK6OtelS2shMY+bGMtI= +github.com/lib/pq v1.11.1/go.mod h1:/p+8NSbOcwzAEI7wiMXFlgydTwcgTr3OSKMsD2BitpA= github.com/libp2p/go-buffer-pool v0.1.0 h1:oK4mSFcQz7cTQIfqbe4MIj9gLW+mnanjyFtc6cdF0Y8= github.com/libp2p/go-buffer-pool v0.1.0/go.mod h1:N+vh8gMqimBzdKkSMVuydVDq+UV5QTWy5HSiZacSbPg= github.com/libp2p/go-libp2p v0.41.1 h1:8ecNQVT5ev/jqALTvisSJeVNvXYJyK4NhQx1nNRXQZE= github.com/libp2p/go-libp2p v0.41.1/go.mod h1:DcGTovJzQl/I7HMrby5ZRjeD0kQkGiy+9w6aEkSZpRI= github.com/libp2p/go-libp2p-asn-util v0.4.1 h1:xqL7++IKD9TBFMgnLPZR6/6iYhawHKHl950SO9L6n94= github.com/libp2p/go-libp2p-asn-util v0.4.1/go.mod h1:d/NI6XZ9qxw67b4e+NgpQexCIiFYJjErASrYW4PFDN8= -github.com/libp2p/go-libp2p-record v0.2.0 h1:oiNUOCWno2BFuxt3my4i1frNrt7PerzB3queqa1NkQ0= -github.com/libp2p/go-libp2p-record v0.2.0/go.mod h1:I+3zMkvvg5m2OcSdoL0KPljyJyvNDFGKX7QdlpYUcwk= +github.com/libp2p/go-libp2p-record v0.3.1 h1:cly48Xi5GjNw5Wq+7gmjfBiG9HCzQVkiZOUZ8kUl+Fg= +github.com/libp2p/go-libp2p-record v0.3.1/go.mod h1:T8itUkLcWQLCYMqtX7Th6r7SexyUJpIyPgks757td/E= github.com/libp2p/go-libp2p-testing v0.12.0 h1:EPvBb4kKMWO29qP4mZGyhVzUyR25dvfUIK5WDu6iPUA= github.com/libp2p/go-libp2p-testing v0.12.0/go.mod h1:KcGDRXyN7sQCllucn1cOOS+Dmm7ujhfEyXQL5lvkcPg= github.com/libp2p/go-msgio v0.3.0 h1:mf3Z8B1xcFN314sWX+2vOTShIE0Mmn2TXn3YCUQGNj0= github.com/libp2p/go-msgio v0.3.0/go.mod h1:nyRM819GmVaF9LX3l03RMh10QdOroF++NBbxAb0mmDM= github.com/libp2p/go-netroute v0.2.2 h1:Dejd8cQ47Qx2kRABg6lPwknU7+nBnFRpko45/fFPuZ8= github.com/libp2p/go-netroute v0.2.2/go.mod h1:Rntq6jUAH0l9Gg17w5bFGhcC9a+vk4KNXs6s7IljKYE= -github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 h1:6E+4a0GO5zZEnZ81pIr0yLvtUWk2if982qA3F3QD6H4= -github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0/go.mod h1:zJYVVT2jmtg6P3p1VtQj7WsuWi/y4VnjVBn7F8KPB3I= +github.com/lufia/plan9stats v0.0.0-20240513124658-fba389f38bae h1:dIZY4ULFcto4tAFlj1FYZl8ztUZ13bdq+PLY+NOfbyI= +github.com/lufia/plan9stats v0.0.0-20240513124658-fba389f38bae/go.mod h1:ilwx/Dta8jXAgpFYFvSWEMwxmbWXyiUHkd5FwyKhb5k= github.com/magiconair/properties v1.8.5/go.mod h1:y3VJvCyxH9uVvJTWEGAELF3aiYNyPKd5NZ3oSwXrF60= github.com/magiconair/properties v1.8.10 h1:s31yESBquKXCV9a/ScB3ESkOjUYYv+X0rg8SYxI99mE= github.com/magiconair/properties v1.8.10/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0= @@ -438,11 +494,11 @@ github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaO github.com/mattn/go-colorable v0.1.14 h1:9A9LHSqF/7dyVVX6g0U9cwm9pG3kP9gSzcuIPHPsaIE= github.com/mattn/go-colorable v0.1.14/go.mod h1:6LmQG8QLFO4G5z1gPvYEzlUgJ2wF+stgPZH1UqBm1s8= github.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= -github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= -github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/mattn/go-isatty v0.0.21 h1:xYae+lCNBP7QuW4PUnNG61ffM4hVIfm+zUzDuSzYLGs= +github.com/mattn/go-isatty v0.0.21/go.mod h1:ZXfXG4SQHsB/w3ZeOYbR0PrPwLy+n6xiMrJlRFqopa4= github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI= -github.com/mattn/go-runewidth v0.0.19 h1:v++JhqYnZuu5jSKrk9RbgF5v4CGUjqRfBm05byFGLdw= -github.com/mattn/go-runewidth v0.0.19/go.mod h1:XBkDxAl56ILZc9knddidhrOlY5R/pDhgLpndooCuJAs= +github.com/mattn/go-runewidth v0.0.23 h1:7ykA0T0jkPpzSvMS5i9uoNn2Xy3R383f9HDx3RybWcw= +github.com/mattn/go-runewidth v0.0.23/go.mod h1:XBkDxAl56ILZc9knddidhrOlY5R/pDhgLpndooCuJAs= github.com/mdelapenya/tlscert v0.2.0 h1:7H81W6Z/4weDvZBNOfQte5GpIMo0lGYEeWbkGp5LJHI= github.com/mdelapenya/tlscert v0.2.0/go.mod h1:O4njj3ELLnJjGdkN7M/vIVCpZ+Cf0L6muqOG4tLSl8o= github.com/mfridman/interpolate v0.0.2 h1:pnuTK7MQIxxFz1Gr+rjSIx9u7qVjf5VOoM/u6BbAxPY= @@ -456,6 +512,8 @@ github.com/minio/sha256-simd v1.0.1 h1:6kaan5IFmwTNynnKKpDHe6FWHohJOHhCPchzK49dz github.com/minio/sha256-simd v1.0.1/go.mod h1:Pz6AKMiUdngCLpeTL/RJY1M9rUuPMYujV5xJjtbRSN8= github.com/mitchellh/cli v1.0.0/go.mod h1:hNIlj7HEI86fIcpObd7a0FcrxTWetlwJDGcceTlRvqc= github.com/mitchellh/go-homedir v1.0.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= +github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y= +github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/mitchellh/go-testing-interface v1.0.0/go.mod h1:kRemZodwjscx+RGhAo8eIhFbs2+BFgRtFPeD/KE+zxI= github.com/mitchellh/gox v0.4.0/go.mod h1:Sd9lOJ0+aimLBi73mGofS1ycjY8lL3uZM3JPS42BGNg= github.com/mitchellh/iochan v1.0.0/go.mod h1:JwYml1nuB7xOzsp52dPpHFffvOCDupsG0QubkSMEySY= @@ -503,8 +561,8 @@ github.com/multiformats/go-multiaddr-fmt v0.1.0 h1:WLEFClPycPkp4fnIzoFoV9FVd49/e github.com/multiformats/go-multiaddr-fmt v0.1.0/go.mod h1:hGtDIW4PU4BqJ50gW2quDuPVjyWNZxToGUh/HwTZYJo= github.com/multiformats/go-multibase v0.2.0 h1:isdYCVLvksgWlMW9OZRYJEa9pZETFivncJHmHnnd87g= github.com/multiformats/go-multibase v0.2.0/go.mod h1:bFBZX4lKCA/2lyOFSAoKH5SS6oPyjtnzK/XTFDPkNuk= -github.com/multiformats/go-multicodec v0.9.1 h1:x/Fuxr7ZuR4jJV4Os5g444F7xC4XmyUaT/FWtE+9Zjo= -github.com/multiformats/go-multicodec v0.9.1/go.mod h1:LLWNMtyV5ithSBUo3vFIMaeDy+h3EbkMTek1m+Fybbo= +github.com/multiformats/go-multicodec v0.9.2 h1:YrlXCuqxjqm3bXl+vBq5LKz5pz4mvAsugdqy78k0pXQ= +github.com/multiformats/go-multicodec v0.9.2/go.mod h1:LLWNMtyV5ithSBUo3vFIMaeDy+h3EbkMTek1m+Fybbo= github.com/multiformats/go-multihash v0.2.3 h1:7Lyc8XfX/IY2jWb/gI7JP+o7JEq9hOa7BFvVU9RSh+U= github.com/multiformats/go-multihash v0.2.3/go.mod h1:dXgKXCXjBzdscBLk9JkjINiEsCKRVch90MdaGiKsvSM= github.com/multiformats/go-multistream v0.6.0 h1:ZaHKbsL404720283o4c/IHQXiS6gb8qAN5EIJ4PN5EA= @@ -513,15 +571,23 @@ github.com/multiformats/go-varint v0.1.0 h1:i2wqFp4sdl3IcIxfAonHQV9qU5OsZ4Ts9IOo github.com/multiformats/go-varint v0.1.0/go.mod h1:5KVAVXegtfmNQQm/lCY+ATvDzvJJhSkUlGQV9wgObdI= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/nats-io/nats.go v1.51.0 h1:ByW84XTz6W03GSSsygsZcA+xgKK8vPGaa/FCAAEHnAI= +github.com/nats-io/nats.go v1.51.0/go.mod h1:26HypzazeOkyO3/mqd1zZd53STJN0EjCYF9Uy2ZOBno= +github.com/nats-io/nkeys v0.4.15 h1:JACV5jRVO9V856KOapQ7x+EY8Jo3qw1vJt/9Jpwzkk4= +github.com/nats-io/nkeys v0.4.15/go.mod h1:CpMchTXC9fxA5zrMo4KpySxNjiDVvr8ANOSZdiNfUrs= +github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw= +github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c= github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w= github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls= github.com/neelance/astrewrite v0.0.0-20160511093645-99348263ae86/go.mod h1:kHJEU3ofeGjhHklVoIGuVj85JJwZ6kWPaJwCIxgnFmo= github.com/neelance/sourcemap v0.0.0-20200213170602-2833bce08e4c/go.mod h1:Qr6/a/Q4r9LP1IltGz7tA7iOK1WonHEYhu1HRBA7ZiM= +github.com/oklog/ulid/v2 v2.1.1 h1:suPZ4ARWLOJLegGFiZZ1dFAkqzhMjL3J1TzI+5wHz8s= +github.com/oklog/ulid/v2 v2.1.1/go.mod h1:rcEKHmBBKfef9DhnvX7y1HZBYxjXb0cP5ExxNsTT1QQ= github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec= github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY= github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE= -github.com/onsi/ginkgo/v2 v2.22.2 h1:/3X8Panh8/WwhU/3Ssa6rCKqPLuAkVY2I0RoyDLySlU= -github.com/onsi/ginkgo/v2 v2.22.2/go.mod h1:oeMosUL+8LtarXBHu/c0bx2D/K9zyQ6uX3cTyztHwsk= +github.com/onsi/ginkgo/v2 v2.23.3 h1:edHxnszytJ4lD9D5Jjc4tiDkPBZ3siDeJJkUZJJVkp0= +github.com/onsi/ginkgo/v2 v2.23.3/go.mod h1:zXTP6xIp3U8aVuXN8ENK9IXRaTjFnpVB9mGmaSRvxnM= github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= github.com/opencontainers/image-spec v1.1.1 h1:y0fUlFfIZhPF1W537XOLg0/fcx6zcHCJwooC2xJA040= @@ -532,6 +598,8 @@ github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FI github.com/pelletier/go-toml v1.9.3/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c= github.com/pelletier/go-toml/v2 v2.2.4 h1:mye9XuhQ6gvn5h28+VilKrrPoQVanw5PMw/TB0t5Ec4= github.com/pelletier/go-toml/v2 v2.2.4/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY= +github.com/pierrec/lz4/v4 v4.1.26 h1:GrpZw1gZttORinvzBdXPUXATeqlJjqUG/D87TKMnhjY= +github.com/pierrec/lz4/v4 v4.1.26/go.mod h1:EoQMVJgeeEOMsCqCzqFm2O0cJvljX2nGZjcRIPL34O4= github.com/pion/datachannel v1.6.0 h1:XecBlj+cvsxhAMZWFfFcPyUaDZtd7IJvrXqlXD/53i0= github.com/pion/datachannel v1.6.0/go.mod h1:ur+wzYF8mWdC+Mkis5Thosk+u/VOL287apDNEbFpsIk= github.com/pion/dtls/v2 v2.2.12 h1:KP7H5/c1EiVAAKUmXyCzPiQe5+bCJrpOeKg/L05dunk= @@ -583,13 +651,13 @@ github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 h1:o4JXh1EVt github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE= github.com/pressly/goose/v3 v3.27.0 h1:/D30gVTuQhu0WsNZYbJi4DMOsx1lNq+6SkLe+Wp59BM= github.com/pressly/goose/v3 v3.27.0/go.mod h1:3ZBeCXqzkgIRvrEMDkYh1guvtoJTU5oMMuDdkutoM78= -github.com/prometheus/client_golang v1.21.1 h1:DOvXXTqVzvkIewV/CDPFdejpMCGeMcbGCQ8YOmu+Ibk= -github.com/prometheus/client_golang v1.21.1/go.mod h1:U9NM32ykUErtVBxdvD3zfi+EuFkkaBvMb09mIfe0Zgg= +github.com/prometheus/client_golang v1.22.0 h1:rb93p9lokFEsctTys46VnV1kLCDpVZ0a/Y92Vm0Zc6Q= +github.com/prometheus/client_golang v1.22.0/go.mod h1:R7ljNsLXhuQXYZYtw6GAE9AZg8Y7vEW5scdCXrWRXC0= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= -github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= -github.com/prometheus/common v0.62.0 h1:xasJaQlnWAeyHdUBeGjXmutelfJHWMRr+Fg4QszZ2Io= -github.com/prometheus/common v0.62.0/go.mod h1:vyBcEuLSvWos9B1+CyL7JZ2up+uFzXhkqml0W5zIY1I= +github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk= +github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE= +github.com/prometheus/common v0.63.0 h1:YR/EIY1o3mEFP/kZCD7iDMnLPlGyuU2Gb3HIcXnA98k= +github.com/prometheus/common v0.63.0/go.mod h1:VVFF/fBIoToEnWRVkYoXEkq3R3paCoxG9PXP74SnV18= github.com/prometheus/procfs v0.19.2 h1:zUMhqEW66Ex7OXIiDkll3tl9a1ZdilUOd/F6ZXw4Vws= github.com/prometheus/procfs v0.19.2/go.mod h1:M0aotyiemPhBCM0z5w87kL22CxfcH05ZpYlu+b4J7mw= github.com/quic-go/qpack v0.5.1 h1:giqksBPnT/HDtZ6VhtFKgoLOWmlyo9Ei6u9PqzIMbhI= @@ -598,6 +666,8 @@ github.com/quic-go/quic-go v0.50.1 h1:unsgjFIUqW8a2oopkY7YNONpV1gYND6Nt9hnt1PN94 github.com/quic-go/quic-go v0.50.1/go.mod h1:Vim6OmUvlYdwBhXP9ZVrtGmCMWa3wEqhq3NgYrI8b4E= github.com/quic-go/webtransport-go v0.8.1-0.20241018022711-4ac2c9250e66 h1:4WFk6u3sOT6pLa1kQ50ZVdm8BQFgJNA117cepZxtLIg= github.com/quic-go/webtransport-go v0.8.1-0.20241018022711-4ac2c9250e66/go.mod h1:Vp72IJajgeOL6ddqrAhmp7IM9zbTcgkQxD/YdxrVwMw= +github.com/rabbitmq/amqp091-go v1.10.0 h1:STpn5XsHlHGcecLmMFCtg7mqq0RnD+zFr4uzukfVhBw= +github.com/rabbitmq/amqp091-go v1.10.0/go.mod h1:Hy4jKW5kQART1u+JkDTF9YYOQUHXqMuhrgxOEeS7G4o= github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE= github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= @@ -609,9 +679,13 @@ github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg= github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= +github.com/ryanuber/go-glob v1.0.0 h1:iQh3xXAumdQ+4Ufa5b25cRpC5TYKlno6hsv6Cb3pkBk= +github.com/ryanuber/go-glob v1.0.0/go.mod h1:807d1WSdnB0XRJzKNil9Om6lcp/3a0v4qIHxIXzX/Yc= github.com/sagikazarmark/locafero v0.11.0 h1:1iurJgmM9G3PA/I+wWYIOw/5SyBtxapeHDcg+AAIFXc= github.com/sagikazarmark/locafero v0.11.0/go.mod h1:nVIGvgyzw595SUSUE6tvCp3YYTeHs15MvlmU87WwIik= github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc= +github.com/segmentio/kafka-go v0.4.50 h1:mcyC3tT5WeyWzrFbd6O374t+hmcu1NKt2Pu1L3QaXmc= +github.com/segmentio/kafka-go v0.4.50/go.mod h1:Y1gn60kzLEEaW28YshXyk2+VCUKbJ3Qr6DrnT3i4+9E= github.com/sethvargo/go-retry v0.3.0 h1:EEt31A35QhrcRZtrYFDTBg91cqZVnFL2navjDrah2SE= github.com/sethvargo/go-retry v0.3.0/go.mod h1:mNX17F0C/HguQMyMyJxcnU471gOZGxCLyYaFyAZraas= github.com/shirou/gopsutil/v4 v4.26.3 h1:2ESdQt90yU3oXF/CdOlRCJxrP+Am1aBYubTMTfxJ1qc= @@ -620,6 +694,7 @@ github.com/shurcooL/go v0.0.0-20200502201357-93f07166e636/go.mod h1:TDJrrUr11Vxr github.com/shurcooL/httpfs v0.0.0-20190707220628-8d4bc4ba7749/go.mod h1:ZY1cvUeJuFPAdZ/B6v7RHavJWZn2YPVFQ1OSXhCGOkg= github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= github.com/shurcooL/vfsgen v0.0.0-20200824052919-0d455de96546/go.mod h1:TrYk7fJVaAttu97ZZKrO9UbRa8izdowaMIZcxYMbVaw= +github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= github.com/sirupsen/logrus v1.9.4 h1:TsZE7l11zFCLZnZ+teH4Umoq5BhEIfIzfRDZ1Uzql2w= github.com/sirupsen/logrus v1.9.4/go.mod h1:ftWc9WdOfJ0a92nsE2jF5u5ZwH8Bv2zdeOC42RjbV2g= @@ -629,6 +704,8 @@ github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1 github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= github.com/smartystreets/goconvey v1.8.1 h1:qGjIddxOk4grTu9JPOU31tVfq3cNdBlNa5sSznIX1xY= github.com/smartystreets/goconvey v1.8.1/go.mod h1:+/u4qLyY6x1jReYOp7GOM2FSt8aP9CzCZL03bI28W60= +github.com/smira/go-statsd v1.3.4 h1:kBYWcLSGT+qC6JVbvfz48kX7mQys32fjDOPrfmsSx2c= +github.com/smira/go-statsd v1.3.4/go.mod h1:RjdsESPgDODtg1VpVVf9MJrEW2Hw0wtRNbmB1CAhu6A= github.com/sourcegraph/conc v0.3.1-0.20240121214520-5f936abd7ae8 h1:+jumHNA0Wrelhe64i8F6HNlS8pkoyMv5sreGx2Ry5Rw= github.com/sourcegraph/conc v0.3.1-0.20240121214520-5f936abd7ae8/go.mod h1:3n1Cwaq1E1/1lhQhtRK2ts/ZwZEhjcQeJQ1RuC6Q/8U= github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI= @@ -652,9 +729,15 @@ github.com/spf13/viper v1.21.0 h1:x5S+0EU27Lbphp4UKm1C+1oQO+rKx36vfCoaVebLFSU= github.com/spf13/viper v1.21.0/go.mod h1:P0lhsswPGWD/1lZJ9ny3fYnVqxiegrlNrEmgLjbTCAY= github.com/storacha/go-libstoracha v0.7.5 h1:zfRbku2RXxbH0uNWnpGQyJqafiJ+uCGs3tMmkHgZ/QE= github.com/storacha/go-libstoracha v0.7.5/go.mod h1:htUh/VZ0qHRLPJKWZsgXv9mCOqlAFGTVS//ApvQVNf0= -github.com/storacha/go-ucanto v0.7.2 h1:sLg+swDM/6VEcrb9VOik3hP8ek3NvqqKWiZRmsva5X0= -github.com/storacha/go-ucanto v0.7.2/go.mod h1:DZlWyzuSkXk3phAuJpGDyhxYWpJogW1RFqp/VfldT64= +github.com/storacha/go-ucanto v0.8.2 h1:oDrnRV3hN8+H816m33szbyEY7ItURBmyyMLmw2CHpBo= +github.com/storacha/go-ucanto v0.8.2/go.mod h1:DZlWyzuSkXk3phAuJpGDyhxYWpJogW1RFqp/VfldT64= +github.com/storacha/guppy v0.7.0 h1:oPC8FhgFIPzG2EI7Pm/BQUWTrDqRJzZEJVTSN/i64Zs= +github.com/storacha/guppy v0.7.0/go.mod h1:n5aeC6UKuTRVhxE6V90jhsbv8XYzNdeTfA8Gq2bDqTk= +github.com/storacha/indexing-service v1.12.2 h1:DrcIzvM36Ux7i0UmGoSZiU8lR8WjVIqsTULSE1kA+7I= +github.com/storacha/indexing-service v1.12.2/go.mod h1:Yk+uHoTA6qaTE13Ptq6FArsR9hESOetzej9194KwjhM= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/objx v0.5.3 h1:jmXUvGomnU1o3W/V5h2VEradbpJDwGrzugQQvL0POH4= github.com/stretchr/objx v0.5.3/go.mod h1:rDQraq+vQZU7Fde9LOZLr8Tax6zZvy4kuNKF+QYS+U0= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= @@ -663,6 +746,9 @@ github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81P github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw= @@ -685,8 +771,12 @@ github.com/ucan-wg/go-ucan v0.0.0-20240916120445-37f52863156c/go.mod h1:IiRc1OKW github.com/urfave/cli v1.22.10/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw= github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= +github.com/valyala/fasthttp v1.70.0 h1:LAhMGcWk13QZWm85+eg8ZBNbrq5mnkWFGbHMUJHIdXA= +github.com/valyala/fasthttp v1.70.0/go.mod h1:oDZEHHkJ/Buyklg6uURmYs19442zFSnCIfX3j1FY3pE= github.com/valyala/fasttemplate v1.2.2 h1:lxLXG0uE3Qnshl9QyaK6XJxMXlQZELvChBOCmQD0Loo= github.com/valyala/fasttemplate v1.2.2/go.mod h1:KHLXt3tVN2HBp8eijSv/kGJopbvo7S+qRAEEKiv+SiQ= +github.com/versity/versitygw v1.4.1 h1:eNh58r3bmcwET0QWdkNmZgoo/QOO3GysT0YGtpnCyYc= +github.com/versity/versitygw v1.4.1/go.mod h1:8xbEr+kFj/wT/Tf7AOP/biagykBN3iSB5zMhd35Hi0g= github.com/warpfork/go-testmark v0.12.1 h1:rMgCpJfwy1sJ50x0M0NgyphxYYPMOODIJHhsXyEHU0s= github.com/warpfork/go-testmark v0.12.1/go.mod h1:kHwy7wfvGSPh1rQJYKayD4AbtNaeyZdcGi9tNJTaa5Y= github.com/warpfork/go-wish v0.0.0-20220906213052-39a1cc7a02d0 h1:GDDkbFiaK8jsSDJfjId/PEGEShv6ugrt4kYsC5UIDaQ= @@ -695,6 +785,14 @@ github.com/whyrusleeping/cbor-gen v0.3.1 h1:82ioxmhEYut7LBVGhGq8xoRkXPLElVuh5mV6 github.com/whyrusleeping/cbor-gen v0.3.1/go.mod h1:pM99HXyEbSQHcosHc0iW7YFmwnscr+t9Te4ibko05so= github.com/wlynxg/anet v0.0.5 h1:J3VJGi1gvo0JwZ/P1/Yc/8p63SoW98B5dHkYDmpgvvU= github.com/wlynxg/anet v0.0.5/go.mod h1:eay5PRQr7fIVAMbTbchTnO9gG65Hg/uYGdc7mguHxoA= +github.com/xdg-go/pbkdf2 v1.0.0 h1:Su7DPu48wXMwC3bs7MCNG+z4FhcyEuz5dlvchbq0B0c= +github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI= +github.com/xdg-go/scram v1.1.2 h1:FHX5I5B4i4hKRVRBCFRxq1iQRej7WO3hhBuJf+UUySY= +github.com/xdg-go/scram v1.1.2/go.mod h1:RT/sEzTbU5y00aCK8UOx6R7YryM0iF1N2MOmC3kKLN4= +github.com/xdg-go/stringprep v1.0.4 h1:XLI/Ng3O1Atzq0oBs3TWm+5ZVgkq2aqdlvP9JtoZ6c8= +github.com/xdg-go/stringprep v1.0.4/go.mod h1:mPGuuIYwz7CmR2bT9j4GbQqutWS1zV24gijq1dTyGkM= +github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU= +github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E= github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= @@ -743,8 +841,8 @@ go.uber.org/fx v1.24.0 h1:wE8mruvpg2kiiL1Vqd0CC+tr0/24XIB10Iwp2lLWzkg= go.uber.org/fx v1.24.0/go.mod h1:AmDeGyS+ZARGKM4tlH4FY2Jr63VjbEDJHtqXTGP5hbo= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= -go.uber.org/mock v0.5.0 h1:KAMbZvZPyBPWgD14IrIQ38QCyjwpvVVV6K/bHl1IwQU= -go.uber.org/mock v0.5.0/go.mod h1:ge71pBPLYDk7QIi1LupWxdAykm7KIEFchiOqd6z7qMM= +go.uber.org/mock v0.6.0 h1:hyF9dfmbgIX5EfOdasqLsWD6xqpNZlXblLB/Dbnwv3Y= +go.uber.org/mock v0.6.0/go.mod h1:KiVJ4BqZJaMj4svdfmHM0AUx4NJYO8ZNpPnZn1Z+BBU= go.uber.org/multierr v1.5.0/go.mod h1:FeouvMocqHpRaaGuG9EjoKcStLC43Zu/fmqdUMPcKYU= go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU= go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= @@ -765,8 +863,8 @@ golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8U golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20210711020723-a769d52b0f97/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= -golang.org/x/crypto v0.49.0 h1:+Ng2ULVvLHnJ/ZFEq4KdcDd/cfjrrjjNSXNzxg0Y4U4= -golang.org/x/crypto v0.49.0/go.mod h1:ErX4dUh2UM+CFYiXZRTcMpEcN8b/1gxEuv3nODoYtCA= +golang.org/x/crypto v0.50.0 h1:zO47/JPrL6vsNkINmLoo/PH1gcxpls50DNogFvB5ZGI= +golang.org/x/crypto v0.50.0/go.mod h1:3muZ7vA7PBCE6xgPX7nkzzjiUq87kRItoJQM1Yo8S+Q= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= @@ -777,8 +875,8 @@ golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u0 golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM= golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU= -golang.org/x/exp v0.0.0-20260218203240-3dfff04db8fa h1:Zt3DZoOFFYkKhDT3v7Lm9FDMEV06GpzjG2jrqW+QTE0= -golang.org/x/exp v0.0.0-20260218203240-3dfff04db8fa/go.mod h1:K79w1Vqn7PoiZn+TkNpx3BUWUQksGO3JcVX6qIjytmA= +golang.org/x/exp v0.0.0-20260312153236-7ab1446f8b90 h1:jiDhWWeC7jfWqR9c/uplMOqJ0sbNlNWv0UkzE0vX1MA= +golang.org/x/exp v0.0.0-20260312153236-7ab1446f8b90/go.mod h1:xE1HEv6b+1SCZ5/uscMRjUBKtIxworgEcEi+/n9NQDQ= golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= @@ -807,8 +905,8 @@ golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.9.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= -golang.org/x/mod v0.33.0 h1:tHFzIWbBifEmbwtGz65eaWyGiGZatSrT9prnU8DbVL8= -golang.org/x/mod v0.33.0/go.mod h1:swjeQEj+6r7fODbD2cqrnje9PnziFuw4bmLbBZFrQ5w= +golang.org/x/mod v0.34.0 h1:xIHgNUUnW6sYkcM5Jleh05DvLOtwc6RitGHbDk4akRI= +golang.org/x/mod v0.34.0/go.mod h1:ykgH52iCZe79kzLLMhyCUzhMci+nQj+0XkbXpNYtVjY= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181023162649-9b4f9f5ad519/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -848,8 +946,8 @@ golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96b golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.8.0/go.mod h1:QVkue5JL9kW//ek3r6jTKnTFis1tRmNAW2P1shuFdJc= -golang.org/x/net v0.52.0 h1:He/TN1l0e4mmR3QqHMT2Xab3Aj3L9qjbhRm78/6jrW0= -golang.org/x/net v0.52.0/go.mod h1:R1MAz7uMZxVMualyPXb+VaqGSa3LIaUqk0eEt3w36Sw= +golang.org/x/net v0.53.0 h1:d+qAbo5L0orcWAr0a9JweQpjXF19LMXJE8Ey7hwOdUA= +golang.org/x/net v0.53.0/go.mod h1:JvMuJH7rrdiCfbeHoo3fCQU24Lf5JJwT9W3sJFulfgs= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= @@ -914,6 +1012,7 @@ golang.org/x/sys v0.0.0-20201201145000-ef89a241ccb3/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210104204734-6f8348627aad/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210220050731-9a76102bfb43/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210305230114-8fe3ee5dd75b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210315160823-c6e025ad8005/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -926,16 +1025,17 @@ golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo= -golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw= +golang.org/x/sys v0.43.0 h1:Rlag2XtaFTxp19wS8MXlJwTvoh8ArU6ezoyFsMyCTNI= +golang.org/x/sys v0.43.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U= -golang.org/x/term v0.41.0 h1:QCgPso/Q3RTJx2Th4bDLqML4W6iJiaXFq2/ftQF13YU= -golang.org/x/term v0.41.0/go.mod h1:3pfBgksrReYfZ5lvYM0kSO0LIkAl4Yl2bXOkKP7Ec2A= +golang.org/x/term v0.42.0 h1:UiKe+zDFmJobeJ5ggPwOshJIVt6/Ft0rcfrXZDLWAWY= +golang.org/x/term v0.42.0/go.mod h1:Dq/D+snpsbazcBG5+F9Q1n2rXV8Ma+71xEjTRufARgY= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -946,13 +1046,13 @@ golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.8.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= -golang.org/x/text v0.35.0 h1:JOVx6vVDFokkpaq1AEptVzLTpDe9KGpj5tR4/X+ybL8= -golang.org/x/text v0.35.0/go.mod h1:khi/HExzZJ2pGnjenulevKNX1W67CUy0AsXcNubPGCA= +golang.org/x/text v0.36.0 h1:JfKh3XmcRPqZPKevfXVpI1wXPTqbkE5f7JA92a55Yxg= +golang.org/x/text v0.36.0/go.mod h1:NIdBknypM8iqVmPiuco0Dh6P5Jcdk8lJL0CUebqK164= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= -golang.org/x/time v0.14.0 h1:MRx4UaLrDotUKUdCIqzPC48t1Y9hANFKIRpNx+Te8PI= -golang.org/x/time v0.14.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4= +golang.org/x/time v0.15.0 h1:bbrp8t3bGUeFOx08pvsMYRTCVSMk89u4tKbNOZbp88U= +golang.org/x/time v0.15.0/go.mod h1:Y4YMaQmXwGQZoFaVFk4YpCt4FLQMYKZe9oeV/f4MSno= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= @@ -1005,13 +1105,14 @@ golang.org/x/tools v0.0.0-20201208233053-a543418bbed2/go.mod h1:emZCQorbCU4vsT4f golang.org/x/tools v0.0.0-20210105154028-b0ab187a4818/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0= +golang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.2/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= golang.org/x/tools v0.7.0/go.mod h1:4pg6aUX35JBAogB10C9AtvVL+qowtN4pT3CGSQex14s= -golang.org/x/tools v0.42.0 h1:uNgphsn75Tdz5Ji2q36v/nsFSfR/9BRFvqhGBaJGd5k= -golang.org/x/tools v0.42.0/go.mod h1:Ma6lCIwGZvHK6XtgbswSoWroEkhugApmsXyrUmBhfr0= +golang.org/x/tools v0.43.0 h1:12BdW9CeB3Z+J/I/wj34VMl8X+fEXBxVR90JeMX5E7s= +golang.org/x/tools v0.43.0/go.mod h1:uHkMso649BX2cZK6+RpuIPXS3ho2hZo4FVwfoy1vIk0= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/internal/config/config.go b/internal/config/config.go index 66ac223..47b0a32 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -23,6 +23,7 @@ type Config struct { Storage StorageConfig `mapstructure:"storage"` Log LogConfig `mapstructure:"log"` Mailer MailerConfig `mapstructure:"mailer"` + MS3T MS3TConfig `mapstructure:"ms3t"` } type DeploymentConfig struct { @@ -146,6 +147,44 @@ type LogConfig struct { Level string `mapstructure:"level"` } +// MS3TConfig configures the embedded ms3t S3-compatible HTTP server. +// When Enabled is false, none of the rest is consulted and no S3 +// listener starts. +type MS3TConfig struct { + // Enabled toggles the S3 listener. + Enabled bool `mapstructure:"enabled"` + // Addr is the host:port to bind the S3 listener to. + Addr string `mapstructure:"addr"` + // DataDir is where ms3t persists its log segments, space key, and + // any other on-disk state. + DataDir string `mapstructure:"data_dir"` + // ChunkSize is the body chunk size used for new objects, in bytes. + // 0 → ms3t default (1 MiB). + ChunkSize int64 `mapstructure:"chunk_size"` + // SealBytes is the open-segment size at which the log seals and + // sends the segment to the background flusher. 0 → 64 MiB. + SealBytes int64 `mapstructure:"seal_bytes"` + // SealAge is the maximum time the log will keep an open segment + // before sealing it. Drives the seal cadence under low write + // volume. 0 → 5s. + SealAge string `mapstructure:"seal_age"` + // Retain is the number of most-recent sealed segments to keep on + // disk after a successful Forge flush. Older flushed segments are + // unlinked. Higher values trade disk for read locality. 0 → 6. + Retain int `mapstructure:"retain"` + + // Region is the AWS region advertised to S3 clients. Used by the + // versitygw protocol layer for sigv4 verification. + Region string `mapstructure:"region"` + // RootAccess is the access key id of the single-account IAM + // root user. Required when Enabled is true. + RootAccess string `mapstructure:"root_access"` + // RootSecret is the secret access key paired with RootAccess. + // Required when Enabled is true. Provide via env + // (SPRUE_MS3T_ROOT_SECRET) — do not commit to config files. + RootSecret string `mapstructure:"root_secret"` +} + type MailerConfig struct { // Type specifies the mailer implementation to use (e.g., "postmark", "smtp", "nop"). Type string `mapstructure:"type"` @@ -207,6 +246,16 @@ func SetDefaults(v *viper.Viper) { // Log defaults v.SetDefault("log.level", "info") + + // MS3T defaults — disabled by default; sprue is the source of + // truth for whether the S3 listener is exposed. + v.SetDefault("ms3t.enabled", false) + v.SetDefault("ms3t.addr", ":9000") + v.SetDefault("ms3t.data_dir", "./ms3t-data") + v.SetDefault("ms3t.seal_bytes", 64<<20) + v.SetDefault("ms3t.seal_age", "5s") + v.SetDefault("ms3t.retain", 6) + v.SetDefault("ms3t.region", "us-east-1") } // BindEnvVars sets up environment variable binding with SPRUE_ prefix. diff --git a/internal/fx/app.go b/internal/fx/app.go index 0629579..b49e155 100644 --- a/internal/fx/app.go +++ b/internal/fx/app.go @@ -9,6 +9,7 @@ import ( "github.com/storacha/sprue/internal/fx/store/aws" "github.com/storacha/sprue/internal/fx/store/memory" "github.com/storacha/sprue/internal/fx/store/postgres" + "github.com/storacha/sprue/pkg/ms3t" "go.uber.org/fx" ) @@ -24,6 +25,7 @@ var AppModule = func(cfg *config.Config) fx.Option { service.Module, handlers.Module, ServerModule, + ms3t.Module, } switch cfg.Storage.Type { case config.StorageTypeMemory: diff --git a/pkg/ms3t/architectural.md b/pkg/ms3t/architectural.md new file mode 100644 index 0000000..82f340f --- /dev/null +++ b/pkg/ms3t/architectural.md @@ -0,0 +1,512 @@ +# ms3t — S3 over Forge (current state) + +This document describes the implementation under `sprue/pkg/ms3t/` +as it stands today. ms3t is an embedded S3 protocol listener that +runs in-process inside sprue (or, in tests, against an in-memory +harness) and translates S3 requests into mutations of a per-bucket +Merkle Search Tree, durably journaled to a local LSM-style log and +asynchronously shipped to Forge. + +It is still a prototype: many S3 features are unimplemented (see +"Not implemented" near the end), and several knobs that future +production work will tighten are noted as TODOs in code. + +## At a glance + +- **Protocol layer** — `github.com/versity/versitygw`. We get a + near-complete S3 REST front end (sigv4, path-style addressing, + the standard verb shapes) by implementing versitygw's + `backend.Backend` interface. +- **Backend adapter** — `pkg/ms3t/s3frontend.Backend`. Wires every + served verb into ms3t's domain primitives. Anything we haven't + implemented inherits `ErrNotImplemented` from versitygw's + `backend.BackendUnsupported`. +- **Per-op transaction** — `pkg/ms3t/bucketop.Tx`. Acquires the + per-bucket lock, snapshots the bucket's published Root from the + registry, and exposes a per-tx staging buffer + CBOR-typed view + over it. On Commit it fsyncs the batch into the log and + CAS-advances the registry Root in one shot. +- **Storage tiers** — an LSM-style local log: + - *Hot* — current open segment (CAR + .ops sidecar) on local + disk. AppendBatch fsyncs both files before returning. + - *Warm* — sealed segments retained on local disk for fast reads. + - *Cold* — segments shipped off-host to Forge (piri CAR + index + claim). The layered read tier falls through to Forge on misses. +- **Persistent metadata** — Postgres, under the `ms3t` schema. + Per-bucket Root + per-segment lifecycle live in the same database. +- **Identity** — ms3t owns its own ed25519 keypair (the *space*) and + is the root UCAN authority for self-issued + `space/content/retrieve` delegations. Sprue's identity is the + audience for piri allocate/accept invocations. + +## On-disk layout + +``` +/ +├── space.key # ed25519 keypair (UCAN identity) +└── segments/ + ├── seg-NNNNNNNNNNNNNNNNNNNN.car # one CAR per segment + ├── seg-NNNNNNNNNNNNNNNNNNNN.ops # per-batch (bucket, root) records + └── seg-NNNNNNNNNNNNNNNNNNNN.idx # JSON sidecar (sealed only) +``` + +- `.car` — CAR v1 with a placeholder root in the header. Block + frames are appended via `cars.WriteBlocksAt`. Per-batch fsync. +- `.ops` — append-only sidecar of `[bucket: text, root: bytes]` + CBOR records, each prefixed by a 4-byte big-endian length. One + record per AppendBatch (one S3 op). +- `.idx` — written atomically (tmp + rename) at seal time. JSON: + `{seq, size_bytes, sha256_hex, sealed_at, blocks: [{cid, + offset, length}], op_roots: [{bucket, root}]}`. The post-crash + source of truth for sealed segments. + +## Postgres schema + +Migrations are in `pkg/ms3t/migrations/sql/`, applied via goose +against the caller-provided `*pgxpool.Pool` at startup. All ms3t +tables live under the `ms3t` schema; goose's bookkeeping is at +`ms3t.goose_db_version`, so it never collides with other migrations +on the same database. + +```sql +CREATE TABLE ms3t.buckets ( + name TEXT PRIMARY KEY, + root_cid BYTEA, -- current MST root, NULL for empty bucket + forge_root_cid BYTEA, -- last MST root whose blocks shipped to Forge + created_at BIGINT NOT NULL +); + +CREATE TABLE ms3t.segments ( + seq BIGINT PRIMARY KEY, + state TEXT CHECK (state IN ('open','sealed','flushed')), + sealed_at BIGINT, + flushed_at BIGINT, + size_bytes BIGINT DEFAULT 0, + car_sha256 BYTEA +); + +CREATE TABLE ms3t.segment_op_roots ( + seq BIGINT, + seq_within INT, + bucket TEXT NOT NULL, + root_cid BYTEA NOT NULL, + PRIMARY KEY (seq, seq_within), + FOREIGN KEY (seq) REFERENCES ms3t.segments(seq) ON DELETE CASCADE +); +CREATE INDEX ON ms3t.segment_op_roots (bucket, seq); + +CREATE SEQUENCE ms3t.segment_seq; +``` + +`forge_root_cid` is the per-bucket high-water mark of "what's +durably in Forge." When the flusher succeeds, it advances +`forge_root_cid` for every op-root the segment carried in the same +transaction that flips the segment's state to `flushed`. + +## Per-object data shape + +Each S3 object is represented by an `ObjectManifest` block whose +`Body` field describes how the bytes are framed. The Body shape is +polymorphic via the `Format` string; the only codec today is +`fixed-v1`. + +```go +type ObjectManifest struct { + Key string `cborgen:"k"` + ContentType string `cborgen:"ct"` + Created int64 `cborgen:"t"` + Body Body `cborgen:"b"` +} + +type Body struct { + Size int64 `cborgen:"s"` + SHA256 []byte `cborgen:"h"` // hex of this is the ETag we serve today + Content cid.Cid `cborgen:"c"` // points at format-specific DAG root + Format string `cborgen:"f"` +} + +const FormatFixed = "fixed-v1" + +type FixedChunkerIndex struct { + ChunkSize int64 `cborgen:"cs"` + Chunks []cid.Cid `cborgen:"c"` +} +``` + +The `BodyCodec` interface (`pkg/ms3t/bucket/chunker.go`) is the +seam: + +```go +type BodyWriter interface { + Chunk(ctx context.Context, w blockstore.WriteStore, r io.Reader) (Body, error) +} +type BodyReader interface { + Format() string + Open(ctx context.Context, bs blockstore.ReadStore, body Body) io.ReadCloser + OpenRange(ctx context.Context, bs blockstore.ReadStore, body Body, start, end int64) io.ReadCloser +} +type BodyCodec interface { BodyWriter; BodyReader } +``` + +`FixedChunker` reads the body in `ChunkSize`-byte (default 1 MiB) +segments, writes each as a raw IPLD block, and finishes with a +`FixedChunkerIndex` CBOR block listing the chunk CIDs in order. +`Body.Content` points at the index. Reads lazily fetch the index on +first call and stream chunks; ranged reads translate the absolute +range into `(chunkIndex, in-chunk-offset)` and skip ahead. + +Adding a new codec is a new `BodyCodec` implementation plus a new +`Format` constant; the Body / Manifest shape stays stable. + +## Bucket as MST + +The bucket is a Merkle Search Tree (forked from the atproto MST in +`pkg/ms3t/mst/`, with relaxed key validation) keyed by S3 object +key. Each leaf points at an ObjectManifest CID. The bucket's +"current state" is a single MST root CID held at +`ms3t.buckets.root_cid`. + +Public MST methods used by the backend: `Add`, `Update`, `Delete`, +`Get`, `GetPointer`, `WalkLeavesFromNocache`. The MST is +content-addressed all the way down — every mutation produces a new +root CID. Mutated nodes are written through the staging buffer +(which feeds the log on Commit) via `tx.Put`/`tx.PutBlock`. + +## Storage tiers (LSM) + +``` + ┌────────────────────────────────────────────┐ + │ HOT open segment │ + │ AppendBatch fsyncs CAR + .ops sidecar│ ◀─┐ + │ before returning │ │ + └──────────────┬─────────────────────────────┘ │ + │ │ + seal-on-bytes / seal-on-age │ reads + │ │ fall + ┌──────────────▼─────────────────────────────┐ │ through + │ WARM sealed segments on local disk │ │ here + │ .idx sidecar persisted │ │ + │ (atomic tmp+rename) │ ◀─┤ + │ MarkSegmentSealed in Postgres │ │ + └──────────────┬─────────────────────────────┘ │ + │ │ + Flush callback │ + │ │ + ┌──────────────▼─────────────────────────────┐ │ + │ COLD shipped to Forge (piri + indexer) │ │ + │ per-bucket forge_root_cid advanced │ ◀─┤ + │ retention sweeps after cfg.Retain │ │ + └──────────────┬─────────────────────────────┘ │ + │ │ + network reads │ + ▼ ▼ + ┌──────────────────┐ ┌──────────────────┐ + │ blockstore.Forge │ │ Layered.GetBlock │ + │ indexer + piri │ │ open → sealed → │ + │ ranged GETs │ │ Forge fall-through│ + └──────────────────┘ └──────────────────┘ +``` + +The read path (`blockstore.Layered`): + +1. Open segment's in-memory index (CIDs from blocks just appended). +2. Sealed segments on local disk, newest-first by seq. +3. Forge — only reached on local miss. `blockstore.Forge` queries + the indexer for the block's `(CAR multihash, offset, length)`, + self-issues a scoped retrieval delegation, and does a ranged + GET against piri. + +The write path (per S3 op): + +1. `bucketop.Coordinator.Begin(bucket)` — clones the bucket name + (defends against fiber's recycled request buffer), acquires the + per-bucket lock, snapshots the bucket's State from the registry. +2. `BodyCodec.Chunk(ctx, tx, body)` — writes body chunks + + FixedChunkerIndex through `tx.PutBlock`/`tx.Put` (which buffer + in `OpStaging`). +3. `tx.Put(manifest)` — writes the ObjectManifest block. +4. `t.Add(key, mfCid)` (or Update / Delete) → `t.GetPointer(tx)` — + serializes the new MST nodes through the same staging buffer, + returns the new root CID. +5. `tx.Commit(newRoot)`: + - `staging.Commit` calls `log.AppendBatch(blocks, OpRoot{bucket, + root})`. Segment.append fsyncs CAR + .ops before returning. + - `reg.CASRoot(bucket, expect, next)` advances the bucket Root + in Postgres. + - Releases the per-bucket lock. +6. Return 200 to the client. + +The flush path (background goroutine in `logstore.Store`): + +1. Pick a sealed segment off the queue. +2. Build a `uploader.CARSource` from segment metadata + (`{Path, Size, SHA256, Positions}` — every field already on the + segment, no rescan). +3. `uploader.Forge.SubmitCAR`: + - Allocate + HTTP PUT (streaming straight from `CARSource.Path`) + + Accept the data CAR via a piri selected by routing. + - Build a `ShardedDagIndexView` from `CARSource.Positions`, + archive it, allocate + PUT + Accept the index blob. + - Self-issue a `space/content/retrieve` delegation scoped to the + index blob. + - Publish the index claim against the indexing-service. +4. `meta.MarkSegmentFlushed(seq, flushedAt, opRoots)` in one + Postgres transaction — flips state to `flushed`, writes + `flushed_at`, advances `forge_root_cid` for every op-root the + segment carried. +5. Retention: if there are more than `Retain` flushed segments on + disk, retire the oldest (close fds, unlink files, delete the + Postgres row). + +The default seal triggers (set in `pkg/ms3t/logstore/config.go`) +are 64 MiB or 5s; both can be overridden via `ServerConfig`. + +## Module map + +``` +pkg/ms3t/ +├── server.go — Server, ServerConfig, ServerDeps, New, newFlushFunc +├── module.go — fx Module + registerLifecycle (production wiring) +├── util.go — LoadOrCreateSigner (space.key) +│ +├── s3frontend/ — versitygw backend.Backend implementation +│ ├── backend.go — Backend, Recover (no-op), Drain (Coordinator.Close) +│ ├── bucket.go — bucket-level handlers + ACL/policy/lock/versioning stubs +│ └── object.go — object-level handlers + listWalk + lookupManifest +│ +├── bucketop/ — per-bucket write transaction primitive +│ └── bucketop.go — Coordinator, Tx, WithTx, WithLock, MutateFn +│ +├── blockstore/ — read/write contracts + impls + Log seam +│ ├── store.go — Reader, Writer, Store, BlockReader/Writer, etc. +│ ├── log.go — Log interface, OpRoot, BlockLoc +│ ├── staging.go — OpStaging (per-op buffer) +│ ├── layered.go — Layered (composite read tier) +│ └── forge.go — Forge (network base reader; no writes) +│ +├── logstore/ — LSM-style segment-based log +│ ├── store.go — Store, Open, AppendBatch, Get, Close +│ ├── segment.go — Segment lifecycle + on-disk format +│ ├── recovery.go — startup reconciliation +│ ├── config.go — Config (Dir/SealBytes/SealAge/Retain/Flush/Meta) +│ └── types.go — Meta interface, SegmentMeta, State +│ +├── uploader/ — ship sealed segment to Forge +│ └── forge.go — Uploader interface, CARSource, Forge.SubmitCAR +│ +├── registry/ — Postgres-backed bucket and segment metadata +│ ├── registry.go — Registry interface + State +│ ├── postgres.go — Postgres bucket methods +│ └── segments.go — Postgres methods satisfying logstore.Meta +│ +├── bucket/ — per-object data model + body codec +│ ├── manifest.go — ObjectManifest, Body, FormatFixed, FixedChunkerIndex +│ ├── chunker.go — BodyWriter / BodyReader / BodyCodec / FixedChunker +│ └── cbor_gen.go — generated by gen/ +│ +├── mst/ — atproto fork (relaxed key validation) +│ +├── cars/ — CAR encoding / scanning helpers +│ ├── encoder.go +│ └── reader.go +│ +├── migrations/ — goose-applied SQL embed +│ └── sql/{00001_init,00002_segments}.sql +│ +├── testing/ — smoke harness + curated suite tests +│ ├── harness.go — StartHarness + in-memory deps fakes +│ ├── integration.go — Run/RunT, upstream Suite values +│ ├── smoke_test.go — TestSmoke_* / TestSmokeXFail_* tables +│ ├── harness_test.go — TestHarnessLifecycle +│ └── listbuckets_test.go — TestListBucketsNamesStable (regression) +│ +└── gen/ — cborgen for bucket/cbor_gen.go +``` + +## Interfaces and seams + +| Contract | Production impl | Test impl | +|---|---|---| +| `versitygw/backend.Backend` | `s3frontend.Backend` | (same; harness boots the full server) | +| `blockstore.Log` | `logstore.Store` | (same) | +| `blockstore.BlockReader` | `blockstore.Forge` | `testing.nopBaseReader` | +| `registry.Registry` + `logstore.Meta` | `*registry.Postgres` (one struct, both interfaces) | `testing.memStore` (one struct, both) | +| `uploader.Uploader` | `uploader.Forge` | `testing.nopUploader` | +| `bucket.BodyCodec` | `*bucket.FixedChunker` | (same) | + +`s3frontend.Backend` is constructed with `(reg, rs, log, codec)` — +note that the read seam is a `blockstore.ReadStore` (no Put method), +so write paths can't accidentally route through it. Writes go via +`bucketop.Tx` which exposes the staging buffer behind the same +`Reader`/`Writer`/`BlockReader`/`BlockWriter` interfaces. + +## Lifecycle: Server.New → Start → Stop + +`pkg/ms3t/server.go::New(ctx, cfg, deps)`: + +1. Validate inputs (`Addr`, `DataDir`, `RootAccess/RootSecret`, all + `ServerDeps` fields present). +2. Apply defaults (`Region` → `us-east-1`, `ChunkSize` → + `bucket.DefaultChunkSize` = 1 MiB, `MaxConnections` / + `MaxRequests` → 4096). +3. Build a `logstore.FlushFunc` closure capturing the uploader + + meta — this is what runs per sealed segment off the flush + goroutine. +4. `logstore.Open(...)` — runs recovery (see next section), starts + the flush + seal-ticker goroutines. +5. Construct `blockstore.NewLayered(log, deps.BaseBlockReader)`. +6. Construct `s3frontend.New(deps.Registry, layered, log, codec)`. +7. Build the versitygw `s3api.S3ApiServer` with single-account IAM, + no audit/event sinks, generous concurrency limits. + +`Start`: calls `Backend.Recover` (a no-op today; the LSM already +recovered in `logstore.Open`) and spawns the listener goroutine +(`s3api.ServeMultiPort`). + +`Stop`: shuts the listener down and calls `Backend.Drain`, which +calls `Coordinator.Close` → `Log.Close` (force-seal the open +segment, drain the flush queue). Returns the joined error of both +steps. + +## Recovery on startup + +`logstore.Open` runs full reconciliation between disk and Postgres +before accepting writes: + +1. Scan `/segments/` for `.car` files. +2. Query `Meta.ListUnflushedSegments()` for open/sealed rows. +3. Reconcile by `seq`: + - **File + DB open** → rebuild as open via `cars.ScanFile` + + `readAllOps`. Force-seal at startup; we never resume an open + segment from a previous process. + - **File + DB sealed** → load from `.idx`, re-enqueue for flush. + - **File + .idx, no DB row** → rehydrate the DB row (the .idx is + authoritative for sealed state), keep for retention. + - **File only (no .idx, orphan from a torn seal)** → rebuild as + open, seed DB, force-seal. + - **DB row, no file** → log error, delete the DB row. +4. Sealed segments are placed at the head of the read fall-through + list (newest-first by seq) so reads find recent writes first. + +## Identity / Forge wiring + +ms3t generates and persists its own ed25519 keypair on first run at +`/space.key`. That keypair is the **space**: a `did:key` +whose ms3t is the root UCAN authority over. + +| Identity | Used for | +|---|---| +| ms3t's space signer | self-issuing `space/content/retrieve` delegations (read path, indexer claim publication, piri retrievals) | +| sprue's identity | piri allocate/accept invocations, audience of ms3t's self-issued retrieval delegations | + +Sprue is the audience for those delegations because `uploader.Forge` +talks to piri *as sprue*. ms3t-as-space-root keeps zero-out-of-band +provisioning at the cost of a not-very-multi-tenant story; that's a +tradeoff to revisit if/when ms3t serves more than one customer. + +## Testing surface + +- **`pkg/ms3t/testing/harness.go`** — `StartHarness(ctx, opts...)` + boots a real `*ms3t.Server` on a random `127.0.0.1` port with + in-memory deps (`memStore` for Registry+Meta, `nopBaseReader` for + the Layered base, `nopUploader` so flush is a no-op). Options: + `WithLogger`, `WithRegion`, `WithCredentials`, `WithChunkSize`, + `WithSealConfig`, `WithReadyTimeout`. Each call gets its own + scratch tempdir; cleanup is registered against the test's `t`. +- **`pkg/ms3t/testing/integration.go`** — wraps versitygw's upstream + `tests/integration` package. `Run(ctx, c, suite) Result` snapshots + versitygw's package-level pass/fail counters before/after and + returns the delta; `RunT(t, c, suite) Result` drives `Run` and + reports failures via `t.Errorf`. Curated `Suite` constants: + `Smoke`, `CRUD`, `Multipart`, `Tagging`, `ObjectLock`, + `Versioning`, `Auth`, `Full`. +- **`pkg/ms3t/testing/smoke_test.go`** — one top-level `Test` per S3 + group (`TestSmoke_CreateBucket`, `TestSmoke_PutObject`, …) plus + matching `TestSmokeXFail_*` for cases ms3t fails today. Each test + is a table-driven Go test (so GoLand renders one play-icon per + row). XFail tests treat per-case failures as `t.Skip` and only + fail if a case unexpectedly passes — that's the cue to promote + the row to the matching `TestSmoke_*`. + +Today: **66 cases** pass via `TestSmoke_*`, **53 cases** are tracked +as known-failing via `TestSmokeXFail_*` (total 119, matching the +upstream Smoke set). + +## Not implemented + +- **Multipart upload.** Per project decision, multipart in-flight + state will live in service-side storage, NOT folded into the MST. + Out of scope today. +- **ACLs, bucket policy, object lock, versioning, tagging.** The + always-called middleware methods (`GetBucketAcl`, + `GetBucketPolicy`, `GetObjectLockConfiguration`, + `GetBucketVersioning`) return polite empty / "not configured" + responses so PUT/GET don't trip on `ErrNotImplemented`. The + full surface is unimplemented. +- **Standard ETag.** S3 uses `md5(body)` hex for single-part PUTs + (and a different format for multipart). ms3t currently returns + `sha256(body)` hex. Adding md5 tracking to the `Body` record is + the agreed fix; tracked under the `PutObject_success` smoke case. +- **User metadata round-trip** (`x-amz-meta-*`, + `Content-Disposition`, etc.). `ObjectManifest` doesn't carry a + user-metadata map yet. Tracked under `PutObject_with_metadata` / + `HeadObject_success` smoke cases. +- **Range support on HeadObject.** GetObject honors `Range`; + HeadObject doesn't. +- **Conditional reads/writes** (`If-Match`, `If-None-Match`). +- **Server-side checksum surface** (CRC64NVME, etc.). The body's + sha256 is computed, but we don't surface checksum response + headers in `x-amz-checksum-*` form, and we don't validate + client-supplied checksums. +- **GC of unreferenced bodies.** `forge_root_cid` is a high-water + mark — anything reachable from `root_cid` but not from + `forge_root_cid` is "in flight" — but there's no expiry path to + Forge yet, so storage grows monotonically. +- **Multi-tenancy.** ms3t is the space owner; one instance ↔ one + space. +- **Multi-instance / HA.** The per-bucket lock is in-process. A + multi-writer story would need cross-process coordination. + +## Known TODOs in code + +- `pkg/ms3t/blockstore/staging.go` — `OpStaging` buffers an entire + S3 op's blocks in memory until Commit. For multi-GB PUTs this + bounds peak memory at ≈ payload size. A file-backed alternative + (CAR-shaped temp file + `cid → (offset, length)` index) would + cap the per-tx footprint at one chunk + index. The interface is + unchanged; only the storage backend would swap. +- `pkg/ms3t/registry/segments.go` — orphan `forge_root_cid` if + `staging.Commit` succeeds but `reg.CASRoot` fails afterwards. + Proposed fix: conditional `UPDATE … AND root_cid = $newRoot` in + the per-op-root advance, so flush only advances `forge_root_cid` + for buckets whose Root we actually recorded. + +## Reading the code + +If you're new and want to follow a request through: + +- **PUT**: `s3frontend.Backend.PutObject` (object.go) → + `bucketop.Coordinator.WithTx` (bucketop.go) → + `bucket.FixedChunker.Chunk` (chunker.go) → + `mst.MerkleSearchTree.Add` + `GetPointer` → `Tx.Commit` → + `OpStaging.Commit` → `logstore.Store.AppendBatch` → + `Segment.append` (fsyncs) → `registry.Postgres.CASRoot` → 200 OK. +- **GET**: `s3frontend.Backend.GetObject` (object.go) → + `lookupManifest` (registry → MST.Get over Layered → manifest + decode) → `FixedChunker.Open[Range]` over Layered → stream to + client. Every miss past the open segment falls through to sealed + segments and finally to `blockstore.Forge` (indexer + piri). +- **Flush**: `logstore.Store.flushLoop` → `cfg.Flush` (which is + `newFlushFunc` from `server.go`) → builds `CARSource` from + `Segment.{CARPath, Size, SHA256, BlockPositions}` → + `uploader.Forge.SubmitCAR` (allocate + PUT + accept + index + + claim) → `meta.MarkSegmentFlushed`. +- **Recovery**: `logstore.recovery.go` reconciles + `/segments/` against `Meta.ListUnflushedSegments`. +- **Where ms3t plugs into sprue**: `pkg/ms3t/module.go::Module` is + the only fx-aware file. `registerLifecycle` builds the + production-only collaborators (Forge reader, Postgres registry, + Forge uploader, space signer, migrations) and hands them to + `New` from `server.go`. +- **The MST itself**: `pkg/ms3t/mst/`. Standalone fork of the + atproto MST with relaxed key validation; no other ms3t deps. diff --git a/pkg/ms3t/blockstore/forge.go b/pkg/ms3t/blockstore/forge.go new file mode 100644 index 0000000..c370520 --- /dev/null +++ b/pkg/ms3t/blockstore/forge.go @@ -0,0 +1,283 @@ +package blockstore + +import ( + "context" + "errors" + "fmt" + "io" + "net/http" + "net/url" + "time" + + block "github.com/ipfs/go-block-format" + "github.com/ipfs/go-cid" + "github.com/storacha/go-libstoracha/capabilities/assert" + contentcap "github.com/storacha/go-libstoracha/capabilities/space/content" + captypes "github.com/storacha/go-libstoracha/capabilities/types" + "github.com/storacha/go-libstoracha/failure" + rclient "github.com/storacha/go-ucanto/client/retrieval" + "github.com/storacha/go-ucanto/core/dag/blockstore" + "github.com/storacha/go-ucanto/core/delegation" + "github.com/storacha/go-ucanto/core/receipt" + "github.com/storacha/go-ucanto/core/result" + "github.com/storacha/go-ucanto/did" + "github.com/storacha/go-ucanto/principal" + "github.com/storacha/go-ucanto/ucan" + "github.com/storacha/guppy/pkg/client/locator" + indexclient "github.com/storacha/indexing-service/pkg/client" + "go.uber.org/zap" +) + +// ErrNotFound is returned by Get when the indexing-service has no +// location commitment for the requested CID. +var ErrNotFound = errors.New("blockstore: not found") + +// Forge is a read-only IpldBlockstore that resolves CIDs through the +// Storacha indexing-service and fetches the underlying bytes via +// authorized UCAN-wrapped GETs against piri storage nodes. +// +// Used in ms3t's "no_cache" mode: every Get goes to the network. There +// is no in-process block cache; the only caching is the small +// metadata cache inside the IndexLocator (digest → location +// commitment), which exists per-Forge instance and resets on process +// restart. Block bytes always traverse the network. +// +// Put is a no-op so this type can be passed as the underlying for +// CARBuffer (whose Commit calls Put for each freshly-Submitted +// block). +type Forge struct { + locator locator.Locator + signer principal.Signer + spaceSigner principal.Signer + spaces []did.DID + logger *zap.Logger +} + +var _ BlockReader = (*Forge)(nil) + +// ForgeConfig wires sprue's existing services into a read-only Forge +// blockstore. +type ForgeConfig struct { + // IndexerEndpoint is the indexing-service URL (cfg.Indexer.Endpoint). + IndexerEndpoint string + // IndexerDID is the indexing-service principal (cfg.Indexer.DID). + IndexerDID string + // Spaces scopes the locator queries; for ms3t this is the single + // space ms3t owns. + Spaces []did.DID + // Signer is sprue's upload-service identity. Used as the issuer of + // `space/content/retrieve` invocations against piri. + Signer principal.Signer + // SpaceSigner is the keypair of the space ms3t owns. Used to + // self-issue space/content/retrieve delegations. The chain is + // space → sprue → piri (with the sprue→piri hop being the actual + // retrieve invocation that piri authorizes). + SpaceSigner principal.Signer + // HTTPClient is used for the underlying indexer queries. piri + // retrievals use go-ucanto's retrieval client which manages its + // own HTTP. Optional; defaults to http.DefaultClient. + HTTPClient *http.Client + // Logger is optional. + Logger *zap.Logger +} + +// NewForge constructs a Forge blockstore. Builds an indexing-service +// client and wraps it with guppy's IndexLocator. +func NewForge(cfg ForgeConfig) (*Forge, error) { + if cfg.IndexerEndpoint == "" { + return nil, errors.New("forge blockstore: indexer endpoint is required") + } + if cfg.IndexerDID == "" { + return nil, errors.New("forge blockstore: indexer DID is required") + } + if len(cfg.Spaces) == 0 { + return nil, errors.New("forge blockstore: at least one space is required") + } + if cfg.Signer == nil { + return nil, errors.New("forge blockstore: signer is required") + } + if cfg.SpaceSigner == nil { + return nil, errors.New("forge blockstore: space signer is required") + } + + endpointURL, err := url.Parse(cfg.IndexerEndpoint) + if err != nil { + return nil, fmt.Errorf("forge blockstore: parse indexer endpoint: %w", err) + } + indexerDID, err := did.Parse(cfg.IndexerDID) + if err != nil { + return nil, fmt.Errorf("forge blockstore: parse indexer DID: %w", err) + } + + httpc := cfg.HTTPClient + if httpc == nil { + httpc = http.DefaultClient + } + logger := cfg.Logger + if logger == nil { + logger = zap.NewNop() + } + + idxClient, err := indexclient.New(indexerDID, *endpointURL, indexclient.WithHTTPClient(httpc)) + if err != nil { + return nil, fmt.Errorf("forge blockstore: build indexing-service client: %w", err) + } + + authFn := newAuthorizeRetrieval(cfg.SpaceSigner, indexerDID) + loc := locator.NewIndexLocator(idxClient, authFn) + + return &Forge{ + locator: loc, + signer: cfg.Signer, + spaceSigner: cfg.SpaceSigner, + spaces: cfg.Spaces, + logger: logger, + }, nil +} + +// GetBlock resolves the CID through the indexer and retrieves the +// underlying byte slice from piri via a UCAN-authorized +// `space/content/retrieve` invocation. The request is scoped to +// the inner block's offset/length within the containing CAR shard. +func (f *Forge) GetBlock(ctx context.Context, c cid.Cid) (block.Block, error) { + locations, err := f.locator.Locate(ctx, f.spaces, c.Hash()) + if err != nil { + var nf locator.NotFoundError + if errors.As(err, &nf) { + return nil, ErrNotFound + } + return nil, fmt.Errorf("forge: locate %s: %w", c, err) + } + if len(locations) == 0 { + return nil, ErrNotFound + } + + loc := locations[0] + caveats, rerr := assert.LocationCaveatsReader.Read(loc.Commitment.Nb()) + if rerr != nil { + return nil, fmt.Errorf("forge: read location caveats for %s: %w", c, rerr) + } + if len(caveats.Location) == 0 { + return nil, fmt.Errorf("forge: empty location URL set for %s", c) + } + target := caveats.Location[0] + + // space scopes the retrieve capability. Fall back to our + // configured space if the commitment is the legacy form without + // a Space field. + space := caveats.Space + if space == (did.DID{}) { + space = f.spaces[0] + } + + // audience for the retrieve invocation is the storage provider + // that issued the commitment. + storageProvider, err := did.Parse(loc.Commitment.With()) + if err != nil { + return nil, fmt.Errorf("forge: parse storage provider DID: %w", err) + } + + // Self-issued retrieval proof: space → sprue. Per-call to keep + // the chain short-lived. + retrievalProof, err := delegation.Delegate( + f.spaceSigner, + f.signer, + []ucan.Capability[ucan.NoCaveats]{ + ucan.NewCapability(contentcap.Retrieve.Can(), space.String(), ucan.NoCaveats{}), + }, + delegation.WithExpiration(int(time.Now().Add(60*time.Second).Unix())), + ) + if err != nil { + return nil, fmt.Errorf("forge: build retrieval proof: %w", err) + } + + rangeStart := loc.Position.Offset + rangeEnd := rangeStart + loc.Position.Length - 1 + + inv, err := contentcap.Retrieve.Invoke( + f.signer, // issuer = sprue + storageProvider, // audience = piri + space.String(), // with = space + contentcap.RetrieveCaveats{ + Blob: contentcap.BlobDigest{Digest: caveats.Content.Hash()}, + Range: contentcap.Range{Start: rangeStart, End: rangeEnd}, + }, + delegation.WithProof(delegation.FromDelegation(retrievalProof)), + ) + if err != nil { + return nil, fmt.Errorf("forge: build retrieve invocation: %w", err) + } + + conn, err := rclient.NewConnection(storageProvider, &target) + if err != nil { + return nil, fmt.Errorf("forge: build retrieval connection: %w", err) + } + + xres, hres, err := rclient.Execute(ctx, inv, conn) + if err != nil { + return nil, fmt.Errorf("forge: execute retrieve for %s: %w", c, err) + } + + rcptLink, ok := xres.Get(inv.Link()) + if !ok { + return nil, fmt.Errorf("forge: no receipt for retrieve of %s", c) + } + bs, err := blockstore.NewBlockReader(blockstore.WithBlocksIterator(xres.Blocks())) + if err != nil { + return nil, fmt.Errorf("forge: build block reader: %w", err) + } + anyRcpt, err := receipt.NewAnyReceipt(rcptLink, bs) + if err != nil { + return nil, fmt.Errorf("forge: build receipt: %w", err) + } + rcpt, err := receipt.Rebind[contentcap.RetrieveOk, failure.FailureModel]( + anyRcpt, contentcap.RetrieveOkType(), failure.FailureType(), captypes.Converters..., + ) + if err != nil { + return nil, fmt.Errorf("forge: rebind receipt: %w", err) + } + if _, err := result.Unwrap(result.MapError(rcpt.Out(), failure.FromFailureModel)); err != nil { + return nil, fmt.Errorf("forge: retrieve %s: %w", c, err) + } + + body, err := io.ReadAll(hres.Body()) + if err != nil { + return nil, fmt.Errorf("forge: read retrieve body for %s: %w", c, err) + } + if uint64(len(body)) != loc.Position.Length { + return nil, fmt.Errorf("forge: %s short read: got %d bytes, want %d", + c, len(body), loc.Position.Length) + } + + return block.NewBlockWithCid(body, c) +} + +// newAuthorizeRetrieval returns the AuthorizeRetrievalFunc the +// IndexLocator calls before each indexer query. The space signer +// (root authority) directly authorizes the indexer to fetch any +// blob in the space. NoCaveats means "no specific blob digest +// constraint" — the indexer pulls whichever index blob it needs +// to satisfy the lookup. +// +// Mirrors the pattern in +// github.com/storacha/guppy/cmd/retrieve.go::94. Difference: ms3t's +// "user" is itself, so the proof chain is one hop (space → indexer) +// rather than the typical (user → upload service → indexer). +func newAuthorizeRetrieval(spaceSigner principal.Signer, indexerDID did.DID) locator.AuthorizeRetrievalFunc { + return func(spaces []did.DID) (delegation.Delegation, error) { + caps := make([]ucan.Capability[ucan.NoCaveats], 0, len(spaces)) + for _, space := range spaces { + caps = append(caps, ucan.NewCapability( + contentcap.Retrieve.Can(), + space.String(), + ucan.NoCaveats{}, + )) + } + return delegation.Delegate( + spaceSigner, + indexerDID, + caps, + delegation.WithExpiration(int(time.Now().Add(60*time.Second).Unix())), + ) + } +} diff --git a/pkg/ms3t/blockstore/layered.go b/pkg/ms3t/blockstore/layered.go new file mode 100644 index 0000000..e59955e --- /dev/null +++ b/pkg/ms3t/blockstore/layered.go @@ -0,0 +1,81 @@ +package blockstore + +import ( + "context" + "errors" + + block "github.com/ipfs/go-block-format" + "github.com/ipfs/go-cid" +) + +// Layered is the production ReadStore: a read-only seam that +// consults a small in-memory cache first, then the local LSM log, +// then a base blockstore (typically *Forge — indexing-service + +// piri). +// +// It exposes both halves of ReadStore from a single underlying +// traversal: +// +// - GetBlock returns raw blocks (body chunks). +// - Get fetches the same blocks and CBOR-decodes them (manifests, +// MST nodes), via an internal Store wrapped around our own +// GetBlock so the cache + log → base ordering is preserved. +// +// Layered has no Put: real writes flow through bucketop.Tx → +// OpStaging → Log.AppendBatch. +type Layered struct { + log Log + base BlockReader + + // cstSelf is a CBOR view backed by Layered's own GetBlock. The + // adapter exposes Layered as a BaseStore so CborStore can wrap + // it; the CBOR decoder's block fetches come back through + // GetBlock and reuse the cache + fallthrough. + cstSelf Store +} + +// NewLayered wires a log store in front of a base blockstore. +func NewLayered(log Log, base BlockReader) *Layered { + l := &Layered{log: log, base: base} + l.cstSelf = CborStore(layeredAsBlockstore{l}) + return l +} + +// Get fetches a CBOR-encoded value at c and decodes it into out. +// Same read order as GetBlock (cache → log → base) — the decoder +// fetches via GetBlock under the hood. +func (l *Layered) Get(ctx context.Context, c cid.Cid, out any) error { + return l.cstSelf.Get(ctx, c, out) +} + +// GetBlock fetches a raw block: cache → log → base. +func (l *Layered) GetBlock(ctx context.Context, c cid.Cid) (blk block.Block, retErr error) { + if l.log != nil { + b, err := l.log.Get(ctx, c) + if err == nil { + return b, nil + } + if !errors.Is(err, ErrNotFound) { + return nil, err + } + } + return l.base.GetBlock(ctx, c) +} + +// layeredAsBlockstore lifts Layered into a BaseStore for the +// CborStore wrapper. Internal-only — exists so the CBOR decoder +// reuses Layered's cache + fallthrough order rather than going +// around them. +type layeredAsBlockstore struct{ inner *Layered } + +func (a layeredAsBlockstore) Get(ctx context.Context, c cid.Cid) (block.Block, error) { + return a.inner.GetBlock(ctx, c) +} + +// Put is unused: Layered is read-only, but BaseStore (= cbor +// IpldBlockstore) requires it. The CBOR codec only ever invokes +// Get on this adapter, so this stays a no-op. +func (a layeredAsBlockstore) Put(_ context.Context, _ block.Block) error { return nil } + +// Compile-time assertion: Layered is the production ReadStore. +var _ ReadStore = (*Layered)(nil) diff --git a/pkg/ms3t/blockstore/log.go b/pkg/ms3t/blockstore/log.go new file mode 100644 index 0000000..b2f5c60 --- /dev/null +++ b/pkg/ms3t/blockstore/log.go @@ -0,0 +1,54 @@ +package blockstore + +import ( + "context" + + block "github.com/ipfs/go-block-format" + "github.com/ipfs/go-cid" +) + +// OpRoot ties a single batch of block writes to the bucket Root +// they collectively materialize. Every AppendBatch on a Log +// records exactly one OpRoot; the flush pipeline replays these +// to advance per-bucket forge_root_cid as segments ship. +type OpRoot struct { + // Bucket is the bucket whose Root this batch advances. + Bucket string + // Root is the new MST root the batch produces. + Root cid.Cid +} + +// BlockLoc points at a block's payload bytes inside a CAR file — +// the byte offset of the frame and the frame length. Logs +// populate one entry per block at append time; consumers (most +// notably the flush path that builds a ShardedDagIndexView) read +// the entries to avoid rescanning the file. +type BlockLoc struct { + Offset uint64 + Length uint64 +} + +// Log is the journaling tier — an append-only block store with +// three levels of durability: +// +// - Hot: the open segment. AppendBatch fsyncs the batch into +// the segment's CAR + ops sidecar before returning, so a +// successful AppendBatch is durable on local disk before any +// acked write becomes visible to clients. +// - Warm: sealed segments retained on local disk. Reads hit +// them via Get newest-first; Append never touches them. +// - Cold: segments flushed off-host (to Forge in production). +// Out of scope for the Log interface — the implementation +// manages the flush pipeline outside this contract. +// +// Get is the seam Layered uses to consult the journal before +// falling through to the network base — it returns ErrNotFound +// when no local segment holds the requested CID. Close drains +// the flush pipeline at process shutdown. +// +// Implemented by *logstore.Store. +type Log interface { + AppendBatch(ctx context.Context, blocks []block.Block, opRoot OpRoot) error + Get(ctx context.Context, c cid.Cid) (block.Block, error) + Close(ctx context.Context) error +} diff --git a/pkg/ms3t/blockstore/staging.go b/pkg/ms3t/blockstore/staging.go new file mode 100644 index 0000000..7c350c9 --- /dev/null +++ b/pkg/ms3t/blockstore/staging.go @@ -0,0 +1,137 @@ +package blockstore + +import ( + "context" + "errors" + "fmt" + "sync" + + block "github.com/ipfs/go-block-format" + "github.com/ipfs/go-cid" +) + +// OpStaging is a per-S3-op IpldBlockstore that captures every Put — +// body chunks, MST nodes, ObjectManifests — in memory. On Commit it +// hands the entire ordered batch to the log store in one +// fsynced AppendBatch call, after which the new bucket Root may be +// safely advanced via the registry CAS. +// +// Reads check the in-memory buffer first and fall through to the +// underlying read store on miss. This lets MST.GetPointer recompute +// path Put a node and immediately re-Read it during the same op. +// +// Single-shot per session: create at the start of an S3 op, Put any +// number of blocks, then call Commit(root) on success or Discard on +// failure. Failed ops never touch the log because nothing is written +// until Commit. +// +// TODO(perf): the in-memory `blocks` map bounds the transaction's +// memory footprint at the size of the entire payload until Commit. +// For a multi-GB PutObject this means the full body — every chunk, +// every MST node, the manifest — sits in process memory until the +// log accepts the batch. +// +// An alternative OpStaging implementation could spool to a temp +// file (CAR-shaped, with an in-memory cid → (offset, length) index +// for read-your-writes) instead of an unbounded map, capping the +// per-transaction footprint to roughly one chunk plus the index. +// The interface (Get / Put / Commit / Discard) does not need to +// change — only the storage backend behind these methods. +// +// Discard would unlink the temp file; Commit could hand the file +// off to Log.AppendBatch (or a future SubmitCAR-style entry point +// that takes the path directly) to avoid materializing the batch +// as a Go slice in the hot path. +type OpStaging struct { + underlying ReadStore + log Log + bucket string + + mu sync.RWMutex + // blocks holds every Put for the lifetime of the transaction. + // See the TODO(perf) on OpStaging — this is the field a + // file-backed implementation would replace. + blocks map[string]block.Block // keyed by string(cid.Bytes()) + order []cid.Cid +} + +// NewOpStaging constructs a per-op staging buffer. underlying is the +// read fallback (typically *Layered); log is the durable write +// target; bucket is the bucket whose root this op will advance. +func NewOpStaging(underlying ReadStore, log Log, bucket string) *OpStaging { + return &OpStaging{ + underlying: underlying, + log: log, + bucket: bucket, + blocks: map[string]block.Block{}, + } +} + +func (b *OpStaging) Get(ctx context.Context, c cid.Cid) (block.Block, error) { + b.mu.RLock() + blk, ok := b.blocks[string(c.Bytes())] + b.mu.RUnlock() + if ok { + return blk, nil + } + return b.underlying.GetBlock(ctx, c) +} + +func (b *OpStaging) Put(_ context.Context, blk block.Block) error { + b.mu.Lock() + defer b.mu.Unlock() + key := string(blk.Cid().Bytes()) + if _, exists := b.blocks[key]; !exists { + b.blocks[key] = blk + b.order = append(b.order, blk.Cid()) + } + return nil +} + +// Commit hands every staged block + (bucket, root) to the log in one +// AppendBatch. After Commit returns nil, the blocks AND the op-root +// are durable on disk; the caller may advance the bucket's published +// Root. +// +// An empty blocks slice is legal: an MST mutation can produce a +// new root that points at a node already materialized in a prior +// segment (e.g., trimTop after Delete unwraps to an existing +// subtree). The bucket Root still needs to advance, so AppendBatch +// is called with an empty payload and the OpRoot record alone. +func (b *OpStaging) Commit(ctx context.Context, root cid.Cid) error { + b.mu.Lock() + defer b.mu.Unlock() + + if !root.Defined() { + return errors.New("opstaging: commit with undefined root") + } + + blks := make([]block.Block, len(b.order)) + for i, c := range b.order { + blks[i] = b.blocks[string(c.Bytes())] + } + if err := b.log.AppendBatch(ctx, blks, OpRoot{Bucket: b.bucket, Root: root}); err != nil { + return fmt.Errorf("opstaging: append: %w", err) + } + + b.blocks = map[string]block.Block{} + b.order = nil + return nil +} + +// Discard drops any staged blocks without writing them. Use when the +// surrounding op has failed and the in-flight batch should be +// abandoned. +func (b *OpStaging) Discard() { + b.mu.Lock() + defer b.mu.Unlock() + b.blocks = map[string]block.Block{} + b.order = nil +} + +// OpStaging is passed to CborStore in bucketop.Tx construction, so +// it must satisfy BaseStore (the IPFS-standard Get/Put-on-blocks +// shape). The two halves are: Get → check in-memory map then fall +// through to the underlying ReadStore's GetBlock; Put → append to +// the in-memory map. +var _ BaseStore = (*OpStaging)(nil) diff --git a/pkg/ms3t/blockstore/staging_test.go b/pkg/ms3t/blockstore/staging_test.go new file mode 100644 index 0000000..5c99402 --- /dev/null +++ b/pkg/ms3t/blockstore/staging_test.go @@ -0,0 +1,211 @@ +package blockstore_test + +import ( + "context" + "errors" + "testing" + "time" + + block "github.com/ipfs/go-block-format" + "github.com/ipfs/go-cid" + "github.com/multiformats/go-multihash" + "go.uber.org/zap/zaptest" + + "github.com/storacha/sprue/pkg/ms3t/blockstore" + "github.com/storacha/sprue/pkg/ms3t/logstore" +) + +// In-memory Meta — minimal subset duplicated here to avoid pulling +// the logstore test fake out of its package. +type fakeMeta struct { + seq uint64 + rows map[uint64]*logstore.SegmentMeta + roots []blockstore.OpRoot +} + +func newFakeMeta() *fakeMeta { return &fakeMeta{rows: map[uint64]*logstore.SegmentMeta{}} } + +func (f *fakeMeta) NextSegmentSeq(_ context.Context) (uint64, error) { f.seq++; return f.seq, nil } +func (f *fakeMeta) InsertSegmentOpen(_ context.Context, seq uint64) error { + f.rows[seq] = &logstore.SegmentMeta{Seq: seq, State: logstore.StateOpen} + return nil +} +func (f *fakeMeta) MarkSegmentSealed(_ context.Context, seq uint64, sealedAt int64, sizeBytes int64, sha256 []byte, opRoots []blockstore.OpRoot) error { + r, ok := f.rows[seq] + if !ok || r.State != logstore.StateOpen { + return nil + } + r.State = logstore.StateSealed + r.OpRoots = append([]blockstore.OpRoot(nil), opRoots...) + f.roots = append(f.roots, opRoots...) + return nil +} +func (f *fakeMeta) MarkSegmentFlushed(_ context.Context, seq uint64, _ int64, _ []blockstore.OpRoot) error { + if r, ok := f.rows[seq]; ok { + r.State = logstore.StateFlushed + } + return nil +} +func (f *fakeMeta) DeleteSegment(_ context.Context, seq uint64) error { + delete(f.rows, seq) + return nil +} +func (f *fakeMeta) ListUnflushedSegments(_ context.Context) ([]logstore.SegmentMeta, error) { + var out []logstore.SegmentMeta + for _, r := range f.rows { + if r.State == logstore.StateOpen || r.State == logstore.StateSealed { + out = append(out, *r) + } + } + return out, nil +} +func (f *fakeMeta) RehydrateSegment(_ context.Context, m logstore.SegmentMeta) error { + cp := m + f.rows[m.Seq] = &cp + return nil +} + +// noopBase satisfies blockstore.BlockReader but always returns +// errUnknownBase so we can detect when a GetBlock falls through +// past the log layer. +type noopBase struct{} + +var errUnknownBase = errors.New("base: unknown") + +func (noopBase) GetBlock(_ context.Context, _ cid.Cid) (block.Block, error) { + return nil, errUnknownBase +} + +func makeBlock(t *testing.T, payload []byte) block.Block { + t.Helper() + mh, err := multihash.Sum(payload, multihash.SHA2_256, -1) + if err != nil { + t.Fatalf("mh: %v", err) + } + c := cid.NewCidV1(cid.Raw, mh) + blk, err := block.NewBlockWithCid(payload, c) + if err != nil { + t.Fatalf("blk: %v", err) + } + return blk +} + +func makeRoot(t *testing.T, name string) cid.Cid { + t.Helper() + mh, err := multihash.Sum([]byte("r:"+name), multihash.SHA2_256, -1) + if err != nil { + t.Fatalf("mh: %v", err) + } + return cid.NewCidV1(cid.DagCBOR, mh) +} + +func TestLayeredAndStagingHappyPath(t *testing.T) { + dir := t.TempDir() + meta := newFakeMeta() + logger := zaptest.NewLogger(t) + + log, err := logstore.Open(context.Background(), logstore.Config{ + Dir: dir, + Meta: meta, + SealBytes: 1 << 30, + SealAge: 1 * time.Hour, + Retain: 6, + Flush: func(ctx context.Context, seg *logstore.Segment) error { + return meta.MarkSegmentFlushed(ctx, seg.Seq(), time.Now().Unix(), seg.OpRoots()) + }, + Logger: logger, + }) + if err != nil { + t.Fatalf("logstore Open: %v", err) + } + t.Cleanup(func() { _ = log.Close(context.Background()) }) + + bs := blockstore.NewLayered(log, noopBase{}) + + // Stage two blocks for bucket "alpha", commit, then Get them back + // via the layered store. + stage := blockstore.NewOpStaging(bs, log, "alpha") + a := makeBlock(t, []byte("alpha-1")) + b := makeBlock(t, []byte("alpha-2")) + for _, blk := range []block.Block{a, b} { + if err := stage.Put(context.Background(), blk); err != nil { + t.Fatalf("stage.Put: %v", err) + } + } + if err := stage.Commit(context.Background(), makeRoot(t, "alpha")); err != nil { + t.Fatalf("Commit: %v", err) + } + + for _, blk := range []block.Block{a, b} { + got, err := bs.GetBlock(context.Background(), blk.Cid()) + if err != nil { + t.Fatalf("layered.Get %s: %v", blk.Cid(), err) + } + if string(got.RawData()) != string(blk.RawData()) { + t.Fatalf("layered.Get %s mismatch: got %q want %q", blk.Cid(), got.RawData(), blk.RawData()) + } + } +} + +func TestLayeredFallsThroughToBaseOnMiss(t *testing.T) { + dir := t.TempDir() + meta := newFakeMeta() + logger := zaptest.NewLogger(t) + + log, err := logstore.Open(context.Background(), logstore.Config{ + Dir: dir, + Meta: meta, + SealBytes: 1 << 30, + SealAge: 1 * time.Hour, + Retain: 6, + Flush: func(ctx context.Context, seg *logstore.Segment) error { + return meta.MarkSegmentFlushed(ctx, seg.Seq(), time.Now().Unix(), seg.OpRoots()) + }, + Logger: logger, + }) + if err != nil { + t.Fatalf("logstore Open: %v", err) + } + t.Cleanup(func() { _ = log.Close(context.Background()) }) + + bs := blockstore.NewLayered(log, noopBase{}) + missing := makeBlock(t, []byte("nope")).Cid() + _, err = bs.GetBlock(context.Background(), missing) + if !errors.Is(err, errUnknownBase) { + t.Fatalf("expected base sentinel, got %v", err) + } +} + +func TestStagingDiscardLeavesLogUntouched(t *testing.T) { + dir := t.TempDir() + meta := newFakeMeta() + logger := zaptest.NewLogger(t) + + log, err := logstore.Open(context.Background(), logstore.Config{ + Dir: dir, + Meta: meta, + SealBytes: 1 << 30, + SealAge: 1 * time.Hour, + Retain: 6, + Flush: func(ctx context.Context, seg *logstore.Segment) error { + return meta.MarkSegmentFlushed(ctx, seg.Seq(), time.Now().Unix(), seg.OpRoots()) + }, + Logger: logger, + }) + if err != nil { + t.Fatalf("logstore Open: %v", err) + } + t.Cleanup(func() { _ = log.Close(context.Background()) }) + + bs := blockstore.NewLayered(log, noopBase{}) + stage := blockstore.NewOpStaging(bs, log, "alpha") + blk := makeBlock(t, []byte("never-committed")) + if err := stage.Put(context.Background(), blk); err != nil { + t.Fatalf("Put: %v", err) + } + stage.Discard() + + if _, err := log.Get(context.Background(), blk.Cid()); !errors.Is(err, blockstore.ErrNotFound) { + t.Fatalf("Discard should leave log empty, got %v", err) + } +} diff --git a/pkg/ms3t/blockstore/store.go b/pkg/ms3t/blockstore/store.go new file mode 100644 index 0000000..5cfcd06 --- /dev/null +++ b/pkg/ms3t/blockstore/store.go @@ -0,0 +1,123 @@ +// Package blockstore is the home for ms3t's block I/O abstractions. +// It declares the contracts (Reader, Writer, Store, BlockReader, +// BlockWriter, ReadStore, BaseStore, Log) and the in-process +// implementations of the read tier (Layered), the transactional +// tier (OpStaging), and the network base (Forge). The on-disk LSM +// implementation of Log lives in pkg/ms3t/logstore. +// +// Tiered architecture: +// +// WRITE PATH +// client → OpStaging → (Commit) → Log → (Flush) → BaseStore (Forge) +// ↑ ↑ +// buffered until Commit; hot (open) + +// reads see own writes warm (sealed local) + +// cold (off-host) +// +// READ PATH +// client → Layered (cache → Log → BaseStore) +// +// Conventions: +// +// - Reader / Writer / Store: CBOR-typed I/O, mirroring the shape +// of cbor.IpldStore. Method names are Get / Put. +// - BlockReader / BlockWriter: raw-block I/O. Method names are +// GetBlock / PutBlock so a single type can expose both halves +// without method-name collision against the CBOR-typed Get/Put. +// - ReadStore = Reader + BlockReader: the read seam s3frontend +// drives. Layered is the production implementation. +// - Log: the journaling tier — see log.go. +// - BaseStore: alias for cbor.IpldBlockstore. The bottom tier +// keeps the IPFS-standard naming convention so anything +// implementing the cbor IpldBlockstore interface (Forge, +// third-party IPLD blockstores) drops in without an adapter. +// - OpStaging: per-transaction store. Get/Put buffer in memory; +// Commit hands the entire batch to a Log via AppendBatch and +// returns once the journal has fsynced; Discard rolls back. +// +// CborStore is the helper that wraps a BaseStore into a Store with +// the multihash fixed to SHA2_256, so encoded blocks address-equal +// across the codebase regardless of where in the layer stack they +// were materialized. +package blockstore + +import ( + "context" + + block "github.com/ipfs/go-block-format" + "github.com/ipfs/go-cid" + cbor "github.com/ipfs/go-ipld-cbor" + mh "github.com/multiformats/go-multihash" +) + +// Reader fetches a CBOR-encoded value at c into out. Same shape as +// cbor.IpldStore.Get; mst.LoadMST and any code path that walks the +// MST without materializing it accept a Reader. +type Reader interface { + Get(ctx context.Context, c cid.Cid, out any) error +} + +// Writer writes a CBOR-encoded value, returning its CID. Same shape +// as cbor.IpldStore.Put. +type Writer interface { + Put(ctx context.Context, v any) (cid.Cid, error) +} + +// Store is Reader + Writer — the CBOR-typed I/O surface (manifests, +// MST nodes). Equivalent in shape to cbor.IpldStore but defined +// here so consumers don't have to import cbor. +type Store interface { + Reader + Writer +} + +// BlockReader fetches a raw block. Used by chunker.OpenBody for +// streaming body chunks. Same shape as cbor.IpldBlockstore.Get but +// renamed to GetBlock so a single type can expose both a CBOR-typed +// Get (Reader) and a raw-block GetBlock without method-name +// collision. +type BlockReader interface { + GetBlock(ctx context.Context, c cid.Cid) (block.Block, error) +} + +// BlockWriter writes a raw block. Used by chunker.PutBody for body +// chunks. Same shape as cbor.IpldBlockstore.Put but renamed to +// PutBlock for the same reason as BlockReader. +type BlockWriter interface { + PutBlock(ctx context.Context, blk block.Block) error +} + +// ReadStore is the read-only seam the s3frontend.Backend uses for +// both CBOR-decoded reads (manifest, MST nodes) and raw block reads +// (body chunks). Layered is the production implementation. +type ReadStore interface { + Reader + BlockReader +} + +// WriteStore is the write seam a body codec uses: CBOR-typed Put +// (for format-specific index blocks) plus raw-block PutBlock (for +// chunk bytes). bucketop.Tx satisfies it. +type WriteStore interface { + Writer + BlockWriter +} + +// BaseStore is the bottom-tier raw-block interface. Aliased to +// cbor.IpldBlockstore so anything implementing the IPFS-standard +// convention (Forge, third-party IPLD blockstores) drops in +// without an adapter. ms3t's higher-layer interfaces (BlockReader, +// BlockWriter, Store) use the GetBlock / PutBlock / typed Get / Put +// naming convention; only this layer and CborStore work in the IPFS +// convention. +type BaseStore = cbor.IpldBlockstore + +// CborStore wraps a BaseStore in a Store, fixing the multihash to +// SHA2_256 so encoded blocks address-equal across the codebase. +// Used by Layered to expose itself as a CBOR view, and by bucketop +// to wrap an OpStaging into the per-tx CBOR view. +func CborStore(bs BaseStore) Store { + cst := cbor.NewCborStore(bs) + cst.DefaultMultihash = mh.SHA2_256 + return cst +} diff --git a/pkg/ms3t/bucket/cbor_gen.go b/pkg/ms3t/bucket/cbor_gen.go new file mode 100644 index 0000000..ffbf8a6 --- /dev/null +++ b/pkg/ms3t/bucket/cbor_gen.go @@ -0,0 +1,637 @@ +// Code generated by github.com/whyrusleeping/cbor-gen. DO NOT EDIT. + +package bucket + +import ( + "fmt" + "io" + "math" + "sort" + + cid "github.com/ipfs/go-cid" + cbg "github.com/whyrusleeping/cbor-gen" + xerrors "golang.org/x/xerrors" +) + +var _ = xerrors.Errorf +var _ = cid.Undef +var _ = math.E +var _ = sort.Sort + +func (t *ObjectManifest) MarshalCBOR(w io.Writer) error { + if t == nil { + _, err := w.Write(cbg.CborNull) + return err + } + + cw := cbg.NewCborWriter(w) + + if _, err := cw.Write([]byte{164}); err != nil { + return err + } + + // t.Body (bucket.Body) (struct) + if len("b") > 1000000 { + return xerrors.Errorf("Value in field \"b\" was too long") + } + + if err := cw.WriteMajorTypeHeader(cbg.MajTextString, uint64(len("b"))); err != nil { + return err + } + if _, err := cw.WriteString(string("b")); err != nil { + return err + } + + if err := t.Body.MarshalCBOR(cw); err != nil { + return err + } + + // t.Key (string) (string) + if len("k") > 1000000 { + return xerrors.Errorf("Value in field \"k\" was too long") + } + + if err := cw.WriteMajorTypeHeader(cbg.MajTextString, uint64(len("k"))); err != nil { + return err + } + if _, err := cw.WriteString(string("k")); err != nil { + return err + } + + if len(t.Key) > 1000000 { + return xerrors.Errorf("Value in field t.Key was too long") + } + + if err := cw.WriteMajorTypeHeader(cbg.MajTextString, uint64(len(t.Key))); err != nil { + return err + } + if _, err := cw.WriteString(string(t.Key)); err != nil { + return err + } + + // t.Created (int64) (int64) + if len("t") > 1000000 { + return xerrors.Errorf("Value in field \"t\" was too long") + } + + if err := cw.WriteMajorTypeHeader(cbg.MajTextString, uint64(len("t"))); err != nil { + return err + } + if _, err := cw.WriteString(string("t")); err != nil { + return err + } + + if t.Created >= 0 { + if err := cw.WriteMajorTypeHeader(cbg.MajUnsignedInt, uint64(t.Created)); err != nil { + return err + } + } else { + if err := cw.WriteMajorTypeHeader(cbg.MajNegativeInt, uint64(-t.Created-1)); err != nil { + return err + } + } + + // t.ContentType (string) (string) + if len("ct") > 1000000 { + return xerrors.Errorf("Value in field \"ct\" was too long") + } + + if err := cw.WriteMajorTypeHeader(cbg.MajTextString, uint64(len("ct"))); err != nil { + return err + } + if _, err := cw.WriteString(string("ct")); err != nil { + return err + } + + if len(t.ContentType) > 1000000 { + return xerrors.Errorf("Value in field t.ContentType was too long") + } + + if err := cw.WriteMajorTypeHeader(cbg.MajTextString, uint64(len(t.ContentType))); err != nil { + return err + } + if _, err := cw.WriteString(string(t.ContentType)); err != nil { + return err + } + return nil +} + +func (t *ObjectManifest) UnmarshalCBOR(r io.Reader) (err error) { + *t = ObjectManifest{} + + cr := cbg.NewCborReader(r) + + maj, extra, err := cr.ReadHeader() + if err != nil { + return err + } + defer func() { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + }() + + if maj != cbg.MajMap { + return fmt.Errorf("cbor input should be of type map") + } + + if extra > cbg.MaxLength { + return fmt.Errorf("ObjectManifest: map struct too large (%d)", extra) + } + + n := extra + + nameBuf := make([]byte, 2) + for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } + + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { + return err + } + continue + } + + switch string(nameBuf[:nameLen]) { + // t.Body (bucket.Body) (struct) + case "b": + + { + + if err := t.Body.UnmarshalCBOR(cr); err != nil { + return xerrors.Errorf("unmarshaling t.Body: %w", err) + } + + } + // t.Key (string) (string) + case "k": + + { + sval, err := cbg.ReadStringWithMax(cr, 1000000) + if err != nil { + return err + } + + t.Key = string(sval) + } + // t.Created (int64) (int64) + case "t": + { + maj, extra, err := cr.ReadHeader() + if err != nil { + return err + } + var extraI int64 + switch maj { + case cbg.MajUnsignedInt: + extraI = int64(extra) + if extraI < 0 { + return fmt.Errorf("int64 positive overflow") + } + case cbg.MajNegativeInt: + extraI = int64(extra) + if extraI < 0 { + return fmt.Errorf("int64 negative overflow") + } + extraI = -1 - extraI + default: + return fmt.Errorf("wrong type for int64 field: %d", maj) + } + + t.Created = int64(extraI) + } + // t.ContentType (string) (string) + case "ct": + + { + sval, err := cbg.ReadStringWithMax(cr, 1000000) + if err != nil { + return err + } + + t.ContentType = string(sval) + } + + default: + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } + } + } + + return nil +} +func (t *Body) MarshalCBOR(w io.Writer) error { + if t == nil { + _, err := w.Write(cbg.CborNull) + return err + } + + cw := cbg.NewCborWriter(w) + + if _, err := cw.Write([]byte{164}); err != nil { + return err + } + + // t.Content (cid.Cid) (struct) + if len("c") > 1000000 { + return xerrors.Errorf("Value in field \"c\" was too long") + } + + if err := cw.WriteMajorTypeHeader(cbg.MajTextString, uint64(len("c"))); err != nil { + return err + } + if _, err := cw.WriteString(string("c")); err != nil { + return err + } + + if err := cbg.WriteCid(cw, t.Content); err != nil { + return xerrors.Errorf("failed to write cid field t.Content: %w", err) + } + + // t.Format (string) (string) + if len("f") > 1000000 { + return xerrors.Errorf("Value in field \"f\" was too long") + } + + if err := cw.WriteMajorTypeHeader(cbg.MajTextString, uint64(len("f"))); err != nil { + return err + } + if _, err := cw.WriteString(string("f")); err != nil { + return err + } + + if len(t.Format) > 1000000 { + return xerrors.Errorf("Value in field t.Format was too long") + } + + if err := cw.WriteMajorTypeHeader(cbg.MajTextString, uint64(len(t.Format))); err != nil { + return err + } + if _, err := cw.WriteString(string(t.Format)); err != nil { + return err + } + + // t.SHA256 ([]uint8) (slice) + if len("h") > 1000000 { + return xerrors.Errorf("Value in field \"h\" was too long") + } + + if err := cw.WriteMajorTypeHeader(cbg.MajTextString, uint64(len("h"))); err != nil { + return err + } + if _, err := cw.WriteString(string("h")); err != nil { + return err + } + + if len(t.SHA256) > 2097152 { + return xerrors.Errorf("Byte array in field t.SHA256 was too long") + } + + if err := cw.WriteMajorTypeHeader(cbg.MajByteString, uint64(len(t.SHA256))); err != nil { + return err + } + + if _, err := cw.Write(t.SHA256); err != nil { + return err + } + + // t.Size (int64) (int64) + if len("s") > 1000000 { + return xerrors.Errorf("Value in field \"s\" was too long") + } + + if err := cw.WriteMajorTypeHeader(cbg.MajTextString, uint64(len("s"))); err != nil { + return err + } + if _, err := cw.WriteString(string("s")); err != nil { + return err + } + + if t.Size >= 0 { + if err := cw.WriteMajorTypeHeader(cbg.MajUnsignedInt, uint64(t.Size)); err != nil { + return err + } + } else { + if err := cw.WriteMajorTypeHeader(cbg.MajNegativeInt, uint64(-t.Size-1)); err != nil { + return err + } + } + + return nil +} + +func (t *Body) UnmarshalCBOR(r io.Reader) (err error) { + *t = Body{} + + cr := cbg.NewCborReader(r) + + maj, extra, err := cr.ReadHeader() + if err != nil { + return err + } + defer func() { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + }() + + if maj != cbg.MajMap { + return fmt.Errorf("cbor input should be of type map") + } + + if extra > cbg.MaxLength { + return fmt.Errorf("Body: map struct too large (%d)", extra) + } + + n := extra + + nameBuf := make([]byte, 1) + for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } + + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { + return err + } + continue + } + + switch string(nameBuf[:nameLen]) { + // t.Content (cid.Cid) (struct) + case "c": + + { + + c, err := cbg.ReadCid(cr) + if err != nil { + return xerrors.Errorf("failed to read cid field t.Content: %w", err) + } + + t.Content = c + + } + // t.Format (string) (string) + case "f": + + { + sval, err := cbg.ReadStringWithMax(cr, 1000000) + if err != nil { + return err + } + + t.Format = string(sval) + } + // t.SHA256 ([]uint8) (slice) + case "h": + + maj, extra, err = cr.ReadHeader() + if err != nil { + return err + } + + if extra > 2097152 { + return fmt.Errorf("t.SHA256: byte array too large (%d)", extra) + } + if maj != cbg.MajByteString { + return fmt.Errorf("expected byte array") + } + + if extra > 0 { + t.SHA256 = make([]uint8, extra) + } + + if _, err := io.ReadFull(cr, t.SHA256); err != nil { + return err + } + + // t.Size (int64) (int64) + case "s": + { + maj, extra, err := cr.ReadHeader() + if err != nil { + return err + } + var extraI int64 + switch maj { + case cbg.MajUnsignedInt: + extraI = int64(extra) + if extraI < 0 { + return fmt.Errorf("int64 positive overflow") + } + case cbg.MajNegativeInt: + extraI = int64(extra) + if extraI < 0 { + return fmt.Errorf("int64 negative overflow") + } + extraI = -1 - extraI + default: + return fmt.Errorf("wrong type for int64 field: %d", maj) + } + + t.Size = int64(extraI) + } + + default: + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } + } + } + + return nil +} +func (t *FixedChunkerIndex) MarshalCBOR(w io.Writer) error { + if t == nil { + _, err := w.Write(cbg.CborNull) + return err + } + + cw := cbg.NewCborWriter(w) + + if _, err := cw.Write([]byte{162}); err != nil { + return err + } + + // t.Chunks ([]cid.Cid) (slice) + if len("c") > 1000000 { + return xerrors.Errorf("Value in field \"c\" was too long") + } + + if err := cw.WriteMajorTypeHeader(cbg.MajTextString, uint64(len("c"))); err != nil { + return err + } + if _, err := cw.WriteString(string("c")); err != nil { + return err + } + + if len(t.Chunks) > 8192 { + return xerrors.Errorf("Slice value in field t.Chunks was too long") + } + + if err := cw.WriteMajorTypeHeader(cbg.MajArray, uint64(len(t.Chunks))); err != nil { + return err + } + for _, v := range t.Chunks { + + if err := cbg.WriteCid(cw, v); err != nil { + return xerrors.Errorf("failed to write cid field v: %w", err) + } + + } + + // t.ChunkSize (int64) (int64) + if len("cs") > 1000000 { + return xerrors.Errorf("Value in field \"cs\" was too long") + } + + if err := cw.WriteMajorTypeHeader(cbg.MajTextString, uint64(len("cs"))); err != nil { + return err + } + if _, err := cw.WriteString(string("cs")); err != nil { + return err + } + + if t.ChunkSize >= 0 { + if err := cw.WriteMajorTypeHeader(cbg.MajUnsignedInt, uint64(t.ChunkSize)); err != nil { + return err + } + } else { + if err := cw.WriteMajorTypeHeader(cbg.MajNegativeInt, uint64(-t.ChunkSize-1)); err != nil { + return err + } + } + + return nil +} + +func (t *FixedChunkerIndex) UnmarshalCBOR(r io.Reader) (err error) { + *t = FixedChunkerIndex{} + + cr := cbg.NewCborReader(r) + + maj, extra, err := cr.ReadHeader() + if err != nil { + return err + } + defer func() { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + }() + + if maj != cbg.MajMap { + return fmt.Errorf("cbor input should be of type map") + } + + if extra > cbg.MaxLength { + return fmt.Errorf("FixedChunkerIndex: map struct too large (%d)", extra) + } + + n := extra + + nameBuf := make([]byte, 2) + for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } + + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { + return err + } + continue + } + + switch string(nameBuf[:nameLen]) { + // t.Chunks ([]cid.Cid) (slice) + case "c": + + maj, extra, err = cr.ReadHeader() + if err != nil { + return err + } + + if extra > 8192 { + return fmt.Errorf("t.Chunks: array too large (%d)", extra) + } + + if maj != cbg.MajArray { + return fmt.Errorf("expected cbor array") + } + + if extra > 0 { + t.Chunks = make([]cid.Cid, extra) + } + + for i := 0; i < int(extra); i++ { + { + var maj byte + var extra uint64 + var err error + _ = maj + _ = extra + _ = err + + { + + c, err := cbg.ReadCid(cr) + if err != nil { + return xerrors.Errorf("failed to read cid field t.Chunks[i]: %w", err) + } + + t.Chunks[i] = c + + } + + } + } + // t.ChunkSize (int64) (int64) + case "cs": + { + maj, extra, err := cr.ReadHeader() + if err != nil { + return err + } + var extraI int64 + switch maj { + case cbg.MajUnsignedInt: + extraI = int64(extra) + if extraI < 0 { + return fmt.Errorf("int64 positive overflow") + } + case cbg.MajNegativeInt: + extraI = int64(extra) + if extraI < 0 { + return fmt.Errorf("int64 negative overflow") + } + extraI = -1 - extraI + default: + return fmt.Errorf("wrong type for int64 field: %d", maj) + } + + t.ChunkSize = int64(extraI) + } + + default: + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } + } + } + + return nil +} diff --git a/pkg/ms3t/bucket/chunker.go b/pkg/ms3t/bucket/chunker.go new file mode 100644 index 0000000..46ab0ef --- /dev/null +++ b/pkg/ms3t/bucket/chunker.go @@ -0,0 +1,257 @@ +package bucket + +import ( + "context" + "crypto/sha256" + "errors" + "fmt" + "io" + + block "github.com/ipfs/go-block-format" + "github.com/ipfs/go-cid" + mh "github.com/multiformats/go-multihash" + + "github.com/storacha/sprue/pkg/ms3t/blockstore" +) + +// DefaultChunkSize is the chunk size used when callers don't supply one. +// 1 MiB matches typical UnixFS chunking and balances per-blob piri +// overhead against range-read granularity. +const DefaultChunkSize int64 = 1 << 20 + +// rawBlockPrefix produces CIDs for body chunks: CIDv1, raw codec (0x55), +// sha256 multihash. Chunks are opaque bytes — no IPLD links — so the raw +// codec is the natural fit. +var rawBlockPrefix = cid.Prefix{ + Version: 1, + Codec: cid.Raw, + MhType: mh.SHA2_256, + MhLength: -1, +} + +// BodyWriter writes the bytes from r as a sequence of blocks (raw +// chunks plus whatever index/DAG blocks the codec needs) to w and +// returns a Body record describing how to reconstruct the bytes. +// +// w accepts both raw block writes (PutBlock for chunk bytes) and +// CBOR-typed writes (Put for format-specific index blocks). +// bucketop.Tx satisfies it. +type BodyWriter interface { + Chunk(ctx context.Context, w blockstore.WriteStore, r io.Reader) (Body, error) +} + +// BodyReader streams bytes back out of a Body. Format identifies +// the codec the writer produced; consumers route a Body to the +// matching BodyReader by that string. +// +// bs accepts both raw block reads (GetBlock for chunk bytes) and +// CBOR-typed reads (Get for index blocks). blockstore.Layered +// satisfies it. +type BodyReader interface { + // Format returns the Body.Format value this reader handles. + Format() string + // Open returns a stream over the full body. + Open(ctx context.Context, bs blockstore.ReadStore, body Body) io.ReadCloser + // OpenRange returns a stream over [start, end] inclusive. + OpenRange(ctx context.Context, bs blockstore.ReadStore, body Body, start, end int64) io.ReadCloser +} + +// BodyCodec is the canonical pair: a single concrete impl satisfies +// both halves so a Body produced by Chunk can always be read back +// via Open / OpenRange of the same codec instance. +type BodyCodec interface { + BodyWriter + BodyReader +} + +// FixedChunker is the default codec: fixed-size raw chunks indexed +// by a FixedChunkerIndex CBOR document at Body.Content. Implements +// BodyCodec. +type FixedChunker struct { + // ChunkSize is the body chunk size in bytes. 0 → DefaultChunkSize. + ChunkSize int64 +} + +// Compile-time assertion: FixedChunker is the canonical BodyCodec. +var _ BodyCodec = (*FixedChunker)(nil) + +// Format returns FormatFixed. +func (c *FixedChunker) Format() string { return FormatFixed } + +// Chunk reads body bytes from r, splits them at ChunkSize, writes +// each chunk as a raw block, then writes a FixedChunkerIndex CBOR +// block listing the chunks in order. The Body returned points +// Content at the index block. +func (c *FixedChunker) Chunk(ctx context.Context, w blockstore.WriteStore, r io.Reader) (Body, error) { + chunkSize := c.ChunkSize + if chunkSize <= 0 { + chunkSize = DefaultChunkSize + } + + buf := make([]byte, chunkSize) + bodyHasher := sha256.New() + var chunks []cid.Cid + var total int64 + + for { + n, err := io.ReadFull(r, buf) + if n > 0 { + chunk := buf[:n] + bodyHasher.Write(chunk) + cidv, perr := putRawBlock(ctx, w, chunk) + if perr != nil { + return Body{}, fmt.Errorf("put chunk: %w", perr) + } + chunks = append(chunks, cidv) + total += int64(n) + } + if err == nil { + continue + } + if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) { + break + } + return Body{}, fmt.Errorf("read body: %w", err) + } + + idx := &FixedChunkerIndex{ChunkSize: chunkSize, Chunks: chunks} + indexCID, err := w.Put(ctx, idx) + if err != nil { + return Body{}, fmt.Errorf("put fixed index: %w", err) + } + + return Body{ + Size: total, + SHA256: bodyHasher.Sum(nil), + Content: indexCID, + Format: FormatFixed, + }, nil +} + +// Open returns a reader over the full body. +func (c *FixedChunker) Open(ctx context.Context, bs blockstore.ReadStore, body Body) io.ReadCloser { + return &fixedBodyReader{ctx: ctx, bs: bs, body: body, end: body.Size - 1} +} + +// OpenRange returns a reader over [start, end] inclusive of the +// body. Caller must ensure 0 <= start <= end <= Size-1. +func (c *FixedChunker) OpenRange(ctx context.Context, bs blockstore.ReadStore, body Body, start, end int64) io.ReadCloser { + return &fixedBodyReader{ + ctx: ctx, + bs: bs, + body: body, + start: start, + end: end, + needsSeek: true, + pos: start, + } +} + +func putRawBlock(ctx context.Context, w blockstore.BlockWriter, data []byte) (cid.Cid, error) { + c, err := rawBlockPrefix.Sum(data) + if err != nil { + return cid.Undef, err + } + blk, err := block.NewBlockWithCid(data, c) + if err != nil { + return cid.Undef, err + } + if err := w.PutBlock(ctx, blk); err != nil { + return cid.Undef, err + } + return c, nil +} + +// fixedBodyReader streams chunks lazily for FixedChunker bodies. It +// fetches the index block on first read, then walks chunks. Both +// whole-body and ranged reads use the same loop — only the initial +// offset and end position differ. +type fixedBodyReader struct { + ctx context.Context + bs blockstore.ReadStore + body Body + + // idx is fetched lazily on first Read. + idx *FixedChunkerIndex + + start int64 // first byte to return (0 for whole-body) + end int64 // last byte to return (inclusive) + pos int64 // current absolute byte position + needsSeek bool // whether we still owe an initial seek into the start chunk + + nextChunk int // index into idx.Chunks of the next block to fetch + cur []byte // currently materialized chunk bytes + curOff int // read position within cur + err error +} + +func (br *fixedBodyReader) ensureIndex() error { + if br.idx != nil { + return nil + } + var idx FixedChunkerIndex + if err := br.bs.Get(br.ctx, br.body.Content, &idx); err != nil { + return fmt.Errorf("fetch fixed index %s: %w", br.body.Content, err) + } + br.idx = &idx + if br.needsSeek { + // The constructor for ranged reads stored the absolute start + // offset; translate it to (chunk index, in-chunk offset) now + // that we know ChunkSize. + br.nextChunk = int(br.start / idx.ChunkSize) + br.curOff = int(br.start % idx.ChunkSize) + } + return nil +} + +func (br *fixedBodyReader) Read(p []byte) (int, error) { + if br.err != nil { + return 0, br.err + } + if br.pos > br.end { + br.err = io.EOF + return 0, io.EOF + } + if err := br.ensureIndex(); err != nil { + br.err = err + return 0, err + } + + if br.cur == nil || (br.curOff >= len(br.cur) && !br.needsSeek) { + if br.nextChunk >= len(br.idx.Chunks) { + br.err = io.EOF + return 0, io.EOF + } + blk, err := br.bs.GetBlock(br.ctx, br.idx.Chunks[br.nextChunk]) + if err != nil { + br.err = fmt.Errorf("read chunk %d: %w", br.nextChunk, err) + return 0, br.err + } + br.cur = blk.RawData() + // On a ranged read the first chunk is partial — curOff was + // pre-set in ensureIndex; consume it here and clear the flag. + if !br.needsSeek { + br.curOff = 0 + } + br.needsSeek = false + br.nextChunk++ + } + + // Don't read past the inclusive end position. + remaining := br.end - br.pos + 1 + available := int64(len(br.cur) - br.curOff) + want := int64(len(p)) + if want > available { + want = available + } + if want > remaining { + want = remaining + } + + n := copy(p[:want], br.cur[br.curOff:br.curOff+int(want)]) + br.curOff += n + br.pos += int64(n) + return n, nil +} + +func (br *fixedBodyReader) Close() error { return nil } diff --git a/pkg/ms3t/bucket/manifest.go b/pkg/ms3t/bucket/manifest.go new file mode 100644 index 0000000..925ad7b --- /dev/null +++ b/pkg/ms3t/bucket/manifest.go @@ -0,0 +1,43 @@ +package bucket + +import "github.com/ipfs/go-cid" + +// ObjectManifest is the per-object metadata record stored as a CBOR +// block in the IPLD blockstore. The MST leaf for an object key +// points at this record's CID. Body identifies the body DAG; the +// shape of that DAG is determined by Body.Format and read back via +// the matching BodyCodec. +type ObjectManifest struct { + Key string `cborgen:"k"` + ContentType string `cborgen:"ct"` + Created int64 `cborgen:"t"` + Body Body `cborgen:"b"` +} + +// Body identifies the bytes of an object via a CID and a format +// tag. Format routes the Body to the right BodyCodec implementation +// at read time; Content is the root of whatever block DAG that +// codec produced. Size and SHA256 are codec-agnostic — the total +// number of body bytes and the sha256 of the full body, respectively +// (the latter is the source for the S3 ETag wire format). +type Body struct { + Size int64 `cborgen:"s"` + SHA256 []byte `cborgen:"h"` + Content cid.Cid `cborgen:"c"` + Format string `cborgen:"f"` +} + +// FormatFixed is the Body.Format value used by FixedChunker — a +// flat array of fixed-size raw blocks indexed by a FixedChunkerIndex +// CBOR document at Body.Content. +const FormatFixed = "fixed-v1" + +// FixedChunkerIndex is the body-DAG root for FormatFixed: an +// ordered list of chunk CIDs plus the per-chunk size. The reader +// fetches the index block from Body.Content, then streams the +// chunks. Range arithmetic is direct: byte N lives in chunk +// index N/ChunkSize at offset N%ChunkSize. +type FixedChunkerIndex struct { + ChunkSize int64 `cborgen:"cs"` + Chunks []cid.Cid `cborgen:"c"` +} diff --git a/pkg/ms3t/bucketop/bucketop.go b/pkg/ms3t/bucketop/bucketop.go new file mode 100644 index 0000000..fe0eec0 --- /dev/null +++ b/pkg/ms3t/bucketop/bucketop.go @@ -0,0 +1,340 @@ +// Package bucketop provides the per-bucket write-transaction +// primitive for ms3t. Each Tx snapshots the bucket's published Root +// from the registry, exposes a per-op staging buffer that +// write-throughs to the LSM log, and on Commit fsyncs the buffer +// into one log.AppendBatch and CAS-advances the bucket Root from +// the snapshotted value to the caller-supplied newRoot — atomically +// from the caller's perspective. +// +// The package owns the four-way wiring (registry, log, layered read +// tier, MST CBOR view) that S3 verb implementations would otherwise +// compose by hand for every PUT/DELETE. It also owns the per-bucket +// lock map so concurrent transactions against the same bucket +// serialize within a single process and the CAS in Commit always +// sees a fresh snapshot. +// +// Read paths bypass bucketop. They only need the read-side +// blockstore directly; tx-style ceremony would be pure overhead. +package bucketop + +import ( + "context" + "errors" + "fmt" + "strings" + "sync" + + block "github.com/ipfs/go-block-format" + "github.com/ipfs/go-cid" + + "github.com/storacha/sprue/pkg/ms3t/blockstore" + "github.com/storacha/sprue/pkg/ms3t/mst" + "github.com/storacha/sprue/pkg/ms3t/registry" +) + +// ErrBucketNotFound is returned by Begin when the bucket doesn't +// exist in the registry. Callers map this to NoSuchBucket at the +// protocol layer. +var ErrBucketNotFound = errors.New("bucketop: bucket not found") + +// Deps wires the Coordinator to its three dependencies. Every field +// is an interface so tests can supply in-memory equivalents without +// standing up Postgres, an on-disk log, or a network blockstore. +type Deps struct { + // Reg tracks per-bucket Root. Begin reads State; Commit + // CAS-advances Root from the snapshot to newRoot. + Reg registry.Registry + + // Log is the durability boundary. Tx.Commit calls + // log.AppendBatch with the per-tx blocks plus an op-root + // record of (bucket, newRoot). + Log blockstore.Log + + // Reads is the read tier the staging buffer falls through to + // on miss during the transaction. + Reads blockstore.ReadStore +} + +// Coordinator manages per-bucket transactions. One per ms3t backend. +// Its job is to hand out Tx instances that share the same Deps, +// serialize concurrent transactions per bucket, and own the log's +// shutdown. +type Coordinator struct { + deps Deps + + mu sync.Mutex + locks map[string]*sync.Mutex +} + +// NewCoordinator returns a Coordinator wired to the given deps. +func NewCoordinator(deps Deps) *Coordinator { + return &Coordinator{ + deps: deps, + locks: map[string]*sync.Mutex{}, + } +} + +// Begin starts a write transaction against bucket. Steps: +// 1. Acquire the per-bucket lock. +// 2. Snapshot the bucket's State from the registry. If the bucket +// doesn't exist, release the lock and return ErrBucketNotFound. +// 3. Allocate a per-op staging buffer plus a CBOR view over it. +// +// Caller MUST defer tx.Discard() and call tx.Commit on success. +// Both Commit and Discard are idempotent against the lock — either +// one releases it; calling the other afterwards is a no-op. +// +// The bucket name is cloned defensively: protocol layers like +// versitygw/fiber return string headers that alias the request +// buffer (valid only inside the handler), and we persist the +// bucket name in Tx.bucket → OpRoot.Bucket → segment.opRoots, +// which the async flush path reads after the handler returns. +func (c *Coordinator) Begin(ctx context.Context, bucket string) (*Tx, error) { + bucket = strings.Clone(bucket) + release := c.Lock(bucket) + + state, err := c.deps.Reg.Get(ctx, bucket) + if err != nil { + release() + if errors.Is(err, registry.ErrNotFound) { + return nil, ErrBucketNotFound + } + return nil, fmt.Errorf("bucketop: get bucket %q: %w", bucket, err) + } + + staging := blockstore.NewOpStaging(c.deps.Reads, c.deps.Log, bucket) + return &Tx{ + deps: c.deps, + bucket: bucket, + state: state, + staging: staging, + cst: blockstore.CborStore(staging), + release: release, + }, nil +} + +// Lock acquires the per-bucket lock without starting a transaction +// and returns a release func the caller MUST defer. Used by +// non-write operations that still need to serialize against +// concurrent writes — DeleteBucket, for example, walks the MST to +// confirm the bucket is empty and then deletes the registry row; +// without serialization a concurrent PUT could squeeze in between. +// +// Most callers should prefer WithLock, which removes the +// defer-or-leak hazard. +func (c *Coordinator) Lock(bucket string) func() { + lock := c.lockFor(bucket) + lock.Lock() + return lock.Unlock +} + +// MutateFn is the closure passed to WithTx. It receives the +// transaction's bucket-state snapshot and the per-op staging +// view, and returns the MST root the transaction should advance +// to. +// +// - Returning (newRoot, nil) with newRoot.Defined() commits the +// transaction: log.AppendBatch fsyncs the staging buffer and +// reg.CASRoot advances the bucket Root. +// - Returning (cid.Undef, nil) signals "no-op success": the +// staging buffer is discarded with no log append and no Root +// advance. Used by S3 DELETE-on-missing-key, which is +// idempotent: the protocol wants a 200 even though the tree +// didn't change. +// - Returning (_, non-nil err) discards and propagates err. +type MutateFn func(ctx context.Context, tx *Tx) (newRoot cid.Cid, err error) + +// WithTx runs fn against a fresh transaction. Begin/Commit/Discard +// happen automatically based on what fn returns; the caller can +// neither leak the bucket lock by forgetting Discard nor leak +// in-flight bytes by forgetting Commit. +// +// Errors mapped to the caller: +// - ErrBucketNotFound from Begin propagates verbatim (fn is not +// invoked). +// - registry.ErrConflict from the inner CASRoot propagates +// wrapped (only reachable in cross-process races; the +// in-process bucket lock prevents it within one Coordinator). +// - Any error fn returns propagates verbatim. +func (c *Coordinator) WithTx(ctx context.Context, bucket string, fn MutateFn) error { + tx, err := c.Begin(ctx, bucket) + if err != nil { + return err + } + + newRoot, fnErr := fn(ctx, tx) + if fnErr != nil { + tx.Discard() + return fnErr + } + if !newRoot.Defined() { + tx.Discard() + return nil + } + return tx.Commit(ctx, newRoot) +} + +// LockFn is the closure passed to WithLock. It runs while the +// per-bucket lock is held; the lock is released as soon as fn +// returns, regardless of whether fn errored. +type LockFn func(ctx context.Context) error + +// WithLock runs fn while holding the per-bucket lock. Counterpart +// to WithTx for non-mutating bucket-level operations +// (DeleteBucket's empty-check + delete; future bucket-policy +// updates). +func (c *Coordinator) WithLock(ctx context.Context, bucket string, fn LockFn) error { + release := c.Lock(bucket) + defer release() + return fn(ctx) +} + +func (c *Coordinator) lockFor(bucket string) *sync.Mutex { + c.mu.Lock() + defer c.mu.Unlock() + if m, ok := c.locks[bucket]; ok { + return m + } + m := &sync.Mutex{} + c.locks[bucket] = m + return m +} + +// Close shuts down the underlying log: seals the open segment, +// drains the flush queue, and updates per-bucket forge_root_cid +// for every op_root contained in flushed segments. After Close +// returns cleanly, every acked write is durable in Forge or +// scheduled to ship. Close is one-shot at process shutdown; +// subsequent Begin/Lock calls are not safe. +func (c *Coordinator) Close(ctx context.Context) error { + return c.deps.Log.Close(ctx) +} + +// Tx is a single-bucket write transaction. It exposes four I/O +// methods (Get/Put for CBOR, GetBlock/PutBlock for raw bytes) so +// callers don't have to reach for the underlying blockstore / +// IpldStore views — the four interface assertions below pin the +// contracts the rest of pkg/ms3t relies on. +type Tx struct { + deps Deps + bucket string + state *registry.State + staging *blockstore.OpStaging + cst blockstore.Store + + // release is the bucket-lock release closure. Set by Begin; + // nil-ed by finalize() so Commit and Discard mutually agree + // that the lock has been released exactly once. + release func() +} + +// Compile-time assertions: Tx is the canonical handle through which +// the rest of pkg/ms3t reaches into the per-op staging buffer, so +// it must satisfy each of the contracts at the call sites. +var ( + _ blockstore.Store = (*Tx)(nil) // Get, Put → manifest CBOR + MST.GetPointer + _ blockstore.Reader = (*Tx)(nil) // Get → mst.LoadMST / NewEmptyMST + _ blockstore.BlockReader = (*Tx)(nil) // GetBlock → OpenBody / OpenBodyRange + _ blockstore.BlockWriter = (*Tx)(nil) // PutBlock → PutBody +) + +// State returns the bucket's State as snapshotted at Begin. The +// reported Root is the value Commit will CAS against. +func (tx *Tx) State() *registry.State { return tx.state } + +// Get fetches a CBOR-encoded value at c into out. Tx satisfies +// blockstore.Store (Get + Put) and blockstore.Reader (Get) so it +// can be passed directly to mst.LoadMST and +// MerkleSearchTree.GetPointer. +func (tx *Tx) Get(ctx context.Context, c cid.Cid, out any) error { + return tx.cst.Get(ctx, c, out) +} + +// Put CBOR-encodes v into the per-op staging buffer and returns +// its CID. Tx satisfies blockstore.Store via Get + Put. +func (tx *Tx) Put(ctx context.Context, v any) (cid.Cid, error) { + return tx.cst.Put(ctx, v) +} + +// GetBlock fetches a raw block from the per-tx view: staging +// buffer first, then the layered read store. Satisfies +// bucket.BlockReader so OpenBody / OpenBodyRange can read from +// freshly-staged chunks during the same op (rare but consistent). +func (tx *Tx) GetBlock(ctx context.Context, c cid.Cid) (block.Block, error) { + return tx.staging.Get(ctx, c) +} + +// PutBlock writes a raw block into the per-op staging buffer. +// Satisfies bucket.BlockWriter so PutBody can stream chunks +// directly through the Tx without the caller threading a separate +// blockstore. +func (tx *Tx) PutBlock(ctx context.Context, blk block.Block) error { + return tx.staging.Put(ctx, blk) +} + +// LoadTree returns the bucket's MST loaded from State().Root, or a +// fresh empty MST if the bucket has no objects yet. Mutations on +// the returned tree flow into the per-op staging buffer because +// the tree is loaded with Tx as its store (Tx satisfies blockstore.Reader, +// and MST writes only happen at GetPointer time, which takes its +// writer as an explicit argument). +func (tx *Tx) LoadTree() *mst.MerkleSearchTree { + if tx.state.Root.Defined() { + return mst.LoadMST(tx, tx.state.Root) + } + return mst.NewEmptyMST(tx) +} + +// Commit finalizes the transaction: +// 1. log.AppendBatch fsyncs the staging buffer into the open log +// segment with an op-root of (bucket, newRoot). +// 2. registry.CASRoot advances the bucket Root from State().Root +// to newRoot. +// 3. The bucket lock is released. +// +// Returns registry.ErrConflict if another writer raced ahead of us +// (only possible across processes — within a single process the +// per-bucket lock prevents it). On any error the lock is still +// released; defer-Discard becomes a no-op. +// +// Failure mode worth knowing: if step 1 succeeds but step 2 fails +// (transient Postgres error, context cancellation between the two +// calls), the op_root is durable in the log even though the bucket +// Root never advanced. The flusher will eventually see this op_root +// and — today — blindly advance forge_root_cid to it, leaving +// forge_root_cid pointing at an orphan Root the bucket never +// published. See the TODO in pkg/ms3t/registry/segments.go's +// MarkSegmentFlushed for the planned conditional-update fix. +func (tx *Tx) Commit(ctx context.Context, newRoot cid.Cid) error { + if tx.release == nil { + return errors.New("bucketop: tx already finalized") + } + defer tx.finalize() + + if err := tx.staging.Commit(ctx, newRoot); err != nil { + return fmt.Errorf("bucketop: append: %w", err) + } + if err := tx.deps.Reg.CASRoot(ctx, tx.bucket, tx.state.Root, newRoot); err != nil { + return fmt.Errorf("bucketop: advance root: %w", err) + } + return nil +} + +// Discard rolls back the staging buffer (drops staged blocks +// without writing) and releases the bucket lock. Idempotent — safe +// to defer at the top of every operation regardless of whether +// Commit eventually runs. +func (tx *Tx) Discard() { + if tx.release == nil { + return + } + tx.staging.Discard() + tx.finalize() +} + +func (tx *Tx) finalize() { + if tx.release != nil { + tx.release() + tx.release = nil + } +} diff --git a/pkg/ms3t/cars/encoder.go b/pkg/ms3t/cars/encoder.go new file mode 100644 index 0000000..03952c5 --- /dev/null +++ b/pkg/ms3t/cars/encoder.go @@ -0,0 +1,205 @@ +// Package cars writes CAR v1 (Content Addressable aRchive) files. +// +// The format is intentionally simple: +// +// [varint: header_len][DAG-CBOR header bytes] +// [varint: frame_len][CID bytes][block bytes] +// [varint: frame_len][CID bytes][block bytes] +// ... +// +// Header is `{ "roots": [...], "version": 1 }` in DAG-CBOR +// (deterministic key order: by length, then bytewise — "roots" before +// "version"). +// +// Each block frame's varint length covers the CID bytes plus the raw +// block bytes that follow. +package cars + +import ( + "bytes" + "encoding/binary" + "fmt" + "io" + + block "github.com/ipfs/go-block-format" + "github.com/ipfs/go-cid" + cbg "github.com/whyrusleeping/cbor-gen" +) + +// BlockPosition records where a block's raw payload bytes live within +// an encoded CAR. Offset and Length are measured against the **block +// data**, NOT the frame header or the CID prefix — i.e. they describe +// the slice of the CAR you'd seek to and read from to recover the +// raw block bytes. +// +// This is the convention `blobindex.Position` expects. +type BlockPosition struct { + CID cid.Cid + Offset uint64 + Length uint64 +} + +// Write encodes a CAR v1 file with the given roots and blocks. Block +// ordering is preserved. +func Write(w io.Writer, roots []cid.Cid, blocks []block.Block) error { + _, err := WriteWithPositions(w, roots, blocks) + return err +} + +// WriteHeader writes only the CAR v1 header (root array + version) +// and returns the number of bytes written. Used by callers that +// build a CAR incrementally — e.g. an append-only log segment that +// emits one header at open and many block frames over time. +func WriteHeader(w io.Writer, roots []cid.Cid) (int64, error) { + if len(roots) == 0 { + return 0, fmt.Errorf("cars: at least one root required") + } + cw := &countingWriter{w: w} + headerBytes, err := encodeHeader(roots) + if err != nil { + return 0, fmt.Errorf("cars: encode header: %w", err) + } + if err := writeUvarint(cw, uint64(len(headerBytes))); err != nil { + return cw.n, fmt.Errorf("cars: write header len: %w", err) + } + if _, err := cw.Write(headerBytes); err != nil { + return cw.n, fmt.Errorf("cars: write header: %w", err) + } + return cw.n, nil +} + +// WriteBlocksAt writes only block frames (no header) at fileOffset +// and returns the absolute byte positions of each block's payload +// within the file. Use this to extend an already-open CAR built by +// WriteHeader. fileOffset must equal the current end-of-file size of +// the underlying writer; positions returned reflect that origin so +// they can be used as ReadAt offsets directly. +func WriteBlocksAt(w io.Writer, fileOffset int64, blocks []block.Block) ([]BlockPosition, error) { + cw := &countingWriter{w: w, n: fileOffset} + positions := make([]BlockPosition, 0, len(blocks)) + for i, blk := range blocks { + pos, err := writeBlock(cw, blk) + if err != nil { + return positions, fmt.Errorf("cars: write block %d (%s): %w", i, blk.Cid(), err) + } + positions = append(positions, pos) + } + return positions, nil +} + +// WriteWithPositions is like Write, but additionally returns the byte +// position of each block's payload within the encoded CAR. Used by the +// Forge uploader to build a `blobindex.ShardedDagIndexView` mapping +// inner CIDs to their slices of the outer CAR blob. +func WriteWithPositions(w io.Writer, roots []cid.Cid, blocks []block.Block) ([]BlockPosition, error) { + if len(roots) == 0 { + return nil, fmt.Errorf("cars: at least one root required") + } + + cw := &countingWriter{w: w} + + headerBytes, err := encodeHeader(roots) + if err != nil { + return nil, fmt.Errorf("cars: encode header: %w", err) + } + if err := writeUvarint(cw, uint64(len(headerBytes))); err != nil { + return nil, fmt.Errorf("cars: write header len: %w", err) + } + if _, err := cw.Write(headerBytes); err != nil { + return nil, fmt.Errorf("cars: write header: %w", err) + } + + positions := make([]BlockPosition, 0, len(blocks)) + for i, blk := range blocks { + pos, err := writeBlock(cw, blk) + if err != nil { + return nil, fmt.Errorf("cars: write block %d (%s): %w", i, blk.Cid(), err) + } + positions = append(positions, pos) + } + return positions, nil +} + +func encodeHeader(roots []cid.Cid) ([]byte, error) { + var buf bytes.Buffer + cw := cbg.NewCborWriter(&buf) + + if err := cw.WriteMajorTypeHeader(cbg.MajMap, 2); err != nil { + return nil, err + } + + if err := writeMapKey(cw, "roots"); err != nil { + return nil, err + } + if err := cw.WriteMajorTypeHeader(cbg.MajArray, uint64(len(roots))); err != nil { + return nil, err + } + for _, c := range roots { + if err := cbg.WriteCid(cw, c); err != nil { + return nil, err + } + } + + if err := writeMapKey(cw, "version"); err != nil { + return nil, err + } + if err := cw.WriteMajorTypeHeader(cbg.MajUnsignedInt, 1); err != nil { + return nil, err + } + + return buf.Bytes(), nil +} + +func writeMapKey(cw *cbg.CborWriter, key string) error { + if err := cw.WriteMajorTypeHeader(cbg.MajTextString, uint64(len(key))); err != nil { + return err + } + _, err := cw.WriteString(key) + return err +} + +// writeBlock emits one frame and returns the position of the block's +// payload (post-CID-prefix bytes) within the surrounding CAR. +func writeBlock(cw *countingWriter, blk block.Block) (BlockPosition, error) { + cidBytes := blk.Cid().Bytes() + data := blk.RawData() + frameLen := uint64(len(cidBytes) + len(data)) + + if err := writeUvarint(cw, frameLen); err != nil { + return BlockPosition{}, err + } + if _, err := cw.Write(cidBytes); err != nil { + return BlockPosition{}, err + } + + dataOffset := cw.n + if _, err := cw.Write(data); err != nil { + return BlockPosition{}, err + } + return BlockPosition{ + CID: blk.Cid(), + Offset: uint64(dataOffset), + Length: uint64(len(data)), + }, nil +} + +func writeUvarint(w io.Writer, n uint64) error { + var buf [binary.MaxVarintLen64]byte + sz := binary.PutUvarint(buf[:], n) + _, err := w.Write(buf[:sz]) + return err +} + +// countingWriter forwards writes to an underlying io.Writer while +// tracking the total number of bytes written. Used to compute block +// payload offsets for the index. +type countingWriter struct { + w io.Writer + n int64 +} + +func (cw *countingWriter) Write(p []byte) (int, error) { + n, err := cw.w.Write(p) + cw.n += int64(n) + return n, err +} diff --git a/pkg/ms3t/cars/reader.go b/pkg/ms3t/cars/reader.go new file mode 100644 index 0000000..1d746e1 --- /dev/null +++ b/pkg/ms3t/cars/reader.go @@ -0,0 +1,161 @@ +package cars + +import ( + "bufio" + "encoding/binary" + "errors" + "fmt" + "io" + "os" + + block "github.com/ipfs/go-block-format" + "github.com/ipfs/go-cid" +) + +// ErrTorn is returned by ScanFile when the trailing bytes of the CAR +// look like an incomplete frame (truncated varint, mismatched frame +// length, or short read on payload). Callers can use the LastGoodEnd +// field of the returned ScanResult to truncate the file back to the +// last fully-fsynced batch boundary. +var ErrTorn = errors.New("cars: torn trailing frame") + +// Frame is one block read from a CAR file along with its on-disk +// position. Offset/Length describe the payload bytes (post-CID +// prefix), matching the convention used by BlockPosition / Write. +type Frame struct { + Block block.Block + Offset uint64 + Length uint64 +} + +// ScanResult is the outcome of ScanFile. +type ScanResult struct { + // Frames are every block read in file order. + Frames []Frame + // LastGoodEnd is the byte offset just past the last fully-read + // frame. If the file is intact, equals the file size; if a torn + // frame was detected, equals the start of that torn frame so + // callers can truncate to it. + LastGoodEnd int64 + // HeaderEnd is the byte offset just past the CAR v1 header (i.e., + // the offset of the first frame). + HeaderEnd int64 +} + +// ScanFile reads a CAR v1 file from path and returns every fully +// readable block + its on-disk position. If the file ends in a torn +// frame, ScanFile returns the frames it could read along with +// ErrTorn and LastGoodEnd pointing at the start of the torn frame. +// +// This is the recovery primitive: callers can `os.Truncate(path, +// LastGoodEnd)` to drop a torn tail, then re-derive the in-memory +// index from Frames. +func ScanFile(path string) (*ScanResult, error) { + f, err := os.Open(path) + if err != nil { + return nil, fmt.Errorf("cars: open %s: %w", path, err) + } + defer f.Close() + st, err := f.Stat() + if err != nil { + return nil, fmt.Errorf("cars: stat %s: %w", path, err) + } + size := st.Size() + + br := bufio.NewReader(f) + headerLen, err := binary.ReadUvarint(br) + if err != nil { + return nil, fmt.Errorf("cars: read header len: %w", err) + } + headerVarintBytes := uvarintLen(headerLen) + if _, err := br.Discard(int(headerLen)); err != nil { + return nil, fmt.Errorf("cars: skip header: %w", err) + } + headerEnd := int64(headerVarintBytes) + int64(headerLen) + + res := &ScanResult{HeaderEnd: headerEnd, LastGoodEnd: headerEnd} + pos := headerEnd + + for pos < size { + frameStart := pos + frameLen, varSize, terr := readUvarint(br) + if terr != nil { + if errors.Is(terr, io.EOF) || errors.Is(terr, io.ErrUnexpectedEOF) { + return res, fmt.Errorf("%w at offset %d", ErrTorn, frameStart) + } + return nil, fmt.Errorf("cars: read frame len at %d: %w", frameStart, terr) + } + // Bound check: frame must fit in remaining bytes. + if int64(frameLen)+int64(varSize)+frameStart > size { + res.LastGoodEnd = frameStart + return res, fmt.Errorf("%w at offset %d (frame len %d exceeds file)", ErrTorn, frameStart, frameLen) + } + + // Read frame body: CID prefix + block bytes. + body := make([]byte, frameLen) + if _, err := io.ReadFull(br, body); err != nil { + res.LastGoodEnd = frameStart + return res, fmt.Errorf("%w at offset %d: %w", ErrTorn, frameStart, err) + } + c, cidLen, err := cidFromBytes(body) + if err != nil { + return nil, fmt.Errorf("cars: parse cid at offset %d: %w", frameStart, err) + } + payload := body[cidLen:] + blk, err := block.NewBlockWithCid(payload, c) + if err != nil { + return nil, fmt.Errorf("cars: new block at offset %d: %w", frameStart, err) + } + + dataOffset := uint64(frameStart) + uint64(varSize) + uint64(cidLen) + res.Frames = append(res.Frames, Frame{ + Block: blk, + Offset: dataOffset, + Length: uint64(len(payload)), + }) + pos = frameStart + int64(varSize) + int64(frameLen) + res.LastGoodEnd = pos + } + return res, nil +} + +// uvarintLen returns the encoded byte length of n. +func uvarintLen(n uint64) int { + var buf [binary.MaxVarintLen64]byte + return binary.PutUvarint(buf[:], n) +} + +// readUvarint pulls a varint from br and reports how many bytes it +// consumed. Wraps the bufio reader's ReadByte so we can count. +func readUvarint(br *bufio.Reader) (uint64, int, error) { + var ( + x uint64 + s uint + n int + ) + for { + b, err := br.ReadByte() + if err != nil { + return 0, n, err + } + n++ + if b < 0x80 { + if n > binary.MaxVarintLen64 || (n == binary.MaxVarintLen64 && b > 1) { + return 0, n, fmt.Errorf("cars: uvarint overflow") + } + return x | uint64(b)< maxSeq { + maxSeq = seq + } + row, hasRow := dbBySeq[seq] + + switch { + case hasRow && row.State == StateOpen: + seg, err := rebuildOpenFromDisk(s.cfg.Dir, seq, s.logger) + if err != nil { + return fmt.Errorf("logstore: rebuild open seg %d: %w", seq, err) + } + if recoveredOpen != nil { + return fmt.Errorf("logstore: more than one open segment on disk (seqs %d and %d)", + recoveredOpen.seq, seq) + } + recoveredOpen = seg + + case hasRow && row.State == StateSealed: + seg, err := loadSealedFromIdx(s.cfg.Dir, seq, s.logger) + if err != nil { + return fmt.Errorf("logstore: load sealed seg %d: %w", seq, err) + } + sealedRecovered = append(sealedRecovered, loaded{seg: seg}) + + case flushedOnDisk[seq]: + seg, err := loadFlushedFromIdx(s.cfg.Dir, seq, 0, s.logger) + if err != nil { + return fmt.Errorf("logstore: load flushed seg %d: %w", seq, err) + } + flushedRecovered = append(flushedRecovered, loaded{seg: seg}) + + default: + // File on disk but no DB row and no idx — treat as a + // previously-open segment that crashed before sealing. Rebuild + // as open and let the force-seal path in Open() finalize it. + seg, err := rebuildOpenFromDisk(s.cfg.Dir, seq, s.logger) + if err != nil { + return fmt.Errorf("logstore: rebuild orphan seg %d: %w", seq, err) + } + // Seed the DB row in 'open' so the seal transition's + // "from open" UPDATE matches. + if err := s.cfg.Meta.InsertSegmentOpen(ctx, seq); err != nil { + return fmt.Errorf("logstore: insert orphan row %d: %w", seq, err) + } + if recoveredOpen != nil { + return fmt.Errorf("logstore: orphan + open conflict (seqs %d and %d)", + recoveredOpen.seq, seq) + } + recoveredOpen = seg + } + } + + // DB rows without a corresponding .car file → log + clean up. + for seq, row := range dbBySeq { + if _, ok := carSeqs[seq]; ok { + continue + } + s.logger.Error("logstore: DB segment row without on-disk file; deleting row", + zap.Uint64("seq", seq), zap.String("state", row.State.String())) + if err := s.cfg.Meta.DeleteSegment(ctx, seq); err != nil { + return fmt.Errorf("logstore: delete orphan row %d: %w", seq, err) + } + } + + // Sort recovered sealed/flushed segments newest-first by seq. + sort.Slice(sealedRecovered, func(i, j int) bool { + return sealedRecovered[i].seg.Seq() > sealedRecovered[j].seg.Seq() + }) + sort.Slice(flushedRecovered, func(i, j int) bool { + return flushedRecovered[i].seg.Seq() > flushedRecovered[j].seg.Seq() + }) + + // Combine into the sealed slice (newest-first overall). + all := make([]*Segment, 0, len(sealedRecovered)+len(flushedRecovered)) + for _, l := range sealedRecovered { + all = append(all, l.seg) + } + for _, l := range flushedRecovered { + all = append(all, l.seg) + } + sort.SliceStable(all, func(i, j int) bool { return all[i].Seq() > all[j].Seq() }) + s.sealed = all + + // Re-enqueue sealed segments (not flushed) for the flusher. + for _, seg := range s.sealed { + if seg.State() == StateSealed { + select { + case s.flushQ <- seg: + default: + s.logger.Warn("logstore: flush queue full at recovery; will retry on tick", + zap.Uint64("seq", seg.Seq())) + } + } + } + + s.open = recoveredOpen + if recoveredOpen != nil && recoveredOpen.Seq() > maxSeq { + maxSeq = recoveredOpen.Seq() + } + s.nextSeq = maxSeq + 1 + + return nil +} diff --git a/pkg/ms3t/logstore/segment.go b/pkg/ms3t/logstore/segment.go new file mode 100644 index 0000000..5af0b71 --- /dev/null +++ b/pkg/ms3t/logstore/segment.go @@ -0,0 +1,826 @@ +package logstore + +import ( + "context" + "crypto/sha256" + "encoding/binary" + "encoding/json" + "errors" + "fmt" + "io" + "os" + "path/filepath" + "sync" + "time" + + block "github.com/ipfs/go-block-format" + "github.com/ipfs/go-cid" + "go.uber.org/zap" + + "github.com/storacha/sprue/pkg/ms3t/blockstore" + "github.com/storacha/sprue/pkg/ms3t/cars" +) + +// placeholderRoot is the placeholder CAR header root. Each segment +// is multi-rooted by intent; the per-op roots live in the .ops +// sidecar (and in-memory OpRoots), not the CAR header. +var placeholderRoot = cid.NewCidV1(cid.Raw, []byte{0x00, 0x00}) + +// Segment is one log file. Open segments accept appends; sealed +// segments are read-only. +// +// Concurrency model: Append is serialized by Store.appMu. Reads +// (Lookup + ReadAt against fdRO) and seal/finalize use Segment-level +// locks so they don't block appenders unnecessarily. +type Segment struct { + seq uint64 + dir string + logger *zap.Logger + + // stateMu guards state, sealedAt, sha256, opRoots, sizeBytes, + // index, seen, fdRW, and fdRO. RLock for reads (lookups, opRoots + // access); Lock for mutating any of the above. + stateMu sync.RWMutex + + state State + sealedAt int64 + sha256 []byte + + sizeBytes int64 + // index maps each block's CID to its on-disk byte position + // inside the segment's CAR. Updated on append (after a successful + // fsync) and rebuilt on recovery from either the .idx sidecar or + // a fresh CAR scan. + index map[cid.Cid]blockstore.BlockLoc + // seen is the dedup gate consulted by append. CIDs that have + // already landed in this segment are skipped before + // cars.WriteBlocksAt is called, so duplicate bytes are never + // written to disk and never shipped to Forge. Always kept in + // sync with index's key set. + seen *cid.Set + opRoots []blockstore.OpRoot + + // fdRW is the append/read file descriptor for an open segment. + // Closed at seal. + fdRW *os.File + // opsFD is the append-only ops sidecar (open segment only). + // Closed at seal. + opsFD *os.File + // fdRO is the read-only descriptor used to serve Get after seal + // (and before, when the open fdRW exists). For open segments we + // use fdRW for reads via ReadAt; fdRO is opened lazily at seal + // time so reads after seal don't need to reopen on every Get. + fdRO *os.File +} + +// Seq returns the segment's identifier. +func (s *Segment) Seq() uint64 { return s.seq } + +// State reports the current lifecycle state. +func (s *Segment) State() State { + s.stateMu.RLock() + defer s.stateMu.RUnlock() + return s.state +} + +// Size reports the current on-disk byte size of the CAR file. +func (s *Segment) Size() int64 { + s.stateMu.RLock() + defer s.stateMu.RUnlock() + return s.sizeBytes +} + +// SHA256 returns the seal-time sha256 of the CAR file. Empty for +// open segments. +func (s *Segment) SHA256() []byte { + s.stateMu.RLock() + defer s.stateMu.RUnlock() + out := make([]byte, len(s.sha256)) + copy(out, s.sha256) + return out +} + +// SealedAt returns the seal-time unix-seconds timestamp. Zero for +// open segments. +func (s *Segment) SealedAt() int64 { + s.stateMu.RLock() + defer s.stateMu.RUnlock() + return s.sealedAt +} + +// OpRoots returns a copy of the per-batch (bucket, root) records. +// Safe to call from any goroutine. +func (s *Segment) OpRoots() []blockstore.OpRoot { + s.stateMu.RLock() + defer s.stateMu.RUnlock() + out := make([]blockstore.OpRoot, len(s.opRoots)) + copy(out, s.opRoots) + return out +} + +// BlockPositions returns a copy of the cid → on-disk-position +// table for the segment's CAR. Populated at append time and +// rebuilt on recovery from either the .idx sidecar or a fresh CAR +// scan. Used by the flush path to build a ShardedDagIndexView +// without rescanning the file. Safe to call from any goroutine. +func (s *Segment) BlockPositions() map[cid.Cid]blockstore.BlockLoc { + s.stateMu.RLock() + defer s.stateMu.RUnlock() + out := make(map[cid.Cid]blockstore.BlockLoc, len(s.index)) + for c, loc := range s.index { + out[c] = loc + } + return out +} + +// CARPath returns the absolute path to the segment's CAR file. +func (s *Segment) CARPath() string { return filepath.Join(s.dir, carName(s.seq)) } + +// OpsPath returns the absolute path to the segment's ops sidecar. +func (s *Segment) OpsPath() string { return filepath.Join(s.dir, opsName(s.seq)) } + +// IdxPath returns the absolute path to the segment's idx sidecar. +func (s *Segment) IdxPath() string { return filepath.Join(s.dir, idxName(s.seq)) } + +func carName(seq uint64) string { return fmt.Sprintf("seg-%020d.car", seq) } +func opsName(seq uint64) string { return fmt.Sprintf("seg-%020d.ops", seq) } +func idxName(seq uint64) string { return fmt.Sprintf("seg-%020d.idx", seq) } + +// createOpenSegment creates a brand-new segment in the open state: +// initializes the CAR file with a header, opens the ops sidecar, +// and records the row in Meta. +func createOpenSegment(ctx context.Context, dir string, seq uint64, meta Meta, logger *zap.Logger) (*Segment, error) { + carPath := filepath.Join(dir, carName(seq)) + opsPath := filepath.Join(dir, opsName(seq)) + + carFile, err := os.OpenFile(carPath, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0o644) + if err != nil { + return nil, fmt.Errorf("logstore: open car %d: %w", seq, err) + } + hdrLen, err := cars.WriteHeader(carFile, []cid.Cid{placeholderRoot}) + if err != nil { + _ = carFile.Close() + _ = os.Remove(carPath) + return nil, fmt.Errorf("logstore: write header %d: %w", seq, err) + } + if err := carFile.Sync(); err != nil { + _ = carFile.Close() + _ = os.Remove(carPath) + return nil, fmt.Errorf("logstore: sync header %d: %w", seq, err) + } + + opsFile, err := os.OpenFile(opsPath, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0o644) + if err != nil { + _ = carFile.Close() + _ = os.Remove(carPath) + return nil, fmt.Errorf("logstore: open ops %d: %w", seq, err) + } + + if err := meta.InsertSegmentOpen(ctx, seq); err != nil { + _ = carFile.Close() + _ = opsFile.Close() + _ = os.Remove(carPath) + _ = os.Remove(opsPath) + return nil, err + } + + return &Segment{ + seq: seq, + dir: dir, + logger: logger, + state: StateOpen, + sizeBytes: hdrLen, + index: map[cid.Cid]blockstore.BlockLoc{}, + seen: cid.NewSet(), + fdRW: carFile, + opsFD: opsFile, + }, nil +} + +// append writes the given blocks + opRoot to disk and updates the +// in-memory index. fsyncs both files before returning. Caller must +// hold Store.appMu. +// +// Block-level dedup: every block is checked against s.seen before +// writing. CIDs already present in this segment are skipped, so a +// duplicate body chunk or MST node landing across two PUTs in the +// same segment never hits the CAR file twice and never ships to +// Forge twice. The op-root record is appended unconditionally — +// even an all-duplicate batch still represents a real bucket-Root +// advance and must be replayed by the flusher. +func (s *Segment) append(blocks []block.Block, opRoot blockstore.OpRoot) error { + s.stateMu.Lock() + defer s.stateMu.Unlock() + + if s.state != StateOpen || s.fdRW == nil { + return errors.New("logstore: segment not open for append") + } + + // Filter out CIDs we've already written into this segment. We + // don't mutate s.seen yet — only after the file write succeeds — + // so a fsync error doesn't poison the dedup state. + fresh := make([]block.Block, 0, len(blocks)) + for _, blk := range blocks { + if s.seen.Has(blk.Cid()) { + continue + } + fresh = append(fresh, blk) + } + + var positions []cars.BlockPosition + if len(fresh) > 0 { + var err error + positions, err = cars.WriteBlocksAt(s.fdRW, s.sizeBytes, fresh) + if err != nil { + return fmt.Errorf("logstore: append blocks seg %d: %w", s.seq, err) + } + } + + // Append the op-root record to the ops sidecar regardless of + // whether any new bytes were written to the CAR. + opsRec, err := encodeOpRecord(opRoot) + if err != nil { + return fmt.Errorf("logstore: encode oprec seg %d: %w", s.seq, err) + } + if _, err := s.opsFD.Write(opsRec); err != nil { + return fmt.Errorf("logstore: write ops seg %d: %w", s.seq, err) + } + + // fsync both files in parallel. The CAR fsync is a fast no-op + // when len(fresh) == 0 (nothing written since the last sync) but + // we issue it anyway to keep the durability contract uniform. + var wg sync.WaitGroup + var carErr, opsErr error + wg.Add(2) + go func() { + defer wg.Done() + carErr = s.fdRW.Sync() + }() + go func() { + defer wg.Done() + opsErr = s.opsFD.Sync() + }() + wg.Wait() + if carErr != nil { + return fmt.Errorf("logstore: fsync car seg %d: %w", s.seq, carErr) + } + if opsErr != nil { + return fmt.Errorf("logstore: fsync ops seg %d: %w", s.seq, opsErr) + } + + // Commit the dedup state and the position table together. + for i, blk := range fresh { + s.seen.Add(blk.Cid()) + s.index[blk.Cid()] = blockstore.BlockLoc{Offset: positions[i].Offset, Length: positions[i].Length} + } + if n := len(positions); n > 0 { + end := int64(positions[n-1].Offset) + int64(positions[n-1].Length) + if end > s.sizeBytes { + s.sizeBytes = end + } + } + s.opRoots = append(s.opRoots, opRoot) + return nil +} + +// seal closes the open fds, hashes the CAR, writes the .idx sidecar, +// and updates Meta. After this returns, the segment is in +// StateSealed and safe to be flushed. +func (s *Segment) seal(ctx context.Context, meta Meta) error { + s.stateMu.Lock() + defer s.stateMu.Unlock() + + if s.state != StateOpen { + // Idempotent: already sealed. + return nil + } + + // Final fsync before close (defensive — append already fsyncs). + if err := s.fdRW.Sync(); err != nil { + return fmt.Errorf("logstore: pre-seal fsync car %d: %w", s.seq, err) + } + if err := s.opsFD.Sync(); err != nil { + return fmt.Errorf("logstore: pre-seal fsync ops %d: %w", s.seq, err) + } + if err := s.fdRW.Close(); err != nil { + return fmt.Errorf("logstore: close car %d: %w", s.seq, err) + } + s.fdRW = nil + if err := s.opsFD.Close(); err != nil { + return fmt.Errorf("logstore: close ops %d: %w", s.seq, err) + } + s.opsFD = nil + + // Compute CAR sha256 by streaming the file. + sum, err := hashFile(s.CARPath()) + if err != nil { + return fmt.Errorf("logstore: hash %d: %w", s.seq, err) + } + s.sha256 = sum + s.sealedAt = time.Now().Unix() + s.state = StateSealed + + // Write idx sidecar (atomic via tmp+rename). + if err := s.writeIdxLocked(); err != nil { + return fmt.Errorf("logstore: write idx %d: %w", s.seq, err) + } + + // Persist sealed state in Postgres. + if err := meta.MarkSegmentSealed(ctx, s.seq, s.sealedAt, s.sizeBytes, s.sha256, s.opRoots); err != nil { + return fmt.Errorf("logstore: mark sealed %d: %w", s.seq, err) + } + + // Open the read-only fd that will serve Get from now on. + roFD, err := os.Open(s.CARPath()) + if err != nil { + return fmt.Errorf("logstore: open ro car %d: %w", s.seq, err) + } + s.fdRO = roFD + + return nil +} + +// retire closes any open fd and unlinks the segment's files. Safe to +// call after MarkFlushed; the caller must guarantee no other +// goroutine still holds a reference for reads. +func (s *Segment) retire() error { + s.stateMu.Lock() + defer s.stateMu.Unlock() + + if s.fdRO != nil { + _ = s.fdRO.Close() + s.fdRO = nil + } + if s.fdRW != nil { + _ = s.fdRW.Close() + s.fdRW = nil + } + if s.opsFD != nil { + _ = s.opsFD.Close() + s.opsFD = nil + } + + for _, name := range []string{s.CARPath(), s.OpsPath(), s.IdxPath()} { + if err := os.Remove(name); err != nil && !errors.Is(err, os.ErrNotExist) { + return fmt.Errorf("logstore: unlink %s: %w", name, err) + } + } + return nil +} + +// get returns the block at the given CID, or blockstore.ErrNotFound. Safe for +// concurrent callers. +func (s *Segment) get(_ context.Context, c cid.Cid) (block.Block, error) { + s.stateMu.RLock() + loc, ok := s.index[c] + fd := s.fdRO + if fd == nil { + fd = s.fdRW + } + s.stateMu.RUnlock() + if !ok { + return nil, blockstore.ErrNotFound + } + if fd == nil { + return nil, fmt.Errorf("logstore: segment %d has no read fd", s.seq) + } + buf := make([]byte, loc.Length) + if _, err := fd.ReadAt(buf, int64(loc.Offset)); err != nil { + return nil, fmt.Errorf("logstore: read seg %d offset %d: %w", s.seq, loc.Offset, err) + } + return block.NewBlockWithCid(buf, c) +} + +// writeIdxLocked persists the idx sidecar. Caller must hold stateMu +// in write mode and have already populated sha256/sealedAt. +func (s *Segment) writeIdxLocked() error { + type idxBlock struct { + CID string `json:"cid"` + Offset uint64 `json:"offset"` + Length uint64 `json:"length"` + } + type idxOpRoot struct { + Bucket string `json:"bucket"` + Root string `json:"root"` + } + type idxFile struct { + Seq uint64 `json:"seq"` + SizeBytes int64 `json:"size_bytes"` + SHA256 string `json:"sha256_hex"` + SealedAt int64 `json:"sealed_at"` + Blocks []idxBlock `json:"blocks"` + OpRoots []idxOpRoot `json:"op_roots"` + } + + blocks := make([]idxBlock, 0, len(s.index)) + for c, loc := range s.index { + blocks = append(blocks, idxBlock{ + CID: c.String(), + Offset: loc.Offset, + Length: loc.Length, + }) + } + opRoots := make([]idxOpRoot, len(s.opRoots)) + for i, opr := range s.opRoots { + opRoots[i] = idxOpRoot{Bucket: opr.Bucket, Root: opr.Root.String()} + } + + body := idxFile{ + Seq: s.seq, + SizeBytes: s.sizeBytes, + SHA256: fmt.Sprintf("%x", s.sha256), + SealedAt: s.sealedAt, + Blocks: blocks, + OpRoots: opRoots, + } + data, err := json.MarshalIndent(body, "", " ") + if err != nil { + return err + } + tmp := s.IdxPath() + ".tmp" + if err := os.WriteFile(tmp, data, 0o644); err != nil { + return err + } + return os.Rename(tmp, s.IdxPath()) +} + +// loadSealedFromIdx hydrates a Segment in the StateSealed state from +// its on-disk .idx sidecar. Used at startup. Returns (nil, error) on +// any malformed sidecar; the caller can fall back to a CAR scan. +func loadSealedFromIdx(dir string, seq uint64, logger *zap.Logger) (*Segment, error) { + idxPath := filepath.Join(dir, idxName(seq)) + data, err := os.ReadFile(idxPath) + if err != nil { + return nil, fmt.Errorf("logstore: read idx %d: %w", seq, err) + } + var raw struct { + Seq uint64 `json:"seq"` + SizeBytes int64 `json:"size_bytes"` + SHA256 string `json:"sha256_hex"` + SealedAt int64 `json:"sealed_at"` + Blocks []struct { + CID string `json:"cid"` + Offset uint64 `json:"offset"` + Length uint64 `json:"length"` + } `json:"blocks"` + OpRoots []struct { + Bucket string `json:"bucket"` + Root string `json:"root"` + } `json:"op_roots"` + } + if err := json.Unmarshal(data, &raw); err != nil { + return nil, fmt.Errorf("logstore: parse idx %d: %w", seq, err) + } + if raw.Seq != seq { + return nil, fmt.Errorf("logstore: idx seq %d does not match filename %d", raw.Seq, seq) + } + idx := make(map[cid.Cid]blockstore.BlockLoc, len(raw.Blocks)) + seen := cid.NewSet() + for _, b := range raw.Blocks { + c, err := cid.Decode(b.CID) + if err != nil { + return nil, fmt.Errorf("logstore: idx bad cid %q: %w", b.CID, err) + } + idx[c] = blockstore.BlockLoc{Offset: b.Offset, Length: b.Length} + seen.Add(c) + } + ops := make([]blockstore.OpRoot, len(raw.OpRoots)) + for i, o := range raw.OpRoots { + c, err := cid.Decode(o.Root) + if err != nil { + return nil, fmt.Errorf("logstore: idx bad root %q: %w", o.Root, err) + } + ops[i] = blockstore.OpRoot{Bucket: o.Bucket, Root: c} + } + sha, err := hexDecode(raw.SHA256) + if err != nil { + return nil, fmt.Errorf("logstore: idx bad sha %q: %w", raw.SHA256, err) + } + + carFD, err := os.Open(filepath.Join(dir, carName(seq))) + if err != nil { + return nil, fmt.Errorf("logstore: open sealed car %d: %w", seq, err) + } + return &Segment{ + seq: seq, + dir: dir, + logger: logger, + state: StateSealed, + sealedAt: raw.SealedAt, + sha256: sha, + sizeBytes: raw.SizeBytes, + index: idx, + seen: seen, + opRoots: ops, + fdRO: carFD, + }, nil +} + +// loadFlushedFromIdx is loadSealedFromIdx but yields StateFlushed. +// Used to pick up retained segments at startup. +func loadFlushedFromIdx(dir string, seq uint64, flushedAt int64, logger *zap.Logger) (*Segment, error) { + seg, err := loadSealedFromIdx(dir, seq, logger) + if err != nil { + return nil, err + } + seg.state = StateFlushed + _ = flushedAt // kept for future use; not stored on Segment today. + return seg, nil +} + +// rebuildOpenFromDisk takes a torn or sidecar-less open segment on +// disk (the segment was open at crash time) and reconstructs an +// in-memory Segment ready to be sealed. It scans the CAR (truncating +// any torn last frame) and replays the .ops file. +// +// The returned segment is in StateOpen with its fds repositioned at +// EOF; the caller is expected to immediately call seal() to retire +// it cleanly. We do not resume appending to a recovered open +// segment — every restart starts a fresh segment for the next ops. +func rebuildOpenFromDisk(dir string, seq uint64, logger *zap.Logger) (*Segment, error) { + carPath := filepath.Join(dir, carName(seq)) + scan, err := cars.ScanFile(carPath) + if err != nil && !errors.Is(err, cars.ErrTorn) { + return nil, fmt.Errorf("logstore: scan recovered car %d: %w", seq, err) + } + if errors.Is(err, cars.ErrTorn) { + if terr := os.Truncate(carPath, scan.LastGoodEnd); terr != nil { + return nil, fmt.Errorf("logstore: truncate torn car %d: %w", seq, terr) + } + logger.Warn("logstore: truncated torn trailing frame in segment", + zap.Uint64("seq", seq), + zap.Int64("truncated_at", scan.LastGoodEnd)) + } + + idx := make(map[cid.Cid]blockstore.BlockLoc, len(scan.Frames)) + seen := cid.NewSet() + var size int64 = scan.LastGoodEnd + for _, f := range scan.Frames { + c := f.Block.Cid() + idx[c] = blockstore.BlockLoc{Offset: f.Offset, Length: f.Length} + seen.Add(c) + } + + opsPath := filepath.Join(dir, opsName(seq)) + ops, err := readAllOps(opsPath) + if err != nil { + return nil, fmt.Errorf("logstore: read ops %d: %w", seq, err) + } + + carFD, err := os.OpenFile(carPath, os.O_RDWR, 0o644) + if err != nil { + return nil, fmt.Errorf("logstore: reopen car %d: %w", seq, err) + } + if _, err := carFD.Seek(size, io.SeekStart); err != nil { + _ = carFD.Close() + return nil, fmt.Errorf("logstore: seek car %d: %w", seq, err) + } + opsFD, err := os.OpenFile(opsPath, os.O_RDWR|os.O_CREATE, 0o644) + if err != nil { + _ = carFD.Close() + return nil, fmt.Errorf("logstore: reopen ops %d: %w", seq, err) + } + if _, err := opsFD.Seek(0, io.SeekEnd); err != nil { + _ = carFD.Close() + _ = opsFD.Close() + return nil, fmt.Errorf("logstore: seek ops %d: %w", seq, err) + } + return &Segment{ + seq: seq, + dir: dir, + logger: logger, + state: StateOpen, + sizeBytes: size, + index: idx, + seen: seen, + opRoots: ops, + fdRW: carFD, + opsFD: opsFD, + }, nil +} + +// === ops sidecar codec === +// +// Each record is a 4-byte big-endian length prefix followed by a +// minimal CBOR-encoded payload: a 2-element array +// [bucket: text, root: cid bytes]. We use array form rather than a +// map to keep the encoding compact and order-independent of map +// iteration. + +const opRecMaxSize = 1 << 20 // 1 MiB ceiling per record (defensive) + +func encodeOpRecord(opr blockstore.OpRoot) ([]byte, error) { + if !opr.Root.Defined() { + return nil, errors.New("logstore: opRoot.Root must be defined") + } + if len(opr.Bucket) > 1<<16 { + return nil, errors.New("logstore: bucket name too long") + } + bucketBytes := []byte(opr.Bucket) + rootBytes := opr.Root.Bytes() + + // Manual CBOR: array(2) + text(bucket) + bytes(root). + body := make([]byte, 0, 16+len(bucketBytes)+len(rootBytes)) + body = appendCborHead(body, 4 /*MajArray*/, 2) + body = appendCborHead(body, 3 /*MajTextString*/, uint64(len(bucketBytes))) + body = append(body, bucketBytes...) + body = appendCborHead(body, 2 /*MajByteString*/, uint64(len(rootBytes))) + body = append(body, rootBytes...) + + buf := make([]byte, 4+len(body)) + binary.BigEndian.PutUint32(buf[:4], uint32(len(body))) + copy(buf[4:], body) + return buf, nil +} + +func readAllOps(path string) ([]blockstore.OpRoot, error) { + data, err := os.ReadFile(path) + if err != nil { + if errors.Is(err, os.ErrNotExist) { + return nil, nil + } + return nil, err + } + var out []blockstore.OpRoot + for off := 0; off < len(data); { + if len(data)-off < 4 { + break // torn trailing prefix — drop + } + length := int(binary.BigEndian.Uint32(data[off : off+4])) + if length <= 0 || length > opRecMaxSize || off+4+length > len(data) { + break // torn trailing record — drop + } + body := data[off+4 : off+4+length] + opr, err := decodeOpRecord(body) + if err != nil { + return nil, fmt.Errorf("logstore: ops record at %d: %w", off, err) + } + out = append(out, opr) + off += 4 + length + } + return out, nil +} + +func decodeOpRecord(body []byte) (blockstore.OpRoot, error) { + r := newCborReader(body) + maj, count, err := r.readHead() + if err != nil { + return blockstore.OpRoot{}, err + } + if maj != 4 || count != 2 { + return blockstore.OpRoot{}, fmt.Errorf("expected array(2), got %d/%d", maj, count) + } + bm, blen, err := r.readHead() + if err != nil { + return blockstore.OpRoot{}, err + } + if bm != 3 { + return blockstore.OpRoot{}, fmt.Errorf("expected text bucket, got maj %d", bm) + } + bucket, err := r.readBytes(int(blen)) + if err != nil { + return blockstore.OpRoot{}, err + } + rm, rlen, err := r.readHead() + if err != nil { + return blockstore.OpRoot{}, err + } + if rm != 2 { + return blockstore.OpRoot{}, fmt.Errorf("expected bytes root, got maj %d", rm) + } + rootBytes, err := r.readBytes(int(rlen)) + if err != nil { + return blockstore.OpRoot{}, err + } + c, err := cid.Cast(rootBytes) + if err != nil { + return blockstore.OpRoot{}, err + } + return blockstore.OpRoot{Bucket: string(bucket), Root: c}, nil +} + +// hashFile returns the sha256 of the file at path. +func hashFile(path string) ([]byte, error) { + f, err := os.Open(path) + if err != nil { + return nil, err + } + defer f.Close() + h := sha256.New() + if _, err := io.Copy(h, f); err != nil { + return nil, err + } + return h.Sum(nil), nil +} + +func hexDecode(s string) ([]byte, error) { + if len(s)%2 != 0 { + return nil, fmt.Errorf("odd length") + } + out := make([]byte, len(s)/2) + for i := 0; i < len(out); i++ { + hi, ok1 := unhex(s[2*i]) + lo, ok2 := unhex(s[2*i+1]) + if !ok1 || !ok2 { + return nil, fmt.Errorf("bad hex char") + } + out[i] = hi<<4 | lo + } + return out, nil +} + +func unhex(b byte) (byte, bool) { + switch { + case b >= '0' && b <= '9': + return b - '0', true + case b >= 'a' && b <= 'f': + return b - 'a' + 10, true + case b >= 'A' && b <= 'F': + return b - 'A' + 10, true + } + return 0, false +} + +// === minimal CBOR head encoding/decoding === + +func appendCborHead(buf []byte, maj uint8, val uint64) []byte { + switch { + case val < 24: + return append(buf, byte(maj<<5)|byte(val)) + case val < 1<<8: + return append(buf, byte(maj<<5)|24, byte(val)) + case val < 1<<16: + return append(buf, byte(maj<<5)|25, byte(val>>8), byte(val)) + case val < 1<<32: + return append(buf, byte(maj<<5)|26, + byte(val>>24), byte(val>>16), byte(val>>8), byte(val)) + default: + return append(buf, byte(maj<<5)|27, + byte(val>>56), byte(val>>48), byte(val>>40), byte(val>>32), + byte(val>>24), byte(val>>16), byte(val>>8), byte(val)) + } +} + +type cborReader struct { + buf []byte + pos int +} + +func newCborReader(b []byte) *cborReader { return &cborReader{buf: b} } + +func (r *cborReader) readHead() (uint8, uint64, error) { + if r.pos >= len(r.buf) { + return 0, 0, io.EOF + } + first := r.buf[r.pos] + r.pos++ + maj := first >> 5 + low := first & 0x1f + switch { + case low < 24: + return maj, uint64(low), nil + case low == 24: + if r.pos+1 > len(r.buf) { + return 0, 0, io.ErrUnexpectedEOF + } + v := uint64(r.buf[r.pos]) + r.pos++ + return maj, v, nil + case low == 25: + if r.pos+2 > len(r.buf) { + return 0, 0, io.ErrUnexpectedEOF + } + v := uint64(r.buf[r.pos])<<8 | uint64(r.buf[r.pos+1]) + r.pos += 2 + return maj, v, nil + case low == 26: + if r.pos+4 > len(r.buf) { + return 0, 0, io.ErrUnexpectedEOF + } + v := uint64(r.buf[r.pos])<<24 | uint64(r.buf[r.pos+1])<<16 | + uint64(r.buf[r.pos+2])<<8 | uint64(r.buf[r.pos+3]) + r.pos += 4 + return maj, v, nil + case low == 27: + if r.pos+8 > len(r.buf) { + return 0, 0, io.ErrUnexpectedEOF + } + v := uint64(r.buf[r.pos])<<56 | uint64(r.buf[r.pos+1])<<48 | + uint64(r.buf[r.pos+2])<<40 | uint64(r.buf[r.pos+3])<<32 | + uint64(r.buf[r.pos+4])<<24 | uint64(r.buf[r.pos+5])<<16 | + uint64(r.buf[r.pos+6])<<8 | uint64(r.buf[r.pos+7]) + r.pos += 8 + return maj, v, nil + default: + return 0, 0, fmt.Errorf("invalid cbor head 0x%x", first) + } +} + +func (r *cborReader) readBytes(n int) ([]byte, error) { + if r.pos+n > len(r.buf) { + return nil, io.ErrUnexpectedEOF + } + b := r.buf[r.pos : r.pos+n] + r.pos += n + return b, nil +} diff --git a/pkg/ms3t/logstore/store.go b/pkg/ms3t/logstore/store.go new file mode 100644 index 0000000..d387fbe --- /dev/null +++ b/pkg/ms3t/logstore/store.go @@ -0,0 +1,430 @@ +package logstore + +import ( + "context" + "errors" + "fmt" + "os" + "sync" + "time" + + block "github.com/ipfs/go-block-format" + "github.com/ipfs/go-cid" + "go.uber.org/zap" + + "github.com/storacha/sprue/pkg/ms3t/blockstore" +) + +// Compile-time assertion that *Store satisfies blockstore.Log. +// blockstore.Log is the consumer-facing contract (AppendBatch / +// Get / Close); *Store is the production LSM implementation that +// backs it. +var _ blockstore.Log = (*Store)(nil) + +// Store is the LSM-style log: one open segment accepting appends, +// plus N sealed segments (some flushed, some pending flush) that +// serve reads in front of the network blockstore. +// +// Concurrency: +// - catMu (RWMutex) guards open + sealed slice + nextSeq. Writers +// hold Lock briefly during seal/retire/new-open swaps. Readers +// hold RLock to take a stable snapshot of the segment list, then +// do file I/O outside the lock. +// - appMu (Mutex) serializes appenders against each other so the +// open-segment append fd has a single writer. +type Store struct { + cfg Config + logger *zap.Logger + + catMu sync.RWMutex + open *Segment + sealed []*Segment // newest-first; includes flushed-and-retained + nextSeq uint64 + + appMu sync.Mutex + + flushQ chan *Segment + closing chan struct{} + wg sync.WaitGroup + + openedAt time.Time + + // sealReq is a coalesced "seal the open segment now" channel. + // AppendBatch sends after exceeding SealBytes; the seal-ticker + // sends on every tick if the open segment has been open longer + // than SealAge. + sealReq chan struct{} +} + +// Open initializes a Store: scans Dir, reconciles with cfg.Meta, +// re-enqueues unflushed segments for the flusher, force-seals any +// previously-open segment, and starts a fresh open segment ready to +// accept appends. +func Open(ctx context.Context, cfg Config) (*Store, error) { + if err := cfg.validate(); err != nil { + return nil, err + } + cfg.defaults() + + if err := os.MkdirAll(cfg.Dir, 0o755); err != nil { + return nil, fmt.Errorf("logstore: mkdir %s: %w", cfg.Dir, err) + } + + s := &Store{ + cfg: cfg, + logger: cfg.Logger, + flushQ: make(chan *Segment, 64), + closing: make(chan struct{}), + sealReq: make(chan struct{}, 1), + } + + if err := s.recover(ctx); err != nil { + return nil, err + } + + // Force-seal a recovered open segment (if any) so a fresh open is + // always brand-new on each process startup. This avoids the + // complications of resuming append into a partially-written file. + if s.open != nil { + if err := s.open.seal(ctx, cfg.Meta); err != nil { + return nil, fmt.Errorf("logstore: force-seal recovered open segment: %w", err) + } + s.sealed = append([]*Segment{s.open}, s.sealed...) + select { + case s.flushQ <- s.open: + default: + s.logger.Warn("logstore: flush queue full at recovery; segment will retry on next tick") + } + s.open = nil + } + + s.wg.Add(2) + go s.flushLoop() + go s.sealTickerLoop() + + return s, nil +} + +// AppendBatch persists `blocks` to the open segment along with an +// op-root record identifying the (bucket, root) this batch's S3 +// op produced. fsyncs CAR + ops sidecar before returning. After +// AppendBatch returns nil, both blocks and op-root are durable; the +// caller may safely advance the bucket's published Root. +// +// An empty blocks slice is legal — an MST mutation can produce a +// new root that points at a node already materialized in a prior +// segment (e.g., trimTop after Delete unwraps to an existing +// subtree). In that case only the OpRoot record is written; +// nothing new lands in the CAR. +func (s *Store) AppendBatch(ctx context.Context, blocks []block.Block, opRoot blockstore.OpRoot) error { + if !opRoot.Root.Defined() { + return errors.New("logstore: AppendBatch: opRoot.Root must be defined") + } + + s.appMu.Lock() + defer s.appMu.Unlock() + + open, err := s.ensureOpenLockedAppMu(ctx) + if err != nil { + return err + } + if err := open.append(blocks, opRoot); err != nil { + return err + } + + // Trigger seal if size threshold hit. Non-blocking signal — the + // actual seal happens off this goroutine to keep AppendBatch + // latency bounded by fsync. + if open.Size() >= s.cfg.SealBytes { + s.requestSeal() + } + return nil +} + +// Get returns the block from the local log if any segment contains +// it, or ErrNotFound otherwise. Searches open first, then sealed +// newest-first. +func (s *Store) Get(ctx context.Context, c cid.Cid) (block.Block, error) { + s.catMu.RLock() + open := s.open + sealed := make([]*Segment, len(s.sealed)) + copy(sealed, s.sealed) + s.catMu.RUnlock() + + if open != nil { + if blk, err := open.get(ctx, c); err == nil { + return blk, nil + } else if !errors.Is(err, blockstore.ErrNotFound) { + return nil, err + } + } + for _, seg := range sealed { + blk, err := seg.get(ctx, c) + if err == nil { + return blk, nil + } + if !errors.Is(err, blockstore.ErrNotFound) { + return nil, err + } + } + return nil, blockstore.ErrNotFound +} + +// Close seals the open segment, drains the flush queue, and stops +// background goroutines. Safe to call once. +func (s *Store) Close(ctx context.Context) error { + s.catMu.Lock() + already := s.closing == nil + if !already { + select { + case <-s.closing: + already = true + default: + } + } + if !already { + close(s.closing) + } + s.catMu.Unlock() + if already { + return nil + } + + // Force-seal the open segment so anything still buffered makes it + // into the flush queue. + s.appMu.Lock() + s.catMu.Lock() + open := s.open + s.open = nil + s.catMu.Unlock() + if open != nil { + if err := open.seal(ctx, s.cfg.Meta); err != nil { + s.logger.Error("logstore: seal at close", zap.Error(err)) + } else { + s.catMu.Lock() + s.sealed = append([]*Segment{open}, s.sealed...) + s.catMu.Unlock() + select { + case s.flushQ <- open: + case <-ctx.Done(): + } + } + } + s.appMu.Unlock() + + close(s.flushQ) + s.wg.Wait() + return nil +} + +// requestSeal coalesces seal triggers — the channel has buffer 1 so +// repeated triggers between two ticks of the seal goroutine are +// folded into one. +func (s *Store) requestSeal() { + select { + case s.sealReq <- struct{}{}: + default: + } +} + +// ensureOpenLockedAppMu returns the current open segment, creating a +// fresh one if none exists. Caller must hold appMu (so concurrent +// AppendBatches don't race on segment creation). +func (s *Store) ensureOpenLockedAppMu(ctx context.Context) (*Segment, error) { + s.catMu.RLock() + open := s.open + s.catMu.RUnlock() + if open != nil { + return open, nil + } + + seq, err := s.cfg.Meta.NextSegmentSeq(ctx) + if err != nil { + return nil, err + } + seg, err := createOpenSegment(ctx, s.cfg.Dir, seq, s.cfg.Meta, s.logger) + if err != nil { + return nil, err + } + + s.catMu.Lock() + if s.open == nil { + s.open = seg + s.openedAt = time.Now() + if seq >= s.nextSeq { + s.nextSeq = seq + 1 + } + s.catMu.Unlock() + return seg, nil + } + // Lost a race; another caller created an open segment first. + s.catMu.Unlock() + if err := seg.retire(); err != nil { + s.logger.Warn("logstore: retire raced new segment", zap.Error(err)) + } + if err := s.cfg.Meta.DeleteSegment(ctx, seq); err != nil { + s.logger.Warn("logstore: delete raced new segment row", zap.Error(err)) + } + s.catMu.RLock() + open = s.open + s.catMu.RUnlock() + return open, nil +} + +// sealOpenIfDue seals the current open segment if one exists. Sends +// to flushQ. Idempotent: returns nil if there's nothing to seal. +func (s *Store) sealOpenIfDue(ctx context.Context, force bool) error { + s.appMu.Lock() + defer s.appMu.Unlock() + + s.catMu.RLock() + open := s.open + openedAt := s.openedAt + s.catMu.RUnlock() + if open == nil { + return nil + } + if !force { + if open.Size() < s.cfg.SealBytes && time.Since(openedAt) < s.cfg.SealAge { + return nil + } + } + + if err := open.seal(ctx, s.cfg.Meta); err != nil { + return err + } + + s.catMu.Lock() + if s.open == open { + s.open = nil + s.sealed = append([]*Segment{open}, s.sealed...) + } + s.catMu.Unlock() + + select { + case s.flushQ <- open: + case <-s.closing: + return nil + } + return nil +} + +// flushLoop drains flushQ, calling cfg.Flush for each sealed segment. +// On success, transitions the segment to StateFlushed and runs the +// retention sweep. On failure, requeues with backoff so transient +// errors (network blips) don't permanently stall the pipeline. +// +// Exits when either the closing signal fires or flushQ is closed +// (whichever comes first). +func (s *Store) flushLoop() { + defer s.wg.Done() + for { + select { + case <-s.closing: + return + case seg, ok := <-s.flushQ: + if !ok { + return + } + s.flushOne(seg) + } + } +} + +func (s *Store) flushOne(seg *Segment) { + ctx := context.Background() + const maxAttempts = 5 + backoff := time.Second + + for attempt := 1; attempt <= maxAttempts; attempt++ { + err := s.cfg.Flush(ctx, seg) + if err == nil { + seg.stateMu.Lock() + seg.state = StateFlushed + seg.stateMu.Unlock() + s.runRetention(ctx) + return + } + s.logger.Warn("logstore: flush attempt failed", + zap.Uint64("seq", seg.Seq()), + zap.Int("attempt", attempt), + zap.Error(err)) + select { + case <-s.closing: + return + case <-time.After(backoff): + } + if backoff < 30*time.Second { + backoff *= 2 + } + } + s.logger.Error("logstore: flush exhausted retries; segment remains sealed", + zap.Uint64("seq", seg.Seq())) + // Leaving the segment in sealed state; recovery will pick it up + // at next process restart, or operators can intervene. +} + +// runRetention removes flushed segments older than cfg.Retain from +// disk and the catalog. +func (s *Store) runRetention(ctx context.Context) { + s.catMu.Lock() + // Walk newest-first, count flushed segments. Once we exceed + // Retain flushed segments, the rest are retire candidates. + var ( + flushedSeen int + keep []*Segment + retire []*Segment + ) + for _, seg := range s.sealed { + if seg.State() != StateFlushed { + keep = append(keep, seg) + continue + } + flushedSeen++ + if flushedSeen <= s.cfg.Retain { + keep = append(keep, seg) + continue + } + retire = append(retire, seg) + } + s.sealed = keep + s.catMu.Unlock() + + for _, seg := range retire { + if err := seg.retire(); err != nil { + s.logger.Warn("logstore: retire", zap.Uint64("seq", seg.Seq()), zap.Error(err)) + } + if err := s.cfg.Meta.DeleteSegment(ctx, seg.Seq()); err != nil { + s.logger.Warn("logstore: delete segment row", + zap.Uint64("seq", seg.Seq()), zap.Error(err)) + } + } +} + +// sealTickerLoop wakes periodically (every SealAge / 4) and seals +// the open segment if it has been open longer than SealAge or its +// size is over SealBytes (the latter is also signaled directly via +// requestSeal but we double-check defensively). +func (s *Store) sealTickerLoop() { + defer s.wg.Done() + interval := s.cfg.SealAge / 4 + if interval < 100*time.Millisecond { + interval = 100 * time.Millisecond + } + t := time.NewTicker(interval) + defer t.Stop() + for { + select { + case <-s.closing: + return + case <-t.C: + if err := s.sealOpenIfDue(context.Background(), false); err != nil { + s.logger.Warn("logstore: tick seal", zap.Error(err)) + } + case <-s.sealReq: + if err := s.sealOpenIfDue(context.Background(), false); err != nil { + s.logger.Warn("logstore: req seal", zap.Error(err)) + } + } + } +} diff --git a/pkg/ms3t/logstore/store_test.go b/pkg/ms3t/logstore/store_test.go new file mode 100644 index 0000000..4769208 --- /dev/null +++ b/pkg/ms3t/logstore/store_test.go @@ -0,0 +1,501 @@ +package logstore + +import ( + "context" + "errors" + "fmt" + "path/filepath" + "sync" + "sync/atomic" + "testing" + "time" + + block "github.com/ipfs/go-block-format" + "github.com/ipfs/go-cid" + "github.com/multiformats/go-multihash" + "go.uber.org/zap/zaptest" + + "github.com/storacha/sprue/pkg/ms3t/blockstore" +) + +// fakeMeta is an in-memory Meta implementation for tests. It keeps +// just enough state to exercise the segment lifecycle without +// touching Postgres. +type fakeMeta struct { + mu sync.Mutex + nextSeq uint64 + segments map[uint64]*SegmentMeta + flushed []uint64 // order of MarkSegmentFlushed calls +} + +func newFakeMeta() *fakeMeta { + return &fakeMeta{segments: map[uint64]*SegmentMeta{}} +} + +func (f *fakeMeta) NextSegmentSeq(_ context.Context) (uint64, error) { + f.mu.Lock() + defer f.mu.Unlock() + f.nextSeq++ + return f.nextSeq, nil +} + +func (f *fakeMeta) InsertSegmentOpen(_ context.Context, seq uint64) error { + f.mu.Lock() + defer f.mu.Unlock() + if _, ok := f.segments[seq]; ok { + return nil + } + f.segments[seq] = &SegmentMeta{Seq: seq, State: StateOpen} + return nil +} + +func (f *fakeMeta) MarkSegmentSealed(_ context.Context, seq uint64, sealedAt int64, sizeBytes int64, sha256 []byte, opRoots []blockstore.OpRoot) error { + f.mu.Lock() + defer f.mu.Unlock() + m, ok := f.segments[seq] + if !ok { + return fmt.Errorf("fake: seal missing seq %d", seq) + } + if m.State != StateOpen { + // idempotent + return nil + } + m.State = StateSealed + m.SealedAt = sealedAt + m.SizeBytes = sizeBytes + m.SHA256 = append([]byte(nil), sha256...) + m.OpRoots = append([]blockstore.OpRoot(nil), opRoots...) + return nil +} + +func (f *fakeMeta) MarkSegmentFlushed(_ context.Context, seq uint64, flushedAt int64, opRoots []blockstore.OpRoot) error { + f.mu.Lock() + defer f.mu.Unlock() + m, ok := f.segments[seq] + if !ok { + return fmt.Errorf("fake: flush missing seq %d", seq) + } + if m.State == StateFlushed { + return nil + } + m.State = StateFlushed + m.FlushedAt = flushedAt + if len(opRoots) > 0 { + m.OpRoots = append([]blockstore.OpRoot(nil), opRoots...) + } + f.flushed = append(f.flushed, seq) + return nil +} + +func (f *fakeMeta) DeleteSegment(_ context.Context, seq uint64) error { + f.mu.Lock() + defer f.mu.Unlock() + delete(f.segments, seq) + return nil +} + +func (f *fakeMeta) ListUnflushedSegments(_ context.Context) ([]SegmentMeta, error) { + f.mu.Lock() + defer f.mu.Unlock() + var out []SegmentMeta + for _, m := range f.segments { + if m.State == StateOpen || m.State == StateSealed { + out = append(out, *m) + } + } + return out, nil +} + +func (f *fakeMeta) RehydrateSegment(_ context.Context, m SegmentMeta) error { + f.mu.Lock() + defer f.mu.Unlock() + cp := m + f.segments[m.Seq] = &cp + return nil +} + +func (f *fakeMeta) snapshot(seq uint64) (SegmentMeta, bool) { + f.mu.Lock() + defer f.mu.Unlock() + m, ok := f.segments[seq] + if !ok { + return SegmentMeta{}, false + } + return *m, true +} + +// makeBlock returns a raw-codec block whose CID is the sha256 of +// payload. We construct the CID explicitly rather than relying on +// block.NewBlock because the latter uses a v0 CID we don't want. +func makeBlock(t *testing.T, payload []byte) block.Block { + t.Helper() + mh, err := multihash.Sum(payload, multihash.SHA2_256, -1) + if err != nil { + t.Fatalf("multihash: %v", err) + } + c := cid.NewCidV1(cid.Raw, mh) + blk, err := block.NewBlockWithCid(payload, c) + if err != nil { + t.Fatalf("block: %v", err) + } + return blk +} + +// makeRoot returns a deterministic CID derived from name; used as +// the OpRoot.Root in tests. +func makeRoot(t *testing.T, name string) cid.Cid { + t.Helper() + mh, err := multihash.Sum([]byte("root:"+name), multihash.SHA2_256, -1) + if err != nil { + t.Fatalf("mh: %v", err) + } + return cid.NewCidV1(cid.DagCBOR, mh) +} + +func newTestStore(t *testing.T, sealBytes int64, sealAge time.Duration, retain int) (*Store, *fakeMeta, *atomicCounter) { + t.Helper() + dir := t.TempDir() + meta := newFakeMeta() + flushCalls := &atomicCounter{} + logger := zaptest.NewLogger(t) + cfg := Config{ + Dir: dir, + Meta: meta, + SealBytes: sealBytes, + SealAge: sealAge, + Retain: retain, + Flush: func(ctx context.Context, seg *Segment) error { + flushCalls.add(1) + return meta.MarkSegmentFlushed(ctx, seg.Seq(), time.Now().Unix(), seg.OpRoots()) + }, + Logger: logger, + } + s, err := Open(context.Background(), cfg) + if err != nil { + t.Fatalf("Open: %v", err) + } + t.Cleanup(func() { _ = s.Close(context.Background()) }) + return s, meta, flushCalls +} + +type atomicCounter struct{ n int64 } + +func (a *atomicCounter) add(n int64) { atomic.AddInt64(&a.n, n) } +func (a *atomicCounter) load() int64 { return atomic.LoadInt64(&a.n) } + +func TestAppendThenGetSameProcess(t *testing.T) { + s, _, _ := newTestStore(t, 64<<20, 5*time.Second, 6) + + blk := makeBlock(t, []byte("hello world")) + root := makeRoot(t, "alpha") + if err := s.AppendBatch(context.Background(), []block.Block{blk}, blockstore.OpRoot{Bucket: "bk", Root: root}); err != nil { + t.Fatalf("AppendBatch: %v", err) + } + + got, err := s.Get(context.Background(), blk.Cid()) + if err != nil { + t.Fatalf("Get: %v", err) + } + if string(got.RawData()) != "hello world" { + t.Fatalf("got %q want %q", got.RawData(), "hello world") + } +} + +func TestSealBySize(t *testing.T) { + s, meta, flushes := newTestStore(t, 256, 50*time.Millisecond, 6) + + // Each block carries 100 bytes of payload; after a few writes the + // segment crosses the 256-byte threshold and seals. + payload := make([]byte, 100) + for i := range payload { + payload[i] = byte(i) + } + for i := 0; i < 6; i++ { + blk := makeBlock(t, append([]byte(fmt.Sprintf("rec-%02d-", i)), payload...)) + if err := s.AppendBatch(context.Background(), []block.Block{blk}, blockstore.OpRoot{ + Bucket: "bk", + Root: makeRoot(t, fmt.Sprintf("size-%d", i)), + }); err != nil { + t.Fatalf("append %d: %v", i, err) + } + } + + // Wait for at least one flush. + deadline := time.Now().Add(2 * time.Second) + for time.Now().Before(deadline) { + if flushes.load() > 0 { + break + } + time.Sleep(10 * time.Millisecond) + } + if flushes.load() == 0 { + t.Fatalf("expected at least one flush after size threshold; got 0") + } + + // At least one segment row should now be flushed. + meta.mu.Lock() + var flushed int + for _, m := range meta.segments { + if m.State == StateFlushed { + flushed++ + } + } + meta.mu.Unlock() + if flushed == 0 { + t.Fatalf("expected at least one segment in flushed state") + } +} + +func TestSealByAge(t *testing.T) { + s, _, flushes := newTestStore(t, 1<<30, 80*time.Millisecond, 6) + + blk := makeBlock(t, []byte("age-trigger")) + if err := s.AppendBatch(context.Background(), []block.Block{blk}, blockstore.OpRoot{ + Bucket: "bk", + Root: makeRoot(t, "age"), + }); err != nil { + t.Fatalf("append: %v", err) + } + + deadline := time.Now().Add(2 * time.Second) + for time.Now().Before(deadline) { + if flushes.load() > 0 { + break + } + time.Sleep(20 * time.Millisecond) + } + if flushes.load() == 0 { + t.Fatalf("expected age-triggered seal to produce a flush") + } +} + +func TestRetentionDropsOldFlushed(t *testing.T) { + s, _, _ := newTestStore(t, 64, 50*time.Millisecond, 2) + dir := s.cfg.Dir + + // Issue 5 PUTs; each one large enough to exceed SealBytes=64 in + // a single batch, so each becomes its own segment. + for i := 0; i < 5; i++ { + payload := make([]byte, 80) + for j := range payload { + payload[j] = byte(i) + } + blk := makeBlock(t, append([]byte(fmt.Sprintf("retain-%02d-", i)), payload...)) + if err := s.AppendBatch(context.Background(), []block.Block{blk}, blockstore.OpRoot{ + Bucket: "bk", + Root: makeRoot(t, fmt.Sprintf("ret-%d", i)), + }); err != nil { + t.Fatalf("append %d: %v", i, err) + } + } + + // Wait for retention to converge. + deadline := time.Now().Add(3 * time.Second) + for time.Now().Before(deadline) { + entries, err := readSegmentSeqs(dir) + if err != nil { + t.Fatalf("readDir: %v", err) + } + // 1 active open + 2 retained + if len(entries) <= 3 { + break + } + time.Sleep(50 * time.Millisecond) + } + + entries, err := readSegmentSeqs(dir) + if err != nil { + t.Fatalf("readDir: %v", err) + } + if len(entries) > 3 { + t.Fatalf("retain=2 should leave at most 3 .car files (open + retained); got %d (%v)", + len(entries), entries) + } +} + +func TestForceSealRecoveredOpenOnRestart(t *testing.T) { + dir := t.TempDir() + meta := newFakeMeta() + logger := zaptest.NewLogger(t) + openStore := func() *Store { + cfg := Config{ + Dir: dir, + Meta: meta, + SealBytes: 1 << 30, // never seals on size during this test + SealAge: 1 * time.Hour, + Retain: 6, + Flush: func(ctx context.Context, seg *Segment) error { + return meta.MarkSegmentFlushed(ctx, seg.Seq(), time.Now().Unix(), seg.OpRoots()) + }, + Logger: logger, + } + s, err := Open(context.Background(), cfg) + if err != nil { + t.Fatalf("Open: %v", err) + } + return s + } + + s := openStore() + blk := makeBlock(t, []byte("survives-restart")) + if err := s.AppendBatch(context.Background(), []block.Block{blk}, blockstore.OpRoot{ + Bucket: "bk", + Root: makeRoot(t, "survive"), + }); err != nil { + t.Fatalf("append: %v", err) + } + + // Simulate process exit without orderly Close (don't seal). Close + // the file descriptors via a panic-safe path: we just stop the + // goroutines and forget the in-memory state. + close(s.closing) + s.wg.Wait() + // Drop the in-memory ref; on disk the segment is still open. + + // Re-Open from the same dir. + s2 := openStore() + t.Cleanup(func() { _ = s2.Close(context.Background()) }) + + // The previously-open segment should have been force-sealed on + // startup; the write must still be readable. + got, err := s2.Get(context.Background(), blk.Cid()) + if err != nil { + t.Fatalf("Get after restart: %v", err) + } + if string(got.RawData()) != "survives-restart" { + t.Fatalf("got %q", got.RawData()) + } +} + +func TestAppendBatchEmptyBlocksAccepted(t *testing.T) { + s, _, _ := newTestStore(t, 64<<20, 5*time.Second, 6) + root := makeRoot(t, "x") + if err := s.AppendBatch(context.Background(), nil, blockstore.OpRoot{Bucket: "bk", Root: root}); err != nil { + t.Fatalf("empty blocks with defined root should succeed, got %v", err) + } + if err := s.AppendBatch(context.Background(), []block.Block{makeBlock(t, []byte("x"))}, blockstore.OpRoot{Bucket: "bk"}); err == nil { + t.Fatalf("expected error on undefined root") + } +} + +func TestGetMissReturnsErrNotFound(t *testing.T) { + s, _, _ := newTestStore(t, 64<<20, 5*time.Second, 6) + + want, err := makeRoot(t, "absent"), error(nil) + _, err = s.Get(context.Background(), want) + if !errors.Is(err, blockstore.ErrNotFound) { + t.Fatalf("expected ErrNotFound, got %v", err) + } +} + +// TestAppendBatchDedupesAcrossOps confirms that a CID written in +// one AppendBatch is filtered out of a later AppendBatch landing in +// the same open segment: the file grows by one frame's worth of +// bytes, not two. +func TestAppendBatchDedupesAcrossOps(t *testing.T) { + s, _, _ := newTestStore(t, 64<<20, 1*time.Hour, 6) + + shared := makeBlock(t, []byte("shared block bytes")) + uniqA := makeBlock(t, []byte("unique-A")) + uniqB := makeBlock(t, []byte("unique-B")) + + // First batch: shared + uniqA. + if err := s.AppendBatch(context.Background(), + []block.Block{shared, uniqA}, + blockstore.OpRoot{Bucket: "bk", Root: makeRoot(t, "op-a")}, + ); err != nil { + t.Fatalf("append A: %v", err) + } + + // Snapshot the open segment's size after the first append. + s.catMu.RLock() + sizeAfterA := s.open.Size() + s.catMu.RUnlock() + + // Second batch: shared (duplicate of first batch) + uniqB. + if err := s.AppendBatch(context.Background(), + []block.Block{shared, uniqB}, + blockstore.OpRoot{Bucket: "bk", Root: makeRoot(t, "op-b")}, + ); err != nil { + t.Fatalf("append B: %v", err) + } + + s.catMu.RLock() + sizeAfterB := s.open.Size() + s.catMu.RUnlock() + + // Frame for `shared` is one varint(len) + cid + payload. Whatever + // that totals, the second batch should NOT have re-written it. + // `uniqA` has ~the same payload size as `uniqB`, so growth-from-A + // and growth-from-B (had we written `shared` twice) would be + // nearly identical. Instead we expect growth-from-B ≈ uniqB-frame + // only. Simplest assertion: only one frame's worth of growth. + growthB := sizeAfterB - sizeAfterA + growthFirstBatch := sizeAfterA // includes header + 2 frames; can't isolate + + if growthB >= growthFirstBatch { + t.Fatalf("second batch grew %d bytes, expected ~half of first-batch growth (%d) since shared was deduped", + growthB, growthFirstBatch) + } + + // All three blocks must be readable. + for _, blk := range []block.Block{shared, uniqA, uniqB} { + got, err := s.Get(context.Background(), blk.Cid()) + if err != nil { + t.Fatalf("Get %s: %v", blk.Cid(), err) + } + if string(got.RawData()) != string(blk.RawData()) { + t.Fatalf("Get %s payload mismatch", blk.Cid()) + } + } +} + +// TestAppendBatchAllDuplicatesStillRecordsOpRoot covers the edge +// case where every block in a batch is a duplicate of bytes already +// in the segment. The CAR file shouldn't grow — but the op-root +// still has to persist so the bucket's forge_root_cid catches up +// when the segment ships. +func TestAppendBatchAllDuplicatesStillRecordsOpRoot(t *testing.T) { + s, _, _ := newTestStore(t, 64<<20, 1*time.Hour, 6) + + blk := makeBlock(t, []byte("only-block")) + + if err := s.AppendBatch(context.Background(), []block.Block{blk}, blockstore.OpRoot{ + Bucket: "bk", Root: makeRoot(t, "first"), + }); err != nil { + t.Fatalf("first append: %v", err) + } + s.catMu.RLock() + sizeBefore := s.open.Size() + opRootsBefore := len(s.open.OpRoots()) + s.catMu.RUnlock() + + if err := s.AppendBatch(context.Background(), []block.Block{blk}, blockstore.OpRoot{ + Bucket: "bk", Root: makeRoot(t, "second"), + }); err != nil { + t.Fatalf("dup append: %v", err) + } + s.catMu.RLock() + sizeAfter := s.open.Size() + opRootsAfter := len(s.open.OpRoots()) + s.catMu.RUnlock() + + if sizeAfter != sizeBefore { + t.Fatalf("all-duplicate batch grew CAR by %d bytes; expected 0", sizeAfter-sizeBefore) + } + if opRootsAfter != opRootsBefore+1 { + t.Fatalf("op-root count went %d→%d; expected +1", opRootsBefore, opRootsAfter) + } +} + +func readSegmentSeqs(dir string) ([]string, error) { + matches, err := filepath.Glob(filepath.Join(dir, "seg-*.car")) + if err != nil { + return nil, err + } + out := make([]string, 0, len(matches)) + for _, m := range matches { + out = append(out, filepath.Base(m)) + } + return out, nil +} diff --git a/pkg/ms3t/logstore/types.go b/pkg/ms3t/logstore/types.go new file mode 100644 index 0000000..fe67d0b --- /dev/null +++ b/pkg/ms3t/logstore/types.go @@ -0,0 +1,108 @@ +package logstore + +import ( + "context" + + "github.com/storacha/sprue/pkg/ms3t/blockstore" +) + +// State describes the lifecycle stage of a segment as observed at the +// catalog/Postgres level. The on-disk MANIFEST may briefly lag the +// in-memory state but recovery reconciles the two. +type State int + +const ( + // StateOpen means the segment is the current append target. Exactly + // one segment is in this state at a time. + StateOpen State = iota + // StateSealed means the segment is closed for writes and waiting to + // be (or being) shipped to Forge. + StateSealed + // StateFlushed means the segment has been successfully shipped to + // Forge and the per-bucket forge_root advances were applied. The + // segment may still be on disk, kept around as a read tier. + StateFlushed +) + +// String renders State for logs. +func (s State) String() string { + switch s { + case StateOpen: + return "open" + case StateSealed: + return "sealed" + case StateFlushed: + return "flushed" + default: + return "unknown" + } +} + +// ParseState is the inverse of State.String. Unknown strings yield +// StateOpen and ok=false, matching what we want at the SQL boundary. +func ParseState(s string) (State, bool) { + switch s { + case "open": + return StateOpen, true + case "sealed": + return StateSealed, true + case "flushed": + return StateFlushed, true + default: + return StateOpen, false + } +} + +// SegmentMeta is the persistence-layer view of a segment. Used by +// recovery to enumerate segments that need attention. +type SegmentMeta struct { + Seq uint64 + State State + SealedAt int64 + FlushedAt int64 + SizeBytes int64 + SHA256 []byte + OpRoots []blockstore.OpRoot +} + +// Meta is the persistence backing for the segment lifecycle. The +// production implementation is *registry.Postgres; tests use an +// in-memory fake. Logstore never touches SQL directly. +type Meta interface { + // NextSegmentSeq returns a fresh monotonic segment id. + NextSegmentSeq(ctx context.Context) (uint64, error) + + // InsertSegmentOpen records that segment seq has just been opened. + // Idempotent: if the row already exists in any state it is left + // alone. + InsertSegmentOpen(ctx context.Context, seq uint64) error + + // MarkSegmentSealed transitions a segment from open to sealed in + // one transaction: updates ms3t.segments and inserts the + // per-segment op-root rows. opRoots are applied in slice order + // (each gets seq_within = i). + MarkSegmentSealed(ctx context.Context, seq uint64, sealedAt int64, sizeBytes int64, sha256 []byte, opRoots []blockstore.OpRoot) error + + // MarkSegmentFlushed transitions a segment from sealed to flushed + // AND advances forge_root_cid in ms3t.buckets for every op-root + // recorded against this segment, all in one transaction. opRoots + // is the in-order list from MarkSegmentSealed; the registry uses + // it directly so callers can treat the sidecar as the source of + // truth. + MarkSegmentFlushed(ctx context.Context, seq uint64, flushedAt int64, opRoots []blockstore.OpRoot) error + + // DeleteSegment removes a segment row (cascades to op-root rows). + // Used by retention after the on-disk file is unlinked. + DeleteSegment(ctx context.Context, seq uint64) error + + // ListUnflushedSegments returns every segment whose state is open + // or sealed, ordered by seq ascending. Recovery uses this to + // re-enqueue work for the flusher and to verify on-disk vs DB + // state. + ListUnflushedSegments(ctx context.Context) ([]SegmentMeta, error) + + // RehydrateSegment writes a segment row + its op-root rows from a + // sidecar `.idx` when the DB row is missing or torn. Idempotent + // on (seq) — replaces any existing rows for that segment. + RehydrateSegment(ctx context.Context, m SegmentMeta) error +} diff --git a/pkg/ms3t/migrations/migrations.go b/pkg/ms3t/migrations/migrations.go new file mode 100644 index 0000000..91a6b9a --- /dev/null +++ b/pkg/ms3t/migrations/migrations.go @@ -0,0 +1,64 @@ +// Package migrations embeds the ms3t Postgres migrations and exposes +// a runner that applies them via goose against a caller-provided +// *pgxpool.Pool. +// +// All ms3t tables live in the `ms3t` schema and goose tracks them in +// ms3t.goose_db_version, so this package can run against the same +// database as sprue's internal/migrations without colliding. +package migrations + +import ( + "context" + "fmt" + + "github.com/jackc/pgx/v5/pgxpool" + "github.com/jackc/pgx/v5/stdlib" + "github.com/pressly/goose/v3" + "go.uber.org/zap" + + "embed" +) + +//go:embed sql/*.sql +var FS embed.FS + +const ( + schemaName = "ms3t" + gooseVersionName = schemaName + ".goose_db_version" +) + +// Up applies all pending migrations embedded in FS to the database +// behind pool. The ms3t schema is created if it does not already +// exist, then goose is configured to track its version in +// ms3t.goose_db_version. +func Up(ctx context.Context, pool *pgxpool.Pool, logger *zap.Logger) error { + if _, err := pool.Exec(ctx, "CREATE SCHEMA IF NOT EXISTS "+schemaName); err != nil { + return fmt.Errorf("ms3t migrations: ensure schema: %w", err) + } + + db := stdlib.OpenDBFromPool(pool) + defer db.Close() + + goose.SetBaseFS(FS) + goose.SetLogger(&zapGooseLogger{logger: logger}) + goose.SetTableName(gooseVersionName) + if err := goose.SetDialect("postgres"); err != nil { + return fmt.Errorf("ms3t migrations: set dialect: %w", err) + } + if err := goose.UpContext(ctx, db, "sql"); err != nil { + return fmt.Errorf("ms3t migrations: up: %w", err) + } + return nil +} + +type zapGooseLogger struct { + logger *zap.Logger +} + +func (l *zapGooseLogger) Fatalf(format string, v ...interface{}) { + l.logger.Sugar().Fatalf(format, v...) +} + +func (l *zapGooseLogger) Printf(format string, v ...interface{}) { + l.logger.Sugar().Infof(format, v...) +} diff --git a/pkg/ms3t/migrations/sql/00001_init.sql b/pkg/ms3t/migrations/sql/00001_init.sql new file mode 100644 index 0000000..f8c185d --- /dev/null +++ b/pkg/ms3t/migrations/sql/00001_init.sql @@ -0,0 +1,20 @@ +-- +goose Up +-- ms3t bucket registry. Mirrors the columns of the previous SQLite +-- schema (pkg/ms3t/registry/sqlite.go's `buckets` table) but in the +-- `ms3t` schema so the same Postgres database can host both sprue's +-- and ms3t's tables without collision. +-- +-- name — S3 bucket name (PK) +-- root_cid — current MST root CID, bytes form; NULL for empty bucket +-- forge_root_cid — last MST root whose DAG has been shipped to Forge +-- created_at — unix seconds at create time + +CREATE TABLE ms3t.buckets ( + name TEXT PRIMARY KEY, + root_cid BYTEA, + forge_root_cid BYTEA, + created_at BIGINT NOT NULL +); + +-- +goose Down +DROP TABLE ms3t.buckets; diff --git a/pkg/ms3t/migrations/sql/00002_segments.sql b/pkg/ms3t/migrations/sql/00002_segments.sql new file mode 100644 index 0000000..ccd82dc --- /dev/null +++ b/pkg/ms3t/migrations/sql/00002_segments.sql @@ -0,0 +1,49 @@ +-- +goose Up +-- ms3t log segments (LSM-style write log) and the per-segment +-- record of bucket-root advances that landed in each segment. +-- +-- segments +-- seq — monotonic segment id (matches the on-disk filename +-- stem `seg-.car`) +-- state — one of 'open', 'sealed', 'flushed' +-- sealed_at — unix seconds when seal was completed; NULL while open +-- flushed_at — unix seconds when the Forge ship completed; NULL otherwise +-- size_bytes — final size of the CAR file at seal +-- car_sha256 — sha256 of the CAR file at seal (used to detect torn +-- sidecars during recovery) +-- +-- segment_op_roots +-- seq, seq_within — composite ordering of S3 ops within a segment +-- bucket — the bucket whose root advanced for this op +-- root_cid — the new MST root the op produced +-- +-- The on-disk `seg-.idx` sidecar is the source of truth at +-- recovery time; these tables are rehydrated from sidecars when rows +-- are missing. The flusher uses `segment_op_roots` (joined with +-- `segments.state = 'flushed'`) to advance per-bucket forge_root_cid +-- in `ms3t.buckets` atomically with the state transition. + +CREATE SEQUENCE ms3t.segment_seq; + +CREATE TABLE ms3t.segments ( + seq BIGINT PRIMARY KEY, + state TEXT NOT NULL CHECK (state IN ('open', 'sealed', 'flushed')), + sealed_at BIGINT, + flushed_at BIGINT, + size_bytes BIGINT NOT NULL DEFAULT 0, + car_sha256 BYTEA +); + +CREATE TABLE ms3t.segment_op_roots ( + seq BIGINT NOT NULL REFERENCES ms3t.segments(seq) ON DELETE CASCADE, + seq_within INT NOT NULL, + bucket TEXT NOT NULL, + root_cid BYTEA NOT NULL, + PRIMARY KEY (seq, seq_within) +); +CREATE INDEX segment_op_roots_bucket_seq_idx ON ms3t.segment_op_roots (bucket, seq); + +-- +goose Down +DROP TABLE ms3t.segment_op_roots; +DROP TABLE ms3t.segments; +DROP SEQUENCE ms3t.segment_seq; diff --git a/pkg/ms3t/module.go b/pkg/ms3t/module.go new file mode 100644 index 0000000..ce5e0fa --- /dev/null +++ b/pkg/ms3t/module.go @@ -0,0 +1,190 @@ +// Package ms3t exposes the embedded S3 listener as both a low-level +// Server type (see server.go) and an fx module (see Module). +// +// The S3 protocol layer is provided by github.com/versity/versitygw; +// the storage backend is the LSM-style log in pkg/ms3t/logstore in +// front of a Forge-backed read tier, with versitygw → logstore +// translation in pkg/ms3t/s3frontend. +// +// pkg/ms3t depends on a single external storage type for production +// wiring: *pgxpool.Pool. Callers are responsible for constructing +// the pool (typically via sprue's internal/fx/store/postgres). The +// module runs its own goose migrations (pkg/ms3t/migrations) against +// the pool at startup so the outer wiring does not need to know +// about ms3t's schema. +// +// When config.MS3T.Enabled is false the module is a no-op, so it is +// safe to always include it in the app graph. +package ms3t + +import ( + "context" + "fmt" + "os" + "path/filepath" + "time" + + "github.com/jackc/pgx/v5/pgxpool" + "github.com/storacha/go-ucanto/did" + "go.uber.org/fx" + "go.uber.org/zap" + + "github.com/storacha/sprue/internal/config" + "github.com/storacha/sprue/pkg/identity" + "github.com/storacha/sprue/pkg/indexerclient" + "github.com/storacha/sprue/pkg/ms3t/blockstore" + "github.com/storacha/sprue/pkg/ms3t/migrations" + "github.com/storacha/sprue/pkg/ms3t/registry" + "github.com/storacha/sprue/pkg/ms3t/uploader" + "github.com/storacha/sprue/pkg/piriclient" + "github.com/storacha/sprue/pkg/routing" +) + +// Module registers the embedded ms3t S3 listener. When +// config.MS3T.Enabled is false the module is a no-op, so it's safe +// to always include in the app graph. +var Module = fx.Module("ms3t", + fx.Invoke(registerLifecycle), +) + +// FxDeps bundles the sprue-internal services ms3t pulls in from the +// fx graph in production. Pool is the only storage dependency: ms3t +// owns its own schema (under the ms3t Postgres schema) and runs its +// own migrations. +// +// Pool is marked optional in the fx graph because storage backends +// other than postgres (memory, aws) do not provide one. ms3t is +// opt-in; when ms3t.enabled is true, registerLifecycle returns a +// fail-fast error if Pool is nil. +type FxDeps struct { + fx.In + + Pool *pgxpool.Pool `optional:"true"` + Identity *identity.Identity + Router *routing.Service + PiriProvider piriclient.Provider + IndexerClient *indexerclient.Client `optional:"true"` +} + +// registerLifecycle is the fx-only thin shim. It builds the +// production-only collaborators (Forge, Internal uploader, Postgres +// registry, migrations, space signer) and hands them to ms3t.New. +// Anything beyond that wiring lives in server.go and is reachable by +// tests without fx. +func registerLifecycle( + lc fx.Lifecycle, + cfg *config.Config, + zlog *zap.Logger, + deps FxDeps, +) error { + mc := cfg.MS3T + if !mc.Enabled { + return nil + } + + if deps.Pool == nil { + return fmt.Errorf("ms3t: a *pgxpool.Pool must be provided in the fx graph when ms3t.enabled is true") + } + if deps.IndexerClient == nil { + return fmt.Errorf("ms3t: indexer client is required (configure indexer.endpoint)") + } + + if err := os.MkdirAll(mc.DataDir, 0o755); err != nil { + return fmt.Errorf("ms3t: mkdir data dir: %w", err) + } + + // Apply ms3t's own migrations against the caller-supplied pool. + // Goose runs in the ms3t schema and tracks its version table at + // ms3t.goose_db_version, so this never collides with any other + // migrations on the same database. + if err := migrations.Up(context.Background(), deps.Pool, zlog); err != nil { + return fmt.Errorf("ms3t: migrations: %w", err) + } + + // ms3t IS the space owner (root UCAN authority) so that + // self-issued space/content/retrieve delegations validate down + // the chain to piri's retrieval auth check. Key is generated on + // first run and persisted under data_dir/space.key. + keyPath := filepath.Join(mc.DataDir, "space.key") + spaceSigner, err := LoadOrCreateSigner(keyPath) + if err != nil { + return fmt.Errorf("ms3t: space signer: %w", err) + } + zlog.Info("ms3t space loaded", + zap.String("space_did", spaceSigner.DID().String()), + zap.String("key_file", keyPath), + ) + + forgeReader, err := blockstore.NewForge(blockstore.ForgeConfig{ + IndexerEndpoint: cfg.Indexer.Endpoint, + IndexerDID: cfg.Indexer.DID, + Spaces: []did.DID{spaceSigner.DID()}, + Signer: deps.Identity.Signer, + SpaceSigner: spaceSigner, + Logger: zlog, + }) + if err != nil { + return fmt.Errorf("ms3t: Reader blockstore: %w", err) + } + + reg := registry.NewPostgres(deps.Pool) + + zlog.Info("ms3t internal uploader configured", + zap.String("space_did", spaceSigner.DID().String()), + zap.String("signer_did", deps.Identity.DID()), + ) + up, err := uploader.NewForge(uploader.ForgeConfig{ + Router: deps.Router, + PiriProvider: deps.PiriProvider, + IndexerClient: deps.IndexerClient, + Signer: deps.Identity.Signer, + SpaceSigner: spaceSigner, + Logger: zlog, + }) + if err != nil { + return fmt.Errorf("ms3t: uploader: %w", err) + } + + sealAge, err := time.ParseDuration(emptyDefault(mc.SealAge, "5s")) + if err != nil { + return fmt.Errorf("ms3t: parse seal_age %q: %w", mc.SealAge, err) + } + + server, err := New(context.Background(), + ServerConfig{ + Addr: mc.Addr, + DataDir: mc.DataDir, + Region: mc.Region, + RootAccess: mc.RootAccess, + RootSecret: mc.RootSecret, + ChunkSize: mc.ChunkSize, + SealBytes: mc.SealBytes, + SealAge: sealAge, + Retain: mc.Retain, + }, + ServerDeps{ + Logger: zlog, + BaseBlockReader: forgeReader, + Uploader: up, + Registry: reg, + Meta: reg, + }, + ) + if err != nil { + return err + } + + lc.Append(fx.Hook{ + OnStart: server.Start, + OnStop: server.Stop, + }) + return nil +} + +// emptyDefault returns def when s is the empty string. +func emptyDefault(s, def string) string { + if s == "" { + return def + } + return s +} diff --git a/pkg/ms3t/mst/cbor_gen.go b/pkg/ms3t/mst/cbor_gen.go new file mode 100644 index 0000000..2c56f7b --- /dev/null +++ b/pkg/ms3t/mst/cbor_gen.go @@ -0,0 +1,433 @@ +// Code generated by github.com/whyrusleeping/cbor-gen. DO NOT EDIT. + +package mst + +import ( + "fmt" + "io" + "math" + "sort" + + cid "github.com/ipfs/go-cid" + cbg "github.com/whyrusleeping/cbor-gen" + xerrors "golang.org/x/xerrors" +) + +var _ = xerrors.Errorf +var _ = cid.Undef +var _ = math.E +var _ = sort.Sort + +func (t *NodeData) MarshalCBOR(w io.Writer) error { + if t == nil { + _, err := w.Write(cbg.CborNull) + return err + } + + cw := cbg.NewCborWriter(w) + + if _, err := cw.Write([]byte{162}); err != nil { + return err + } + + // t.Entries ([]mst.TreeEntry) (slice) + if len("e") > 1000000 { + return xerrors.Errorf("Value in field \"e\" was too long") + } + + if err := cw.WriteMajorTypeHeader(cbg.MajTextString, uint64(len("e"))); err != nil { + return err + } + if _, err := cw.WriteString(string("e")); err != nil { + return err + } + + if len(t.Entries) > 8192 { + return xerrors.Errorf("Slice value in field t.Entries was too long") + } + + if err := cw.WriteMajorTypeHeader(cbg.MajArray, uint64(len(t.Entries))); err != nil { + return err + } + for _, v := range t.Entries { + if err := v.MarshalCBOR(cw); err != nil { + return err + } + + } + + // t.Left (cid.Cid) (struct) + if len("l") > 1000000 { + return xerrors.Errorf("Value in field \"l\" was too long") + } + + if err := cw.WriteMajorTypeHeader(cbg.MajTextString, uint64(len("l"))); err != nil { + return err + } + if _, err := cw.WriteString(string("l")); err != nil { + return err + } + + if t.Left == nil { + if _, err := cw.Write(cbg.CborNull); err != nil { + return err + } + } else { + if err := cbg.WriteCid(cw, *t.Left); err != nil { + return xerrors.Errorf("failed to write cid field t.Left: %w", err) + } + } + + return nil +} + +func (t *NodeData) UnmarshalCBOR(r io.Reader) (err error) { + *t = NodeData{} + + cr := cbg.NewCborReader(r) + + maj, extra, err := cr.ReadHeader() + if err != nil { + return err + } + defer func() { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + }() + + if maj != cbg.MajMap { + return fmt.Errorf("cbor input should be of type map") + } + + if extra > cbg.MaxLength { + return fmt.Errorf("NodeData: map struct too large (%d)", extra) + } + + n := extra + + nameBuf := make([]byte, 1) + for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } + + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { + return err + } + continue + } + + switch string(nameBuf[:nameLen]) { + // t.Entries ([]mst.TreeEntry) (slice) + case "e": + + maj, extra, err = cr.ReadHeader() + if err != nil { + return err + } + + if extra > 8192 { + return fmt.Errorf("t.Entries: array too large (%d)", extra) + } + + if maj != cbg.MajArray { + return fmt.Errorf("expected cbor array") + } + + if extra > 0 { + t.Entries = make([]TreeEntry, extra) + } + + for i := 0; i < int(extra); i++ { + { + var maj byte + var extra uint64 + var err error + _ = maj + _ = extra + _ = err + + { + + if err := t.Entries[i].UnmarshalCBOR(cr); err != nil { + return xerrors.Errorf("unmarshaling t.Entries[i]: %w", err) + } + + } + + } + } + // t.Left (cid.Cid) (struct) + case "l": + + { + + b, err := cr.ReadByte() + if err != nil { + return err + } + if b != cbg.CborNull[0] { + if err := cr.UnreadByte(); err != nil { + return err + } + + c, err := cbg.ReadCid(cr) + if err != nil { + return xerrors.Errorf("failed to read cid field t.Left: %w", err) + } + + t.Left = &c + } + + } + + default: + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } + } + } + + return nil +} +func (t *TreeEntry) MarshalCBOR(w io.Writer) error { + if t == nil { + _, err := w.Write(cbg.CborNull) + return err + } + + cw := cbg.NewCborWriter(w) + + if _, err := cw.Write([]byte{164}); err != nil { + return err + } + + // t.KeySuffix ([]uint8) (slice) + if len("k") > 1000000 { + return xerrors.Errorf("Value in field \"k\" was too long") + } + + if err := cw.WriteMajorTypeHeader(cbg.MajTextString, uint64(len("k"))); err != nil { + return err + } + if _, err := cw.WriteString(string("k")); err != nil { + return err + } + + if len(t.KeySuffix) > 2097152 { + return xerrors.Errorf("Byte array in field t.KeySuffix was too long") + } + + if err := cw.WriteMajorTypeHeader(cbg.MajByteString, uint64(len(t.KeySuffix))); err != nil { + return err + } + + if _, err := cw.Write(t.KeySuffix); err != nil { + return err + } + + // t.PrefixLen (int64) (int64) + if len("p") > 1000000 { + return xerrors.Errorf("Value in field \"p\" was too long") + } + + if err := cw.WriteMajorTypeHeader(cbg.MajTextString, uint64(len("p"))); err != nil { + return err + } + if _, err := cw.WriteString(string("p")); err != nil { + return err + } + + if t.PrefixLen >= 0 { + if err := cw.WriteMajorTypeHeader(cbg.MajUnsignedInt, uint64(t.PrefixLen)); err != nil { + return err + } + } else { + if err := cw.WriteMajorTypeHeader(cbg.MajNegativeInt, uint64(-t.PrefixLen-1)); err != nil { + return err + } + } + + // t.Tree (cid.Cid) (struct) + if len("t") > 1000000 { + return xerrors.Errorf("Value in field \"t\" was too long") + } + + if err := cw.WriteMajorTypeHeader(cbg.MajTextString, uint64(len("t"))); err != nil { + return err + } + if _, err := cw.WriteString(string("t")); err != nil { + return err + } + + if t.Tree == nil { + if _, err := cw.Write(cbg.CborNull); err != nil { + return err + } + } else { + if err := cbg.WriteCid(cw, *t.Tree); err != nil { + return xerrors.Errorf("failed to write cid field t.Tree: %w", err) + } + } + + // t.Val (cid.Cid) (struct) + if len("v") > 1000000 { + return xerrors.Errorf("Value in field \"v\" was too long") + } + + if err := cw.WriteMajorTypeHeader(cbg.MajTextString, uint64(len("v"))); err != nil { + return err + } + if _, err := cw.WriteString(string("v")); err != nil { + return err + } + + if err := cbg.WriteCid(cw, t.Val); err != nil { + return xerrors.Errorf("failed to write cid field t.Val: %w", err) + } + + return nil +} + +func (t *TreeEntry) UnmarshalCBOR(r io.Reader) (err error) { + *t = TreeEntry{} + + cr := cbg.NewCborReader(r) + + maj, extra, err := cr.ReadHeader() + if err != nil { + return err + } + defer func() { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + }() + + if maj != cbg.MajMap { + return fmt.Errorf("cbor input should be of type map") + } + + if extra > cbg.MaxLength { + return fmt.Errorf("TreeEntry: map struct too large (%d)", extra) + } + + n := extra + + nameBuf := make([]byte, 1) + for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } + + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { + return err + } + continue + } + + switch string(nameBuf[:nameLen]) { + // t.KeySuffix ([]uint8) (slice) + case "k": + + maj, extra, err = cr.ReadHeader() + if err != nil { + return err + } + + if extra > 2097152 { + return fmt.Errorf("t.KeySuffix: byte array too large (%d)", extra) + } + if maj != cbg.MajByteString { + return fmt.Errorf("expected byte array") + } + + if extra > 0 { + t.KeySuffix = make([]uint8, extra) + } + + if _, err := io.ReadFull(cr, t.KeySuffix); err != nil { + return err + } + + // t.PrefixLen (int64) (int64) + case "p": + { + maj, extra, err := cr.ReadHeader() + if err != nil { + return err + } + var extraI int64 + switch maj { + case cbg.MajUnsignedInt: + extraI = int64(extra) + if extraI < 0 { + return fmt.Errorf("int64 positive overflow") + } + case cbg.MajNegativeInt: + extraI = int64(extra) + if extraI < 0 { + return fmt.Errorf("int64 negative overflow") + } + extraI = -1 - extraI + default: + return fmt.Errorf("wrong type for int64 field: %d", maj) + } + + t.PrefixLen = int64(extraI) + } + // t.Tree (cid.Cid) (struct) + case "t": + + { + + b, err := cr.ReadByte() + if err != nil { + return err + } + if b != cbg.CborNull[0] { + if err := cr.UnreadByte(); err != nil { + return err + } + + c, err := cbg.ReadCid(cr) + if err != nil { + return xerrors.Errorf("failed to read cid field t.Tree: %w", err) + } + + t.Tree = &c + } + + } + // t.Val (cid.Cid) (struct) + case "v": + + { + + c, err := cbg.ReadCid(cr) + if err != nil { + return xerrors.Errorf("failed to read cid field t.Val: %w", err) + } + + t.Val = c + + } + + default: + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } + } + } + + return nil +} diff --git a/pkg/ms3t/mst/diff.go b/pkg/ms3t/mst/diff.go new file mode 100644 index 0000000..f663ff8 --- /dev/null +++ b/pkg/ms3t/mst/diff.go @@ -0,0 +1,193 @@ +package mst + +import ( + "context" + "fmt" + + cid "github.com/ipfs/go-cid" + + "github.com/storacha/sprue/pkg/ms3t/blockstore" +) + +// DiffOp describes a single change between two MST roots. +type DiffOp struct { + Depth int + Op string // "add", "del", "mut" + Rpath string + OldCid cid.Cid + NewCid cid.Cid +} + +// DiffTrees enumerates the additions, deletions, and mutations needed to go +// from the MST rooted at `from` to the MST rooted at `to`. +func DiffTrees(ctx context.Context, bs blockstore.BaseStore, from, to cid.Cid) ([]*DiffOp, error) { + cst := blockstore.CborStore(bs) + + if from == cid.Undef { + return identityDiff(ctx, bs, to) + } + + ft := LoadMST(cst, from) + tt := LoadMST(cst, to) + + fents, err := ft.getEntries(ctx) + if err != nil { + return nil, err + } + + tents, err := tt.getEntries(ctx) + if err != nil { + return nil, err + } + + var ixf, ixt int + var out []*DiffOp + for ixf < len(fents) && ixt < len(tents) { + ef := fents[ixf] + et := tents[ixt] + + if nodeEntriesEqual(&ef, &et) { + ixf++ + ixt++ + continue + } + + if ef.isLeaf() && et.isLeaf() { + if ef.Key == et.Key { + if ef.Val == et.Val { + return nil, fmt.Errorf("hang on, why are these leaves equal?") + } + + out = append(out, &DiffOp{ + Op: "mut", + Rpath: ef.Key, + OldCid: ef.Val, + NewCid: et.Val, + }) + ixf++ + ixt++ + continue + } + + if ef.Key > et.Key { + out = append(out, &DiffOp{ + Op: "add", + Rpath: et.Key, + NewCid: et.Val, + }) + ixt++ + } else { + out = append(out, &DiffOp{ + Op: "del", + Rpath: ef.Key, + OldCid: ef.Val, + }) + ixf++ + } + + continue + } + + if ef.isTree() { + sub, err := ef.Tree.getEntries(ctx) + if err != nil { + return nil, err + } + + fents = append(sub, fents[ixf+1:]...) + ixf = 0 + continue + } + + if et.isTree() { + sub, err := et.Tree.getEntries(ctx) + if err != nil { + return nil, err + } + + tents = append(sub, tents[ixt+1:]...) + ixt = 0 + continue + } + } + + for ; ixf < len(fents); ixf++ { + e := fents[ixf] + if e.isLeaf() { + out = append(out, &DiffOp{ + Op: "del", + Rpath: e.Key, + OldCid: e.Val, + }) + } else if e.isTree() { + if err := e.Tree.WalkLeavesFrom(ctx, "", func(key string, val cid.Cid) error { + out = append(out, &DiffOp{ + Op: "del", + Rpath: key, + OldCid: val, + }) + return nil + }); err != nil { + return nil, err + } + } + } + + for ; ixt < len(tents); ixt++ { + e := tents[ixt] + if e.isLeaf() { + out = append(out, &DiffOp{ + Op: "add", + Rpath: e.Key, + NewCid: e.Val, + }) + } else if e.isTree() { + if err := e.Tree.WalkLeavesFrom(ctx, "", func(key string, val cid.Cid) error { + out = append(out, &DiffOp{ + Op: "add", + Rpath: key, + NewCid: val, + }) + return nil + }); err != nil { + return nil, err + } + } + } + + return out, nil +} + +func nodeEntriesEqual(a, b *nodeEntry) bool { + if !(a.Key == b.Key && a.Val == b.Val) { + return false + } + + if a.Tree == nil && b.Tree == nil { + return true + } + + if a.Tree != nil && b.Tree != nil && a.Tree.pointer == b.Tree.pointer { + return true + } + + return false +} + +func identityDiff(ctx context.Context, bs blockstore.BaseStore, root cid.Cid) ([]*DiffOp, error) { + cst := blockstore.CborStore(bs) + tt := LoadMST(cst, root) + + var ops []*DiffOp + if err := tt.WalkLeavesFrom(ctx, "", func(key string, val cid.Cid) error { + ops = append(ops, &DiffOp{ + Op: "add", + Rpath: key, + NewCid: val, + }) + return nil + }); err != nil { + return nil, err + } + return ops, nil +} diff --git a/pkg/ms3t/mst/mst.go b/pkg/ms3t/mst/mst.go new file mode 100644 index 0000000..dc42b36 --- /dev/null +++ b/pkg/ms3t/mst/mst.go @@ -0,0 +1,882 @@ +// Package mst is a fork of github.com/bluesky-social/indigo/mst with the +// atproto-specific key validation relaxed for use as a generic ordered +// content-addressed key/value map. Keys may be any non-empty UTF-8 string up +// to 1024 bytes, with the only forbidden bytes being NUL. +// +// On-disk format is unchanged from the atproto MST: NodeData / TreeEntry CBOR +// blocks with prefix-compressed byte-string keys. Cross-implementation +// compatibility with atproto MSTs is intentionally not preserved. +// +// See https://hal.inria.fr/hal-02303490/document for the underlying data +// structure. SHA-256 is used for key hashing with a 4-bit fanout (~16 entries +// per layer). +package mst + +import ( + "context" + "fmt" + "reflect" + + "github.com/ipfs/go-cid" + + "github.com/storacha/sprue/pkg/ms3t/blockstore" +) + +// The MST package consumes blockstore.Reader on the load + traversal +// path and blockstore.Store on the materialization (GetPointer) +// path. Mutating operations (Add, Update, Delete) build new +// in-memory tree values without any I/O writes; the only write site +// in the package is GetPointer, which takes a writer argument. + +// nodeKind is the type of node in the MST. +type nodeKind uint8 + +const ( + entryUndefined nodeKind = 0 + entryLeaf nodeKind = 1 + entryTree nodeKind = 2 +) + +// nodeEntry is either a leaf (key/value) or a pointer to a subtree. +type nodeEntry struct { + Kind nodeKind + Key string + Val cid.Cid + Tree *MerkleSearchTree +} + +func mkTreeEntry(t *MerkleSearchTree) nodeEntry { + return nodeEntry{ + Kind: entryTree, + Tree: t, + } +} + +func (ne nodeEntry) isTree() bool { return ne.Kind == entryTree } +func (ne nodeEntry) isLeaf() bool { return ne.Kind == entryLeaf } +func (ne nodeEntry) isUndefined() bool { return ne.Kind == entryUndefined } + +// Sanity check: two trees can never be neighbors in an entries slice. +func checkTreeInvariant(ents []nodeEntry) { + for i := 0; i < len(ents)-1; i++ { + if ents[i].isTree() && ents[i+1].isTree() { + panic(fmt.Sprintf("two trees next to each other! %d %d", i, i+1)) + } + } +} + +// CBORTypes returns the types in this package that need to be registered with +// the CBOR codec. +func CBORTypes() []reflect.Type { + return []reflect.Type{ + reflect.TypeOf(NodeData{}), + reflect.TypeOf(TreeEntry{}), + } +} + +// NodeData is the CBOR-serialized form of an MST node. +type NodeData struct { + Left *cid.Cid `cborgen:"l"` // [nullable] pointer to lower-level subtree to the "left" of this path/key + Entries []TreeEntry `cborgen:"e"` // ordered list of entries at this node +} + +// TreeEntry is one entry within a NodeData. +type TreeEntry struct { + PrefixLen int64 `cborgen:"p"` // count of bytes shared with previous key in tree + KeySuffix []byte `cborgen:"k"` // remaining part of key (appended to "previous key") + Val cid.Cid `cborgen:"v"` // CID pointer at this path/key + Tree *cid.Cid `cborgen:"t"` // [nullable] pointer to lower-level subtree to the "right" of this entry +} + +// MerkleSearchTree is an MST tree node. Values are immutable: methods return +// copies with changes applied. Hydration is lazy; a tree loaded by CID has no +// entries until getEntries is called. +// +// The cst field is a blockstore.Reader: traversal and mutation +// (Add/Update/Delete) both stay read-only at the storage level, +// returning new in-memory MerkleSearchTree values rather than +// persisting anything. The only write site is GetPointer, which +// takes its writer as an explicit argument. +type MerkleSearchTree struct { + cst blockstore.Reader + entries []nodeEntry // non-nil when "hydrated" + layer int + pointer cid.Cid + validPtr bool +} + +// NewEmptyMST returns a new empty MST using cst as its storage. +func NewEmptyMST(cst blockstore.Reader) *MerkleSearchTree { + return createMST(cst, cid.Undef, []nodeEntry{}, 0) +} + +func createMST(cst blockstore.Reader, ptr cid.Cid, entries []nodeEntry, layer int) *MerkleSearchTree { + mst := &MerkleSearchTree{ + cst: cst, + pointer: ptr, + layer: layer, + entries: entries, + validPtr: ptr.Defined(), + } + return mst +} + +// LoadMST returns a lazy reference to an MST rooted at the given CID. Entries +// are not loaded until needed. +func LoadMST(cst blockstore.Reader, root cid.Cid) *MerkleSearchTree { + return createMST(cst, root, nil, -1) +} + +// === Immutability === + +func (mst *MerkleSearchTree) newTree(entries []nodeEntry) *MerkleSearchTree { + if entries == nil { + panic("nil entries passed to newTree") + } + return createMST(mst.cst, cid.Undef, entries, mst.layer) +} + +// === Lazy getters === + +func (mst *MerkleSearchTree) getEntries(ctx context.Context) ([]nodeEntry, error) { + if mst.entries != nil { + return mst.entries, nil + } + + if mst.pointer != cid.Undef { + var nd NodeData + if err := mst.cst.Get(ctx, mst.pointer, &nd); err != nil { + return nil, err + } + entries, err := entriesFromNodeData(ctx, &nd, mst.cst) + if err != nil { + return nil, err + } + if entries == nil { + panic("got nil entries from node data decoding") + } + mst.entries = entries + return entries, nil + } + + return nil, fmt.Errorf("no entries or self-pointer (CID) on MerkleSearchTree") +} + +func entriesFromNodeData(ctx context.Context, nd *NodeData, cst blockstore.Reader) ([]nodeEntry, error) { + layer := -1 + if len(nd.Entries) > 0 { + // the first entry's KeySuffix is a complete key (PrefixLen=0) + firstLeaf := nd.Entries[0] + layer = leadingZerosOnHashBytes(firstLeaf.KeySuffix) + } + + entries, err := deserializeNodeData(ctx, cst, nd, layer) + if err != nil { + return nil, err + } + + return entries, nil +} + +// GetPointer returns the CID of this MST root, recomputing it if any subtree +// has been mutated since the last call. writer is the IpldStore that any +// freshly-serialized subtree nodes are Put through; only this method (and the +// cidForEntries / serializeNodeData helpers it drives) ever issues writes +// against it. +func (mst *MerkleSearchTree) GetPointer(ctx context.Context, writer blockstore.Store) (cid.Cid, error) { + if mst.validPtr { + return mst.pointer, nil + } + + if _, err := mst.getEntries(ctx); err != nil { + return cid.Undef, err + } + + for i, e := range mst.entries { + if e.isTree() { + if !e.Tree.validPtr { + if _, err := e.Tree.GetPointer(ctx, writer); err != nil { + return cid.Undef, err + } + mst.entries[i] = e + } + } + } + + nptr, err := cidForEntries(ctx, mst.entries, writer) + if err != nil { + return cid.Undef, err + } + mst.pointer = nptr + mst.validPtr = true + + return mst.pointer, nil +} + +func (mst *MerkleSearchTree) getLayer(ctx context.Context) (int, error) { + layer, err := mst.attemptGetLayer(ctx) + if err != nil { + return -1, err + } + if layer < 0 { + mst.layer = 0 + } else { + mst.layer = layer + } + return mst.layer, nil +} + +func (mst *MerkleSearchTree) attemptGetLayer(ctx context.Context) (int, error) { + if mst.layer >= 0 { + return mst.layer, nil + } + + entries, err := mst.getEntries(ctx) + if err != nil { + return -1, err + } + + layer := layerForEntries(entries) + if layer < 0 { + for _, e := range entries { + if e.isTree() { + childLayer, err := e.Tree.attemptGetLayer(ctx) + if err != nil { + return -1, err + } + if childLayer >= 0 { + layer = childLayer + 1 + break + } + } + } + } + + if layer >= 0 { + mst.layer = layer + } + return mst.layer, nil +} + +// === Core operations === + +// Add inserts a new key/value pair. Returns ErrAlreadyExists if the key is +// already present. +func (mst *MerkleSearchTree) Add(ctx context.Context, key string, val cid.Cid, knownZeros int) (*MerkleSearchTree, error) { + if err := ensureValidKey(key); err != nil { + return nil, err + } + + if val == cid.Undef { + return nil, fmt.Errorf("tried to insert an undef CID") + } + + keyZeros := knownZeros + if keyZeros < 0 { + keyZeros = leadingZerosOnHash(key) + } + + layer, err := mst.getLayer(ctx) + if err != nil { + return nil, fmt.Errorf("getting layer failed: %w", err) + } + + newLeaf := nodeEntry{ + Kind: entryLeaf, + Key: key, + Val: val, + } + + if keyZeros == layer { + index, err := mst.findGtOrEqualLeafIndex(ctx, key) + if err != nil { + return nil, err + } + + found, err := mst.atIndex(index) + if err != nil { + return nil, err + } + + if found.isLeaf() && found.Key == key { + return nil, ErrAlreadyExists + } + + prevNode, err := mst.atIndex(index - 1) + if err != nil { + return nil, err + } + + if prevNode.isUndefined() || prevNode.isLeaf() { + return mst.spliceIn(ctx, newLeaf, index) + } + + left, right, err := prevNode.Tree.splitAround(ctx, key) + if err != nil { + return nil, err + } + return mst.replaceWithSplit(ctx, index-1, left, newLeaf, right) + + } else if keyZeros < layer { + index, err := mst.findGtOrEqualLeafIndex(ctx, key) + if err != nil { + return nil, err + } + + prevNode, err := mst.atIndex(index - 1) + if err != nil { + return nil, err + } + + if !prevNode.isUndefined() && prevNode.isTree() { + newSubtree, err := prevNode.Tree.Add(ctx, key, val, keyZeros) + if err != nil { + return nil, err + } + return mst.updateEntry(ctx, index-1, mkTreeEntry(newSubtree)) + } + + subTree, err := mst.createChild(ctx) + if err != nil { + return nil, err + } + + newSubTree, err := subTree.Add(ctx, key, val, keyZeros) + if err != nil { + return nil, fmt.Errorf("subtree add: %w", err) + } + + return mst.spliceIn(ctx, mkTreeEntry(newSubTree), index) + } + + // keyZeros > layer: must push the rest of the tree down + left, right, err := mst.splitAround(ctx, key) + if err != nil { + return nil, err + } + + layer, err = mst.getLayer(ctx) + if err != nil { + return nil, fmt.Errorf("get layer in split case failed: %w", err) + } + + extraLayersToAdd := keyZeros - layer + + for i := 1; i < extraLayersToAdd; i++ { + if left != nil { + par, err := left.createParent(ctx) + if err != nil { + return nil, fmt.Errorf("create left parent: %w", err) + } + left = par + } + + if right != nil { + par, err := right.createParent(ctx) + if err != nil { + return nil, fmt.Errorf("create right parent: %w", err) + } + right = par + } + } + + var updated []nodeEntry + if left != nil { + updated = append(updated, mkTreeEntry(left)) + } + + updated = append(updated, nodeEntry{ + Kind: entryLeaf, + Key: key, + Val: val, + }) + + if right != nil { + updated = append(updated, mkTreeEntry(right)) + } + + checkTreeInvariant(updated) + newRoot := createMST(mst.cst, cid.Undef, updated, keyZeros) + newRoot.validPtr = false + + return newRoot, nil +} + +// ErrNotFound is returned by Get / Update / Delete when the key is absent. +var ErrNotFound = fmt.Errorf("mst: not found") + +// ErrAlreadyExists is returned by Add when the key is already present. +var ErrAlreadyExists = fmt.Errorf("mst: key already exists") + +// Get returns the CID at the given key, or ErrNotFound. +func (mst *MerkleSearchTree) Get(ctx context.Context, k string) (cid.Cid, error) { + index, err := mst.findGtOrEqualLeafIndex(ctx, k) + if err != nil { + return cid.Undef, err + } + + found, err := mst.atIndex(index) + if err != nil { + return cid.Undef, err + } + + if !found.isUndefined() && found.isLeaf() && found.Key == k { + return found.Val, nil + } + + prev, err := mst.atIndex(index - 1) + if err != nil { + return cid.Undef, err + } + + if !prev.isUndefined() && prev.isTree() { + return prev.Tree.Get(ctx, k) + } + + return cid.Undef, ErrNotFound +} + +// Update replaces the value at an existing key. Returns ErrNotFound if absent. +func (mst *MerkleSearchTree) Update(ctx context.Context, k string, val cid.Cid) (*MerkleSearchTree, error) { + if err := ensureValidKey(k); err != nil { + return nil, err + } + + if val == cid.Undef { + return nil, fmt.Errorf("tried to insert an undef CID") + } + + index, err := mst.findGtOrEqualLeafIndex(ctx, k) + if err != nil { + return nil, err + } + + found, err := mst.atIndex(index) + if err != nil { + return nil, err + } + + if !found.isUndefined() && found.isLeaf() && found.Key == k { + return mst.updateEntry(ctx, index, nodeEntry{ + Kind: entryLeaf, + Key: k, + Val: val, + }) + } + + prev, err := mst.atIndex(index - 1) + if err != nil { + return nil, err + } + + if !prev.isUndefined() && prev.isTree() { + updatedTree, err := prev.Tree.Update(ctx, k, val) + if err != nil { + return nil, err + } + return mst.updateEntry(ctx, index-1, mkTreeEntry(updatedTree)) + } + + return nil, ErrNotFound +} + +// Delete removes the leaf at the given key. +func (mst *MerkleSearchTree) Delete(ctx context.Context, k string) (*MerkleSearchTree, error) { + altered, err := mst.deleteRecurse(ctx, k) + if err != nil { + return nil, err + } + return altered.trimTop(ctx) +} + +func (mst *MerkleSearchTree) deleteRecurse(ctx context.Context, k string) (*MerkleSearchTree, error) { + ix, err := mst.findGtOrEqualLeafIndex(ctx, k) + if err != nil { + return nil, err + } + + found, err := mst.atIndex(ix) + if err != nil { + return nil, err + } + + if found.isLeaf() && found.Key == k { + prev, err := mst.atIndex(ix - 1) + if err != nil { + return nil, err + } + + next, err := mst.atIndex(ix + 1) + if err != nil { + return nil, err + } + + if prev.isTree() && next.isTree() { + merged, err := prev.Tree.appendMerge(ctx, next.Tree) + if err != nil { + return nil, err + } + entries, err := mst.getEntries(ctx) + if err != nil { + return nil, err + } + return mst.newTree(append(append(entries[:ix-1], mkTreeEntry(merged)), entries[ix+2:]...)), nil + } + return mst.removeEntry(ctx, ix) + } + + prev, err := mst.atIndex(ix - 1) + if err != nil { + return nil, err + } + + if prev.isTree() { + subtree, err := prev.Tree.deleteRecurse(ctx, k) + if err != nil { + return nil, err + } + + subtreeEntries, err := subtree.getEntries(ctx) + if err != nil { + return nil, err + } + + if len(subtreeEntries) == 0 { + return mst.removeEntry(ctx, ix-1) + } + return mst.updateEntry(ctx, ix-1, mkTreeEntry(subtree)) + } + + return nil, ErrNotFound +} + +// === Simple operations === + +func (mst *MerkleSearchTree) updateEntry(ctx context.Context, ix int, entry nodeEntry) (*MerkleSearchTree, error) { + entries, err := mst.getEntries(ctx) + if err != nil { + return nil, err + } + + nents := make([]nodeEntry, len(entries)) + copy(nents, entries[:ix]) + nents[ix] = entry + copy(nents[ix+1:], entries[ix+1:]) + + checkTreeInvariant(nents) + return mst.newTree(nents), nil +} + +func (mst *MerkleSearchTree) removeEntry(ctx context.Context, ix int) (*MerkleSearchTree, error) { + entries, err := mst.getEntries(ctx) + if err != nil { + return nil, err + } + + nents := make([]nodeEntry, len(entries)-1) + copy(nents, entries[:ix]) + copy(nents[ix:], entries[ix+1:]) + + checkTreeInvariant(nents) + return mst.newTree(nents), nil +} + +func (mst *MerkleSearchTree) append(ctx context.Context, ent nodeEntry) (*MerkleSearchTree, error) { + entries, err := mst.getEntries(ctx) + if err != nil { + return nil, err + } + + nents := make([]nodeEntry, len(entries)+1) + copy(nents, entries) + nents[len(nents)-1] = ent + + checkTreeInvariant(nents) + return mst.newTree(nents), nil +} + +func (mst *MerkleSearchTree) prepend(ctx context.Context, ent nodeEntry) (*MerkleSearchTree, error) { + entries, err := mst.getEntries(ctx) + if err != nil { + return nil, err + } + + nents := make([]nodeEntry, len(entries)+1) + copy(nents[1:], entries) + nents[0] = ent + + checkTreeInvariant(nents) + return mst.newTree(nents), nil +} + +func (mst *MerkleSearchTree) atIndex(ix int) (nodeEntry, error) { + entries, err := mst.getEntries(context.TODO()) + if err != nil { + return nodeEntry{}, err + } + + if ix < 0 || ix >= len(entries) { + return nodeEntry{}, nil + } + + return entries[ix], nil +} + +func (mst *MerkleSearchTree) spliceIn(ctx context.Context, entry nodeEntry, ix int) (*MerkleSearchTree, error) { + entries, err := mst.getEntries(ctx) + if err != nil { + return nil, err + } + + nents := make([]nodeEntry, len(entries)+1) + copy(nents, entries[:ix]) + nents[ix] = entry + copy(nents[ix+1:], entries[ix:]) + + checkTreeInvariant(nents) + return mst.newTree(nents), nil +} + +func (mst *MerkleSearchTree) replaceWithSplit(ctx context.Context, ix int, left *MerkleSearchTree, nl nodeEntry, right *MerkleSearchTree) (*MerkleSearchTree, error) { + entries, err := mst.getEntries(ctx) + if err != nil { + return nil, err + } + checkTreeInvariant(entries) + var update []nodeEntry + update = append(update, entries[:ix]...) + + if left != nil { + update = append(update, nodeEntry{ + Kind: entryTree, + Tree: left, + }) + } + + update = append(update, nl) + + if right != nil { + update = append(update, nodeEntry{ + Kind: entryTree, + Tree: right, + }) + } + + update = append(update, entries[ix+1:]...) + + checkTreeInvariant(update) + return mst.newTree(update), nil +} + +func (mst *MerkleSearchTree) trimTop(ctx context.Context) (*MerkleSearchTree, error) { + entries, err := mst.getEntries(ctx) + if err != nil { + return nil, err + } + if len(entries) == 1 && entries[0].isTree() { + return entries[0].Tree.trimTop(ctx) + } + return mst, nil +} + +// === Subtree splits === + +func (mst *MerkleSearchTree) splitAround(ctx context.Context, key string) (*MerkleSearchTree, *MerkleSearchTree, error) { + index, err := mst.findGtOrEqualLeafIndex(ctx, key) + if err != nil { + return nil, nil, err + } + + entries, err := mst.getEntries(ctx) + if err != nil { + return nil, nil, err + } + + leftData := entries[:index] + rightData := entries[index:] + left := mst.newTree(leftData) + right := mst.newTree(rightData) + + if len(leftData) > 0 && leftData[len(leftData)-1].isTree() { + lastInLeft := leftData[len(leftData)-1] + + nleft, err := left.removeEntry(ctx, len(leftData)-1) + if err != nil { + return nil, nil, err + } + left = nleft + + subl, subr, err := lastInLeft.Tree.splitAround(ctx, key) + if err != nil { + return nil, nil, err + } + + if subl != nil { + left, err = left.append(ctx, mkTreeEntry(subl)) + if err != nil { + return nil, nil, err + } + } + + if subr != nil { + right, err = right.prepend(ctx, mkTreeEntry(subr)) + if err != nil { + return nil, nil, err + } + } + } + + if left.entryCount() == 0 { + left = nil + } + if right.entryCount() == 0 { + right = nil + } + + return left, right, nil +} + +func (mst *MerkleSearchTree) entryCount() int { + entries, err := mst.getEntries(context.TODO()) + if err != nil { + panic(err) + } + return len(entries) +} + +func (mst *MerkleSearchTree) appendMerge(ctx context.Context, omst *MerkleSearchTree) (*MerkleSearchTree, error) { + mylayer, err := mst.getLayer(ctx) + if err != nil { + return nil, err + } + + olayer, err := omst.getLayer(ctx) + if err != nil { + return nil, err + } + + if mylayer != olayer { + return nil, fmt.Errorf("trying to merge two nodes from different layers") + } + + entries, err := mst.getEntries(ctx) + if err != nil { + return nil, err + } + + tomergeEnts, err := omst.getEntries(ctx) + if err != nil { + return nil, err + } + + lastInLeft := entries[len(entries)-1] + firstInRight := tomergeEnts[0] + + if lastInLeft.isTree() && firstInRight.isTree() { + merged, err := lastInLeft.Tree.appendMerge(ctx, firstInRight.Tree) + if err != nil { + return nil, err + } + return mst.newTree(append(append(entries[:len(entries)-1], mkTreeEntry(merged)), tomergeEnts[1:]...)), nil + } + return mst.newTree(append(entries, tomergeEnts...)), nil +} + +// === Create relatives === + +func (mst *MerkleSearchTree) createChild(ctx context.Context) (*MerkleSearchTree, error) { + layer, err := mst.getLayer(ctx) + if err != nil { + return nil, err + } + return createMST(mst.cst, cid.Undef, []nodeEntry{}, layer-1), nil +} + +func (mst *MerkleSearchTree) createParent(ctx context.Context) (*MerkleSearchTree, error) { + layer, err := mst.getLayer(ctx) + if err != nil { + return nil, err + } + return createMST(mst.cst, cid.Undef, []nodeEntry{mkTreeEntry(mst)}, layer+1), nil +} + +// === Finding insertion points === + +func (mst *MerkleSearchTree) findGtOrEqualLeafIndex(ctx context.Context, key string) (int, error) { + entries, err := mst.getEntries(ctx) + if err != nil { + return -1, err + } + + for i, e := range entries { + if e.isLeaf() && e.Key >= key { + return i, nil + } + } + + return len(entries), nil +} + +// === List operations === + +// ErrStopWalk halts a WalkLeavesFrom traversal without surfacing as an error +// to the caller. The walk function returns nil after stopping. +var ErrStopWalk = fmt.Errorf("mst: stop walk") + +// WalkLeavesFrom walks leaves in sorted order starting at the first key >= +// from. The callback may return ErrStopWalk to halt early; any other error +// aborts and is returned to the caller. +func (mst *MerkleSearchTree) WalkLeavesFrom(ctx context.Context, from string, cb func(key string, val cid.Cid) error) error { + err := mst.walkLeavesFrom(ctx, from, false, cb) + if err == ErrStopWalk { + return nil + } + return err +} + +// WalkLeavesFromNocache is like WalkLeavesFrom but does not retain hydrated +// subtree state, intended for one-pass streaming traversals. +func (mst *MerkleSearchTree) WalkLeavesFromNocache(ctx context.Context, from string, cb func(key string, val cid.Cid) error) error { + err := mst.walkLeavesFrom(ctx, from, true, cb) + if err == ErrStopWalk { + return nil + } + return err +} + +func (mst *MerkleSearchTree) walkLeavesFrom(ctx context.Context, from string, nocache bool, cb func(key string, val cid.Cid) error) error { + index, err := mst.findGtOrEqualLeafIndex(ctx, from) + if err != nil { + return err + } + + entries, err := mst.getEntries(ctx) + if err != nil { + return fmt.Errorf("get entries: %w", err) + } + + if index > 0 { + prev := entries[index-1] + if !prev.isUndefined() && prev.isTree() { + if err := prev.Tree.walkLeavesFrom(ctx, from, nocache, cb); err != nil { + return err + } + } + } + + for _, e := range entries[index:] { + if e.isLeaf() { + if err := cb(e.Key, e.Val); err != nil { + return err + } + } else { + if err := e.Tree.walkLeavesFrom(ctx, from, nocache, cb); err != nil { + return err + } + if nocache { + e.Tree = nil + } + } + } + return nil +} diff --git a/pkg/ms3t/mst/mst_util.go b/pkg/ms3t/mst/mst_util.go new file mode 100644 index 0000000..6aa386f --- /dev/null +++ b/pkg/ms3t/mst/mst_util.go @@ -0,0 +1,205 @@ +package mst + +import ( + "context" + "crypto/sha256" + "fmt" + "strings" + "unicode/utf8" + "unsafe" + + "github.com/ipfs/go-cid" + + "github.com/storacha/sprue/pkg/ms3t/blockstore" +) + +// MaxKeyBytes is the maximum length, in bytes, of a key stored in the MST. +// Matches S3's object key length cap. +const MaxKeyBytes = 1024 + +// 4-bit fanout: count zero bits in 2-bit chunks. A leading 0x00 byte = 4 zeros. +func leadingZerosOnHash(key string) int { + var b []byte + if len(key) > 0 { + b = unsafe.Slice(unsafe.StringData(key), len(key)) + } + return leadingZerosOnHashBytes(b) +} + +func leadingZerosOnHashBytes(key []byte) (total int) { + hv := sha256.Sum256(key) + for _, b := range hv { + if b&0xC0 != 0 { + break + } + if b == 0x00 { + total += 4 + continue + } + if b&0xFC == 0x00 { + total += 3 + } else if b&0xF0 == 0x00 { + total += 2 + } else { + total += 1 + } + break + } + return total +} + +func layerForEntries(entries []nodeEntry) int { + var firstLeaf nodeEntry + for _, e := range entries { + if e.isLeaf() { + firstLeaf = e + break + } + } + + if firstLeaf.Kind == entryUndefined { + return -1 + } + + return leadingZerosOnHash(firstLeaf.Key) +} + +func deserializeNodeData(ctx context.Context, cst blockstore.Reader, nd *NodeData, layer int) ([]nodeEntry, error) { + entries := []nodeEntry{} + if nd.Left != nil { + entries = append(entries, nodeEntry{ + Kind: entryTree, + Tree: createMST(cst, *nd.Left, nil, layer-1), + }) + } + + var lastKey string + var keyb []byte // re-used between entries + for _, e := range nd.Entries { + if keyb == nil { + keyb = make([]byte, 0, int(e.PrefixLen)+len(e.KeySuffix)) + } + keyb = append(keyb[:0], lastKey[:e.PrefixLen]...) + keyb = append(keyb, e.KeySuffix...) + + keyStr := string(keyb) + if err := ensureValidKey(keyStr); err != nil { + return nil, err + } + + entries = append(entries, nodeEntry{ + Kind: entryLeaf, + Key: keyStr, + Val: e.Val, + }) + + if e.Tree != nil { + entries = append(entries, nodeEntry{ + Kind: entryTree, + Tree: createMST(cst, *e.Tree, nil, layer-1), + Key: keyStr, + }) + } + lastKey = keyStr + } + + return entries, nil +} + +func serializeNodeData(ctx context.Context, entries []nodeEntry, writer blockstore.Store) (*NodeData, error) { + var data NodeData + + i := 0 + if len(entries) > 0 && entries[0].isTree() { + i++ + + ptr, err := entries[0].Tree.GetPointer(ctx, writer) + if err != nil { + return nil, err + } + data.Left = &ptr + } + + var lastKey string + for i < len(entries) { + leaf := entries[i] + + if !leaf.isLeaf() { + return nil, fmt.Errorf("not a valid node: two subtrees next to each other (%d, %d)", i, len(entries)) + } + i++ + + var subtree *cid.Cid + + if i < len(entries) { + next := entries[i] + + if next.isTree() { + ptr, err := next.Tree.GetPointer(ctx, writer) + if err != nil { + return nil, fmt.Errorf("getting subtree pointer: %w", err) + } + + subtree = &ptr + i++ + } + } + + if err := ensureValidKey(leaf.Key); err != nil { + return nil, err + } + + prefixLen := countPrefixLen(lastKey, leaf.Key) + data.Entries = append(data.Entries, TreeEntry{ + PrefixLen: int64(prefixLen), + KeySuffix: []byte(leaf.Key)[prefixLen:], + Val: leaf.Val, + Tree: subtree, + }) + + lastKey = leaf.Key + } + + return &data, nil +} + +func countPrefixLen(a, b string) int { + var i int + for i = 0; i < len(a) && i < len(b); i++ { + if a[i] != b[i] { + return i + } + } + return i +} + +func cidForEntries(ctx context.Context, entries []nodeEntry, writer blockstore.Store) (cid.Cid, error) { + nd, err := serializeNodeData(ctx, entries, writer) + if err != nil { + return cid.Undef, fmt.Errorf("serializing new entries: %w", err) + } + return writer.Put(ctx, nd) +} + +// IsValidKey reports whether s is a valid MST key under this fork's relaxed +// rules: non-empty, valid UTF-8, no NUL bytes, at most MaxKeyBytes bytes long. +func IsValidKey(s string) bool { + if len(s) == 0 || len(s) > MaxKeyBytes { + return false + } + if !utf8.ValidString(s) { + return false + } + if strings.ContainsRune(s, 0) { + return false + } + return true +} + +func ensureValidKey(s string) error { + if !IsValidKey(s) { + return fmt.Errorf("invalid mst key (len=%d)", len(s)) + } + return nil +} + diff --git a/pkg/ms3t/registry/postgres.go b/pkg/ms3t/registry/postgres.go new file mode 100644 index 0000000..475b971 --- /dev/null +++ b/pkg/ms3t/registry/postgres.go @@ -0,0 +1,180 @@ +package registry + +import ( + "context" + "errors" + "fmt" + + "github.com/ipfs/go-cid" + "github.com/jackc/pgx/v5" + "github.com/jackc/pgx/v5/pgconn" + "github.com/jackc/pgx/v5/pgxpool" +) + +// uniqueViolation is the Postgres SQLSTATE for a unique constraint +// violation (matches the literal used elsewhere in sprue's stores). +const uniqueViolation = "23505" + +// Postgres is a *pgxpool.Pool-backed Registry. Schema is owned by +// pkg/ms3t/migrations and lives in the `ms3t` Postgres schema. The +// pool is borrowed, never closed by this type. +type Postgres struct { + pool *pgxpool.Pool +} + +// NewPostgres wraps an existing pool. Callers are responsible for +// running pkg/ms3t/migrations.Up against the same pool before any +// registry method is called. +func NewPostgres(pool *pgxpool.Pool) *Postgres { + return &Postgres{pool: pool} +} + +// Compile-time assertion. +var _ Registry = (*Postgres)(nil) + +func (r *Postgres) Create(ctx context.Context, name string, createdAt int64) error { + _, err := r.pool.Exec(ctx, + `INSERT INTO ms3t.buckets (name, root_cid, created_at) VALUES ($1, NULL, $2)`, + name, createdAt) + if err != nil { + var pgErr *pgconn.PgError + if errors.As(err, &pgErr) && pgErr.Code == uniqueViolation { + return ErrExists + } + return fmt.Errorf("registry: create %q: %w", name, err) + } + return nil +} + +func (r *Postgres) Get(ctx context.Context, name string) (*State, error) { + var rootBytes, forgeBytes []byte + var createdAt int64 + err := r.pool.QueryRow(ctx, + `SELECT root_cid, forge_root_cid, created_at FROM ms3t.buckets WHERE name = $1`, name). + Scan(&rootBytes, &forgeBytes, &createdAt) + if errors.Is(err, pgx.ErrNoRows) { + return nil, ErrNotFound + } + if err != nil { + return nil, fmt.Errorf("registry: get %q: %w", name, err) + } + + st := &State{Name: name, CreatedAt: createdAt} + if err := setCidPg(&st.Root, rootBytes, name, "root_cid"); err != nil { + return nil, err + } + if err := setCidPg(&st.ForgeRoot, forgeBytes, name, "forge_root_cid"); err != nil { + return nil, err + } + return st, nil +} + +func (r *Postgres) List(ctx context.Context) ([]*State, error) { + rows, err := r.pool.Query(ctx, + `SELECT name, root_cid, forge_root_cid, created_at FROM ms3t.buckets ORDER BY name ASC`) + if err != nil { + return nil, fmt.Errorf("registry: list: %w", err) + } + defer rows.Close() + + var out []*State + for rows.Next() { + var name string + var rootBytes, forgeBytes []byte + var createdAt int64 + if err := rows.Scan(&name, &rootBytes, &forgeBytes, &createdAt); err != nil { + return nil, fmt.Errorf("registry: list scan: %w", err) + } + st := &State{Name: name, CreatedAt: createdAt} + if err := setCidPg(&st.Root, rootBytes, name, "root_cid"); err != nil { + return nil, err + } + if err := setCidPg(&st.ForgeRoot, forgeBytes, name, "forge_root_cid"); err != nil { + return nil, err + } + out = append(out, st) + } + if err := rows.Err(); err != nil { + return nil, fmt.Errorf("registry: list rows: %w", err) + } + return out, nil +} + +func (r *Postgres) Delete(ctx context.Context, name string) error { + tag, err := r.pool.Exec(ctx, `DELETE FROM ms3t.buckets WHERE name = $1`, name) + if err != nil { + return fmt.Errorf("registry: delete %q: %w", name, err) + } + if tag.RowsAffected() == 0 { + return ErrNotFound + } + return nil +} + +func (r *Postgres) CASRoot(ctx context.Context, name string, expect, next cid.Cid) error { + var ( + expectBytes []byte + nextBytes []byte + ) + if expect.Defined() { + expectBytes = expect.Bytes() + } + if next.Defined() { + nextBytes = next.Bytes() + } + + var ( + tag pgconn.CommandTag + err error + ) + if expectBytes == nil { + tag, err = r.pool.Exec(ctx, + `UPDATE ms3t.buckets SET root_cid = $1 WHERE name = $2 AND root_cid IS NULL`, + nextBytes, name) + } else { + tag, err = r.pool.Exec(ctx, + `UPDATE ms3t.buckets SET root_cid = $1 WHERE name = $2 AND root_cid = $3`, + nextBytes, name, expectBytes) + } + if err != nil { + return fmt.Errorf("registry: cas %q: %w", name, err) + } + if tag.RowsAffected() == 0 { + // Either the bucket doesn't exist or the expected root didn't match. + if _, gerr := r.Get(ctx, name); errors.Is(gerr, ErrNotFound) { + return ErrNotFound + } + return ErrConflict + } + return nil +} + +func (r *Postgres) SetForgeRoot(ctx context.Context, name string, root cid.Cid) error { + var rootBytes []byte + if root.Defined() { + rootBytes = root.Bytes() + } + tag, err := r.pool.Exec(ctx, + `UPDATE ms3t.buckets SET forge_root_cid = $1 WHERE name = $2`, + rootBytes, name) + if err != nil { + return fmt.Errorf("registry: set forge root %q: %w", name, err) + } + if tag.RowsAffected() == 0 { + return ErrNotFound + } + return nil +} + +func setCidPg(dst *cid.Cid, raw []byte, name, field string) error { + if len(raw) == 0 { + *dst = cid.Undef + return nil + } + c, err := cid.Cast(raw) + if err != nil { + return fmt.Errorf("registry: bad %s for %q: %w", field, name, err) + } + *dst = c + return nil +} diff --git a/pkg/ms3t/registry/registry.go b/pkg/ms3t/registry/registry.go new file mode 100644 index 0000000..b3fe485 --- /dev/null +++ b/pkg/ms3t/registry/registry.go @@ -0,0 +1,51 @@ +// Package registry tracks the set of buckets and the current MST root CID +// for each. The interface is small enough that swapping SQLite for postgres +// or DynamoDB later is just a new implementation. +package registry + +import ( + "context" + "errors" + + "github.com/ipfs/go-cid" +) + +// State is the metadata stored per bucket. +type State struct { + Name string + Root cid.Cid // current MST root; cid.Undef for empty bucket + ForgeRoot cid.Cid // last MST root whose DAG has been shipped to Forge + CreatedAt int64 // unix seconds +} + +// Registry tracks bucket state. All methods are safe for concurrent use. +type Registry interface { + // Create inserts a new bucket. Returns ErrExists if name is taken. + Create(ctx context.Context, name string, createdAt int64) error + + // Get returns the state of a bucket, or ErrNotFound. + Get(ctx context.Context, name string) (*State, error) + + // List returns every bucket in lexicographic name order. + List(ctx context.Context) ([]*State, error) + + // Delete removes a bucket. Returns ErrNotFound if absent. + Delete(ctx context.Context, name string) error + + // CASRoot atomically advances the bucket root from expect to next. + // Returns ErrConflict if the current root does not equal expect. + CASRoot(ctx context.Context, name string, expect, next cid.Cid) error + + // SetForgeRoot records that the DAG reachable from root has been + // successfully shipped to Forge. Used as the high-water mark by + // the recovery loop: anything reachable from Root but not from + // ForgeRoot needs to be re-submitted on startup. + SetForgeRoot(ctx context.Context, name string, root cid.Cid) error +} + +// Common errors. +var ( + ErrNotFound = errors.New("registry: bucket not found") + ErrExists = errors.New("registry: bucket already exists") + ErrConflict = errors.New("registry: root cas conflict") +) diff --git a/pkg/ms3t/registry/segments.go b/pkg/ms3t/registry/segments.go new file mode 100644 index 0000000..4e369ce --- /dev/null +++ b/pkg/ms3t/registry/segments.go @@ -0,0 +1,277 @@ +package registry + +import ( + "context" + "errors" + "fmt" + + "github.com/ipfs/go-cid" + "github.com/jackc/pgx/v5" + + "github.com/storacha/sprue/pkg/ms3t/blockstore" + "github.com/storacha/sprue/pkg/ms3t/logstore" +) + +// Segment-level methods for *Postgres. These satisfy logstore.Meta; +// the compile-time assertion at the bottom of the file pins the +// interface. + +func (r *Postgres) NextSegmentSeq(ctx context.Context) (uint64, error) { + var seq uint64 + if err := r.pool.QueryRow(ctx, `SELECT nextval('ms3t.segment_seq')`).Scan(&seq); err != nil { + return 0, fmt.Errorf("registry: next segment seq: %w", err) + } + return seq, nil +} + +func (r *Postgres) InsertSegmentOpen(ctx context.Context, seq uint64) error { + _, err := r.pool.Exec(ctx, + `INSERT INTO ms3t.segments (seq, state, size_bytes) VALUES ($1, 'open', 0) + ON CONFLICT (seq) DO NOTHING`, + int64(seq)) + if err != nil { + return fmt.Errorf("registry: insert segment %d: %w", seq, err) + } + return nil +} + +func (r *Postgres) MarkSegmentSealed(ctx context.Context, seq uint64, sealedAt int64, sizeBytes int64, sha256 []byte, opRoots []blockstore.OpRoot) error { + tx, err := r.pool.Begin(ctx) + if err != nil { + return fmt.Errorf("registry: begin seal %d: %w", seq, err) + } + defer tx.Rollback(ctx) + + tag, err := tx.Exec(ctx, + `UPDATE ms3t.segments + SET state = 'sealed', sealed_at = $2, size_bytes = $3, car_sha256 = $4 + WHERE seq = $1 AND state = 'open'`, + int64(seq), sealedAt, sizeBytes, sha256) + if err != nil { + return fmt.Errorf("registry: seal %d: %w", seq, err) + } + if tag.RowsAffected() == 0 { + // Either the segment is missing or it has already advanced past + // 'open'. Treat as a no-op so seal is idempotent against + // crashes between disk seal and DB update. + return nil + } + + if err := insertOpRootsTx(ctx, tx, seq, opRoots); err != nil { + return err + } + if err := tx.Commit(ctx); err != nil { + return fmt.Errorf("registry: commit seal %d: %w", seq, err) + } + return nil +} + +func (r *Postgres) MarkSegmentFlushed(ctx context.Context, seq uint64, flushedAt int64, opRoots []blockstore.OpRoot) error { + tx, err := r.pool.Begin(ctx) + if err != nil { + return fmt.Errorf("registry: begin flush %d: %w", seq, err) + } + defer tx.Rollback(ctx) + + tag, err := tx.Exec(ctx, + `UPDATE ms3t.segments SET state = 'flushed', flushed_at = $2 WHERE seq = $1 AND state = 'sealed'`, + int64(seq), flushedAt) + if err != nil { + return fmt.Errorf("registry: flush %d: %w", seq, err) + } + if tag.RowsAffected() == 0 { + // Already flushed (or somehow rolled back to open). Idempotent. + return nil + } + + // Apply forge_root advances in slice order. Segments flush in seq + // order, and within a segment the slice order is the order of + // commits, so the last write for each bucket wins. + // + // TODO(frrist/ms3t): the UPDATE below is unconditional on + // root_cid, which is incorrect when a writer's logstore.Commit + // succeeds but its subsequent registry.CASRoot fails (transient + // Postgres error, context cancellation between the two calls). + // In that case, the op_root for newRoot is durable in the log + // even though the bucket's published root_cid never advanced. + // When this segment flushes, the loop below blindly sets + // forge_root_cid = newRoot — even though root_cid is still + // oldRoot — breaking the invariant "forge_root_cid is a Root the + // bucket has actually published, with its full DAG in Forge." + // + // Fix: gate the UPDATE on root_cid, e.g. + // + // UPDATE ms3t.buckets + // SET forge_root_cid = $1 + // WHERE name = $2 AND root_cid = $1 + // + // With segments flushing in seq order, this naturally lets a + // later segment's flush advance forge_root_cid for the bucket + // once root_cid has caught up via a successful CASRoot, and + // silently skips orphan op_roots from failed commits. + // + // Out of scope for the bucketop refactor; track separately. + for _, opr := range opRoots { + if !opr.Root.Defined() { + continue + } + if _, err := tx.Exec(ctx, + `UPDATE ms3t.buckets SET forge_root_cid = $1 WHERE name = $2`, + opr.Root.Bytes(), opr.Bucket); err != nil { + return fmt.Errorf("registry: advance forge_root for %q: %w", opr.Bucket, err) + } + } + if err := tx.Commit(ctx); err != nil { + return fmt.Errorf("registry: commit flush %d: %w", seq, err) + } + return nil +} + +func (r *Postgres) DeleteSegment(ctx context.Context, seq uint64) error { + if _, err := r.pool.Exec(ctx, `DELETE FROM ms3t.segments WHERE seq = $1`, int64(seq)); err != nil { + return fmt.Errorf("registry: delete segment %d: %w", seq, err) + } + return nil +} + +func (r *Postgres) ListUnflushedSegments(ctx context.Context) ([]logstore.SegmentMeta, error) { + rows, err := r.pool.Query(ctx, + `SELECT seq, state, COALESCE(sealed_at, 0), COALESCE(flushed_at, 0), size_bytes, car_sha256 + FROM ms3t.segments + WHERE state IN ('open', 'sealed') + ORDER BY seq ASC`) + if err != nil { + return nil, fmt.Errorf("registry: list unflushed segments: %w", err) + } + defer rows.Close() + + var out []logstore.SegmentMeta + for rows.Next() { + var ( + seqInt int64 + stateS string + sealed int64 + flushed int64 + size int64 + sha []byte + ) + if err := rows.Scan(&seqInt, &stateS, &sealed, &flushed, &size, &sha); err != nil { + return nil, fmt.Errorf("registry: scan segment: %w", err) + } + state, ok := logstore.ParseState(stateS) + if !ok { + return nil, fmt.Errorf("registry: bad segment state %q for seq %d", stateS, seqInt) + } + out = append(out, logstore.SegmentMeta{ + Seq: uint64(seqInt), + State: state, + SealedAt: sealed, + FlushedAt: flushed, + SizeBytes: size, + SHA256: sha, + }) + } + if err := rows.Err(); err != nil { + return nil, fmt.Errorf("registry: list segments rows: %w", err) + } + + // Hydrate op_roots for sealed segments only (open segments have + // none). Done in a second pass to keep the query simple. + for i := range out { + if out[i].State != logstore.StateSealed { + continue + } + ops, err := r.fetchOpRoots(ctx, out[i].Seq) + if err != nil { + return nil, err + } + out[i].OpRoots = ops + } + return out, nil +} + +func (r *Postgres) RehydrateSegment(ctx context.Context, m logstore.SegmentMeta) error { + tx, err := r.pool.Begin(ctx) + if err != nil { + return fmt.Errorf("registry: begin rehydrate %d: %w", m.Seq, err) + } + defer tx.Rollback(ctx) + + // Replace any existing rows for this seq. + if _, err := tx.Exec(ctx, `DELETE FROM ms3t.segments WHERE seq = $1`, int64(m.Seq)); err != nil { + return fmt.Errorf("registry: rehydrate clear %d: %w", m.Seq, err) + } + + var sealedAt, flushedAt *int64 + if m.SealedAt != 0 { + v := m.SealedAt + sealedAt = &v + } + if m.FlushedAt != 0 { + v := m.FlushedAt + flushedAt = &v + } + if _, err := tx.Exec(ctx, + `INSERT INTO ms3t.segments (seq, state, sealed_at, flushed_at, size_bytes, car_sha256) + VALUES ($1, $2, $3, $4, $5, $6)`, + int64(m.Seq), m.State.String(), sealedAt, flushedAt, m.SizeBytes, m.SHA256); err != nil { + return fmt.Errorf("registry: rehydrate insert %d: %w", m.Seq, err) + } + + if err := insertOpRootsTx(ctx, tx, m.Seq, m.OpRoots); err != nil { + return err + } + if err := tx.Commit(ctx); err != nil { + return fmt.Errorf("registry: rehydrate commit %d: %w", m.Seq, err) + } + return nil +} + +func (r *Postgres) fetchOpRoots(ctx context.Context, seq uint64) ([]blockstore.OpRoot, error) { + rows, err := r.pool.Query(ctx, + `SELECT bucket, root_cid FROM ms3t.segment_op_roots WHERE seq = $1 ORDER BY seq_within ASC`, + int64(seq)) + if err != nil { + return nil, fmt.Errorf("registry: fetch op_roots %d: %w", seq, err) + } + defer rows.Close() + + var out []blockstore.OpRoot + for rows.Next() { + var bucket string + var rootBytes []byte + if err := rows.Scan(&bucket, &rootBytes); err != nil { + return nil, fmt.Errorf("registry: scan op_root: %w", err) + } + c, err := cid.Cast(rootBytes) + if err != nil { + return nil, fmt.Errorf("registry: bad root_cid for %q seq %d: %w", bucket, seq, err) + } + out = append(out, blockstore.OpRoot{Bucket: bucket, Root: c}) + } + if err := rows.Err(); err != nil { + return nil, fmt.Errorf("registry: fetch op_roots rows %d: %w", seq, err) + } + return out, nil +} + +func insertOpRootsTx(ctx context.Context, tx pgx.Tx, seq uint64, opRoots []blockstore.OpRoot) error { + if len(opRoots) == 0 { + return nil + } + for i, opr := range opRoots { + if !opr.Root.Defined() { + return errors.New("registry: op_root.Root must be defined") + } + if _, err := tx.Exec(ctx, + `INSERT INTO ms3t.segment_op_roots (seq, seq_within, bucket, root_cid) + VALUES ($1, $2, $3, $4)`, + int64(seq), i, opr.Bucket, opr.Root.Bytes()); err != nil { + return fmt.Errorf("registry: insert op_root %d/%d: %w", seq, i, err) + } + } + return nil +} + +// Compile-time assertion: Postgres satisfies logstore.Meta. +var _ logstore.Meta = (*Postgres)(nil) diff --git a/pkg/ms3t/s3frontend/backend.go b/pkg/ms3t/s3frontend/backend.go new file mode 100644 index 0000000..28013b2 --- /dev/null +++ b/pkg/ms3t/s3frontend/backend.go @@ -0,0 +1,89 @@ +// Package s3frontend implements versitygw's backend.Backend by +// orchestrating directly over the ms3t domain primitives. It is the +// only S3 frontend ms3t ships; it is wired into the process via +// pkg/ms3t.Server. +// +// The Backend type is a thin protocol adapter: +// - Read paths drive a single ReadStore that exposes both +// CBOR-decoded reads (manifest, MST nodes) and raw block reads +// (body chunks). The interface has no Put method, so write paths +// can't accidentally route through it. +// - Write paths drive a per-op bucketop.Tx, which owns the +// staging buffer, MST CBOR view, bucket-Root CAS, and per-bucket +// locking. +// +// Operations not implemented (multipart, lifecycle, locking, +// versioning, etc.) inherit ErrNotImplemented from the embedded +// backend.BackendUnsupported. The few unsupported-by-default +// methods that versitygw nevertheless calls on every request +// (GetBucketAcl, GetBucketPolicy, GetObjectLockConfiguration, +// GetBucketVersioning) are stubbed in bucket.go. +package s3frontend + +import ( + "context" + + "github.com/versity/versitygw/backend" + + "github.com/storacha/sprue/pkg/ms3t/blockstore" + msbucket "github.com/storacha/sprue/pkg/ms3t/bucket" + "github.com/storacha/sprue/pkg/ms3t/bucketop" + "github.com/storacha/sprue/pkg/ms3t/logstore" + "github.com/storacha/sprue/pkg/ms3t/registry" +) + +// Backend implements versitygw's backend.Backend directly over the +// ms3t domain primitives. The embedded BackendUnsupported supplies +// ErrNotImplemented defaults for every operation; we override only +// the ones we actually serve. +type Backend struct { + backend.BackendUnsupported + + read blockstore.ReadStore + reg registry.Registry + txns *bucketop.Coordinator + codec msbucket.BodyCodec +} + +// Compile-time assertion that Backend satisfies versitygw's interface. +var _ backend.Backend = (*Backend)(nil) + +// New constructs a Backend wired over ms3t's domain primitives. +// rs is the layered read blockstore (log → forge); log is the +// LSM-style write log; codec is the body-DAG codec used for both +// chunking on PUT and streaming on GET — typically a *FixedChunker. +func New(reg registry.Registry, rs blockstore.ReadStore, log *logstore.Store, codec msbucket.BodyCodec) *Backend { + return &Backend{ + read: rs, + reg: reg, + txns: bucketop.NewCoordinator(bucketop.Deps{Reg: reg, Log: log, Reads: rs}), + codec: codec, + } +} + +// String identifies this backend in versitygw logs. +func (*Backend) String() string { return "ms3t" } + +// Shutdown is a no-op; lifecycle for the underlying registry/log is +// owned by pkg/ms3t.Server's Stop hook, not by versitygw. +func (*Backend) Shutdown() {} + +// Recover is a no-op in the LSM design: logstore.Open already +// scanned the segment directory, reconciled with Postgres, and +// re-enqueued any pending segments for the background flusher. +// Recover is retained as the lifecycle seam in case future +// invariants need verifying before the listener accepts traffic. +func (b *Backend) Recover(_ context.Context) error { return nil } + +// Drain shuts the log down via the Coordinator: seals the open +// segment, drains the flush queue, and updates per-bucket +// forge_root_cid for every op_root that landed in a flushed +// segment. After Drain returns cleanly, no acked write is +// unrepresented in Postgres. +func (b *Backend) Drain(ctx context.Context) error { + if b.txns == nil { + return nil + } + return b.txns.Close(ctx) +} + diff --git a/pkg/ms3t/s3frontend/bucket.go b/pkg/ms3t/s3frontend/bucket.go new file mode 100644 index 0000000..07b0f3c --- /dev/null +++ b/pkg/ms3t/s3frontend/bucket.go @@ -0,0 +1,211 @@ +package s3frontend + +import ( + "context" + "errors" + "sort" + "strings" + "time" + + "github.com/aws/aws-sdk-go-v2/service/s3" + "github.com/ipfs/go-cid" + "github.com/versity/versitygw/s3err" + "github.com/versity/versitygw/s3response" + + "github.com/storacha/sprue/pkg/ms3t/mst" + "github.com/storacha/sprue/pkg/ms3t/registry" +) + +func (b *Backend) ListBuckets(ctx context.Context, input s3response.ListBucketsInput) (s3response.ListAllMyBucketsResult, error) { + states, err := b.reg.List(ctx) + if err != nil { + return s3response.ListAllMyBucketsResult{}, err + } + sort.Slice(states, func(i, j int) bool { return states[i].Name < states[j].Name }) + + var entries []s3response.ListAllMyBucketsEntry + var cToken string + for _, st := range states { + if input.Prefix != "" && !strings.HasPrefix(st.Name, input.Prefix) { + continue + } + if st.Name <= input.ContinuationToken { + continue + } + if input.MaxBuckets > 0 && int32(len(entries)) == input.MaxBuckets { + cToken = entries[len(entries)-1].Name + break + } + entries = append(entries, s3response.ListAllMyBucketsEntry{ + Name: st.Name, + CreationDate: time.Unix(st.CreatedAt, 0), + }) + } + + return s3response.ListAllMyBucketsResult{ + Buckets: s3response.ListAllMyBucketsList{Bucket: entries}, + Owner: s3response.CanonicalUser{ID: input.Owner}, + Prefix: input.Prefix, + ContinuationToken: cToken, + }, nil +} + +// GetBucketAcl is invoked on every object op via versitygw's ParseAcl +// middleware to capture the bucket owner before the controller runs +// (acl-parser.go:30). We don't model ACLs — but returning the +// BackendUnsupported default (ErrNotImplemented) propagates as +// "header you provided implies functionality that is not implemented" +// for *every* PUT/GET/DELETE. Returning empty bytes for a known +// bucket lets ParseACL produce ACL{}, after which the middleware +// substitutes the configured root access key as the owner. +func (b *Backend) GetBucketAcl(ctx context.Context, input *s3.GetBucketAclInput) ([]byte, error) { + if input.Bucket == nil { + return nil, s3err.GetAPIError(s3err.ErrInvalidBucketName) + } + if _, err := b.reg.Get(ctx, *input.Bucket); err != nil { + if errors.Is(err, registry.ErrNotFound) { + return nil, s3err.GetAPIError(s3err.ErrNoSuchBucket) + } + return nil, err + } + return nil, nil +} + +// GetObjectLockConfiguration is called from auth.CheckObjectAccess +// (object_lock.go:223) on every object PUT/DELETE. The caller only +// tolerates ErrObjectLockConfigurationNotFound; ErrNotImplemented +// propagates as "header you provided implies functionality not +// implemented" — ms3t doesn't model object lock today, so the +// honest answer is "no configuration." +func (b *Backend) GetObjectLockConfiguration(ctx context.Context, bucket string) ([]byte, error) { + if _, err := b.reg.Get(ctx, bucket); err != nil { + if errors.Is(err, registry.ErrNotFound) { + return nil, s3err.GetAPIError(s3err.ErrNoSuchBucket) + } + return nil, err + } + return nil, s3err.GetAPIError(s3err.ErrObjectLockConfigurationNotFound) +} + +// GetBucketPolicy is called from auth.VerifyAccess (access-control.go:103) +// for non-root requests and from auth.VerifyPublicAccess for anonymous +// ones. Authenticated root requests short-circuit before this is hit +// today, but stubbing it now keeps non-root authz paths from tripping +// the same NotImplemented trap. +func (b *Backend) GetBucketPolicy(ctx context.Context, bucket string) ([]byte, error) { + if _, err := b.reg.Get(ctx, bucket); err != nil { + if errors.Is(err, registry.ErrNotFound) { + return nil, s3err.GetAPIError(s3err.ErrNoSuchBucket) + } + return nil, err + } + return nil, s3err.GetAPIError(s3err.ErrNoSuchBucketPolicy) +} + +// GetBucketVersioning is called from auth.CheckObjectAccess +// (object_lock.go:220, 257). Both call sites tolerate any error by +// treating versioning as disabled, so we could leave the default +// ErrNotImplemented — but returning a clean "Suspended" status is +// less noisy in logs and makes the no-op intent explicit. +func (b *Backend) GetBucketVersioning(ctx context.Context, bucket string) (s3response.GetBucketVersioningOutput, error) { + if _, err := b.reg.Get(ctx, bucket); err != nil { + if errors.Is(err, registry.ErrNotFound) { + return s3response.GetBucketVersioningOutput{}, s3err.GetAPIError(s3err.ErrNoSuchBucket) + } + return s3response.GetBucketVersioningOutput{}, err + } + return s3response.GetBucketVersioningOutput{}, nil +} + +func (b *Backend) HeadBucket(ctx context.Context, input *s3.HeadBucketInput) (*s3.HeadBucketOutput, error) { + if input.Bucket == nil { + return nil, s3err.GetAPIError(s3err.ErrInvalidBucketName) + } + if _, err := b.reg.Get(ctx, *input.Bucket); err != nil { + if errors.Is(err, registry.ErrNotFound) { + return nil, s3err.GetAPIError(s3err.ErrNoSuchBucket) + } + return nil, err + } + return &s3.HeadBucketOutput{}, nil +} + +func (b *Backend) CreateBucket(ctx context.Context, input *s3.CreateBucketInput, _ []byte) error { + if input.Bucket == nil { + return s3err.GetAPIError(s3err.ErrInvalidBucketName) + } + // strings.Clone: versitygw passes us a fiber.Ctx.Params() string + // whose backing buffer is recycled when the request completes. + // Storing it directly in the registry produces map-key corruption + // once the buffer is reused for the next request. + name := strings.Clone(*input.Bucket) + if !validBucketName(name) { + return s3err.GetAPIError(s3err.ErrInvalidBucketName) + } + if err := b.reg.Create(ctx, name, time.Now().Unix()); err != nil { + if errors.Is(err, registry.ErrExists) { + return s3err.GetAPIError(s3err.ErrBucketAlreadyExists) + } + return err + } + return nil +} + +func (b *Backend) DeleteBucket(ctx context.Context, name string) error { + return b.txns.WithLock(ctx, name, func(ctx context.Context) error { + st, err := b.reg.Get(ctx, name) + if err != nil { + if errors.Is(err, registry.ErrNotFound) { + return s3err.GetAPIError(s3err.ErrNoSuchBucket) + } + return err + } + + // S3 forbids deleting non-empty buckets. Walk the MST until + // we see any leaf, then bail. + if st.Root.Defined() { + t := mst.LoadMST(b.read, st.Root) + var seen bool + walkErr := t.WalkLeavesFromNocache(ctx, "", func(string, cid.Cid) error { + seen = true + return mst.ErrStopWalk + }) + if walkErr != nil { + return walkErr + } + if seen { + return s3err.GetAPIError(s3err.ErrBucketNotEmpty) + } + } + + if err := b.reg.Delete(ctx, name); err != nil { + if errors.Is(err, registry.ErrNotFound) { + return s3err.GetAPIError(s3err.ErrNoSuchBucket) + } + return err + } + return nil + }) +} + +// validBucketName mirrors the rules from the prior bucket.Service: +// 3-63 chars, lowercase letters, digits, dots, dashes; cannot begin +// with a dot or dash. This is the S3 DNS-compliant subset. +func validBucketName(s string) bool { + if len(s) < 3 || len(s) > 63 { + return false + } + for i, r := range s { + switch { + case r >= 'a' && r <= 'z': + case r >= '0' && r <= '9': + case r == '-' || r == '.': + if i == 0 { + return false + } + default: + return false + } + } + return true +} diff --git a/pkg/ms3t/s3frontend/object.go b/pkg/ms3t/s3frontend/object.go new file mode 100644 index 0000000..a5485a0 --- /dev/null +++ b/pkg/ms3t/s3frontend/object.go @@ -0,0 +1,441 @@ +package s3frontend + +import ( + "context" + "encoding/hex" + "errors" + "fmt" + "strings" + "time" + + "github.com/aws/aws-sdk-go-v2/service/s3" + "github.com/aws/aws-sdk-go-v2/service/s3/types" + "github.com/ipfs/go-cid" + "github.com/versity/versitygw/backend" + "github.com/versity/versitygw/s3err" + "github.com/versity/versitygw/s3response" + + msbucket "github.com/storacha/sprue/pkg/ms3t/bucket" + "github.com/storacha/sprue/pkg/ms3t/bucketop" + "github.com/storacha/sprue/pkg/ms3t/mst" + "github.com/storacha/sprue/pkg/ms3t/registry" +) + +const defaultMaxKeys = 1000 + +// PutObject writes an object. Tagging, user metadata, ACLs, +// checksums, retention, and preconditions are dropped on the floor +// for now — the manifest schema has no place for them yet (see +// bucket-metadata.rfc §"Canonical state vs service state"). ETag is +// the hex sha256 of the body, quoted per S3 wire format. +func (b *Backend) PutObject(ctx context.Context, input s3response.PutObjectInput) (s3response.PutObjectOutput, error) { + if input.Bucket == nil { + return s3response.PutObjectOutput{}, s3err.GetAPIError(s3err.ErrInvalidBucketName) + } + if input.Key == nil { + return s3response.PutObjectOutput{}, s3err.GetAPIError(s3err.ErrNoSuchKey) + } + bucketName := *input.Bucket + key := *input.Key + if !mst.IsValidKey(key) { + return s3response.PutObjectOutput{}, s3err.GetAPIError(s3err.ErrInvalidArgument) + } + + contentType := backend.GetStringFromPtr(input.ContentType) + if contentType == "" { + contentType = "application/octet-stream" + } + + // mf is captured by the closure and read after WithTx commits, + // so we can build the response (ETag = sha256, size) from the + // same manifest that was committed. + var mf *msbucket.ObjectManifest + + err := b.txns.WithTx(ctx, bucketName, func(ctx context.Context, tx *bucketop.Tx) (cid.Cid, error) { + bodyRec, err := b.codec.Chunk(ctx, tx, input.Body) + if err != nil { + return cid.Undef, fmt.Errorf("chunk body: %w", err) + } + mf = &msbucket.ObjectManifest{ + Key: key, + ContentType: contentType, + Created: time.Now().Unix(), + Body: bodyRec, + } + mfCid, err := tx.Put(ctx, mf) + if err != nil { + return cid.Undef, fmt.Errorf("manifest put: %w", err) + } + + t := tx.LoadTree() + t2, err := t.Add(ctx, key, mfCid, -1) + if errors.Is(err, mst.ErrAlreadyExists) { + t2, err = t.Update(ctx, key, mfCid) + } + if err != nil { + return cid.Undef, fmt.Errorf("mst write: %w", err) + } + + return t2.GetPointer(ctx, tx) + }) + if err != nil { + if errors.Is(err, bucketop.ErrBucketNotFound) { + return s3response.PutObjectOutput{}, s3err.GetAPIError(s3err.ErrNoSuchBucket) + } + return s3response.PutObjectOutput{}, fmt.Errorf("s3frontend: put: %w", err) + } + + size := mf.Body.Size + return s3response.PutObjectOutput{ + ETag: etagOf(mf), + Size: &size, + }, nil +} + +// HeadObject returns the manifest's metadata. Range, partNumber, +// preconditions, versioning, and checksums are not implemented. +func (b *Backend) HeadObject(ctx context.Context, input *s3.HeadObjectInput) (*s3.HeadObjectOutput, error) { + if input.Bucket == nil { + return nil, s3err.GetAPIError(s3err.ErrInvalidBucketName) + } + if input.Key == nil { + return nil, s3err.GetAPIError(s3err.ErrNoSuchKey) + } + mf, err := b.lookupManifest(ctx, *input.Bucket, *input.Key) + if err != nil { + return nil, err + } + etag := etagOf(mf) + size := mf.Body.Size + lastModified := time.Unix(mf.Created, 0) + contentType := mf.ContentType + return &s3.HeadObjectOutput{ + AcceptRanges: backend.GetPtrFromString("bytes"), + ContentLength: &size, + ContentType: &contentType, + ETag: &etag, + LastModified: &lastModified, + StorageClass: types.StorageClassStandard, + }, nil +} + +// GetObject returns an object body, optionally restricted to a byte +// range supplied via the Range header. The body io.ReadCloser is +// owned by the caller (versitygw closes it after streaming). +func (b *Backend) GetObject(ctx context.Context, input *s3.GetObjectInput) (*s3.GetObjectOutput, error) { + if input.Bucket == nil { + return nil, s3err.GetAPIError(s3err.ErrInvalidBucketName) + } + if input.Key == nil { + return nil, s3err.GetAPIError(s3err.ErrNoSuchKey) + } + mf, err := b.lookupManifest(ctx, *input.Bucket, *input.Key) + if err != nil { + return nil, err + } + + objSize := mf.Body.Size + startOffset, length, isRange, err := backend.ParseObjectRange(objSize, backend.GetStringFromPtr(input.Range)) + if err != nil { + return nil, err + } + + var contentRange *string + var body = b.codec.Open(ctx, b.read, mf.Body) + if isRange { + body = b.codec.OpenRange(ctx, b.read, mf.Body, startOffset, startOffset+length-1) + cr := fmt.Sprintf("bytes %d-%d/%d", startOffset, startOffset+length-1, objSize) + contentRange = &cr + } + + etag := etagOf(mf) + lastModified := time.Unix(mf.Created, 0) + contentType := mf.ContentType + return &s3.GetObjectOutput{ + AcceptRanges: backend.GetPtrFromString("bytes"), + Body: body, + ContentLength: &length, + ContentType: &contentType, + ContentRange: contentRange, + ETag: &etag, + LastModified: &lastModified, + StorageClass: types.StorageClassStandard, + }, nil +} + +// DeleteObject removes an object. Missing keys are no-ops (matching +// S3's idempotent DELETE semantics). +func (b *Backend) DeleteObject(ctx context.Context, input *s3.DeleteObjectInput) (*s3.DeleteObjectOutput, error) { + if input.Bucket == nil { + return nil, s3err.GetAPIError(s3err.ErrInvalidBucketName) + } + if input.Key == nil { + return nil, s3err.GetAPIError(s3err.ErrNoSuchKey) + } + bucketName := *input.Bucket + key := *input.Key + + err := b.txns.WithTx(ctx, bucketName, func(ctx context.Context, tx *bucketop.Tx) (cid.Cid, error) { + // Empty bucket: nothing to delete. Returning cid.Undef from + // the closure tells WithTx to discard cleanly with no + // commit — the equivalent of "no-op success." + if !tx.State().Root.Defined() { + return cid.Undef, nil + } + t := tx.LoadTree() + t2, err := t.Delete(ctx, key) + if errors.Is(err, mst.ErrNotFound) { + // Idempotent DELETE: missing key isn't an error. + return cid.Undef, nil + } + if err != nil { + return cid.Undef, fmt.Errorf("mst delete: %w", err) + } + return t2.GetPointer(ctx, tx) + }) + if err != nil { + if errors.Is(err, bucketop.ErrBucketNotFound) { + return nil, s3err.GetAPIError(s3err.ErrNoSuchBucket) + } + return nil, fmt.Errorf("s3frontend: delete: %w", err) + } + return &s3.DeleteObjectOutput{}, nil +} + +// ListObjects (V1) walks the MST in lexicographic order, applying +// S3-style prefix / delimiter filtering with V1's Marker-based +// pagination. +func (b *Backend) ListObjects(ctx context.Context, input *s3.ListObjectsInput) (s3response.ListObjectsResult, error) { + if input.Bucket == nil { + return s3response.ListObjectsResult{}, s3err.GetAPIError(s3err.ErrInvalidBucketName) + } + bucketName := *input.Bucket + prefix := backend.GetStringFromPtr(input.Prefix) + delimiter := backend.GetStringFromPtr(input.Delimiter) + marker := backend.GetStringFromPtr(input.Marker) + + maxKeys := int32(0) + if input.MaxKeys != nil { + maxKeys = *input.MaxKeys + } + limit := int(maxKeys) + if limit <= 0 { + limit = defaultMaxKeys + } + + from := prefix + if marker != "" && marker > from { + // V1 Marker: list strictly after this key. + from = marker + "\x01" + } + + res, err := b.listWalk(ctx, bucketName, prefix, delimiter, from, limit) + if err != nil { + return s3response.ListObjectsResult{}, err + } + + out := s3response.ListObjectsResult{ + Name: &bucketName, + Prefix: &prefix, + Delimiter: &delimiter, + MaxKeys: &maxKeys, + IsTruncated: &res.truncated, + Contents: res.contents, + CommonPrefixes: res.commonPrefixes, + } + if input.Marker != nil { + out.Marker = input.Marker + } + // NextMarker is only set when delimiter is specified and the + // page was truncated, per AWS docs. Without delimiter, callers + // use the last Key in Contents as the marker for the next page. + if res.truncated && delimiter != "" && res.nextKey != "" { + next := res.nextKey + out.NextMarker = &next + } + return out, nil +} + +// ListObjectsV2 walks the MST in lexicographic order, applying +// S3-style prefix and delimiter filtering with V2's +// ContinuationToken-based pagination. +func (b *Backend) ListObjectsV2(ctx context.Context, input *s3.ListObjectsV2Input) (s3response.ListObjectsV2Result, error) { + if input.Bucket == nil { + return s3response.ListObjectsV2Result{}, s3err.GetAPIError(s3err.ErrInvalidBucketName) + } + bucketName := *input.Bucket + prefix := backend.GetStringFromPtr(input.Prefix) + delimiter := backend.GetStringFromPtr(input.Delimiter) + + // ContinuationToken (resumption) takes precedence over StartAfter + // (first-page hint) per S3 semantics. + startAfter := backend.GetStringFromPtr(input.StartAfter) + if input.ContinuationToken != nil && *input.ContinuationToken != "" { + startAfter = *input.ContinuationToken + } + + maxKeys := int32(0) + if input.MaxKeys != nil { + maxKeys = *input.MaxKeys + } + limit := int(maxKeys) + if limit <= 0 { + limit = defaultMaxKeys + } + + from := prefix + if startAfter != "" && startAfter > from { + // Walk strictly past startAfter by appending a low byte. + from = startAfter + "\x01" + } + + res, err := b.listWalk(ctx, bucketName, prefix, delimiter, from, limit) + if err != nil { + return s3response.ListObjectsV2Result{}, err + } + + keyCount := int32(len(res.contents) + len(res.commonPrefixes)) + out := s3response.ListObjectsV2Result{ + Name: &bucketName, + Prefix: &prefix, + Delimiter: &delimiter, + MaxKeys: &maxKeys, + KeyCount: &keyCount, + IsTruncated: &res.truncated, + Contents: res.contents, + CommonPrefixes: res.commonPrefixes, + } + if input.ContinuationToken != nil { + out.ContinuationToken = input.ContinuationToken + } + if input.StartAfter != nil { + out.StartAfter = input.StartAfter + } + if res.truncated && res.nextKey != "" { + next := res.nextKey + out.NextContinuationToken = &next + } + return out, nil +} + +// listWalkResult is the shared output of one MST walk for V1 and V2 +// list. nextKey is the last key (or common prefix) that ended the +// page when truncated; empty when the walk completed. +type listWalkResult struct { + contents []s3response.Object + commonPrefixes []types.CommonPrefix + truncated bool + nextKey string +} + +// listWalk drives a single MST walk shared by ListObjects and +// ListObjectsV2. The version-specific pieces (Marker vs. +// ContinuationToken / StartAfter, NextMarker vs. +// NextContinuationToken) live in the callers; this helper only +// understands prefix, delimiter, and the [from, ...) starting key. +func (b *Backend) listWalk(ctx context.Context, bucketName, prefix, delimiter, from string, limit int) (listWalkResult, error) { + out := listWalkResult{ + contents: []s3response.Object{}, + commonPrefixes: []types.CommonPrefix{}, + } + + st, err := b.reg.Get(ctx, bucketName) + if err != nil { + if errors.Is(err, registry.ErrNotFound) { + return out, s3err.GetAPIError(s3err.ErrNoSuchBucket) + } + return out, err + } + if !st.Root.Defined() { + return out, nil + } + + t := mst.LoadMST(b.read, st.Root) + seenPrefix := map[string]struct{}{} + walkErr := t.WalkLeavesFromNocache(ctx, from, func(k string, mfCid cid.Cid) error { + if prefix != "" && !strings.HasPrefix(k, prefix) { + return mst.ErrStopWalk + } + + if delimiter != "" { + tail := k[len(prefix):] + if i := strings.Index(tail, delimiter); i >= 0 { + cp := prefix + tail[:i+len(delimiter)] + if _, dup := seenPrefix[cp]; !dup { + seenPrefix[cp] = struct{}{} + cpCopy := cp + out.commonPrefixes = append(out.commonPrefixes, types.CommonPrefix{Prefix: &cpCopy}) + if len(out.contents)+len(out.commonPrefixes) >= limit { + out.truncated = true + out.nextKey = cp + return mst.ErrStopWalk + } + } + return nil + } + } + + var mf msbucket.ObjectManifest + if err := b.read.Get(ctx, mfCid, &mf); err != nil { + return fmt.Errorf("manifest get %s: %w", mfCid, err) + } + key := k + etag := etagOf(&mf) + size := mf.Body.Size + lastModified := time.Unix(mf.Created, 0) + out.contents = append(out.contents, s3response.Object{ + Key: &key, + ETag: &etag, + Size: &size, + LastModified: &lastModified, + StorageClass: types.ObjectStorageClassStandard, + }) + if len(out.contents)+len(out.commonPrefixes) >= limit { + out.truncated = true + out.nextKey = k + return mst.ErrStopWalk + } + return nil + }) + if walkErr != nil { + return out, fmt.Errorf("s3frontend: walk: %w", walkErr) + } + return out, nil +} + +// lookupManifest is the shared HEAD/GET path: registry → MST → CBOR +// decode of the manifest pointed at by the leaf. Maps "missing +// bucket" / "missing key" to S3 errors. +func (b *Backend) lookupManifest(ctx context.Context, bucketName, key string) (*msbucket.ObjectManifest, error) { + st, err := b.reg.Get(ctx, bucketName) + if err != nil { + if errors.Is(err, registry.ErrNotFound) { + return nil, s3err.GetAPIError(s3err.ErrNoSuchBucket) + } + return nil, err + } + if !st.Root.Defined() { + return nil, s3err.GetAPIError(s3err.ErrNoSuchKey) + } + t := mst.LoadMST(b.read, st.Root) + mfCid, err := t.Get(ctx, key) + if errors.Is(err, mst.ErrNotFound) { + return nil, s3err.GetAPIError(s3err.ErrNoSuchKey) + } + if err != nil { + return nil, fmt.Errorf("s3frontend: mst get: %w", err) + } + var mf msbucket.ObjectManifest + if err := b.read.Get(ctx, mfCid, &mf); err != nil { + return nil, fmt.Errorf("s3frontend: manifest get: %w", err) + } + return &mf, nil +} + +// etagOf returns the manifest's body sha256 as a hex string with +// surrounding double quotes — the format clients expect on the wire. +// Multipart-style ETags ("-") are out of scope until multipart +// is implemented. +func etagOf(mf *msbucket.ObjectManifest) string { + return `"` + hex.EncodeToString(mf.Body.SHA256) + `"` +} diff --git a/pkg/ms3t/server.go b/pkg/ms3t/server.go new file mode 100644 index 0000000..cf003cb --- /dev/null +++ b/pkg/ms3t/server.go @@ -0,0 +1,319 @@ +package ms3t + +import ( + "context" + "errors" + "fmt" + "path/filepath" + "time" + + "github.com/ipfs/go-cid" + "github.com/multiformats/go-multihash" + "github.com/versity/versitygw/auth" + "github.com/versity/versitygw/metrics" + "github.com/versity/versitygw/s3api" + "github.com/versity/versitygw/s3api/middlewares" + "github.com/versity/versitygw/s3event" + "github.com/versity/versitygw/s3log" + "go.uber.org/zap" + + "github.com/storacha/sprue/pkg/ms3t/blockstore" + msbucket "github.com/storacha/sprue/pkg/ms3t/bucket" + "github.com/storacha/sprue/pkg/ms3t/logstore" + "github.com/storacha/sprue/pkg/ms3t/registry" + "github.com/storacha/sprue/pkg/ms3t/s3frontend" + "github.com/storacha/sprue/pkg/ms3t/uploader" +) + +// ServerConfig captures the user-facing knobs of an ms3t S3 listener. +// New() applies defaults for any zero-valued knobs. SealAge is in +// time.Duration form because callers parse the string config field +// once before constructing the server. +type ServerConfig struct { + // Addr is the host:port to bind the S3 listener to. Required. + Addr string + + // DataDir is where the log writes its segments dir; the caller + // is responsible for creating this directory before calling New. + // Required. + DataDir string + + // Region is the AWS region advertised over sigv4. Defaults to + // "us-east-1". + Region string + + // RootAccess / RootSecret configure the single-account IAM root + // user for the embedded S3 listener. Both required. + RootAccess string + RootSecret string + + // ChunkSize is the body chunk size for new objects, in bytes. + // 0 → bucket.DefaultChunkSize. + ChunkSize int64 + + // SealBytes / SealAge / Retain are passed through to logstore.Open. + // Zero values pick logstore defaults (64 MiB / 5 s / 6 segments). + SealBytes int64 + SealAge time.Duration + Retain int + + // MaxConnections / MaxRequests configure versitygw's hard + // concurrency limit. Zero is unsafe (yields 503 SlowDown on every + // request), so New substitutes a sensible default. + MaxConnections int + MaxRequests int +} + +// ServerDeps bundles the runtime collaborators of an ms3t Server +// behind interfaces. Production wiring uses Forge / Internal / +// Postgres; tests can substitute in-memory equivalents without +// standing up Postgres, piri, or the indexing-service. +type ServerDeps struct { + // Logger is optional; defaults to zap.NewNop(). + Logger *zap.Logger + + // BaseBlockReader is the bottom tier of the layered read path — + // what the log falls through to on misses. In production this is + // *blockstore.Forge (network-backed via indexer + piri); in tests + // it can be any IpldBlockstore. + BaseBlockReader blockstore.BlockReader + + // Uploader is the destination for sealed segments. + Uploader uploader.Uploader + + // Registry tracks per-bucket roots. *registry.Postgres satisfies + // both Registry and Meta in production; tests can supply two + // separate implementations or one that does both. + Registry registry.Registry + + // Meta is the persistence backing for log-segment metadata. + // Typically the same instance as Registry. + Meta logstore.Meta +} + +// Server is a fully-wired ms3t S3 listener. Use Start/Stop for +// lifecycle. fx callers wrap these in OnStart/OnStop hooks; tests +// call them directly. +type Server struct { + cfg ServerConfig + logger *zap.Logger + log blockstore.Log + backend *s3frontend.Backend + api *s3api.S3ApiServer +} + +// New wires a ServerDeps + ServerConfig into a runnable Server. The +// caller is responsible for ensuring cfg.DataDir exists before +// calling. +func New(ctx context.Context, cfg ServerConfig, deps ServerDeps) (*Server, error) { + if err := validateServerInputs(cfg, deps); err != nil { + return nil, err + } + cfg = applyServerDefaults(cfg) + + logger := deps.Logger + if logger == nil { + logger = zap.NewNop() + } + + flush := newFlushFunc(deps.Uploader, deps.Meta) + + log, err := logstore.Open(ctx, logstore.Config{ + Dir: filepath.Join(cfg.DataDir, "segments"), + Meta: deps.Meta, + SealBytes: cfg.SealBytes, + SealAge: cfg.SealAge, + Retain: cfg.Retain, + Flush: flush, + Logger: logger, + }) + if err != nil { + return nil, fmt.Errorf("ms3t: logstore: %w", err) + } + + bs := blockstore.NewLayered(log, deps.BaseBlockReader) + codec := &msbucket.FixedChunker{ChunkSize: cfg.ChunkSize} + backend := s3frontend.New(deps.Registry, bs, log, codec) + + api, err := buildS3API(ctx, backend, cfg) + if err != nil { + // Best-effort cleanup if we got past the log open: the caller + // has no Server handle to call Stop on. + _ = log.Close(ctx) + return nil, err + } + + return &Server{ + cfg: cfg, + logger: logger, + log: log, + backend: backend, + api: api, + }, nil +} + +// Start runs Backend.Recover and spawns the S3 listener goroutine. +// Returns once the listener has been kicked off (does NOT wait for +// it to start serving on Addr). +func (s *Server) Start(ctx context.Context) error { + if err := s.backend.Recover(ctx); err != nil { + return fmt.Errorf("ms3t: recover: %w", err) + } + s.logger.Info("starting ms3t S3 listener", + zap.String("addr", s.cfg.Addr), + zap.String("region", s.cfg.Region), + zap.String("data_dir", s.cfg.DataDir), + zap.Int64("chunk_size", s.cfg.ChunkSize), + ) + go func() { + if err := s.api.ServeMultiPort([]string{s.cfg.Addr}); err != nil { + s.logger.Error("ms3t listener error", zap.Error(err)) + } + }() + return nil +} + +// Stop shuts the listener down and drains the log. Always returns +// the combined error of the two operations so callers see all +// failure modes; either alone is non-fatal to the other. +func (s *Server) Stop(ctx context.Context) error { + s.logger.Info("shutting down ms3t S3 listener") + + var errs []error + if err := s.api.ShutDown(); err != nil { + errs = append(errs, fmt.Errorf("s3api shutdown: %w", err)) + } + if err := s.backend.Drain(ctx); err != nil { + errs = append(errs, fmt.Errorf("backend drain: %w", err)) + } + if len(errs) > 0 { + return fmt.Errorf("ms3t shutdown: %v", errs) + } + return nil +} + +// newFlushFunc captures uploader + meta into the closure passed to +// logstore.Open. Each sealed segment becomes one Forge round trip +// (CAR + index + indexer claim) plus one Postgres tx that flips the +// segment row to flushed and advances each affected bucket's +// forge_root_cid. +// +// The sealed CAR file is the wire payload — uploader.SubmitCAR +// streams it directly into the HTTP PUT, and the segment's +// already-computed digest and append-time position table feed +// allocate/accept and the index view without rescanning the file. +func newFlushFunc(up uploader.Uploader, meta logstore.Meta) logstore.FlushFunc { + return func(ctx context.Context, seg *logstore.Segment) error { + opRoots := seg.OpRoots() + positions := seg.BlockPositions() + if len(positions) == 0 || len(opRoots) == 0 { + // Empty or no-op segment (e.g., force-sealed during a + // quiet startup). Mark flushed so retention can sweep + // it; no Forge ship and no forge_root advance are + // needed. + return meta.MarkSegmentFlushed(ctx, seg.Seq(), time.Now().Unix(), nil) + } + // Segment stores the raw 32-byte SHA-256 of the CAR file; + // the uploader and ShardedDagIndexView want the multihash + // form (varint code + length + digest). + sha, err := multihash.Encode(seg.SHA256(), multihash.SHA2_256) + if err != nil { + return fmt.Errorf("encode segment %d sha: %w", seg.Seq(), err) + } + rootCids := make([]cid.Cid, len(opRoots)) + for i, opr := range opRoots { + rootCids[i] = opr.Root + } + src := uploader.CARSource{ + Path: seg.CARPath(), + Size: seg.Size(), + SHA256: sha, + Positions: positions, + } + if err := up.SubmitCAR(ctx, rootCids, src); err != nil { + return fmt.Errorf("submit segment %d: %w", seg.Seq(), err) + } + return meta.MarkSegmentFlushed(ctx, seg.Seq(), time.Now().Unix(), opRoots) + } +} + +// buildS3API constructs the versitygw S3ApiServer with the wiring +// ms3t needs: single-account IAM, no audit / event sinks, generous +// concurrency limits. +func buildS3API(ctx context.Context, backend *s3frontend.Backend, cfg ServerConfig) (*s3api.S3ApiServer, error) { + rootAcc := auth.Account{ + Access: cfg.RootAccess, + Secret: cfg.RootSecret, + Role: auth.RoleAdmin, + } + iam := auth.NewIAMServiceSingle(rootAcc) + + loggers, err := s3log.InitLogger(&s3log.LogConfig{}) + if err != nil { + return nil, fmt.Errorf("ms3t: loggers: %w", err) + } + evSender, err := s3event.InitEventSender(&s3event.EventConfig{}) + if err != nil { + return nil, fmt.Errorf("ms3t: event sender: %w", err) + } + mm, err := metrics.NewManager(ctx, metrics.Config{}) + if err != nil { + return nil, fmt.Errorf("ms3t: metrics: %w", err) + } + + api, err := s3api.New(backend, + middlewares.RootUserConfig{Access: rootAcc.Access, Secret: rootAcc.Secret}, + cfg.Region, iam, loggers.S3Logger, loggers.AdminLogger, evSender, mm, + s3api.WithQuiet(), + s3api.WithHealth("/health"), + s3api.WithConcurrencyLimiter(cfg.MaxConnections, cfg.MaxRequests), + ) + if err != nil { + return nil, fmt.Errorf("ms3t: s3api: %w", err) + } + return api, nil +} + +func validateServerInputs(cfg ServerConfig, deps ServerDeps) error { + if cfg.Addr == "" { + return errors.New("ms3t: ServerConfig.Addr is required") + } + if cfg.DataDir == "" { + return errors.New("ms3t: ServerConfig.DataDir is required") + } + if cfg.RootAccess == "" || cfg.RootSecret == "" { + return errors.New("ms3t: ServerConfig.RootAccess and ServerConfig.RootSecret are required") + } + if deps.BaseBlockReader == nil { + return errors.New("ms3t: ServerDeps.BaseBlockReader is required") + } + if deps.Uploader == nil { + return errors.New("ms3t: ServerDeps.Uploader is required") + } + if deps.Registry == nil { + return errors.New("ms3t: ServerDeps.Registry is required") + } + if deps.Meta == nil { + return errors.New("ms3t: ServerDeps.Meta is required") + } + return nil +} + +func applyServerDefaults(cfg ServerConfig) ServerConfig { + if cfg.Region == "" { + cfg.Region = "us-east-1" + } + if cfg.ChunkSize <= 0 { + cfg.ChunkSize = msbucket.DefaultChunkSize + } + // SealBytes / SealAge / Retain pass through to logstore.Open + // untouched; logstore.Config.defaults handles its own fallbacks. + + if cfg.MaxConnections <= 0 { + cfg.MaxConnections = 4096 + } + if cfg.MaxRequests <= 0 { + cfg.MaxRequests = 4096 + } + return cfg +} diff --git a/pkg/ms3t/testing/harness.go b/pkg/ms3t/testing/harness.go new file mode 100644 index 0000000..0ba0100 --- /dev/null +++ b/pkg/ms3t/testing/harness.go @@ -0,0 +1,443 @@ +package testing + +import ( + "context" + "fmt" + "net" + "os" + "sort" + "sync" + "time" + + block "github.com/ipfs/go-block-format" + "github.com/ipfs/go-cid" + "go.uber.org/zap" + + "github.com/storacha/sprue/pkg/ms3t" + "github.com/storacha/sprue/pkg/ms3t/blockstore" + "github.com/storacha/sprue/pkg/ms3t/logstore" + "github.com/storacha/sprue/pkg/ms3t/registry" + "github.com/storacha/sprue/pkg/ms3t/uploader" +) + +// DefaultAccessKey / DefaultSecretKey are the sigv4 credentials a +// freshly started Harness uses unless overridden via WithCredentials. +// They are not secrets — the harness binds to 127.0.0.1 only. +const ( + DefaultAccessKey = "ms3t-test-access" + DefaultSecretKey = "ms3t-test-secret" +) + +// Harness is an in-process ms3t.Server backed by in-memory deps. +// No Postgres, no piri, no indexer: a sealed segment's flush is a +// no-op that just advances bookkeeping. Sufficient for driving the +// upstream versitygw integration suite against the listener via +// Run + Suite. +type Harness struct { + Endpoint string + AccessKey string + SecretKey string + Region string + + server *ms3t.Server + dataDir string +} + +// HarnessOption customizes StartHarness. Each option mutates a +// HarnessOptions value in place. +type HarnessOption func(*harnessOptions) + +type harnessOptions struct { + logger *zap.Logger + region string + accessKey string + secretKey string + chunkSize int64 + sealBytes int64 + sealAge time.Duration + retain int + readyAfter time.Duration +} + +// WithLogger sets the zap logger handed to ms3t.Server. Default nop. +func WithLogger(l *zap.Logger) HarnessOption { + return func(o *harnessOptions) { o.logger = l } +} + +// WithRegion overrides the default "us-east-1" sigv4 region. +func WithRegion(r string) HarnessOption { + return func(o *harnessOptions) { o.region = r } +} + +// WithCredentials overrides DefaultAccessKey / DefaultSecretKey. +func WithCredentials(access, secret string) HarnessOption { + return func(o *harnessOptions) { + o.accessKey = access + o.secretKey = secret + } +} + +// WithChunkSize overrides the per-object body chunk size. +// 0 means use bucket.DefaultChunkSize. +func WithChunkSize(n int64) HarnessOption { + return func(o *harnessOptions) { o.chunkSize = n } +} + +// WithSealConfig forwards SealBytes / SealAge / Retain to logstore. +// Tests that exercise seal-on-size or seal-on-age behavior use this; +// the default leaves all three zero so logstore picks its own +// defaults. +func WithSealConfig(sealBytes int64, sealAge time.Duration, retain int) HarnessOption { + return func(o *harnessOptions) { + o.sealBytes = sealBytes + o.sealAge = sealAge + o.retain = retain + } +} + +// WithReadyTimeout caps how long StartHarness will dial the listener +// before giving up. Default 5 s. +func WithReadyTimeout(d time.Duration) HarnessOption { + return func(o *harnessOptions) { o.readyAfter = d } +} + +// StartHarness stands up an in-process ms3t.Server bound to a random +// 127.0.0.1 port and waits for it to accept TCP connections. The +// caller must call Stop to drain the log and remove scratch state. +func StartHarness(ctx context.Context, opts ...HarnessOption) (*Harness, error) { + options := harnessOptions{ + logger: zap.NewNop(), + region: "us-east-1", + accessKey: DefaultAccessKey, + secretKey: DefaultSecretKey, + readyAfter: 5 * time.Second, + } + for _, o := range opts { + o(&options) + } + + addr, err := pickFreeAddr() + if err != nil { + return nil, fmt.Errorf("ms3t harness: pick port: %w", err) + } + + dataDir, err := os.MkdirTemp("", "ms3t-harness-") + if err != nil { + return nil, fmt.Errorf("ms3t harness: tempdir: %w", err) + } + + mem := newMemStore() + + srv, err := ms3t.New(ctx, ms3t.ServerConfig{ + Addr: addr, + DataDir: dataDir, + Region: options.region, + RootAccess: options.accessKey, + RootSecret: options.secretKey, + ChunkSize: options.chunkSize, + SealBytes: options.sealBytes, + SealAge: options.sealAge, + Retain: options.retain, + }, ms3t.ServerDeps{ + Logger: options.logger, + BaseBlockReader: nopBaseReader{}, + Uploader: nopUploader{}, + Registry: mem, + Meta: mem, + }) + if err != nil { + _ = os.RemoveAll(dataDir) + return nil, fmt.Errorf("ms3t harness: New: %w", err) + } + + if err := srv.Start(ctx); err != nil { + _ = os.RemoveAll(dataDir) + return nil, fmt.Errorf("ms3t harness: Start: %w", err) + } + + if err := waitListening(ctx, addr, options.readyAfter); err != nil { + _ = srv.Stop(ctx) + _ = os.RemoveAll(dataDir) + return nil, fmt.Errorf("ms3t harness: %w", err) + } + + return &Harness{ + Endpoint: "http://" + addr, + AccessKey: options.accessKey, + SecretKey: options.secretKey, + Region: options.region, + server: srv, + dataDir: dataDir, + }, nil +} + +// Stop shuts the listener down, drains the log, and removes the +// scratch data directory. Safe to call once; subsequent calls +// no-op. Errors from each step are joined. +func (h *Harness) Stop(ctx context.Context) error { + var errs []error + if h.server != nil { + if err := h.server.Stop(ctx); err != nil { + errs = append(errs, err) + } + h.server = nil + } + if h.dataDir != "" { + if err := os.RemoveAll(h.dataDir); err != nil { + errs = append(errs, fmt.Errorf("remove dataDir: %w", err)) + } + h.dataDir = "" + } + if len(errs) > 0 { + return fmt.Errorf("ms3t harness stop: %v", errs) + } + return nil +} + +// Config returns a Config wired against the harness's listener, +// suitable for passing to Run. +func (h *Harness) Config() Config { + return Config{ + Endpoint: h.Endpoint, + AccessKey: h.AccessKey, + SecretKey: h.SecretKey, + Region: h.Region, + } +} + +// Server exposes the underlying *ms3t.Server for tests that want to +// reach past the S3 protocol layer (e.g., direct backend calls, +// log inspection). +func (h *Harness) Server() *ms3t.Server { return h.server } + +// pickFreeAddr asks the kernel for a free 127.0.0.1 port by binding +// and immediately closing. There is a small race window between +// close and ms3t's rebind, but for serial unit tests it is +// effectively zero. +func pickFreeAddr() (string, error) { + l, err := net.Listen("tcp", "127.0.0.1:0") + if err != nil { + return "", err + } + addr := l.Addr().String() + if err := l.Close(); err != nil { + return "", err + } + return addr, nil +} + +// waitListening polls TCP connect to addr until it succeeds, ctx +// is canceled, or the timeout fires. +func waitListening(ctx context.Context, addr string, timeout time.Duration) error { + deadline := time.Now().Add(timeout) + var d net.Dialer + for { + if !time.Now().Before(deadline) { + return fmt.Errorf("listener not ready at %s after %s", addr, timeout) + } + dialCtx, cancel := context.WithTimeout(ctx, 100*time.Millisecond) + conn, err := d.DialContext(dialCtx, "tcp", addr) + cancel() + if err == nil { + _ = conn.Close() + return nil + } + select { + case <-ctx.Done(): + return ctx.Err() + case <-time.After(20 * time.Millisecond): + } + } +} + +// memStore is an in-memory implementation of registry.Registry + +// logstore.Meta. The two interfaces overlap on bucket state because +// MarkSegmentFlushed advances forge_root_cid; production wires a +// single *registry.Postgres for both seams, and this fake follows +// suit so flush behavior matches. +type memStore struct { + mu sync.Mutex + buckets map[string]*registry.State + segments map[uint64]*logstore.SegmentMeta + nextSeq uint64 +} + +func newMemStore() *memStore { + return &memStore{ + buckets: map[string]*registry.State{}, + segments: map[uint64]*logstore.SegmentMeta{}, + } +} + +// Registry methods =========================================================== + +func (m *memStore) Create(_ context.Context, name string, createdAt int64) error { + m.mu.Lock() + defer m.mu.Unlock() + if _, ok := m.buckets[name]; ok { + return registry.ErrExists + } + m.buckets[name] = ®istry.State{Name: name, CreatedAt: createdAt} + return nil +} + +func (m *memStore) Get(_ context.Context, name string) (*registry.State, error) { + m.mu.Lock() + defer m.mu.Unlock() + s, ok := m.buckets[name] + if !ok { + return nil, registry.ErrNotFound + } + cp := *s + return &cp, nil +} + +func (m *memStore) List(_ context.Context) ([]*registry.State, error) { + m.mu.Lock() + defer m.mu.Unlock() + out := make([]*registry.State, 0, len(m.buckets)) + for _, s := range m.buckets { + cp := *s + out = append(out, &cp) + } + sort.Slice(out, func(i, j int) bool { return out[i].Name < out[j].Name }) + return out, nil +} + +func (m *memStore) Delete(_ context.Context, name string) error { + m.mu.Lock() + defer m.mu.Unlock() + if _, ok := m.buckets[name]; !ok { + return registry.ErrNotFound + } + delete(m.buckets, name) + return nil +} + +func (m *memStore) CASRoot(_ context.Context, name string, expect, next cid.Cid) error { + m.mu.Lock() + defer m.mu.Unlock() + s, ok := m.buckets[name] + if !ok { + return registry.ErrNotFound + } + if !s.Root.Equals(expect) { + return registry.ErrConflict + } + s.Root = next + return nil +} + +func (m *memStore) SetForgeRoot(_ context.Context, name string, root cid.Cid) error { + m.mu.Lock() + defer m.mu.Unlock() + s, ok := m.buckets[name] + if !ok { + return registry.ErrNotFound + } + s.ForgeRoot = root + return nil +} + +// Meta methods =============================================================== + +func (m *memStore) NextSegmentSeq(_ context.Context) (uint64, error) { + m.mu.Lock() + defer m.mu.Unlock() + m.nextSeq++ + return m.nextSeq, nil +} + +func (m *memStore) InsertSegmentOpen(_ context.Context, seq uint64) error { + m.mu.Lock() + defer m.mu.Unlock() + if _, ok := m.segments[seq]; ok { + return nil + } + m.segments[seq] = &logstore.SegmentMeta{Seq: seq, State: logstore.StateOpen} + return nil +} + +func (m *memStore) MarkSegmentSealed(_ context.Context, seq uint64, sealedAt int64, sizeBytes int64, sha256 []byte, opRoots []blockstore.OpRoot) error { + m.mu.Lock() + defer m.mu.Unlock() + r, ok := m.segments[seq] + if !ok || r.State != logstore.StateOpen { + return nil + } + r.State = logstore.StateSealed + r.SealedAt = sealedAt + r.SizeBytes = sizeBytes + r.SHA256 = append([]byte(nil), sha256...) + r.OpRoots = append([]blockstore.OpRoot(nil), opRoots...) + return nil +} + +func (m *memStore) MarkSegmentFlushed(_ context.Context, seq uint64, flushedAt int64, opRoots []blockstore.OpRoot) error { + m.mu.Lock() + defer m.mu.Unlock() + if r, ok := m.segments[seq]; ok { + r.State = logstore.StateFlushed + r.FlushedAt = flushedAt + } + for _, opr := range opRoots { + if b, ok := m.buckets[opr.Bucket]; ok { + b.ForgeRoot = opr.Root + } + } + return nil +} + +func (m *memStore) DeleteSegment(_ context.Context, seq uint64) error { + m.mu.Lock() + defer m.mu.Unlock() + delete(m.segments, seq) + return nil +} + +func (m *memStore) ListUnflushedSegments(_ context.Context) ([]logstore.SegmentMeta, error) { + m.mu.Lock() + defer m.mu.Unlock() + var out []logstore.SegmentMeta + for _, r := range m.segments { + if r.State == logstore.StateOpen || r.State == logstore.StateSealed { + out = append(out, *r) + } + } + sort.Slice(out, func(i, j int) bool { return out[i].Seq < out[j].Seq }) + return out, nil +} + +func (m *memStore) RehydrateSegment(_ context.Context, sm logstore.SegmentMeta) error { + m.mu.Lock() + defer m.mu.Unlock() + cp := sm + m.segments[sm.Seq] = &cp + return nil +} + +// nopBaseReader is the base tier of the layered read path for the +// harness: every miss past the log returns ErrNotFound. Production +// wires *blockstore.Forge here; tests don't have piri to talk to. +type nopBaseReader struct{} + +func (nopBaseReader) GetBlock(_ context.Context, _ cid.Cid) (block.Block, error) { + return nil, blockstore.ErrNotFound +} + +// nopUploader is the flush sink for the harness: SubmitCAR returns +// nil so the segment is marked flushed without touching the network. +type nopUploader struct{} + +func (nopUploader) SubmitCAR(_ context.Context, _ []cid.Cid, _ uploader.CARSource) error { + return nil +} + +// Compile-time guarantees the fakes still match the contracts after +// upstream interface drift. +var ( + _ registry.Registry = (*memStore)(nil) + _ logstore.Meta = (*memStore)(nil) + _ blockstore.BlockReader = nopBaseReader{} + _ uploader.Uploader = nopUploader{} +) diff --git a/pkg/ms3t/testing/harness_test.go b/pkg/ms3t/testing/harness_test.go new file mode 100644 index 0000000..7811a42 --- /dev/null +++ b/pkg/ms3t/testing/harness_test.go @@ -0,0 +1,52 @@ +package testing_test + +import ( + "context" + "net/http" + "strings" + "testing" + "time" + + "go.uber.org/zap/zaptest" + + mstesting "github.com/storacha/sprue/pkg/ms3t/testing" +) + +func TestHarnessLifecycle(t *testing.T) { + ctx, cancel := context.WithTimeout(t.Context(), 10*time.Second) + defer cancel() + + h, err := mstesting.StartHarness(ctx, mstesting.WithLogger(zaptest.NewLogger(t))) + if err != nil { + t.Fatalf("StartHarness: %v", err) + } + t.Cleanup(func() { + if err := h.Stop(t.Context()); err != nil { + t.Errorf("Stop: %v", err) + } + }) + + if !strings.HasPrefix(h.Endpoint, "http://127.0.0.1:") { + t.Fatalf("unexpected endpoint %q", h.Endpoint) + } + + // /health is wired in buildS3API; hit it to confirm the listener + // is actually serving HTTP, not just accepting TCP. + req, err := http.NewRequestWithContext(ctx, http.MethodGet, h.Endpoint+"/health", nil) + if err != nil { + t.Fatalf("NewRequest: %v", err) + } + resp, err := http.DefaultClient.Do(req) + if err != nil { + t.Fatalf("GET /health: %v", err) + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + t.Fatalf("/health status = %d, want 200", resp.StatusCode) + } + + cfg := h.Config() + if cfg.Endpoint != h.Endpoint || cfg.AccessKey != h.AccessKey || cfg.SecretKey != h.SecretKey { + t.Fatalf("Config() mismatch: %+v vs %+v", cfg, h) + } +} diff --git a/pkg/ms3t/testing/integration.go b/pkg/ms3t/testing/integration.go new file mode 100644 index 0000000..1cd4866 --- /dev/null +++ b/pkg/ms3t/testing/integration.go @@ -0,0 +1,228 @@ +// Package testing wires the upstream versitygw integration suite +// (github.com/versity/versitygw/tests/integration) against a running +// ms3t S3 listener. Callers own server lifecycle and pass connection +// details in via Config; this package only selects which upstream +// group functions to run. Each group prints its own per-test results +// to stdout; Run additionally returns a Result summarizing how many +// cases passed and failed so Go tests can fail a *testing.T when the +// suite reports any failures (see RunT). +package testing + +import ( + "context" + "fmt" + "sync" + "testing" + + "github.com/versity/versitygw/tests/integration" +) + +// Config addresses the ms3t S3 listener under test. +type Config struct { + Endpoint string + AccessKey string + SecretKey string + + // Region must match the listener's configured region. Empty + // defaults to "us-east-1". + Region string + + // Parallel queues each Run-mode test on TestState's worker + // pool instead of running serially. Sync-mode tests still + // run after the parallel batch drains. + Parallel bool + + // HostStyle uses host-style bucket addressing (bucket.host) + // instead of path-style (host/bucket). + HostStyle bool + + // VersioningEnabled tells the suite the bucket-versioning + // feature is on; group functions branch on this flag. + VersioningEnabled bool + + // SkipTLSVerify accepts self-signed certs. + SkipTLSVerify bool +} + +// Suite is an ordered list of upstream group functions. Each takes a +// *integration.TestState and dispatches its individual tests via +// ts.Run / ts.Sync. Compose ad-hoc suites by listing +// integration.TestXxx values directly: +// +// testing.Run(ctx, cfg, testing.Suite{ +// integration.TestCreateBucket, +// integration.TestPutObject, +// }) +type Suite []func(*integration.TestState) + +// Result summarizes a single suite Run. Counts are deltas — the +// versitygw counters are package-level atomics shared across every +// caller in the process, so Run snapshots them on entry and reports +// the difference. +type Result struct { + // Ran is the number of individual case functions that started. + Ran uint32 + // Passed is the number that ended in passF. + Passed uint32 + // Failed is the number that ended in failF. + Failed uint32 +} + +// Err returns a non-nil error if any case failed. Use this when the +// caller is not a *testing.T (e.g., a CLI runner). For Go tests, +// prefer RunT which propagates failures into t.Errorf directly. +func (r Result) Err() error { + if r.Failed > 0 { + return fmt.Errorf("integration suite: %d of %d cases failed", r.Failed, r.Ran) + } + return nil +} + +// runMu serializes concurrent Run calls so the global versitygw +// counters can be sampled before/after one Run without interleaving +// with another. Two parallel Run calls in the same process would +// otherwise contaminate each other's deltas. +var runMu sync.Mutex + +// Run drives suite against a fresh TestState bound to ctx and c. +// Blocks until queued (Run-mode) and deferred (Sync-mode) tests +// complete, then returns this run's case counts. +func Run(ctx context.Context, c Config, suite Suite) Result { + runMu.Lock() + defer runMu.Unlock() + + ranBefore := integration.RunCount.Load() + passedBefore := integration.PassCount.Load() + failedBefore := integration.FailCount.Load() + + ts := integration.NewTestState(ctx, newS3Conf(c), c.Parallel) + for _, group := range suite { + group(ts) + } + ts.Wait() + + return Result{ + Ran: integration.RunCount.Load() - ranBefore, + Passed: integration.PassCount.Load() - passedBefore, + Failed: integration.FailCount.Load() - failedBefore, + } +} + +// RunT is the Go-test-friendly form of Run. On any failure the +// returned Result is also reported via t.Errorf so `go test` exits +// non-zero. The per-case FAIL lines printed by versitygw are +// captured in t's log output, so the test author sees exactly which +// cases failed without RunT having to summarize them. +func RunT(t *testing.T, c Config, suite Suite) Result { + t.Helper() + r := Run(t.Context(), c, suite) + if r.Failed > 0 { + t.Errorf("integration suite: %d of %d cases failed (see test output for per-case details)", r.Failed, r.Ran) + } + return r +} + +func newS3Conf(c Config) *integration.S3Conf { + region := c.Region + if region == "" { + region = "us-east-1" + } + opts := []integration.Option{ + integration.WithEndpoint(c.Endpoint), + integration.WithAccess(c.AccessKey), + integration.WithSecret(c.SecretKey), + integration.WithRegion(region), + integration.WithTLSStatus(c.SkipTLSVerify), + } + if c.HostStyle { + opts = append(opts, integration.WithHostStyle()) + } + if c.VersioningEnabled { + opts = append(opts, integration.WithVersioningEnabled()) + } + return integration.NewS3Conf(opts...) +} + +// Smoke is the minimum subset that should pass on a working listener: +// bucket lifecycle plus single-object CRUD. +var Smoke = Suite{ + integration.TestCreateBucket, + integration.TestHeadBucket, + integration.TestListBuckets, + integration.TestPutObject, + integration.TestGetObject, + integration.TestHeadObject, + integration.TestDeleteObject, + integration.TestDeleteBucket, +} + +// CRUD covers Smoke plus listing, multi-delete, copy, and the +// GetObjectAttributes surface. Stays inside features that don't +// require multipart, versioning, ACL, policy, CORS, lock, or tagging. +var CRUD = Suite{ + integration.TestCreateBucket, + integration.TestHeadBucket, + integration.TestListBuckets, + integration.TestDeleteBucket, + integration.TestPutObject, + integration.TestHeadObject, + integration.TestGetObject, + integration.TestGetObjectAttributes, + integration.TestListObjects, + integration.TestListObjectsV2, + integration.TestCopyObject, + integration.TestDeleteObject, + integration.TestDeleteObjects, +} + +// Multipart covers the multipart-upload group set. +var Multipart = Suite{ + integration.TestCreateMultipartUpload, + integration.TestUploadPart, + integration.TestUploadPartCopy, + integration.TestListParts, + integration.TestListMultipartUploads, + integration.TestAbortMultipartUpload, + integration.TestCompleteMultipartUpload, +} + +// Tagging covers object and bucket tagging APIs. +var Tagging = Suite{ + integration.TestPutBucketTagging, + integration.TestGetBucketTagging, + integration.TestDeleteBucketTagging, + integration.TestPutObjectTagging, + integration.TestGetObjectTagging, + integration.TestDeleteObjectTagging, +} + +// ObjectLock covers retention, legal hold, lock config, and +// WORM-protection groups. +var ObjectLock = Suite{ + integration.TestPutObjectLockConfiguration, + integration.TestGetObjectLockConfiguration, + integration.TestPutObjectRetention, + integration.TestGetObjectRetention, + integration.TestPutObjectLegalHold, + integration.TestGetObjectLegalHold, + integration.TestWORMProtection, +} + +// Versioning runs the version-aware group. Set +// Config.VersioningEnabled = true. +var Versioning = Suite{ + integration.TestVersioning, + integration.TestVersioningDisabled, + integration.TestListObjectVersions_VD, +} + +// Auth runs sigv4 + presigned-URL authentication groups. +var Auth = Suite{ + integration.TestAuthentication, + integration.TestPresignedAuthentication, +} + +// Full is the upstream TestFullFlow rolled-up suite — the +// "how-far-from-full-compatibility" gauge. Expect noisy failures +// until ms3t closes the gaps tracked by the focused suites above. +var Full = Suite{integration.TestFullFlow} diff --git a/pkg/ms3t/testing/smoke_test.go b/pkg/ms3t/testing/smoke_test.go new file mode 100644 index 0000000..1f35ab2 --- /dev/null +++ b/pkg/ms3t/testing/smoke_test.go @@ -0,0 +1,374 @@ +package testing + +import ( + "context" + "testing" + + "github.com/versity/versitygw/tests/integration" + "go.uber.org/zap/zaptest" +) + +// smokeCase pairs an upstream versitygw integration case with its +// subtest name. Each TestSmoke_* / TestSmokeXFail_* function below +// declares its cases inline as a []smokeCase, so GoLand (and any +// other IDE that parses table-driven Go tests) renders one +// play-icon per row in the gutter. +type smokeCase struct { + name string + fn integration.IntTest +} + +// Layout: one top-level test per S3 group, in two flavors: +// +// TestSmoke_ — known-passing cases (every case must pass) +// TestSmokeXFail_ — cases ms3t fails today; each one is +// expected to fail and reported as SKIP. +// An unexpected pass errors so the case +// can be promoted. +// +// Adding a case: when a fix lands, run the matching TestSmokeXFail_*. +// Cases that flip green will report "case unexpectedly passed" — move +// the line from the XFail function to the matching TestSmoke_* one. +// +// Each top-level test boots its own Harness (via smokeHarness), so +// failures in one group can't leak buckets / segments / op-roots +// into another. Cases within a group share one harness because the +// upstream cases create + tear down their own buckets internally. + +// smokeHarness boots a Harness scoped to t and registers cleanup. +func smokeHarness(t *testing.T) *Harness { + t.Helper() + h, err := StartHarness(t.Context(), WithLogger(zaptest.NewLogger(t))) + if err != nil { + t.Fatalf("StartHarness: %v", err) + } + t.Cleanup(func() { _ = h.Stop(context.Background()) }) + return h +} + +// ============================================================= +// Known-passing cases +// ============================================================= + +func TestSmoke_CreateBucket(t *testing.T) { + tests := []smokeCase{ + {"invalid_bucket_name", integration.CreateBucket_invalid_bucket_name}, + {"invalid_canned_acl", integration.CreateBucket_invalid_canned_acl}, + {"invalid_location_constraint", integration.CreateBucket_invalid_location_constraint}, + {"invalid_ownership", integration.CreateBucket_invalid_ownership}, + {"ownership_with_acl", integration.CreateBucket_ownership_with_acl}, + {"success", integration.CreateBucket_success}, + } + s3conf := newS3Conf(smokeHarness(t).Config()) + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if err := tt.fn(s3conf); err != nil { + t.Fatalf("%v", err) + } + }) + } +} + +func TestSmoke_HeadBucket(t *testing.T) { + tests := []smokeCase{ + {"non_existing_bucket", integration.HeadBucket_non_existing_bucket}, + {"success", integration.HeadBucket_success}, + } + s3conf := newS3Conf(smokeHarness(t).Config()) + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if err := tt.fn(s3conf); err != nil { + t.Fatalf("%v", err) + } + }) + } +} + +func TestSmoke_ListBuckets(t *testing.T) { + tests := []smokeCase{ + {"empty_success", integration.ListBuckets_empty_success}, + {"invalid_max_buckets", integration.ListBuckets_invalid_max_buckets}, + {"success", integration.ListBuckets_success}, + {"truncated", integration.ListBuckets_truncated}, + {"with_prefix", integration.ListBuckets_with_prefix}, + } + s3conf := newS3Conf(smokeHarness(t).Config()) + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if err := tt.fn(s3conf); err != nil { + t.Fatalf("%v", err) + } + }) + } +} + +func TestSmoke_DeleteBucket(t *testing.T) { + tests := []smokeCase{ + {"incorrect_expected_bucket_owner", integration.DeleteBucket_incorrect_expected_bucket_owner}, + {"non_empty_bucket", integration.DeleteBucket_non_empty_bucket}, + {"non_existing_bucket", integration.DeleteBucket_non_existing_bucket}, + {"success_status_code", integration.DeleteBucket_success_status_code}, + } + s3conf := newS3Conf(smokeHarness(t).Config()) + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if err := tt.fn(s3conf); err != nil { + t.Fatalf("%v", err) + } + }) + } +} + +func TestSmoke_PutObject(t *testing.T) { + tests := []smokeCase{ + {"checksum_algorithm_and_header_mismatch", integration.PutObject_checksum_algorithm_and_header_mismatch}, + {"default_content_type", integration.PutObject_default_content_type}, + {"false_negative_object_names", integration.PutObject_false_negative_object_names}, + {"invalid_checksum_header", integration.PutObject_invalid_checksum_header}, + {"invalid_legal_hold", integration.PutObject_invalid_legal_hold}, + {"invalid_object_lock_mode", integration.PutObject_invalid_object_lock_mode}, + {"invalid_object_names", integration.PutObject_invalid_object_names}, + {"invalid_retain_until_date", integration.PutObject_invalid_retain_until_date}, + {"long_metadata", integration.PutObject_long_metadata}, + {"missing_object_lock_retention_config", integration.PutObject_missing_object_lock_retention_config}, + {"multiple_checksum_headers", integration.PutObject_multiple_checksum_headers}, + {"non_existing_bucket", integration.PutObject_non_existing_bucket}, + {"past_retain_until_date", integration.PutObject_past_retain_until_date}, + {"racey_success", integration.PutObject_racey_success}, + {"special_chars", integration.PutObject_special_chars}, + } + s3conf := newS3Conf(smokeHarness(t).Config()) + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if err := tt.fn(s3conf); err != nil { + t.Fatalf("%v", err) + } + }) + } +} + +func TestSmoke_GetObject(t *testing.T) { + tests := []smokeCase{ + {"by_range_resp_status", integration.GetObject_by_range_resp_status}, + {"dir_with_range", integration.GetObject_dir_with_range}, + {"directory_object_noslash", integration.GetObject_directory_object_noslash}, + {"empty_object_part_number_1", integration.GetObject_empty_object_part_number_1}, + {"invalid_parent", integration.GetObject_invalid_parent}, + {"invalid_part_number", integration.GetObject_invalid_part_number}, + {"non_existing_dir_object", integration.GetObject_non_existing_dir_object}, + {"non_existing_key", integration.GetObject_non_existing_key}, + {"not_enabled_checksum_mode", integration.GetObject_not_enabled_checksum_mode}, + {"overrides_presign_success", integration.GetObject_overrides_presign_success}, + {"overrides_success", integration.GetObject_overrides_success}, + {"range_and_part_number", integration.GetObject_range_and_part_number}, + {"with_range", integration.GetObject_with_range}, + {"zero_len_with_range", integration.GetObject_zero_len_with_range}, + } + s3conf := newS3Conf(smokeHarness(t).Config()) + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if err := tt.fn(s3conf); err != nil { + t.Fatalf("%v", err) + } + }) + } +} + +func TestSmoke_HeadObject(t *testing.T) { + tests := []smokeCase{ + {"conditional_reads", integration.HeadObject_conditional_reads}, + {"directory_object_noslash", integration.HeadObject_directory_object_noslash}, + {"empty_object_part_number_1", integration.HeadObject_empty_object_part_number_1}, + {"invalid_parent_dir", integration.HeadObject_invalid_parent_dir}, + {"invalid_part_number", integration.HeadObject_invalid_part_number}, + {"non_existing_dir_object", integration.HeadObject_non_existing_dir_object}, + {"non_existing_object", integration.HeadObject_non_existing_object}, + {"not_enabled_checksum_mode", integration.HeadObject_not_enabled_checksum_mode}, + {"overrides_presign_success", integration.HeadObject_overrides_presign_success}, + {"overrides_success", integration.HeadObject_overrides_success}, + {"range_and_part_number", integration.HeadObject_range_and_part_number}, + } + s3conf := newS3Conf(smokeHarness(t).Config()) + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if err := tt.fn(s3conf); err != nil { + t.Fatalf("%v", err) + } + }) + } +} + +func TestSmoke_DeleteObject(t *testing.T) { + tests := []smokeCase{ + {"directory_object", integration.DeleteObject_directory_object}, + {"directory_object_noslash", integration.DeleteObject_directory_object_noslash}, + {"expected_bucket_owner", integration.DeleteObject_expected_bucket_owner}, + {"incorrect_expected_bucket_owner", integration.DeleteObject_incorrect_expected_bucket_owner}, + {"non_empty_dir_obj", integration.DeleteObject_non_empty_dir_obj}, + {"non_existing_dir_object", integration.DeleteObject_non_existing_dir_object}, + {"non_existing_object", integration.DeleteObject_non_existing_object}, + {"success", integration.DeleteObject_success}, + {"success_status_code", integration.DeleteObject_success_status_code}, + } + s3conf := newS3Conf(smokeHarness(t).Config()) + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if err := tt.fn(s3conf); err != nil { + t.Fatalf("%v", err) + } + }) + } +} + +// ============================================================= +// Known-failing cases (XFail) +// ============================================================= + +func TestSmokeXFail_CreateBucket(t *testing.T) { + tests := []smokeCase{ + {"as_user", integration.CreateBucket_as_user}, + {"default_acl", integration.CreateBucket_default_acl}, + {"default_object_lock", integration.CreateBucket_default_object_lock}, + {"duplicate_keys", integration.CreateBucket_duplicate_keys}, + {"existing_bucket", integration.CreateBucket_existing_bucket}, + {"invalid_tags", integration.CreateBucket_invalid_tags}, + {"long_tags", integration.CreateBucket_long_tags}, + {"non_default_acl", integration.CreateBucket_non_default_acl}, + {"owned_by_you", integration.CreateBucket_owned_by_you}, + {"private_canned_acl", integration.CreateBucket_private_canned_acl}, + {"private_canned_acl_bucket_owner_enforced_ownership", integration.CreateBucket_private_canned_acl_bucket_owner_enforced_ownership}, + {"tag_count_limit", integration.CreateBucket_tag_count_limit}, + } + s3conf := newS3Conf(smokeHarness(t).Config()) + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := tt.fn(s3conf) + if err == nil { + t.Errorf("case unexpectedly passed; promote it from TestSmokeXFail_CreateBucket to TestSmoke_CreateBucket") + return + } + t.Skipf("known-failing: %v", err) + }) + } +} + +func TestSmokeXFail_ListBuckets(t *testing.T) { + tests := []smokeCase{ + {"as_admin", integration.ListBuckets_as_admin}, + {"as_user", integration.ListBuckets_as_user}, + } + s3conf := newS3Conf(smokeHarness(t).Config()) + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := tt.fn(s3conf) + if err == nil { + t.Errorf("case unexpectedly passed; promote it from TestSmokeXFail_ListBuckets to TestSmoke_ListBuckets") + return + } + t.Skipf("known-failing: %v", err) + }) + } +} + +func TestSmokeXFail_PutObject(t *testing.T) { + tests := []smokeCase{ + {"checksums_success", integration.PutObject_checksums_success}, + {"conditional_writes", integration.PutObject_conditional_writes}, + {"default_checksum", integration.PutObject_default_checksum}, + {"dir_object_checksums_success", integration.PutObject_dir_object_checksums_success}, + {"dir_object_default_checksum", integration.PutObject_dir_object_default_checksum}, + {"incorrect_checksums", integration.PutObject_incorrect_checksums}, + {"invalid_credentials", integration.PutObject_invalid_credentials}, + {"missing_bucket_lock", integration.PutObject_missing_bucket_lock}, + {"object_acl_not_supported", integration.PutObject_object_acl_not_supported}, + {"should_combine_metadata", integration.PutObject_should_combine_metadata}, + {"success", integration.PutObject_success}, + {"tagging", integration.PutObject_tagging}, + {"with_metadata", integration.PutObject_with_metadata}, + {"with_object_lock", integration.PutObject_with_object_lock}, + } + s3conf := newS3Conf(smokeHarness(t).Config()) + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := tt.fn(s3conf) + if err == nil { + t.Errorf("case unexpectedly passed; promote it from TestSmokeXFail_PutObject to TestSmoke_PutObject") + return + } + t.Skipf("known-failing: %v", err) + }) + } +} + +func TestSmokeXFail_GetObject(t *testing.T) { + tests := []smokeCase{ + {"checksums", integration.GetObject_checksums}, + {"conditional_reads", integration.GetObject_conditional_reads}, + {"dir_object_checksum", integration.GetObject_dir_object_checksum}, + {"directory_success", integration.GetObject_directory_success}, + {"large_object", integration.GetObject_large_object}, + {"mp_part_number_exceeds_parts_count", integration.GetObject_mp_part_number_exceeds_parts_count}, + {"mp_part_number_resp_status", integration.GetObject_mp_part_number_resp_status}, + {"mp_part_number_success", integration.GetObject_mp_part_number_success}, + {"non_mp_part_number_1_success", integration.GetObject_non_mp_part_number_1_success}, + {"overrides_fail_public", integration.GetObject_overrides_fail_public}, + {"ranged_with_checksum_mode", integration.GetObject_ranged_with_checksum_mode}, + {"success", integration.GetObject_success}, + } + s3conf := newS3Conf(smokeHarness(t).Config()) + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := tt.fn(s3conf) + if err == nil { + t.Errorf("case unexpectedly passed; promote it from TestSmokeXFail_GetObject to TestSmoke_GetObject") + return + } + t.Skipf("known-failing: %v", err) + }) + } +} + +func TestSmokeXFail_HeadObject(t *testing.T) { + tests := []smokeCase{ + {"by_range_resp_status", integration.HeadObject_by_range_resp_status}, + {"checksums", integration.HeadObject_checksums}, + {"dir_with_range", integration.HeadObject_dir_with_range}, + {"mp_part_number_exceeds_parts_count", integration.HeadObject_mp_part_number_exceeds_parts_count}, + {"mp_part_number_resp_status", integration.HeadObject_mp_part_number_resp_status}, + {"mp_part_number_success", integration.HeadObject_mp_part_number_success}, + {"non_mp_part_number_1_success", integration.HeadObject_non_mp_part_number_1_success}, + {"overrides_fail_public", integration.HeadObject_overrides_fail_public}, + {"ranged_with_checksum_mode", integration.HeadObject_ranged_with_checksum_mode}, + {"success", integration.HeadObject_success}, + {"with_range", integration.HeadObject_with_range}, + {"zero_len_with_range", integration.HeadObject_zero_len_with_range}, + } + s3conf := newS3Conf(smokeHarness(t).Config()) + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := tt.fn(s3conf) + if err == nil { + t.Errorf("case unexpectedly passed; promote it from TestSmokeXFail_HeadObject to TestSmoke_HeadObject") + return + } + t.Skipf("known-failing: %v", err) + }) + } +} + +func TestSmokeXFail_DeleteObject(t *testing.T) { + tests := []smokeCase{ + {"conditional_writes", integration.DeleteObject_conditional_writes}, + } + s3conf := newS3Conf(smokeHarness(t).Config()) + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := tt.fn(s3conf) + if err == nil { + t.Errorf("case unexpectedly passed; promote it from TestSmokeXFail_DeleteObject to TestSmoke_DeleteObject") + return + } + t.Skipf("known-failing: %v", err) + }) + } +} diff --git a/pkg/ms3t/uploader/forge.go b/pkg/ms3t/uploader/forge.go new file mode 100644 index 0000000..4a17f5d --- /dev/null +++ b/pkg/ms3t/uploader/forge.go @@ -0,0 +1,474 @@ +package uploader + +import ( + "bytes" + "context" + "crypto/ed25519" + "errors" + "fmt" + "io" + nethttp "net/http" + "os" + + "github.com/ipfs/go-cid" + "github.com/ipld/go-ipld-prime/datamodel" + "github.com/ipld/go-ipld-prime/fluent/qp" + cidlink "github.com/ipld/go-ipld-prime/linking/cid" + basicnode "github.com/ipld/go-ipld-prime/node/basic" + "github.com/multiformats/go-multicodec" + "github.com/multiformats/go-multihash" + "github.com/storacha/go-libstoracha/blobindex" + httpcap "github.com/storacha/go-libstoracha/capabilities/http" + spaceblobcap "github.com/storacha/go-libstoracha/capabilities/space/blob" + contentcap "github.com/storacha/go-libstoracha/capabilities/space/content" + captypes "github.com/storacha/go-libstoracha/capabilities/types" + "github.com/storacha/go-ucanto/core/delegation" + "github.com/storacha/go-ucanto/core/invocation" + "github.com/storacha/go-ucanto/did" + "github.com/storacha/go-ucanto/principal" + ed25519signer "github.com/storacha/go-ucanto/principal/ed25519/signer" + "github.com/storacha/go-ucanto/ucan" + "go.uber.org/zap" + + "github.com/storacha/sprue/pkg/indexerclient" + "github.com/storacha/sprue/pkg/ms3t/blockstore" + "github.com/storacha/sprue/pkg/piriclient" + "github.com/storacha/sprue/pkg/routing" +) + +// placeholderCID is the smallest legal raw-codec CID with an +// identity-hashed two-byte payload. It mirrors guppy's internal +// PlaceholderCID and is used as the "root" for the +// ShardedDagIndexView and the SpaceIndexAdd invocation: the index's +// Content() field and SpaceIndexAdd's rootCID parameter aren't +// load-bearing for inner-CID lookups (per guppy's own usage), so +// instead of inventing a synthetic root for each multi-root CAR we +// just pass this placeholder through. +var placeholderCID = cid.NewCidV1(cid.Raw, []byte{0x00, 0x00}) + +// Uploader is the seam between the log flusher and durable Forge +// storage. +type Uploader interface { + // SubmitCAR ships one sealed CAR file (one log segment) to + // Forge. The implementation streams the file body straight + // into the HTTP PUT, never materializing it as a []block.Block + // or re-encoding it as a CAR. + SubmitCAR(ctx context.Context, roots []cid.Cid, src CARSource) error +} + +// CARSource describes a sealed CAR file ready to ship. All fields +// refer to data that already exists on disk or was precomputed at +// seal time, so the uploader's per-flush memory footprint is +// dominated by HTTP send buffers rather than segment size. +type CARSource struct { + // Path is the absolute path to the sealed CAR file. SubmitCAR + // streams from this path into the HTTP PUT body. + Path string + // Size is the file's byte length. Set as the request's + // Content-Length so net/http does not fall back to chunked + // transfer encoding (piri requires Content-Length). + Size int64 + // SHA256 is the SHA-256 multihash of the CAR's bytes. Computed + // once at seal time and reused both as the blob digest in + // allocate / accept and as the CAR digest the + // ShardedDagIndexView is keyed by. + SHA256 multihash.Multihash + // Positions maps each block's CID to its offset/length inside + // the CAR file, in the same shape Log appends populate at + // write time. Used to build the index view without rescanning. + Positions map[cid.Cid]blockstore.BlockLoc +} + +// Forge is an Uploader that ships CARs to Forge from inside sprue, +// using sprue's own piriclient and indexerclient. No UCAN-over-HTTP +// loopback to sprue's own UCAN endpoint, no separate principal or +// delegation file: sprue's identity is the signer, and storage +// provider delegations are pulled live from sprue's routing service. +// +// One Submit: +// +// 1. Encode the CAR for this batch (with byte positions for each +// inner block). +// 2. Allocate + HTTP PUT + Accept the CAR through a piri selected +// by routing.Service. +// 3. Build a ShardedDagIndexView and archive it. +// 4. Allocate + HTTP PUT + Accept the index through a piri. +// 5. PublishIndexClaim against the indexing-service. +// +// Steps 2 and 4 share a helper that synthesizes the cause and put +// invocations that the existing space_blob_add handler builds from +// the inbound user UCAN. Here there's no inbound user UCAN — sprue's +// signer self-issues them so the audit shape matches. +type Forge struct { + router *routing.Service + piriProvider piriclient.Provider + indexerClient *indexerclient.Client + signer principal.Signer + spaceSigner principal.Signer + httpClient *nethttp.Client + logger *zap.Logger +} + +// ForgeConfig wires sprue's existing services into an Forge +// uploader. All fields are required. +// +// Signer is sprue's upload-service identity — used for piriclient +// invocations and as the audience of the self-issued retrieval +// delegation. +// +// SpaceSigner is the keypair of the space ms3t owns. ms3t generates +// and persists this on first run; its DID is the space resource for +// every PUT, and it acts as the root authority for self-issued +// space/content/retrieve delegations (so the indexer can fetch the +// index blob from piri on assert/index validation). +type ForgeConfig struct { + Router *routing.Service + PiriProvider piriclient.Provider + IndexerClient *indexerclient.Client + Signer principal.Signer + SpaceSigner principal.Signer + HTTPClient *nethttp.Client // optional; defaults to nethttp.DefaultClient + Logger *zap.Logger +} + +// NewForge validates the config and returns an Uploader that +// writes through sprue's internal services. +func NewForge(cfg ForgeConfig) (*Forge, error) { + if cfg.Router == nil { + return nil, errors.New("uploader: routing service is required") + } + if cfg.PiriProvider == nil { + return nil, errors.New("uploader: piri provider is required") + } + if cfg.IndexerClient == nil { + return nil, errors.New("uploader: indexer client is required") + } + if cfg.Signer == nil { + return nil, errors.New("uploader: signer is required") + } + if cfg.SpaceSigner == nil { + return nil, errors.New("uploader: space signer is required") + } + httpc := cfg.HTTPClient + if httpc == nil { + httpc = nethttp.DefaultClient + } + logger := cfg.Logger + if logger == nil { + logger = zap.NewNop() + } + return &Forge{ + router: cfg.Router, + piriProvider: cfg.PiriProvider, + indexerClient: cfg.IndexerClient, + signer: cfg.Signer, + spaceSigner: cfg.SpaceSigner, + httpClient: httpc, + logger: logger, + }, nil +} + +// SpaceDID returns the DID of the space ms3t owns. +func (u *Forge) SpaceDID() did.DID { return u.spaceSigner.DID() } + +func (u *Forge) SubmitCAR(ctx context.Context, roots []cid.Cid, src CARSource) error { + if len(roots) == 0 { + return errors.New("uploader: at least one root required") + } + if src.Size <= 0 || len(src.Positions) == 0 { + return nil + } + + // 1. PUT the data CAR by streaming from disk. The sealed CAR + // file is byte-identical to what cars.WriteWithPositions + // would produce here (same placeholder header, same block + // order), and the seal step already hashed it — so we skip + // re-encoding and rehashing entirely. + putCAR := func(url string, headers nethttp.Header) error { + return httpPutFile(ctx, u.httpClient, url, headers, src.Path, src.Size) + } + if err := u.uploadBlob(ctx, src.SHA256, uint64(src.Size), putCAR); err != nil { + return fmt.Errorf("uploader: ship car: %w", err) + } + + // 2. Build a ShardedDagIndexView keyed off the CAR's multihash, + // using the precomputed positions from the segment. + view := blobindex.NewShardedDagIndexView(cidlink.Link{Cid: placeholderCID}, 1) + for c, loc := range src.Positions { + view.SetSlice(src.SHA256, c.Hash(), blobindex.Position{ + Offset: loc.Offset, + Length: loc.Length, + }) + } + archReader, err := view.Archive() + if err != nil { + return fmt.Errorf("uploader: archive index: %w", err) + } + indexBytes, err := io.ReadAll(archReader) + if err != nil { + return fmt.Errorf("uploader: read archived index: %w", err) + } + indexDigest, err := multihash.Sum(indexBytes, multihash.SHA2_256, -1) + if err != nil { + return fmt.Errorf("uploader: hash index: %w", err) + } + + // 3. PUT the index blob. Small (one entry per inner CID), so + // in-memory is fine. + putIndex := func(url string, headers nethttp.Header) error { + return httpPut(ctx, u.httpClient, url, headers, indexBytes) + } + if err := u.uploadBlob(ctx, indexDigest, uint64(len(indexBytes)), putIndex); err != nil { + return fmt.Errorf("uploader: ship index: %w", err) + } + + // 4. Publish the index claim. The indexer needs to fetch our + // index blob from piri to validate the assertion, and piri + // requires UCAN auth on retrieval. We self-issue a + // space/content/retrieve delegation scoped to this specific + // index blob and pass it as clientAuth; sprue's + // indexerclient re-delegates from us to the indexer using + // that as the proof chain (mirrors the user-facing flow, + // just with sprue's signer playing the user's role). + indexCID := cid.NewCidV1(uint64(multicodec.Car), indexDigest) + retrievalAuth, err := contentcap.Retrieve.Delegate( + u.spaceSigner, // issuer = space owner (root authority) + u.signer, // audience = sprue (next hop) + u.SpaceDID().String(), + contentcap.RetrieveCaveats{ + Blob: contentcap.BlobDigest{Digest: indexDigest}, + Range: contentcap.Range{Start: 0, End: uint64(len(indexBytes)) - 1}, + }, + delegation.WithNoExpiration(), + ) + if err != nil { + return fmt.Errorf("uploader: build retrieval auth: %w", err) + } + if err := u.indexerClient.PublishIndexClaim(ctx, u.SpaceDID(), placeholderCID, indexCID, retrievalAuth); err != nil { + return fmt.Errorf("uploader: publish index claim: %w", err) + } + return nil +} + +// uploadBlob runs the allocate → PUT → accept dance for one blob. +// putBody is invoked at most once per call, after a successful +// Allocate, with the URL and headers piri returned. The retry loop +// only re-runs Allocate (on ErrCandidateUnavailable), never the +// PUT itself, so a streaming putBody can safely consume its source +// in one shot. If Allocate reports the blob is already present +// (Address == nil), putBody is skipped entirely and accept proceeds. +func (u *Forge) uploadBlob( + ctx context.Context, + digest multihash.Multihash, + size uint64, + putBody func(url string, headers nethttp.Header) error, +) error { + blob := captypes.Blob{Digest: digest, Size: size} + + // Synthesize a self-issued space/blob/add invocation as the cause. + // Its link feeds the audit chain piri's handlers expect; never sent + // over the wire. + causeInv, err := spaceblobcap.Add.Invoke( + u.signer, u.signer, u.SpaceDID().String(), + spaceblobcap.AddCaveats{Blob: blob}, + ) + if err != nil { + return fmt.Errorf("synthesize cause: %w", err) + } + cause := causeInv.Link() + + var exclusions []ucan.Principal + for { + provider, err := u.router.SelectStorageProvider(ctx, blob, routing.WithExclusions(exclusions...)) + if err != nil { + return fmt.Errorf("select provider: %w", err) + } + log := u.logger.With( + zap.Stringer("provider", provider.ID.DID()), + zap.String("endpoint", provider.Endpoint.String()), + ) + + client, err := u.piriProvider.Client(provider.ID, provider.Endpoint) + if err != nil { + return fmt.Errorf("piri client: %w", err) + } + fetcher := internalDelegationFetcher{proof: provider.Proof} + + allocResp, allocInv, _, err := client.Allocate(ctx, &piriclient.AllocateRequest{ + Space: u.SpaceDID(), + Digest: digest, + Size: blob.Size, + Cause: cause, + }, fetcher) + if err != nil { + if errors.Is(err, routing.ErrCandidateUnavailable) { + log.Warn("provider unavailable, excluding and retrying", zap.Error(err)) + exclusions = append(exclusions, provider.ID) + continue + } + return fmt.Errorf("allocate: %w", err) + } + + // PUT bytes if piri allocated a fresh slot. If Address is nil + // piri already has the blob; skip the upload. + if allocResp.Address != nil { + if err := putBody(allocResp.Address.URL.String(), allocResp.Address.Headers); err != nil { + return fmt.Errorf("http put: %w", err) + } + } + + // Synthesize the http/put invocation (matches genPut in + // sprue/pkg/service/handlers/space_blob_add.go) so Accept has + // a stable Put link to chain off. + putInv, err := synthesizePut(blob, allocInv) + if err != nil { + return fmt.Errorf("synthesize put: %w", err) + } + + if _, _, _, err := client.Accept(ctx, &piriclient.AcceptRequest{ + Space: u.SpaceDID(), + Digest: digest, + Size: blob.Size, + Put: putInv.Link(), + }, fetcher); err != nil { + return fmt.Errorf("accept: %w", err) + } + return nil + } +} + +// synthesizePut mirrors genPut in space_blob_add.go: derive a +// principal from the blob's digest, issue an http/put invocation +// with caveats that promise to fulfill from the alloc invocation's +// effects. The invocation is never executed; we only need its Link +// for AcceptRequest.Put. +func synthesizePut(blob captypes.Blob, allocInv invocation.Invocation) (invocation.Invocation, error) { + provider, err := deriveDIDFromDigest(blob.Digest) + if err != nil { + return nil, err + } + fct := httpPutFact{id: provider.DID().String(), key: provider.Encode()} + return httpcap.Put.Invoke( + provider, provider, provider.DID().String(), + httpcap.PutCaveats{ + URL: captypes.Promise{ + UcanAwait: captypes.Await{ + Selector: ".out.ok.address.url", + Link: allocInv.Link(), + }, + }, + Headers: captypes.Promise{ + UcanAwait: captypes.Await{ + Selector: ".out.ok.address.headers", + Link: allocInv.Link(), + }, + }, + Body: httpcap.Body{Digest: blob.Digest, Size: blob.Size}, + }, + delegation.WithFacts([]ucan.FactBuilder{fct}), + ) +} + +func httpPut(ctx context.Context, client *nethttp.Client, urlStr string, headers nethttp.Header, body []byte) error { + req, err := nethttp.NewRequestWithContext(ctx, nethttp.MethodPut, urlStr, bytes.NewReader(body)) + if err != nil { + return err + } + for k, v := range headers { + if len(v) > 0 { + req.Header.Set(k, v[0]) + } + } + resp, err := client.Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + if resp.StatusCode < 200 || resp.StatusCode >= 300 { + return fmt.Errorf("http put status %s", resp.Status) + } + return nil +} + +// httpPutFile streams a file body to the given URL. Setting +// req.ContentLength explicitly keeps net/http from defaulting to +// chunked transfer encoding on a non-Reader body — piri's PUT +// endpoint requires Content-Length. +func httpPutFile(ctx context.Context, client *nethttp.Client, urlStr string, headers nethttp.Header, path string, size int64) error { + f, err := os.Open(path) + if err != nil { + return fmt.Errorf("open car %s: %w", path, err) + } + defer f.Close() + + req, err := nethttp.NewRequestWithContext(ctx, nethttp.MethodPut, urlStr, f) + if err != nil { + return err + } + req.ContentLength = size + for k, v := range headers { + if len(v) > 0 { + req.Header.Set(k, v[0]) + } + } + resp, err := client.Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + if resp.StatusCode < 200 || resp.StatusCode >= 300 { + return fmt.Errorf("http put status %s", resp.Status) + } + return nil +} + +// internalDelegationFetcher matches the shape of the unexported +// delegationFetcher in space_blob_add.go: returns the storage +// provider's pre-issued delegation when the audience matches. +type internalDelegationFetcher struct { + proof delegation.Delegation +} + +func (df internalDelegationFetcher) GetDelegation(ctx context.Context, audience ucan.Principal) (delegation.Delegation, error) { + if df.proof == nil { + return nil, nil + } + if df.proof.Audience().DID() != audience.DID() { + return nil, fmt.Errorf("delegation audience is %s, but invocation requires proof with audience %s", + df.proof.Audience().DID(), audience.DID()) + } + return df.proof, nil +} + +// deriveDIDFromDigest mirrors deriveDID in space_blob_add.go. The +// derived principal is deterministic per digest. +func deriveDIDFromDigest(digest multihash.Multihash) (principal.Signer, error) { + if len(digest) < ed25519.SeedSize { + return nil, fmt.Errorf("digest too short for ed25519 seed: %d < %d", len(digest), ed25519.SeedSize) + } + seed := digest[len(digest)-ed25519.SeedSize:] + pk := ed25519.NewKeyFromSeed(seed) + return ed25519signer.FromRaw(pk) +} + +// httpPutFact mirrors the unexported fact in space_blob_add.go. +// Embeds the derived principal's keys so downstream actors can +// re-derive and sign receipts. +type httpPutFact struct { + id string + key []byte +} + +func (hpf httpPutFact) ToIPLD() (map[string]datamodel.Node, error) { + keys, err := qp.BuildMap(basicnode.Prototype.Any, 1, func(ma datamodel.MapAssembler) { + qp.MapEntry(ma, hpf.id, qp.Bytes(hpf.key)) + }) + if err != nil { + return nil, err + } + return map[string]datamodel.Node{ + "keys": keys, + }, nil +} + +var _ Uploader = (*Forge)(nil) diff --git a/pkg/ms3t/util.go b/pkg/ms3t/util.go new file mode 100644 index 0000000..133b821 --- /dev/null +++ b/pkg/ms3t/util.go @@ -0,0 +1,42 @@ +package ms3t + +import ( + "fmt" + "os" + + "github.com/storacha/go-ucanto/principal" + "github.com/storacha/go-ucanto/principal/ed25519/signer" +) + +// LoadOrCreateSigner reads a persisted principal.Signer from path or +// generates and writes a fresh one if the file does not exist. The +// on-disk format is the canonical did:key string representation +// (signer.Format). +// +// The returned signer's DID is what the operator passes to a delegator +// when requesting a `space/blob/add` + `space/index/add` delegation. +func LoadOrCreateSigner(path string) (principal.Signer, error) { + data, err := os.ReadFile(path) + if os.IsNotExist(err) { + s, err := signer.Generate() + if err != nil { + return nil, fmt.Errorf("uploader: generate signer: %w", err) + } + formatted, err := signer.Format(s) + if err != nil { + return nil, fmt.Errorf("uploader: format signer: %w", err) + } + if err := os.WriteFile(path, []byte(formatted), 0o600); err != nil { + return nil, fmt.Errorf("uploader: persist signer: %w", err) + } + return s, nil + } + if err != nil { + return nil, fmt.Errorf("uploader: read signer: %w", err) + } + s, err := signer.Parse(string(data)) + if err != nil { + return nil, fmt.Errorf("uploader: parse signer: %w", err) + } + return s, nil +}