diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..929ec2a --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,34 @@ +name: CI + +on: + push: + branches: + - main + pull_request: + +jobs: + test: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1 + + # Run checks in the same digest-pinned Go image the release builds with. + - name: Format, vet, and unit tests + run: | + set -euo pipefail + GO_IMAGE="$(sed -n 's/^FROM \(golang:[^ ]*\) AS build$/\1/p' Dockerfile)" + docker run --rm -v "$PWD:/src" -w /src -e GOFLAGS=-buildvcs=false \ + "${GO_IMAGE}" \ + sh -c 'test -z "$(gofmt -l .)" && go vet ./... && go vet -tags=integration ./... && go test ./...' + + e2e: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1 + + # Builds the packer image, runs the pack->unwrap round-trip + # integration test inside it, then smoke-tests the CLI entrypoint. + - name: End-to-end test + run: bash test/e2e.sh diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 95a3033..8b3c4e6 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -13,7 +13,7 @@ jobs: build-and-push: runs-on: ubuntu-latest permissions: - contents: read + contents: write packages: write steps: @@ -43,12 +43,38 @@ jobs: uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3.12.0 - name: Build and push Docker image + id: push uses: docker/build-push-action@ca052bb54ab0790a636c9b5f226502c73d547a25 # v5.4.0 with: context: . - platforms: linux/amd64,linux/arm64 + # The packer pins x86-only tool versions (e.g. gdisk). 
+ platforms: linux/amd64 push: true tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} cache-from: type=gha cache-to: type=gha,mode=max + + # Build the launcher CLI (linux/amd64 only, like everything else) + # with the just-pushed image digest baked in, so installed binaries + # always run the matching pinned packer image. + - name: Build CLI binary + run: | + set -euo pipefail + IMAGE_REF="${REGISTRY}/${IMAGE_NAME}@${{ steps.push.outputs.digest }}" + GO_IMAGE="$(sed -n 's/^FROM \(golang:[^ ]*\) AS build$/\1/p' Dockerfile)" + mkdir -p dist + docker run --rm -v "$PWD:/src" -w /src \ + -e CGO_ENABLED=0 -e GOOS=linux -e GOARCH=amd64 -e GOFLAGS=-buildvcs=false \ + "${GO_IMAGE}" \ + go build -trimpath -ldflags "-buildid= -X main.defaultImage=${IMAGE_REF}" \ + -o dist/modelwrap ./cmd/modelwrap + (cd dist && sha256sum modelwrap > SHA256SUMS) + + - name: Create GitHub release + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + gh release create "${GITHUB_REF_NAME}" dist/modelwrap dist/SHA256SUMS \ + --title "${GITHUB_REF_NAME}" \ + --notes "Packer image: \`${REGISTRY}/${IMAGE_NAME}@${{ steps.push.outputs.digest }}\`" diff --git a/Dockerfile b/Dockerfile index aa0b9b2..2d55dc6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,3 +1,13 @@ +FROM golang:1.25-trixie@sha256:3140b898a3ec52ec5e8a7dc325a3dbdc732c35e0bde3fcc0e0d764c781d7da10 AS build + +WORKDIR /src +COPY go.mod go.sum ./ +RUN go mod download +COPY *.go ./ +COPY wrap ./wrap +COPY cmd ./cmd +RUN CGO_ENABLED=0 go build -trimpath -buildvcs=false -ldflags=-buildid= -o /modelwrap ./cmd/modelwrap + FROM python:3.13-slim-trixie@sha256:b04b5d7233d2ad9c379e22ea8927cd1378cd15c60d4ef876c065b25ea8fb3bf3 ARG DEBIAN_SNAPSHOT=20260518T000000Z @@ -19,9 +29,13 @@ COPY requirements.txt . ENV CACHE_DIR="/cache" ENV OUTPUT_DIR="/output" +# Marks the packing context so the CLI runs directly instead of +# re-launching itself in a container. 
+ENV MODELWRAP_IN_CONTAINER=1 +# huggingface_hub provides the `hf` CLI used for model downloads. RUN pip install --no-cache-dir --require-hashes -r requirements.txt -COPY pack.py . +COPY --from=build /modelwrap /usr/local/bin/modelwrap -ENTRYPOINT ["python3", "pack.py"] +ENTRYPOINT ["modelwrap"] diff --git a/README.md b/README.md index 563288c..62f8e9b 100644 --- a/README.md +++ b/README.md @@ -4,52 +4,51 @@ Builds reproducible dm-verity EROFS images of Hugging Face models. Learn more ab For the artifact format, trust assumptions, and EMWP cryptographic parameters, see [SPEC.md](SPEC.md). +This repository is also the Go module `github.com/tinfoilsh/modelwrap`. The root package defines the protocol surface shared by both sides (format constants, artifact reference parsing, EMWP key derivation), `wrap` implements the packer, and `unwrap` implements the consumer side (used by [cvmimage](https://github.com/tinfoilsh/cvmimage) to mount model packs at boot). + ## Usage +The `modelwrap` CLI is a launcher: it runs the packing inside a +digest-pinned container image (requires docker), so artifact bytes are +always produced by the pinned toolchain. Release binaries embed the +matching image digest. + ```bash -docker run --rm -it \ - -v $(pwd)/cache:/cache \ - -v $(pwd)/output:/output \ - -e HF_TOKEN="${HF_TOKEN}" \ - ghcr.io/tinfoilsh/modelwrap@sha256: \ - meta-llama/Llama-3.2-1B@4e20de362430cd3b72f300e6b0f18e50e7166e08 +modelwrap mistralai/Ministral-3-3B-Instruct-2512@cfcb068fa7c44114cf77a462357c6cdcd2c304b4 ``` To pack and encrypt a local/private model directory: ```bash -docker run --rm -it --privileged \ - -v /path/to/model:/model:ro \ - -v $(pwd)/output:/output \ - -e PRIVATE_MODEL_KEY_B64="${PRIVATE_MODEL_KEY_B64}" \ - ghcr.io/tinfoilsh/modelwrap@sha256: \ - --model-dir /model \ - --encrypt +PRIVATE_MODEL_KEY_B64="${PRIVATE_MODEL_KEY_B64}" modelwrap --model-dir /path/to/model --encrypt ``` ## Arguments - `model`: Hugging Face model ID, preferably with `@revision`. 
If omitted with `--model-dir`, modelwrap derives `basename@contentHash`. - `--model-dir `: pack a local/private model directory instead of downloading from Hugging Face. If `model` is provided without `@revision`, modelwrap uses the directory content hash as the revision. -- `--encrypt`: emit encrypted modelwrap output (`.emwp`). Requires device-mapper and loop device access; `--privileged` is the simplest Docker setup. Also requires `--key-file`, `PRIVATE_MODEL_KEY_FILE`, or `PRIVATE_MODEL_KEY_B64`. +- `--encrypt`: emit encrypted modelwrap output (`.emwp`). Requires a master key via `--key-file`, `PRIVATE_MODEL_KEY_FILE`, or `PRIVATE_MODEL_KEY_B64`. - `--key-file `: file containing the base64-encoded 64-byte EMWP master key. - `--verify`: optional. Runs `veritysetup verify` for MWP and decrypts then verifies EMWP, which is useful for cached artifacts or release checks. +- `--output ` / `--cache `: output and download cache directories (default `./output`, `./cache`). +- `--image `: override the packer container image the launcher runs (defaults to the release-pinned digest; also `MODELWRAP_IMAGE`). +- `--local`: run the packer directly on the current machine instead of in a container. Artifact bytes then depend on locally installed tool versions. -Environment fallbacks are supported for wrapper scripts: `MODEL`, `MODEL_DIR`, `VERIFY=1`, and `ENCRYPTION=1`. Set `HF_TOKEN` when accessing gated or private Hugging Face models. Use a digest-pinned container image, as shown above, when invoking modelwrap in production. +Set `HF_TOKEN` when accessing gated or private Hugging Face models; the launcher passes it into the container without exposing the value on the docker command line. 
MWP mode emits: -- `output/meta-llama/Llama-3.2-1B/4e20de362430cd3b72f300e6b0f18e50e7166e08.mpk`: dm-verity EROFS image -- `output/meta-llama/Llama-3.2-1B/4e20de362430cd3b72f300e6b0f18e50e7166e08.info`: metadata file in the format `ROOTHASH_OFFSET_VERITYUUID` +- `output/mistralai/Ministral-3-3B-Instruct-2512/cfcb068fa7c44114cf77a462357c6cdcd2c304b4.mpk`: dm-verity EROFS image +- `output/mistralai/Ministral-3-3B-Instruct-2512/cfcb068fa7c44114cf77a462357c6cdcd2c304b4.info`: metadata file in the format `ROOTHASH_OFFSET_VERITYUUID` EMWP mode additionally emits: -- `output/meta-llama/Llama-3.2-1B/4e20de362430cd3b72f300e6b0f18e50e7166e08.emwp`: disk image with one encrypted payload partition -- `output/meta-llama/Llama-3.2-1B/4e20de362430cd3b72f300e6b0f18e50e7166e08.emwp.info`: metadata file in the format `ROOTHASH_OFFSET_PARTUUID` +- `output/mistralai/Ministral-3-3B-Instruct-2512/cfcb068fa7c44114cf77a462357c6cdcd2c304b4.emwp`: disk image with one encrypted payload partition +- `output/mistralai/Ministral-3-3B-Instruct-2512/cfcb068fa7c44114cf77a462357c6cdcd2c304b4.emwp.info`: metadata file in the format `ROOTHASH_OFFSET_PARTUUID` ## Supply Chain Pins -The published image is built from a digest-pinned official Python image on Debian Trixie, installs `erofs-utils` and `cryptsetup` from a dated `snapshot.debian.org` archive, and installs Python dependencies from a hash-checked `requirements.txt`. +The published image is built in two digest-pinned stages: a Go builder that compiles the `modelwrap` binary (`CGO_ENABLED=0`, `-trimpath`, hash-locked Go dependencies via `go.sum`), and a runtime image based on the official Python image on Debian Trixie that installs `erofs-utils`, `cryptsetup`, and `gdisk` from a dated `snapshot.debian.org` archive plus a hash-checked `requirements.txt` (only `huggingface_hub`, which provides the `hf` CLI used for downloads). 
The packer currently pins: @@ -57,7 +56,7 @@ The packer currently pins: - `cryptsetup=2:2.7.5-2` - `gdisk=1.0.10-2` -`pack.py` passes the dm-verity hash algorithm, format, and block sizes explicitly so tool default changes do not silently alter the dm-verity format. +The packer passes the dm-verity hash algorithm, format, and block sizes explicitly so tool default changes do not silently alter the dm-verity format. To update Python dependencies, edit `requirements.in` and regenerate the lockfile: diff --git a/SPEC.md b/SPEC.md index 9c93844..559227c 100644 --- a/SPEC.md +++ b/SPEC.md @@ -6,6 +6,10 @@ The trust assumptions are: artifact bytes are *untrusted*; at runtime, identity Modelwrap's verifiability is based on *reproducibility*, not attested provenance: artifacts are bit-for-bit reproducible, and the Modelwrap root hash is derived deterministically. An auditor can independently recompute the root hash from the model weights, for example from a pinned Hugging Face revision. +## Reproducibility and Packer Versioning + +Artifact bytes, and therefore root hashes, are reproducible with respect to a specific digest-pinned packer image: the EROFS encoder and dm-verity tooling contribute to the output bytes, so a toolchain upgrade (e.g. a new `erofs-utils` version) can produce a different root hash for the same input model. This is not a format change and requires no version field in the artifact: existing artifacts remain valid and mountable because consumers act only on the attested reference, never on tool versions. An auditor recomputing a root hash must use the same packer image digest that produced the artifact. 
+ ## MWP Format An MWP format is: diff --git a/cmd/modelwrap/launcher.go b/cmd/modelwrap/launcher.go new file mode 100644 index 0000000..2e5b346 --- /dev/null +++ b/cmd/modelwrap/launcher.go @@ -0,0 +1,129 @@ +package main + +import ( + "errors" + "fmt" + "os" + "os/exec" + "path/filepath" +) + +// Fixed container-side paths the launcher rewrites host paths to. /cache +// and /output match the image's CACHE_DIR and OUTPUT_DIR environment. +const ( + containerModelDir = "/model" + containerKeyFile = "/run/modelwrap-key" + containerCacheDir = "/cache" + containerOutput = "/output" +) + +// Secrets passed through to the container by name only, so values never +// appear in the docker command line. +var passthroughEnv = []string{"HF_TOKEN", "PRIVATE_MODEL_KEY_B64"} + +// launch re-executes the CLI inside the packer container image and +// returns the process exit code. +func launch(opts cliOptions) int { + args, err := dockerRunArgs(opts) + if err != nil { + fmt.Fprintln(os.Stderr, "Error:", err) + return 1 + } + + cmd := exec.Command("docker", args...) + cmd.Stdin = os.Stdin + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + if err := cmd.Run(); err != nil { + var exitErr *exec.ExitError + if errors.As(err, &exitErr) { + return exitErr.ExitCode() + } + fmt.Fprintln(os.Stderr, "Error: running docker:", err) + return 1 + } + return 0 +} + +// dockerRunArgs translates host-side options into a docker run invocation +// of the same CLI inside the packer image. +func dockerRunArgs(opts cliOptions) ([]string, error) { + args := []string{"run", "--rm"} + if opts.Encrypt { + // EMWP packing needs loop device and device-mapper access. + args = append(args, "--privileged") + } + + hostDir := func(path, fallback string) (string, error) { + if path == "" { + path = fallback + } + abs, err := filepath.Abs(path) + if err != nil { + return "", err + } + // Pre-create so docker does not create it root-owned. 
+ if err := os.MkdirAll(abs, 0755); err != nil { + return "", err + } + return abs, nil + } + + outputDir, err := hostDir(opts.OutputDir, "output") + if err != nil { + return nil, err + } + cacheDir, err := hostDir(opts.CacheDir, "cache") + if err != nil { + return nil, err + } + args = append(args, + "-v", outputDir+":"+containerOutput, + "-v", cacheDir+":"+containerCacheDir, + ) + + if opts.ModelDir != "" { + abs, err := filepath.Abs(opts.ModelDir) + if err != nil { + return nil, err + } + args = append(args, "-v", abs+":"+containerModelDir+":ro") + } + + keyFile := opts.KeyFile + if keyFile == "" { + keyFile = os.Getenv("PRIVATE_MODEL_KEY_FILE") + } + if keyFile != "" { + abs, err := filepath.Abs(keyFile) + if err != nil { + return nil, err + } + args = append(args, "-v", abs+":"+containerKeyFile+":ro") + } + + for _, name := range passthroughEnv { + if os.Getenv(name) != "" { + args = append(args, "-e", name) + } + } + + args = append(args, opts.image) + + if opts.ModelDir != "" { + args = append(args, "--model-dir", containerModelDir) + } + if keyFile != "" { + args = append(args, "--key-file", containerKeyFile) + } + if opts.Encrypt { + args = append(args, "--encrypt") + } + if opts.Verify { + args = append(args, "--verify") + } + if opts.Model != "" { + args = append(args, opts.Model) + } + return args, nil +} diff --git a/cmd/modelwrap/launcher_test.go b/cmd/modelwrap/launcher_test.go new file mode 100644 index 0000000..e3a6ef1 --- /dev/null +++ b/cmd/modelwrap/launcher_test.go @@ -0,0 +1,93 @@ +package main + +import ( + "os" + "path/filepath" + "reflect" + "testing" + + "github.com/tinfoilsh/modelwrap/wrap" +) + +func TestDockerRunArgs(t *testing.T) { + dir := t.TempDir() + t.Chdir(dir) + t.Setenv("HF_TOKEN", "secret") + t.Setenv("PRIVATE_MODEL_KEY_B64", "") + t.Setenv("PRIVATE_MODEL_KEY_FILE", "") + if err := os.WriteFile(filepath.Join(dir, "master.key"), []byte("x"), 0600); err != nil { + t.Fatal(err) + } + if err := os.MkdirAll(filepath.Join(dir, 
"weights"), 0755); err != nil { + t.Fatal(err) + } + + opts := cliOptions{ + Options: wrap.Options{ + Model: "org/model@rev", + ModelDir: "weights", + KeyFile: "master.key", + Encrypt: true, + Verify: true, + }, + image: "ghcr.io/tinfoilsh/modelwrap@sha256:deadbeef", + } + got, err := dockerRunArgs(opts) + if err != nil { + t.Fatalf("dockerRunArgs: %v", err) + } + + want := []string{ + "run", "--rm", "--privileged", + "-v", filepath.Join(dir, "output") + ":/output", + "-v", filepath.Join(dir, "cache") + ":/cache", + "-v", filepath.Join(dir, "weights") + ":/model:ro", + "-v", filepath.Join(dir, "master.key") + ":/run/modelwrap-key:ro", + "-e", "HF_TOKEN", + "ghcr.io/tinfoilsh/modelwrap@sha256:deadbeef", + "--model-dir", "/model", + "--key-file", "/run/modelwrap-key", + "--encrypt", "--verify", + "org/model@rev", + } + if !reflect.DeepEqual(got, want) { + t.Fatalf("dockerRunArgs mismatch:\n got %q\nwant %q", got, want) + } + + // Secret values must never appear in the docker command line. + for _, arg := range got { + if arg == "secret" { + t.Fatal("secret value leaked into docker args") + } + } + + // Host directories are pre-created so docker does not own them as root. 
+ for _, sub := range []string{"output", "cache"} { + if fi, err := os.Stat(filepath.Join(dir, sub)); err != nil || !fi.IsDir() { + t.Fatalf("expected %s directory to be created: %v", sub, err) + } + } +} + +func TestDockerRunArgsPlainMWP(t *testing.T) { + t.Chdir(t.TempDir()) + t.Setenv("HF_TOKEN", "") + t.Setenv("PRIVATE_MODEL_KEY_B64", "") + t.Setenv("PRIVATE_MODEL_KEY_FILE", "") + + got, err := dockerRunArgs(cliOptions{ + Options: wrap.Options{Model: "org/model@rev"}, + image: "img", + }) + if err != nil { + t.Fatalf("dockerRunArgs: %v", err) + } + for _, arg := range got { + if arg == "--privileged" { + t.Fatal("plain MWP packing should not be privileged") + } + } + if got[len(got)-1] != "org/model@rev" { + t.Fatalf("expected model as final arg: %q", got) + } +} diff --git a/cmd/modelwrap/main.go b/cmd/modelwrap/main.go new file mode 100644 index 0000000..530ae0f --- /dev/null +++ b/cmd/modelwrap/main.go @@ -0,0 +1,120 @@ +// Command modelwrap packs model weights into MWP/EMWP artifacts. +// +// The same binary runs in two contexts. Inside the packer container +// (marked by MODELWRAP_IN_CONTAINER=1) it runs the packer directly. On a +// host it acts as a launcher: it re-executes itself inside the +// digest-pinned packer image via docker, translating host paths into +// container mounts, so artifact bytes are always produced by the pinned +// toolchain. +package main + +import ( + "errors" + "flag" + "fmt" + "os" + + "github.com/tinfoilsh/modelwrap/wrap" +) + +// defaultImage is the packer image used by the launcher. Release builds +// override it with a digest-pinned reference via +// -ldflags "-X main.defaultImage=ghcr.io/tinfoilsh/modelwrap@sha256:...". +var defaultImage = "ghcr.io/tinfoilsh/modelwrap:latest" + +const usage = `Usage: modelwrap [flags] [model[@revision]] + +Packs a Hugging Face model or local directory into a reproducible +dm-verity EROFS image (MWP), optionally encrypted (EMWP). 
+ +Run on a host, modelwrap re-executes itself inside the pinned packer +container image (requires docker). Inside the container it packs directly. + +Flags: + --model-dir pack a local model directory instead of downloading + --encrypt emit encrypted EMWP output (requires a master key) + --key-file file containing the base64-encoded 64-byte EMWP master key + --verify verify artifacts after packing + --output output directory (default ./output) + --cache download cache directory (default ./cache) + --image packer container image to launch (default release-pinned) + --local run the packer directly instead of in a container + -h, --help show this help + +Environment fallbacks: MODEL, MODEL_DIR, VERIFY=1, ENCRYPTION=1, HF_TOKEN, +PRIVATE_MODEL_KEY_FILE, PRIVATE_MODEL_KEY_B64, CACHE_DIR, OUTPUT_DIR, +MODELWRAP_IMAGE.` + +// cliOptions extends the packer options with launcher-only settings. +type cliOptions struct { + wrap.Options + image string + local bool +} + +func main() { + opts, err := parseArgs(os.Args[1:]) + if err != nil { + if errors.Is(err, flag.ErrHelp) { + os.Exit(0) + } + os.Exit(2) + } + + if opts.local || os.Getenv("MODELWRAP_IN_CONTAINER") == "1" { + ref, err := wrap.Pack(opts.Options) + if err != nil { + fmt.Fprintln(os.Stderr, "Error:", err) + os.Exit(1) + } + fmt.Println(ref) + return + } + + os.Exit(launch(opts)) +} + +// parseArgs parses flags (before the optional positional model argument) +// with environment variable fallbacks for container-style invocation. 
+func parseArgs(args []string) (cliOptions, error) { + opts := cliOptions{ + Options: wrap.Options{ + Model: os.Getenv("MODEL"), + ModelDir: os.Getenv("MODEL_DIR"), + CacheDir: os.Getenv("CACHE_DIR"), + OutputDir: os.Getenv("OUTPUT_DIR"), + Verify: os.Getenv("VERIFY") == "1", + Encrypt: os.Getenv("ENCRYPTION") == "1", + HFToken: os.Getenv("HF_TOKEN"), + }, + image: defaultImage, + } + if image := os.Getenv("MODELWRAP_IMAGE"); image != "" { + opts.image = image + } + + fs := flag.NewFlagSet("modelwrap", flag.ContinueOnError) + fs.Usage = func() { fmt.Fprintln(os.Stderr, usage) } + fs.StringVar(&opts.ModelDir, "model-dir", opts.ModelDir, "") + fs.StringVar(&opts.KeyFile, "key-file", "", "") + fs.StringVar(&opts.OutputDir, "output", opts.OutputDir, "") + fs.StringVar(&opts.CacheDir, "cache", opts.CacheDir, "") + fs.StringVar(&opts.image, "image", opts.image, "") + fs.BoolVar(&opts.Verify, "verify", opts.Verify, "") + fs.BoolVar(&opts.Encrypt, "encrypt", opts.Encrypt, "") + fs.BoolVar(&opts.local, "local", false, "") + if err := fs.Parse(args); err != nil { + return opts, err + } + + rest := fs.Args() + if len(rest) > 1 { + fmt.Fprintf(os.Stderr, "unexpected arguments: %v (flags must come before the model argument)\n", rest[1:]) + fs.Usage() + return opts, fmt.Errorf("unexpected arguments") + } + if len(rest) == 1 { + opts.Model = rest[0] + } + return opts, nil +} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..0cefaa8 --- /dev/null +++ b/go.mod @@ -0,0 +1,7 @@ +module github.com/tinfoilsh/modelwrap + +go 1.25 + +require github.com/google/uuid v1.6.0 + +require golang.org/x/mod v0.31.0 diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..5d7866a --- /dev/null +++ b/go.sum @@ -0,0 +1,4 @@ +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +golang.org/x/mod v0.31.0 h1:HaW9xtz0+kOcWKwli0ZXy79Ix+UW/vOfmWI5QVd2tgI= +golang.org/x/mod 
v0.31.0/go.mod h1:43JraMp9cGx1Rx3AqioxrbrhNsLl2l/iNAvuBkrezpg= diff --git a/integration_test.go b/integration_test.go new file mode 100644 index 0000000..4f69f7b --- /dev/null +++ b/integration_test.go @@ -0,0 +1,116 @@ +//go:build integration + +package modelwrap_test + +import ( + "encoding/base64" + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" + "testing" + + "github.com/tinfoilsh/modelwrap" + "github.com/tinfoilsh/modelwrap/unwrap" + "github.com/tinfoilsh/modelwrap/wrap" +) + +const ( + integrationModelName = "hf-internal-testing/tiny-random-GPT2Model" + integrationModelRevision = "d6694b0d8fe17978761c9305dc151780506b192e" + integrationModel = integrationModelName + "@" + integrationModelRevision +) + +// TestEMWPRoundTripIntegration downloads and packs a tiny public model as +// EMWP and then consumes it through the unwrap path: loop-mount the +// encrypted payload partition, open dm-crypt with the derived key, open +// dm-verity, and mount the verified EROFS. It needs a privileged Linux +// environment with the modelwrap packer tools, network access, loop +// devices, and dm_verity available (run via test/e2e.sh). 
+func TestEMWPRoundTripIntegration(t *testing.T) { + if os.Getenv("TINFOIL_MODELWRAP_INTEGRATION") != "1" { + t.Skip("set TINFOIL_MODELWRAP_INTEGRATION=1 to run") + } + + work := t.TempDir() + masterKey := []byte(strings.Repeat("k", modelwrap.EMWPMasterKeyBytes)) + keyFile := filepath.Join(work, "master.key") + if err := os.WriteFile(keyFile, []byte(base64.StdEncoding.EncodeToString(masterKey)), 0600); err != nil { + t.Fatal(err) + } + + rawRef, err := wrap.Pack(wrap.Options{ + Model: integrationModel, + CacheDir: filepath.Join(work, "cache"), + OutputDir: filepath.Join(work, "output"), + Encrypt: true, + Verify: true, + KeyFile: keyFile, + }) + if err != nil { + t.Fatalf("packing EMWP: %v", err) + } + ref, err := modelwrap.ParseRef(rawRef) + if err != nil { + t.Fatalf("parsing packed ref %q: %v", rawRef, err) + } + if want := modelwrap.UUIDv5URL(integrationModel + "-emwp-outer"); ref.UUID != want { + t.Fatalf("EMWP PARTUUID = %s, want %s", ref.UUID, want) + } + + emwpFile := filepath.Join(work, "output", integrationModelName, integrationModelRevision+".emwp") + + // Expose the encrypted payload partition the same way the consumer + // sees it: a read-only block device covering exactly the partition. 
+ fi, err := os.Stat(emwpFile) + if err != nil { + t.Fatal(err) + } + partOffset := int64(modelwrap.EMWPPartitionStartSector * modelwrap.GPTSectorSize) + partSize := fi.Size() - partOffset - int64(modelwrap.EMWPGPTTrailingSectors*modelwrap.GPTSectorSize) + out, err := exec.Command( + "losetup", "--read-only", "--find", "--show", + "--offset", fmt.Sprint(partOffset), + "--sizelimit", fmt.Sprint(partSize), + emwpFile, + ).Output() + if err != nil { + t.Fatalf("losetup: %v", err) + } + loopDev := strings.TrimSpace(string(out)) + t.Cleanup(func() { _ = exec.Command("losetup", "-d", loopDev).Run() }) + + dmKey, err := modelwrap.DeriveKey(masterKey, ref) + if err != nil { + t.Fatalf("deriving dm-crypt key: %v", err) + } + dmKeyFile := filepath.Join(work, "dm.key") + if err := os.WriteFile(dmKeyFile, dmKey, 0600); err != nil { + t.Fatal(err) + } + + cryptName := "modelwrap-it-crypt" + if err := unwrap.OpenCrypt(loopDev, cryptName, dmKeyFile); err != nil { + t.Fatalf("opening dm-crypt: %v", err) + } + t.Cleanup(func() { unwrap.CloseCrypt(cryptName) }) + + verityName := "modelwrap-it-verity" + if err := unwrap.OpenVerity("/dev/mapper/"+cryptName, verityName, ref.RootHash, ref.HashOffset); err != nil { + t.Fatalf("opening dm-verity: %v", err) + } + t.Cleanup(func() { unwrap.CloseVerity(verityName) }) + + mountPoint := filepath.Join(work, "mnt") + if err := unwrap.Mount("/dev/mapper/"+verityName, mountPoint); err != nil { + t.Fatalf("mounting verified EROFS: %v", err) + } + t.Cleanup(func() { _ = exec.Command("umount", mountPoint).Run() }) + + for _, name := range []string{"config.json", "pytorch_model.bin"} { + if _, err := os.Stat(filepath.Join(mountPoint, name)); err != nil { + t.Fatalf("checking mounted model file %s: %v", name, err) + } + } +} diff --git a/modelwrap.go b/modelwrap.go new file mode 100644 index 0000000..655531d --- /dev/null +++ b/modelwrap.go @@ -0,0 +1,139 @@ +// Package modelwrap defines the Modelwrap (MWP) and Encrypted Modelwrap +// (EMWP) artifact 
format: the cryptographic constants, the artifact +// reference grammar, the deterministic identity derivations, and the key +// derivation used by both the packer and the consumer. See SPEC.md for the +// full format specification. +package modelwrap + +import ( + "crypto/hkdf" + "crypto/sha256" + "encoding/base64" + "encoding/hex" + "fmt" + "regexp" + "strings" + + "github.com/google/uuid" + "golang.org/x/mod/sumdb/dirhash" +) + +// dm-verity format parameters. These are passed explicitly to veritysetup +// so tool default changes never silently alter the artifact format. +const ( + VerityFormat = 1 + VerityHashAlgorithm = "sha256" + VerityDataBlockSize = 4096 + VerityHashBlockSize = 4096 +) + +// EMWP dm-crypt parameters. +const ( + EMWPCipher = "aes-xts-plain64" + EMWPKeySizeBits = 512 + EMWPKeyBytes = EMWPKeySizeBits / 8 + EMWPMasterKeyBytes = 64 + EMWPSectorSize = 4096 + EMWPKeyDeriveInfo = "tinfoil/emwp/dm-crypt-key/v1" +) + +// EMWP GPT disk image geometry. The encrypted payload partition starts at +// a fixed sector so ciphertext placement is deterministic. +const ( + GPTSectorSize = 512 + EMWPPartitionStartSector = 2048 + EMWPGPTTrailingSectors = 40 +) + +var ( + rootHashPattern = regexp.MustCompile(`^[a-f0-9]{64}$`) + hashOffsetPattern = regexp.MustCompile(`^[0-9]+$`) + uuidPattern = regexp.MustCompile(`^[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}$`) +) + +// ArtifactRef is a parsed artifact reference of the form +// rootHash_hashOffset_uuid. For MWP artifacts the UUID is the dm-verity +// superblock UUID; for EMWP artifacts it is the GPT PARTUUID of the +// encrypted payload partition. +type ArtifactRef struct { + RootHash string + HashOffset string + UUID string +} + +// ParseRef parses and validates a rootHash_hashOffset_uuid reference. 
+func ParseRef(ref string) (*ArtifactRef, error) { + parts := strings.Split(ref, "_") + if len(parts) != 3 { + return nil, fmt.Errorf("expected rootHash_hashOffset_uuid") + } + + r := &ArtifactRef{ + RootHash: parts[0], + HashOffset: parts[1], + UUID: parts[2], + } + if !rootHashPattern.MatchString(r.RootHash) { + return nil, fmt.Errorf("invalid root hash format: %s", r.RootHash) + } + if !hashOffsetPattern.MatchString(r.HashOffset) { + return nil, fmt.Errorf("invalid hash offset format: %s", r.HashOffset) + } + if !uuidPattern.MatchString(r.UUID) { + return nil, fmt.Errorf("invalid UUID format: %s", r.UUID) + } + return r, nil +} + +// String returns the canonical rootHash_hashOffset_uuid form. +func (r *ArtifactRef) String() string { + return r.RootHash + "_" + r.HashOffset + "_" + r.UUID +} + +// ArtifactID returns the rootHash_uuid identity used as the HKDF salt for +// EMWP key derivation, binding the derived key to one specific artifact. +func (r *ArtifactRef) ArtifactID() string { + return r.RootHash + "_" + r.UUID +} + +// DeriveKey derives the per-artifact dm-crypt key from the EMWP master key +// using HKDF-SHA256 with the artifact ID as salt. +func DeriveKey(masterKey []byte, ref *ArtifactRef) ([]byte, error) { + if len(masterKey) != EMWPMasterKeyBytes { + return nil, fmt.Errorf("EMWP master key is %d bytes, want %d", len(masterKey), EMWPMasterKeyBytes) + } + return hkdf.Key(sha256.New, masterKey, []byte(ref.ArtifactID()), EMWPKeyDeriveInfo, EMWPKeyBytes) +} + +// ParseMasterKey decodes and validates a base64-encoded EMWP master key. 
+func ParseMasterKey(encoded string) ([]byte, error) { + key, err := base64.StdEncoding.Strict().DecodeString(strings.TrimSpace(encoded)) + if err != nil { + return nil, fmt.Errorf("decoding EMWP master key as base64: %w", err) + } + if len(key) != EMWPMasterKeyBytes { + return nil, fmt.Errorf("EMWP master key decoded to %d bytes, want %d", len(key), EMWPMasterKeyBytes) + } + return key, nil +} + +// UUIDv5URL computes the deterministic RFC 4122 version 5 UUID of name in +// the URL namespace. All artifact UUIDs (dm-verity UUID, GPT disk GUID, +// GPT PARTUUID) are derived from the model identity this way. +func UUIDv5URL(name string) string { + return uuid.NewSHA1(uuid.NameSpaceURL, []byte(name)).String() +} + +// HashDir computes a deterministic content hash over a model directory +// tree, used as the revision for local model directories. +func HashDir(dir string) (string, error) { + h1, err := dirhash.HashDir(dir, "", dirhash.Hash1) + if err != nil { + return "", err + } + raw, err := base64.StdEncoding.DecodeString(strings.TrimPrefix(h1, "h1:")) + if err != nil { + return "", fmt.Errorf("decoding dirhash output: %w", err) + } + return hex.EncodeToString(raw), nil +} diff --git a/modelwrap_test.go b/modelwrap_test.go new file mode 100644 index 0000000..ad04aa6 --- /dev/null +++ b/modelwrap_test.go @@ -0,0 +1,152 @@ +package modelwrap + +import ( + "encoding/hex" + "os" + "path/filepath" + "testing" +) + +// Golden vectors pinned during the Python-to-Go migration, generated from +// the original Python packer and verified byte-identical. All values are +// independently derivable from the underlying standards: HKDF-SHA256 +// (RFC 5869), UUIDv5 in the URL namespace (RFC 4122), and the Go module +// Hash1 directory hash. 
+const ( + vectorRootHash = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" + vectorPartUUID = "8a3c9f0e-1111-5222-b333-444444444444" + vectorModel = "hf-internal-testing/tiny-random-GPT2Model@d6694b0d8fe17978761c9305dc151780506b192e" +) + +func TestParseRef(t *testing.T) { + ref, err := ParseRef(vectorRootHash + "_4096_" + vectorPartUUID) + if err != nil { + t.Fatalf("ParseRef: %v", err) + } + if ref.RootHash != vectorRootHash || ref.HashOffset != "4096" || ref.UUID != vectorPartUUID { + t.Fatalf("unexpected ref: %+v", ref) + } + if got := ref.String(); got != vectorRootHash+"_4096_"+vectorPartUUID { + t.Fatalf("String() = %q", got) + } + if got := ref.ArtifactID(); got != vectorRootHash+"_"+vectorPartUUID { + t.Fatalf("ArtifactID() = %q", got) + } + + for _, invalid := range []string{ + "", + "a_b", + "nothex_4096_" + vectorPartUUID, + vectorRootHash + "_x_" + vectorPartUUID, + vectorRootHash + "_4096_not-a-uuid", + vectorRootHash + "_4096_" + vectorPartUUID + "_extra", + "E3B0C44298FC1C149AFBF4C8996FB92427AE41E4649B934CA495991B7852B855_4096_" + vectorPartUUID, + } { + if _, err := ParseRef(invalid); err == nil { + t.Errorf("ParseRef(%q) should fail", invalid) + } + } +} + +func TestDeriveKey(t *testing.T) { + master := make([]byte, 64) + for i := range master { + master[i] = byte(i) + } + ref := &ArtifactRef{RootHash: vectorRootHash, HashOffset: "4096", UUID: vectorPartUUID} + key, err := DeriveKey(master, ref) + if err != nil { + t.Fatalf("DeriveKey: %v", err) + } + want := "201747a5e94aebc493296f77a33301f58144765be712a17685106ecd623b7ba2ac68ee4cf46d025ed111bd3dfdeda807ce9577f682561a6219a4eb25ae1a48d6" + if got := hex.EncodeToString(key); got != want { + t.Fatalf("DeriveKey = %s, want %s", got, want) + } + + if _, err := DeriveKey(master[:32], ref); err == nil { + t.Fatal("DeriveKey should reject short master keys") + } +} + +func TestParseMasterKey(t *testing.T) { + encoded := 
"a2tra2tra2tra2tra2tra2tra2tra2tra2tra2tra2tra2tra2tra2tra2tra2tra2tra2tra2tra2tra2traw==" // b"k"*64 + key, err := ParseMasterKey(encoded + "\n") + if err != nil { + t.Fatalf("ParseMasterKey: %v", err) + } + if len(key) != EMWPMasterKeyBytes { + t.Fatalf("key length = %d", len(key)) + } + for _, invalid := range []string{"", "!!!!", "a2tr"} { + if _, err := ParseMasterKey(invalid); err == nil { + t.Errorf("ParseMasterKey(%q) should fail", invalid) + } + } +} + +func TestUUIDv5URL(t *testing.T) { + // The "-emwp-outer" vector is also confirmed against a real packed + // artifact: the EMWP round-trip integration test packs this model and + // asserts the same PARTUUID in the emitted reference. + cases := map[string]string{ + vectorModel + "-inner": "905785b4-b198-5ed6-b97d-e7c36a0be1df", + vectorModel + "-emwp-outer": "9701bf21-6de2-59a2-bdea-141aaae05fc3", + vectorRootHash + "-emwp-disk": "f5bb5818-295a-5b3e-910a-82f72eca4cc2", + } + for name, want := range cases { + if got := UUIDv5URL(name); got != want { + t.Errorf("UUIDv5URL(%q) = %s, want %s", name, got, want) + } + } +} + +func TestHashDir(t *testing.T) { + d := t.TempDir() + write := func(path, content string) { + full := filepath.Join(d, path) + if err := os.MkdirAll(filepath.Dir(full), 0755); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(full, []byte(content), 0644); err != nil { + t.Fatal(err) + } + } + write("b.txt", "beta\n") + write("a.txt", "alpha\n") + nested := make([]byte, 256) + for i := range nested { + nested[i] = byte(i) + } + if err := os.MkdirAll(filepath.Join(d, "sub"), 0755); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(filepath.Join(d, "sub", "nested.bin"), nested, 0644); err != nil { + t.Fatal(err) + } + if err := os.Symlink("a.txt", filepath.Join(d, "link.txt")); err != nil { + t.Fatal(err) + } + if err := os.MkdirAll(filepath.Join(d, "empty"), 0755); err != nil { + t.Fatal(err) + } + write("zsub/deep/x", "x") + + got, err := HashDir(d) + if err != nil { + 
t.Fatalf("HashDir: %v", err) + } + // Pinned hex encoding of the go.sum Hash1 digest for this fixture, + // independently recomputed from the Hash1 specification. + want := "560b265285af784ef29389714811ab259fee8b31225cc62e1811be662b7d4f36" + if got != want { + t.Fatalf("HashDir = %s, want %s", got, want) + } + + // Symlinks to directories are not representable in Hash1. + if err := os.Symlink("sub", filepath.Join(d, "dirlink")); err != nil { + t.Fatal(err) + } + if _, err := HashDir(d); err == nil { + t.Fatal("HashDir should fail on symlinks to directories") + } +} diff --git a/pack.py b/pack.py deleted file mode 100644 index f5b7207..0000000 --- a/pack.py +++ /dev/null @@ -1,385 +0,0 @@ -import os -import argparse -import base64 -import hashlib -import subprocess -import uuid -import shutil -from hashlib import sha256 -from cryptography.hazmat.primitives import hashes -from cryptography.hazmat.primitives.kdf.hkdf import HKDF -from huggingface_hub import snapshot_download, HfApi - -VERITY_FORMAT = "1" -VERITY_HASH = "sha256" -VERITY_DATA_BLOCK_SIZE = "4096" -VERITY_HASH_BLOCK_SIZE = "4096" -EMWP_CIPHER = "aes-xts-plain64" -EMWP_KEY_SIZE = "512" -EMWP_SECTOR_SIZE = "4096" -EMWP_KEY_DERIVE_INFO = b"tinfoil/emwp/dm-crypt-key/v1" -GPT_SECTOR_SIZE = 512 -EMWP_SECTOR_SIZE_BYTES = 4096 -EMWP_PARTITION_START_SECTOR = 2048 -EMWP_GPT_TRAILING_SECTORS = 40 - - -def derive_emwp_key(master_key, salt): - return HKDF( - algorithm=hashes.SHA256(), - length=64, - salt=salt, - info=EMWP_KEY_DERIVE_INFO, - ).derive(master_key) - - -def hash_model_dir(model_dir): - digest = hashlib.sha256() - for root, dirs, files in os.walk(model_dir): - dirs.sort() - files.sort() - rel_root = os.path.relpath(root, model_dir) - if rel_root != ".": - digest.update(b"d\0") - digest.update(rel_root.encode()) - digest.update(b"\0") - for name in files: - path = os.path.join(root, name) - rel_path = os.path.relpath(path, model_dir) - if os.path.islink(path): - digest.update(b"l\0") - 
digest.update(rel_path.encode()) - digest.update(b"\0") - digest.update(os.readlink(path).encode()) - digest.update(b"\0") - continue - digest.update(b"f\0") - digest.update(rel_path.encode()) - digest.update(b"\0") - with open(path, "rb") as f: - for chunk in iter(lambda: f.read(1024 * 1024), b""): - digest.update(chunk) - digest.update(b"\0") - return digest.hexdigest() - - -def emwp_master_key(key_file=None): - if not key_file: - key_file = os.getenv("PRIVATE_MODEL_KEY_FILE") - if key_file: - with open(key_file, "r") as f: - key_b64 = f.read().strip() - else: - key_b64 = os.getenv("PRIVATE_MODEL_KEY_B64") - if not key_b64: - raise Exception("--key-file, PRIVATE_MODEL_KEY_FILE, or PRIVATE_MODEL_KEY_B64 is required for --encrypt") - key = base64.b64decode(key_b64, validate=True) - if len(key) != 64: - raise Exception(f"EMWP master key decoded to {len(key)} bytes, want 64") - return key - - -def write_key_file(path, key): - with open(path, "wb") as f: - f.write(key) - os.chmod(path, 0o600) - - -def close_crypt_mapper(name): - subprocess.run(["cryptsetup", "close", name], check=False) - - -def encrypt_emwp(mwp_file, emwp_file, root_hash, part_uuid, key_file=None): - size = os.path.getsize(mwp_file) - encrypted_size = (size + EMWP_SECTOR_SIZE_BYTES - 1) // EMWP_SECTOR_SIZE_BYTES * EMWP_SECTOR_SIZE_BYTES - sectors = encrypted_size // GPT_SECTOR_SIZE - end_sector = EMWP_PARTITION_START_SECTOR + sectors - 1 - total_sectors = end_sector + 1 + EMWP_GPT_TRAILING_SECTORS - disk_uuid = str(uuid.uuid5(uuid.NAMESPACE_URL, root_hash + "-emwp-disk")) - tmp_file = emwp_file + ".tmp" - dm_key_file = emwp_file + ".key.tmp" - mapper_name = "modelwrap-emwp-" + root_hash[:16] - - master_key = emwp_master_key(key_file) - dm_key = derive_emwp_key(master_key, f"{root_hash}_{part_uuid}".encode()) - - for path in (tmp_file, dm_key_file): - if os.path.exists(path): - os.remove(path) - - try: - print(f"Creating EMWP GPT image {emwp_file}") - subprocess.run(["truncate", "-s", 
str(total_sectors * GPT_SECTOR_SIZE), tmp_file], check=True) - subprocess.run([ - "sgdisk", - "--clear", - f"--disk-guid={disk_uuid}", - f"--new=1:{EMWP_PARTITION_START_SECTOR}:{end_sector}", - "--typecode=1:8300", - f"--partition-guid=1:{part_uuid}", - "--change-name=1:emwp", - tmp_file, - ], check=True) - - write_key_file(dm_key_file, dm_key) - subprocess.run([ - "cryptsetup", "open", - "--type", "plain", - "--cipher", EMWP_CIPHER, - "--key-size", EMWP_KEY_SIZE, - "--sector-size", EMWP_SECTOR_SIZE, - "--key-file", dm_key_file, - "--offset", str(EMWP_PARTITION_START_SECTOR), - "--skip", "0", - "--size", str(sectors), - tmp_file, - mapper_name, - ], check=True) - - subprocess.run([ - "dd", - f"if={mwp_file}", - f"of=/dev/mapper/{mapper_name}", - "bs=4M", - "conv=fsync", - "status=none", - ], check=True) - close_crypt_mapper(mapper_name) - os.replace(tmp_file, emwp_file) - finally: - close_crypt_mapper(mapper_name) - if os.path.exists(dm_key_file): - os.remove(dm_key_file) - if os.path.exists(tmp_file): - os.remove(tmp_file) - - -def verify_emwp(emwp_file, info_file, key_file_override=None): - raw_info, root_hash, offset, part_uuid = parse_info_file(info_file) - if not os.path.exists(emwp_file): - raise Exception(f"EMWP artifact not found: {emwp_file}") - - size = os.path.getsize(emwp_file) - sectors = size // GPT_SECTOR_SIZE - EMWP_PARTITION_START_SECTOR - EMWP_GPT_TRAILING_SECTORS - key_file = emwp_file + ".key.tmp" - mapper_name = "modelwrap-emwp-verify-" + root_hash[:16] - dm_key = derive_emwp_key(emwp_master_key(key_file_override), f"{root_hash}_{part_uuid}".encode()) - - try: - write_key_file(key_file, dm_key) - subprocess.run([ - "cryptsetup", "open", - "--type", "plain", - "--cipher", EMWP_CIPHER, - "--key-size", EMWP_KEY_SIZE, - "--sector-size", EMWP_SECTOR_SIZE, - "--key-file", key_file, - "--offset", str(EMWP_PARTITION_START_SECTOR), - "--skip", "0", - "--size", str(sectors), - emwp_file, - mapper_name, - ], check=True) - - 
verify_verity(f"/dev/mapper/{mapper_name}", info_file) - finally: - close_crypt_mapper(mapper_name) - if os.path.exists(key_file): - os.remove(key_file) - return raw_info - -def parse_info_file(info_file): - with open(info_file, "r") as f: - raw_info = f.read().strip() - - parts = raw_info.split("_") - if len(parts) != 3: - raise Exception( - f"Invalid info file {info_file}: expected ROOTHASH_OFFSET_VERITYUUID" - ) - - root_hash, offset, verity_uuid = parts - if len(root_hash) != 64 or any(c not in "0123456789abcdef" for c in root_hash): - raise Exception(f"Invalid root hash in {info_file}: {root_hash}") - if not offset.isdigit(): - raise Exception(f"Invalid hash offset in {info_file}: {offset}") - try: - uuid.UUID(verity_uuid) - except ValueError as err: - raise Exception(f"Invalid verity UUID in {info_file}: {verity_uuid}") from err - - return raw_info, root_hash, offset, verity_uuid - - -def verify_verity(mpk_file, info_file): - raw_info, root_hash, offset, _ = parse_info_file(info_file) - if not os.path.exists(mpk_file): - raise Exception(f"MWP artifact not found: {mpk_file}") - - verify_cmd = [ - "veritysetup", - f"--format={VERITY_FORMAT}", - f"--hash={VERITY_HASH}", - f"--data-block-size={VERITY_DATA_BLOCK_SIZE}", - f"--hash-block-size={VERITY_HASH_BLOCK_SIZE}", - f"--hash-offset={offset}", - "verify", - mpk_file, # data dev - mpk_file, # hash dev - root_hash, - ] - print(f"Verifying dm-verity artifact {mpk_file}") - subprocess.run(verify_cmd, check=True) - print("Verification OK.") - return raw_info - - -parser = argparse.ArgumentParser() -parser.add_argument("model", nargs="?", default=os.getenv("MODEL")) -parser.add_argument("--verify", action="store_true", default=os.getenv("VERIFY") == "1") -parser.add_argument("--encrypt", action="store_true", default=os.getenv("ENCRYPTION") == "1") -parser.add_argument("--model-dir", default=os.getenv("MODEL_DIR")) -parser.add_argument("--key-file") -args = parser.parse_args() - -cache_dir = os.getenv("CACHE_DIR") or 
"cache" -output_dir = os.getenv("OUTPUT_DIR") or "output" -model_dir_override = args.model_dir -hf_token = os.getenv("HF_TOKEN") -verify_after_pack = args.verify -encrypt_output = args.encrypt -model = args.model - -if not os.path.exists(cache_dir): - os.makedirs(cache_dir) -if not os.path.exists(output_dir): - os.makedirs(output_dir) - -if model_dir_override: - if not os.path.isdir(model_dir_override): - raise Exception(f"MODEL_DIR is not a directory: {model_dir_override}") - local_revision = hash_model_dir(model_dir_override) - if not model: - local_name = os.path.basename(os.path.abspath(model_dir_override)) or "model" - model = f"{local_name}@{local_revision}" - elif "@" not in model: - model = f"{model}@{local_revision}" -elif not model: - raise Exception("model argument or MODEL environment variable is required") - -if not model_dir_override and "@" not in model: - api = HfApi(token=hf_token) - info = api.model_info(model) - if not info.sha: - raise Exception(f"Could not resolve HEAD commit for {model}. 
Please specify commit explicitly: MODEL={model}@") - print(f"Resolved {model} default branch HEAD -> {info.sha}") - model = f"{model}@{info.sha}" - -model_name, model_commit = model.split("@", 1) -model_dir = model_dir_override or os.path.join(cache_dir, model.replace("@", "/") ) - -if model_dir_override: - print(f"Using local model directory {model_dir} as {model}") -else: - print(f"Downloading {model} to {model_dir}") - - snapshot_download( - model_name, - local_dir=model_dir, - token=hf_token, - revision=model_commit, - ) - -# Remove cache dir for reproducibility -if not model_dir_override: - cache_dir = os.path.join(model_dir, ".cache") - if os.path.exists(cache_dir): - shutil.rmtree(cache_dir) - -# Create EROFS image - -output_model_dir = os.path.join(output_dir, model_name) -if not os.path.exists(output_model_dir): - os.makedirs(output_model_dir) - -mpk_file = os.path.join(output_model_dir, f"{model_commit}.mpk") - -if not os.path.exists(mpk_file): - mkfs_cmd = [ - "mkfs.erofs", - "--all-root", - "-T0", # Zero timestamp - f"-U{uuid.uuid5(uuid.NAMESPACE_URL, model+'-inner')}", # Static UUID - mpk_file+".tmp", - model_dir - ] - print(f"Creating EROFS image {mpk_file}") - subprocess.run(mkfs_cmd, check=True) - os.rename(mpk_file+".tmp", mpk_file) -else: - print(f"Using existing EROFS image {mpk_file}") - -# Wrap with dm-verity - -info_file = os.path.join(output_dir, model_name, f"{model_commit}.info") - -if not os.path.exists(info_file): - size = os.path.getsize(mpk_file) - offset = (size + 4095) // 4096 * 4096 - - verity_uuid = uuid.uuid5(uuid.NAMESPACE_URL, model+'-inner') - - veritysetup_cmd = [ - "veritysetup", - f"--format={VERITY_FORMAT}", - f"--hash={VERITY_HASH}", - f"--data-block-size={VERITY_DATA_BLOCK_SIZE}", - f"--hash-block-size={VERITY_HASH_BLOCK_SIZE}", - f"--salt={sha256(model.encode()).hexdigest()}", - f"--uuid={verity_uuid}", - f"--hash-offset={offset}", - f"--root-hash-file={info_file}", - "format", - mpk_file, # data dev - mpk_file, # hash 
dev - ] - print(f"Running veritysetup on {mpk_file}") - subprocess.run(veritysetup_cmd, check=True) - - if not os.path.exists(info_file): - raise Exception(f"Failed to create dm-verity info file {info_file}") - - with open(info_file, "a") as f: - f.write(f"_{offset}_{verity_uuid}") -else: - print(f"dm-verity volume already exists at {mpk_file}") - -if verify_after_pack: - verify_verity(mpk_file, info_file) -else: - print("Skipping dm-verity verification. Pass --verify or set VERIFY=1 to verify cached artifacts.") - -with open(info_file, "r") as f: - info = f.read().strip() - -if encrypt_output: - _, root_hash, offset, _ = parse_info_file(info_file) - part_uuid = str(uuid.uuid5(uuid.NAMESPACE_URL, model + "-emwp-outer")) - emwp_file = os.path.join(output_model_dir, f"{model_commit}.emwp") - emwp_info_file = os.path.join(output_model_dir, f"{model_commit}.emwp.info") - emwp_info = f"{root_hash}_{offset}_{part_uuid}" - - if not os.path.exists(emwp_file): - encrypt_emwp(mpk_file, emwp_file, root_hash, part_uuid, args.key_file) - else: - print(f"Using existing EMWP artifact: {emwp_file}") - - with open(emwp_info_file+".tmp", "w") as f: - f.write(emwp_info) - os.replace(emwp_info_file+".tmp", emwp_info_file) - - if verify_after_pack: - verify_emwp(emwp_file, emwp_info_file, args.key_file) - print(emwp_info) -else: - print(info) diff --git a/requirements.in b/requirements.in index 2629a3b..42e0890 100644 --- a/requirements.in +++ b/requirements.in @@ -1,2 +1 @@ -cryptography huggingface_hub[hf_xet] diff --git a/requirements.txt b/requirements.txt index a933622..fc650db 100644 --- a/requirements.txt +++ b/requirements.txt @@ -18,149 +18,12 @@ certifi==2026.5.20 \ # via # httpcore # httpx -cffi==2.0.0 \ - --hash=sha256:00bdf7acc5f795150faa6957054fbbca2439db2f775ce831222b66f192f03beb \ - --hash=sha256:07b271772c100085dd28b74fa0cd81c8fb1a3ba18b21e03d7c27f3436a10606b \ - --hash=sha256:087067fa8953339c723661eda6b54bc98c5625757ea62e95eb4898ad5e776e9f \ - 
--hash=sha256:0a1527a803f0a659de1af2e1fd700213caba79377e27e4693648c2923da066f9 \ - --hash=sha256:0cf2d91ecc3fcc0625c2c530fe004f82c110405f101548512cce44322fa8ac44 \ - --hash=sha256:0f6084a0ea23d05d20c3edcda20c3d006f9b6f3fefeac38f59262e10cef47ee2 \ - --hash=sha256:12873ca6cb9b0f0d3a0da705d6086fe911591737a59f28b7936bdfed27c0d47c \ - --hash=sha256:19f705ada2530c1167abacb171925dd886168931e0a7b78f5bffcae5c6b5be75 \ - --hash=sha256:1cd13c99ce269b3ed80b417dcd591415d3372bcac067009b6e0f59c7d4015e65 \ - --hash=sha256:1e3a615586f05fc4065a8b22b8152f0c1b00cdbc60596d187c2a74f9e3036e4e \ - --hash=sha256:1f72fb8906754ac8a2cc3f9f5aaa298070652a0ffae577e0ea9bd480dc3c931a \ - --hash=sha256:1fc9ea04857caf665289b7a75923f2c6ed559b8298a1b8c49e59f7dd95c8481e \ - --hash=sha256:203a48d1fb583fc7d78a4c6655692963b860a417c0528492a6bc21f1aaefab25 \ - --hash=sha256:2081580ebb843f759b9f617314a24ed5738c51d2aee65d31e02f6f7a2b97707a \ - --hash=sha256:21d1152871b019407d8ac3985f6775c079416c282e431a4da6afe7aefd2bccbe \ - --hash=sha256:24b6f81f1983e6df8db3adc38562c83f7d4a0c36162885ec7f7b77c7dcbec97b \ - --hash=sha256:256f80b80ca3853f90c21b23ee78cd008713787b1b1e93eae9f3d6a7134abd91 \ - --hash=sha256:28a3a209b96630bca57cce802da70c266eb08c6e97e5afd61a75611ee6c64592 \ - --hash=sha256:2c8f814d84194c9ea681642fd164267891702542f028a15fc97d4674b6206187 \ - --hash=sha256:2de9a304e27f7596cd03d16f1b7c72219bd944e99cc52b84d0145aefb07cbd3c \ - --hash=sha256:38100abb9d1b1435bc4cc340bb4489635dc2f0da7456590877030c9b3d40b0c1 \ - --hash=sha256:3925dd22fa2b7699ed2617149842d2e6adde22b262fcbfada50e3d195e4b3a94 \ - --hash=sha256:3e17ed538242334bf70832644a32a7aae3d83b57567f9fd60a26257e992b79ba \ - --hash=sha256:3e837e369566884707ddaf85fc1744b47575005c0a229de3327f8f9a20f4efeb \ - --hash=sha256:3f4d46d8b35698056ec29bca21546e1551a205058ae1a181d871e278b0b28165 \ - --hash=sha256:44d1b5909021139fe36001ae048dbdde8214afa20200eda0f64c068cac5d5529 \ - --hash=sha256:45d5e886156860dc35862657e1494b9bae8dfa63bf56796f2fb56e1679fc0bca \ - 
--hash=sha256:4647afc2f90d1ddd33441e5b0e85b16b12ddec4fca55f0d9671fef036ecca27c \ - --hash=sha256:4671d9dd5ec934cb9a73e7ee9676f9362aba54f7f34910956b84d727b0d73fb6 \ - --hash=sha256:53f77cbe57044e88bbd5ed26ac1d0514d2acf0591dd6bb02a3ae37f76811b80c \ - --hash=sha256:5eda85d6d1879e692d546a078b44251cdd08dd1cfb98dfb77b670c97cee49ea0 \ - --hash=sha256:5fed36fccc0612a53f1d4d9a816b50a36702c28a2aa880cb8a122b3466638743 \ - --hash=sha256:61d028e90346df14fedc3d1e5441df818d095f3b87d286825dfcbd6459b7ef63 \ - --hash=sha256:66f011380d0e49ed280c789fbd08ff0d40968ee7b665575489afa95c98196ab5 \ - --hash=sha256:6824f87845e3396029f3820c206e459ccc91760e8fa24422f8b0c3d1731cbec5 \ - --hash=sha256:6c6c373cfc5c83a975506110d17457138c8c63016b563cc9ed6e056a82f13ce4 \ - --hash=sha256:6d02d6655b0e54f54c4ef0b94eb6be0607b70853c45ce98bd278dc7de718be5d \ - --hash=sha256:6d50360be4546678fc1b79ffe7a66265e28667840010348dd69a314145807a1b \ - --hash=sha256:730cacb21e1bdff3ce90babf007d0a0917cc3e6492f336c2f0134101e0944f93 \ - --hash=sha256:737fe7d37e1a1bffe70bd5754ea763a62a066dc5913ca57e957824b72a85e205 \ - --hash=sha256:74a03b9698e198d47562765773b4a8309919089150a0bb17d829ad7b44b60d27 \ - --hash=sha256:7553fb2090d71822f02c629afe6042c299edf91ba1bf94951165613553984512 \ - --hash=sha256:7a66c7204d8869299919db4d5069a82f1561581af12b11b3c9f48c584eb8743d \ - --hash=sha256:7cc09976e8b56f8cebd752f7113ad07752461f48a58cbba644139015ac24954c \ - --hash=sha256:81afed14892743bbe14dacb9e36d9e0e504cd204e0b165062c488942b9718037 \ - --hash=sha256:8941aaadaf67246224cee8c3803777eed332a19d909b47e29c9842ef1e79ac26 \ - --hash=sha256:89472c9762729b5ae1ad974b777416bfda4ac5642423fa93bd57a09204712322 \ - --hash=sha256:8ea985900c5c95ce9db1745f7933eeef5d314f0565b27625d9a10ec9881e1bfb \ - --hash=sha256:8eca2a813c1cb7ad4fb74d368c2ffbbb4789d377ee5bb8df98373c2cc0dee76c \ - --hash=sha256:92b68146a71df78564e4ef48af17551a5ddd142e5190cdf2c5624d0c3ff5b2e8 \ - --hash=sha256:9332088d75dc3241c702d852d4671613136d90fa6881da7d770a483fd05248b4 \ - 
--hash=sha256:94698a9c5f91f9d138526b48fe26a199609544591f859c870d477351dc7b2414 \ - --hash=sha256:9a67fc9e8eb39039280526379fb3a70023d77caec1852002b4da7e8b270c4dd9 \ - --hash=sha256:9de40a7b0323d889cf8d23d1ef214f565ab154443c42737dfe52ff82cf857664 \ - --hash=sha256:a05d0c237b3349096d3981b727493e22147f934b20f6f125a3eba8f994bec4a9 \ - --hash=sha256:afb8db5439b81cf9c9d0c80404b60c3cc9c3add93e114dcae767f1477cb53775 \ - --hash=sha256:b18a3ed7d5b3bd8d9ef7a8cb226502c6bf8308df1525e1cc676c3680e7176739 \ - --hash=sha256:b1e74d11748e7e98e2f426ab176d4ed720a64412b6a15054378afdb71e0f37dc \ - --hash=sha256:b21e08af67b8a103c71a250401c78d5e0893beff75e28c53c98f4de42f774062 \ - --hash=sha256:b4c854ef3adc177950a8dfc81a86f5115d2abd545751a304c5bcf2c2c7283cfe \ - --hash=sha256:b882b3df248017dba09d6b16defe9b5c407fe32fc7c65a9c69798e6175601be9 \ - --hash=sha256:baf5215e0ab74c16e2dd324e8ec067ef59e41125d3eade2b863d294fd5035c92 \ - --hash=sha256:c649e3a33450ec82378822b3dad03cc228b8f5963c0c12fc3b1e0ab940f768a5 \ - --hash=sha256:c654de545946e0db659b3400168c9ad31b5d29593291482c43e3564effbcee13 \ - --hash=sha256:c6638687455baf640e37344fe26d37c404db8b80d037c3d29f58fe8d1c3b194d \ - --hash=sha256:c8d3b5532fc71b7a77c09192b4a5a200ea992702734a2e9279a37f2478236f26 \ - --hash=sha256:cb527a79772e5ef98fb1d700678fe031e353e765d1ca2d409c92263c6d43e09f \ - --hash=sha256:cf364028c016c03078a23b503f02058f1814320a56ad535686f90565636a9495 \ - --hash=sha256:d48a880098c96020b02d5a1f7d9251308510ce8858940e6fa99ece33f610838b \ - --hash=sha256:d68b6cef7827e8641e8ef16f4494edda8b36104d79773a334beaa1e3521430f6 \ - --hash=sha256:d9b29c1f0ae438d5ee9acb31cadee00a58c46cc9c0b2f9038c6b0b3470877a8c \ - --hash=sha256:d9b97165e8aed9272a6bb17c01e3cc5871a594a446ebedc996e2397a1c1ea8ef \ - --hash=sha256:da68248800ad6320861f129cd9c1bf96ca849a2771a59e0344e88681905916f5 \ - --hash=sha256:da902562c3e9c550df360bfa53c035b2f241fed6d9aef119048073680ace4a18 \ - --hash=sha256:dbd5c7a25a7cb98f5ca55d258b103a2054f859a46ae11aaf23134f9cc0d356ad \ - 
--hash=sha256:dd4f05f54a52fb558f1ba9f528228066954fee3ebe629fc1660d874d040ae5a3 \ - --hash=sha256:de8dad4425a6ca6e4e5e297b27b5c824ecc7581910bf9aee86cb6835e6812aa7 \ - --hash=sha256:e11e82b744887154b182fd3e7e8512418446501191994dbf9c9fc1f32cc8efd5 \ - --hash=sha256:e6e73b9e02893c764e7e8d5bb5ce277f1a009cd5243f8228f75f842bf937c534 \ - --hash=sha256:f73b96c41e3b2adedc34a7356e64c8eb96e03a3782b535e043a986276ce12a49 \ - --hash=sha256:f93fd8e5c8c0a4aa1f424d6173f14a892044054871c771f8566e4008eaa359d2 \ - --hash=sha256:fc33c5141b55ed366cfaad382df24fe7dcbc686de5be719b207bb248e3053dc5 \ - --hash=sha256:fc7de24befaeae77ba923797c7c87834c73648a05a4bde34b3b7e5588973a453 \ - --hash=sha256:fe562eb1a64e67dd297ccc4f5addea2501664954f2692b69a76449ec7913ecbf - # via cryptography click==8.4.1 \ --hash=sha256:482be17c6991b8c19c5429a1e995d9b0efdbb63172824c41f99965dc0ade8ec2 \ --hash=sha256:918b5633eddf6b41c32d4f454bf0de810065c74e3f7dbf8ee5452f8be88d3e96 # via # huggingface-hub # typer -cryptography==48.0.0 \ - --hash=sha256:0890f502ddf7d9c6426129c3f49f5c0a39278ed7cd6322c8755ffca6ee675a13 \ - --hash=sha256:0c558d2cdffd8f4bbb30fc7134c74d2ca9a476f830bb053074498fbc86f41ed6 \ - --hash=sha256:16cd65b9330583e4619939b3a3843eec1e6e789744bb01e7c7e2e62e33c239c8 \ - --hash=sha256:18349bbc56f4743c8b12dc32e2bccb2cf83ee8b69a3bba74ef8ae857e26b3d25 \ - --hash=sha256:1e2d54c8be6152856a36f0882ab231e70f8ec7f14e93cf87db8a2ed056bf160c \ - --hash=sha256:22a5cb272895dce158b2cacdfdc3debd299019659f42947dbdac6f32d68fe832 \ - --hash=sha256:27241b1dc9962e056062a8eef1991d02c3a24569c95975bd2322a8a52c6e5e12 \ - --hash=sha256:2b4d59804e8408e2fea7d1fbaf218e5ec984325221db76e6a241a9abd6cdd95c \ - --hash=sha256:2eb992bbd4661238c5a397594c83f5b4dc2bc5b848c365c8f991b6780efcc5c7 \ - --hash=sha256:369a6348999f94bbd53435c894377b20ab95f25a9065c283570e70150d8abc3c \ - --hash=sha256:3cb07a3ed6431663cd321ea8a000a1314c74211f823e4177fefa2255e057d1ec \ - --hash=sha256:40ba1f85eaa6959837b1d51c9767e230e14612eea4ef110ee8854ada22da1bf5 \ - 
--hash=sha256:4defde8685ae324a9eb9d818717e93b4638ef67070ac9bc15b8ca85f63048355 \ - --hash=sha256:55b7718303bf06a5753dcdccf2f3945cf18ad7bffde41b61226e4db31ab89a9c \ - --hash=sha256:561215ea3879cb1cbbf272867e2efda62476f240fb58c64de6b393ae19246741 \ - --hash=sha256:58d00498e8933e4a194f3076aee1b4a97dfec1a6da444535755822fe5d8b0b86 \ - --hash=sha256:59baa2cb386c4f0b9905bd6eb4c2a79a69a128408fd31d32ca4d7102d4156321 \ - --hash=sha256:5a5ed8fde7a1d09376ca0b40e68cd59c69fe23b1f9768bd5824f54681626032a \ - --hash=sha256:5b012212e08b8dd5edc78ef54da83dd9892fd9105323b3993eff6bea65dc21d7 \ - --hash=sha256:5c3932f4436d1cccb036cb0eaef46e6e2db91035166f1ad6505c3c9d5a635920 \ - --hash=sha256:614d0949f4790582d2cc25553abd09dd723025f0c0e7c67376a1d77196743d6e \ - --hash=sha256:76341972e1eff8b4bea859f09c0d3e64b96ce931b084f9b9b7db8ef364c30eff \ - --hash=sha256:77a2ccbbe917f6710e05ba9adaa25fb5075620bf3ea6fb751997875aff4ae4bd \ - --hash=sha256:7995ef305d7165c3f11ae07f2517e5a4f1d5c18da1376a0a9ed496336b69e5f3 \ - --hash=sha256:7ce4bfae76319a532a2dc68f82cc32f5676ee792a983187dac07183690e5c66f \ - --hash=sha256:7e8eac43dfca5c4cccc6dad9a80504436fca53bb9bc3100a2386d730fbe6b602 \ - --hash=sha256:84cf79f0dc8b36ac5da873481716e87aef31fcfa0444f9e1d8b4b2cece142855 \ - --hash=sha256:8c7378637d7d88016fa6791c159f698b3d3eed28ebf844ac36b9dc04a14dae18 \ - --hash=sha256:8cd666227ef7af430aa5914a9910e0ddd703e75f039cef0825cd0da71b6b711a \ - --hash=sha256:906cbf0670286c6e0044156bc7d4af9cbb0ef6db9f73e52c3ec56ba6bdde5336 \ - --hash=sha256:9071196d81abc88b3516ac8cdfad32e2b66dd4a5393a8e68a961e9161ddc6239 \ - --hash=sha256:9249e3cd978541d665967ac2cb2787fd6a62bddf1e75b3e347a594d7dacf4f74 \ - --hash=sha256:984a20b0f62a26f48a3396c72e4bc34c66e356d356bf370053066b3b6d54634a \ - --hash=sha256:9be5aafa5736574f8f15f262adc81b2a9869e2cfe9014d52a44633905b40d52c \ - --hash=sha256:9c459db21422be75e2809370b829a87eb37f74cd785fc4aa9ea1e5f43b47cda4 \ - --hash=sha256:9ccdac7d40688ecb5a3b4a604b8a88c8002e3442d6c60aead1db2a89a041560c \ - 
--hash=sha256:a0e692c683f4df67815a2d258b324e66f4738bd7a96a218c826dce4f4bd05d8f \ - --hash=sha256:a5da777e32ffed6f85a7b2b3f7c5cbc88c146bfcd0a1d7baf5fcc6c52ee35dd4 \ - --hash=sha256:a64697c641c7b1b2178e573cbc31c7c6684cd56883a478d75143dbb7118036db \ - --hash=sha256:ad64688338ed4bc1a6618076ba75fd7194a5f1797ac60b47afe926285adb3166 \ - --hash=sha256:bd72e68b06bb1e96913f97dd4901119bc17f39d4586a5adf2d3e47bc2b9d58b5 \ - --hash=sha256:c17dfe85494deaeddc5ce251aebd1d60bbe6afc8b62071bb0b469431a000124f \ - --hash=sha256:c18684a7f0cc9a3cb60328f496b8e3372def7c5d2df39ac267878b05565aaaae \ - --hash=sha256:cc90c0b39b2e3c65ef52c804b72e3c58f8a04ab2a1871272798e5f9572c17d20 \ - --hash=sha256:db63bf618e5dea46c07de12e900fe1cdd2541e6dc9dbae772a70b7d4d4765f6a \ - --hash=sha256:ea8990436d914540a40ab24b6a77c0969695ed52f4a4874c5137ccf7045a7057 \ - --hash=sha256:ecde28a596bead48b0cfd2a1b4416c3d43074c2d785e3a398d7ec1fc4d0f7fbb \ - --hash=sha256:f5333311663ea94f75dd408665686aaf426563556bb5283554a3539177e03b8c \ - --hash=sha256:fdfef35d751d510fcef5252703621574364fec16418c4a1e5e1055248401054b - # via -r requirements.in filelock==3.29.1 \ --hash=sha256:85199dfd706869641b72b2e8955d5416a4b2b7dc4b0e8e6d97b4cc1299a6983b \ --hash=sha256:d97e6b1b9757569626c58caa07dc4beb1613f4a2938b1e8cc81afca398906c9e @@ -230,10 +93,6 @@ packaging==26.2 \ --hash=sha256:5fc45236b9446107ff2415ce77c807cee2862cb6fac22b8a73826d0693b0980e \ --hash=sha256:ff452ff5a3e828ce110190feff1178bb1f2ea2281fa2075aadb987c2fb221661 # via huggingface-hub -pycparser==3.0 \ - --hash=sha256:600f49d217304a5902ac3c37e1281c9fe94e4d0489de643a9504c5cdfdfc6b29 \ - --hash=sha256:b727414169a36b7d524c1c3e31839a521725078d7b2ff038656844266160a992 - # via cffi pygments==2.20.0 \ --hash=sha256:6757cd03768053ff99f3039c1a36d6c0aa0b263438fcab17520b30a303a82b5f \ --hash=sha256:81a9e26dd42fd28a23a2d169d86d7ac03b46e2f8b59ed4698fb4785f946d0176 @@ -321,9 +180,9 @@ shellingham==1.5.4 \ --hash=sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686 \ 
--hash=sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de # via typer -tqdm==4.68.1 \ - --hash=sha256:fc163d96b287bd031e1aa24421ce4411b25559bd0a1be4fe649bdaa4d2c02bf5 \ - --hash=sha256:fea4a90e4023f764914569f7802a297277c5ab1a66be5144143e142e1a4031d8 +tqdm==4.68.2 \ + --hash=sha256:89c230e8dbc67c7615c142487111222f878c77427ea09549960f62389e258add \ + --hash=sha256:d4240441fb5353290b87d6a85968c9decc131a99b8c7faa28269d829de669ede # via huggingface-hub typer==0.25.1 \ --hash=sha256:75caa44ed46a03fb2dab8808753ffacdbfea88495e74c85a28c5eefcf5f39c89 \ diff --git a/scripts/emwp-e2e.sh b/scripts/emwp-e2e.sh deleted file mode 100755 index b6220f8..0000000 --- a/scripts/emwp-e2e.sh +++ /dev/null @@ -1,100 +0,0 @@ -#!/usr/bin/env bash -# End-to-end EMWP regression harness. -# -# Builds modelwrap, packs a tiny local model directory as EMWP, exposes the -# encrypted payload partition through a synthetic PARTUUID path, then runs the -# compiled cvmimage boot mount path against it. -# -# Requires an x86 Linux host with Docker and the dm_verity kernel module -# available. -set -euo pipefail - -ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." 
&& pwd)" -MODELWRAP_DIR="${ROOT}/modelwrap" -CVMIMAGE_DIR="${ROOT}/cvmimage" -IMAGE="${IMAGE:-tinfoil-modelwrap:emwp-test}" -GO_IMAGE="${GO_IMAGE:-golang:1.25-trixie}" -MODEL="${MODEL:-}" -if [[ -z "${GOARCH:-}" ]]; then - case "$(uname -m)" in - x86_64) GOARCH=amd64 ;; - arm64|aarch64) GOARCH=arm64 ;; - *) echo "unsupported host architecture: $(uname -m); set GOARCH explicitly" >&2; exit 1 ;; - esac -fi -WORK_DIR="$(mktemp -d "${TMPDIR:-/tmp}/modelwrap-emwp-e2e.XXXXXX")" -KEY_B64="$(python3 - <<'PY' -import base64 -print(base64.b64encode(b"k" * 64).decode()) -PY -)" - -cleanup() { - rm -rf "${WORK_DIR}" 2>/dev/null || sudo rm -rf "${WORK_DIR}" 2>/dev/null || true -} -trap cleanup EXIT - -if [[ "$(uname -s)" == "Linux" ]] && command -v sudo >/dev/null 2>&1; then - sudo modprobe dm_verity 2>/dev/null || true -fi - -docker build -t "${IMAGE}" "${MODELWRAP_DIR}" -docker run --rm \ - -v "${CVMIMAGE_DIR}:/src/cvmimage:ro" \ - -v "${WORK_DIR}:/work" \ - -e GOARCH="${GOARCH}" \ - "${GO_IMAGE}" \ - bash -lc 'cd /src/cvmimage/tinfoil && GOOS=linux /usr/local/go/bin/go test -tags=integration -c ./cmd/boot -o /work/boot.test' - -mkdir -p "${WORK_DIR}/cache" -mkdir -p "${WORK_DIR}/model" -mkdir -p "${WORK_DIR}/output" -printf 'hello emwp\n' > "${WORK_DIR}/model/model.txt" -printf '%s\n' "${KEY_B64}" > "${WORK_DIR}/emwp-key" - -pack_args=(/app/pack.py --model-dir /model --encrypt --key-file /run/emwp-key --verify) -if [[ -n "${MODEL}" ]]; then - pack_args=(/app/pack.py "${MODEL}" --model-dir /model --encrypt --key-file /run/emwp-key --verify) -fi - -docker run --rm --privileged \ - -v "${WORK_DIR}/cache:/cache" \ - -v "${WORK_DIR}/model:/model:ro" \ - -v "${WORK_DIR}/emwp-key:/run/emwp-key:ro" \ - -v "${WORK_DIR}/output:/output" \ - --entrypoint python3 \ - "${IMAGE}" \ - "${pack_args[@]}" - -INFO_FILE="$(find "${WORK_DIR}/output" -name '*.emwp.info' -print -quit)" -EMWP_FILE="${INFO_FILE%.info}" -test -f "${EMWP_FILE}" -test -f "${INFO_FILE}" -REF="$(cat "${INFO_FILE}")" 
-PARTUUID="${REF##*_}" - -docker run --rm --privileged \ - -v "${WORK_DIR}/boot.test:/tmp/boot.test:ro" \ - -v "${EMWP_FILE}:/tmp/model.emwp:ro" \ - -e TINFOIL_EMWP_INTEGRATION=1 \ - -e TINFOIL_EMWP_REF="${REF}" \ - -e TINFOIL_EMWP_KEY_B64="${KEY_B64}" \ - --entrypoint bash \ - "${IMAGE}" \ - -lc ' - set -euo pipefail - if ! dmsetup targets | awk "{print \$1}" | grep -qx verity; then - echo "dm-verity device-mapper target is unavailable in this kernel; run this e2e on a Linux host with dm_verity loaded." >&2 - exit 2 - fi - - mkdir -p /mnt/ramdisk/private /mnt/ramdisk/public /dev/disk/by-partuuid - LOOP="$(losetup --read-only --find --show /tmp/model.emwp)" - PART_SECTORS="$(( $(blockdev --getsz "${LOOP}") - 2048 - 40 ))" - dmsetup create emwp-e2e-part --table "0 ${PART_SECTORS} linear ${LOOP} 2048" - dmsetup mknodes - trap "umount /mnt/ramdisk/public/mwp/* 2>/dev/null || true; veritysetup close mwp-* 2>/dev/null || true; cryptsetup close emwp-*-crypt 2>/dev/null || true; dmsetup remove emwp-e2e-part 2>/dev/null || true; losetup -d ${LOOP} 2>/dev/null || true" EXIT - ln -sf /dev/mapper/emwp-e2e-part "/dev/disk/by-partuuid/'"${PARTUUID}"'" - - /tmp/boot.test -test.run TestMountEncryptedModelPackIntegration -test.v - ' diff --git a/test/e2e.sh b/test/e2e.sh new file mode 100755 index 0000000..cacabdb --- /dev/null +++ b/test/e2e.sh @@ -0,0 +1,60 @@ +#!/usr/bin/env bash +# Modelwrap end-to-end test, self-contained to this repository. +# +# Builds the modelwrap image, runs the module's pack->unwrap round-trip +# integration test inside it, then smoke-tests the CLI entrypoint by +# packing and verifying a tiny public model as EMWP. +# +# Requires an x86 Linux host with Docker, network access, and the +# dm_verity kernel module available. The consumer-side test for cvmimage +# lives in the cvmimage repository (emwp-e2e.sh). +set -euo pipefail + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd)" +IMAGE="${IMAGE:-tinfoil-modelwrap:e2e}" +GO_IMAGE="${GO_IMAGE:-golang:1.25-trixie}" +MODEL="${MODEL:-hf-internal-testing/tiny-random-GPT2Model@d6694b0d8fe17978761c9305dc151780506b192e}" +WORK_DIR="$(mktemp -d "${TMPDIR:-/tmp}/modelwrap-e2e.XXXXXX")" +KEY_B64="$(head -c 64 /dev/zero | tr '\0' 'k' | base64 | tr -d '\n')" + +cleanup() { + rm -rf "${WORK_DIR}" 2>/dev/null || sudo rm -rf "${WORK_DIR}" 2>/dev/null || true +} +trap cleanup EXIT + +if [[ "$(uname -s)" == "Linux" ]] && command -v sudo >/dev/null 2>&1; then + sudo modprobe dm_verity 2>/dev/null || true +fi + +docker build -t "${IMAGE}" "${ROOT}" + +docker run --rm \ + -v "${ROOT}:/src:ro" \ + -v "${WORK_DIR}:/work" \ + -e GOFLAGS=-buildvcs=false \ + "${GO_IMAGE}" \ + bash -c 'export PATH="${PATH}:/usr/local/go/bin" && cd /src && GOOS=linux go test -tags=integration -c . -o /work/modelwrap.test' + +# Protocol round trip: pack with wrap, consume with unwrap. +docker run --rm --privileged \ + -v "${WORK_DIR}/modelwrap.test:/tmp/modelwrap.test:ro" \ + -e TINFOIL_MODELWRAP_INTEGRATION=1 \ + --entrypoint /tmp/modelwrap.test \ + "${IMAGE}" \ + -test.run TestEMWPRoundTripIntegration -test.v + +# CLI smoke test: the user-facing entrypoint with volumes and key file. 
+mkdir -p "${WORK_DIR}/cache" "${WORK_DIR}/output" +printf '%s\n' "${KEY_B64}" > "${WORK_DIR}/emwp-key" + +docker run --rm --privileged \ + -v "${WORK_DIR}/cache:/cache" \ + -v "${WORK_DIR}/emwp-key:/run/emwp-key:ro" \ + -v "${WORK_DIR}/output:/output" \ + "${IMAGE}" \ + --encrypt --key-file /run/emwp-key --verify "${MODEL}" + +INFO_FILE="$(find "${WORK_DIR}/output" -name '*.emwp.info' -print -quit)" +test -f "${INFO_FILE}" +test -f "${INFO_FILE%.info}" +echo "modelwrap e2e OK: $(cat "${INFO_FILE}")" diff --git a/unwrap/unwrap.go b/unwrap/unwrap.go new file mode 100644 index 0000000..36ecc45 --- /dev/null +++ b/unwrap/unwrap.go @@ -0,0 +1,97 @@ +// Package unwrap implements the consumer side of the Modelwrap protocol: +// opening dm-crypt and dm-verity mappings over MWP/EMWP artifacts and +// mounting the verified filesystem read-only. +// +// It shells out to cryptsetup, veritysetup, and mount, and is intended to +// run in environments that ship those tools (e.g. the cvmimage initramfs). +package unwrap + +import ( + "fmt" + "os" + "os/exec" + "strconv" + + "github.com/tinfoilsh/modelwrap" +) + +// Filesystem and mount hardening parameters for model pack mounts. +const ( + FilesystemType = "erofs" + MountOptions = "ro,nodev,nosuid,noexec" +) + +// OpenCrypt opens a read-only dm-crypt plain mapping over an EMWP payload +// device using the format's cipher parameters. The key file must contain +// the raw per-artifact key derived with modelwrap.DeriveKey. 
+func OpenCrypt(device, name, keyFile string) error { + cmd := exec.Command( + "cryptsetup", "open", + "--type", "plain", + "--cipher", modelwrap.EMWPCipher, + "--key-size", strconv.Itoa(modelwrap.EMWPKeySizeBits), + "--sector-size", strconv.Itoa(modelwrap.EMWPSectorSize), + "--key-file", keyFile, + "--readonly", + device, + name, + ) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + if err := cmd.Run(); err != nil { + return fmt.Errorf("cryptsetup open: %w", err) + } + return nil +} + +// CloseCrypt tears down a dm-crypt mapping, ignoring errors. +func CloseCrypt(name string) { + _ = exec.Command("cryptsetup", "close", name).Run() +} + +// OpenVerity opens a dm-verity mapping over a device that contains a +// filesystem followed by its hash tree at hashOffset. The remaining +// verity parameters come from the superblock, which is authenticated by +// the root hash. +func OpenVerity(device, name, rootHash, hashOffset string) error { + cmd := exec.Command( + "veritysetup", "open", + device, + name, + device, + rootHash, + "--hash-offset="+hashOffset, + ) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + if err := cmd.Run(); err != nil { + return fmt.Errorf("veritysetup open: %w", err) + } + return nil +} + +// CloseVerity tears down a dm-verity mapping, ignoring errors. +func CloseVerity(name string) { + _ = exec.Command("veritysetup", "close", name).Run() +} + +// Mount mounts a verified model pack device read-only with hardened +// options at mountPoint, creating the mount point if needed. 
+func Mount(device, mountPoint string) error { + if err := os.MkdirAll(mountPoint, 0755); err != nil { + return fmt.Errorf("creating mount point: %w", err) + } + cmd := exec.Command( + "mount", + "-t", FilesystemType, + "-o", MountOptions, + device, + mountPoint, + ) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + if err := cmd.Run(); err != nil { + return fmt.Errorf("mounting verity device: %w", err) + } + return nil +} diff --git a/wrap/wrap.go b/wrap/wrap.go new file mode 100644 index 0000000..b1db99d --- /dev/null +++ b/wrap/wrap.go @@ -0,0 +1,539 @@ +// Package wrap implements the Modelwrap packer: it builds deterministic +// EROFS images of model directories, wraps them with dm-verity (MWP), and +// optionally encrypts them into a GPT disk image with dm-crypt (EMWP). +// +// It shells out to mkfs.erofs, veritysetup, cryptsetup, and sgdisk, and is +// only intended to run inside the modelwrap container on Linux. +package wrap + +import ( + "crypto/sha256" + "encoding/hex" + "encoding/json" + "fmt" + "io" + "net/http" + "net/url" + "os" + "os/exec" + "path/filepath" + "strconv" + "strings" + + "github.com/tinfoilsh/modelwrap" +) + +// Options configures a single packing run. Model is a Hugging Face model +// ID, preferably name@revision. ModelDir packs a local directory instead +// of downloading; if Model has no revision, the directory content hash is +// used as the revision. +type Options struct { + Model string + ModelDir string + CacheDir string + OutputDir string + Encrypt bool + Verify bool + KeyFile string + HFToken string +} + +// Pack runs the full packing flow and returns the final artifact +// reference string (EMWP if encryption was requested, MWP otherwise). 
+func Pack(opts Options) (string, error) { + if opts.CacheDir == "" { + opts.CacheDir = "cache" + } + if opts.OutputDir == "" { + opts.OutputDir = "output" + } + for _, dir := range []string{opts.CacheDir, opts.OutputDir} { + if err := os.MkdirAll(dir, 0755); err != nil { + return "", err + } + } + + model, modelDir, err := resolveModel(opts) + if err != nil { + return "", err + } + modelName, modelCommit, _ := strings.Cut(model, "@") + + if opts.ModelDir != "" { + fmt.Printf("Using local model directory %s as %s\n", modelDir, model) + } else { + fmt.Printf("Downloading %s to %s\n", model, modelDir) + if err := downloadModel(modelName, modelCommit, modelDir, opts.HFToken); err != nil { + return "", err + } + // Remove the download cache for reproducibility. + if err := os.RemoveAll(filepath.Join(modelDir, ".cache")); err != nil { + return "", err + } + } + + outputModelDir := filepath.Join(opts.OutputDir, modelName) + if err := os.MkdirAll(outputModelDir, 0755); err != nil { + return "", err + } + + mwpFile := filepath.Join(outputModelDir, modelCommit+".mpk") + infoFile := filepath.Join(outputModelDir, modelCommit+".info") + + if err := makeEROFS(model, modelDir, mwpFile); err != nil { + return "", err + } + if err := formatVerity(model, mwpFile, infoFile); err != nil { + return "", err + } + + if opts.Verify { + if err := VerifyMWP(mwpFile, infoFile); err != nil { + return "", err + } + } else { + fmt.Println("Skipping dm-verity verification. 
Pass --verify or set VERIFY=1 to verify cached artifacts.") + } + + ref, err := parseInfoFile(infoFile) + if err != nil { + return "", err + } + + if !opts.Encrypt { + return ref.String(), nil + } + + masterKey, err := loadMasterKey(opts.KeyFile) + if err != nil { + return "", err + } + + emwpRef := &modelwrap.ArtifactRef{ + RootHash: ref.RootHash, + HashOffset: ref.HashOffset, + UUID: modelwrap.UUIDv5URL(model + "-emwp-outer"), + } + emwpFile := filepath.Join(outputModelDir, modelCommit+".emwp") + emwpInfoFile := filepath.Join(outputModelDir, modelCommit+".emwp.info") + + if _, err := os.Stat(emwpFile); os.IsNotExist(err) { + if err := encryptEMWP(mwpFile, emwpFile, emwpRef, masterKey); err != nil { + return "", err + } + } else if err != nil { + return "", err + } else { + fmt.Printf("Using existing EMWP artifact: %s\n", emwpFile) + } + + if err := os.WriteFile(emwpInfoFile+".tmp", []byte(emwpRef.String()), 0644); err != nil { + return "", err + } + if err := os.Rename(emwpInfoFile+".tmp", emwpInfoFile); err != nil { + return "", err + } + + if opts.Verify { + if err := VerifyEMWP(emwpFile, emwpInfoFile, masterKey); err != nil { + return "", err + } + } + return emwpRef.String(), nil +} + +// resolveModel determines the model@revision identity and the directory +// containing the model files. +func resolveModel(opts Options) (model, modelDir string, err error) { + model = opts.Model + + if opts.ModelDir != "" { + fi, err := os.Stat(opts.ModelDir) + if err != nil || !fi.IsDir() { + return "", "", fmt.Errorf("MODEL_DIR is not a directory: %s", opts.ModelDir) + } + localRevision, err := modelwrap.HashDir(opts.ModelDir) + if err != nil { + return "", "", fmt.Errorf("hashing model directory: %w", err) + } + if model == "" { + abs, err := filepath.Abs(opts.ModelDir) + if err != nil { + return "", "", err + } + name := filepath.Base(abs) + if name == "/" || name == "." 
{ + name = "model" + } + model = name + "@" + localRevision + } else if !strings.Contains(model, "@") { + model = model + "@" + localRevision + } + return model, opts.ModelDir, nil + } + + if model == "" { + return "", "", fmt.Errorf("model argument or MODEL environment variable is required") + } + if !strings.Contains(model, "@") { + sha, err := resolveHFRevision(model, opts.HFToken) + if err != nil { + return "", "", err + } + fmt.Printf("Resolved %s default branch HEAD -> %s\n", model, sha) + model = model + "@" + sha + } + return model, filepath.Join(opts.CacheDir, strings.Replace(model, "@", "/", 1)), nil +} + +// resolveHFRevision resolves the default branch HEAD commit of a Hugging +// Face model via the Hub API. +func resolveHFRevision(model, token string) (string, error) { + // Escape each path segment individually: model IDs contain a "/" + // separator (org/name) that must not be percent-encoded. + segments := strings.Split(model, "/") + for i, segment := range segments { + segments[i] = url.PathEscape(segment) + } + req, err := http.NewRequest("GET", "https://huggingface.co/api/models/"+strings.Join(segments, "/"), nil) + if err != nil { + return "", err + } + if token != "" { + req.Header.Set("Authorization", "Bearer "+token) + } + resp, err := http.DefaultClient.Do(req) + if err != nil { + return "", fmt.Errorf("querying Hugging Face API for %s: %w", model, err) + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return "", fmt.Errorf("querying Hugging Face API for %s: %s", model, resp.Status) + } + var info struct { + SHA string `json:"sha"` + } + if err := json.NewDecoder(resp.Body).Decode(&info); err != nil { + return "", fmt.Errorf("decoding Hugging Face API response for %s: %w", model, err) + } + if info.SHA == "" { + return "", fmt.Errorf("could not resolve HEAD commit for %s; specify the commit explicitly: %s@", model, model) + } + return info.SHA, nil +} + +// downloadModel fetches a model snapshot using the official `hf` 
CLI from +// huggingface_hub, which handles auth, resume, and xet-backed transfers. +func downloadModel(name, revision, dir, token string) error { + cmd := exec.Command("hf", "download", name, "--revision", revision, "--local-dir", dir) + if token != "" { + cmd.Env = append(os.Environ(), "HF_TOKEN="+token) + } + return run(cmd) +} + +// makeEROFS builds the deterministic EROFS image if it does not exist. +func makeEROFS(model, modelDir, mwpFile string) error { + if _, err := os.Stat(mwpFile); err == nil { + fmt.Printf("Using existing EROFS image %s\n", mwpFile) + return nil + } else if !os.IsNotExist(err) { + return err + } + + fmt.Printf("Creating EROFS image %s\n", mwpFile) + err := run(exec.Command( + "mkfs.erofs", + "--all-root", + "-T0", // Zero timestamps + "-U"+modelwrap.UUIDv5URL(model+"-inner"), // Static filesystem UUID + mwpFile+".tmp", + modelDir, + )) + if err != nil { + return err + } + return os.Rename(mwpFile+".tmp", mwpFile) +} + +// formatVerity appends the dm-verity hash tree to the EROFS image and +// writes the rootHash_hashOffset_uuid info file, if not already done. 
+func formatVerity(model, mwpFile, infoFile string) error { + if _, err := os.Stat(infoFile); err == nil { + fmt.Printf("dm-verity volume already exists at %s\n", mwpFile) + return nil + } else if !os.IsNotExist(err) { + return err + } + + fi, err := os.Stat(mwpFile) + if err != nil { + return err + } + offset := (fi.Size() + 4095) / 4096 * 4096 + verityUUID := modelwrap.UUIDv5URL(model + "-inner") + salt := sha256.Sum256([]byte(model)) + + fmt.Printf("Running veritysetup on %s\n", mwpFile) + err = run(exec.Command( + "veritysetup", + fmt.Sprintf("--format=%d", modelwrap.VerityFormat), + "--hash="+modelwrap.VerityHashAlgorithm, + fmt.Sprintf("--data-block-size=%d", modelwrap.VerityDataBlockSize), + fmt.Sprintf("--hash-block-size=%d", modelwrap.VerityHashBlockSize), + "--salt="+hex.EncodeToString(salt[:]), + "--uuid="+verityUUID, + fmt.Sprintf("--hash-offset=%d", offset), + "--root-hash-file="+infoFile, + "format", + mwpFile, // data device + mwpFile, // hash device + )) + if err != nil { + return err + } + + f, err := os.OpenFile(infoFile, os.O_WRONLY|os.O_APPEND, 0644) + if err != nil { + return fmt.Errorf("failed to open dm-verity info file %s: %w", infoFile, err) + } + defer f.Close() + if _, err := fmt.Fprintf(f, "_%d_%s", offset, verityUUID); err != nil { + return err + } + return nil +} + +// encryptEMWP wraps an MWP file into a GPT disk image whose single +// partition is a raw dm-crypt encryption of the MWP content. The disk +// GUID and PARTUUID are deterministic so the artifact is reproducible. 
+func encryptEMWP(mwpFile, emwpFile string, ref *modelwrap.ArtifactRef, masterKey []byte) error { + fi, err := os.Stat(mwpFile) + if err != nil { + return err + } + encryptedSize := (fi.Size() + modelwrap.EMWPSectorSize - 1) / modelwrap.EMWPSectorSize * modelwrap.EMWPSectorSize + sectors := encryptedSize / modelwrap.GPTSectorSize + endSector := modelwrap.EMWPPartitionStartSector + sectors - 1 + totalSectors := endSector + 1 + modelwrap.EMWPGPTTrailingSectors + diskUUID := modelwrap.UUIDv5URL(ref.RootHash + "-emwp-disk") + tmpFile := emwpFile + ".tmp" + dmKeyFile := emwpFile + ".key.tmp" + mapperName := "modelwrap-emwp-" + ref.RootHash[:16] + + dmKey, err := modelwrap.DeriveKey(masterKey, ref) + if err != nil { + return err + } + + for _, path := range []string{tmpFile, dmKeyFile} { + if err := os.Remove(path); err != nil && !os.IsNotExist(err) { + return err + } + } + defer func() { + closeCryptMapper(mapperName) + os.Remove(dmKeyFile) + os.Remove(tmpFile) + }() + + fmt.Printf("Creating EMWP GPT image %s\n", emwpFile) + if err := createSparseFile(tmpFile, totalSectors*modelwrap.GPTSectorSize); err != nil { + return err + } + err = run(exec.Command( + "sgdisk", + "--clear", + "--disk-guid="+diskUUID, + fmt.Sprintf("--new=1:%d:%d", modelwrap.EMWPPartitionStartSector, endSector), + "--typecode=1:8300", + "--partition-guid=1:"+ref.UUID, + "--change-name=1:emwp", + tmpFile, + )) + if err != nil { + return err + } + + if err := os.WriteFile(dmKeyFile, dmKey, 0600); err != nil { + return err + } + err = run(exec.Command( + "cryptsetup", "open", + "--type", "plain", + "--cipher", modelwrap.EMWPCipher, + "--key-size", strconv.Itoa(modelwrap.EMWPKeySizeBits), + "--sector-size", strconv.Itoa(modelwrap.EMWPSectorSize), + "--key-file", dmKeyFile, + "--offset", strconv.Itoa(modelwrap.EMWPPartitionStartSector), + "--skip", "0", + "--size", strconv.FormatInt(sectors, 10), + tmpFile, + mapperName, + )) + if err != nil { + return err + } + + if err := copyToDevice(mwpFile, 
"/dev/mapper/"+mapperName); err != nil { + return err + } + if err := closeCryptMapper(mapperName); err != nil { + return err + } + return os.Rename(tmpFile, emwpFile) +} + +// VerifyMWP runs an offline dm-verity verification of an MWP artifact +// against its info file. +func VerifyMWP(mwpFile, infoFile string) error { + ref, err := parseInfoFile(infoFile) + if err != nil { + return err + } + if _, err := os.Stat(mwpFile); err != nil { + return fmt.Errorf("MWP artifact not found: %s", mwpFile) + } + + fmt.Printf("Verifying dm-verity artifact %s\n", mwpFile) + err = run(exec.Command( + "veritysetup", + fmt.Sprintf("--format=%d", modelwrap.VerityFormat), + "--hash="+modelwrap.VerityHashAlgorithm, + fmt.Sprintf("--data-block-size=%d", modelwrap.VerityDataBlockSize), + fmt.Sprintf("--hash-block-size=%d", modelwrap.VerityHashBlockSize), + "--hash-offset="+ref.HashOffset, + "verify", + mwpFile, // data device + mwpFile, // hash device + ref.RootHash, + )) + if err != nil { + return err + } + fmt.Println("Verification OK.") + return nil +} + +// VerifyEMWP decrypts an EMWP artifact through a temporary dm-crypt +// mapping and verifies the inner dm-verity tree. 
+func VerifyEMWP(emwpFile, infoFile string, masterKey []byte) error { + ref, err := parseInfoFile(infoFile) + if err != nil { + return err + } + fi, err := os.Stat(emwpFile) + if err != nil { + return fmt.Errorf("EMWP artifact not found: %s", emwpFile) + } + + sectors := fi.Size()/modelwrap.GPTSectorSize - modelwrap.EMWPPartitionStartSector - modelwrap.EMWPGPTTrailingSectors + dmKeyFile := emwpFile + ".key.tmp" + mapperName := "modelwrap-emwp-verify-" + ref.RootHash[:16] + dmKey, err := modelwrap.DeriveKey(masterKey, ref) + if err != nil { + return err + } + + defer func() { + closeCryptMapper(mapperName) + os.Remove(dmKeyFile) + }() + if err := os.WriteFile(dmKeyFile, dmKey, 0600); err != nil { + return err + } + err = run(exec.Command( + "cryptsetup", "open", + "--type", "plain", + "--cipher", modelwrap.EMWPCipher, + "--key-size", strconv.Itoa(modelwrap.EMWPKeySizeBits), + "--sector-size", strconv.Itoa(modelwrap.EMWPSectorSize), + "--key-file", dmKeyFile, + "--offset", strconv.Itoa(modelwrap.EMWPPartitionStartSector), + "--skip", "0", + "--size", strconv.FormatInt(sectors, 10), + emwpFile, + mapperName, + )) + if err != nil { + return err + } + return VerifyMWP("/dev/mapper/"+mapperName, infoFile) +} + +// LoadMasterKey loads the EMWP master key from keyFile if set, else from +// the PRIVATE_MODEL_KEY_FILE or PRIVATE_MODEL_KEY_B64 environment. 
+func loadMasterKey(keyFile string) ([]byte, error) { + if keyFile == "" { + keyFile = os.Getenv("PRIVATE_MODEL_KEY_FILE") + } + var encoded string + if keyFile != "" { + data, err := os.ReadFile(keyFile) + if err != nil { + return nil, fmt.Errorf("reading EMWP master key file: %w", err) + } + encoded = string(data) + } else { + encoded = os.Getenv("PRIVATE_MODEL_KEY_B64") + } + if strings.TrimSpace(encoded) == "" { + return nil, fmt.Errorf("--key-file, PRIVATE_MODEL_KEY_FILE, or PRIVATE_MODEL_KEY_B64 is required for --encrypt") + } + return modelwrap.ParseMasterKey(encoded) +} + +func parseInfoFile(infoFile string) (*modelwrap.ArtifactRef, error) { + data, err := os.ReadFile(infoFile) + if err != nil { + return nil, err + } + ref, err := modelwrap.ParseRef(strings.TrimSpace(string(data))) + if err != nil { + return nil, fmt.Errorf("invalid info file %s: %w", infoFile, err) + } + return ref, nil +} + +func createSparseFile(path string, size int64) error { + f, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY|os.O_EXCL, 0644) + if err != nil { + return err + } + defer f.Close() + return f.Truncate(size) +} + +// copyToDevice writes src into the block device dst and syncs it. +func copyToDevice(src, dst string) error { + in, err := os.Open(src) + if err != nil { + return err + } + defer in.Close() + out, err := os.OpenFile(dst, os.O_WRONLY, 0) + if err != nil { + return err + } + defer out.Close() + if _, err := io.CopyBuffer(out, in, make([]byte, 4*1024*1024)); err != nil { + return fmt.Errorf("writing %s to %s: %w", src, dst, err) + } + return out.Sync() +} + +func closeCryptMapper(name string) error { + if _, err := os.Stat("/dev/mapper/" + name); os.IsNotExist(err) { + return nil + } + return run(exec.Command("cryptsetup", "close", name)) +} + +func run(cmd *exec.Cmd) error { + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + if err := cmd.Run(); err != nil { + return fmt.Errorf("%s: %w", cmd.Args[0], err) + } + return nil +}