Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
name: CI

on:
push:
branches:
- main
pull_request:

jobs:
test:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1

# Run checks in the same digest-pinned Go image the release builds with.
- name: Format, vet, and unit tests
run: |
set -euo pipefail
GO_IMAGE="$(sed -n 's/^FROM \(golang:[^ ]*\) AS build$/\1/p' Dockerfile)"
docker run --rm -v "$PWD:/src" -w /src -e GOFLAGS=-buildvcs=false \
"${GO_IMAGE}" \
sh -c 'test -z "$(gofmt -l .)" && go vet ./... && go vet -tags=integration ./... && go test ./...'

e2e:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1

# Builds the packer image, runs the pack->unwrap round-trip
# integration test inside it, then smoke-tests the CLI entrypoint.
- name: End-to-end test
run: bash test/e2e.sh
30 changes: 28 additions & 2 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
build-and-push:
runs-on: ubuntu-latest
permissions:
contents: read
contents: write
packages: write

steps:
Expand Down Expand Up @@ -43,12 +43,38 @@ jobs:
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3.12.0

- name: Build and push Docker image
id: push
uses: docker/build-push-action@ca052bb54ab0790a636c9b5f226502c73d547a25 # v5.4.0
with:
context: .
platforms: linux/amd64,linux/arm64
# The packer pins x86-only tool versions (e.g. gdisk).
platforms: linux/amd64
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
cache-from: type=gha
cache-to: type=gha,mode=max

# Build the launcher CLI (linux/amd64 only, like everything else)
# with the just-pushed image digest baked in, so installed binaries
# always run the matching pinned packer image.
- name: Build CLI binary
run: |
set -euo pipefail
IMAGE_REF="${REGISTRY}/${IMAGE_NAME}@${{ steps.push.outputs.digest }}"
GO_IMAGE="$(sed -n 's/^FROM \(golang:[^ ]*\) AS build$/\1/p' Dockerfile)"
mkdir -p dist
docker run --rm -v "$PWD:/src" -w /src \
-e CGO_ENABLED=0 -e GOOS=linux -e GOARCH=amd64 -e GOFLAGS=-buildvcs=false \
"${GO_IMAGE}" \
go build -trimpath -ldflags "-buildid= -X main.defaultImage=${IMAGE_REF}" \
-o dist/modelwrap ./cmd/modelwrap
(cd dist && sha256sum modelwrap > SHA256SUMS)

- name: Create GitHub release
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
gh release create "${GITHUB_REF_NAME}" dist/modelwrap dist/SHA256SUMS \
--title "${GITHUB_REF_NAME}" \
--notes "Packer image: \`${REGISTRY}/${IMAGE_NAME}@${{ steps.push.outputs.digest }}\`"
18 changes: 16 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
FROM golang:1.25-trixie@sha256:3140b898a3ec52ec5e8a7dc325a3dbdc732c35e0bde3fcc0e0d764c781d7da10 AS build

WORKDIR /src
COPY go.mod go.sum ./
RUN go mod download
COPY *.go ./
COPY wrap ./wrap
COPY cmd ./cmd
RUN CGO_ENABLED=0 go build -trimpath -buildvcs=false -ldflags=-buildid= -o /modelwrap ./cmd/modelwrap

FROM python:3.13-slim-trixie@sha256:b04b5d7233d2ad9c379e22ea8927cd1378cd15c60d4ef876c065b25ea8fb3bf3

ARG DEBIAN_SNAPSHOT=20260518T000000Z
Expand All @@ -19,9 +29,13 @@ COPY requirements.txt .

ENV CACHE_DIR="/cache"
ENV OUTPUT_DIR="/output"
# Marks the packing context so the CLI runs directly instead of
# re-launching itself in a container.
ENV MODELWRAP_IN_CONTAINER=1

# huggingface_hub provides the `hf` CLI used for model downloads.
RUN pip install --no-cache-dir --require-hashes -r requirements.txt

COPY pack.py .
COPY --from=build /modelwrap /usr/local/bin/modelwrap

ENTRYPOINT ["python3", "pack.py"]
ENTRYPOINT ["modelwrap"]
41 changes: 20 additions & 21 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,60 +4,59 @@ Builds reproducible dm-verity EROFS images of Hugging Face models. Learn more ab

For the artifact format, trust assumptions, and EMWP cryptographic parameters, see [SPEC.md](SPEC.md).

This repository is also the Go module `github.com/tinfoilsh/modelwrap`. The root package defines the protocol surface shared by both sides (format constants, artifact reference parsing, EMWP key derivation), `wrap` implements the packer, and `unwrap` implements the consumer side (used by [cvmimage](https://github.com/tinfoilsh/cvmimage) to mount model packs at boot).

## Usage

The `modelwrap` CLI is a launcher: it runs the packing inside a
digest-pinned container image (requires docker), so artifact bytes are
always produced by the pinned toolchain. Release binaries embed the
matching image digest.

```bash
docker run --rm -it \
-v $(pwd)/cache:/cache \
-v $(pwd)/output:/output \
-e HF_TOKEN="${HF_TOKEN}" \
ghcr.io/tinfoilsh/modelwrap@sha256:<digest> \
meta-llama/Llama-3.2-1B@4e20de362430cd3b72f300e6b0f18e50e7166e08
modelwrap mistralai/Ministral-3-3B-Instruct-2512@cfcb068fa7c44114cf77a462357c6cdcd2c304b4
```

To pack and encrypt a local/private model directory:

```bash
docker run --rm -it --privileged \
-v /path/to/model:/model:ro \
-v $(pwd)/output:/output \
-e PRIVATE_MODEL_KEY_B64="${PRIVATE_MODEL_KEY_B64}" \
ghcr.io/tinfoilsh/modelwrap@sha256:<digest> \
--model-dir /model \
--encrypt
PRIVATE_MODEL_KEY_B64="${PRIVATE_MODEL_KEY_B64}" modelwrap --model-dir /path/to/model --encrypt
```

## Arguments

- `model`: Hugging Face model ID, preferably with `@revision`. If omitted with `--model-dir`, modelwrap derives `basename@contentHash`.
- `--model-dir <path>`: pack a local/private model directory instead of downloading from Hugging Face. If `model` is provided without `@revision`, modelwrap uses the directory content hash as the revision.
- `--encrypt`: emit encrypted modelwrap output (`.emwp`). Requires device-mapper and loop device access; `--privileged` is the simplest Docker setup. Also requires `--key-file`, `PRIVATE_MODEL_KEY_FILE`, or `PRIVATE_MODEL_KEY_B64`.
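- `--encrypt`: emit encrypted modelwrap output (`.emwp`). Requires a master key via `--key-file`, `PRIVATE_MODEL_KEY_FILE`, or `PRIVATE_MODEL_KEY_B64`. In this mode the launcher runs the container with `--privileged`, because EMWP packing needs loop device and device-mapper access.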
- `--key-file <path>`: file containing the base64-encoded 64-byte EMWP master key.
- `--verify`: optional. Runs `veritysetup verify` for MWP and decrypts then verifies EMWP, which is useful for cached artifacts or release checks.
- `--output <path>` / `--cache <path>`: output and download cache directories (default `./output`, `./cache`).
- `--image <ref>`: override the packer container image the launcher runs (defaults to the release-pinned digest; also `MODELWRAP_IMAGE`).
- `--local`: run the packer directly on the current machine instead of in a container. Artifact bytes then depend on locally installed tool versions.

Environment fallbacks are supported for wrapper scripts: `MODEL`, `MODEL_DIR`, `VERIFY=1`, and `ENCRYPTION=1`. Set `HF_TOKEN` when accessing gated or private Hugging Face models. Use a digest-pinned container image, as shown above, when invoking modelwrap in production.
Set `HF_TOKEN` when accessing gated or private Hugging Face models; the launcher passes it into the container without exposing the value on the docker command line.

MWP mode emits:

- `output/meta-llama/Llama-3.2-1B/4e20de362430cd3b72f300e6b0f18e50e7166e08.mpk`: dm-verity EROFS image
- `output/meta-llama/Llama-3.2-1B/4e20de362430cd3b72f300e6b0f18e50e7166e08.info`: metadata file in the format `ROOTHASH_OFFSET_VERITYUUID`
- `output/mistralai/Ministral-3-3B-Instruct-2512/cfcb068fa7c44114cf77a462357c6cdcd2c304b4.mpk`: dm-verity EROFS image
- `output/mistralai/Ministral-3-3B-Instruct-2512/cfcb068fa7c44114cf77a462357c6cdcd2c304b4.info`: metadata file in the format `ROOTHASH_OFFSET_VERITYUUID`

EMWP mode additionally emits:

- `output/meta-llama/Llama-3.2-1B/4e20de362430cd3b72f300e6b0f18e50e7166e08.emwp`: disk image with one encrypted payload partition
- `output/meta-llama/Llama-3.2-1B/4e20de362430cd3b72f300e6b0f18e50e7166e08.emwp.info`: metadata file in the format `ROOTHASH_OFFSET_PARTUUID`
- `output/mistralai/Ministral-3-3B-Instruct-2512/cfcb068fa7c44114cf77a462357c6cdcd2c304b4.emwp`: disk image with one encrypted payload partition
- `output/mistralai/Ministral-3-3B-Instruct-2512/cfcb068fa7c44114cf77a462357c6cdcd2c304b4.emwp.info`: metadata file in the format `ROOTHASH_OFFSET_PARTUUID`

## Supply Chain Pins

The published image is built from a digest-pinned official Python image on Debian Trixie, installs `erofs-utils` and `cryptsetup` from a dated `snapshot.debian.org` archive, and installs Python dependencies from a hash-checked `requirements.txt`.
The published image is built in two digest-pinned stages: a Go builder that compiles the `modelwrap` binary (`CGO_ENABLED=0`, `-trimpath`, hash-locked Go dependencies via `go.sum`), and a runtime image based on the official Python image on Debian Trixie that installs `erofs-utils`, `cryptsetup`, and `gdisk` from a dated `snapshot.debian.org` archive plus a hash-checked `requirements.txt` (only `huggingface_hub`, which provides the `hf` CLI used for downloads).

The packer currently pins:

- `erofs-utils=1.8.6-1`
- `cryptsetup=2:2.7.5-2`
- `gdisk=1.0.10-2`

`pack.py` passes the dm-verity hash algorithm, format, and block sizes explicitly so tool default changes do not silently alter the dm-verity format.
The packer passes the dm-verity hash algorithm, format, and block sizes explicitly so tool default changes do not silently alter the dm-verity format.

To update Python dependencies, edit `requirements.in` and regenerate the lockfile:

Expand Down
4 changes: 4 additions & 0 deletions SPEC.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ The trust assumptions are: artifact bytes are *untrusted*; at runtime, identity

Modelwrap's verifiability is based on *reproducibility*, not attested provenance: artifacts are bit-for-bit reproducible, and the Modelwrap root hash is derived deterministically. An auditor can independently recompute the root hash from the model weights, for example from a pinned Hugging Face revision.

## Reproducibility and Packer Versioning

Artifact bytes, and therefore root hashes, are reproducible with respect to a specific digest-pinned packer image: the EROFS encoder and dm-verity tooling contribute to the output bytes, so a toolchain upgrade (e.g. a new `erofs-utils` version) can produce a different root hash for the same input model. This is not a format change and requires no version field in the artifact: existing artifacts remain valid and mountable because consumers act only on the attested reference, never on tool versions. An auditor recomputing a root hash must use the same packer image digest that produced the artifact.

## MWP Format

An MWP format is:
Expand Down
129 changes: 129 additions & 0 deletions cmd/modelwrap/launcher.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
package main

import (
"errors"
"fmt"
"os"
"os/exec"
"path/filepath"
)

// Fixed container-side paths the launcher rewrites host paths to. /cache
// and /output match the image's CACHE_DIR and OUTPUT_DIR environment.
const (
	// containerModelDir is where the host --model-dir is bind-mounted
	// read-only, and the value forwarded as --model-dir in the container.
	containerModelDir = "/model"
	// containerKeyFile is where the host key file is bind-mounted
	// read-only, and the value forwarded as --key-file in the container.
	containerKeyFile = "/run/modelwrap-key"
	// containerCacheDir is the mount point of the host cache directory.
	containerCacheDir = "/cache"
	// containerOutput is the mount point of the host output directory.
	containerOutput = "/output"
)

// passthroughEnv lists the secret environment variables forwarded to the
// container. Each one is passed by name only (`-e NAME`, no value), and
// only when it is set to a non-empty value on the host, so values never
// appear in the docker command line.
var passthroughEnv = []string{"HF_TOKEN", "PRIVATE_MODEL_KEY_B64"}

// launch runs the CLI inside the packer container image by spawning
// `docker run` with the arguments built by dockerRunArgs, wiring the
// child's stdin/stdout/stderr to this process.
//
// It returns the exit code to terminate with: 0 on success, the
// container process's own exit code when docker exits non-zero, and 1
// when the arguments cannot be built or docker cannot be run at all.
// Failures of the latter kind are reported on stderr.
func launch(opts cliOptions) int {
	dockerArgs, err := dockerRunArgs(opts)
	if err != nil {
		fmt.Fprintln(os.Stderr, "Error:", err)
		return 1
	}

	docker := exec.Command("docker", dockerArgs...)
	docker.Stdin, docker.Stdout, docker.Stderr = os.Stdin, os.Stdout, os.Stderr

	runErr := docker.Run()
	if runErr == nil {
		return 0
	}

	// A non-zero exit from docker is propagated as-is; anything else
	// (e.g. docker missing from PATH) is a launcher-level failure.
	var exited *exec.ExitError
	if !errors.As(runErr, &exited) {
		fmt.Fprintln(os.Stderr, "Error: running docker:", runErr)
		return 1
	}
	return exited.ExitCode()
}

// dockerRunArgs translates host-side options into the argument list of a
// `docker run` invocation of the same CLI inside the packer image.
//
// The result is laid out as: docker flags (--privileged when encrypting,
// bind mounts, secret passthrough), then the image reference, then the
// container-side CLI arguments with host paths rewritten to their fixed
// in-container locations.
//
// Side effects: the output and cache directories are created on the host
// if they do not exist.
//
// It returns an error when a path cannot be made absolute, when the
// output or cache directory cannot be created, or when the model
// directory or key file is missing or of the wrong kind. The latter is
// checked up front because `docker run -v` silently creates a missing
// host path as a root-owned directory instead of failing.
func dockerRunArgs(opts cliOptions) ([]string, error) {
	args := []string{"run", "--rm"}
	if opts.Encrypt {
		// EMWP packing needs loop device and device-mapper access.
		args = append(args, "--privileged")
	}

	// hostDir resolves a writable host directory, falling back to a
	// default name relative to the working directory, and pre-creates it.
	hostDir := func(kind, path, fallback string) (string, error) {
		if path == "" {
			path = fallback
		}
		abs, err := filepath.Abs(path)
		if err != nil {
			return "", fmt.Errorf("resolving %s directory %q: %w", kind, path, err)
		}
		// Pre-create so docker does not create it root-owned.
		if err := os.MkdirAll(abs, 0755); err != nil {
			return "", fmt.Errorf("creating %s directory: %w", kind, err)
		}
		return abs, nil
	}

	// existingInput resolves a read-only input path and requires it to
	// already exist as a directory (wantDir) or a non-directory.
	existingInput := func(kind, path string, wantDir bool) (string, error) {
		abs, err := filepath.Abs(path)
		if err != nil {
			return "", fmt.Errorf("resolving %s %q: %w", kind, path, err)
		}
		info, err := os.Stat(abs)
		if err != nil {
			return "", fmt.Errorf("checking %s: %w", kind, err)
		}
		switch {
		case wantDir && !info.IsDir():
			return "", fmt.Errorf("%s %q is not a directory", kind, abs)
		case !wantDir && info.IsDir():
			return "", fmt.Errorf("%s %q is a directory, not a file", kind, abs)
		}
		return abs, nil
	}

	outputDir, err := hostDir("output", opts.OutputDir, "output")
	if err != nil {
		return nil, err
	}
	cacheDir, err := hostDir("cache", opts.CacheDir, "cache")
	if err != nil {
		return nil, err
	}
	// NOTE(review): the -v syntax is colon-separated, so a host path
	// containing ':' will presumably be misparsed by docker — confirm
	// whether such paths need to be supported.
	args = append(args,
		"-v", outputDir+":"+containerOutput,
		"-v", cacheDir+":"+containerCacheDir,
	)

	if opts.ModelDir != "" {
		abs, err := existingInput("model directory", opts.ModelDir, true)
		if err != nil {
			return nil, err
		}
		args = append(args, "-v", abs+":"+containerModelDir+":ro")
	}

	// The flag wins over the environment fallback.
	keyFile := opts.KeyFile
	if keyFile == "" {
		keyFile = os.Getenv("PRIVATE_MODEL_KEY_FILE")
	}
	if keyFile != "" {
		abs, err := existingInput("key file", keyFile, false)
		if err != nil {
			return nil, err
		}
		args = append(args, "-v", abs+":"+containerKeyFile+":ro")
	}

	// Forward secrets by name only; docker reads the values from this
	// process's environment, keeping them off the command line.
	for _, name := range passthroughEnv {
		if os.Getenv(name) != "" {
			args = append(args, "-e", name)
		}
	}

	// Everything after the image reference is the in-container CLI.
	args = append(args, opts.image)

	if opts.ModelDir != "" {
		args = append(args, "--model-dir", containerModelDir)
	}
	if keyFile != "" {
		args = append(args, "--key-file", containerKeyFile)
	}
	if opts.Encrypt {
		args = append(args, "--encrypt")
	}
	if opts.Verify {
		args = append(args, "--verify")
	}
	if opts.Model != "" {
		args = append(args, opts.Model)
	}
	return args, nil
}
Loading
Loading