Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ require (
golang.org/x/sync v0.19.0
golang.org/x/sys v0.41.0
google.golang.org/grpc v1.80.0
google.golang.org/protobuf v1.36.11
oras.land/oras-go/v2 v2.6.0
)

Expand Down Expand Up @@ -164,7 +165,6 @@ require (
google.golang.org/api v0.214.0 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20260226221140-a57be14db171 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20260226221140-a57be14db171 // indirect
google.golang.org/protobuf v1.36.11 // indirect
gopkg.in/ini.v1 v1.67.0 // indirect
gopkg.in/warnings.v0 v0.1.2 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
Expand Down
4 changes: 4 additions & 0 deletions pkg/modelfile/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,10 @@ var (
"*.ftz", // FastText compressed model
"*.ark", // Kaldi ark format (speech/audio models)
"*.db", // Database files (LMDB, etc.)

// TensorFlow SavedModel literal-name files (no extension).
"feature_map", // TF SavedModel feature map definition
"checkpoint", // TF checkpoint pointer file (literal name)
}

// Code file patterns - supported script and notebook files.
Expand Down
9 changes: 9 additions & 0 deletions pkg/modelfile/constants_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,15 @@ func TestInferFileType(t *testing.T) {
{"at threshold", "borderline", WeightFileSizeThreshold, FileTypeCode},
// Just above threshold should be model
{"above threshold", "borderline", WeightFileSizeThreshold + 1, FileTypeModel},

// TF SavedModel literal-name files: must be MODEL even when 0 bytes,
// independent of the size heuristic that would otherwise classify them as CODE.
{"feature_map literal", "feature_map", 0, FileTypeModel},
{"feature_map small", "feature_map", 1024, FileTypeModel},
{"checkpoint literal small", "checkpoint", 32, FileTypeModel},
// Negative: the literal patterns must not match same-stem-different-extension files.
{"feature_map.json is config", "feature_map.json", 1024, FileTypeConfig},
{"checkpoint.bin is model via *.bin", "checkpoint.bin", 1024, FileTypeModel},
}

assert := assert.New(t)
Expand Down
32 changes: 32 additions & 0 deletions pkg/modelfile/modelfile.go
Original file line number Diff line number Diff line change
Expand Up @@ -346,13 +346,45 @@ func (mf *modelfile) generateByWorkspace(config *configmodelfile.GenerateConfig)
return err
}

// ONNX external_data post-processing: any tensor file referenced by an .onnx
// file via external_data.location is unconditionally a model weight file,
// regardless of its name or size. Walker may have classified small external
// tensor files as code/config/doc by extension/size heuristic; reclassify them.
mf.reclassifyONNXExternalData()

if mf.model.Size() == 0 && mf.code.Size() == 0 && mf.dataset.Size() == 0 {
return fmt.Errorf("no model/code/dataset found - you have to create the Modelfile by yourself")
}

return nil
}

// reclassifyONNXExternalData scans every .onnx file already in mf.model,
// extracts external_data.location paths, and moves those paths into mf.model
// from whichever bucket the walker placed them in.
func (mf *modelfile) reclassifyONNXExternalData() {
for _, raw := range mf.model.Values() {
modelRel, ok := raw.(string)
if !ok || !strings.HasSuffix(strings.ToLower(modelRel), ".onnx") {
continue
}
onnxAbs := filepath.Join(mf.workspace, modelRel)
extPaths, err := ExtractONNXExternalDataPaths(onnxAbs)
if err != nil {
fmt.Fprintf(os.Stderr, "modelfile: parse ONNX external_data from %s failed: %v\n", modelRel, err)
continue
}
onnxDir := filepath.Dir(modelRel)
for _, ext := range extPaths {
relExt := filepath.Join(onnxDir, ext)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Constrain ONNX external_data paths to workspace

external_data.location is trusted verbatim and joined into relExt without any containment check, so a crafted ONNX can inject ../... (or absolute) paths into the generated MODEL entries. In this repo, build processing later accepts these paths as real files (see processor.base.Process) and will read/upload them, which can pull data from outside the workspace. Please reject non-local locations (absolute or traversal) and only promote files that resolve under the workspace root.

Useful? React with 👍 / 👎.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Addressed in 660ae9b. reclassifyONNXExternalData now only moves paths already collected by the workspace walker (walkerCollected membership check). Paths that resolve outside the workspace, get cleaned to absolute paths, or otherwise never enter mf.{config,model,code,doc} are silently dropped — no ../escape or absolute path can leak into MODEL.

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

security-high high

The ext path extracted from the ONNX file should be validated to ensure it does not escape the workspace directory via path traversal (e.g., ../../etc/passwd) or absolute paths. While filepath.Join cleans the path, it doesn't prevent escaping if the resulting relative path starts with ...

Suggested change
relExt := filepath.Join(onnxDir, ext)
relExt := filepath.Clean(filepath.Join(onnxDir, ext))
if strings.HasPrefix(relExt, "..") || filepath.IsAbs(ext) {
continue
}

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Addressed in 660ae9b, but with a stronger invariant than the prefix check. We now require the path to already be present in one of the walker's hashsets (walkerCollected) before promoting it to MODEL. This subsumes the HasPrefix("..") and IsAbs rejections (the walker never visits such paths), and additionally absorbs ExcludePatterns, isSkippable directories, file count / size limits, and missing-on-disk files — all in one membership check. Test: TestNewModelfileByWorkspace_ONNXExternalDataPathTraversalIgnored.

mf.code.Remove(relExt)
mf.config.Remove(relExt)
mf.doc.Remove(relExt)
mf.model.Add(relExt)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Preserve include/exclude filters for ONNX reclassification

The ONNX post-pass unconditionally does mf.model.Add(relExt) for every referenced tensor path, but this runs after the walk/filter phase and does not reapply GenerateConfig include/exclude rules. As a result, files that users explicitly excluded can be reintroduced into MODEL and then included in builds/uploads, which violates CLI filtering intent. Reclassification should only move paths that were already admitted by the workspace filter logic.

Useful? React with 👍 / 👎.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Addressed in 660ae9b. The walker-membership check is exactly the proposed fix: a path can only be reclassified if it survived the walker's filter pass — which already enforces ExcludePatterns, isSkippable, file count / size limits, and workspace boundary. So an --exclude'd tensor can never be re-added by the ONNX post-pass.

}
}
}

// generateByModelConfig generates the modelfile by the model config, such as config.json and generation_config.json.
func (mf *modelfile) generateByModelConfig() error {
// Get config map from json files. Collect all the keys and values from the config files
Expand Down
171 changes: 171 additions & 0 deletions pkg/modelfile/modelfile_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -811,6 +811,111 @@ func TestNewModelfileByWorkspace(t *testing.T) {
expectCodes: []string{},
expectName: "selective-include",
},
{
// Mirrors screenshot 1 (TF SavedModel directory) plus the screenshots 4/5
// /home/admin/<ts>/{checkpoint,variables,...} layout: variables/ shards
// and a sibling checkpoint/ directory with model.ckpt-NNN.{data-*,index,meta}.
// Pre-fix, feature_map and the literal `checkpoint` files would have been
// classified as CODE because they have no extension and are small.
name: "tensorflow saved_model directory",
setupFiles: map[string]string{
"saved_model.pb": "",
"feature_map": "",
"tf_signature.txt": "",
"alps.meta": "",
"ais-sync.done": "",
"variables/checkpoint": "",
"variables/variables.index": "",
"variables/variables.data-00000-of-00002": "",
"variables/variables.data-00001-of-00002": "",
"checkpoint/checkpoint": "",
"checkpoint/model.ckpt-756921.index": "",
"checkpoint/model.ckpt-756921.meta": "",
"checkpoint/model.ckpt-756921.data-00000-of-00002": "",
"checkpoint/model.ckpt-756921.data-00001-of-00002": "",
},
config: &configmodelfile.GenerateConfig{
Name: "tf-savedmodel",
},
expectError: false,
// *.meta hits ConfigFilePatterns first (alps.meta, model.ckpt-*.meta).
expectConfigs: []string{
"alps.meta",
"checkpoint/model.ckpt-756921.meta",
},
expectModels: []string{
"saved_model.pb",
"feature_map",
"variables/checkpoint",
"variables/variables.index",
"variables/variables.data-00000-of-00002",
"variables/variables.data-00001-of-00002",
"checkpoint/checkpoint",
"checkpoint/model.ckpt-756921.index",
"checkpoint/model.ckpt-756921.data-00000-of-00002",
"checkpoint/model.ckpt-756921.data-00001-of-00002",
},
// ais-sync.done has no extension and is empty -> falls back to CODE by
// the size heuristic. Documented intentional behavior (user accepted).
expectCodes: []string{"ais-sync.done"},
expectDocs: []string{"tf_signature.txt"},
expectName: "tf-savedmodel",
},
{
// Mirrors screenshot 3/4/5: a parent directory containing base/<ts>/
// and delta/<ts>/ subdirs, each holding a complete TF SavedModel +
// checkpoint layout. Verifies the walker recurses correctly and that
// relative paths in the resulting Modelfile preserve the timestamp
// directory prefix so pull-side reconstruction restores the full tree.
name: "online-learning base plus delta tree",
setupFiles: map[string]string{
"base/20251202030708/saved_model.pb": "",
"base/20251202030708/feature_map": "",
"base/20251202030708/tf_signature.txt": "",
"base/20251202030708/alps.meta": "",
"base/20251202030708/ais-sync.done": "",
"base/20251202030708/variables/checkpoint": "",
"base/20251202030708/variables/variables.index": "",
"base/20251202030708/variables/variables.data-00000-of-00002": "",
"delta/20251202060205/saved_model.pb": "",
"delta/20251202060205/feature_map": "",
"delta/20251202060205/tf_signature.txt": "",
"delta/20251202060205/alps.meta": "",
"delta/20251202060205/ais-sync.done": "",
"delta/20251202060205/variables/checkpoint": "",
"delta/20251202060205/variables/variables.index": "",
"delta/20251202060205/variables/variables.data-00000-of-00002": "",
},
config: &configmodelfile.GenerateConfig{
Name: "online-learning",
},
expectError: false,
expectConfigs: []string{
"base/20251202030708/alps.meta",
"delta/20251202060205/alps.meta",
},
expectModels: []string{
"base/20251202030708/saved_model.pb",
"base/20251202030708/feature_map",
"base/20251202030708/variables/checkpoint",
"base/20251202030708/variables/variables.index",
"base/20251202030708/variables/variables.data-00000-of-00002",
"delta/20251202060205/saved_model.pb",
"delta/20251202060205/feature_map",
"delta/20251202060205/variables/checkpoint",
"delta/20251202060205/variables/variables.index",
"delta/20251202060205/variables/variables.data-00000-of-00002",
},
expectCodes: []string{
"base/20251202030708/ais-sync.done",
"delta/20251202060205/ais-sync.done",
},
expectDocs: []string{
"base/20251202030708/tf_signature.txt",
"delta/20251202060205/tf_signature.txt",
},
expectName: "online-learning",
},
}

assert := assert.New(t)
Expand Down Expand Up @@ -2125,3 +2230,69 @@ func min(a, b int64) int64 {
}
return b
}

// TestNewModelfileByWorkspace_ONNXExternalData mirrors screenshot 2: an ONNX
// directory where model.onnx references many extension-less tensor files via
// external_data. Pre-fix, small external tensor files were classified as CODE
// by the size heuristic; the ONNX post-processor must reclassify them all as
// MODEL deterministically — regardless of the file's size.
func TestNewModelfileByWorkspace_ONNXExternalData(t *testing.T) {
// Subset of the 36 extension-less files seen in screenshot 2. Names taken
// verbatim from the actual workflow_10062365 directory.
externalLocations := []string{
"tower_deep_layer_0_kernel_read__448_1",
"tower_shallow_layer_0_kernel_read__440_0",
"moe_layer_layer_0_kernel__399_15",
"moe_layer_layer_0_domain_bn_ExpandDims__400_16",
"moe_layer_search_layer_0_kernel__413_3",
"moe_layer_non_search_layer_0_kernel__427_9",
"feature_gate_main_kernel_read__352_27",
"feature_gate_main_domain_bn_ExpandDims__354_28",
"gated_dcn_layer_0_transform_kernel__385_21",
"conversion_layer_0_kernel_read__364_35",
"main_domain_embedding_Mark_output_user_main_domain_kernel_read__270_20",
"external_data_for_resource_handle",
}

tempDir, err := os.MkdirTemp("", "modelfile-onnx-*")
require.NoError(t, err)
defer os.RemoveAll(tempDir)

// Write the ONNX file with external_data references encoded into protobuf bytes.
require.NoError(t, os.WriteFile(
filepath.Join(tempDir, "model.onnx"),
buildMinimalONNXBytes(externalLocations),
0o644,
))

// Create each external tensor file as an empty placeholder.
for _, name := range externalLocations {
require.NoError(t, os.WriteFile(filepath.Join(tempDir, name), nil, 0o644))
}

// Sibling files from the screenshot.
require.NoError(t, os.WriteFile(filepath.Join(tempDir, "tf_signature.txt"), nil, 0o644))
require.NoError(t, os.WriteFile(filepath.Join(tempDir, "variable_sequence_def_0.onnx"), buildMinimalONNXBytes(nil), 0o644))

mf, err := NewModelfileByWorkspace(tempDir, &configmodelfile.GenerateConfig{
Name: "onnx-external-data",
Workspace: tempDir,
})
require.NoError(t, err)

models := mf.GetModels()
codes := mf.GetCodes()
configs := mf.GetConfigs()

// Every external_data location must appear in MODEL, regardless of file size.
for _, name := range externalLocations {
assert.Contains(t, models, name, "external_data tensor %q must be reclassified as MODEL", name)
assert.NotContains(t, codes, name, "external_data tensor %q must NOT be in CODE", name)
assert.NotContains(t, configs, name, "external_data tensor %q must NOT be in CONFIG", name)
}
// Both .onnx files themselves are MODELs by extension.
assert.Contains(t, models, "model.onnx")
assert.Contains(t, models, "variable_sequence_def_0.onnx")
// tf_signature.txt is a doc per default DocFilePatterns (*.txt).
assert.Contains(t, mf.GetDocs(), "tf_signature.txt")
}
Loading
Loading