Skip to content

Commit 3164737

Browse files
wesm and claude authored
Add create-subset command for database subsetting (#162)
## Summary

- Adds a `create-subset` CLI command that copies the N most recent messages (and all referenced data) from the archive into a new, standalone msgvault database
- Copies all dependent rows (conversations, participants, labels, recipients, bodies, attachments) in FK-safe dependency order with foreign key verification
- Updates denormalized conversation counts and populates FTS index in the destination database
- Includes tests covering basic subsetting, row limits, FTS population, conversation count consistency, pre-existing directories, duplicate destination detection, and SQL injection/control character path validation

Supersedes #101.

## Test plan

- [x] Unit tests pass (`go test ./internal/store/ -run TestCopySubset`)
- [ ] Manual test: `./msgvault create-subset -o /tmp/subset --rows 100` against a real archive
- [ ] Verify subset DB works: `MSGVAULT_HOME=/tmp/subset msgvault tui`

🤖 Generated with [Claude Code](https://claude.com/claude-code)

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent ee59808 commit 3164737

3 files changed

Lines changed: 1770 additions & 0 deletions

File tree

cmd/msgvault/cmd/create_subset.go

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
package cmd
2+
3+
import (
4+
"fmt"
5+
"os"
6+
"path/filepath"
7+
"time"
8+
9+
"github.com/spf13/cobra"
10+
"github.com/wesm/msgvault/internal/store"
11+
)
12+
13+
var createSubsetCmd = &cobra.Command{
14+
Use: "create-subset",
15+
Short: "Create a smaller database from the archive",
16+
Long: `Create a new msgvault database containing a subset of the
17+
most recent messages. Useful for testing, demos, or sharing.
18+
19+
The destination directory will contain a complete msgvault.db with
20+
all referenced data (conversations, participants, labels, etc.)
21+
and can be used directly:
22+
23+
MSGVAULT_HOME=/path/to/subset msgvault tui`,
24+
RunE: runCreateSubset,
25+
}
26+
27+
// subsetOutput receives the value of the --output / -o flag: the
// destination directory in which msgvault.db is created.
var subsetOutput string

// subsetRows receives the value of the --rows flag: the number of most
// recent messages to copy.
var subsetRows int
31+
32+
func init() {
33+
createSubsetCmd.Flags().StringVarP(
34+
&subsetOutput, "output", "o", "",
35+
"destination directory (msgvault.db created inside)",
36+
)
37+
createSubsetCmd.Flags().IntVar(
38+
&subsetRows, "rows", 0,
39+
"number of most recent messages to copy",
40+
)
41+
_ = createSubsetCmd.MarkFlagRequired("output")
42+
_ = createSubsetCmd.MarkFlagRequired("rows")
43+
rootCmd.AddCommand(createSubsetCmd)
44+
}
45+
46+
func runCreateSubset(cmd *cobra.Command, _ []string) error {
47+
if err := MustBeLocal("create-subset"); err != nil {
48+
return err
49+
}
50+
51+
if subsetRows <= 0 {
52+
return fmt.Errorf("--rows must be a positive integer")
53+
}
54+
55+
srcDBPath := cfg.DatabaseDSN()
56+
if _, err := os.Stat(srcDBPath); os.IsNotExist(err) {
57+
return fmt.Errorf(
58+
"source database not found: %s\n"+
59+
"Run 'msgvault init-db' and sync first",
60+
srcDBPath,
61+
)
62+
}
63+
64+
dstDir, err := filepath.Abs(subsetOutput)
65+
if err != nil {
66+
return fmt.Errorf("resolve output path: %w", err)
67+
}
68+
69+
fmt.Fprintf(os.Stderr,
70+
"Copying %d messages from %s...\n", subsetRows, srcDBPath,
71+
)
72+
73+
result, err := store.CopySubset(srcDBPath, dstDir, subsetRows)
74+
if err != nil {
75+
return fmt.Errorf("create subset: %w", err)
76+
}
77+
78+
fmt.Fprintf(os.Stderr,
79+
"Created subset (%s)\n", result.Elapsed.Round(time.Millisecond),
80+
)
81+
fmt.Printf("Sources: %d\n", result.Sources)
82+
fmt.Printf("Messages: %d\n", result.Messages)
83+
fmt.Printf("Conversations: %d\n", result.Conversations)
84+
fmt.Printf("Participants: %d\n", result.Participants)
85+
fmt.Printf("Labels: %d\n", result.Labels)
86+
fmt.Printf("Database size: %s\n", formatSize(result.DBSize))
87+
88+
if int64(subsetRows) > result.Messages {
89+
fmt.Fprintf(os.Stderr,
90+
"Note: requested %d messages but source only had %d\n",
91+
subsetRows, result.Messages,
92+
)
93+
}
94+
95+
fmt.Fprintf(os.Stderr,
96+
"\nTo use: MSGVAULT_HOME=%s msgvault tui\n", dstDir,
97+
)
98+
99+
return nil
100+
}

0 commit comments

Comments
 (0)