diff --git a/Taskfile.yml b/Taskfile.yml index 0374715f3..250c9d056 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -111,6 +111,23 @@ tasks: cmds: - go test {{ .CLI_ARGS }} ./... + go:test:meilisearch: + desc: Runs the Meilisearch search engine integration tests against a dockerized instance + dir: backend + cmds: + - docker run -d --rm --name homebox-meili-test -p 7711:7700 -e MEILI_MASTER_KEY=test-master-key -e MEILI_NO_ANALYTICS=true getmeili/meilisearch:v1.22 + - defer: docker stop homebox-meili-test + - | + i=0 + while [ "$i" -lt 60 ]; do + if curl -sf http://localhost:7711/health > /dev/null; then exit 0; fi + i=$((i + 1)) + sleep 0.5 + done + echo "Meilisearch did not become healthy within 30s" >&2 + exit 1 + - TEST_MEILISEARCH_URL=http://localhost:7711 TEST_MEILISEARCH_KEY=test-master-key go test ./internal/data/search/ -v -count=1 + go:coverage: desc: Runs all go tests with -race flag and generates a coverage report dir: backend diff --git a/backend/app/api/cli_reset_password.go b/backend/app/api/cli_reset_password.go index b9cbe8c33..9d9e4ca71 100644 --- a/backend/app/api/cli_reset_password.go +++ b/backend/app/api/cli_reset_password.go @@ -118,7 +118,7 @@ func generateResetLinkOffline(cfg *config.Config, email string) (string, error) } bus := eventbus.New() - repos := repo.New(c, bus, cfg.Storage, cfg.Database.PubSubConnString, cfg.Thumbnail) + repos := repo.New(c, bus, cfg.Storage, cfg.Database.PubSubConnString, cfg.Thumbnail, nil) svc := services.New(repos) baseURL := strings.TrimSuffix(cfg.Options.Hostname, "/") diff --git a/backend/app/api/handlers/v1/v1_ctrl_entities.go b/backend/app/api/handlers/v1/v1_ctrl_entities.go index a64d5c541..03be2ce10 100644 --- a/backend/app/api/handlers/v1/v1_ctrl_entities.go +++ b/backend/app/api/handlers/v1/v1_ctrl_entities.go @@ -48,12 +48,13 @@ func startEntityCtrlSpan(ctx context.Context, name string, attrs ...attribute.Ke // @Summary Query All Entities // @Tags Entities // @Produce json -// @Param q query string 
false "search string" -// @Param page query int false "page number" -// @Param pageSize query int false "items per page" -// @Param tags query []string false "tags Ids" collectionFormat(multi) -// @Param parentIds query []string false "parent Ids" collectionFormat(multi) -// @Success 200 {object} repo.EntityListResult +// @Param q query string false "search string; matches names, descriptions, serial/model numbers, manufacturers, notes, purchase sources, tag names, and custom field values. Use #<assetId> to look up by asset ID and double quotes for exact phrases" +// @Param page query int false "page number" +// @Param pageSize query int false "items per page" +// @Param tags query []string false "tags Ids" collectionFormat(multi) +// @Param matchAllTags query bool false "require all selected tags to match (AND) instead of any (OR)" +// @Param parentIds query []string false "parent Ids" collectionFormat(multi) +// @Success 200 {object} repo.EntityListResult // @Router /v1/entities [GET] // @Security Bearer func (ctrl *V1Controller) HandleEntitiesGetAll() errchain.HandlerFunc { @@ -80,6 +81,7 @@ func (ctrl *V1Controller) HandleEntitiesGetAll() errchain.HandlerFunc { ParentIDs: queryUUIDList(params, "parentIds"), TagIDs: queryUUIDList(params, "tags"), NegateTags: queryBool(params.Get("negateTags")), + MatchAllTags: queryBool(params.Get("matchAllTags")), OnlyWithoutPhoto: queryBool(params.Get("onlyWithoutPhoto")), OnlyWithPhoto: queryBool(params.Get("onlyWithPhoto")), IncludeArchived: queryBool(params.Get("includeArchived")), diff --git a/backend/app/api/main.go b/backend/app/api/main.go index c33fa025e..f3a3fcc75 100644 --- a/backend/app/api/main.go +++ b/backend/app/api/main.go @@ -16,6 +16,7 @@ import ( "github.com/sysadminsmedia/homebox/backend/internal/core/services/reporting/eventbus" "github.com/sysadminsmedia/homebox/backend/internal/data/ent" "github.com/sysadminsmedia/homebox/backend/internal/data/repo" + 
"github.com/sysadminsmedia/homebox/backend/internal/data/search" "github.com/sysadminsmedia/homebox/backend/internal/sys/analytics" "github.com/sysadminsmedia/homebox/backend/internal/sys/config" "github.com/sysadminsmedia/homebox/backend/internal/sys/otel" @@ -162,7 +163,13 @@ func run(cfg *config.Config) error { app.bus = eventbus.New() app.db = c - app.repos = repo.New(c, app.bus, cfg.Storage, cfg.Database.PubSubConnString, cfg.Thumbnail) + + searchEngine, err := search.NewEngine(cfg.Search, c, app.bus) + if err != nil { + log.Error().Err(err).Str("driver", cfg.Search.Driver).Msg("failed to create search engine") + return err + } + app.repos = repo.New(c, app.bus, cfg.Storage, cfg.Database.PubSubConnString, cfg.Thumbnail, searchEngine) // Attachment-key escaping in fileblob only flattens paths on Windows // (where os.PathSeparator is "\"), so the legacy-path rename is a Windows- diff --git a/backend/app/api/static/docs/docs.go b/backend/app/api/static/docs/docs.go index 279bc5eb8..e9824f2f1 100644 --- a/backend/app/api/static/docs/docs.go +++ b/backend/app/api/static/docs/docs.go @@ -247,7 +247,7 @@ const docTemplate = `{ "parameters": [ { "type": "string", - "description": "search string", + "description": "search string; matches names, descriptions, serial/model numbers, manufacturers, notes, purchase sources, tag names, and custom field values. 
Use #\u003cassetId\u003e to look up by asset ID and double quotes for exact phrases", "name": "q", "in": "query" }, @@ -273,6 +273,12 @@ const docTemplate = `{ "name": "tags", "in": "query" }, + { + "type": "boolean", + "description": "require all selected tags to match (AND) instead of any (OR)", + "name": "matchAllTags", + "in": "query" + }, { "type": "array", "items": { diff --git a/backend/app/api/static/docs/openapi-3.json b/backend/app/api/static/docs/openapi-3.json index f24740842..33e9def05 100644 --- a/backend/app/api/static/docs/openapi-3.json +++ b/backend/app/api/static/docs/openapi-3.json @@ -245,7 +245,7 @@ "summary": "Query All Entities", "parameters": [ { - "description": "search string", + "description": "search string; matches names, descriptions, serial/model numbers, manufacturers, notes, purchase sources, tag names, and custom field values. Use #<assetId> to look up by asset ID and double quotes for exact phrases", "name": "q", "in": "query", "schema": { @@ -280,6 +280,14 @@ } } }, + { + "description": "require all selected tags to match (AND) instead of any (OR)", + "name": "matchAllTags", + "in": "query", + "schema": { + "type": "boolean" + } + }, { "description": "parent Ids", "name": "parentIds", diff --git a/backend/app/api/static/docs/openapi-3.yaml b/backend/app/api/static/docs/openapi-3.yaml index c074bb7be..002d3368e 100644 --- a/backend/app/api/static/docs/openapi-3.yaml +++ b/backend/app/api/static/docs/openapi-3.yaml @@ -144,7 +144,10 @@ paths: - Entities summary: Query All Entities parameters: - - description: search string + - description: "search string; matches names, descriptions, serial/model numbers, + manufacturers, notes, purchase sources, tag names, and custom field + values. 
Use #<assetId> to look up by asset ID and double quotes for + exact phrases" name: q in: query schema: @@ -167,6 +170,11 @@ paths: type: array items: type: string + - description: require all selected tags to match (AND) instead of any (OR) + name: matchAllTags + in: query + schema: + type: boolean - description: parent Ids name: parentIds in: query diff --git a/backend/app/api/static/docs/swagger.json b/backend/app/api/static/docs/swagger.json index 666f56c41..fe9108833 100644 --- a/backend/app/api/static/docs/swagger.json +++ b/backend/app/api/static/docs/swagger.json @@ -244,7 +244,7 @@ "parameters": [ { "type": "string", - "description": "search string", + "description": "search string; matches names, descriptions, serial/model numbers, manufacturers, notes, purchase sources, tag names, and custom field values. Use #\u003cassetId\u003e to look up by asset ID and double quotes for exact phrases", "name": "q", "in": "query" }, @@ -270,6 +270,12 @@ "name": "tags", "in": "query" }, + { + "type": "boolean", + "description": "require all selected tags to match (AND) instead of any (OR)", + "name": "matchAllTags", + "in": "query" + }, { "type": "array", "items": { diff --git a/backend/app/api/static/docs/swagger.yaml b/backend/app/api/static/docs/swagger.yaml index 2f8280708..99b9dd00d 100644 --- a/backend/app/api/static/docs/swagger.yaml +++ b/backend/app/api/static/docs/swagger.yaml @@ -2479,7 +2479,9 @@ paths: /v1/entities: get: parameters: - - description: search string + - description: 'search string; matches names, descriptions, serial/model numbers, + manufacturers, notes, purchase sources, tag names, and custom field values. 
+ Use #<assetId> to look up by asset ID and double quotes for exact phrases' in: query name: q type: string @@ -2498,6 +2500,10 @@ paths: type: string name: tags type: array + - description: require all selected tags to match (AND) instead of any (OR) + in: query + name: matchAllTags + type: boolean - collectionFormat: multi description: parent Ids in: query diff --git a/backend/go.mod b/backend/go.mod index d58323597..38c33821a 100644 --- a/backend/go.mod +++ b/backend/go.mod @@ -21,6 +21,7 @@ require ( github.com/gorilla/schema v1.4.1 github.com/hay-kot/httpkit v0.0.11 github.com/jackc/pgx/v5 v5.10.0 + github.com/meilisearch/meilisearch-go v0.36.3 github.com/olahol/melody v1.4.0 github.com/pkg/errors v0.9.1 github.com/pressly/goose/v3 v3.27.1 @@ -95,6 +96,7 @@ require ( github.com/IBM/sarama v1.50.2 // indirect github.com/KyleBanks/depth v1.2.1 // indirect github.com/agext/levenshtein v1.2.3 // indirect + github.com/andybalholm/brotli v1.2.1 // indirect github.com/apparentlymart/go-textseg/v15 v15.0.0 // indirect github.com/aws/aws-sdk-go-v2 v1.41.9 // indirect github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.11 // indirect diff --git a/backend/go.sum b/backend/go.sum index 9493468fe..400418d28 100644 --- a/backend/go.sum +++ b/backend/go.sum @@ -79,6 +79,8 @@ github.com/XSAM/otelsql v0.42.0 h1:Li0xF4eJUxG2e0x3D4rvRlys1f27yJKvjTh7ljkUP5o= github.com/XSAM/otelsql v0.42.0/go.mod h1:4mOrEv+cS1KmKzrvTktvJnstr5GtKSAK+QHvFR9OcpI= github.com/agext/levenshtein v1.2.3 h1:YB2fHEn0UJagG8T1rrWknE3ZQzWM06O8AMAatNn7lmo= github.com/agext/levenshtein v1.2.3/go.mod h1:JEDfjyjHDjOF/1e4FlBE/PkbqA9OfWu2ki2W0IB5558= +github.com/andybalholm/brotli v1.2.1 h1:R+f5xP285VArJDRgowrfb9DqL18yVK0gKAW/F+eTWro= +github.com/andybalholm/brotli v1.2.1/go.mod h1:rzTDkvFWvIrjDXZHkuS16NPggd91W3kUSvPlQ1pLaKY= github.com/antithesishq/antithesis-sdk-go v0.5.0-default-no-op h1:Ucf+QxEKMbPogRO5guBNe5cgd9uZgfoJLOYs8WWhtjM= github.com/antithesishq/antithesis-sdk-go v0.5.0-default-no-op/go.mod 
h1:IUpT2DPAKh6i/YhSbt6Gl3v2yvUZjmKncl7U91fup7E= github.com/apparentlymart/go-textseg/v15 v15.0.0 h1:uYvfpb3DyLSCGWnctWKGj857c6ew1u1fNQOlOtuGxQY= @@ -133,12 +135,6 @@ github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1x github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/clipperhouse/displaywidth v0.6.2 h1:ZDpTkFfpHOKte4RG5O/BOyf3ysnvFswpyYrV7z2uAKo= -github.com/clipperhouse/displaywidth v0.6.2/go.mod h1:R+kHuzaYWFkTm7xoMmK1lFydbci4X2CicfbGstSGg0o= -github.com/clipperhouse/stringish v0.1.1 h1:+NSqMOr3GR6k1FdRhhnXrLfztGzuG+VuFDfatpWHKCs= -github.com/clipperhouse/stringish v0.1.1/go.mod h1:v/WhFtE1q0ovMta2+m+UbpZ+2/HEXNWYXQgCt4hdOzA= -github.com/clipperhouse/uax29/v2 v2.3.0 h1:SNdx9DVUqMoBuBoW3iLOj4FQv3dN5mDtuqwuhIGpJy4= -github.com/clipperhouse/uax29/v2 v2.3.0/go.mod h1:Wn1g7MK6OoeDT0vL+Q0SQLDz/KpfsVRgg6W7ihQeh4g= github.com/cncf/xds/go v0.0.0-20260202195803-dba9d589def2 h1:aBangftG7EVZoUb69Os8IaYg++6uMOdKK83QtkkvJik= github.com/cncf/xds/go v0.0.0-20260202195803-dba9d589def2/go.mod h1:qwXFYgsP6T7XnJtbKlf1HP8AjxZZyzxMmc+Lq5GjlU4= github.com/coder/websocket v1.8.14 h1:9L0p0iKiNOibykf283eHkKUHHrpG7f65OE3BhhO7v9g= @@ -334,10 +330,10 @@ github.com/mattn/go-colorable v0.1.15 h1:+u9SLTRGnXv73cEsnsmoZBom+dMU88B2M0aDcWy github.com/mattn/go-colorable v0.1.15/go.mod h1:6LmQG8QLFO4G5z1gPvYEzlUgJ2wF+stgPZH1UqBm1s8= github.com/mattn/go-isatty v0.0.22 h1:j8l17JJ9i6VGPUFUYoTUKPSgKe/83EYU2zBC7YNKMw4= github.com/mattn/go-isatty v0.0.22/go.mod h1:ZXfXG4SQHsB/w3ZeOYbR0PrPwLy+n6xiMrJlRFqopa4= -github.com/mattn/go-runewidth v0.0.19 h1:v++JhqYnZuu5jSKrk9RbgF5v4CGUjqRfBm05byFGLdw= -github.com/mattn/go-runewidth v0.0.19/go.mod h1:XBkDxAl56ILZc9knddidhrOlY5R/pDhgLpndooCuJAs= github.com/mattn/go-sqlite3 v1.14.34 
h1:3NtcvcUnFBPsuRcno8pUtupspG/GM+9nZ88zgJcp6Zk= github.com/mattn/go-sqlite3 v1.14.34/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= +github.com/meilisearch/meilisearch-go v0.36.3 h1:Yx1aTY5jDgtbStPVkhJTDoLnZTy5sejQSPyjfNMy6e4= +github.com/meilisearch/meilisearch-go v0.36.3/go.mod h1:hWcR0MuWLSzHfbz9GGzIr3s9rnXLm1jqkmHkJPbUSvM= github.com/mfridman/interpolate v0.0.2 h1:pnuTK7MQIxxFz1Gr+rjSIx9u7qVjf5VOoM/u6BbAxPY= github.com/mfridman/interpolate v0.0.2/go.mod h1:p+7uk6oE07mpE/Ik1b8EckO0O4ZXiGAfshKBWLUM9Xg= github.com/minio/highwayhash v1.0.4-0.20251030100505-070ab1a87a76 h1:KGuD/pM2JpL9FAYvBrnBBeENKZNh6eNtjqytV6TYjnk= @@ -358,14 +354,6 @@ github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOF github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls= github.com/olahol/melody v1.4.0 h1:Pa5SdeZL/zXPi1tJuMAPDbl4n3gQOThSL6G1p4qZ4SI= github.com/olahol/melody v1.4.0/go.mod h1:GgkTl6Y7yWj/HtfD48Q5vLKPVoZOH+Qqgfa7CvJgJM4= -github.com/olekukonko/cat v0.0.0-20250911104152-50322a0618f6 h1:zrbMGy9YXpIeTnGj4EljqMiZsIcE09mmF8XsD5AYOJc= -github.com/olekukonko/cat v0.0.0-20250911104152-50322a0618f6/go.mod h1:rEKTHC9roVVicUIfZK7DYrdIoM0EOr8mK1Hj5s3JjH0= -github.com/olekukonko/errors v1.1.0 h1:RNuGIh15QdDenh+hNvKrJkmxxjV4hcS50Db478Ou5sM= -github.com/olekukonko/errors v1.1.0/go.mod h1:ppzxA5jBKcO1vIpCXQ9ZqgDh8iwODz6OXIGKU8r5m4Y= -github.com/olekukonko/ll v0.1.4-0.20260115111900-9e59c2286df0 h1:jrYnow5+hy3WRDCBypUFvVKNSPPCdqgSXIE9eJDD8LM= -github.com/olekukonko/ll v0.1.4-0.20260115111900-9e59c2286df0/go.mod h1:b52bVQRRPObe+yyBl0TxNfhesL0nedD4Cht0/zx55Ew= -github.com/olekukonko/tablewriter v1.1.3 h1:VSHhghXxrP0JHl+0NnKid7WoEmd9/urKRJLysb70nnA= -github.com/olekukonko/tablewriter v1.1.3/go.mod h1:9VU0knjhmMkXjnMKrZ3+L2JhhtsQ/L38BbL3CRNE8tM= github.com/onsi/ginkgo/v2 v2.9.2 h1:BA2GMJOtfGAfagzYtrAlufIP0lq6QERkFmHLMLPwFSU= github.com/onsi/ginkgo/v2 v2.9.2/go.mod h1:WHcJJG2dIlcCqVfBAwUCrJxSPFb6v4azBwgxeMeDuts= 
github.com/onsi/gomega v1.27.6 h1:ENqfyGeS5AX/rlXDd/ETokDz93u0YufY1Pgxuy/PvWE= @@ -409,10 +397,6 @@ github.com/shirou/gopsutil/v4 v4.26.5 h1:RPcBXkpz7kOj9PqGFQOlBPZHsyaPvPVQc098y9R github.com/shirou/gopsutil/v4 v4.26.5/go.mod h1:LZ6ewCSkBqUpvSOf+LsTGnRinC6iaNUNMGBtDkJBaLQ= github.com/skip2/go-qrcode v0.0.0-20200617195104-da1b6568686e h1:MRM5ITcdelLK2j1vwZ3Je0FKVCfqOLp5zO6trqMLYs0= github.com/skip2/go-qrcode v0.0.0-20200617195104-da1b6568686e/go.mod h1:XV66xRDqSt+GTGFMVlhk3ULuV0y9ZmzeVGR4mloJI3M= -github.com/spf13/cobra v1.7.0 h1:hyqWnYt1ZQShIddO5kBpj3vu05/++x6tJ6dg8EC572I= -github.com/spf13/cobra v1.7.0/go.mod h1:uLxZILRyS/50WlhOIKD7W6V5bgeIt+4sICxh6uRMrb0= -github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= -github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/spiffe/go-spiffe/v2 v2.6.0 h1:l+DolpxNWYgruGQVV0xsfeya3CsC7m8iBzDnMpsbLuo= github.com/spiffe/go-spiffe/v2 v2.6.0/go.mod h1:gm2SeUoMZEtpnzPNs2Csc0D/gX33k1xIx7lEzqblHEs= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= @@ -441,6 +425,8 @@ github.com/tklauser/go-sysconf v0.3.16 h1:frioLaCQSsF5Cy1jgRBrzr6t502KIIwQ0MArYI github.com/tklauser/go-sysconf v0.3.16/go.mod h1:/qNL9xxDhc7tx3HSRsLWNnuzbVfh3e7gh/BmM179nYI= github.com/tklauser/numcpus v0.11.0 h1:nSTwhKH5e1dMNsCdVBukSZrURJRoHbSEQjdEbY+9RXw= github.com/tklauser/numcpus v0.11.0/go.mod h1:z+LwcLq54uWZTX0u/bGobaV34u6V7KNlTZejzM6/3MQ= +github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU= +github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E= github.com/yeqown/go-qrcode/v2 v2.2.5 h1:HCOe2bSjkhZyYoyyNaXNzh4DJZll6inVJQQw+8228Zk= github.com/yeqown/go-qrcode/v2 v2.2.5/go.mod h1:uHpt9CM0V1HeXLz+Wg5MN50/sI/fQhfkZlOM+cOTHxw= github.com/yeqown/go-qrcode/writer/standard v1.3.0 h1:chdyhEfRtUPgQtuPeaWVGQ/TQx4rE1PqeoW3U+53t34= diff --git a/backend/internal/core/services/main_test.go 
b/backend/internal/core/services/main_test.go index cc15811b3..2925d4714 100644 --- a/backend/internal/core/services/main_test.go +++ b/backend/internal/core/services/main_test.go @@ -75,7 +75,7 @@ func MainNoExit(m *testing.M) int { Enabled: false, Width: 0, Height: 0, - }) + }, nil) err = os.MkdirAll(os.TempDir()+"/homebox", 0o755) if err != nil { diff --git a/backend/internal/core/services/service_items_attachments_test.go b/backend/internal/core/services/service_items_attachments_test.go index 422f3c60e..fe0236da1 100644 --- a/backend/internal/core/services/service_items_attachments_test.go +++ b/backend/internal/core/services/service_items_attachments_test.go @@ -76,7 +76,7 @@ func TestEntityService_AddAttachment_InvalidStorage(t *testing.T) { Enabled: false, Width: 0, Height: 0, - }) + }, nil) svc.repo = invalidRepos diff --git a/backend/internal/data/ent/external.go b/backend/internal/data/ent/external.go index d094da862..906766e32 100644 --- a/backend/internal/data/ent/external.go +++ b/backend/internal/data/ent/external.go @@ -11,3 +11,9 @@ import ( func (c *Client) Sql() *sql.DB { return c.driver.(*entsql.Driver).DB() } + +// Dialect returns the dialect name of the underlying database driver +// (dialect.SQLite or dialect.Postgres). 
+func (c *Client) Dialect() string { + return c.driver.Dialect() +} diff --git a/backend/internal/data/repo/main_test.go b/backend/internal/data/repo/main_test.go index 57c1ec845..d7ff85c07 100644 --- a/backend/internal/data/repo/main_test.go +++ b/backend/internal/data/repo/main_test.go @@ -64,7 +64,7 @@ func MainNoExit(m *testing.M) int { Enabled: false, Width: 0, Height: 0, - }) + }, nil) err = os.MkdirAll(os.TempDir()+"/homebox", 0o755) if err != nil { return 0 diff --git a/backend/internal/data/repo/repo_entities.go b/backend/internal/data/repo/repo_entities.go index 2f5500104..918a74a25 100644 --- a/backend/internal/data/repo/repo_entities.go +++ b/backend/internal/data/repo/repo_entities.go @@ -21,6 +21,7 @@ import ( "github.com/sysadminsmedia/homebox/backend/internal/data/ent/maintenanceentry" "github.com/sysadminsmedia/homebox/backend/internal/data/ent/predicate" "github.com/sysadminsmedia/homebox/backend/internal/data/ent/tag" + "github.com/sysadminsmedia/homebox/backend/internal/data/search" "github.com/sysadminsmedia/homebox/backend/internal/data/types" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/attribute" @@ -44,6 +45,7 @@ type EntityRepository struct { db *ent.Client bus *eventbus.EventBus attachments *AttachmentRepo + search search.Engine } type ( @@ -60,6 +62,7 @@ type ( ParentIDs []uuid.UUID `json:"parentIds"` TagIDs []uuid.UUID `json:"tagIds"` NegateTags bool `json:"negateTags"` + MatchAllTags bool `json:"matchAllTags"` // require every selected tag (AND) instead of any (OR); ignored when NegateTags is set OnlyWithoutPhoto bool `json:"onlyWithoutPhoto"` OnlyWithPhoto bool `json:"onlyWithPhoto"` ParentItemIDs []uuid.UUID `json:"parentItemIds"` @@ -528,6 +531,7 @@ func entityQuerySpanAttrs(gid uuid.UUID, q EntityQuery) []attribute.KeyValue { attribute.String("query.search", q.Search), attribute.Int("query.tag_ids.count", len(q.TagIDs)), attribute.Bool("query.negate_tags", q.NegateTags), + attribute.Bool("query.match_all_tags", 
q.MatchAllTags), attribute.Int("query.parent_ids.count", len(q.ParentIDs)), attribute.Int("query.parent_item_ids.count", len(q.ParentItemIDs)), attribute.Int("query.fields.count", len(q.Fields)), @@ -542,6 +546,58 @@ func entityQuerySpanAttrs(gid uuid.UUID, q EntityQuery) []attribute.KeyValue { } } +// tagPredicates translates the tag filter portion of q into predicates that +// QueryByGroup ANDs with the rest of the query. Selected tags also match any +// of their descendant tags. +func (r *EntityRepository) tagPredicates(ctx context.Context, q EntityQuery) []predicate.Entity { + tagRepo := &TagRepository{r.db, r.bus} + ctxDescendants, descSpan := entityTracer().Start(ctx, "repo.EntityRepository.QueryByGroup.tagDescendants", + trace.WithAttributes(attribute.Int("query.tag_ids.count", len(q.TagIDs)))) + defer descSpan.End() + + // expandTags returns the given tags plus all their descendant tags, + // falling back to just the given tags when expansion fails. + expandTags := func(ids []uuid.UUID) []uuid.UUID { + descendants, err := tagRepo.GetDescendantTagIDs(ctxDescendants, ids) + if err != nil { + recordSpanError(descSpan, err) + log.Warn().Err(err).Msg("failed to get descendant tags, using only direct tags") + return ids + } + if len(descendants) == 0 { + return ids + } + return descendants + } + + hasTag := func(l uuid.UUID, _ int) predicate.Entity { + return entity.HasTagWith(tag.ID(l)) + } + + switch { + case q.NegateTags: + descendants := expandTags(q.TagIDs) + descSpan.SetAttributes(attribute.Int("query.tag_descendants.count", len(descendants))) + notTag := lo.Map(descendants, func(l uuid.UUID, _ int) predicate.Entity { + return entity.Not(entity.HasTagWith(tag.ID(l))) + }) + return []predicate.Entity{entity.And(notTag...)} + case q.MatchAllTags: + // Every selected tag must be present, where each tag also counts as + // matched by any of its descendants. 
+ preds := make([]predicate.Entity, 0, len(q.TagIDs)) + for _, id := range q.TagIDs { + expanded := expandTags([]uuid.UUID{id}) + preds = append(preds, entity.Or(lo.Map(expanded, hasTag)...)) + } + return preds + default: + descendants := expandTags(q.TagIDs) + descSpan.SetAttributes(attribute.Int("query.tag_descendants.count", len(descendants))) + return []predicate.Entity{entity.Or(lo.Map(descendants, hasTag)...)} + } +} + // QueryByGroup returns a list of entities that belong to a specific group based on the provided query. func (r *EntityRepository) QueryByGroup(ctx context.Context, gid uuid.UUID, q EntityQuery) (PaginationResult[EntitySummary], error) { ctx, span := entityTracer().Start(ctx, "repo.EntityRepository.QueryByGroup", @@ -583,16 +639,14 @@ func (r *EntityRepository) QueryByGroup(ctx context.Context, gid uuid.UUID, q En } if q.Search != "" { - qb.Where( - entity.Or( - entity.NameContainsFold(q.Search), - entity.DescriptionContainsFold(q.Search), - entity.SerialNumberContainsFold(q.Search), - entity.ModelNumberContainsFold(q.Search), - entity.ManufacturerContainsFold(q.Search), - entity.NotesContainsFold(q.Search), - ), - ) + searchPred, err := r.search.Predicate(ctx, gid, q.Search) + if err != nil { + recordSpanError(span, err) + return PaginationResult[EntitySummary]{}, err + } + if searchPred != nil { + qb = qb.Where(searchPred) + } } if !q.AssetID.Nil() { @@ -602,32 +656,7 @@ func (r *EntityRepository) QueryByGroup(ctx context.Context, gid uuid.UUID, q En var andPredicates []predicate.Entity { if len(q.TagIDs) > 0 { - tagRepo := &TagRepository{r.db, r.bus} - ctxDescendants, descSpan := entityTracer().Start(ctx, "repo.EntityRepository.QueryByGroup.tagDescendants", - trace.WithAttributes(attribute.Int("query.tag_ids.count", len(q.TagIDs)))) - descendants, err := tagRepo.GetDescendantTagIDs(ctxDescendants, q.TagIDs) - if err != nil { - recordSpanError(descSpan, err) - log.Warn().Err(err).Msg("failed to get descendant tags, using only direct tags") - 
descendants = q.TagIDs - } else if len(descendants) == 0 { - descendants = q.TagIDs - } - descSpan.SetAttributes(attribute.Int("query.tag_descendants.count", len(descendants))) - descSpan.End() - - var tagPredicates []predicate.Entity - if !q.NegateTags { - tagPredicates = lo.Map(descendants, func(l uuid.UUID, _ int) predicate.Entity { - return entity.HasTagWith(tag.ID(l)) - }) - andPredicates = append(andPredicates, entity.Or(tagPredicates...)) - } else { - tagPredicates = lo.Map(descendants, func(l uuid.UUID, _ int) predicate.Entity { - return entity.Not(entity.HasTagWith(tag.ID(l))) - }) - andPredicates = append(andPredicates, entity.And(tagPredicates...)) - } + andPredicates = append(andPredicates, r.tagPredicates(ctx, q)...) } if q.OnlyWithoutPhoto { diff --git a/backend/internal/data/repo/repo_item_attachments_test.go b/backend/internal/data/repo/repo_item_attachments_test.go index 1bcfbfd61..561e1858c 100644 --- a/backend/internal/data/repo/repo_item_attachments_test.go +++ b/backend/internal/data/repo/repo_item_attachments_test.go @@ -188,7 +188,7 @@ func TestAttachmentRepo_DeleteExternalLink(t *testing.T) { func TestAttachmentRepo_DeleteExternalLink_DoesNotRequireBlobStorage(t *testing.T) { ctx := context.Background() - repos := New(tClient, tbus, config.Storage{PrefixPath: "/", ConnString: "mem://"}, "mem://{{ .Topic }}", config.Thumbnail{Enabled: false}) + repos := New(tClient, tbus, config.Storage{PrefixPath: "/", ConnString: "mem://"}, "mem://{{ .Topic }}", config.Thumbnail{Enabled: false}, nil) entity := useEntities(t, 1)[0] att, err := repos.Attachments.CreateExternalLink( diff --git a/backend/internal/data/repo/repo_items_search_test.go b/backend/internal/data/repo/repo_items_search_test.go index 4d3f2164a..23a52884f 100644 --- a/backend/internal/data/repo/repo_items_search_test.go +++ b/backend/internal/data/repo/repo_items_search_test.go @@ -1,216 +1,213 @@ package repo import ( + "context" "testing" + "github.com/google/uuid" 
"github.com/stretchr/testify/assert" - "github.com/sysadminsmedia/homebox/backend/pkgs/textutils" + "github.com/stretchr/testify/require" ) -// Repeated test fixture; constant satisfies goconst across the test cases below. -const fixtureElectronicaAccented = "electrónica" - -func TestEntityRepository_AccentInsensitiveSearch(t *testing.T) { - // Test cases for accent-insensitive search - testCases := []struct { - name string - itemName string - searchQuery string - shouldMatch bool - description string - }{ - { - name: "Spanish accented item, search without accents", - itemName: fixtureElectronicaAccented, - searchQuery: "electronica", - shouldMatch: true, - description: "Should find 'electrónica' when searching for 'electronica'", - }, - { - name: "Spanish accented item, search with accents", - itemName: fixtureElectronicaAccented, - searchQuery: fixtureElectronicaAccented, - shouldMatch: true, - description: "Should find 'electrónica' when searching for 'electrónica'", - }, - { - name: "Non-accented item, search with accents", - itemName: "electronica", - searchQuery: fixtureElectronicaAccented, - shouldMatch: true, - description: "Should find 'electronica' when searching for 'electrónica' (bidirectional search)", - }, - { - name: "Spanish item with tilde, search without accents", - itemName: "café", - searchQuery: "cafe", - shouldMatch: true, - description: "Should find 'café' when searching for 'cafe'", - }, - { - name: "Spanish item without tilde, search with accents", - itemName: "cafe", - searchQuery: "café", - shouldMatch: true, - description: "Should find 'cafe' when searching for 'café' (bidirectional)", - }, - { - name: "French accented item, search without accents", - itemName: "pére", - searchQuery: "pere", - shouldMatch: true, - description: "Should find 'pére' when searching for 'pere'", - }, - { - name: "French: père without accent, search with accents", - itemName: "pere", - searchQuery: "père", - shouldMatch: true, - description: "Should find 
'pere' when searching for 'père' (bidirectional)", - }, - { - name: "Mixed case with accents", - itemName: "Electrónica", - searchQuery: "ELECTRONICA", - shouldMatch: true, - description: "Should find 'Electrónica' when searching for 'ELECTRONICA' (case insensitive)", - }, - { - name: "Bidirectional: Non-accented item, search with different accents", - itemName: "cafe", - searchQuery: "café", - shouldMatch: true, - description: "Should find 'cafe' when searching for 'café' (bidirectional)", - }, - { - name: "Bidirectional: Item with accent, search with different accent", - itemName: "résumé", - searchQuery: "resume", - shouldMatch: true, - description: "Should find 'résumé' when searching for 'resume' (bidirectional)", - }, - { - name: "Bidirectional: Spanish ñ to n", - itemName: "espanol", - searchQuery: "español", - shouldMatch: true, - description: "Should find 'espanol' when searching for 'español' (bidirectional ñ)", - }, - { - name: "French: français with accent, search without", - itemName: "français", - searchQuery: "francais", - shouldMatch: true, - description: "Should find 'français' when searching for 'francais'", - }, - { - name: "French: français without accent, search with", - itemName: "francais", - searchQuery: "français", - shouldMatch: true, - description: "Should find 'francais' when searching for 'français' (bidirectional)", - }, - { - name: "French: été with accent, search without", - itemName: "été", - searchQuery: "ete", - shouldMatch: true, - description: "Should find 'été' when searching for 'ete'", - }, - { - name: "French: été without accent, search with", - itemName: "ete", - searchQuery: "été", - shouldMatch: true, - description: "Should find 'ete' when searching for 'été' (bidirectional)", - }, - { - name: "French: hôtel with accent, search without", - itemName: "hôtel", - searchQuery: "hotel", - shouldMatch: true, - description: "Should find 'hôtel' when searching for 'hotel'", - }, - { - name: "French: hôtel without accent, search 
with", - itemName: "hotel", - searchQuery: "hôtel", - shouldMatch: true, - description: "Should find 'hotel' when searching for 'hôtel' (bidirectional)", - }, - { - name: "French: naïve with accent, search without", - itemName: "naïve", - searchQuery: "naive", - shouldMatch: true, - description: "Should find 'naïve' when searching for 'naive'", - }, - { - name: "French: naïve without accent, search with", - itemName: "naive", - searchQuery: "naïve", - shouldMatch: true, - description: "Should find 'naive' when searching for 'naïve' (bidirectional)", - }, - } +// useSearchableItem creates an item with the given name and applies an +// optional full update so tests can populate any searchable field. +func useSearchableItem(t *testing.T, name string, mutate func(u *EntityUpdate)) EntityOut { + t.Helper() + ctx := context.Background() + itemET := useItemEntityType(t) + + e, err := tRepos.Entities.Create(ctx, tGroup.ID, EntityCreate{ + Name: name, + EntityTypeID: itemET.ID, + }) + require.NoError(t, err) + t.Cleanup(func() { _ = tRepos.Entities.Delete(context.Background(), e.ID) }) - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - // Test the normalization logic used in the repository - normalizedSearch := textutils.NormalizeSearchQuery(tc.searchQuery) - - // This simulates what happens in the repository - // The original search would find exact matches (case-insensitive) - // The normalized search would find accent-insensitive matches - - // Test that our normalization works as expected - if tc.shouldMatch { - // If it should match, then either the original query should match - // or the normalized query should match when applied to the stored data - assert.NotEmpty(t, normalizedSearch, "Normalized search should not be empty") - - // The key insight is that we're searching with both the original and normalized queries - // So "electrónica" will be found when searching for "electronica" because: - // 1. 
Original search: "electronica" doesn't match "electrónica" - // 2. Normalized search: "electronica" matches the normalized version - t.Logf("✓ %s: Item '%s' should be found with search '%s' (normalized: '%s')", - tc.description, tc.itemName, tc.searchQuery, normalizedSearch) - } else { - t.Logf("✗ %s: Item '%s' should NOT be found with search '%s' (normalized: '%s')", - tc.description, tc.itemName, tc.searchQuery, normalizedSearch) - } - }) + if mutate != nil { + u := EntityUpdate{ + ID: e.ID, + Name: name, + Quantity: 1, + EntityTypeID: itemET.ID, + } + mutate(&u) + _, err = tRepos.Entities.UpdateByGroup(ctx, tGroup.ID, u) + require.NoError(t, err) } + return e } -func TestNormalizeSearchQueryIntegration(t *testing.T) { - // Test that the normalization function works correctly - testCases := []struct { - input string - expected string - }{ - {fixtureElectronicaAccented, "electronica"}, - {"café", "cafe"}, - {"ELECTRÓNICA", "electronica"}, - {"Café París", "cafe paris"}, - {"hello world", "hello world"}, - // French accented words - {"père", "pere"}, - {"français", "francais"}, - {"été", "ete"}, - {"hôtel", "hotel"}, - {"naïve", "naive"}, - {"PÈRE", "pere"}, - {"FRANÇAIS", "francais"}, - {"ÉTÉ", "ete"}, - {"HÔTEL", "hotel"}, - {"NAÏVE", "naive"}, +// searchIDs runs a search query and returns the set of matched entity IDs. +// The test group is shared across the package, so assertions check membership +// instead of exact result counts. 
+func searchIDs(t *testing.T, q EntityQuery) map[uuid.UUID]bool { + t.Helper() + q.Page, q.PageSize = -1, -1 + res, err := tRepos.Entities.QueryByGroup(context.Background(), tGroup.ID, q) + require.NoError(t, err) + + ids := make(map[uuid.UUID]bool, len(res.Items)) + for _, item := range res.Items { + ids[item.ID] = true } + return ids +} - for _, tc := range testCases { - t.Run(tc.input, func(t *testing.T) { - result := textutils.NormalizeSearchQuery(tc.input) - assert.Equal(t, tc.expected, result, "Normalization should work correctly") - }) +func assertSearchFinds(t *testing.T, query string, item EntityOut, want bool) { + t.Helper() + found := searchIDs(t, EntityQuery{Search: query})[item.ID] + if want { + assert.True(t, found, "search %q should find item %q", query, item.Name) + } else { + assert.False(t, found, "search %q should NOT find item %q", query, item.Name) } } + +func TestEntitySearch_UnicodeCaseInsensitive(t *testing.T) { + ukrainian := useSearchableItem(t, "Тестовий Запис", nil) + greek := useSearchableItem(t, "Υπολογιστής", nil) + + // Cyrillic: lowercase, uppercase, and partial queries must match + // uppercase stored text (issue #1021). + assertSearchFinds(t, "тест", ukrainian, true) + assertSearchFinds(t, "ТЕСТ", ukrainian, true) + assertSearchFinds(t, "тестовий запис", ukrainian, true) + assertSearchFinds(t, "запис", ukrainian, true) + + // Greek, including the final-sigma form difference (issue #1367). 
+ assertSearchFinds(t, "Υπολογιστής", greek, true) + assertSearchFinds(t, "υπολογιστής", greek, true) + assertSearchFinds(t, "ΥΠΟΛΟΓΙΣΤΗΣ", greek, true) + assertSearchFinds(t, "υπολογιστης", greek, true) + + assertSearchFinds(t, "холодильник", ukrainian, false) +} + +func TestEntitySearch_AccentInsensitive(t *testing.T) { + accented := useSearchableItem(t, "Electrónica de café", nil) + plain := useSearchableItem(t, "electronica cafe pere", nil) + + assertSearchFinds(t, "electronica", accented, true) + assertSearchFinds(t, "café", accented, true) + assertSearchFinds(t, "CAFE", accented, true) + assertSearchFinds(t, "electrónica", plain, true) + assertSearchFinds(t, "père", plain, true) +} + +func TestEntitySearch_MultiTokenAnd(t *testing.T) { + item := useSearchableItem(t, "Red Tool Box", nil) + + // every token must match, in any order + assertSearchFinds(t, "box red", item, true) + assertSearchFinds(t, "red tool", item, true) + assertSearchFinds(t, "red hammer", item, false) + + // quoted phrases match as a unit + assertSearchFinds(t, `"tool box"`, item, true) + assertSearchFinds(t, `"box tool"`, item, false) +} + +func TestEntitySearch_MatchesAcrossFields(t *testing.T) { + item := useSearchableItem(t, "Multifield", func(u *EntityUpdate) { + u.SerialNumber = "SN-998877" + u.ModelNumber = "MX-1000" + u.Manufacturer = "Acme Corp" + u.Notes = "stored in the attic" + u.PurchaseFrom = "Conrad Electronic" + }) + + assertSearchFinds(t, "998877", item, true) + assertSearchFinds(t, "mx-1000", item, true) + assertSearchFinds(t, "acme", item, true) + assertSearchFinds(t, "attic", item, true) + assertSearchFinds(t, "conrad", item, true) + + // tokens may match across different fields of the same item + assertSearchFinds(t, "acme attic", item, true) +} + +func TestEntitySearch_MatchesTagNames(t *testing.T) { + ctx := context.Background() + + tagOut, err := tRepos.Tags.Create(ctx, tGroup.ID, TagCreate{Name: "Электроника-поиск"}) + require.NoError(t, err) + t.Cleanup(func() { _ 
= tRepos.Tags.delete(context.Background(), tagOut.ID) }) + + itemET := useItemEntityType(t) + tagged, err := tRepos.Entities.Create(ctx, tGroup.ID, EntityCreate{ + Name: "Tagged thing", + EntityTypeID: itemET.ID, + TagIDs: []uuid.UUID{tagOut.ID}, + }) + require.NoError(t, err) + t.Cleanup(func() { _ = tRepos.Entities.Delete(context.Background(), tagged.ID) }) + + untagged := useSearchableItem(t, "Untagged thing", nil) + + // tag names are searchable from the search bar (#1509), with the same + // UTF-8 case folding as other fields + assertSearchFinds(t, "электроника-поиск", tagged, true) + assertSearchFinds(t, "электроника-поиск", untagged, false) +} + +func TestEntitySearch_MatchesCustomFieldValues(t *testing.T) { + item := useSearchableItem(t, "Phone", func(u *EntityUpdate) { + u.Fields = []EntityFieldData{ + {Type: "text", Name: "IMEI", TextValue: "351234567891011"}, + } + }) + other := useSearchableItem(t, "Other phone", nil) + + // custom field values are searchable from the search bar (#1380) + assertSearchFinds(t, "351234567891011", item, true) + assertSearchFinds(t, "3512345", item, true) + assertSearchFinds(t, "351234567891011", other, false) +} + +func TestEntitySearch_LikeWildcardsAreLiteral(t *testing.T) { + percent := useSearchableItem(t, "100% cotton", nil) + plain := useSearchableItem(t, "100x cotton", nil) + + assertSearchFinds(t, "100%", percent, true) + assertSearchFinds(t, "100%", plain, false) + + underscore := useSearchableItem(t, "a_b pattern", nil) + noUnderscore := useSearchableItem(t, "axb pattern", nil) + + assertSearchFinds(t, "a_b", underscore, true) + assertSearchFinds(t, "a_b", noUnderscore, false) +} + +func TestQueryByGroup_MatchAllTags(t *testing.T) { + ctx := context.Background() + tags := useTags(t, 2) + + itemET := useItemEntityType(t) + both, err := tRepos.Entities.Create(ctx, tGroup.ID, EntityCreate{ + Name: "Has both tags", + EntityTypeID: itemET.ID, + TagIDs: []uuid.UUID{tags[0].ID, tags[1].ID}, + }) + require.NoError(t, err) 
+ t.Cleanup(func() { _ = tRepos.Entities.Delete(context.Background(), both.ID) }) + + one, err := tRepos.Entities.Create(ctx, tGroup.ID, EntityCreate{ + Name: "Has one tag", + EntityTypeID: itemET.ID, + TagIDs: []uuid.UUID{tags[0].ID}, + }) + require.NoError(t, err) + t.Cleanup(func() { _ = tRepos.Entities.Delete(context.Background(), one.ID) }) + + tagIDs := []uuid.UUID{tags[0].ID, tags[1].ID} + + // default OR behavior: any selected tag matches + anyMatch := searchIDs(t, EntityQuery{TagIDs: tagIDs}) + assert.True(t, anyMatch[both.ID], "OR mode should match item with both tags") + assert.True(t, anyMatch[one.ID], "OR mode should match item with one tag") + + // matchAllTags: every selected tag must be present (#1454) + allMatch := searchIDs(t, EntityQuery{TagIDs: tagIDs, MatchAllTags: true}) + assert.True(t, allMatch[both.ID], "AND mode should match item with both tags") + assert.False(t, allMatch[one.ID], "AND mode should NOT match item with only one tag") +} diff --git a/backend/internal/data/repo/repos_all.go b/backend/internal/data/repo/repos_all.go index 31ab59d12..5d4c9081e 100644 --- a/backend/internal/data/repo/repos_all.go +++ b/backend/internal/data/repo/repos_all.go @@ -4,6 +4,7 @@ package repo import ( "github.com/sysadminsmedia/homebox/backend/internal/core/services/reporting/eventbus" "github.com/sysadminsmedia/homebox/backend/internal/data/ent" + "github.com/sysadminsmedia/homebox/backend/internal/data/search" "github.com/sysadminsmedia/homebox/backend/internal/sys/config" ) @@ -24,7 +25,12 @@ type AllRepos struct { Exports *ExportRepository } -func New(db *ent.Client, bus *eventbus.EventBus, storage config.Storage, pubSubConn string, thumbnail config.Thumbnail) *AllRepos { +// New constructs the repository container. searchEngine selects the free-text +// search implementation; nil falls back to the default database engine. 
+func New(db *ent.Client, bus *eventbus.EventBus, storage config.Storage, pubSubConn string, thumbnail config.Thumbnail, searchEngine search.Engine) *AllRepos { + if searchEngine == nil { + searchEngine = search.NewDatabaseEngine(db) + } attachments := &AttachmentRepo{db, storage, pubSubConn, thumbnail} return &AllRepos{ Users: &UserRepository{db}, @@ -32,7 +38,7 @@ func New(db *ent.Client, bus *eventbus.EventBus, storage config.Storage, pubSubC PasswordResetTokens: &PasswordResetTokenRepository{db}, APIKeys: NewAPIKeyRepository(db), Groups: NewGroupRepository(db), - Entities: &EntityRepository{db, bus, attachments}, + Entities: &EntityRepository{db, bus, attachments, searchEngine}, EntityTypes: &EntityTypeRepository{db, bus}, EntityTemplates: &EntityTemplatesRepository{db, bus}, Tags: &TagRepository{db, bus}, diff --git a/backend/internal/data/search/database.go b/backend/internal/data/search/database.go new file mode 100644 index 000000000..2d654beb4 --- /dev/null +++ b/backend/internal/data/search/database.go @@ -0,0 +1,321 @@ +package search + +import ( + "context" + "errors" + "fmt" + "strings" + "sync" + + "entgo.io/ent/dialect" + entsql "entgo.io/ent/dialect/sql" + "github.com/google/uuid" + "github.com/rs/zerolog/log" + "github.com/sysadminsmedia/homebox/backend/internal/data/ent" + "github.com/sysadminsmedia/homebox/backend/internal/data/ent/entity" + "github.com/sysadminsmedia/homebox/backend/internal/data/ent/entityfield" + "github.com/sysadminsmedia/homebox/backend/internal/data/ent/predicate" + "github.com/sysadminsmedia/homebox/backend/internal/data/ent/tag" + "github.com/sysadminsmedia/homebox/backend/pkgs/textutils" +) + +// entityColumns are the entity table columns matched against each token. 
+var entityColumns = []string{ + entity.FieldName, + entity.FieldDescription, + entity.FieldSerialNumber, + entity.FieldModelNumber, + entity.FieldManufacturer, + entity.FieldNotes, + entity.FieldPurchaseFrom, +} + +// DatabaseEngine implements Engine with tokenized substring matching executed +// by the database itself, so it needs no external services or index +// maintenance. +// +// The query is split into tokens (see Tokenize); an entity matches when every +// token matches at least one searched column, tag name, or custom field +// value. All matching is case-insensitive across the full Unicode range and +// accent-insensitive where the dialect allows: +// +// - SQLite: both sides of the comparison go through hb_fold, a Go-defined +// SQL function (registered by pkgs/cgofreesqlite) that applies Unicode +// case folding and strips diacritics. SQLite's native LIKE/lower() are +// ASCII-only and silently fail for Cyrillic, Greek, etc. +// - PostgreSQL: ILIKE provides Unicode case-insensitivity natively, and the +// unaccent extension is used for accent-insensitivity when available +// (the engine tries to enable it once and degrades gracefully when the +// database user lacks the privilege). +type DatabaseEngine struct { + dialect string + db *ent.Client + + unaccentMu sync.Mutex + unaccentChecked bool + unaccent bool +} + +// NewDatabaseEngine returns a database-backed search engine querying through +// the given ent client. +func NewDatabaseEngine(db *ent.Client) *DatabaseEngine { + return &DatabaseEngine{dialect: db.Dialect(), db: db} +} + +// Predicate implements Engine. 
+func (e *DatabaseEngine) Predicate(ctx context.Context, _ uuid.UUID, query string) (predicate.Entity, error) { + tokens := Tokenize(query) + if len(tokens) == 0 { + return nil, nil + } + + match := e.matcher(ctx) + + tokenPreds := make([]predicate.Entity, 0, len(tokens)) + for _, token := range tokens { + fieldPreds := make([]predicate.Entity, 0, len(entityColumns)+2) + for _, col := range entityColumns { + fieldPreds = append(fieldPreds, predicate.Entity(match(col, token))) + } + fieldPreds = append(fieldPreds, + // Tag names and custom field values are searchable too + // (requested in #1509 and #1380). + entity.HasTagWith(predicate.Tag(match(tag.FieldName, token))), + entity.HasFieldsWith(predicate.EntityField(match(entityfield.FieldTextValue, token))), + ) + tokenPreds = append(tokenPreds, entity.Or(fieldPreds...)) + } + return entity.And(tokenPreds...), nil +} + +// matcher returns a function that builds a dialect-appropriate +// "column contains token" SQL condition. The returned closure is generic over +// the table being selected (entity, tag, entity_fields), qualifying the +// column through the active selector. +func (e *DatabaseEngine) matcher(ctx context.Context) func(col, token string) func(*entsql.Selector) { + return func(col, token string) func(*entsql.Selector) { + return func(s *entsql.Selector) { + s.Where(e.foldContains(ctx, s.C(col), token)) + } + } +} + +// foldContains builds a dialect-appropriate, case- and (where available) +// accent-insensitive "qualified column contains token" predicate. col must +// already be qualified (e.g. via Selector.C or SelectTable.C). It is the shared +// core behind both free-text matching and facet-value narrowing. 
+func (e *DatabaseEngine) foldContains(ctx context.Context, col, token string) *entsql.Predicate { + if e.dialect == dialect.Postgres { + unaccent := e.unaccentAvailable(ctx) + pattern := "%" + escapeLike(token) + "%" + return entsql.P(func(b *entsql.Builder) { + if unaccent { + b.WriteString("unaccent(").WriteString(col).WriteString(") ILIKE unaccent(") + b.Arg(pattern) + b.WriteString(")") + } else { + b.WriteString(col).WriteString(" ILIKE ") + b.Arg(pattern) + } + }) + } + + // SQLite + pattern := "%" + escapeLike(textutils.Fold(token)) + "%" + return entsql.P(func(b *entsql.Builder) { + b.WriteString("hb_fold(").WriteString(col).WriteString(") LIKE ") + b.Arg(pattern) + b.WriteString(" ESCAPE '\\'") + }) +} + +// unaccentAvailable reports whether the PostgreSQL unaccent extension can be +// used. On first call it tries to enable the extension (ignoring permission +// errors) and caches the result for the lifetime of the engine. +func (e *DatabaseEngine) unaccentAvailable(ctx context.Context) bool { + e.unaccentMu.Lock() + defer e.unaccentMu.Unlock() + + if e.unaccentChecked { + return e.unaccent + } + if e.db == nil { + return false + } + + if _, err := e.db.Sql().ExecContext(ctx, "CREATE EXTENSION IF NOT EXISTS unaccent"); err != nil { + log.Debug().Err(err).Msg("could not create unaccent extension (insufficient privileges?), checking if it already exists") + } + + var count int + row := e.db.Sql().QueryRowContext(ctx, "SELECT COUNT(*) FROM pg_extension WHERE extname = 'unaccent'") + if err := row.Scan(&count); err != nil { + // A caller-context cancellation or deadline is transient: leave the + // probe unmarked so a later call retries, rather than permanently + // caching accent-sensitive search from a one-off timeout. 
+ if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) { + log.Debug().Err(err).Msg("unaccent probe interrupted by context; will retry on next search") + return false + } + // Any other failure is treated as a definitive negative result. + log.Warn().Err(err).Msg("failed to check for unaccent extension; search will be accent-sensitive") + e.unaccentChecked = true + return false + } + + e.unaccent = count > 0 + e.unaccentChecked = true + if e.unaccent { + log.Info().Msg("postgres unaccent extension available; search is accent-insensitive") + } else { + log.Info().Msg("postgres unaccent extension not available; search will be accent-sensitive (install it with: CREATE EXTENSION unaccent)") + } + return e.unaccent +} + +// escapeLike escapes the LIKE wildcards in a literal token so user input +// cannot inject wildcard matching. Backslash is the escape character on both +// dialects (PostgreSQL's default; SQLite via an explicit ESCAPE clause). +func escapeLike(s string) string { + r := strings.NewReplacer(`\`, `\\`, `%`, `\%`, `_`, `\_`) + return r.Replace(s) +} + +// --- Faceter implementation ------------------------------------------------- +// +// These mirror the Meilisearch engine's facet methods so the search UI behaves +// the same regardless of driver. Counts are entity counts within the group; the +// grouping key (tag name / field value) keeps its original casing while the +// optional narrowing query matches case- and accent-insensitively, matching +// both the free-text search and Meilisearch's facetQuery. + +// notEmpty is the "col is a non-empty string" predicate, used to mirror the +// Meilisearch document builder, which only facets fields that have a text value. +func notEmpty(col string) *entsql.Predicate { + return entsql.P(func(b *entsql.Builder) { + b.WriteString(col).WriteString(" <> ''") + }) +} + +// SearchTags implements Faceter. 
+func (e *DatabaseEngine) SearchTags(ctx context.Context, gid uuid.UUID, query string) ([]TagFacet, error) { + t := entsql.Table(tag.Table).As("t") + te := entsql.Table(entity.TagTable).As("te") + // entity.TagPrimaryKey is {tag_id, entity_id}; count distinct entities. + cnt := entsql.Count(entsql.Distinct(te.C(entity.TagPrimaryKey[1]))) + + sel := entsql.Dialect(e.dialect). + Select(t.C(tag.FieldName), entsql.As(cnt, "count")). + From(t). + Join(te).On(te.C(entity.TagPrimaryKey[0]), t.C(tag.FieldID)). + Where(entsql.EQ(t.C(tag.GroupColumn), gid)). + GroupBy(t.C(tag.FieldName)). + OrderBy(entsql.Desc(cnt), entsql.Asc(t.C(tag.FieldName))) + if q := strings.TrimSpace(query); q != "" { + sel.Where(e.foldContains(ctx, t.C(tag.FieldName), q)) + } + + rows, err := e.scanFacets(ctx, sel) + if err != nil { + return nil, fmt.Errorf("database tag facets: %w", err) + } + out := make([]TagFacet, len(rows)) + for i, r := range rows { + out[i] = TagFacet{Name: r.key, Count: r.count} + } + return out, nil +} + +// SearchFieldValues implements Faceter. +func (e *DatabaseEngine) SearchFieldValues(ctx context.Context, gid uuid.UUID, field, query string) ([]FieldFacet, error) { + f := entsql.Table(entityfield.Table).As("f") + en := entsql.Table(entity.Table).As("e") + cnt := entsql.Count(entsql.Distinct(f.C(entityfield.EntityColumn))) + + sel := entsql.Dialect(e.dialect). + Select(f.C(entityfield.FieldTextValue), entsql.As(cnt, "count")). + From(f). + Join(en).On(f.C(entityfield.EntityColumn), en.C(entity.FieldID)). + Where(entsql.EQ(en.C(entity.GroupColumn), gid)). + Where(entsql.EQ(f.C(entityfield.FieldName), field)). + Where(notEmpty(f.C(entityfield.FieldTextValue))). + GroupBy(f.C(entityfield.FieldTextValue)). 
+ OrderBy(entsql.Desc(cnt), entsql.Asc(f.C(entityfield.FieldTextValue))) + if q := strings.TrimSpace(query); q != "" { + sel.Where(e.foldContains(ctx, f.C(entityfield.FieldTextValue), q)) + } + + rows, err := e.scanFacets(ctx, sel) + if err != nil { + return nil, fmt.Errorf("database field value facets: %w", err) + } + out := make([]FieldFacet, len(rows)) + for i, r := range rows { + out[i] = FieldFacet{Value: r.key, Count: r.count} + } + return out, nil +} + +// FieldFacets implements Faceter. A single grouped query yields every +// (field name, value) pair with its entity count, which is then bucketed by +// field name. +func (e *DatabaseEngine) FieldFacets(ctx context.Context, gid uuid.UUID) (map[string][]FieldFacet, error) { + f := entsql.Table(entityfield.Table).As("f") + en := entsql.Table(entity.Table).As("e") + cnt := entsql.Count(entsql.Distinct(f.C(entityfield.EntityColumn))) + + sel := entsql.Dialect(e.dialect). + Select(f.C(entityfield.FieldName), f.C(entityfield.FieldTextValue), entsql.As(cnt, "count")). + From(f). + Join(en).On(f.C(entityfield.EntityColumn), en.C(entity.FieldID)). + Where(entsql.EQ(en.C(entity.GroupColumn), gid)). + Where(notEmpty(f.C(entityfield.FieldTextValue))). + GroupBy(f.C(entityfield.FieldName), f.C(entityfield.FieldTextValue)). + OrderBy(entsql.Asc(f.C(entityfield.FieldName)), entsql.Desc(cnt), entsql.Asc(f.C(entityfield.FieldTextValue))) + + q, args := sel.Query() + rows, err := e.db.Sql().QueryContext(ctx, q, args...) 
+ if err != nil { + return nil, fmt.Errorf("database field facets: %w", err) + } + defer func() { _ = rows.Close() }() + + out := make(map[string][]FieldFacet) + for rows.Next() { + var name, value string + var count int + if err := rows.Scan(&name, &value, &count); err != nil { + return nil, fmt.Errorf("database field facets: %w", err) + } + out[name] = append(out[name], FieldFacet{Value: value, Count: count}) + } + if err := rows.Err(); err != nil { + return nil, fmt.Errorf("database field facets: %w", err) + } + return out, nil +} + +// facetRow is a (grouping key, entity count) pair from a two-column facet query. +type facetRow struct { + key string + count int +} + +func (e *DatabaseEngine) scanFacets(ctx context.Context, sel *entsql.Selector) ([]facetRow, error) { + q, args := sel.Query() + rows, err := e.db.Sql().QueryContext(ctx, q, args...) + if err != nil { + return nil, err + } + defer func() { _ = rows.Close() }() + + var out []facetRow + for rows.Next() { + var r facetRow + if err := rows.Scan(&r.key, &r.count); err != nil { + return nil, err + } + out = append(out, r) + } + return out, rows.Err() +} diff --git a/backend/internal/data/search/database_test.go b/backend/internal/data/search/database_test.go new file mode 100644 index 000000000..4f753cb62 --- /dev/null +++ b/backend/internal/data/search/database_test.go @@ -0,0 +1,208 @@ +package search + +import ( + "context" + "database/sql" + "os" + "testing" + + "entgo.io/ent/dialect" + entsql "entgo.io/ent/dialect/sql" + "github.com/google/uuid" + _ "github.com/jackc/pgx/v5/stdlib" // registers the "pgx" database/sql driver + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/sysadminsmedia/homebox/backend/internal/data/ent" + "github.com/sysadminsmedia/homebox/backend/internal/data/ent/entity" + "github.com/sysadminsmedia/homebox/backend/internal/data/ent/predicate" +) + +// renderPredicate applies a predicate to a bare entity selector and returns +// the 
generated SQL and bound arguments. The end-to-end behavior against a +// real SQLite database is covered by the repo package tests; these tests pin +// the SQL shape per dialect, including the PostgreSQL form that cannot run in +// unit tests. +func renderPredicate(t *testing.T, dialectName string, p predicate.Entity) (string, []any) { + t.Helper() + s := entsql.Dialect(dialectName). + Select(entity.FieldID). + From(entsql.Table(entity.Table)) + p(s) + query, args := s.Query() + return query, args +} + +func TestDatabaseEngine_SQLiteSQL(t *testing.T) { + e := &DatabaseEngine{dialect: dialect.SQLite} + + pred, err := e.Predicate(context.Background(), uuid.Nil, "Straße") + require.NoError(t, err) + require.NotNil(t, pred) + + query, args := renderPredicate(t, dialect.SQLite, pred) + + // both sides folded: the column through hb_fold, the pattern in Go + assert.Contains(t, query, "hb_fold(`entities`.`name`) LIKE ? ESCAPE '\\'") + assert.Contains(t, args, "%strasse%") +} + +func TestDatabaseEngine_PostgresSQL(t *testing.T) { + // no db handle: unaccent probing reports unavailable, exercising the + // plain ILIKE fallback + e := &DatabaseEngine{dialect: dialect.Postgres} + + pred, err := e.Predicate(context.Background(), uuid.Nil, "café") + require.NoError(t, err) + require.NotNil(t, pred) + + query, args := renderPredicate(t, dialect.Postgres, pred) + + // ILIKE is Unicode case-insensitive natively; without unaccent the token + // keeps its accents so accented data still matches accented queries + assert.Contains(t, query, `"entities"."name" ILIKE $`) + assert.Contains(t, args, "%café%") +} + +func TestDatabaseEngine_PostgresUnaccentSQL(t *testing.T) { + e := &DatabaseEngine{dialect: dialect.Postgres, unaccent: true, unaccentChecked: true} // mark probed + + pred, err := e.Predicate(context.Background(), uuid.Nil, "café") + require.NoError(t, err) + require.NotNil(t, pred) + + query, args := renderPredicate(t, dialect.Postgres, pred) + + assert.Contains(t, query, 
`unaccent("entities"."name") ILIKE unaccent($`) + assert.Contains(t, args, "%café%") +} + +func TestDatabaseEngine_EmptyQuery(t *testing.T) { + e := &DatabaseEngine{dialect: dialect.SQLite} + + pred, err := e.Predicate(context.Background(), uuid.Nil, " ") + require.NoError(t, err) + assert.Nil(t, pred) +} + +// TestDatabaseEngine_Facets exercises the Faceter implementation end-to-end +// against a real (in-memory SQLite) database, including per-group scoping, +// entity counts, ranking, and case-insensitive narrowing. +func TestDatabaseEngine_Facets(t *testing.T) { + assertFacets(t, newTestEntClient(t)) +} + +// TestDatabaseEngine_FacetsPostgres runs the same scenario against PostgreSQL, +// which—unlike SQLite—stores ids in a native uuid column. This is what proves +// the raw-SQL group filter binds the uuid argument correctly on pgx. Set +// TEST_POSTGRES_URL (e.g. from a throwaway docker container) to run it. +func TestDatabaseEngine_FacetsPostgres(t *testing.T) { + assertFacets(t, newTestPostgresClient(t)) +} + +// assertFacets seeds a fixed two-group dataset and asserts the facet behavior; +// shared so SQLite and PostgreSQL are held to identical expectations. 
+func assertFacets(t *testing.T, db *ent.Client) { + t.Helper() + ctx := context.Background() + e := NewDatabaseEngine(db) + + g1 := db.Group.Create().SetName("g1").SaveX(ctx) + g2 := db.Group.Create().SetName("g2").SaveX(ctx) + et1 := db.EntityType.Create().SetName("Item").SetGroup(g1).SaveX(ctx) + et2 := db.EntityType.Create().SetName("Item").SetGroup(g2).SaveX(ctx) + + elec := db.Tag.Create().SetName("Electronics").SetGroup(g1).SaveX(ctx) + tools := db.Tag.Create().SetName("Tools").SetGroup(g1).SaveX(ctx) + elec2 := db.Tag.Create().SetName("Electronics").SetGroup(g2).SaveX(ctx) + + newItem := func(g *ent.Group, et *ent.EntityType, name string) *ent.EntityCreate { + return db.Entity.Create().SetName(name).SetGroup(g).SetEntityType(et) + } + field := func(name, val string) *ent.EntityField { + return db.EntityField.Create().SetName(name).SetType("text").SetTextValue(val).SaveX(ctx) + } + + newItem(g1, et1, "Phone").AddTag(elec).AddFields(field("Condition", "Clean")).SaveX(ctx) + newItem(g1, et1, "Laptop").AddTag(elec).AddFields(field("Condition", "Clean"), field("Color", "Red")).SaveX(ctx) + newItem(g1, et1, "Hammer").AddTag(tools).AddFields(field("Condition", "Dirty")).SaveX(ctx) + // g2 carries the same tag/field names to prove scoping isolates groups + newItem(g2, et2, "Tablet").AddTag(elec2).AddFields(field("Condition", "Clean")).SaveX(ctx) + + t.Run("tag facets ranked by entity count", func(t *testing.T) { + tags, err := e.SearchTags(ctx, g1.ID, "") + require.NoError(t, err) + require.Len(t, tags, 2) + assert.Equal(t, TagFacet{Name: "Electronics", Count: 2}, tags[0]) + assert.Equal(t, TagFacet{Name: "Tools", Count: 1}, tags[1]) + }) + + t.Run("tag facet narrowing is scoped and case-insensitive", func(t *testing.T) { + tags, err := e.SearchTags(ctx, g1.ID, "elec") + require.NoError(t, err) + require.Len(t, tags, 1) + // 2, not 3: g2's Electronics tag must not be counted + assert.Equal(t, TagFacet{Name: "Electronics", Count: 2}, tags[0]) + }) + + t.Run("field 
facets discovery", func(t *testing.T) { + facets, err := e.FieldFacets(ctx, g1.ID) + require.NoError(t, err) + require.Contains(t, facets, "Condition") + require.Contains(t, facets, "Color", "each field is its own facet") + + counts := map[string]int{} + for _, f := range facets["Condition"] { + counts[f.Value] = f.Count + } + assert.Equal(t, 2, counts["Clean"]) + assert.Equal(t, 1, counts["Dirty"]) + }) + + t.Run("field value narrowing and scoping", func(t *testing.T) { + vals, err := e.SearchFieldValues(ctx, g1.ID, "Condition", "cle") + require.NoError(t, err) + require.Len(t, vals, 1) + assert.Equal(t, FieldFacet{Value: "Clean", Count: 2}, vals[0]) + + g2vals, err := e.SearchFieldValues(ctx, g2.ID, "Condition", "") + require.NoError(t, err) + require.Len(t, g2vals, 1) + assert.Equal(t, FieldFacet{Value: "Clean", Count: 1}, g2vals[0], "facets must be scoped to the group") + }) +} + +// newTestPostgresClient connects an ent client to the PostgreSQL instance in +// TEST_POSTGRES_URL (skipping when unset), using the same pgx driver the app +// uses, and creates the schema. 
Run one with: +// +// docker run -d --rm -p 5433:5432 -e POSTGRES_PASSWORD=pw postgres +// TEST_POSTGRES_URL=postgres://postgres:pw@localhost:5433/postgres?sslmode=disable go test ./internal/data/search/ +func newTestPostgresClient(t *testing.T) *ent.Client { + t.Helper() + dsn := os.Getenv("TEST_POSTGRES_URL") + if dsn == "" { + t.Skip("TEST_POSTGRES_URL not set; skipping Postgres integration test") + } + db, err := sql.Open("pgx", dsn) + require.NoError(t, err) + client := ent.NewClient(ent.Driver(entsql.OpenDB(dialect.Postgres, db))) + t.Cleanup(func() { _ = client.Close() }) + require.NoError(t, client.Schema.Create(context.Background())) + return client +} + +func TestDatabaseEngine_MultiTokenStructure(t *testing.T) { + e := &DatabaseEngine{dialect: dialect.SQLite} + + pred, err := e.Predicate(context.Background(), uuid.Nil, "red box") + require.NoError(t, err) + + query, args := renderPredicate(t, dialect.SQLite, pred) + + // one AND-ed group per token, each ORing all searched surfaces, + // including tag names and custom field values + assert.Contains(t, args, "%red%") + assert.Contains(t, args, "%box%") + assert.Contains(t, query, "`tags`") + assert.Contains(t, query, "`entity_fields`") +} diff --git a/backend/internal/data/search/meilisearch.go b/backend/internal/data/search/meilisearch.go new file mode 100644 index 000000000..d58f3866e --- /dev/null +++ b/backend/internal/data/search/meilisearch.go @@ -0,0 +1,533 @@ +package search + +import ( + "context" + "encoding/json" + "fmt" + "strings" + "sync" + "time" + + "github.com/google/uuid" + "github.com/meilisearch/meilisearch-go" + "github.com/rs/zerolog/log" + "github.com/sysadminsmedia/homebox/backend/internal/core/services/reporting/eventbus" + "github.com/sysadminsmedia/homebox/backend/internal/data/ent" + "github.com/sysadminsmedia/homebox/backend/internal/data/ent/entity" + "github.com/sysadminsmedia/homebox/backend/internal/data/ent/group" + 
"github.com/sysadminsmedia/homebox/backend/internal/data/ent/predicate" + "github.com/sysadminsmedia/homebox/backend/internal/sys/config" +) + +const ( + // meiliTaskPollInterval is how often task completion is polled while + // indexing. + meiliTaskPollInterval = 50 * time.Millisecond + + // meiliReindexBatch is the number of entities loaded from the database + // and pushed to Meilisearch per request during reindexing. + meiliReindexBatch = 1000 + + // meiliFieldFacetPrefix namespaces the per-field facet attributes + // (field_facets.NAME, where NAME is the custom field's name). Each custom field becomes its own facet + // under this prefix so the search UI can offer an independent value filter + // per field. See FieldFacets/SearchFieldValues. + meiliFieldFacetPrefix = "field_facets" +) + +// meiliReindexDebounce coalesces bursts of mutation events (e.g. a CSV +// import) into a single reindex per group. Variable so tests can shorten it. +var meiliReindexDebounce = 2 * time.Second + +// meiliDocument is the shape of an entity stored in the Meilisearch index. +// It mirrors the surfaces the database engine searches: the entity columns in +// entityColumns plus tag names and custom field text values. +// +// Custom fields are stored twice, for two different jobs: +// - Fields (an array) is searchable, so a field's value matches free-text +// queries just like the database engine. +// - FieldFacets (an object keyed by field name) is filterable, so each field +// becomes its own facet — "Special Field" and "Color" are independent +// filters rather than one undifferentiated bucket of values. 
+type meiliDocument struct { + ID string `json:"id"` + GroupID string `json:"group_id"` + Name string `json:"name"` + Description string `json:"description"` + SerialNumber string `json:"serial_number"` + ModelNumber string `json:"model_number"` + Manufacturer string `json:"manufacturer"` + Notes string `json:"notes"` + PurchaseFrom string `json:"purchase_from"` + Tags []string `json:"tags"` + Fields []meiliField `json:"fields"` + FieldFacets map[string]string `json:"field_facets"` +} + +// meiliField is a custom field on an entity. The name is stored for +// inspectability but only the value is searchable, matching the database +// engine's behavior. +type meiliField struct { + Name string `json:"name"` + Value string `json:"value"` +} + +// MeilisearchEngine implements Engine backed by an external Meilisearch +// instance, providing typo-tolerant ("fuzzy") relevance-ranked matching. +// +// Queries are sent to Meilisearch scoped to the group and the matching entity +// IDs are returned as an entity.IDIn predicate, which the repository then +// intersects with its own filters and pagination. Because of that +// intersection, documents that linger in the index after an entity is deleted +// can never surface in results — index maintenance only has to guarantee that +// *existing* entities are indexed, which keeps it simple: +// +// - the full index is rebuilt (upserted) in the background at startup, and +// - entity/tag mutation events trigger a debounced reindex of the affected +// group, which also prunes that group's stale documents. +// +// Results are capped at MaxHits (HBOX_SEARCH_MEILISEARCH_MAX_HITS); a search +// that legitimately matches more entities than that is truncated. 
+type MeilisearchEngine struct { + client meilisearch.ServiceManager + index meilisearch.IndexManager + db *ent.Client + maxHits int64 + + mu sync.Mutex + pending map[uuid.UUID]struct{} + timer *time.Timer +} + +// NewMeilisearchEngine connects to Meilisearch, ensures the index and its +// settings exist, subscribes to mutation events for incremental indexing, and +// kicks off a full reindex in the background. It fails fast when the instance +// is unreachable so a misconfiguration is caught at startup. +func NewMeilisearchEngine(cfg config.MeilisearchConf, db *ent.Client, bus *eventbus.EventBus) (*MeilisearchEngine, error) { + client := meilisearch.New(cfg.Host, meilisearch.WithAPIKey(cfg.APIKey)) + + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + if _, err := client.HealthWithContext(ctx); err != nil { + return nil, fmt.Errorf("meilisearch is not reachable at %s: %w", cfg.Host, err) + } + + e := &MeilisearchEngine{ + client: client, + index: client.Index(cfg.Index), + db: db, + maxHits: cfg.MaxHits, + pending: map[uuid.UUID]struct{}{}, + } + + if err := e.ensureIndex(ctx, cfg.Index); err != nil { + return nil, err + } + + if bus != nil { + onMutation := func(data any) { + if event, ok := data.(eventbus.GroupMutationEvent); ok { + e.scheduleReindex(event.GID) + } + } + bus.Subscribe(eventbus.EventEntityMutation, onMutation) + bus.Subscribe(eventbus.EventTagMutation, onMutation) + } + + go func() { + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute) + defer cancel() + if err := e.ReindexAll(ctx); err != nil { + log.Error().Err(err).Msg("meilisearch: initial reindex failed; results may be incomplete until entities are modified or the server restarts") + } + }() + + return e, nil +} + +// Predicate implements Engine. 
+func (e *MeilisearchEngine) Predicate(ctx context.Context, gid uuid.UUID, query string) (predicate.Entity, error) { + query = strings.TrimSpace(query) + if query == "" { + return nil, nil + } + + resp, err := e.index.SearchWithContext(ctx, query, &meilisearch.SearchRequest{ + // uuid.String() emits only [0-9a-f-], so inlining it in the filter + // expression is safe + Filter: fmt.Sprintf("group_id = %q", gid.String()), + Limit: e.maxHits, + AttributesToRetrieve: []string{"id"}, + // require every term to match, mirroring the database engine's + // AND-of-tokens semantics (typo tolerance still applies per term) + MatchingStrategy: meilisearch.All, + }) + if err != nil { + return nil, fmt.Errorf("meilisearch query failed: %w", err) + } + + ids := make([]uuid.UUID, 0, len(resp.Hits)) + for _, hit := range resp.Hits { + var doc struct { + ID string `json:"id"` + } + if err := hit.DecodeInto(&doc); err != nil { + return nil, fmt.Errorf("meilisearch returned an undecodable hit: %w", err) + } + id, err := uuid.Parse(doc.ID) + if err != nil { + log.Warn().Str("id", doc.ID).Msg("meilisearch: skipping hit with non-uuid id") + continue + } + ids = append(ids, id) + } + + // entity.IDIn with no ids compiles to FALSE: no matches + return entity.IDIn(ids...), nil +} + +// SearchTags returns the tag values used within a group, ranked by how many +// entities carry each tag, optionally narrowed to those matching query (a +// prefix/substring of the tag name). It powers the tag filter in the search UI: +// an empty query lists a group's most-used tags, and a non-empty one +// autocompletes as the user types. +// +// This relies on tags being a filterable, facet-searched attribute (see +// ensureIndex). Unlike Predicate it does not return entities; the UI feeds the +// chosen tags back into its own tag filter. 
+func (e *MeilisearchEngine) SearchTags(ctx context.Context, gid uuid.UUID, query string) ([]TagFacet, error) { + raw, err := e.index.FacetSearchWithContext(ctx, &meilisearch.FacetSearchRequest{ + FacetName: "tags", + FacetQuery: strings.TrimSpace(query), + // uuid.String() emits only [0-9a-f-], so inlining it is safe + Filter: fmt.Sprintf("group_id = %q", gid.String()), + }) + if err != nil { + return nil, fmt.Errorf("meilisearch facet search failed: %w", err) + } + + var resp struct { + FacetHits []TagFacet `json:"facetHits"` + } + if err := json.Unmarshal(*raw, &resp); err != nil { + return nil, fmt.Errorf("meilisearch returned an undecodable facet response: %w", err) + } + return resp.FacetHits, nil +} + +// FieldFacets returns every custom field present on a group's entities mapped +// to its value distribution (value -> entity count). It drives the search UI's +// filter sidebar: which fields can be filtered on and what values each one +// currently has. Values within a field are unordered. +// +// It reads the facet distribution of every field_facets. attribute in a +// single request, so the UI need not know the field names in advance. +func (e *MeilisearchEngine) FieldFacets(ctx context.Context, gid uuid.UUID) (map[string][]FieldFacet, error) { + resp, err := e.index.SearchWithContext(ctx, "", &meilisearch.SearchRequest{ + // uuid.String() emits only [0-9a-f-], so inlining it is safe + Filter: fmt.Sprintf("group_id = %q", gid.String()), + // "*" expands to every filterable attribute; we keep only the + // field_facets. ones below. Distribution is computed over the + // whole filtered set, independent of the (unused) hit page. 
+ Facets: []string{"*"}, + AttributesToRetrieve: []string{"id"}, + }) + if err != nil { + return nil, fmt.Errorf("meilisearch field facet distribution failed: %w", err) + } + if len(resp.FacetDistribution) == 0 { + return map[string][]FieldFacet{}, nil + } + + var dist map[string]map[string]int + if err := json.Unmarshal(resp.FacetDistribution, &dist); err != nil { + return nil, fmt.Errorf("meilisearch returned an undecodable facet distribution: %w", err) + } + + prefix := meiliFieldFacetPrefix + "." + out := make(map[string][]FieldFacet) + for attr, values := range dist { + name, ok := strings.CutPrefix(attr, prefix) + if !ok { + continue // group_id, tags, the empty field_facets parent, ... + } + // "*" enumerates every field_facets. in the whole index, so a + // field used only by *other* groups shows up here with an empty + // distribution (the group filter zeroes its counts). Skipping empties + // is what scopes the result to fields this group actually uses. + if len(values) == 0 { + continue + } + facets := make([]FieldFacet, 0, len(values)) + for v, c := range values { + facets = append(facets, FieldFacet{Value: v, Count: c}) + } + out[name] = facets + } + return out, nil +} + +// SearchFieldValues returns the distinct values of a single custom field within +// a group, ranked by how many entities carry each value and optionally narrowed +// to those whose value matches query (a prefix/substring). It powers a per-field +// value picker in the search UI, e.g. field "Special Field" -> [{"Clean",12}]. +// +// Like SearchTags it only enumerates facet values; applying the chosen filter +// to the result set remains the repository's job. +func (e *MeilisearchEngine) SearchFieldValues(ctx context.Context, gid uuid.UUID, field, query string) ([]FieldFacet, error) { + raw, err := e.index.FacetSearchWithContext(ctx, &meilisearch.FacetSearchRequest{ + FacetName: meiliFieldFacetPrefix + "." 
+ field, + FacetQuery: strings.TrimSpace(query), + // uuid.String() emits only [0-9a-f-], so inlining it is safe + Filter: fmt.Sprintf("group_id = %q", gid.String()), + }) + if err != nil { + return nil, fmt.Errorf("meilisearch field facet search failed: %w", err) + } + + var resp struct { + FacetHits []FieldFacet `json:"facetHits"` + } + if err := json.Unmarshal(*raw, &resp); err != nil { + return nil, fmt.Errorf("meilisearch returned an undecodable facet response: %w", err) + } + return resp.FacetHits, nil +} + +// ensureIndex creates the index (ignoring "already exists") and applies the +// searchable/filterable attribute settings. +// +// tags is both searchable (so a tag name matches in free-text queries) and +// filterable. Filterability is what makes it a facet: it lets the index be +// queried for the tag values present in a group and narrowed by tag via the +// facet-search endpoint (see SearchTags). The forthcoming "e-commerce" search +// UI builds its tag filter from those facets, so facetSearch is enabled here. 
+func (e *MeilisearchEngine) ensureIndex(ctx context.Context, uid string) error { + task, err := e.client.CreateIndexWithContext(ctx, &meilisearch.IndexConfig{Uid: uid, PrimaryKey: "id"}) + if err != nil { + return fmt.Errorf("meilisearch create index: %w", err) + } + done, err := e.client.WaitForTaskWithContext(ctx, task.TaskUID, meiliTaskPollInterval) + if err != nil { + return fmt.Errorf("meilisearch create index: %w", err) + } + if done.Status == meilisearch.TaskStatusFailed && done.Error.Code != "index_already_exists" { + return fmt.Errorf("meilisearch create index: %s", done.Error.Message) + } + + task, err = e.index.UpdateSettingsWithContext(ctx, &meilisearch.Settings{ + SearchableAttributes: []string{ + "name", "description", "serial_number", "model_number", + "manufacturer", "notes", "purchase_from", "tags", "fields.value", + }, + // group_id scopes every query; tags and the per-field facets under + // field_facets.* are faceted for the tag/field filter UI. Marking the + // field_facets parent filterable makes every nested field_facets. + // a facet without having to enumerate field names up front. + FilterableAttributes: []string{"group_id", "tags", meiliFieldFacetPrefix}, + // facet search is disabled by default in Meilisearch >= 1.12; enable it + // so SearchTags/SearchFieldValues can resolve/autocomplete facet values. + FacetSearch: true, + }) + if err != nil { + return fmt.Errorf("meilisearch update settings: %w", err) + } + if err := e.waitForTask(ctx, task, "update settings"); err != nil { + return err + } + return nil +} + +// ReindexAll rebuilds the documents for every entity in the database. Existing +// documents are upserted in place, so search keeps working while it runs. +func (e *MeilisearchEngine) ReindexAll(ctx context.Context) error { + return e.reindex(ctx, nil) +} + +// ReindexGroup rebuilds the documents for a single group and prunes documents +// for entities that no longer exist in it. 
+func (e *MeilisearchEngine) ReindexGroup(ctx context.Context, gid uuid.UUID) error { + return e.reindex(ctx, &gid) +} + +// reindex upserts documents for all entities (gid == nil) or one group's +// entities, then deletes that scope's documents that no longer correspond to +// a database row. +func (e *MeilisearchEngine) reindex(ctx context.Context, gid *uuid.UUID) error { + idQuery := e.db.Entity.Query().Order(ent.Asc(entity.FieldID)) + if gid != nil { + idQuery = idQuery.Where(entity.HasGroupWith(group.ID(*gid))) + } + + // Capture a stable snapshot of the target entity IDs up front. Paging the + // full-entity query by offset is not a stable snapshot: a concurrent insert + // or delete shifts later offsets and can skip a live entity, which would + // then be absent from `indexed` and wrongly pruned as stale. Iterating a + // fixed ID slice removes that race — `indexed` is derived from the snapshot, + // so pruning only targets documents whose entity was absent at snapshot time. + ids, err := idQuery.IDs(ctx) + if err != nil { + return fmt.Errorf("meilisearch reindex: loading entity ids: %w", err) + } + + indexed := make(map[string]struct{}, len(ids)) + for _, id := range ids { + indexed[id.String()] = struct{}{} + } + + for start := 0; start < len(ids); start += meiliReindexBatch { + end := start + meiliReindexBatch + if end > len(ids) { + end = len(ids) + } + + entities, err := e.db.Entity.Query(). + Where(entity.IDIn(ids[start:end]...)). + WithGroup(). + WithTag(). + WithFields(). + All(ctx) + if err != nil { + return fmt.Errorf("meilisearch reindex: loading entities: %w", err) + } + if len(entities) == 0 { + // Every entity in this batch was deleted after the snapshot; their + // documents (if any) are left for pruneStale's live-DB intersection. 
+ continue + } + + docs := make([]meiliDocument, 0, len(entities)) + for _, row := range entities { + docs = append(docs, buildMeiliDocument(row)) + } + + task, err := e.index.AddDocumentsWithContext(ctx, docs, nil) + if err != nil { + return fmt.Errorf("meilisearch reindex: adding documents: %w", err) + } + if err := e.waitForTask(ctx, task, "add documents"); err != nil { + return err + } + } + + return e.pruneStale(ctx, gid, indexed) +} + +// pruneStale removes documents within the reindexed scope whose entity no +// longer exists. Stale documents are harmless for correctness (the predicate +// is intersected with the live database), so pruning is best-effort hygiene. +func (e *MeilisearchEngine) pruneStale(ctx context.Context, gid *uuid.UUID, indexed map[string]struct{}) error { + queryFields := []string{"id"} + dq := &meilisearch.DocumentsQuery{Fields: queryFields, Limit: meiliReindexBatch} + if gid != nil { + dq.Filter = fmt.Sprintf("group_id = %q", gid.String()) + } + + var stale []string + for offset := int64(0); ; offset += meiliReindexBatch { + dq.Offset = offset + var page meilisearch.DocumentsResult + if err := e.index.GetDocumentsWithContext(ctx, dq, &page); err != nil { + return fmt.Errorf("meilisearch reindex: listing documents: %w", err) + } + for _, hit := range page.Results { + var doc struct { + ID string `json:"id"` + } + if err := hit.DecodeInto(&doc); err != nil { + continue + } + if _, ok := indexed[doc.ID]; !ok { + stale = append(stale, doc.ID) + } + } + if int64(len(page.Results)) < meiliReindexBatch { + break + } + } + + if len(stale) == 0 { + return nil + } + + task, err := e.index.DeleteDocumentsWithContext(ctx, stale, nil) + if err != nil { + return fmt.Errorf("meilisearch reindex: deleting stale documents: %w", err) + } + return e.waitForTask(ctx, task, "delete stale documents") +} + +// scheduleReindex queues a group for reindexing, coalescing rapid mutation +// bursts into one pass per group. 
+func (e *MeilisearchEngine) scheduleReindex(gid uuid.UUID) { + e.mu.Lock() + defer e.mu.Unlock() + e.pending[gid] = struct{}{} + if e.timer == nil { + e.timer = time.AfterFunc(meiliReindexDebounce, e.flushPending) + } +} + +func (e *MeilisearchEngine) flushPending() { + e.mu.Lock() + gids := make([]uuid.UUID, 0, len(e.pending)) + for gid := range e.pending { + gids = append(gids, gid) + } + e.pending = map[uuid.UUID]struct{}{} + e.timer = nil + e.mu.Unlock() + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) + defer cancel() + + for _, gid := range gids { + if err := e.ReindexGroup(ctx, gid); err != nil { + log.Error().Err(err).Str("group_id", gid.String()).Msg("meilisearch: group reindex failed") + } + } +} + +func (e *MeilisearchEngine) waitForTask(ctx context.Context, task *meilisearch.TaskInfo, op string) error { + done, err := e.client.WaitForTaskWithContext(ctx, task.TaskUID, meiliTaskPollInterval) + if err != nil { + return fmt.Errorf("meilisearch %s: %w", op, err) + } + if done.Status == meilisearch.TaskStatusFailed { + return fmt.Errorf("meilisearch %s: %s", op, done.Error.Message) + } + return nil +} + +func buildMeiliDocument(e *ent.Entity) meiliDocument { + doc := meiliDocument{ + ID: e.ID.String(), + Name: e.Name, + Description: e.Description, + SerialNumber: e.SerialNumber, + ModelNumber: e.ModelNumber, + Manufacturer: e.Manufacturer, + Notes: e.Notes, + PurchaseFrom: e.PurchaseFrom, + // empty slices/maps (not nil) so documents serialize as []/{} not null + Tags: []string{}, + Fields: []meiliField{}, + FieldFacets: map[string]string{}, + } + if e.Edges.Group != nil { + doc.GroupID = e.Edges.Group.ID.String() + } + for _, t := range e.Edges.Tag { + doc.Tags = append(doc.Tags, t.Name) + } + for _, f := range e.Edges.Fields { + if f.TextValue != "" { + doc.Fields = append(doc.Fields, meiliField{Name: f.Name, Value: f.TextValue}) + // last value wins if a field name repeats on one entity; a facet + // only needs one value per 
(entity, field) anyway + doc.FieldFacets[f.Name] = f.TextValue + } + } + return doc +} diff --git a/backend/internal/data/search/meilisearch_test.go b/backend/internal/data/search/meilisearch_test.go new file mode 100644 index 000000000..7987cf1df --- /dev/null +++ b/backend/internal/data/search/meilisearch_test.go @@ -0,0 +1,272 @@ +package search + +import ( + "context" + "os" + "testing" + "time" + + "github.com/google/uuid" + "github.com/meilisearch/meilisearch-go" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/sysadminsmedia/homebox/backend/internal/core/services/reporting/eventbus" + "github.com/sysadminsmedia/homebox/backend/internal/data/ent" + "github.com/sysadminsmedia/homebox/backend/internal/sys/config" + _ "github.com/sysadminsmedia/homebox/backend/pkgs/cgofreesqlite" +) + +// meiliTestConfig returns the Meilisearch connection settings for integration +// tests, skipping the test when no instance is configured. Run one with: +// +// docker run -d --rm -p 7700:7700 -e MEILI_MASTER_KEY=test-master-key getmeili/meilisearch +// TEST_MEILISEARCH_URL=http://localhost:7700 TEST_MEILISEARCH_KEY=test-master-key go test ./internal/data/search/ +func meiliTestConfig(t *testing.T) config.MeilisearchConf { + t.Helper() + host := os.Getenv("TEST_MEILISEARCH_URL") + if host == "" { + t.Skip("TEST_MEILISEARCH_URL not set; skipping Meilisearch integration test") + } + return config.MeilisearchConf{ + Host: host, + APIKey: os.Getenv("TEST_MEILISEARCH_KEY"), + // unique index per run so concurrent/repeated runs don't interfere + Index: "homebox_test_" + uuid.NewString(), + MaxHits: 1000, + } +} + +func newTestEntClient(t *testing.T) *ent.Client { + t.Helper() + client, err := ent.Open("sqlite3", "file:"+uuid.NewString()+"?mode=memory&cache=shared&_fk=1&_time_format=sqlite") + require.NoError(t, err) + t.Cleanup(func() { _ = client.Close() }) + require.NoError(t, client.Schema.Create(context.Background())) + return client +} + 
+func TestMeilisearchEngine_Integration(t *testing.T) { + cfg := meiliTestConfig(t) + db := newTestEntClient(t) + ctx := context.Background() + + g1 := db.Group.Create().SetName("group-one").SaveX(ctx) + g2 := db.Group.Create().SetName("group-two").SaveX(ctx) + et1 := db.EntityType.Create().SetName("Item").SetGroup(g1).SaveX(ctx) + et2 := db.EntityType.Create().SetName("Item").SetGroup(g2).SaveX(ctx) + + electronicsTag := db.Tag.Create().SetName("Электроника").SetGroup(g1).SaveX(ctx) + imeiField := db.EntityField.Create().SetName("IMEI").SetType("text").SetTextValue("351234567891011").SaveX(ctx) + + newItem := func(g *ent.Group, et *ent.EntityType, name string) *ent.EntityCreate { + return db.Entity.Create().SetName(name).SetGroup(g).SetEntityType(et) + } + + ukrainian := newItem(g1, et1, "Тестовий Запис").SaveX(ctx) + greek := newItem(g1, et1, "Υπολογιστής").SaveX(ctx) + tagged := newItem(g1, et1, "Tagged item").AddTag(electronicsTag).SaveX(ctx) + phone := newItem(g1, et1, "Smartphone").AddFields(imeiField).SaveX(ctx) + toolbox := newItem(g1, et1, "Red Tool Box").SaveX(ctx) + foreign := newItem(g2, et2, "Тестовий Запис").SaveX(ctx) + + engine, err := NewMeilisearchEngine(cfg, db, nil) + require.NoError(t, err) + t.Cleanup(func() { _, _ = engine.client.DeleteIndex(cfg.Index) }) + + require.NoError(t, engine.ReindexAll(ctx)) + + // search applies only the engine predicate (no extra group filter) so the + // assertions also verify Meilisearch-side group scoping + search := func(q string) map[uuid.UUID]bool { + t.Helper() + pred, err := engine.Predicate(ctx, g1.ID, q) + require.NoError(t, err) + require.NotNil(t, pred) + rows, err := db.Entity.Query().Where(pred).All(ctx) + require.NoError(t, err) + ids := make(map[uuid.UUID]bool, len(rows)) + for _, r := range rows { + ids[r.ID] = true + } + return ids + } + + t.Run("unicode case-insensitive", func(t *testing.T) { + assert.True(t, search("тест")[ukrainian.ID], "lowercase Cyrillic query should match uppercase name") + 
assert.True(t, search("ТЕСТОВИЙ")[ukrainian.ID]) + assert.True(t, search("υπολογιστής")[greek.ID]) + assert.True(t, search("ΥΠΟΛΟΓΙΣΤΗΣ")[greek.ID]) + }) + + t.Run("group scoping", func(t *testing.T) { + ids := search("тестовий") + assert.True(t, ids[ukrainian.ID]) + assert.False(t, ids[foreign.ID], "results must be scoped to the queried group") + }) + + t.Run("tag names searchable", func(t *testing.T) { + ids := search("электроника") + assert.True(t, ids[tagged.ID]) + assert.False(t, ids[ukrainian.ID]) + }) + + t.Run("custom field values searchable", func(t *testing.T) { + assert.True(t, search("351234567891011")[phone.ID]) + // field names are stored for inspectability but intentionally not + // searchable, matching the database engine + assert.False(t, search("IMEI")[phone.ID]) + }) + + t.Run("multi-word requires all terms", func(t *testing.T) { + assert.True(t, search("box red")[toolbox.ID], "word order should not matter") + assert.False(t, search("red hammer")[toolbox.ID], "all terms must match") + }) + + t.Run("typo tolerance", func(t *testing.T) { + assert.True(t, search("smartphnoe")[phone.ID], "single-word typo should still match") + }) + + t.Run("incremental reindex adds new entities", func(t *testing.T) { + bicycle := newItem(g1, et1, "Blue Bicycle").SaveX(ctx) + require.NoError(t, engine.ReindexGroup(ctx, g1.ID)) + assert.True(t, search("bicycle")[bicycle.ID]) + }) + + t.Run("reindex prunes deleted entities", func(t *testing.T) { + require.True(t, search("tool box")[toolbox.ID]) + db.Entity.DeleteOneID(toolbox.ID).ExecX(ctx) + require.NoError(t, engine.ReindexGroup(ctx, g1.ID)) + + // assert against the raw index: the document itself must be gone + // (going through the predicate + DB would hide staleness, since the + // deleted row can never be selected anyway) + resp, err := engine.index.SearchWithContext(ctx, "tool box", &meilisearch.SearchRequest{Limit: 100}) + require.NoError(t, err) + for _, hit := range resp.Hits { + var doc struct { + ID string 
`json:"id"` + } + require.NoError(t, hit.DecodeInto(&doc)) + assert.NotEqual(t, toolbox.ID.String(), doc.ID, "deleted entity's document should be pruned from the index") + } + }) + + t.Run("empty query yields no predicate", func(t *testing.T) { + pred, err := engine.Predicate(ctx, g1.ID, " ") + require.NoError(t, err) + assert.Nil(t, pred) + }) + + t.Run("tag facet search", func(t *testing.T) { + // the only tagged entity in g1 carries "Электроника" + facets, err := engine.SearchTags(ctx, g1.ID, "") + require.NoError(t, err) + byName := make(map[string]int, len(facets)) + for _, f := range facets { + byName[f.Name] = f.Count + } + assert.Equal(t, 1, byName["Электроника"], "facet should report the tag and its entity count") + + // facetQuery narrows by tag name (case-insensitive substring) + filtered, err := engine.SearchTags(ctx, g1.ID, "электро") + require.NoError(t, err) + require.Len(t, filtered, 1) + assert.Equal(t, "Электроника", filtered[0].Name) + + // the tag belongs to g1, so g2's facets must not include it + other, err := engine.SearchTags(ctx, g2.ID, "") + require.NoError(t, err) + for _, f := range other { + assert.NotEqual(t, "Электроника", f.Name, "facets must be scoped to the group") + } + }) + + t.Run("custom field facets", func(t *testing.T) { + // three entities sharing one field name ("Condition") with two values, + // plus a field name with a space to exercise nested facet attributes + clean1 := db.EntityField.Create().SetName("Condition").SetType("text").SetTextValue("Clean").SaveX(ctx) + clean2 := db.EntityField.Create().SetName("Condition").SetType("text").SetTextValue("Clean").SaveX(ctx) + dirty := db.EntityField.Create().SetName("Condition").SetType("text").SetTextValue("Dirty").SaveX(ctx) + special := db.EntityField.Create().SetName("Special Field").SetType("text").SetTextValue("Clean").SaveX(ctx) + newItem(g1, et1, "Sofa").AddFields(clean1).SaveX(ctx) + newItem(g1, et1, "Rug").AddFields(clean2, special).SaveX(ctx) + newItem(g1, et1, 
"Doormat").AddFields(dirty).SaveX(ctx) + require.NoError(t, engine.ReindexGroup(ctx, g1.ID)) + + // discovery: each field is its own facet with per-value counts + facets, err := engine.FieldFacets(ctx, g1.ID) + require.NoError(t, err) + require.Contains(t, facets, "Condition") + require.Contains(t, facets, "Special Field", "field names with spaces are faceted") + require.Contains(t, facets, "IMEI", "fields are independent of one another") + + counts := map[string]int{} + for _, f := range facets["Condition"] { + counts[f.Value] = f.Count + } + assert.Equal(t, 2, counts["Clean"]) + assert.Equal(t, 1, counts["Dirty"]) + + // per-field value autocomplete, scoped and narrowed by query + vals, err := engine.SearchFieldValues(ctx, g1.ID, "Condition", "cle") + require.NoError(t, err) + require.Len(t, vals, 1) + assert.Equal(t, "Clean", vals[0].Value) + assert.Equal(t, 2, vals[0].Count) + + // g2 has none of these fields + g2facets, err := engine.FieldFacets(ctx, g2.ID) + require.NoError(t, err) + assert.NotContains(t, g2facets, "Condition", "facets must be scoped to the group") + }) +} + +func TestMeilisearchEngine_EventDrivenReindex(t *testing.T) { + cfg := meiliTestConfig(t) + db := newTestEntClient(t) + ctx := context.Background() + + oldDebounce := meiliReindexDebounce + meiliReindexDebounce = 100 * time.Millisecond + t.Cleanup(func() { meiliReindexDebounce = oldDebounce }) + + bus := eventbus.New() + busCtx, cancel := context.WithCancel(ctx) + t.Cleanup(cancel) + go func() { _ = bus.Run(busCtx) }() + + g := db.Group.Create().SetName("group-bus").SaveX(ctx) + et := db.EntityType.Create().SetName("Item").SetGroup(g).SaveX(ctx) + + engine, err := NewMeilisearchEngine(cfg, db, bus) + require.NoError(t, err) + t.Cleanup(func() { _, _ = engine.client.DeleteIndex(cfg.Index) }) + + // created after engine startup, so only the mutation event can index it + lamp := db.Entity.Create().SetName("Vintage Lamp").SetGroup(g).SetEntityType(et).SaveX(ctx) + 
bus.Publish(eventbus.EventEntityMutation, eventbus.GroupMutationEvent{GID: g.ID}) + + assert.Eventually(t, func() bool { + pred, err := engine.Predicate(ctx, g.ID, "vintage lamp") + if err != nil || pred == nil { + return false + } + ids, err := db.Entity.Query().Where(pred).IDs(ctx) + return err == nil && len(ids) == 1 && ids[0] == lamp.ID + }, 15*time.Second, 200*time.Millisecond, "mutation event should trigger a debounced group reindex") +} + +func TestNewEngine_UnknownDriver(t *testing.T) { + _, err := NewEngine(config.SearchConf{Driver: "sphinx"}, nil, nil) + require.Error(t, err) + assert.Contains(t, err.Error(), "unsupported search driver") +} + +func TestNewEngine_MeilisearchUnreachable(t *testing.T) { + _, err := NewEngine(config.SearchConf{ + Driver: DriverMeilisearch, + Meilisearch: config.MeilisearchConf{Host: "http://127.0.0.1:1", MaxHits: 10}, + }, nil, nil) + require.Error(t, err) + assert.Contains(t, err.Error(), "not reachable") +} diff --git a/backend/internal/data/search/search.go b/backend/internal/data/search/search.go new file mode 100644 index 000000000..7cb0404ab --- /dev/null +++ b/backend/internal/data/search/search.go @@ -0,0 +1,118 @@ +// Package search provides the pluggable free-text search abstraction used by +// the entity repository. +// +// A search Engine translates a user-supplied query string into an ent +// predicate that selects the matching entities. The default engine +// (DriverDatabase) performs tokenized, case- and accent-insensitive matching +// directly in the database and works on both SQLite and PostgreSQL with no +// extra infrastructure. The Meilisearch engine (DriverMeilisearch) delegates +// matching to an external Meilisearch instance for typo-tolerant, +// relevance-ranked search. +// +// To add a new engine (e.g. Elasticsearch): +// +// 1. Implement the Engine interface. An external engine typically queries +// its own index scoped to the group ID and returns +// entity.IDIn(matchedIDs...) 
as the predicate, which preserves the +// repository's filtering, pagination, and eager-loading behavior. See +// MeilisearchEngine for the reference implementation. +// 2. Keep the engine's index up to date by subscribing to entity mutations +// (the repositories publish events on the event bus). +// 3. Register a new driver constant and construction case in NewEngine, and +// document the driver value for HBOX_SEARCH_DRIVER. +package search + +import ( + "context" + "fmt" + "strings" + + "github.com/google/uuid" + "github.com/sysadminsmedia/homebox/backend/internal/core/services/reporting/eventbus" + "github.com/sysadminsmedia/homebox/backend/internal/data/ent" + "github.com/sysadminsmedia/homebox/backend/internal/data/ent/predicate" + "github.com/sysadminsmedia/homebox/backend/internal/sys/config" +) + +// Supported search drivers. +const ( + DriverDatabase = "database" + DriverMeilisearch = "meilisearch" +) + +// Engine translates free-text queries into entity predicates. +type Engine interface { + // Predicate returns an ent predicate selecting the entities within the + // given group that match the free-text query. A nil predicate (with nil + // error) means the query has no usable terms and no search filter should + // be applied. + // + // The caller is responsible for all non-search filtering (group, type, + // tags, pagination, ...); implementations must only express the text + // match itself. + Predicate(ctx context.Context, gid uuid.UUID, query string) (predicate.Entity, error) +} + +// TagFacet is a tag value present on a group's entities together with how many +// of them carry it, e.g. {"Electronics", 12}. The json tags decode a +// Meilisearch facet hit; the database engine populates the fields directly. +type TagFacet struct { + Name string `json:"value"` + Count int `json:"count"` +} + +// FieldFacet is one value of a custom field together with the number of a +// group's entities that carry that value, e.g. {"Clean", 12}. 
+type FieldFacet struct { + Value string `json:"value"` + Count int `json:"count"` +} + +// Faceter is an optional capability for engines that can enumerate the values +// available for filtering — tag names and per-custom-field values, each with +// the number of matching entities. It backs the search UI's filter sidebar +// (filter by tag, by "Special Field = Clean", ...). Both the database and +// Meilisearch engines implement it; callers type-assert for it: +// +// if f, ok := engine.(search.Faceter); ok { +// tags, err := f.SearchTags(ctx, gid, "") +// } +// +// Like Predicate, these methods only enumerate facet values; applying a chosen +// filter to the result set remains the repository's job. +type Faceter interface { + // SearchTags returns the tag names used within a group ranked by entity + // count, optionally narrowed to those matching query (a case-insensitive + // substring of the tag name). An empty query returns the most-used tags. + SearchTags(ctx context.Context, gid uuid.UUID, query string) ([]TagFacet, error) + + // FieldFacets returns every custom field present on a group's entities + // mapped to its value distribution (value -> entity count). It is the + // discovery call: which fields can be filtered on and what values each has. + FieldFacets(ctx context.Context, gid uuid.UUID) (map[string][]FieldFacet, error) + + // SearchFieldValues returns the distinct values of a single custom field + // within a group ranked by entity count, optionally narrowed to those whose + // value matches query (a case-insensitive substring). + SearchFieldValues(ctx context.Context, gid uuid.UUID, field, query string) ([]FieldFacet, error) +} + +// Both engines provide the faceting capability. +var ( + _ Faceter = (*DatabaseEngine)(nil) + _ Faceter = (*MeilisearchEngine)(nil) +) + +// NewEngine constructs the search engine selected by cfg.Driver. An empty +// driver selects the database engine. 
The event bus may be nil, in which case +// external engines fall back to startup-only index builds. +func NewEngine(cfg config.SearchConf, db *ent.Client, bus *eventbus.EventBus) (Engine, error) { + switch strings.ToLower(strings.TrimSpace(cfg.Driver)) { + case "", DriverDatabase: + return NewDatabaseEngine(db), nil + case DriverMeilisearch: + return NewMeilisearchEngine(cfg.Meilisearch, db, bus) + default: + return nil, fmt.Errorf("unsupported search driver: %q (supported: %s, %s)", cfg.Driver, DriverDatabase, DriverMeilisearch) + } +} diff --git a/backend/internal/data/search/tokenize.go b/backend/internal/data/search/tokenize.go new file mode 100644 index 000000000..c7f46b270 --- /dev/null +++ b/backend/internal/data/search/tokenize.go @@ -0,0 +1,58 @@ +package search + +import ( + "strings" + "unicode" +) + +// maxTokens bounds the number of tokens a single query can expand into so a +// pathological query cannot generate an unbounded SQL statement. +const maxTokens = 8 + +// Tokenize splits a free-text query into match tokens. +// +// Tokens are separated by whitespace. A double-quoted span is kept together +// as a single token (without the quotes) so users can search for exact +// phrases, e.g. `red "tool box"` yields ["red", "tool box"]. Duplicate tokens +// are dropped, and at most maxTokens tokens are returned. 
+func Tokenize(query string) []string { + var ( + tokens []string + current strings.Builder + inQuotes bool + ) + + seen := make(map[string]struct{}) + flush := func() { + tok := current.String() + current.Reset() + if tok == "" { + return + } + if _, dup := seen[tok]; dup { + return + } + seen[tok] = struct{}{} + tokens = append(tokens, tok) + } + + for _, r := range query { + switch { + case r == '"': + if inQuotes { + flush() + } + inQuotes = !inQuotes + case !inQuotes && unicode.IsSpace(r): + flush() + default: + current.WriteRune(r) + } + } + flush() + + if len(tokens) > maxTokens { + tokens = tokens[:maxTokens] + } + return tokens +} diff --git a/backend/internal/data/search/tokenize_test.go b/backend/internal/data/search/tokenize_test.go new file mode 100644 index 000000000..4366385f0 --- /dev/null +++ b/backend/internal/data/search/tokenize_test.go @@ -0,0 +1,78 @@ +package search + +import ( + "strings" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestTokenize(t *testing.T) { + testCases := []struct { + name string + input string + expected []string + }{ + { + name: "single word", + input: "hammer", + expected: []string{"hammer"}, + }, + { + name: "multiple words", + input: "red tool box", + expected: []string{"red", "tool", "box"}, + }, + { + name: "extra whitespace", + input: " red \t tool\n", + expected: []string{"red", "tool"}, + }, + { + name: "quoted phrase", + input: `red "tool box"`, + expected: []string{"red", "tool box"}, + }, + { + name: "unterminated quote", + input: `red "tool box`, + expected: []string{"red", "tool box"}, + }, + { + name: "empty quotes ignored", + input: `red ""`, + expected: []string{"red"}, + }, + { + name: "duplicates removed", + input: "red red red", + expected: []string{"red"}, + }, + { + name: "empty input", + input: "", + expected: nil, + }, + { + name: "whitespace only", + input: " ", + expected: nil, + }, + { + name: "unicode words", + input: "Тестовий Запис", + expected: []string{"Тестовий", 
"Запис"}, + }, + { + name: "token count capped", + input: "t1 t2 t3 t4 t5 t6 t7 t8 t9 t10", + expected: strings.Fields("t1 t2 t3 t4 t5 t6 t7 t8"), + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + assert.Equal(t, tc.expected, Tokenize(tc.input)) + }) + } +} diff --git a/backend/internal/sys/config/conf.go b/backend/internal/sys/config/conf.go index e947ccb4a..db59f9995 100644 --- a/backend/internal/sys/config/conf.go +++ b/backend/internal/sys/config/conf.go @@ -7,6 +7,7 @@ import ( "fmt" "net/url" "os" + "strings" "time" "github.com/ardanlabs/conf/v3" @@ -62,6 +63,65 @@ type Config struct { Otel OTelConfig `yaml:"otel"` Auth AuthConfig `yaml:"auth"` Notifier NotifierConf `yaml:"notifier"` + Search SearchConf `yaml:"search"` +} + +// SearchConf selects and configures the free-text search engine. The default +// "database" driver searches directly in SQLite/PostgreSQL and needs no extra +// services. The "meilisearch" driver delegates matching to an external +// Meilisearch instance for typo-tolerant, relevance-ranked search. +type SearchConf struct { + Driver string `yaml:"driver" conf:"default:database"` + Meilisearch MeilisearchConf `yaml:"meilisearch"` +} + +// MeilisearchConf configures the connection to a Meilisearch instance when +// SearchConf.Driver is "meilisearch". +type MeilisearchConf struct { + Host string `yaml:"host" conf:"default:http://localhost:7700"` + APIKey string `yaml:"api_key"` + // Index is the Meilisearch index uid entities are stored in. + Index string `yaml:"index" conf:"default:homebox_entities"` + // MaxHits caps how many matching entity IDs a single search retrieves + // from Meilisearch before the database applies its own filters and + // pagination. 
+ MaxHits int64 `yaml:"max_hits" conf:"default:1000"` +} + +// MarshalJSON replaces a non-empty APIKey with redactedValue before encoding, +// so the key itself is never written to JSON output. +func (c MeilisearchConf) MarshalJSON() ([]byte, error) { + // The local type has the same fields but not this method, so json.Marshal + // does not call MarshalJSON recursively. + type alias MeilisearchConf + a := alias(c) + if a.APIKey != "" { + a.APIKey = redactedValue + } + return json.Marshal(a) +} + +// Validate enforces secure transport for the Meilisearch connection. The API +// key is sent on every request, so a non-local endpoint reached over plaintext +// http would leak it on the wire. https is allowed for any host; http is +// permitted only for loopback hosts (localhost, 127.0.0.1, ::1). A URL that +// names no host at all, such as "https://", is rejected for every scheme. +func (c MeilisearchConf) Validate() error { + if c.Host == "" { + return errors.New("search.meilisearch.host must not be empty") + } + u, err := url.Parse(c.Host) + if err != nil { + return fmt.Errorf("search.meilisearch.host is not a valid URL: %w", err) + } + switch u.Scheme { + case "https": + // "https://" parses without error but names no server; without this + // check it would be accepted as a valid endpoint. + if u.Hostname() == "" { + return errors.New("search.meilisearch.host must include a host name") + } + return nil + case "http": + switch strings.ToLower(u.Hostname()) { + case "localhost", "127.0.0.1", "::1": + return nil + default: + return fmt.Errorf("search.meilisearch.host uses insecure http for non-local host %q: use https", u.Host) + } + default: + return fmt.Errorf("search.meilisearch.host has unsupported scheme %q: must be http or https", u.Scheme) + } } type Options struct { @@ -204,6 +264,14 @@ func New(buildstr string, description string) (*Config, error) { return &cfg, fmt.Errorf("parsing config: %w", err) } + // Only enforce Meilisearch transport rules when that driver is selected; + // the unused default host is otherwise irrelevant. 
+ // The driver is normalized the same way search.NewEngine does (trimmed and + // lower-cased) so a value such as "Meilisearch" cannot skip this check. + if strings.ToLower(strings.TrimSpace(cfg.Search.Driver)) == "meilisearch" { + if err := cfg.Search.Meilisearch.Validate(); err != nil { + return &cfg, fmt.Errorf("invalid meilisearch config: %w", err) + } + } + return &cfg, nil } diff --git a/backend/internal/sys/config/conf_search_test.go b/backend/internal/sys/config/conf_search_test.go new file mode 100644 index 000000000..c391a40ed --- /dev/null +++ b/backend/internal/sys/config/conf_search_test.go @@ -0,0 +1,59 @@ +package config + +import ( + "encoding/json" + "testing" + + "github.com/ardanlabs/conf/v3" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func Test_SearchConf_Defaults(t *testing.T) { + var cfg Config + _, err := conf.Parse("HBOXTEST", &cfg) + require.NoError(t, err) + + assert.Equal(t, "database", cfg.Search.Driver) + assert.Equal(t, "http://localhost:7700", cfg.Search.Meilisearch.Host) + assert.Equal(t, "homebox_entities", cfg.Search.Meilisearch.Index) + assert.Equal(t, int64(1000), cfg.Search.Meilisearch.MaxHits) +} + +func Test_MeilisearchConf_Validate(t *testing.T) { + tests := []struct { + name string + host string + wantErr bool + }{ + {"https remote", "https://search.example.com", false}, + {"https remote with port", "https://search.example.com:7700", false}, + {"http localhost", "http://localhost:7700", false}, + {"http 127.0.0.1", "http://127.0.0.1:7700", false}, + {"http ipv6 loopback", "http://[::1]:7700", false}, + {"http remote rejected", "http://search.example.com:7700", true}, + {"http remote ip rejected", "http://10.0.0.5:7700", true}, + {"empty host", "", true}, + {"unsupported scheme", "ftp://localhost", true}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := MeilisearchConf{Host: tt.host}.Validate() + if tt.wantErr { + require.Error(t, err) + } else { + require.NoError(t, err) + } + }) + } +} + +func Test_MeilisearchConf_RedactsAPIKey(t *testing.T) { + c := MeilisearchConf{APIKey: "super-secret-meili-key"} + + out, err := json.Marshal(c) + 
require.NoError(t, err) + + assert.NotContains(t, string(out), "super-secret-meili-key") + assert.Contains(t, string(out), sentinel) +} diff --git a/backend/pkgs/cgofreesqlite/sqlite.go b/backend/pkgs/cgofreesqlite/sqlite.go index c9faf7a80..d593ab668 100644 --- a/backend/pkgs/cgofreesqlite/sqlite.go +++ b/backend/pkgs/cgofreesqlite/sqlite.go @@ -10,7 +10,9 @@ package cgofreesqlite import ( "database/sql" "database/sql/driver" + "fmt" + "github.com/sysadminsmedia/homebox/backend/pkgs/textutils" "modernc.org/sqlite" ) @@ -35,6 +37,37 @@ func (d CGOFreeSqliteDriver) Open(name string) (conn driver.Conn, err error) { return conn, err } +// modernDriver returns modernc's package-level driver singleton (the instance +// it registers as "sqlite"). Functions registered through the sqlite package +// (like hb_fold below) are stored on that singleton only, so wrapping a fresh +// &sqlite.Driver{} would silently lose them. +func modernDriver() *sqlite.Driver { + db, err := sql.Open("sqlite", "") + if err != nil { + panic(err) + } + defer func() { _ = db.Close() }() + return db.Driver().(*sqlite.Driver) +} + func init() { //nolint:gochecknoinits - sql.Register("sqlite3", CGOFreeSqliteDriver{Driver: &sqlite.Driver{}}) + sql.Register("sqlite3", CGOFreeSqliteDriver{Driver: modernDriver()}) + + // hb_fold(text) folds its argument for case- and accent-insensitive + // comparison (full Unicode case folding + diacritic removal). SQLite's + // built-in lower()/LIKE only handle ASCII, which breaks search for + // Cyrillic, Greek, and other non-ASCII scripts. The search engine compares + // hb_fold(column) against patterns folded the same way in Go. 
+ sqlite.MustRegisterDeterministicScalarFunction("hb_fold", 1, func(_ *sqlite.FunctionContext, args []driver.Value) (driver.Value, error) { + switch v := args[0].(type) { + case nil: + return nil, nil + case string: + return textutils.Fold(v), nil + case []byte: + return textutils.Fold(string(v)), nil + default: + return nil, fmt.Errorf("hb_fold: unsupported argument type %T", v) + } + }) } diff --git a/backend/pkgs/textutils/normalize.go b/backend/pkgs/textutils/normalize.go index f484f4d69..a48a18f04 100644 --- a/backend/pkgs/textutils/normalize.go +++ b/backend/pkgs/textutils/normalize.go @@ -1,9 +1,11 @@ +// Package textutils provides text normalization helpers used by the search +// system to implement case- and accent-insensitive matching across scripts. package textutils import ( - "strings" "unicode" + "golang.org/x/text/cases" "golang.org/x/text/runes" "golang.org/x/text/transform" "golang.org/x/text/unicode/norm" @@ -32,9 +34,15 @@ func RemoveAccents(text string) string { return result } -// NormalizeSearchQuery normalizes a search query for accent-insensitive matching. -// This function removes accents and converts to lowercase for consistent search behavior. -func NormalizeSearchQuery(query string) string { - normalized := RemoveAccents(query) - return strings.ToLower(normalized) +// Fold returns a canonical caseless, accent-less representation of text for +// search comparison. Two strings match case- and accent-insensitively iff +// their folded forms are equal (or one contains the other). +// +// Unicode case folding is used instead of lowercasing so that scripts with +// non-trivial case rules compare correctly (e.g. Greek final sigma "ς" and +// "σ" both fold to "σ", "Σ" included; Cyrillic "Тест" folds to "тест"). +// Folding can introduce new combining marks (e.g. "İ" folds to "i" + U+0307), +// so accents are stripped after folding as well as before. 
+func Fold(text string) string { + return RemoveAccents(cases.Fold().String(RemoveAccents(text))) } diff --git a/backend/pkgs/textutils/normalize_test.go b/backend/pkgs/textutils/normalize_test.go index 8e6bd2423..7627dcff8 100644 --- a/backend/pkgs/textutils/normalize_test.go +++ b/backend/pkgs/textutils/normalize_test.go @@ -113,7 +113,7 @@ func TestRemoveAccents(t *testing.T) { } } -func TestNormalizeSearchQuery(t *testing.T) { +func TestFold(t *testing.T) { testCases := []struct { name string input string @@ -139,13 +139,40 @@ func TestNormalizeSearchQuery(t *testing.T) { input: "Hello World", expected: "hello world", }, + { + // й decomposes to и + combining breve, which accent stripping + // removes — like ñ→n, both sides of a match fold the same way. + name: "Ukrainian Cyrillic uppercase", + input: "Тестовий Запис", + expected: "тестовии запис", + }, + { + name: "Greek uppercase with final sigma", + input: "Υπολογιστής", + expected: "υπολογιστησ", + }, + { + name: "Greek lowercase final and medial sigma fold identically", + input: "υπολογιστης σ ς", + expected: "υπολογιστησ σ σ", + }, + { + name: "German sharp s folds to ss", + input: "Straße", + expected: "strasse", + }, + { + name: "Turkish dotted capital I", + input: "İstanbul", + expected: "istanbul", + }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - result := NormalizeSearchQuery(tc.input) + result := Fold(tc.input) if result != tc.expected { - t.Errorf("NormalizeSearchQuery(%q) = %q, expected %q", tc.input, result, tc.expected) + t.Errorf("Fold(%q) = %q, expected %q", tc.input, result, tc.expected) } }) } diff --git a/docs/public/api/openapi-3.0.json b/docs/public/api/openapi-3.0.json index f24740842..33e9def05 100644 --- a/docs/public/api/openapi-3.0.json +++ b/docs/public/api/openapi-3.0.json @@ -245,7 +245,7 @@ "summary": "Query All Entities", "parameters": [ { - "description": "search string", + "description": "search string; matches names, descriptions, serial/model 
numbers, manufacturers, notes, purchase sources, tag names, and custom field values. Use #<assetId> to look up by asset ID and double quotes for exact phrases", "name": "q", "in": "query", "schema": { @@ -280,6 +280,14 @@ } } }, + { + "description": "require all selected tags to match (AND) instead of any (OR)", + "name": "matchAllTags", + "in": "query", + "schema": { + "type": "boolean" + } + }, { "description": "parent Ids", "name": "parentIds", diff --git a/docs/public/api/openapi-3.0.yaml b/docs/public/api/openapi-3.0.yaml index c074bb7be..002d3368e 100644 --- a/docs/public/api/openapi-3.0.yaml +++ b/docs/public/api/openapi-3.0.yaml @@ -144,7 +144,10 @@ paths: - Entities summary: Query All Entities parameters: - - description: search string + - description: "search string; matches names, descriptions, serial/model numbers, + manufacturers, notes, purchase sources, tag names, and custom field + values. Use #<assetId> to look up by asset ID and double quotes for + exact phrases" name: q in: query schema: @@ -167,6 +170,11 @@ paths: type: array items: type: string + - description: require all selected tags to match (AND) instead of any (OR) + name: matchAllTags + in: query + schema: + type: boolean - description: parent Ids name: parentIds in: query diff --git a/docs/public/api/swagger-2.0.json b/docs/public/api/swagger-2.0.json index 666f56c41..fe9108833 100644 --- a/docs/public/api/swagger-2.0.json +++ b/docs/public/api/swagger-2.0.json @@ -244,7 +244,7 @@ "parameters": [ { "type": "string", - "description": "search string", + "description": "search string; matches names, descriptions, serial/model numbers, manufacturers, notes, purchase sources, tag names, and custom field values. 
Use #\u003cassetId\u003e to look up by asset ID and double quotes for exact phrases", "name": "q", "in": "query" }, @@ -270,6 +270,12 @@ "name": "tags", "in": "query" }, + { + "type": "boolean", + "description": "require all selected tags to match (AND) instead of any (OR)", + "name": "matchAllTags", + "in": "query" + }, { "type": "array", "items": { diff --git a/docs/public/api/swagger-2.0.yaml b/docs/public/api/swagger-2.0.yaml index 2f8280708..99b9dd00d 100644 --- a/docs/public/api/swagger-2.0.yaml +++ b/docs/public/api/swagger-2.0.yaml @@ -2479,7 +2479,9 @@ paths: /v1/entities: get: parameters: - - description: search string + - description: 'search string; matches names, descriptions, serial/model numbers, + manufacturers, notes, purchase sources, tag names, and custom field values. + Use #<assetId> to look up by asset ID and double quotes for exact phrases' in: query name: q type: string @@ -2498,6 +2500,10 @@ paths: type: string name: tags type: array + - description: require all selected tags to match (AND) instead of any (OR) + in: query + name: matchAllTags + type: boolean - collectionFormat: multi description: parent Ids in: query diff --git a/docs/src/content/docs/en/quick-start/configure/database.mdx b/docs/src/content/docs/en/quick-start/configure/database.mdx index 36c02894d..823d92dcb 100644 --- a/docs/src/content/docs/en/quick-start/configure/database.mdx +++ b/docs/src/content/docs/en/quick-start/configure/database.mdx @@ -6,9 +6,11 @@ By default, Homebox will use an SQLite database. This allows for quick setup and However, you can configure Homebox to use a PostgreSQL database instead. We highly recommend this for larger installations, as it will allow you to scale Homebox horizontally. -> [!WARNING] -> PostgreSQL is **HIGHLY** recommended if you're using a complex character set language such as Russian, Chinese, etc. -> (basically any language that requires non-ASCII characters) as SQLite has very poor search support for these languages. 
+> [!NOTE] +> Search is fully Unicode-aware on both databases: Homebox implements its own +> case- and accent-insensitive matching on SQLite, so non-ASCII languages +> (Cyrillic, Greek, etc.) work out of the box. See the [Search](./search) page +> for details and optional external search engines. ## SQLite This is the default; generally speaking, there is no need to change anything here. We've already configured it with our @@ -53,3 +55,18 @@ Optionally, you can also add the following environment variable to the Homebox s - `HBOX_DATABASE_SSL_ROOT_CERT=path/to/root.crt` (path to the root certificate file) - `HBOX_DATABASE_SSL_CERT=path/to/server.crt` (path to the server certificate file) - `HBOX_DATABASE_SSL_KEY=path/to/server.key` (path to the server key file) + +## Accent-insensitive search on PostgreSQL + +Search is case-insensitive on both databases. To make it accent-insensitive on +PostgreSQL as well (e.g. so `cafe` matches `café`), Homebox uses the standard +[`unaccent`](https://www.postgresql.org/docs/current/unaccent.html) extension. +Homebox tries to enable it automatically on startup; if the database user +lacks the privilege to create extensions, enable it once as a superuser: + +```sql +CREATE EXTENSION IF NOT EXISTS unaccent; +``` + +Without the extension, search still works but treats accented characters as +distinct. SQLite needs no setup — accent-insensitive search is built in. 
diff --git a/docs/src/content/docs/en/quick-start/configure/index.mdx b/docs/src/content/docs/en/quick-start/configure/index.mdx index 27181cf72..144037a93 100644 --- a/docs/src/content/docs/en/quick-start/configure/index.mdx +++ b/docs/src/content/docs/en/quick-start/configure/index.mdx @@ -46,6 +46,11 @@ import {Tabs, TabItem} from "@astrojs/starlight/components"; | HBOX_OPTIONS_ALLOW_LOCAL_LOGIN | true | allow users to login with username/password when OIDC is enabled | | HBOX_OPTIONS_TRUST_PROXY | false | trust proxy headers for determining request scheme (X-Forwarded-Proto) | | HBOX_OPTIONS_HOSTNAME | | override hostname used for OIDC redirect URLs and other absolute URLs | +| HBOX_SEARCH_DRIVER | database | selects the free-text search engine: `database` (searches directly in SQLite/PostgreSQL, no extra services needed) or `meilisearch`. See the [Search](./search) page. | +| HBOX_SEARCH_MEILISEARCH_HOST | http://localhost:7700 | URL of the Meilisearch instance (when `HBOX_SEARCH_DRIVER=meilisearch`) | +| HBOX_SEARCH_MEILISEARCH_API_KEY | | Meilisearch API key | +| HBOX_SEARCH_MEILISEARCH_INDEX | homebox_entities | Meilisearch index uid Homebox stores entity documents in | +| HBOX_SEARCH_MEILISEARCH_MAX_HITS | 1000 | maximum matches retrieved per search before database filters and pagination are applied | | HBOX_AUTH_API_KEY_PEPPER | | **Required.** Server-side secret HMAC-keyed into stored API key hashes; the binary refuses to start if this is shorter than 32 bytes. Generate with `openssl rand -base64 48`. Must stay stable across restarts — rotating it invalidates every issued API key. 
| | HBOX_AUTH_RATE_LIMIT_ENABLED | true | enable rate limiting for authentication attempts | | HBOX_AUTH_RATE_LIMIT_MAX_ATTEMPTS | 5 | maximum number of failed authentication attempts before rate limiting | diff --git a/docs/src/content/docs/en/quick-start/configure/search.mdx b/docs/src/content/docs/en/quick-start/configure/search.mdx new file mode 100644 index 000000000..8690b120c --- /dev/null +++ b/docs/src/content/docs/en/quick-start/configure/search.mdx @@ -0,0 +1,73 @@ +--- +title: Search +--- + +Homebox ships with a built-in search engine and optionally integrates with +[Meilisearch](https://www.meilisearch.com/) for typo-tolerant, +relevance-ranked search. The engine is selected with `HBOX_SEARCH_DRIVER`. + +## Database search (default) + +`HBOX_SEARCH_DRIVER=database` needs no extra services and works on both SQLite +and PostgreSQL. Searches match item names, descriptions, serial numbers, model +numbers, manufacturers, notes, purchase sources, tag names, and custom field +values: + +- Every word in the query must match somewhere on the item (in any field). +- Wrap words in double quotes to match an exact phrase: `"tool box"`. +- Matching is case-insensitive across the full Unicode range (Cyrillic, + Greek, etc.) and accent-insensitive (`cafe` matches `café`). +- Prefix the query with `#` to look an item up by asset ID (`#000-001`). + +Accent-insensitivity on PostgreSQL uses the `unaccent` extension; see the +[Database](./database#accent-insensitive-search-on-postgresql) page. + +## Meilisearch + +`HBOX_SEARCH_DRIVER=meilisearch` delegates text matching to a Meilisearch +instance. On top of everything the database engine does, Meilisearch adds typo +tolerance (`smartphnoe` still finds "Smartphone") and relevance-based +matching. Homebox keeps the index up to date automatically: it rebuilds the +index in the background at startup and re-indexes after items or tags change. 
+ +Example compose setup: + +```yaml +services: + homebox: + image: ghcr.io/sysadminsmedia/homebox:latest + environment: + - HBOX_SEARCH_DRIVER=meilisearch + - HBOX_SEARCH_MEILISEARCH_HOST=http://meilisearch:7700 + - HBOX_SEARCH_MEILISEARCH_API_KEY=your_master_key + depends_on: + - meilisearch + + meilisearch: + image: getmeili/meilisearch:v1.22 + environment: + - MEILI_MASTER_KEY=your_master_key + - MEILI_NO_ANALYTICS=true + volumes: + - meilisearch:/meili_data + +volumes: + meilisearch: +``` + +| Variable | Default | Description | +| ----------------------------------- | ----------------------- | ---------------------------------------------------------------------------- | +| HBOX_SEARCH_DRIVER | database | search engine: `database` or `meilisearch` | +| HBOX_SEARCH_MEILISEARCH_HOST | `http://localhost:7700` | URL of the Meilisearch instance | +| HBOX_SEARCH_MEILISEARCH_API_KEY | | Meilisearch API key (the master key, or a key with index read/write access) | +| HBOX_SEARCH_MEILISEARCH_INDEX | homebox_entities | index uid Homebox stores entity documents in | +| HBOX_SEARCH_MEILISEARCH_MAX_HITS | 1000 | maximum matches retrieved per search before filters/pagination are applied | + +Notes: + +- Homebox fails to start when the driver is `meilisearch` and the instance is + unreachable, so misconfigurations surface immediately. +- Plain `http://` is accepted only for loopback hosts (`localhost`, + `127.0.0.1`, `::1`) because the API key is sent on every request. Every + other host, including a Compose service name such as `meilisearch` in the + example above, must be reached over `https://`; otherwise the configuration + is rejected when Homebox loads it. +- The index is rebuilt automatically; no manual reindexing is needed. If the + index is deleted or Meilisearch data is lost, restart Homebox to rebuild it. +- Searches never leak across homes/groups: every query is filtered by group, + and results are re-validated against the database. 
diff --git a/frontend/lib/api/classes/items.ts b/frontend/lib/api/classes/items.ts index 5bb28bbae..363bda2a2 100644 --- a/frontend/lib/api/classes/items.ts +++ b/frontend/lib/api/classes/items.ts @@ -25,6 +25,7 @@ export type ItemsQuery = { parentIds?: string[]; tags?: string[]; negateTags?: boolean; + matchAllTags?: boolean; onlyWithoutPhoto?: boolean; onlyWithPhoto?: boolean; q?: string; diff --git a/frontend/locales/en.json b/frontend/locales/en.json index d88fe4ddc..ee665368d 100644 --- a/frontend/locales/en.json +++ b/frontend/locales/en.json @@ -600,6 +600,7 @@ "manual": "Manual", "manuals": "Manuals", "manufacturer": "Manufacturer", + "match_all_tags": "Require All Selected Tags", "model_number": "Model Number", "name": "Name", "negate_tags": "Negate Selected Tags", @@ -638,6 +639,7 @@ "sync_child_locations": "Sync child items' locations", "tip_1": "Location and tag filters use the 'OR' operation. If more than one is selected only one will be\n required for a match.", "tip_2": "Searches prefixed with '#'' will query for a asset ID (example '#000-001')", + "tip_4": "Search matches names, descriptions, serial and model numbers, manufacturers, notes, purchase sources, tag names, and custom field values. Every word must match somewhere; wrap words in double quotes to match an exact phrase.", "tip_3": "Field filters use the 'OR' operation. 
If more than one is selected only one will be required for a\n match.", "tips": "Tips", "tips_sub": "Search Tips", diff --git a/frontend/pages/items.vue b/frontend/pages/items.vue index 99762bb24..26f1796ef 100644 --- a/frontend/pages/items.vue +++ b/frontend/pages/items.vue @@ -85,6 +85,7 @@ const includeArchived = useOptionalRouteQuery("archived", false); const fieldSelector = useOptionalRouteQuery("fieldSelector", false); const negateTags = useOptionalRouteQuery("negateTags", false); + const matchAllTags = useOptionalRouteQuery("matchAllTags", false); const onlyWithoutPhoto = useOptionalRouteQuery("onlyWithoutPhoto", false); const onlyWithPhoto = useOptionalRouteQuery("onlyWithPhoto", false); const orderBy = useOptionalRouteQuery("orderBy", "name"); @@ -207,6 +208,12 @@ } }); + watch(matchAllTags, (newV, oldV) => { + if (newV !== oldV) { + search(); + } + }); + watch(onlyWithoutPhoto, (newV, oldV) => { if (newV && onlyWithPhoto.value) { // this triggers the watch on onlyWithPhoto @@ -275,6 +282,7 @@ archived: includeArchived.value, fieldSelector: fieldSelector.value, negateTags: negateTags.value, + matchAllTags: matchAllTags.value, onlyWithoutPhoto: onlyWithoutPhoto.value, onlyWithPhoto: onlyWithPhoto.value, orderBy: orderBy.value, @@ -312,6 +320,7 @@ parentIds: locIDs.value, tags: tagIDs.value, negateTags: negateTags.value, + matchAllTags: matchAllTags.value, onlyWithoutPhoto: onlyWithoutPhoto.value, onlyWithPhoto: onlyWithPhoto.value, includeArchived: includeArchived.value, @@ -426,6 +435,11 @@
{{ $t("items.negate_tags") }} +