From 8947c384bbbbd553371a4a09bb5d08a484b0f876 Mon Sep 17 00:00:00 2001 From: Matthew Kilgore Date: Thu, 11 Jun 2026 20:22:39 -0400 Subject: [PATCH 1/5] fix: better db search --- backend/app/api/cli_reset_password.go | 2 +- .../app/api/handlers/v1/v1_ctrl_entities.go | 14 +- backend/app/api/main.go | 9 +- backend/app/api/static/docs/docs.go | 8 +- backend/app/api/static/docs/openapi-3.json | 10 +- backend/app/api/static/docs/openapi-3.yaml | 10 +- backend/app/api/static/docs/swagger.json | 8 +- backend/app/api/static/docs/swagger.yaml | 8 +- backend/internal/core/services/main_test.go | 2 +- .../service_items_attachments_test.go | 2 +- backend/internal/data/ent/external.go | 6 + backend/internal/data/repo/main_test.go | 2 +- backend/internal/data/repo/repo_entities.go | 101 +++-- .../data/repo/repo_item_attachments_test.go | 2 +- .../data/repo/repo_items_search_test.go | 397 +++++++++--------- backend/internal/data/repo/repos_all.go | 10 +- backend/internal/data/search/database.go | 160 +++++++ backend/internal/data/search/database_test.go | 98 +++++ backend/internal/data/search/search.go | 59 +++ backend/internal/data/search/tokenize.go | 58 +++ backend/internal/data/search/tokenize_test.go | 78 ++++ backend/internal/sys/config/conf.go | 9 + backend/pkgs/cgofreesqlite/sqlite.go | 35 +- backend/pkgs/textutils/normalize.go | 20 +- backend/pkgs/textutils/normalize_test.go | 33 +- docs/public/api/openapi-3.0.json | 10 +- docs/public/api/openapi-3.0.yaml | 10 +- docs/public/api/swagger-2.0.json | 8 +- docs/public/api/swagger-2.0.yaml | 8 +- .../en/quick-start/configure/database.mdx | 15 + .../docs/en/quick-start/configure/index.mdx | 1 + frontend/lib/api/classes/items.ts | 1 + frontend/locales/en.json | 2 + frontend/pages/items.vue | 17 + 34 files changed, 944 insertions(+), 269 deletions(-) create mode 100644 backend/internal/data/search/database.go create mode 100644 backend/internal/data/search/database_test.go create mode 100644 
backend/internal/data/search/search.go create mode 100644 backend/internal/data/search/tokenize.go create mode 100644 backend/internal/data/search/tokenize_test.go diff --git a/backend/app/api/cli_reset_password.go b/backend/app/api/cli_reset_password.go index b9cbe8c33..9d9e4ca71 100644 --- a/backend/app/api/cli_reset_password.go +++ b/backend/app/api/cli_reset_password.go @@ -118,7 +118,7 @@ func generateResetLinkOffline(cfg *config.Config, email string) (string, error) } bus := eventbus.New() - repos := repo.New(c, bus, cfg.Storage, cfg.Database.PubSubConnString, cfg.Thumbnail) + repos := repo.New(c, bus, cfg.Storage, cfg.Database.PubSubConnString, cfg.Thumbnail, nil) svc := services.New(repos) baseURL := strings.TrimSuffix(cfg.Options.Hostname, "/") diff --git a/backend/app/api/handlers/v1/v1_ctrl_entities.go b/backend/app/api/handlers/v1/v1_ctrl_entities.go index 6758d0a43..f9349a806 100644 --- a/backend/app/api/handlers/v1/v1_ctrl_entities.go +++ b/backend/app/api/handlers/v1/v1_ctrl_entities.go @@ -48,12 +48,13 @@ func startEntityCtrlSpan(ctx context.Context, name string, attrs ...attribute.Ke // @Summary Query All Entities // @Tags Entities // @Produce json -// @Param q query string false "search string" -// @Param page query int false "page number" -// @Param pageSize query int false "items per page" -// @Param tags query []string false "tags Ids" collectionFormat(multi) -// @Param parentIds query []string false "parent Ids" collectionFormat(multi) -// @Success 200 {object} repo.EntityListResult +// @Param q query string false "search string; matches names, descriptions, serial/model numbers, manufacturers, notes, purchase sources, tag names, and custom field values. 
Use #<assetId> to look up by asset ID and double quotes for exact phrases" +// @Param page query int false "page number" +// @Param pageSize query int false "items per page" +// @Param tags query []string false "tags Ids" collectionFormat(multi) +// @Param matchAllTags query bool false "require all selected tags to match (AND) instead of any (OR)" +// @Param parentIds query []string false "parent Ids" collectionFormat(multi) +// @Success 200 {object} repo.EntityListResult // @Router /v1/entities [GET] // @Security Bearer func (ctrl *V1Controller) HandleEntitiesGetAll() errchain.HandlerFunc { @@ -80,6 +81,7 @@ func (ctrl *V1Controller) HandleEntitiesGetAll() errchain.HandlerFunc { ParentIDs: queryUUIDList(params, "parentIds"), TagIDs: queryUUIDList(params, "tags"), NegateTags: queryBool(params.Get("negateTags")), + MatchAllTags: queryBool(params.Get("matchAllTags")), OnlyWithoutPhoto: queryBool(params.Get("onlyWithoutPhoto")), OnlyWithPhoto: queryBool(params.Get("onlyWithPhoto")), IncludeArchived: queryBool(params.Get("includeArchived")), diff --git a/backend/app/api/main.go b/backend/app/api/main.go index d3e0b8cae..dd9039195 100644 --- a/backend/app/api/main.go +++ b/backend/app/api/main.go @@ -16,6 +16,7 @@ import ( "github.com/sysadminsmedia/homebox/backend/internal/core/services/reporting/eventbus" "github.com/sysadminsmedia/homebox/backend/internal/data/ent" "github.com/sysadminsmedia/homebox/backend/internal/data/repo" + "github.com/sysadminsmedia/homebox/backend/internal/data/search" "github.com/sysadminsmedia/homebox/backend/internal/sys/analytics" "github.com/sysadminsmedia/homebox/backend/internal/sys/config" "github.com/sysadminsmedia/homebox/backend/internal/sys/otel" @@ -162,7 +163,13 @@ func run(cfg *config.Config) error { app.bus = eventbus.New() app.db = c - app.repos = repo.New(c, app.bus, cfg.Storage, cfg.Database.PubSubConnString, cfg.Thumbnail) + + searchEngine, err := search.NewEngine(cfg.Search.Driver, c) + if err != nil { +
log.Error().Err(err).Str("driver", cfg.Search.Driver).Msg("failed to create search engine") + return err + } + app.repos = repo.New(c, app.bus, cfg.Storage, cfg.Database.PubSubConnString, cfg.Thumbnail, searchEngine) // Attachment-key escaping in fileblob only flattens paths on Windows // (where os.PathSeparator is "\"), so the legacy-path rename is a Windows- diff --git a/backend/app/api/static/docs/docs.go b/backend/app/api/static/docs/docs.go index c6a1a696b..686c4f879 100644 --- a/backend/app/api/static/docs/docs.go +++ b/backend/app/api/static/docs/docs.go @@ -247,7 +247,7 @@ const docTemplate = `{ "parameters": [ { "type": "string", - "description": "search string", + "description": "search string; matches names, descriptions, serial/model numbers, manufacturers, notes, purchase sources, tag names, and custom field values. Use #\u003cassetId\u003e to look up by asset ID and double quotes for exact phrases", "name": "q", "in": "query" }, @@ -273,6 +273,12 @@ const docTemplate = `{ "name": "tags", "in": "query" }, + { + "type": "boolean", + "description": "require all selected tags to match (AND) instead of any (OR)", + "name": "matchAllTags", + "in": "query" + }, { "type": "array", "items": { diff --git a/backend/app/api/static/docs/openapi-3.json b/backend/app/api/static/docs/openapi-3.json index a16bb34b1..17b3e3ba9 100644 --- a/backend/app/api/static/docs/openapi-3.json +++ b/backend/app/api/static/docs/openapi-3.json @@ -245,7 +245,7 @@ "summary": "Query All Entities", "parameters": [ { - "description": "search string", + "description": "search string; matches names, descriptions, serial/model numbers, manufacturers, notes, purchase sources, tag names, and custom field values. 
Use #<assetId> to look up by asset ID and double quotes for exact phrases", "name": "q", "in": "query", "schema": { @@ -280,6 +280,14 @@ } } }, + { + "description": "require all selected tags to match (AND) instead of any (OR)", + "name": "matchAllTags", + "in": "query", + "schema": { + "type": "boolean" + } + }, { "description": "parent Ids", "name": "parentIds", diff --git a/backend/app/api/static/docs/openapi-3.yaml b/backend/app/api/static/docs/openapi-3.yaml index 867abc4fa..21c61e8fd 100644 --- a/backend/app/api/static/docs/openapi-3.yaml +++ b/backend/app/api/static/docs/openapi-3.yaml @@ -144,7 +144,10 @@ paths: - Entities summary: Query All Entities parameters: - - description: search string + - description: "search string; matches names, descriptions, serial/model numbers, + manufacturers, notes, purchase sources, tag names, and custom field + values. Use #<assetId> to look up by asset ID and double quotes for + exact phrases" name: q in: query schema: @@ -167,6 +170,11 @@ paths: type: array items: type: string + - description: require all selected tags to match (AND) instead of any (OR) + name: matchAllTags + in: query + schema: + type: boolean - description: parent Ids name: parentIds in: query diff --git a/backend/app/api/static/docs/swagger.json b/backend/app/api/static/docs/swagger.json index 257fa81d1..75346ddf8 100644 --- a/backend/app/api/static/docs/swagger.json +++ b/backend/app/api/static/docs/swagger.json @@ -244,7 +244,7 @@ "parameters": [ { "type": "string", - "description": "search string", + "description": "search string; matches names, descriptions, serial/model numbers, manufacturers, notes, purchase sources, tag names, and custom field values.
Use #\u003cassetId\u003e to look up by asset ID and double quotes for exact phrases", "name": "q", "in": "query" }, @@ -270,6 +270,12 @@ "name": "tags", "in": "query" }, + { + "type": "boolean", + "description": "require all selected tags to match (AND) instead of any (OR)", + "name": "matchAllTags", + "in": "query" + }, { "type": "array", "items": { diff --git a/backend/app/api/static/docs/swagger.yaml b/backend/app/api/static/docs/swagger.yaml index 6332c8ec5..39e9ea703 100644 --- a/backend/app/api/static/docs/swagger.yaml +++ b/backend/app/api/static/docs/swagger.yaml @@ -2474,7 +2474,9 @@ paths: /v1/entities: get: parameters: - - description: search string + - description: 'search string; matches names, descriptions, serial/model numbers, + manufacturers, notes, purchase sources, tag names, and custom field values. + Use #<assetId> to look up by asset ID and double quotes for exact phrases' in: query name: q type: string @@ -2493,6 +2495,10 @@ paths: type: string name: tags type: array + - description: require all selected tags to match (AND) instead of any (OR) + in: query + name: matchAllTags + type: boolean - collectionFormat: multi description: parent Ids in: query diff --git a/backend/internal/core/services/main_test.go b/backend/internal/core/services/main_test.go index cc15811b3..2925d4714 100644 --- a/backend/internal/core/services/main_test.go +++ b/backend/internal/core/services/main_test.go @@ -75,7 +75,7 @@ func MainNoExit(m *testing.M) int { Enabled: false, Width: 0, Height: 0, - }) + }, nil) err = os.MkdirAll(os.TempDir()+"/homebox", 0o755) if err != nil { diff --git a/backend/internal/core/services/service_items_attachments_test.go b/backend/internal/core/services/service_items_attachments_test.go index 422f3c60e..fe0236da1 100644 --- a/backend/internal/core/services/service_items_attachments_test.go +++ b/backend/internal/core/services/service_items_attachments_test.go @@ -76,7 +76,7 @@ func TestEntityService_AddAttachment_InvalidStorage(t *testing.T) {
Enabled: false, Width: 0, Height: 0, - }) + }, nil) svc.repo = invalidRepos diff --git a/backend/internal/data/ent/external.go b/backend/internal/data/ent/external.go index d094da862..906766e32 100644 --- a/backend/internal/data/ent/external.go +++ b/backend/internal/data/ent/external.go @@ -11,3 +11,9 @@ import ( func (c *Client) Sql() *sql.DB { return c.driver.(*entsql.Driver).DB() } + +// Dialect returns the dialect name of the underlying database driver +// (dialect.SQLite or dialect.Postgres). +func (c *Client) Dialect() string { + return c.driver.Dialect() +} diff --git a/backend/internal/data/repo/main_test.go b/backend/internal/data/repo/main_test.go index 57c1ec845..d7ff85c07 100644 --- a/backend/internal/data/repo/main_test.go +++ b/backend/internal/data/repo/main_test.go @@ -64,7 +64,7 @@ func MainNoExit(m *testing.M) int { Enabled: false, Width: 0, Height: 0, - }) + }, nil) err = os.MkdirAll(os.TempDir()+"/homebox", 0o755) if err != nil { return 0 diff --git a/backend/internal/data/repo/repo_entities.go b/backend/internal/data/repo/repo_entities.go index c92b17775..1e320653a 100644 --- a/backend/internal/data/repo/repo_entities.go +++ b/backend/internal/data/repo/repo_entities.go @@ -21,6 +21,7 @@ import ( "github.com/sysadminsmedia/homebox/backend/internal/data/ent/maintenanceentry" "github.com/sysadminsmedia/homebox/backend/internal/data/ent/predicate" "github.com/sysadminsmedia/homebox/backend/internal/data/ent/tag" + "github.com/sysadminsmedia/homebox/backend/internal/data/search" "github.com/sysadminsmedia/homebox/backend/internal/data/types" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/attribute" @@ -44,6 +45,7 @@ type EntityRepository struct { db *ent.Client bus *eventbus.EventBus attachments *AttachmentRepo + search search.Engine } type ( @@ -60,6 +62,7 @@ type ( ParentIDs []uuid.UUID `json:"parentIds"` TagIDs []uuid.UUID `json:"tagIds"` NegateTags bool `json:"negateTags"` + MatchAllTags bool `json:"matchAllTags"` // require every 
selected tag (AND) instead of any (OR); ignored when NegateTags is set OnlyWithoutPhoto bool `json:"onlyWithoutPhoto"` OnlyWithPhoto bool `json:"onlyWithPhoto"` ParentItemIDs []uuid.UUID `json:"parentItemIds"` @@ -528,6 +531,7 @@ func entityQuerySpanAttrs(gid uuid.UUID, q EntityQuery) []attribute.KeyValue { attribute.String("query.search", q.Search), attribute.Int("query.tag_ids.count", len(q.TagIDs)), attribute.Bool("query.negate_tags", q.NegateTags), + attribute.Bool("query.match_all_tags", q.MatchAllTags), attribute.Int("query.parent_ids.count", len(q.ParentIDs)), attribute.Int("query.parent_item_ids.count", len(q.ParentItemIDs)), attribute.Int("query.fields.count", len(q.Fields)), @@ -542,6 +546,58 @@ func entityQuerySpanAttrs(gid uuid.UUID, q EntityQuery) []attribute.KeyValue { } } +// tagPredicates translates the tag filter portion of q into predicates that +// QueryByGroup ANDs with the rest of the query. Selected tags also match any +// of their descendant tags. +func (r *EntityRepository) tagPredicates(ctx context.Context, q EntityQuery) []predicate.Entity { + tagRepo := &TagRepository{r.db, r.bus} + ctxDescendants, descSpan := entityTracer().Start(ctx, "repo.EntityRepository.QueryByGroup.tagDescendants", + trace.WithAttributes(attribute.Int("query.tag_ids.count", len(q.TagIDs)))) + defer descSpan.End() + + // expandTags returns the given tags plus all their descendant tags, + // falling back to just the given tags when expansion fails. 
+ expandTags := func(ids []uuid.UUID) []uuid.UUID { + descendants, err := tagRepo.GetDescendantTagIDs(ctxDescendants, ids) + if err != nil { + recordSpanError(descSpan, err) + log.Warn().Err(err).Msg("failed to get descendant tags, using only direct tags") + return ids + } + if len(descendants) == 0 { + return ids + } + return descendants + } + + hasTag := func(l uuid.UUID, _ int) predicate.Entity { + return entity.HasTagWith(tag.ID(l)) + } + + switch { + case q.NegateTags: + descendants := expandTags(q.TagIDs) + descSpan.SetAttributes(attribute.Int("query.tag_descendants.count", len(descendants))) + notTag := lo.Map(descendants, func(l uuid.UUID, _ int) predicate.Entity { + return entity.Not(entity.HasTagWith(tag.ID(l))) + }) + return []predicate.Entity{entity.And(notTag...)} + case q.MatchAllTags: + // Every selected tag must be present, where each tag also counts as + // matched by any of its descendants. + preds := make([]predicate.Entity, 0, len(q.TagIDs)) + for _, id := range q.TagIDs { + expanded := expandTags([]uuid.UUID{id}) + preds = append(preds, entity.Or(lo.Map(expanded, hasTag)...)) + } + return preds + default: + descendants := expandTags(q.TagIDs) + descSpan.SetAttributes(attribute.Int("query.tag_descendants.count", len(descendants))) + return []predicate.Entity{entity.Or(lo.Map(descendants, hasTag)...)} + } +} + // QueryByGroup returns a list of entities that belong to a specific group based on the provided query. 
func (r *EntityRepository) QueryByGroup(ctx context.Context, gid uuid.UUID, q EntityQuery) (PaginationResult[EntitySummary], error) { ctx, span := entityTracer().Start(ctx, "repo.EntityRepository.QueryByGroup", @@ -583,16 +639,14 @@ func (r *EntityRepository) QueryByGroup(ctx context.Context, gid uuid.UUID, q En } if q.Search != "" { - qb.Where( - entity.Or( - entity.NameContainsFold(q.Search), - entity.DescriptionContainsFold(q.Search), - entity.SerialNumberContainsFold(q.Search), - entity.ModelNumberContainsFold(q.Search), - entity.ManufacturerContainsFold(q.Search), - entity.NotesContainsFold(q.Search), - ), - ) + searchPred, err := r.search.Predicate(ctx, gid, q.Search) + if err != nil { + recordSpanError(span, err) + return PaginationResult[EntitySummary]{}, err + } + if searchPred != nil { + qb = qb.Where(searchPred) + } } if !q.AssetID.Nil() { @@ -602,32 +656,7 @@ func (r *EntityRepository) QueryByGroup(ctx context.Context, gid uuid.UUID, q En var andPredicates []predicate.Entity { if len(q.TagIDs) > 0 { - tagRepo := &TagRepository{r.db, r.bus} - ctxDescendants, descSpan := entityTracer().Start(ctx, "repo.EntityRepository.QueryByGroup.tagDescendants", - trace.WithAttributes(attribute.Int("query.tag_ids.count", len(q.TagIDs)))) - descendants, err := tagRepo.GetDescendantTagIDs(ctxDescendants, q.TagIDs) - if err != nil { - recordSpanError(descSpan, err) - log.Warn().Err(err).Msg("failed to get descendant tags, using only direct tags") - descendants = q.TagIDs - } else if len(descendants) == 0 { - descendants = q.TagIDs - } - descSpan.SetAttributes(attribute.Int("query.tag_descendants.count", len(descendants))) - descSpan.End() - - var tagPredicates []predicate.Entity - if !q.NegateTags { - tagPredicates = lo.Map(descendants, func(l uuid.UUID, _ int) predicate.Entity { - return entity.HasTagWith(tag.ID(l)) - }) - andPredicates = append(andPredicates, entity.Or(tagPredicates...)) - } else { - tagPredicates = lo.Map(descendants, func(l uuid.UUID, _ int) 
predicate.Entity { - return entity.Not(entity.HasTagWith(tag.ID(l))) - }) - andPredicates = append(andPredicates, entity.And(tagPredicates...)) - } + andPredicates = append(andPredicates, r.tagPredicates(ctx, q)...) } if q.OnlyWithoutPhoto { diff --git a/backend/internal/data/repo/repo_item_attachments_test.go b/backend/internal/data/repo/repo_item_attachments_test.go index 1bcfbfd61..561e1858c 100644 --- a/backend/internal/data/repo/repo_item_attachments_test.go +++ b/backend/internal/data/repo/repo_item_attachments_test.go @@ -188,7 +188,7 @@ func TestAttachmentRepo_DeleteExternalLink(t *testing.T) { func TestAttachmentRepo_DeleteExternalLink_DoesNotRequireBlobStorage(t *testing.T) { ctx := context.Background() - repos := New(tClient, tbus, config.Storage{PrefixPath: "/", ConnString: "mem://"}, "mem://{{ .Topic }}", config.Thumbnail{Enabled: false}) + repos := New(tClient, tbus, config.Storage{PrefixPath: "/", ConnString: "mem://"}, "mem://{{ .Topic }}", config.Thumbnail{Enabled: false}, nil) entity := useEntities(t, 1)[0] att, err := repos.Attachments.CreateExternalLink( diff --git a/backend/internal/data/repo/repo_items_search_test.go b/backend/internal/data/repo/repo_items_search_test.go index 4d3f2164a..23a52884f 100644 --- a/backend/internal/data/repo/repo_items_search_test.go +++ b/backend/internal/data/repo/repo_items_search_test.go @@ -1,216 +1,213 @@ package repo import ( + "context" "testing" + "github.com/google/uuid" "github.com/stretchr/testify/assert" - "github.com/sysadminsmedia/homebox/backend/pkgs/textutils" + "github.com/stretchr/testify/require" ) -// Repeated test fixture; constant satisfies goconst across the test cases below. 
-const fixtureElectronicaAccented = "electrónica" - -func TestEntityRepository_AccentInsensitiveSearch(t *testing.T) { - // Test cases for accent-insensitive search - testCases := []struct { - name string - itemName string - searchQuery string - shouldMatch bool - description string - }{ - { - name: "Spanish accented item, search without accents", - itemName: fixtureElectronicaAccented, - searchQuery: "electronica", - shouldMatch: true, - description: "Should find 'electrónica' when searching for 'electronica'", - }, - { - name: "Spanish accented item, search with accents", - itemName: fixtureElectronicaAccented, - searchQuery: fixtureElectronicaAccented, - shouldMatch: true, - description: "Should find 'electrónica' when searching for 'electrónica'", - }, - { - name: "Non-accented item, search with accents", - itemName: "electronica", - searchQuery: fixtureElectronicaAccented, - shouldMatch: true, - description: "Should find 'electronica' when searching for 'electrónica' (bidirectional search)", - }, - { - name: "Spanish item with tilde, search without accents", - itemName: "café", - searchQuery: "cafe", - shouldMatch: true, - description: "Should find 'café' when searching for 'cafe'", - }, - { - name: "Spanish item without tilde, search with accents", - itemName: "cafe", - searchQuery: "café", - shouldMatch: true, - description: "Should find 'cafe' when searching for 'café' (bidirectional)", - }, - { - name: "French accented item, search without accents", - itemName: "pére", - searchQuery: "pere", - shouldMatch: true, - description: "Should find 'pére' when searching for 'pere'", - }, - { - name: "French: père without accent, search with accents", - itemName: "pere", - searchQuery: "père", - shouldMatch: true, - description: "Should find 'pere' when searching for 'père' (bidirectional)", - }, - { - name: "Mixed case with accents", - itemName: "Electrónica", - searchQuery: "ELECTRONICA", - shouldMatch: true, - description: "Should find 'Electrónica' when 
searching for 'ELECTRONICA' (case insensitive)", - }, - { - name: "Bidirectional: Non-accented item, search with different accents", - itemName: "cafe", - searchQuery: "café", - shouldMatch: true, - description: "Should find 'cafe' when searching for 'café' (bidirectional)", - }, - { - name: "Bidirectional: Item with accent, search with different accent", - itemName: "résumé", - searchQuery: "resume", - shouldMatch: true, - description: "Should find 'résumé' when searching for 'resume' (bidirectional)", - }, - { - name: "Bidirectional: Spanish ñ to n", - itemName: "espanol", - searchQuery: "español", - shouldMatch: true, - description: "Should find 'espanol' when searching for 'español' (bidirectional ñ)", - }, - { - name: "French: français with accent, search without", - itemName: "français", - searchQuery: "francais", - shouldMatch: true, - description: "Should find 'français' when searching for 'francais'", - }, - { - name: "French: français without accent, search with", - itemName: "francais", - searchQuery: "français", - shouldMatch: true, - description: "Should find 'francais' when searching for 'français' (bidirectional)", - }, - { - name: "French: été with accent, search without", - itemName: "été", - searchQuery: "ete", - shouldMatch: true, - description: "Should find 'été' when searching for 'ete'", - }, - { - name: "French: été without accent, search with", - itemName: "ete", - searchQuery: "été", - shouldMatch: true, - description: "Should find 'ete' when searching for 'été' (bidirectional)", - }, - { - name: "French: hôtel with accent, search without", - itemName: "hôtel", - searchQuery: "hotel", - shouldMatch: true, - description: "Should find 'hôtel' when searching for 'hotel'", - }, - { - name: "French: hôtel without accent, search with", - itemName: "hotel", - searchQuery: "hôtel", - shouldMatch: true, - description: "Should find 'hotel' when searching for 'hôtel' (bidirectional)", - }, - { - name: "French: naïve with accent, search without", - 
itemName: "naïve", - searchQuery: "naive", - shouldMatch: true, - description: "Should find 'naïve' when searching for 'naive'", - }, - { - name: "French: naïve without accent, search with", - itemName: "naive", - searchQuery: "naïve", - shouldMatch: true, - description: "Should find 'naive' when searching for 'naïve' (bidirectional)", - }, - } +// useSearchableItem creates an item with the given name and applies an +// optional full update so tests can populate any searchable field. +func useSearchableItem(t *testing.T, name string, mutate func(u *EntityUpdate)) EntityOut { + t.Helper() + ctx := context.Background() + itemET := useItemEntityType(t) + + e, err := tRepos.Entities.Create(ctx, tGroup.ID, EntityCreate{ + Name: name, + EntityTypeID: itemET.ID, + }) + require.NoError(t, err) + t.Cleanup(func() { _ = tRepos.Entities.Delete(context.Background(), e.ID) }) - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - // Test the normalization logic used in the repository - normalizedSearch := textutils.NormalizeSearchQuery(tc.searchQuery) - - // This simulates what happens in the repository - // The original search would find exact matches (case-insensitive) - // The normalized search would find accent-insensitive matches - - // Test that our normalization works as expected - if tc.shouldMatch { - // If it should match, then either the original query should match - // or the normalized query should match when applied to the stored data - assert.NotEmpty(t, normalizedSearch, "Normalized search should not be empty") - - // The key insight is that we're searching with both the original and normalized queries - // So "electrónica" will be found when searching for "electronica" because: - // 1. Original search: "electronica" doesn't match "electrónica" - // 2. 
Normalized search: "electronica" matches the normalized version - t.Logf("✓ %s: Item '%s' should be found with search '%s' (normalized: '%s')", - tc.description, tc.itemName, tc.searchQuery, normalizedSearch) - } else { - t.Logf("✗ %s: Item '%s' should NOT be found with search '%s' (normalized: '%s')", - tc.description, tc.itemName, tc.searchQuery, normalizedSearch) - } - }) + if mutate != nil { + u := EntityUpdate{ + ID: e.ID, + Name: name, + Quantity: 1, + EntityTypeID: itemET.ID, + } + mutate(&u) + _, err = tRepos.Entities.UpdateByGroup(ctx, tGroup.ID, u) + require.NoError(t, err) } + return e } -func TestNormalizeSearchQueryIntegration(t *testing.T) { - // Test that the normalization function works correctly - testCases := []struct { - input string - expected string - }{ - {fixtureElectronicaAccented, "electronica"}, - {"café", "cafe"}, - {"ELECTRÓNICA", "electronica"}, - {"Café París", "cafe paris"}, - {"hello world", "hello world"}, - // French accented words - {"père", "pere"}, - {"français", "francais"}, - {"été", "ete"}, - {"hôtel", "hotel"}, - {"naïve", "naive"}, - {"PÈRE", "pere"}, - {"FRANÇAIS", "francais"}, - {"ÉTÉ", "ete"}, - {"HÔTEL", "hotel"}, - {"NAÏVE", "naive"}, +// searchIDs runs a search query and returns the set of matched entity IDs. +// The test group is shared across the package, so assertions check membership +// instead of exact result counts. 
+func searchIDs(t *testing.T, q EntityQuery) map[uuid.UUID]bool { + t.Helper() + q.Page, q.PageSize = -1, -1 + res, err := tRepos.Entities.QueryByGroup(context.Background(), tGroup.ID, q) + require.NoError(t, err) + + ids := make(map[uuid.UUID]bool, len(res.Items)) + for _, item := range res.Items { + ids[item.ID] = true } + return ids +} - for _, tc := range testCases { - t.Run(tc.input, func(t *testing.T) { - result := textutils.NormalizeSearchQuery(tc.input) - assert.Equal(t, tc.expected, result, "Normalization should work correctly") - }) +func assertSearchFinds(t *testing.T, query string, item EntityOut, want bool) { + t.Helper() + found := searchIDs(t, EntityQuery{Search: query})[item.ID] + if want { + assert.True(t, found, "search %q should find item %q", query, item.Name) + } else { + assert.False(t, found, "search %q should NOT find item %q", query, item.Name) } } + +func TestEntitySearch_UnicodeCaseInsensitive(t *testing.T) { + ukrainian := useSearchableItem(t, "Тестовий Запис", nil) + greek := useSearchableItem(t, "Υπολογιστής", nil) + + // Cyrillic: lowercase, uppercase, and partial queries must match + // uppercase stored text (issue #1021). + assertSearchFinds(t, "тест", ukrainian, true) + assertSearchFinds(t, "ТЕСТ", ukrainian, true) + assertSearchFinds(t, "тестовий запис", ukrainian, true) + assertSearchFinds(t, "запис", ukrainian, true) + + // Greek, including the final-sigma form difference (issue #1367). 
+ assertSearchFinds(t, "Υπολογιστής", greek, true) + assertSearchFinds(t, "υπολογιστής", greek, true) + assertSearchFinds(t, "ΥΠΟΛΟΓΙΣΤΗΣ", greek, true) + assertSearchFinds(t, "υπολογιστης", greek, true) + + assertSearchFinds(t, "холодильник", ukrainian, false) +} + +func TestEntitySearch_AccentInsensitive(t *testing.T) { + accented := useSearchableItem(t, "Electrónica de café", nil) + plain := useSearchableItem(t, "electronica cafe pere", nil) + + assertSearchFinds(t, "electronica", accented, true) + assertSearchFinds(t, "café", accented, true) + assertSearchFinds(t, "CAFE", accented, true) + assertSearchFinds(t, "electrónica", plain, true) + assertSearchFinds(t, "père", plain, true) +} + +func TestEntitySearch_MultiTokenAnd(t *testing.T) { + item := useSearchableItem(t, "Red Tool Box", nil) + + // every token must match, in any order + assertSearchFinds(t, "box red", item, true) + assertSearchFinds(t, "red tool", item, true) + assertSearchFinds(t, "red hammer", item, false) + + // quoted phrases match as a unit + assertSearchFinds(t, `"tool box"`, item, true) + assertSearchFinds(t, `"box tool"`, item, false) +} + +func TestEntitySearch_MatchesAcrossFields(t *testing.T) { + item := useSearchableItem(t, "Multifield", func(u *EntityUpdate) { + u.SerialNumber = "SN-998877" + u.ModelNumber = "MX-1000" + u.Manufacturer = "Acme Corp" + u.Notes = "stored in the attic" + u.PurchaseFrom = "Conrad Electronic" + }) + + assertSearchFinds(t, "998877", item, true) + assertSearchFinds(t, "mx-1000", item, true) + assertSearchFinds(t, "acme", item, true) + assertSearchFinds(t, "attic", item, true) + assertSearchFinds(t, "conrad", item, true) + + // tokens may match across different fields of the same item + assertSearchFinds(t, "acme attic", item, true) +} + +func TestEntitySearch_MatchesTagNames(t *testing.T) { + ctx := context.Background() + + tagOut, err := tRepos.Tags.Create(ctx, tGroup.ID, TagCreate{Name: "Электроника-поиск"}) + require.NoError(t, err) + t.Cleanup(func() { _ 
= tRepos.Tags.delete(context.Background(), tagOut.ID) }) + + itemET := useItemEntityType(t) + tagged, err := tRepos.Entities.Create(ctx, tGroup.ID, EntityCreate{ + Name: "Tagged thing", + EntityTypeID: itemET.ID, + TagIDs: []uuid.UUID{tagOut.ID}, + }) + require.NoError(t, err) + t.Cleanup(func() { _ = tRepos.Entities.Delete(context.Background(), tagged.ID) }) + + untagged := useSearchableItem(t, "Untagged thing", nil) + + // tag names are searchable from the search bar (#1509), with the same + // UTF-8 case folding as other fields + assertSearchFinds(t, "электроника-поиск", tagged, true) + assertSearchFinds(t, "электроника-поиск", untagged, false) +} + +func TestEntitySearch_MatchesCustomFieldValues(t *testing.T) { + item := useSearchableItem(t, "Phone", func(u *EntityUpdate) { + u.Fields = []EntityFieldData{ + {Type: "text", Name: "IMEI", TextValue: "351234567891011"}, + } + }) + other := useSearchableItem(t, "Other phone", nil) + + // custom field values are searchable from the search bar (#1380) + assertSearchFinds(t, "351234567891011", item, true) + assertSearchFinds(t, "3512345", item, true) + assertSearchFinds(t, "351234567891011", other, false) +} + +func TestEntitySearch_LikeWildcardsAreLiteral(t *testing.T) { + percent := useSearchableItem(t, "100% cotton", nil) + plain := useSearchableItem(t, "100x cotton", nil) + + assertSearchFinds(t, "100%", percent, true) + assertSearchFinds(t, "100%", plain, false) + + underscore := useSearchableItem(t, "a_b pattern", nil) + noUnderscore := useSearchableItem(t, "axb pattern", nil) + + assertSearchFinds(t, "a_b", underscore, true) + assertSearchFinds(t, "a_b", noUnderscore, false) +} + +func TestQueryByGroup_MatchAllTags(t *testing.T) { + ctx := context.Background() + tags := useTags(t, 2) + + itemET := useItemEntityType(t) + both, err := tRepos.Entities.Create(ctx, tGroup.ID, EntityCreate{ + Name: "Has both tags", + EntityTypeID: itemET.ID, + TagIDs: []uuid.UUID{tags[0].ID, tags[1].ID}, + }) + require.NoError(t, err) 
+ t.Cleanup(func() { _ = tRepos.Entities.Delete(context.Background(), both.ID) }) + + one, err := tRepos.Entities.Create(ctx, tGroup.ID, EntityCreate{ + Name: "Has one tag", + EntityTypeID: itemET.ID, + TagIDs: []uuid.UUID{tags[0].ID}, + }) + require.NoError(t, err) + t.Cleanup(func() { _ = tRepos.Entities.Delete(context.Background(), one.ID) }) + + tagIDs := []uuid.UUID{tags[0].ID, tags[1].ID} + + // default OR behavior: any selected tag matches + anyMatch := searchIDs(t, EntityQuery{TagIDs: tagIDs}) + assert.True(t, anyMatch[both.ID], "OR mode should match item with both tags") + assert.True(t, anyMatch[one.ID], "OR mode should match item with one tag") + + // matchAllTags: every selected tag must be present (#1454) + allMatch := searchIDs(t, EntityQuery{TagIDs: tagIDs, MatchAllTags: true}) + assert.True(t, allMatch[both.ID], "AND mode should match item with both tags") + assert.False(t, allMatch[one.ID], "AND mode should NOT match item with only one tag") +} diff --git a/backend/internal/data/repo/repos_all.go b/backend/internal/data/repo/repos_all.go index 31ab59d12..5d4c9081e 100644 --- a/backend/internal/data/repo/repos_all.go +++ b/backend/internal/data/repo/repos_all.go @@ -4,6 +4,7 @@ package repo import ( "github.com/sysadminsmedia/homebox/backend/internal/core/services/reporting/eventbus" "github.com/sysadminsmedia/homebox/backend/internal/data/ent" + "github.com/sysadminsmedia/homebox/backend/internal/data/search" "github.com/sysadminsmedia/homebox/backend/internal/sys/config" ) @@ -24,7 +25,12 @@ type AllRepos struct { Exports *ExportRepository } -func New(db *ent.Client, bus *eventbus.EventBus, storage config.Storage, pubSubConn string, thumbnail config.Thumbnail) *AllRepos { +// New constructs the repository container. searchEngine selects the free-text +// search implementation; nil falls back to the default database engine. 
+func New(db *ent.Client, bus *eventbus.EventBus, storage config.Storage, pubSubConn string, thumbnail config.Thumbnail, searchEngine search.Engine) *AllRepos { + if searchEngine == nil { + searchEngine = search.NewDatabaseEngine(db) + } attachments := &AttachmentRepo{db, storage, pubSubConn, thumbnail} return &AllRepos{ Users: &UserRepository{db}, @@ -32,7 +38,7 @@ func New(db *ent.Client, bus *eventbus.EventBus, storage config.Storage, pubSubC PasswordResetTokens: &PasswordResetTokenRepository{db}, APIKeys: NewAPIKeyRepository(db), Groups: NewGroupRepository(db), - Entities: &EntityRepository{db, bus, attachments}, + Entities: &EntityRepository{db, bus, attachments, searchEngine}, EntityTypes: &EntityTypeRepository{db, bus}, EntityTemplates: &EntityTemplatesRepository{db, bus}, Tags: &TagRepository{db, bus}, diff --git a/backend/internal/data/search/database.go b/backend/internal/data/search/database.go new file mode 100644 index 000000000..d839d6776 --- /dev/null +++ b/backend/internal/data/search/database.go @@ -0,0 +1,160 @@ +package search + +import ( + "context" + "strings" + "sync" + + "entgo.io/ent/dialect" + entsql "entgo.io/ent/dialect/sql" + "github.com/google/uuid" + "github.com/rs/zerolog/log" + "github.com/sysadminsmedia/homebox/backend/internal/data/ent" + "github.com/sysadminsmedia/homebox/backend/internal/data/ent/entity" + "github.com/sysadminsmedia/homebox/backend/internal/data/ent/entityfield" + "github.com/sysadminsmedia/homebox/backend/internal/data/ent/predicate" + "github.com/sysadminsmedia/homebox/backend/internal/data/ent/tag" + "github.com/sysadminsmedia/homebox/backend/pkgs/textutils" +) + +// entityColumns are the entity table columns matched against each token. 
+var entityColumns = []string{ + entity.FieldName, + entity.FieldDescription, + entity.FieldSerialNumber, + entity.FieldModelNumber, + entity.FieldManufacturer, + entity.FieldNotes, + entity.FieldPurchaseFrom, +} + +// DatabaseEngine implements Engine with tokenized substring matching executed +// by the database itself, so it needs no external services or index +// maintenance. +// +// The query is split into tokens (see Tokenize); an entity matches when every +// token matches at least one searched column, tag name, or custom field +// value. All matching is case-insensitive across the full Unicode range and +// accent-insensitive where the dialect allows: +// +// - SQLite: both sides of the comparison go through hb_fold, a Go-defined +// SQL function (registered by pkgs/cgofreesqlite) that applies Unicode +// case folding and strips diacritics. SQLite's native LIKE/lower() are +// ASCII-only and silently fail for Cyrillic, Greek, etc. +// - PostgreSQL: ILIKE provides Unicode case-insensitivity natively, and the +// unaccent extension is used for accent-insensitivity when available +// (the engine tries to enable it once and degrades gracefully when the +// database user lacks the privilege). +type DatabaseEngine struct { + dialect string + db *ent.Client + + unaccentOnce sync.Once + unaccent bool +} + +// NewDatabaseEngine returns a database-backed search engine querying through +// the given ent client. +func NewDatabaseEngine(db *ent.Client) *DatabaseEngine { + return &DatabaseEngine{dialect: db.Dialect(), db: db} +} + +// Predicate implements Engine. 
+func (e *DatabaseEngine) Predicate(ctx context.Context, _ uuid.UUID, query string) (predicate.Entity, error) { + tokens := Tokenize(query) + if len(tokens) == 0 { + return nil, nil + } + + match := e.matcher(ctx) + + tokenPreds := make([]predicate.Entity, 0, len(tokens)) + for _, token := range tokens { + fieldPreds := make([]predicate.Entity, 0, len(entityColumns)+2) + for _, col := range entityColumns { + fieldPreds = append(fieldPreds, predicate.Entity(match(col, token))) + } + fieldPreds = append(fieldPreds, + // Tag names and custom field values are searchable too + // (requested in #1509 and #1380). + entity.HasTagWith(predicate.Tag(match(tag.FieldName, token))), + entity.HasFieldsWith(predicate.EntityField(match(entityfield.FieldTextValue, token))), + ) + tokenPreds = append(tokenPreds, entity.Or(fieldPreds...)) + } + return entity.And(tokenPreds...), nil +} + +// matcher returns a function that builds a dialect-appropriate +// "column contains token" SQL condition. The returned closure is generic over +// the table being selected (entity, tag, entity_fields), qualifying the +// column through the active selector. 
+func (e *DatabaseEngine) matcher(ctx context.Context) func(col, token string) func(*entsql.Selector) { + if e.dialect == dialect.Postgres { + unaccent := e.unaccentAvailable(ctx) + return func(col, token string) func(*entsql.Selector) { + pattern := "%" + escapeLike(token) + "%" + return func(s *entsql.Selector) { + s.Where(entsql.P(func(b *entsql.Builder) { + if unaccent { + b.WriteString("unaccent(").WriteString(s.C(col)).WriteString(") ILIKE unaccent(") + b.Arg(pattern) + b.WriteString(")") + } else { + b.WriteString(s.C(col)).WriteString(" ILIKE ") + b.Arg(pattern) + } + })) + } + } + } + + // SQLite + return func(col, token string) func(*entsql.Selector) { + pattern := "%" + escapeLike(textutils.Fold(token)) + "%" + return func(s *entsql.Selector) { + s.Where(entsql.P(func(b *entsql.Builder) { + b.WriteString("hb_fold(").WriteString(s.C(col)).WriteString(") LIKE ") + b.Arg(pattern) + b.WriteString(" ESCAPE '\\'") + })) + } + } +} + +// unaccentAvailable reports whether the PostgreSQL unaccent extension can be +// used. On first call it tries to enable the extension (ignoring permission +// errors) and caches the result for the lifetime of the engine. 
+func (e *DatabaseEngine) unaccentAvailable(ctx context.Context) bool { + e.unaccentOnce.Do(func() { + if e.db == nil { + return + } + if _, err := e.db.Sql().ExecContext(ctx, "CREATE EXTENSION IF NOT EXISTS unaccent"); err != nil { + log.Debug().Err(err).Msg("could not create unaccent extension (insufficient privileges?), checking if it already exists") + } + + var count int + row := e.db.Sql().QueryRowContext(ctx, "SELECT COUNT(*) FROM pg_extension WHERE extname = 'unaccent'") + if err := row.Scan(&count); err != nil { + log.Warn().Err(err).Msg("failed to check for unaccent extension; search will be accent-sensitive") + return + } + + e.unaccent = count > 0 + if e.unaccent { + log.Info().Msg("postgres unaccent extension available; search is accent-insensitive") + } else { + log.Info().Msg("postgres unaccent extension not available; search will be accent-sensitive (install it with: CREATE EXTENSION unaccent)") + } + }) + return e.unaccent +} + +// escapeLike escapes the LIKE wildcards in a literal token so user input +// cannot inject wildcard matching. Backslash is the escape character on both +// dialects (PostgreSQL's default; SQLite via an explicit ESCAPE clause). 
+func escapeLike(s string) string { + r := strings.NewReplacer(`\`, `\\`, `%`, `\%`, `_`, `\_`) + return r.Replace(s) +} diff --git a/backend/internal/data/search/database_test.go b/backend/internal/data/search/database_test.go new file mode 100644 index 000000000..2017449e6 --- /dev/null +++ b/backend/internal/data/search/database_test.go @@ -0,0 +1,98 @@ +package search + +import ( + "context" + "testing" + + "entgo.io/ent/dialect" + entsql "entgo.io/ent/dialect/sql" + "github.com/google/uuid" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/sysadminsmedia/homebox/backend/internal/data/ent/entity" + "github.com/sysadminsmedia/homebox/backend/internal/data/ent/predicate" +) + +// renderPredicate applies a predicate to a bare entity selector and returns +// the generated SQL and bound arguments. The end-to-end behavior against a +// real SQLite database is covered by the repo package tests; these tests pin +// the SQL shape per dialect, including the PostgreSQL form that cannot run in +// unit tests. +func renderPredicate(t *testing.T, dialectName string, p predicate.Entity) (string, []any) { + t.Helper() + s := entsql.Dialect(dialectName). + Select(entity.FieldID). + From(entsql.Table(entity.Table)) + p(s) + query, args := s.Query() + return query, args +} + +func TestDatabaseEngine_SQLiteSQL(t *testing.T) { + e := &DatabaseEngine{dialect: dialect.SQLite} + + pred, err := e.Predicate(context.Background(), uuid.Nil, "Straße") + require.NoError(t, err) + require.NotNil(t, pred) + + query, args := renderPredicate(t, dialect.SQLite, pred) + + // both sides folded: the column through hb_fold, the pattern in Go + assert.Contains(t, query, "hb_fold(`entities`.`name`) LIKE ? 
ESCAPE '\\'") + assert.Contains(t, args, "%strasse%") +} + +func TestDatabaseEngine_PostgresSQL(t *testing.T) { + // no db handle: unaccent probing reports unavailable, exercising the + // plain ILIKE fallback + e := &DatabaseEngine{dialect: dialect.Postgres} + + pred, err := e.Predicate(context.Background(), uuid.Nil, "café") + require.NoError(t, err) + require.NotNil(t, pred) + + query, args := renderPredicate(t, dialect.Postgres, pred) + + // ILIKE is Unicode case-insensitive natively; without unaccent the token + // keeps its accents so accented data still matches accented queries + assert.Contains(t, query, `"entities"."name" ILIKE $`) + assert.Contains(t, args, "%café%") +} + +func TestDatabaseEngine_PostgresUnaccentSQL(t *testing.T) { + e := &DatabaseEngine{dialect: dialect.Postgres, unaccent: true} + e.unaccentOnce.Do(func() {}) // mark probed + + pred, err := e.Predicate(context.Background(), uuid.Nil, "café") + require.NoError(t, err) + require.NotNil(t, pred) + + query, args := renderPredicate(t, dialect.Postgres, pred) + + assert.Contains(t, query, `unaccent("entities"."name") ILIKE unaccent($`) + assert.Contains(t, args, "%café%") +} + +func TestDatabaseEngine_EmptyQuery(t *testing.T) { + e := &DatabaseEngine{dialect: dialect.SQLite} + + pred, err := e.Predicate(context.Background(), uuid.Nil, " ") + require.NoError(t, err) + assert.Nil(t, pred) +} + +func TestDatabaseEngine_MultiTokenStructure(t *testing.T) { + e := &DatabaseEngine{dialect: dialect.SQLite} + + pred, err := e.Predicate(context.Background(), uuid.Nil, "red box") + require.NoError(t, err) + + query, args := renderPredicate(t, dialect.SQLite, pred) + + // one AND-ed group per token, each ORing all searched surfaces, + // including tag names and custom field values + assert.Contains(t, args, "%red%") + assert.Contains(t, args, "%box%") + assert.Contains(t, query, "`tags`") + assert.Contains(t, query, "`entity_fields`") +} diff --git a/backend/internal/data/search/search.go 
b/backend/internal/data/search/search.go new file mode 100644 index 000000000..85ca0540b --- /dev/null +++ b/backend/internal/data/search/search.go @@ -0,0 +1,59 @@ +// Package search provides the pluggable free-text search abstraction used by +// the entity repository. +// +// A search Engine translates a user-supplied query string into an ent +// predicate that selects the matching entities. The default engine +// (DriverDatabase) performs tokenized, case- and accent-insensitive matching +// directly in the database and works on both SQLite and PostgreSQL with no +// extra infrastructure. +// +// To add a new engine (e.g. Meilisearch or Elasticsearch): +// +// 1. Implement the Engine interface. An external engine typically queries +// its own index scoped to the group ID and returns +// entity.IDIn(matchedIDs...) as the predicate, which preserves the +// repository's filtering, pagination, and eager-loading behavior. +// 2. Keep the engine's index up to date by subscribing to entity mutations +// (the repositories publish events on the event bus). +// 3. Register a new driver constant and construction case in NewEngine, and +// document the driver value for HBOX_SEARCH_DRIVER. +package search + +import ( + "context" + "fmt" + "strings" + + "github.com/google/uuid" + "github.com/sysadminsmedia/homebox/backend/internal/data/ent" + "github.com/sysadminsmedia/homebox/backend/internal/data/ent/predicate" +) + +// Supported search drivers. +const ( + DriverDatabase = "database" +) + +// Engine translates free-text queries into entity predicates. +type Engine interface { + // Predicate returns an ent predicate selecting the entities within the + // given group that match the free-text query. A nil predicate (with nil + // error) means the query has no usable terms and no search filter should + // be applied. 
+ // + // The caller is responsible for all non-search filtering (group, type, + // tags, pagination, ...); implementations must only express the text + // match itself. + Predicate(ctx context.Context, gid uuid.UUID, query string) (predicate.Entity, error) +} + +// NewEngine constructs the search engine selected by driver. An empty driver +// selects the database engine. +func NewEngine(driver string, db *ent.Client) (Engine, error) { + switch strings.ToLower(strings.TrimSpace(driver)) { + case "", DriverDatabase: + return NewDatabaseEngine(db), nil + default: + return nil, fmt.Errorf("unsupported search driver: %q (supported: %s)", driver, DriverDatabase) + } +} diff --git a/backend/internal/data/search/tokenize.go b/backend/internal/data/search/tokenize.go new file mode 100644 index 000000000..c7f46b270 --- /dev/null +++ b/backend/internal/data/search/tokenize.go @@ -0,0 +1,58 @@ +package search + +import ( + "strings" + "unicode" +) + +// maxTokens bounds the number of tokens a single query can expand into so a +// pathological query cannot generate an unbounded SQL statement. +const maxTokens = 8 + +// Tokenize splits a free-text query into match tokens. +// +// Tokens are separated by whitespace. A double-quoted span is kept together +// as a single token (without the quotes) so users can search for exact +// phrases, e.g. `red "tool box"` yields ["red", "tool box"]. Duplicate tokens +// are dropped, and at most maxTokens tokens are returned. 
+func Tokenize(query string) []string { + var ( + tokens []string + current strings.Builder + inQuotes bool + ) + + seen := make(map[string]struct{}) + flush := func() { + tok := current.String() + current.Reset() + if tok == "" { + return + } + if _, dup := seen[tok]; dup { + return + } + seen[tok] = struct{}{} + tokens = append(tokens, tok) + } + + for _, r := range query { + switch { + case r == '"': + if inQuotes { + flush() + } + inQuotes = !inQuotes + case !inQuotes && unicode.IsSpace(r): + flush() + default: + current.WriteRune(r) + } + } + flush() + + if len(tokens) > maxTokens { + tokens = tokens[:maxTokens] + } + return tokens +} diff --git a/backend/internal/data/search/tokenize_test.go b/backend/internal/data/search/tokenize_test.go new file mode 100644 index 000000000..4366385f0 --- /dev/null +++ b/backend/internal/data/search/tokenize_test.go @@ -0,0 +1,78 @@ +package search + +import ( + "strings" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestTokenize(t *testing.T) { + testCases := []struct { + name string + input string + expected []string + }{ + { + name: "single word", + input: "hammer", + expected: []string{"hammer"}, + }, + { + name: "multiple words", + input: "red tool box", + expected: []string{"red", "tool", "box"}, + }, + { + name: "extra whitespace", + input: " red \t tool\n", + expected: []string{"red", "tool"}, + }, + { + name: "quoted phrase", + input: `red "tool box"`, + expected: []string{"red", "tool box"}, + }, + { + name: "unterminated quote", + input: `red "tool box`, + expected: []string{"red", "tool box"}, + }, + { + name: "empty quotes ignored", + input: `red ""`, + expected: []string{"red"}, + }, + { + name: "duplicates removed", + input: "red red red", + expected: []string{"red"}, + }, + { + name: "empty input", + input: "", + expected: nil, + }, + { + name: "whitespace only", + input: " ", + expected: nil, + }, + { + name: "unicode words", + input: "Тестовий Запис", + expected: []string{"Тестовий", 
"Запис"}, + }, + { + name: "token count capped", + input: "t1 t2 t3 t4 t5 t6 t7 t8 t9 t10", + expected: strings.Fields("t1 t2 t3 t4 t5 t6 t7 t8"), + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + assert.Equal(t, tc.expected, Tokenize(tc.input)) + }) + } +} diff --git a/backend/internal/sys/config/conf.go b/backend/internal/sys/config/conf.go index e947ccb4a..bd77d52ed 100644 --- a/backend/internal/sys/config/conf.go +++ b/backend/internal/sys/config/conf.go @@ -62,6 +62,15 @@ type Config struct { Otel OTelConfig `yaml:"otel"` Auth AuthConfig `yaml:"auth"` Notifier NotifierConf `yaml:"notifier"` + Search SearchConf `yaml:"search"` +} + +// SearchConf selects and configures the free-text search engine. The default +// "database" driver searches directly in SQLite/PostgreSQL and needs no extra +// services; the driver abstraction exists so external engines (e.g. +// Meilisearch, Elasticsearch) can be added later. +type SearchConf struct { + Driver string `yaml:"driver" conf:"default:database"` } type Options struct { diff --git a/backend/pkgs/cgofreesqlite/sqlite.go b/backend/pkgs/cgofreesqlite/sqlite.go index c9faf7a80..d593ab668 100644 --- a/backend/pkgs/cgofreesqlite/sqlite.go +++ b/backend/pkgs/cgofreesqlite/sqlite.go @@ -10,7 +10,9 @@ package cgofreesqlite import ( "database/sql" "database/sql/driver" + "fmt" + "github.com/sysadminsmedia/homebox/backend/pkgs/textutils" "modernc.org/sqlite" ) @@ -35,6 +37,37 @@ func (d CGOFreeSqliteDriver) Open(name string) (conn driver.Conn, err error) { return conn, err } +// modernDriver returns modernc's package-level driver singleton (the instance +// it registers as "sqlite"). Functions registered through the sqlite package +// (like hb_fold below) are stored on that singleton only, so wrapping a fresh +// &sqlite.Driver{} would silently lose them. 
+func modernDriver() *sqlite.Driver { + db, err := sql.Open("sqlite", "") + if err != nil { + panic(err) + } + defer func() { _ = db.Close() }() + return db.Driver().(*sqlite.Driver) +} + func init() { //nolint:gochecknoinits - sql.Register("sqlite3", CGOFreeSqliteDriver{Driver: &sqlite.Driver{}}) + sql.Register("sqlite3", CGOFreeSqliteDriver{Driver: modernDriver()}) + + // hb_fold(text) folds its argument for case- and accent-insensitive + // comparison (full Unicode case folding + diacritic removal). SQLite's + // built-in lower()/LIKE only handle ASCII, which breaks search for + // Cyrillic, Greek, and other non-ASCII scripts. The search engine compares + // hb_fold(column) against patterns folded the same way in Go. + sqlite.MustRegisterDeterministicScalarFunction("hb_fold", 1, func(_ *sqlite.FunctionContext, args []driver.Value) (driver.Value, error) { + switch v := args[0].(type) { + case nil: + return nil, nil + case string: + return textutils.Fold(v), nil + case []byte: + return textutils.Fold(string(v)), nil + default: + return nil, fmt.Errorf("hb_fold: unsupported argument type %T", v) + } + }) } diff --git a/backend/pkgs/textutils/normalize.go b/backend/pkgs/textutils/normalize.go index f484f4d69..a48a18f04 100644 --- a/backend/pkgs/textutils/normalize.go +++ b/backend/pkgs/textutils/normalize.go @@ -1,9 +1,11 @@ +// Package textutils provides text normalization helpers used by the search +// system to implement case- and accent-insensitive matching across scripts. package textutils import ( - "strings" "unicode" + "golang.org/x/text/cases" "golang.org/x/text/runes" "golang.org/x/text/transform" "golang.org/x/text/unicode/norm" @@ -32,9 +34,15 @@ func RemoveAccents(text string) string { return result } -// NormalizeSearchQuery normalizes a search query for accent-insensitive matching. -// This function removes accents and converts to lowercase for consistent search behavior. 
-func NormalizeSearchQuery(query string) string { - normalized := RemoveAccents(query) - return strings.ToLower(normalized) +// Fold returns a canonical caseless, accent-less representation of text for +// search comparison. Two strings match case- and accent-insensitively iff +// their folded forms are equal (or one contains the other). +// +// Unicode case folding is used instead of lowercasing so that scripts with +// non-trivial case rules compare correctly (e.g. Greek final sigma "ς" and +// "σ" both fold to "σ", "Σ" included; Cyrillic "Тест" folds to "тест"). +// Folding can introduce new combining marks (e.g. "İ" folds to "i" + U+0307), +// so accents are stripped after folding as well as before. +func Fold(text string) string { + return RemoveAccents(cases.Fold().String(RemoveAccents(text))) } diff --git a/backend/pkgs/textutils/normalize_test.go b/backend/pkgs/textutils/normalize_test.go index 8e6bd2423..7627dcff8 100644 --- a/backend/pkgs/textutils/normalize_test.go +++ b/backend/pkgs/textutils/normalize_test.go @@ -113,7 +113,7 @@ func TestRemoveAccents(t *testing.T) { } } -func TestNormalizeSearchQuery(t *testing.T) { +func TestFold(t *testing.T) { testCases := []struct { name string input string @@ -139,13 +139,40 @@ func TestNormalizeSearchQuery(t *testing.T) { input: "Hello World", expected: "hello world", }, + { + // й decomposes to и + combining breve, which accent stripping + // removes — like ñ→n, both sides of a match fold the same way. 
+ name: "Ukrainian Cyrillic uppercase", + input: "Тестовий Запис", + expected: "тестовии запис", + }, + { + name: "Greek uppercase with final sigma", + input: "Υπολογιστής", + expected: "υπολογιστησ", + }, + { + name: "Greek lowercase final and medial sigma fold identically", + input: "υπολογιστης σ ς", + expected: "υπολογιστησ σ σ", + }, + { + name: "German sharp s folds to ss", + input: "Straße", + expected: "strasse", + }, + { + name: "Turkish dotted capital I", + input: "İstanbul", + expected: "istanbul", + }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - result := NormalizeSearchQuery(tc.input) + result := Fold(tc.input) if result != tc.expected { - t.Errorf("NormalizeSearchQuery(%q) = %q, expected %q", tc.input, result, tc.expected) + t.Errorf("Fold(%q) = %q, expected %q", tc.input, result, tc.expected) } }) } diff --git a/docs/public/api/openapi-3.0.json b/docs/public/api/openapi-3.0.json index a16bb34b1..17b3e3ba9 100644 --- a/docs/public/api/openapi-3.0.json +++ b/docs/public/api/openapi-3.0.json @@ -245,7 +245,7 @@ "summary": "Query All Entities", "parameters": [ { - "description": "search string", + "description": "search string; matches names, descriptions, serial/model numbers, manufacturers, notes, purchase sources, tag names, and custom field values. 
Use #<assetId> to look up by asset ID and double quotes for exact phrases", "name": "q", "in": "query", "schema": { @@ -280,6 +280,14 @@ } } }, + { + "description": "require all selected tags to match (AND) instead of any (OR)", + "name": "matchAllTags", + "in": "query", + "schema": { + "type": "boolean" + } + }, { "description": "parent Ids", "name": "parentIds", diff --git a/docs/public/api/openapi-3.0.yaml b/docs/public/api/openapi-3.0.yaml index 867abc4fa..21c61e8fd 100644 --- a/docs/public/api/openapi-3.0.yaml +++ b/docs/public/api/openapi-3.0.yaml @@ -144,7 +144,10 @@ paths: - Entities summary: Query All Entities parameters: - - description: search string + - description: "search string; matches names, descriptions, serial/model numbers, + manufacturers, notes, purchase sources, tag names, and custom field + values. Use #<assetId> to look up by asset ID and double quotes for + exact phrases" name: q in: query schema: @@ -167,6 +170,11 @@ paths: type: array items: type: string + - description: require all selected tags to match (AND) instead of any (OR) + name: matchAllTags + in: query + schema: + type: boolean - description: parent Ids name: parentIds in: query diff --git a/docs/public/api/swagger-2.0.json b/docs/public/api/swagger-2.0.json index 257fa81d1..75346ddf8 100644 --- a/docs/public/api/swagger-2.0.json +++ b/docs/public/api/swagger-2.0.json @@ -244,7 +244,7 @@ "parameters": [ { "type": "string", - "description": "search string", + "description": "search string; matches names, descriptions, serial/model numbers, manufacturers, notes, purchase sources, tag names, and custom field values. 
Use #\u003cassetId\u003e to look up by asset ID and double quotes for exact phrases", "name": "q", "in": "query" }, @@ -270,6 +270,12 @@ "name": "tags", "in": "query" }, + { + "type": "boolean", + "description": "require all selected tags to match (AND) instead of any (OR)", + "name": "matchAllTags", + "in": "query" + }, { "type": "array", "items": { diff --git a/docs/public/api/swagger-2.0.yaml b/docs/public/api/swagger-2.0.yaml index 6332c8ec5..39e9ea703 100644 --- a/docs/public/api/swagger-2.0.yaml +++ b/docs/public/api/swagger-2.0.yaml @@ -2474,7 +2474,9 @@ paths: /v1/entities: get: parameters: - - description: search string + - description: 'search string; matches names, descriptions, serial/model numbers, + manufacturers, notes, purchase sources, tag names, and custom field values. + Use #<assetId> to look up by asset ID and double quotes for exact phrases' in: query name: q type: string @@ -2493,6 +2495,10 @@ paths: type: string name: tags type: array + - description: require all selected tags to match (AND) instead of any (OR) + in: query + name: matchAllTags + type: boolean - collectionFormat: multi description: parent Ids in: query diff --git a/docs/src/content/docs/en/quick-start/configure/database.mdx b/docs/src/content/docs/en/quick-start/configure/database.mdx index 36c02894d..1ec1dda69 100644 --- a/docs/src/content/docs/en/quick-start/configure/database.mdx +++ b/docs/src/content/docs/en/quick-start/configure/database.mdx @@ -53,3 +53,18 @@ Optionally, you can also add the following environment variable to the Homebox s - `HBOX_DATABASE_SSL_ROOT_CERT=path/to/root.crt` (path to the root certificate file) - `HBOX_DATABASE_SSL_CERT=path/to/server.crt` (path to the server certificate file) - `HBOX_DATABASE_SSL_KEY=path/to/server.key` (path to the server key file) + +## Accent-insensitive search on PostgreSQL + +Search is case-insensitive on both databases. To make it accent-insensitive on +PostgreSQL as well (e.g. 
so `cafe` matches `café`), Homebox uses the standard +[`unaccent`](https://www.postgresql.org/docs/current/unaccent.html) extension. +Homebox tries to enable it automatically on startup; if the database user +lacks the privilege to create extensions, enable it once as a superuser: + +```sql +CREATE EXTENSION IF NOT EXISTS unaccent; +``` + +Without the extension, search still works but treats accented characters as +distinct. SQLite needs no setup — accent-insensitive search is built in. diff --git a/docs/src/content/docs/en/quick-start/configure/index.mdx b/docs/src/content/docs/en/quick-start/configure/index.mdx index 640a124a3..7f45c2e39 100644 --- a/docs/src/content/docs/en/quick-start/configure/index.mdx +++ b/docs/src/content/docs/en/quick-start/configure/index.mdx @@ -45,6 +45,7 @@ import {Tabs, TabItem} from "@astrojs/starlight/components"; | HBOX_OPTIONS_ALLOW_LOCAL_LOGIN | true | allow users to login with username/password when OIDC is enabled | | HBOX_OPTIONS_TRUST_PROXY | false | trust proxy headers for determining request scheme (X-Forwarded-Proto) | | HBOX_OPTIONS_HOSTNAME | | override hostname used for OIDC redirect URLs and other absolute URLs | +| HBOX_SEARCH_DRIVER | database | selects the free-text search engine. `database` searches directly in SQLite/PostgreSQL (Unicode case-insensitive and accent-insensitive) and needs no extra services. The setting exists so external engines (e.g. Meilisearch) can be added in the future. | | HBOX_AUTH_API_KEY_PEPPER | | **Required.** Server-side secret HMAC-keyed into stored API key hashes; the binary refuses to start if this is shorter than 32 bytes. Generate with `openssl rand -base64 48`. Must stay stable across restarts — rotating it invalidates every issued API key. 
| | HBOX_AUTH_RATE_LIMIT_ENABLED | true | enable rate limiting for authentication attempts | | HBOX_AUTH_RATE_LIMIT_MAX_ATTEMPTS | 5 | maximum number of failed authentication attempts before rate limiting | diff --git a/frontend/lib/api/classes/items.ts b/frontend/lib/api/classes/items.ts index 64fa3f308..cbe14e141 100644 --- a/frontend/lib/api/classes/items.ts +++ b/frontend/lib/api/classes/items.ts @@ -25,6 +25,7 @@ export type ItemsQuery = { parentIds?: string[]; tags?: string[]; negateTags?: boolean; + matchAllTags?: boolean; onlyWithoutPhoto?: boolean; onlyWithPhoto?: boolean; q?: string; diff --git a/frontend/locales/en.json b/frontend/locales/en.json index 5aac53f06..c91f1654b 100644 --- a/frontend/locales/en.json +++ b/frontend/locales/en.json @@ -555,6 +555,7 @@ "manual": "Manual", "manuals": "Manuals", "manufacturer": "Manufacturer", + "match_all_tags": "Require All Selected Tags", "model_number": "Model Number", "name": "Name", "negate_tags": "Negate Selected Tags", @@ -593,6 +594,7 @@ "sync_child_locations": "Sync child items' locations", "tip_1": "Location and tag filters use the 'OR' operation. If more than one is selected only one will be\n required for a match.", "tip_2": "Searches prefixed with '#'' will query for a asset ID (example '#000-001')", + "tip_4": "Search matches names, descriptions, serial and model numbers, manufacturers, notes, purchase sources, tag names, and custom field values. Every word must match somewhere; wrap words in double quotes to match an exact phrase.", "tip_3": "Field filters use the 'OR' operation. 
If more than one is selected only one will be required for a\n match.", "tips": "Tips", "tips_sub": "Search Tips", diff --git a/frontend/pages/items.vue b/frontend/pages/items.vue index 99762bb24..26f1796ef 100644 --- a/frontend/pages/items.vue +++ b/frontend/pages/items.vue @@ -85,6 +85,7 @@ const includeArchived = useOptionalRouteQuery("archived", false); const fieldSelector = useOptionalRouteQuery("fieldSelector", false); const negateTags = useOptionalRouteQuery("negateTags", false); + const matchAllTags = useOptionalRouteQuery("matchAllTags", false); const onlyWithoutPhoto = useOptionalRouteQuery("onlyWithoutPhoto", false); const onlyWithPhoto = useOptionalRouteQuery("onlyWithPhoto", false); const orderBy = useOptionalRouteQuery("orderBy", "name"); @@ -207,6 +208,12 @@ } }); + watch(matchAllTags, (newV, oldV) => { + if (newV !== oldV) { + search(); + } + }); + watch(onlyWithoutPhoto, (newV, oldV) => { if (newV && onlyWithPhoto.value) { // this triggers the watch on onlyWithPhoto @@ -275,6 +282,7 @@ archived: includeArchived.value, fieldSelector: fieldSelector.value, negateTags: negateTags.value, + matchAllTags: matchAllTags.value, onlyWithoutPhoto: onlyWithoutPhoto.value, onlyWithPhoto: onlyWithPhoto.value, orderBy: orderBy.value, @@ -312,6 +320,7 @@ parentIds: locIDs.value, tags: tagIDs.value, negateTags: negateTags.value, + matchAllTags: matchAllTags.value, onlyWithoutPhoto: onlyWithoutPhoto.value, onlyWithPhoto: onlyWithPhoto.value, includeArchived: includeArchived.value, @@ -426,6 +435,11 @@
{{ $t("items.negate_tags") }} +