From 81976c5fa417290a5d0ddcf3999e4d2075132413 Mon Sep 17 00:00:00 2001 From: Agampreet Singh Date: Fri, 13 Mar 2026 02:35:34 +0530 Subject: [PATCH] Fix: Enable qualifier searches for unnamed POIs near small places (#3750) --- src/nominatim_api/search/db_search_builder.py | 42 +++++ .../search/db_searches/__init__.py | 1 + .../search/db_searches/qualifier_search.py | 157 ++++++++++++++++++ test/bdd/features/api/search/queries.feature | 10 ++ .../features/db/query/search_simple.feature | 16 ++ test/bdd/test_db.py | 11 ++ .../api/search/test_db_search_builder.py | 9 +- 7 files changed, 243 insertions(+), 3 deletions(-) create mode 100644 src/nominatim_api/search/db_searches/qualifier_search.py diff --git a/src/nominatim_api/search/db_search_builder.py b/src/nominatim_api/search/db_search_builder.py index 66bd592b0..bd43f4827 100644 --- a/src/nominatim_api/search/db_search_builder.py +++ b/src/nominatim_api/search/db_search_builder.py @@ -103,6 +103,15 @@ def build(self, assignment: TokenAssignment) -> Iterator[dbs.AbstractSearch]: builder = self.build_special_search(sdata, assignment.address, bool(near_items)) else: + if (sdata.qualifiers and not near_items and + not self.details.categories): + near_builder = self._build_qualifier_address_search( + sdata, + assignment.name, assignment.address, + assignment.penalty) + for search in near_builder: + yield search + builder = self.build_name_search(sdata, assignment.name, assignment.address, bool(near_items)) @@ -119,6 +128,39 @@ def build(self, assignment: TokenAssignment) -> Iterator[dbs.AbstractSearch]: search.penalty += assignment.penalty yield search + def _build_qualifier_address_search(self, sdata: dbf.SearchData, + name: qmod.TokenRange, + address: List[qmod.TokenRange], + base_penalty: float + ) -> Iterator[dbs.AbstractSearch]: + """ Build a QualifierNearSearch for queries like 'Kingston pub' where + a qualifier category is combined with an address but no explicit + name. Searches for the address as a place, then finds POIs of the + given category nearby. + + Temporarily modifies sdata in place and restores it before + returning. + """ + categories = sdata.qualifiers + penalty = min(categories.penalties) + categories.penalties = [p - penalty for p in categories.penalties] + + sdata.qualifiers = dbf.WeightedCategories([], []) + saved_rankings = sdata.rankings + sdata.rankings = list(saved_rankings) + + for search in self.build_name_search(sdata, name, address, + is_category=False): + if isinstance(search, dbs.PlaceSearch): + search_penalty = search.penalty + search.penalty = 0.0 + yield dbs.QualifierNearSearch( + penalty + base_penalty + search_penalty, + categories, search) + + sdata.qualifiers = categories + sdata.rankings = saved_rankings + def build_poi_search(self, sdata: dbf.SearchData) -> Iterator[dbs.AbstractSearch]: """ Build abstract search query for a simple category search. This kind of search requires an additional geographic constraint. diff --git a/src/nominatim_api/search/db_searches/__init__.py b/src/nominatim_api/search/db_searches/__init__.py index 1e3b6119e..c2b36d438 100644 --- a/src/nominatim_api/search/db_searches/__init__.py +++ b/src/nominatim_api/search/db_searches/__init__.py @@ -15,3 +15,4 @@ from .postcode_search import PostcodeSearch as PostcodeSearch from .place_search import PlaceSearch as PlaceSearch from .address_search import AddressSearch as AddressSearch +from .qualifier_search import QualifierNearSearch as QualifierNearSearch diff --git a/src/nominatim_api/search/db_searches/qualifier_search.py b/src/nominatim_api/search/db_searches/qualifier_search.py new file mode 100644 index 000000000..20365becf --- /dev/null +++ b/src/nominatim_api/search/db_searches/qualifier_search.py @@ -0,0 +1,157 @@ +# SPDX-License-Identifier: GPL-3.0-or-later +# +# This file is part of Nominatim. (https://nominatim.org) +# +# Copyright (C) 2026 by the Nominatim developer community. +# For a full list of authors see the git log. +""" +Implementation of search for qualifier+address queries like 'Kingston pub'. + +Finds a place by name, then searches for nearby POIs of the given category. +The inner place search is restricted to actual places (not POIs) and the +near lookup only proceeds when there is exactly one matching place. +""" +from typing import List, Tuple + +import sqlalchemy as sa + +from . import base +from ...typing import SaBind +from ...types import SearchDetails, Bbox +from ...connection import SearchConnection +from ... import results as nres +from ..db_search_fields import WeightedCategories + + +LIMIT_PARAM: SaBind = sa.bindparam('limit') +MIN_RANK_PARAM: SaBind = sa.bindparam('min_rank') +MAX_RANK_PARAM: SaBind = sa.bindparam('max_rank') +COUNTRIES_PARAM: SaBind = sa.bindparam('countries') + + +class QualifierNearSearch(base.AbstractSearch): + """ Search for unnamed POIs of a qualifier category near a small place. + + This handles queries like 'Kingston pub' where a qualifier word + (pub) is combined with an address (Kingston) but there is no + explicit name. The inner search finds the place, and the outer + search finds nearby POIs of the qualifier category. + + Unlike NearSearch, this search: + - restricts the inner results to actual places (rank_address < 30) + - only proceeds when there is a single matching place + - applies penalties for named POIs and for multiple category results + """ + def __init__(self, penalty: float, categories: WeightedCategories, + search: base.AbstractSearch) -> None: + super().__init__(penalty) + self.search = search + self.categories = categories + + async def lookup(self, conn: SearchConnection, + details: SearchDetails) -> nres.SearchResults: + """ Find results for the search in the database. + """ + results = nres.SearchResults() + base_results = await self.search.lookup(conn, details) + + if not base_results: + return results + + base_results.sort(key=lambda r: (r.accuracy, r.rank_search)) + max_accuracy = base_results[0].accuracy + 0.5 + + # Restrict to actual places (rank_address < 30), not POIs. + base_results = nres.SearchResults( + r for r in base_results + if (r.source_table == nres.SourceTable.PLACEX + and r.accuracy <= max_accuracy + and r.bbox and r.bbox.area < 5.0 + and r.rank_address >= 1 + and r.rank_address < 30)) + + # Only proceed if there is exactly one matching place. + if len(base_results) != 1: + return results + + baseids = [b.place_id for b in base_results if b.place_id] + + for category, penalty in self.categories: + await self.lookup_category(results, conn, baseids, category, penalty, details) + if len(results) >= details.max_results: + break + + return results + + async def lookup_category(self, results: nres.SearchResults, + conn: SearchConnection, ids: List[int], + category: Tuple[str, str], penalty: float, + details: SearchDetails) -> None: + """ Find places of the given category near the list of + place ids and add the results to 'results'. + """ + table = await conn.get_class_table(*category) + + tgeom = conn.t.placex.alias('pgeom') + + if table is None: + # No classtype table available, do a simplified lookup in placex. + table = conn.t.placex + sql = sa.select(table.c.place_id, + sa.func.min(tgeom.c.centroid.ST_Distance(table.c.centroid)) + .label('dist'))\ + .join(tgeom, table.c.geometry.intersects(tgeom.c.centroid.ST_Expand(0.01)))\ + .where(table.c.class_ == category[0])\ + .where(table.c.type == category[1]) + else: + # Use classtype table. We can afford to use a larger + # radius for the lookup. + sql = sa.select(table.c.place_id, + sa.func.min(tgeom.c.centroid.ST_Distance(table.c.centroid)) + .label('dist'))\ + .join(tgeom, + table.c.centroid.ST_CoveredBy( + sa.case((sa.and_(tgeom.c.rank_address > 9, + tgeom.c.geometry.is_area()), + tgeom.c.geometry), + else_=tgeom.c.centroid.ST_Expand(0.05)))) + + inner = sql.where(tgeom.c.place_id.in_(ids))\ + .group_by(table.c.place_id).subquery() + + t = conn.t.placex + sql = base.select_placex(t).add_columns((-inner.c.dist).label('importance'))\ + .join(inner, inner.c.place_id == t.c.place_id)\ + .order_by(inner.c.dist) + + sql = sql.where(base.no_index(t.c.rank_address).between(MIN_RANK_PARAM, MAX_RANK_PARAM)) + if details.countries: + sql = sql.where(t.c.country_code.in_(COUNTRIES_PARAM)) + if details.excluded: + sql = sql.where(base.exclude_places(t)) + if details.layers is not None: + sql = sql.where(base.filter_by_layer(t, details.layers)) + + sql = sql.limit(LIMIT_PARAM) + + bind_params = {'limit': details.max_results, + 'min_rank': details.min_rank, + 'max_rank': details.max_rank, + 'excluded': details.excluded, + 'countries': details.countries} + new_results = [] + for row in await conn.execute(sql, bind_params): + result = nres.create_from_placex_row(row, nres.SearchResult) + result.accuracy = self.penalty + penalty + # Penalize named POIs: unnamed POIs are expected here. + if result.names: + result.accuracy += 0.4 + result.bbox = Bbox.from_wkb(row.bbox) + new_results.append(result) + + # Penalize when there are multiple results of this category. + if len(new_results) > 1: + for result in new_results: + result.accuracy += 0.2 + + results.extend(new_results) diff --git a/test/bdd/features/api/search/queries.feature b/test/bdd/features/api/search/queries.feature index 8453b53bb..61cad15f6 100644 --- a/test/bdd/features/api/search/queries.feature +++ b/test/bdd/features/api/search/queries.feature @@ -210,3 +210,13 @@ Feature: Search queries Then result 0 contains | address+town | | Vaduz | + + # github #3750 + Scenario: Qualifier search finds unnamed POI near a place + When geocoding "[amenity=restaurant] Vaduz" + Then all results contain + | category | type | + | amenity | restaurant | + And result 0 contains + | address+town | + | Vaduz | diff --git a/test/bdd/features/db/query/search_simple.feature b/test/bdd/features/db/query/search_simple.feature index e5e771f79..7d3fe9e6c 100644 --- a/test/bdd/features/db/query/search_simple.feature +++ b/test/bdd/features/db/query/search_simple.feature @@ -81,6 +81,22 @@ Feature: Searching of simple objects | Auburn | Alabama | AL | | New Orleans | Louisiana | LA | + Scenario: Qualifier search finds unnamed POI near a small place + Given the places + | osm | class | type | name+name | geometry | + | N1 | place | village | Kingston | 10.0 -10.0 | + And the places + | osm | class | type | geometry | + | N2 | amenity | pub | 10.0001 -10.0001 | + And the special phrases + | phrase | class | type | operator | + | Pub | amenity | pub | - | + When importing + And geocoding "Kingston pub" + Then result 0 contains + | object | category | type | + | N2 | amenity | pub | + # github #3210 Scenario: Country with alternate-language name does not dominate when locale differs Given the 1.0 grid with origin DE diff --git a/test/bdd/test_db.py b/test/bdd/test_db.py index 47a995787..20668d8b5 100644 --- a/test/bdd/test_db.py +++ b/test/bdd/test_db.py @@ -219,6 +219,17 @@ def import_rels(row_factory, datatable): members=psycopg.types.json.Json(members)) +@given('the special phrases', target_fixture=None) +def add_special_phrases(def_config, datatable): + """ Add special phrases to the tokenizer for use in search queries. + Expects a table with columns: phrase, class, type, operator. + """ + tokenizer = tokenizer_factory.get_tokenizer_for_db(def_config) + phrases = [(row[0], row[1], row[2], row[3]) for row in datatable[1:]] + with tokenizer.name_analyzer() as analyzer: + analyzer.update_special_phrases(phrases, False) + + @when('importing', target_fixture='place_ids') def do_import(db_conn, def_config): """ Run a reduced version of the Nominatim import. diff --git a/test/python/api/search/test_db_search_builder.py b/test/python/api/search/test_db_search_builder.py index 0380d0417..261384d89 100644 --- a/test/python/api/search/test_db_search_builder.py +++ b/test/python/api/search/test_db_search_builder.py @@ -225,10 +225,13 @@ def test_name_with_qualifier(): searches = list(builder.build(TokenAssignment(name=TokenRange(0, 1), qualifier=TokenRange(1, 2)))) - assert len(searches) == 1 - search = searches[0] + assert len(searches) == 2 + + assert isinstance(searches[0], dbs.QualifierNearSearch) + assert isinstance(searches[1], dbs.PlaceSearch) + + search = searches[1] - assert isinstance(search, dbs.PlaceSearch) assert not search.postcodes.values assert not search.countries.values assert search.qualifiers.values == [('this', 'that')]