diff --git a/CHANGELOG.md b/CHANGELOG.md index cbaa1e34..5cf6e6c4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +## Unreleased +- Add `path:` filter for substring matching against an item's full absolute path. Multiple `path:` filters combine with AND (e.g. `main.js path:Downloads path:repos`). + ## 0.1.23 — 2026-03-25 - Reduce power consumption by expanding the default ignored paths to cover more macOS cache, log, metadata, and runtime directories. - Further reduce background work by making the filesystem event watcher honor ignored paths. diff --git a/Cargo.lock b/Cargo.lock index 5b356254..91a134b8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -418,6 +418,14 @@ dependencies = [ "objc2", ] +[[package]] +name = "e2e-tests" +version = "0.1.0" +dependencies = [ + "search-cache", + "search-cancel", +] + [[package]] name = "either" version = "1.15.0" diff --git a/Cargo.toml b/Cargo.toml index 04f54a94..9df457b6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,5 +12,6 @@ members = [ "cardinal-syntax", "search-cancel", "slab-mmap", + "e2e-tests", ] exclude = ["cardinal"] diff --git a/cardinal-syntax/src/lib.rs b/cardinal-syntax/src/lib.rs index 192e4c8e..52d2169f 100644 --- a/cardinal-syntax/src/lib.rs +++ b/cardinal-syntax/src/lib.rs @@ -137,7 +137,7 @@ fn reorder_by_priority(parts: &mut Vec) { let priority = |expr: &Expr| -> u8 { match expr { Expr::Term(Term::Filter(filter)) => match filter.kind { - FilterKind::InFolder | FilterKind::Parent => 0, + FilterKind::InFolder | FilterKind::Parent | FilterKind::Path => 0, FilterKind::Tag => 3, _ => 2, }, @@ -355,6 +355,15 @@ pub enum FilterKind { /// assert!(matches!(filter.kind, FilterKind::InFolder)); /// ``` InFolder, + /// Restrict to items whose full path contains the argument as a substring + /// (`path:`). Multiple `path:` filters combine with AND, each narrowing the + /// result set further. Matching respects the UI case-sensitivity toggle. 
+ /// ``` + /// use cardinal_syntax::{parse_query, Expr, Term, FilterKind}; + /// let Expr::Term(Term::Filter(filter)) = parse_query("path:repos").unwrap().expr else { panic!() }; + /// assert!(matches!(filter.kind, FilterKind::Path)); + /// ``` + Path, /// Limit to the folder itself (`nosubfolders:`). /// ``` /// use cardinal_syntax::{parse_query, Expr, Term, FilterKind}; @@ -551,6 +560,7 @@ impl FilterKind { "dr" | "daterun" => FilterKind::DateRun, "parent" => FilterKind::Parent, "infolder" | "in" => FilterKind::InFolder, + "path" => FilterKind::Path, "nosubfolders" => FilterKind::NoSubfolders, "child" => FilterKind::Child, "attrib" => FilterKind::Attribute, diff --git a/cardinal-syntax/tests/filter_kinds_coverage.rs b/cardinal-syntax/tests/filter_kinds_coverage.rs index a63e5375..ad349ba4 100644 --- a/cardinal-syntax/tests/filter_kinds_coverage.rs +++ b/cardinal-syntax/tests/filter_kinds_coverage.rs @@ -34,6 +34,7 @@ fn maps_known_filter_names() { ("daterun", FilterKind::DateRun), ("parent", FilterKind::Parent), ("infolder", FilterKind::InFolder), + ("path", FilterKind::Path), ("nosubfolders", FilterKind::NoSubfolders), ("child", FilterKind::Child), ("attrib", FilterKind::Attribute), diff --git a/cardinal/src-tauri/src/background.rs b/cardinal/src-tauri/src/background.rs index 9c898aab..b506613d 100644 --- a/cardinal/src-tauri/src/background.rs +++ b/cardinal/src-tauri/src/background.rs @@ -12,7 +12,8 @@ use once_cell::sync::Lazy; use parking_lot::Mutex; use rayon::spawn; use search_cache::{ - HandleFSEError, SearchCache, SearchOptions, SearchResultNode, SlabIndex, WalkData, + HandleFSEError, SearchCache, SearchOptions, SearchOutcome, SearchResultNode, SlabIndex, + WalkData, }; use search_cancel::CancellationToken; use serde::Serialize; @@ -30,6 +31,9 @@ pub struct StatusBarUpdate { pub scanned_files: usize, pub processed_events: usize, pub rescan_errors: usize, + /// Human-readable status message (e.g. "Walking filesystem…", "Indexing…"). 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub status_message: Option, } #[derive(Serialize, Clone)] @@ -58,6 +62,7 @@ pub fn reset_status_bar(app_handle: &AppHandle) { scanned_files: 0, processed_events: 0, rescan_errors: 0, + status_message: None, }, ) .unwrap(); @@ -68,6 +73,22 @@ pub fn emit_status_bar_update( scanned_files: usize, processed_events: usize, rescan_errors: usize, +) { + emit_status_bar_update_with_message( + app_handle, + scanned_files, + processed_events, + rescan_errors, + None, + ); +} + +pub fn emit_status_bar_update_with_message( + app_handle: &AppHandle, + scanned_files: usize, + processed_events: usize, + rescan_errors: usize, + status_message: Option<&str>, ) { static LAST_EMIT: Lazy> = Lazy::new(|| Mutex::new(Instant::now() - Duration::from_secs(1))); @@ -84,6 +105,7 @@ pub fn emit_status_bar_update( scanned_files, processed_events, rescan_errors, + status_message: status_message.map(|s| s.to_string()), }, ) .unwrap(); @@ -329,6 +351,28 @@ pub fn run_background_event_loop( let flush_ticker = crossbeam_channel::tick(Duration::from_secs(10)); loop { + // Prioritize search requests over FS event processing so the UI + // stays responsive even when there's a large FS event backlog. + // Drain all pending search jobs, keeping only the latest (older + // ones are already cancelled by CancellationToken::new_search()). + if let Ok(mut latest_job) = search_rx.try_recv() { + while let Ok(newer) = search_rx.try_recv() { + // Send cancelled result for the superseded job. + let _ = latest_job.result_tx.send(Ok(SearchOutcome::cancelled())); + latest_job = newer; + } + let SearchJob { + query, + options, + cancellation_token, + result_tx, + } = latest_job; + let opts = SearchOptions::from(options); + let payload = cache.search_query_with_options(query, opts, cancellation_token); + result_tx.send(payload).expect("Failed to send result"); + continue; + } + crossbeam_channel::select! 
{ recv(finish_rx) -> tx => { let tx = tx.expect("Finish channel closed"); @@ -435,11 +479,8 @@ pub(crate) fn build_search_cache( std::thread::scope(|s| { s.spawn(|| { while !walking_done.load(Ordering::Relaxed) { - let dirs = walk_data.num_dirs.load(Ordering::Relaxed); - let files = walk_data.num_files.load(Ordering::Relaxed); - let total = dirs + files; - emit_status_bar_update(app_handle, total, 0, 0); - std::thread::sleep(Duration::from_millis(100)); + emit_status_bar_update_with_message(app_handle, 0, 0, 0, Some("Indexing…")); + std::thread::sleep(Duration::from_millis(500)); } }); let cache = SearchCache::walk_fs_with_walk_data(&walk_data, &APP_QUIT); @@ -483,11 +524,8 @@ fn perform_rescan( let stopped = std::thread::scope(|s| { s.spawn(|| { while !walking_done.load(Ordering::Relaxed) { - let dirs = walk_data.num_dirs.load(Ordering::Relaxed); - let files = walk_data.num_files.load(Ordering::Relaxed); - let total = dirs + files; - emit_status_bar_update(app_handle, total, 0, 0); - std::thread::sleep(Duration::from_millis(100)); + emit_status_bar_update_with_message(app_handle, 0, 0, 0, Some("Indexing…")); + std::thread::sleep(Duration::from_millis(500)); } }); // If rescan is cancelled, we have nothing to do diff --git a/cardinal/src/App.tsx b/cardinal/src/App.tsx index 16cd5e68..2a35dd3d 100644 --- a/cardinal/src/App.tsx +++ b/cardinal/src/App.tsx @@ -49,6 +49,7 @@ function App() { scannedFiles, processedEvents, rescanErrors, + statusMessage, currentQuery, currentDirectoryQuery, highlightTerms, @@ -439,6 +440,7 @@ function App() { onTabChange={onTabChange} onRequestRescan={requestRescan} rescanErrorCount={rescanErrors} + statusMessage={statusMessage} /> void; onRequestRescan: () => void; rescanErrorCount: number; + statusMessage: string | null; }; const TABS: StatusTabKey[] = ['files', 'events']; @@ -36,6 +37,7 @@ const StatusBar = ({ onTabChange, onRequestRescan, rescanErrorCount, + statusMessage, }: StatusBarProps): React.JSX.Element => { const { t } = 
useTranslation(); const tabsRef = useRef(null); @@ -116,7 +118,7 @@ const StatusBar = ({ > {lifecycleMeta.icon} - {lifecycleLabel} + {statusMessage ?? lifecycleLabel}
; focusAndSelectSearchInput: () => void; - handleStatusUpdate: (scannedFiles: number, processedEvents: number, rescanErrors: number) => void; + handleStatusUpdate: ( + scannedFiles: number, + processedEvents: number, + rescanErrors: number, + statusMessage?: string, + ) => void; setLifecycleState: (status: 'Initializing' | 'Updating' | 'Ready') => void; submitFilesQuery: (query: string, options?: { immediate?: boolean }) => void; setEventFilterQuery: (query: string) => void; @@ -113,7 +118,7 @@ describe('useAppWindowListeners', () => { act(() => { statusCallback?.({ scannedFiles: 11, processedEvents: 22, rescanErrors: 3 }); }); - expect(handleStatusUpdate).toHaveBeenCalledWith(11, 22, 3); + expect(handleStatusUpdate).toHaveBeenCalledWith(11, 22, 3, undefined); act(() => { lifecycleCallback?.('Ready'); diff --git a/cardinal/src/hooks/useAppWindowListeners.ts b/cardinal/src/hooks/useAppWindowListeners.ts index d6ffb1fc..e9af0fff 100644 --- a/cardinal/src/hooks/useAppWindowListeners.ts +++ b/cardinal/src/hooks/useAppWindowListeners.ts @@ -19,7 +19,12 @@ type UseAppWindowListenersOptions = { activeTab: StatusTabKey; searchInputRef: RefObject; focusAndSelectSearchInput: () => void; - handleStatusUpdate: (scannedFiles: number, processedEvents: number, rescanErrors: number) => void; + handleStatusUpdate: ( + scannedFiles: number, + processedEvents: number, + rescanErrors: number, + statusMessage?: string, + ) => void; setLifecycleState: (status: AppLifecycleStatus) => void; submitFilesQuery: (query: string, options?: QueueSearchOptions) => void; setEventFilterQuery: (value: string) => void; @@ -50,8 +55,8 @@ export function useAppWindowListeners({ }); useEffect(() => { const unlistenStatus = subscribeStatusBarUpdate((payload: StatusBarUpdatePayload) => { - const { scannedFiles, processedEvents, rescanErrors } = payload; - handleStatusUpdate(scannedFiles, processedEvents, rescanErrors); + const { scannedFiles, processedEvents, rescanErrors, statusMessage } = payload; + 
handleStatusUpdate(scannedFiles, processedEvents, rescanErrors, statusMessage); }); return unlistenStatus; }, [handleStatusUpdate]); diff --git a/cardinal/src/hooks/useFileSearch.ts b/cardinal/src/hooks/useFileSearch.ts index e9edf73f..1d640b90 100644 --- a/cardinal/src/hooks/useFileSearch.ts +++ b/cardinal/src/hooks/useFileSearch.ts @@ -17,6 +17,7 @@ type SearchState = { scannedFiles: number; processedEvents: number; rescanErrors: number; + statusMessage: string | null; currentQuery: string; currentDirectoryQuery: string; highlightTerms: string[]; @@ -45,7 +46,12 @@ type QueueSearchOptions = { type SearchAction = | { type: 'STATUS_UPDATE'; - payload: { scannedFiles: number; processedEvents: number; rescanErrors: number }; + payload: { + scannedFiles: number; + processedEvents: number; + rescanErrors: number; + statusMessage?: string; + }; } | { type: 'SEARCH_REQUEST'; payload: { immediate: boolean } } | { type: 'SEARCH_LOADING_DELAY' } @@ -76,6 +82,7 @@ const initialSearchState: SearchState = { scannedFiles: 0, processedEvents: 0, rescanErrors: 0, + statusMessage: null, currentQuery: '', currentDirectoryQuery: '', highlightTerms: [], @@ -132,6 +139,7 @@ function reducer(state: SearchState, action: SearchAction): SearchState { scannedFiles: action.payload.scannedFiles, processedEvents: action.payload.processedEvents, rescanErrors: action.payload.rescanErrors, + statusMessage: action.payload.statusMessage ?? 
null, }; case 'SEARCH_REQUEST': return { @@ -200,7 +208,12 @@ type UseFileSearchResult = { queueSearch: (query: string, options?: QueueSearchOptions) => void; queueDirectorySearch: (directoryQuery: string, options?: QueueSearchOptions) => void; queueDirectoryScopeOpen: (directoryScopeOpen: boolean) => void; - handleStatusUpdate: (scannedFiles: number, processedEvents: number, rescanErrors: number) => void; + handleStatusUpdate: ( + scannedFiles: number, + processedEvents: number, + rescanErrors: number, + statusMessage?: string, + ) => void; setLifecycleState: (status: AppLifecycleStatus) => void; requestRescan: () => Promise; }; @@ -234,10 +247,15 @@ export function useFileSearch(): UseFileSearchResult { }, []); const handleStatusUpdate = useCallback( - (scannedFiles: number, processedEvents: number, rescanErrors: number) => { + ( + scannedFiles: number, + processedEvents: number, + rescanErrors: number, + statusMessage?: string, + ) => { dispatch({ type: 'STATUS_UPDATE', - payload: { scannedFiles, processedEvents, rescanErrors }, + payload: { scannedFiles, processedEvents, rescanErrors, statusMessage }, }); }, [], diff --git a/cardinal/src/i18n/resources/ar-SA.json b/cardinal/src/i18n/resources/ar-SA.json index 3abea4ba..e6c92302 100644 --- a/cardinal/src/i18n/resources/ar-SA.json +++ b/cardinal/src/i18n/resources/ar-SA.json @@ -115,7 +115,8 @@ "maximize": "تكبير", "closeWindow": "إغلاق النافذة", "help": "مساعدة", - "getUpdates": "الحصول على التحديثات" + "getUpdates": "الحصول على التحديثات", + "searchSyntax": "بنية البحث" }, "preferences": { "title": "التفضيلات", diff --git a/cardinal/src/i18n/resources/de-DE.json b/cardinal/src/i18n/resources/de-DE.json index 021f41c9..8c8ef491 100644 --- a/cardinal/src/i18n/resources/de-DE.json +++ b/cardinal/src/i18n/resources/de-DE.json @@ -115,7 +115,8 @@ "maximize": "Zoomen", "closeWindow": "Fenster schließen", "help": "Hilfe", - "getUpdates": "Updates abrufen" + "getUpdates": "Updates abrufen", + "searchSyntax": 
"Suchsyntax" }, "preferences": { "title": "Einstellungen", diff --git a/cardinal/src/i18n/resources/en-US.json b/cardinal/src/i18n/resources/en-US.json index bd8065a1..931d524b 100644 --- a/cardinal/src/i18n/resources/en-US.json +++ b/cardinal/src/i18n/resources/en-US.json @@ -115,7 +115,8 @@ "maximize": "Zoom", "closeWindow": "Close Window", "help": "Help", - "getUpdates": "Get Updates" + "getUpdates": "Get Updates", + "searchSyntax": "Search Syntax" }, "preferences": { "title": "Preferences", diff --git a/cardinal/src/i18n/resources/es-ES.json b/cardinal/src/i18n/resources/es-ES.json index 3403a72b..4bd9910f 100644 --- a/cardinal/src/i18n/resources/es-ES.json +++ b/cardinal/src/i18n/resources/es-ES.json @@ -115,7 +115,8 @@ "maximize": "Zoom", "closeWindow": "Cerrar ventana", "help": "Ayuda", - "getUpdates": "Obtener actualizaciones" + "getUpdates": "Obtener actualizaciones", + "searchSyntax": "Sintaxis de búsqueda" }, "preferences": { "title": "Preferencias", diff --git a/cardinal/src/i18n/resources/fr-FR.json b/cardinal/src/i18n/resources/fr-FR.json index 6c190379..ef7127b5 100644 --- a/cardinal/src/i18n/resources/fr-FR.json +++ b/cardinal/src/i18n/resources/fr-FR.json @@ -115,7 +115,8 @@ "maximize": "Zoom", "closeWindow": "Fermer la fenêtre", "help": "Aide", - "getUpdates": "Obtenir les mises à jour" + "getUpdates": "Obtenir les mises à jour", + "searchSyntax": "Syntaxe de recherche" }, "preferences": { "title": "Préférences", diff --git a/cardinal/src/i18n/resources/hi-IN.json b/cardinal/src/i18n/resources/hi-IN.json index aad3774e..2ae8b190 100644 --- a/cardinal/src/i18n/resources/hi-IN.json +++ b/cardinal/src/i18n/resources/hi-IN.json @@ -105,7 +105,8 @@ "view": "दृश्य", "window": "विंडो", "help": "सहायता", - "getUpdates": "अपडेट प्राप्त करें" + "getUpdates": "अपडेट प्राप्त करें", + "searchSyntax": "खोज सिंटैक्स" }, "preferences": { "title": "प्राथमिकताएँ", diff --git a/cardinal/src/i18n/resources/it-IT.json b/cardinal/src/i18n/resources/it-IT.json index 
045e09f6..a63590dc 100644 --- a/cardinal/src/i18n/resources/it-IT.json +++ b/cardinal/src/i18n/resources/it-IT.json @@ -115,7 +115,8 @@ "maximize": "Zoom", "closeWindow": "Chiudi finestra", "help": "Aiuto", - "getUpdates": "Ricevi aggiornamenti" + "getUpdates": "Ricevi aggiornamenti", + "searchSyntax": "Sintassi di ricerca" }, "preferences": { "title": "Preferenze", diff --git a/cardinal/src/i18n/resources/ja-JP.json b/cardinal/src/i18n/resources/ja-JP.json index 26ad1f4b..48f87be4 100644 --- a/cardinal/src/i18n/resources/ja-JP.json +++ b/cardinal/src/i18n/resources/ja-JP.json @@ -115,7 +115,8 @@ "maximize": "ズーム", "closeWindow": "ウインドウを閉じる", "help": "ヘルプ", - "getUpdates": "アップデートを入手" + "getUpdates": "アップデートを入手", + "searchSyntax": "検索構文" }, "preferences": { "title": "環境設定", diff --git a/cardinal/src/i18n/resources/ko-KR.json b/cardinal/src/i18n/resources/ko-KR.json index 9b53098e..d4e8c01f 100644 --- a/cardinal/src/i18n/resources/ko-KR.json +++ b/cardinal/src/i18n/resources/ko-KR.json @@ -115,7 +115,8 @@ "maximize": "줌", "closeWindow": "윈도우 닫기", "help": "도움말", - "getUpdates": "업데이트 받기" + "getUpdates": "업데이트 받기", + "searchSyntax": "검색 구문" }, "preferences": { "title": "환경설정", diff --git a/cardinal/src/i18n/resources/pt-BR.json b/cardinal/src/i18n/resources/pt-BR.json index 9b9f383c..083859de 100644 --- a/cardinal/src/i18n/resources/pt-BR.json +++ b/cardinal/src/i18n/resources/pt-BR.json @@ -115,7 +115,8 @@ "maximize": "Zoom", "closeWindow": "Fechar janela", "help": "Ajuda", - "getUpdates": "Obter atualizações" + "getUpdates": "Obter atualizações", + "searchSyntax": "Sintaxe de pesquisa" }, "preferences": { "title": "Preferências", diff --git a/cardinal/src/i18n/resources/ru-RU.json b/cardinal/src/i18n/resources/ru-RU.json index 275ff36e..45fed7a9 100644 --- a/cardinal/src/i18n/resources/ru-RU.json +++ b/cardinal/src/i18n/resources/ru-RU.json @@ -115,7 +115,8 @@ "maximize": "Масштабировать", "closeWindow": "Закрыть окно", "help": "Справка", - "getUpdates": "Получить 
обновления" + "getUpdates": "Получить обновления", + "searchSyntax": "Синтаксис поиска" }, "preferences": { "title": "Настройки", diff --git a/cardinal/src/i18n/resources/tr-TR.json b/cardinal/src/i18n/resources/tr-TR.json index 02fea62a..8813fc16 100644 --- a/cardinal/src/i18n/resources/tr-TR.json +++ b/cardinal/src/i18n/resources/tr-TR.json @@ -115,7 +115,8 @@ "maximize": "Yakınlaştır", "closeWindow": "Pencereyi kapat", "help": "Yardım", - "getUpdates": "Güncellemeleri al" + "getUpdates": "Güncellemeleri al", + "searchSyntax": "Arama Sözdizimi" }, "preferences": { "title": "Tercihler", diff --git a/cardinal/src/i18n/resources/uk-UA.json b/cardinal/src/i18n/resources/uk-UA.json index 977579d9..e092aca0 100644 --- a/cardinal/src/i18n/resources/uk-UA.json +++ b/cardinal/src/i18n/resources/uk-UA.json @@ -115,7 +115,8 @@ "maximize": "Збільшити", "closeWindow": "Закрити вікно", "help": "Довідка", - "getUpdates": "Отримати оновлення" + "getUpdates": "Отримати оновлення", + "searchSyntax": "Синтаксис пошуку" }, "preferences": { "title": "Налаштування", diff --git a/cardinal/src/i18n/resources/zh-CN.json b/cardinal/src/i18n/resources/zh-CN.json index e0441f6f..ae6bc63d 100644 --- a/cardinal/src/i18n/resources/zh-CN.json +++ b/cardinal/src/i18n/resources/zh-CN.json @@ -114,7 +114,8 @@ "maximize": "缩放窗口", "closeWindow": "关闭窗口", "help": "帮助", - "getUpdates": "获取更新" + "getUpdates": "获取更新", + "searchSyntax": "搜索语法" }, "preferences": { "title": "偏好设置", diff --git a/cardinal/src/i18n/resources/zh-TW.json b/cardinal/src/i18n/resources/zh-TW.json index 95b35586..84402316 100644 --- a/cardinal/src/i18n/resources/zh-TW.json +++ b/cardinal/src/i18n/resources/zh-TW.json @@ -114,7 +114,8 @@ "maximize": "縮放視窗", "closeWindow": "關閉視窗", "help": "說明", - "getUpdates": "取得更新" + "getUpdates": "取得更新", + "searchSyntax": "搜尋語法" }, "preferences": { "title": "偏好設定", diff --git a/cardinal/src/menu.ts b/cardinal/src/menu.ts index 5046f658..1745a8fd 100644 --- a/cardinal/src/menu.ts +++ 
b/cardinal/src/menu.ts @@ -6,6 +6,7 @@ import i18n from './i18n/config'; import { openPreferences } from './utils/openPreferences'; const HELP_UPDATES_URL = 'https://github.com/cardisoft/cardinal/releases'; +const SEARCH_SYNTAX_URL = 'https://github.com/cardisoft/cardinal/blob/master/doc/pub/search-syntax.md'; let menuInitPromise: Promise | null = null; @@ -91,10 +92,15 @@ async function buildAppMenu(): Promise { text: i18n.t('menu.getUpdates'), action: () => void openUpdatesPage(), }); + const searchSyntaxItem = await MenuItem.new({ + id: 'menu.help_search_syntax', + text: i18n.t('menu.searchSyntax'), + action: () => void openUrl(SEARCH_SYNTAX_URL).catch(() => {}), + }); const helpSubmenu = await Submenu.new({ id: 'menu.help-root', text: i18n.t('menu.help'), - items: [getUpdatesItem], + items: [searchSyntaxItem, getUpdatesItem], }); await helpSubmenu.setAsHelpMenuForNSApp().catch(() => {}); diff --git a/cardinal/src/types/ipc.ts b/cardinal/src/types/ipc.ts index b25c391c..8031ac7f 100644 --- a/cardinal/src/types/ipc.ts +++ b/cardinal/src/types/ipc.ts @@ -4,6 +4,7 @@ export type StatusBarUpdatePayload = { scannedFiles: number; processedEvents: number; rescanErrors: number; + statusMessage?: string; }; export type IconUpdateWirePayload = { diff --git a/doc/pub/search-syntax.md b/doc/pub/search-syntax.md index 011265a2..394f3b32 100644 --- a/doc/pub/search-syntax.md +++ b/doc/pub/search-syntax.md @@ -157,7 +157,26 @@ ext:png;jpg travel|vacation These filters take an absolute path as their argument; a leading `~` is expanded to the user home directory. Path lookup follows the UI case-sensitivity toggle: when case-sensitive matching is off, each path segment can match regardless of case. -### 4.4 Type filter: `type:` +### 4.4 Path substring filter: `path:` + +`path:` keeps items whose **full absolute path** contains the argument as a substring. 
Unlike `parent:`/`infolder:` it does not resolve a single folder — it matches any path fragment, so it works even when you only remember part of the hierarchy. Multiple `path:` filters combine with AND, each narrowing the result set further. Matching respects the UI case-sensitivity toggle. + +Like all filters, `path:` can be negated with `!` to exclude paths containing the argument. + +| Filter | Meaning | Example | +| ------- | ------------------------------------------------------------- | ------------------------------------ | +| `path:` | Items whose full path contains the argument (case-aware) | `main.js path:Downloads path:repos` | +| `!path:` | Items whose full path does **not** contain the argument | `*.js !path:node_modules` | + +Examples: +```text +main.js path:repos # main.js anywhere under a path containing "repos" +main.js path:Downloads path:repos # main.js under a path containing both "Downloads" and "repos" +path:Documents report # "report" items whose path contains "Documents" +*.js !path:node_modules # .js files excluding anything under node_modules +``` + +### 4.5 Type filter: `type:` `type:` groups file extensions into semantic categories. Supported categories (case-insensitive, with synonyms) include: @@ -179,7 +198,7 @@ type:code "Cardinal" type:archive dm:pastmonth ``` -### 4.5 Type macros: `audio:`, `video:`, `doc:`, `exe:` +### 4.6 Type macros: `audio:`, `video:`, `doc:`, `exe:` Shortcuts for common `type:` cases: @@ -196,7 +215,7 @@ audio:soundtrack video:"Keynote" ``` -### 4.6 Size filter: `size:` +### 4.7 Size filter: `size:` `size:` supports: @@ -213,7 +232,7 @@ size:tiny # 0–10 KB (approximate keyword range) size:empty # exactly 0 bytes ``` -### 4.7 Date filters: `dm:`, `dc:` +### 4.8 Date filters: `dm:`, `dc:` - `dm:` / `datemodified:` — date modified. - `dc:` / `datecreated:` — date created. 
@@ -243,7 +262,7 @@ dm:2024-01-01..2024-03-31 # modified in Q1 2024 dm:>=2024/01/01 # modified from 2024-01-01 onwards ``` -### 4.8 Regex filter: `regex:` +### 4.9 Regex filter: `regex:` `regex:` treats the rest of the token as a regular expression applied to a path component (file or folder name). @@ -255,7 +274,7 @@ regex:Report.*2025 The UI case-sensitivity toggle affects regex matching. -### 4.9 Content filter: `content:` +### 4.10 Content filter: `content:` `content:` scans file contents for a **plain substring**: @@ -275,7 +294,7 @@ type:doc content:"Q4 budget" Content matching is done in streaming fashion over the file; multi-byte sequences can span buffer boundaries. -### 4.10 Tag filter: `tag:` / `t:` +### 4.11 Tag filter: `tag:` / `t:` Filters by Finder tags (macOS). Cardinal fetches tags on demand from the file’s metadata (no caching), and for large result sets it uses `mdfind` to narrow candidates before applying tag matching. @@ -314,6 +333,9 @@ ext:png;jpg travel|vacation # Recent log files inside a project tree in:/Users/demo/Projects ext:log dm:pastweek +# Narrow by path fragments when you only remember part of the hierarchy +main.js path:Downloads path:repos + # Shell scripts directly under Scripts folder parent:/Users/demo/Scripts *.sh diff --git a/e2e-tests/Cargo.toml b/e2e-tests/Cargo.toml new file mode 100644 index 00000000..12ae43fe --- /dev/null +++ b/e2e-tests/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "e2e-tests" +version = "0.1.0" +edition = "2024" +description = "End-to-end performance tests that run the search pipeline (including the path: filter) against a real filesystem walk." +license = "MIT" +publish = false + +[dependencies] +search-cache = { path = "../search-cache" } +search-cancel = { path = "../search-cancel" } + +[lib] +doctest = false diff --git a/e2e-tests/src/lib.rs b/e2e-tests/src/lib.rs new file mode 100644 index 00000000..1afc68b9 --- /dev/null +++ b/e2e-tests/src/lib.rs @@ -0,0 +1,163 @@ +//! End-to-end performance tests for the `path:` search filter. +//!
+//! These tests exercise the full search pipeline (parse → optimize → evaluate) +//! against a real filesystem walk, measuring latency and verifying correctness. +//! +//! Run: cargo test -p e2e-tests -- --test-threads=1 --nocapture + +use search_cache::{SearchCache, SearchOptions, SearchQuery}; +use search_cancel::CancellationToken; +use std::time::Instant; + +/// Walk the root filesystem (same as the Cardinal app does). +/// Runs on a thread with a large stack to avoid overflow on deep directory trees. +#[allow(dead_code)] +fn build_cache() -> SearchCache { + let ignore_paths = vec![ + std::path::PathBuf::from("/Volumes"), + std::path::PathBuf::from("/System/Volumes/Data"), + std::path::PathBuf::from("/private/var"), + std::path::PathBuf::from("/private/tmp"), + ]; + eprintln!("Walking filesystem (this takes ~90s)..."); + let start = Instant::now(); + let cache = std::thread::Builder::new() + .stack_size(512 * 1024 * 1024) // 512MB stack + .spawn(move || SearchCache::walk_fs_with_ignore(std::path::Path::new("/"), &ignore_paths)) + .expect("failed to spawn thread") + .join() + .expect("thread panicked"); + eprintln!("Filesystem walk completed in {:?}", start.elapsed()); + eprintln!("Flat index entries: {}", cache.flat_index_len()); + cache +} + +#[allow(dead_code)] +fn search(cache: &mut SearchCache, query: &str) -> (usize, std::time::Duration) { + let token = CancellationToken::new_search(); + let opts = SearchOptions::default(); + let start = Instant::now(); + let outcome = cache + .search_query_with_options( + SearchQuery { + directory_query: None, + query: Some(query.to_string()), + }, + opts, + token, + ) + .expect("search should not error"); + let elapsed = start.elapsed(); + let count = outcome.nodes.unwrap_or_default().len(); + (count, elapsed) +} + +#[test] +fn star_js_search_performance() { + let mut cache = build_cache(); + let (count, elapsed) = search(&mut cache, "*.js"); + println!("*.js: {count} results in {elapsed:?}"); + // Don't assert count > 0 
— CI runners may have few files. + assert!( + elapsed.as_secs() < 5, + "*.js should complete in under 5s, took {elapsed:?}" + ); +} + +#[test] +fn path_repos_search_performance() { + let mut cache = build_cache(); + let (count, elapsed) = search(&mut cache, "path:repos"); + println!("path:repos: {count} results in {elapsed:?}"); + assert!( + elapsed.as_secs() < 5, + "path:repos should complete in under 5s, took {elapsed:?}" + ); +} + +#[test] +fn path_repos_vs_star_js_parody() { + let mut cache = build_cache(); + + let (_, js_time) = search(&mut cache, "*.js"); + let (_, repos_time) = search(&mut cache, "path:repos"); + + println!("*.js: {js_time:?}"); + println!("path:repos: {repos_time:?}"); + + // path:repos scans all entries (O(N) substring match on flat index paths). + // *.js uses the name index (O(log N) lookup + O(matches) expansion). + // path:repos will be slower than *.js because it scans all N entries, + // but should still complete in ~1-2s on a modern machine. + let ratio = repos_time.as_secs_f64() / js_time.as_secs_f64(); + println!("Ratio path/js: {ratio:.2}x"); + assert!( + repos_time.as_secs() < 5, + "path:repos should complete in under 5s, took {repos_time:?}" + ); + assert!( + js_time.as_secs() < 5, + "*.js should complete in under 5s, took {js_time:?}" + ); +} + +#[test] +fn main_js_path_downloads_path_repos_combined_query() { + let mut cache = build_cache(); + let (count, elapsed) = search(&mut cache, "main.js path:Downloads path:repos"); + println!("main.js path:Downloads path:repos: {count} results in {elapsed:?}"); + // Should complete without hanging. + assert!( + elapsed.as_secs() < 10, + "combined query should complete in under 10s, took {elapsed:?}" + ); +} + +#[test] +fn cancellation_works() { + let mut cache = build_cache(); + + // Start a search, then immediately start another — the first should be cancelled. 
+ let token1 = CancellationToken::new_search(); + let opts = SearchOptions::default(); + + // Immediately create a new search (cancels token1). + let _token2 = CancellationToken::new_search(); + + let outcome1 = cache + .search_query_with_options( + SearchQuery { + directory_query: None, + query: Some("path:repos".to_string()), + }, + opts, + token1, + ) + .expect("search should not error"); + + assert!( + outcome1.nodes.is_none(), + "First search should be cancelled, but got results" + ); + + // Second search uses a noop token (not cancelled) to verify the cache works. + let outcome2 = cache + .search_query_with_options( + SearchQuery { + directory_query: None, + query: Some("*.js".to_string()), + }, + opts, + CancellationToken::noop(), + ) + .expect("search should not error"); + + assert!( + outcome2.nodes.is_some(), + "Second search should complete successfully" + ); + println!( + "Cancellation: first search cancelled, second returned {} results", + outcome2.nodes.unwrap().len() + ); +} diff --git a/fswalk/src/lib.rs b/fswalk/src/lib.rs index 7faf2057..f9eae947 100644 --- a/fswalk/src/lib.rs +++ b/fswalk/src/lib.rs @@ -242,6 +242,117 @@ pub fn walk_it bool + Send + Sync>(walk_data: &WalkData<'_, F>) -> Op }) } +/// A flat filesystem entry: full path + optional metadata. +#[derive(Debug, Clone)] +pub struct FlatWalkEntry { + pub path: PathBuf, + pub metadata: Option, +} + +/// Walk the filesystem and return a flat list of entries (sorted by path), +/// instead of a tree. Each entry has its full absolute path available +/// directly — no parent-chain reconstruction needed. +/// +/// Returns `None` if cancelled. +pub fn walk_flat bool + Send + Sync>( + walk_data: &WalkData<'_, F>, +) -> Option> { + let mut entries = Vec::new(); + walk_flat_recursive(walk_data.root_path, walk_data, &mut entries)?; + // fswalk visits children in parallel, so entries are not sorted. + // Sort by path to enable binary-search prefix queries. 
+    entries.sort_unstable_by(|a, b| a.path.as_os_str().cmp(b.path.as_os_str()));
+    Some(entries)
+}
+
+/// Recursively append `path` and everything beneath it to `out`.
+///
+/// An ignored `path` contributes nothing and counts as success. Children of
+/// a directory are visited in parallel: every child collects into its own
+/// `Vec`, and those per-child results are appended to `out` after all of
+/// them have finished, so `out` is in no particular order (`walk_flat`
+/// sorts afterwards).
+///
+/// Returns `None` if cancellation was observed anywhere in the subtree. In
+/// that case `out` may hold a partial listing and must be discarded.
+fn walk_flat_recursive<F: Fn() -> bool + Send + Sync>(
+    path: &Path,
+    walk_data: &WalkData<'_, F>,
+    out: &mut Vec<FlatWalkEntry>,
+) -> Option<()> {
+    if walk_data.is_cancelled() {
+        return None;
+    }
+    if walk_data.should_ignore(path) {
+        return Some(());
+    }
+
+    let metadata = metadata_of_path(path);
+    let need_metadata = walk_data.need_metadata;
+    let is_dir = metadata.as_ref().map(|x| x.is_dir()).unwrap_or(false);
+
+    // Emit this entry.
+    if is_dir {
+        walk_data.num_dirs.fetch_add(1, Ordering::Relaxed);
+    } else {
+        walk_data.num_files.fetch_add(1, Ordering::Relaxed);
+    }
+    out.push(FlatWalkEntry {
+        path: path.to_path_buf(),
+        metadata: need_metadata
+            .then(|| metadata.map(NodeMetadata::from))
+            .flatten(),
+    });
+
+    if !is_dir {
+        return Some(());
+    }
+    // An unreadable directory is still listed itself; only its children are
+    // skipped.
+    let Ok(read_dir) = fs::read_dir(path) else {
+        return Some(());
+    };
+
+    // Set by any child that observes cancellation, directly or in a nested
+    // call. A `None` produced by the closure is otherwise indistinguishable
+    // from "nothing to emit for this child".
+    let cancelled = AtomicBool::new(false);
+    let child_results: Vec<Option<Vec<FlatWalkEntry>>> = read_dir
+        .par_bridge()
+        .map(|entry| {
+            // Directory entries that cannot be read are skipped.
+            let entry = entry.ok()?;
+            if walk_data.is_cancelled() {
+                cancelled.store(true, Ordering::Relaxed);
+                return None;
+            }
+            let child_path = entry.path();
+            if walk_data.should_ignore(&child_path) {
+                return None;
+            }
+            // Don't traverse symlinks: `DirEntry::file_type` does not follow
+            // them, so a symlink to a directory is recorded as a leaf.
+            let file_type = entry.file_type().ok()?;
+            if file_type.is_dir() {
+                let mut child_entries = Vec::new();
+                if walk_flat_recursive(&child_path, walk_data, &mut child_entries).is_none() {
+                    // Propagate the nested cancellation instead of silently
+                    // dropping this subtree from an otherwise "successful" walk.
+                    cancelled.store(true, Ordering::Relaxed);
+                    return None;
+                }
+                Some(child_entries)
+            } else {
+                walk_data.num_files.fetch_add(1, Ordering::Relaxed);
+                let meta = need_metadata
+                    .then(|| entry.metadata().ok().map(NodeMetadata::from))
+                    .flatten();
+                Some(vec![FlatWalkEntry {
+                    path: child_path,
+                    metadata: meta,
+                }])
+            }
+        })
+        .collect();
+
+    if cancelled.load(Ordering::Acquire) {
+        return None;
+    }
+    for child_entries in child_results.into_iter().flatten() {
+        out.extend(child_entries);
+    }
+    Some(())
+}
+
/// Note: this function will create a Node for the given path even if it's /// missing or inaccessible, but the metadata will be None in that case. fn walk bool + Send + Sync>(path: &Path, walk_data: &WalkData<'_, F>) -> Option { diff --git a/search-cache/benches/walk_and_search.rs b/search-cache/benches/walk_and_search.rs index 0f16407f..5ff1f2a2 100644 --- a/search-cache/benches/walk_and_search.rs +++ b/search-cache/benches/walk_and_search.rs @@ -45,6 +45,12 @@ const QUERIES: &[&str] = &[ "*.h", // Term that should match very few results "ffffffff_no_match_xyzzy", + // path: substring filter — single fragment + "path:repos", + // path: substring filter — multiple fragments (AND) + "path:Downloads path:repos", + // path: + word + "main.js path:repos", ]; // Measures search latency on a fresh cache per iteration.
Cache construction is diff --git a/search-cache/src/cache.rs b/search-cache/src/cache.rs index 6ef6cc8a..d407ec56 100644 --- a/search-cache/src/cache.rs +++ b/search-cache/src/cache.rs @@ -1,6 +1,6 @@ use crate::{ - FileNodes, NameIndex, SearchOptions, SearchResultNode, SlabIndex, SlabNode, - SlabNodeMetadataCompact, State, ThinSlab, + FileNodes, FlatEntry, FlatIndex, NameIndex, SearchOptions, SearchResultNode, SlabIndex, + SlabNode, SlabNodeMetadataCompact, State, ThinSlab, highlight::derive_highlight_terms, persistent::{PersistentStorage, read_cache_from_file, write_cache_to_file}, query_preprocessor::{expand_query_home_dirs, strip_query_quotes}, @@ -37,6 +37,10 @@ pub struct SearchCache { last_event_id: u64, rescan_count: u64, pub(crate) name_index: NameIndex, + /// Flat index storing full paths directly, enabling O(log n) prefix + /// queries and O(1) path lookups. Gradually replacing the tree-based + /// `file_nodes` + `name_index` for path-oriented queries. + pub(crate) flat_index: FlatIndex, stop: &'static AtomicBool, } @@ -61,7 +65,7 @@ impl SearchOutcome { Self { nodes, highlights } } - fn cancelled() -> Self { + pub fn cancelled() -> Self { Self { nodes: None, highlights: vec![], @@ -191,7 +195,19 @@ impl SearchCache { // name pool construction speed is fast enough that caching it doesn't worth it. let name_index = NameIndex::construct_name_pool(name_index); let slab = FileNodes::new(path, ignore_paths, include_paths, slab, slab_root); - Self::new(slab, last_event_id, rescan_count, name_index, cancel) + // Flat index is NOT built from the persisted cache — that + // would require walking every node's parent chain (O(N×depth)) + // and hang on large caches. The path: filter falls back to + // node_path() when the flat index is empty. 
+ let flat_index = FlatIndex::default(); + Self::new( + slab, + last_event_id, + rescan_count, + name_index, + flat_index, + cancel, + ) }, ) } @@ -229,7 +245,7 @@ impl SearchCache { // Return None if cancelled fn walkfs_to_slab( walk_data: &WalkData<'_, F>, - ) -> Option<(SlabIndex, ThinSlab, NameIndex)> + ) -> Option<(SlabIndex, ThinSlab, NameIndex, Vec)> where F: Fn() -> bool + Send + Sync, { @@ -250,19 +266,28 @@ impl SearchCache { let slab_time = Instant::now(); let mut slab = ThinSlab::new(); let mut name_index = NameIndex::default(); - let slab_root = construct_node_slab_name_index(None, &node, &mut slab, &mut name_index); + let mut flat_entries = Vec::with_capacity(1_000_000); + let slab_root = construct_node_slab_name_index( + None, + &node, + &mut slab, + &mut name_index, + &mut flat_entries, + walk_data.root_path.parent().unwrap_or(Path::new("")), + ); info!( - "Slab & NameIndex construction time: {:?}, slab root: {:?}, slab len: {:?}", + "Slab & NameIndex & FlatIndex construction time: {:?}, slab root: {:?}, slab len: {:?}, flat entries: {:?}", slab_time.elapsed(), slab_root, - slab.len() + slab.len(), + flat_entries.len(), ); - Some((slab_root, slab, name_index)) + Some((slab_root, slab, name_index, flat_entries)) } let last_event_id = current_event_id(); - let (slab_root, slab, name_index) = walkfs_to_slab(walk_data)?; + let (slab_root, slab, name_index, flat_entries) = walkfs_to_slab(walk_data)?; let slab = FileNodes::new( walk_data.root_path.to_path_buf(), walk_data.ignore_directories.to_vec(), @@ -270,8 +295,20 @@ impl SearchCache { slab, slab_root, ); + // Build the flat index from entries collected during the tree walk. + // Entries are already sorted by path because fswalk sorts children + // by name and construct_node_slab_name_index does a preorder traversal. 
+ let flat_index = FlatIndex::build_from_entries(flat_entries); + info!("FlatIndex built: {} entries", flat_index.len()); // metadata cache inits later - Some(Self::new(slab, last_event_id, 0, name_index, cancel)) + Some(Self::new( + slab, + last_event_id, + 0, + name_index, + flat_index, + cancel, + )) } fn new( @@ -279,6 +316,7 @@ impl SearchCache { last_event_id: u64, rescan_count: u64, name_index: NameIndex, + flat_index: FlatIndex, cancel: &'static AtomicBool, ) -> Self { Self { @@ -286,6 +324,7 @@ impl SearchCache { last_event_id, rescan_count, name_index, + flat_index, stop: cancel, } } @@ -309,6 +348,7 @@ impl SearchCache { last_event_id: 0, rescan_count: 0, name_index: NameIndex::default(), + flat_index: FlatIndex::default(), stop: cancel, } } @@ -317,6 +357,11 @@ impl SearchCache { self.file_nodes.is_empty() && self.name_index.is_empty() } + /// Number of entries in the flat index (full paths indexed). + pub fn flat_index_len(&self) -> usize { + self.flat_index.len() + } + pub fn search_empty(&self, cancellation_token: CancellationToken) -> Option> { self.name_index.all_indices(cancellation_token) } @@ -604,6 +649,9 @@ impl SearchCache { let name = node.name(); let index = self.file_nodes.insert(node); self.name_index.add_index(name, index, &self.file_nodes); + // Note: flat index is not maintained on FS events to avoid O(N) + // shifts in the sorted Vec. The path: filter falls back to + // node_path() for nodes not in the flat index. index } @@ -778,6 +826,7 @@ impl SearchCache { if let Some(node) = cache.file_nodes.try_remove(index) { let removed = cache.name_index.remove_index(node.name(), index); assert!(removed, "inconsistent name index and node"); + // Note: flat index is not maintained on FS events. 
} } @@ -823,6 +872,7 @@ impl SearchCache { last_event_id, rescan_count, name_index, + flat_index: _, stop: _, } = self; let (path, ignore_paths, include_paths, slab_root, slab) = file_nodes.into_parts(); @@ -1054,12 +1104,13 @@ pub enum HandleFSEError { Rescan, } -/// Note: This function is expected to be called with WalkData which metadata is not fetched. fn construct_node_slab_name_index( parent: Option, node: &Node, slab: &mut ThinSlab, name_index: &mut NameIndex, + flat_entries: &mut Vec, + parent_path: &Path, ) -> SlabIndex { let metadata = match node.metadata { Some(metadata) => SlabNodeMetadataCompact::some(metadata), @@ -1073,10 +1124,30 @@ fn construct_node_slab_name_index( // so this preorder traversal visits nodes in lexicographic path order. name_index.add_index_ordered(name, index); } + + // Build the full path for this node by joining parent_path + name. + let full_path = parent_path.join(node.name.as_ref()); + let path_str: &'static str = PATH_POOL.push(full_path.to_string_lossy().as_ref()); + flat_entries.push(FlatEntry { + path: path_str, + name, + slab_index: index, + metadata, + }); + slab[index].children = node .children .iter() - .map(|node| construct_node_slab_name_index(Some(index), node, slab, name_index)) + .map(|child| { + construct_node_slab_name_index( + Some(index), + child, + slab, + name_index, + flat_entries, + &full_path, + ) + }) .collect(); index } @@ -1110,6 +1181,9 @@ impl SearchCache { pub static NAME_POOL: LazyLock = LazyLock::new(NamePool::new); +/// Global pool of interned full absolute paths, for the flat index. 
+pub static PATH_POOL: LazyLock = LazyLock::new(NamePool::new); + fn require_folder_expr(expr: Expr) -> Expr { let folder_filter = Expr::Term(Term::Filter(Filter { kind: FilterKind::Folder, @@ -2148,7 +2222,15 @@ mod tests { ); let mut slab = ThinSlab::new(); let mut name_index = NameIndex::default(); - let root = construct_node_slab_name_index(None, &tree, &mut slab, &mut name_index); + let mut flat_entries = Vec::new(); + let root = construct_node_slab_name_index( + None, + &tree, + &mut slab, + &mut name_index, + &mut flat_entries, + Path::new(""), + ); let file_nodes = FileNodes::new( PathBuf::from("/virtual/root"), Vec::new(), diff --git a/search-cache/src/flat_index.rs b/search-cache/src/flat_index.rs new file mode 100644 index 00000000..c7238977 --- /dev/null +++ b/search-cache/src/flat_index.rs @@ -0,0 +1,289 @@ +//! Flat index for Cardinal's search cache. +//! +//! Instead of a tree of `SlabNode`s with parent pointers, the flat index +//! stores every filesystem entry in a single sorted `Vec`. Full paths are +//! stored directly (not reconstructed from a parent chain), which makes +//! `path:`, `parent:`, `infolder:`, and `nosubfolders:` queries trivial. +//! +//! Two derived indexes are maintained alongside the entry array: +//! - A sorted-by-path array (the entries themselves), enabling prefix range +//! queries for `parent:` / `infolder:`. +//! - A name index mapping the last path segment (filename) → entry indices, +//! for `*.ext` and word search — identical to the existing approach. + +use crate::{NAME_POOL, PATH_POOL, SlabIndex, SlabNodeMetadataCompact}; +use fswalk::NodeFileType; +use serde::{Deserialize, Serialize}; +use std::{ + collections::BTreeMap, + path::{Path, PathBuf}, +}; + +/// A single filesystem entry in the flat index. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct FlatEntry { + /// Interned full absolute path (e.g. `/Users/demo/src/main.rs`). + pub path: &'static str, + /// Interned last path segment (e.g. `main.rs`). 
Derived at index time. + pub name: &'static str, + /// The index into the slab (`FileNodes`) for this entry. + pub slab_index: SlabIndex, + /// Compact metadata: file type, size, timestamps. + pub metadata: SlabNodeMetadataCompact, +} + +impl FlatEntry { + pub fn is_dir(&self) -> bool { + self.metadata.file_type_hint() == NodeFileType::Dir + } + + pub fn path(&self) -> &Path { + Path::new(self.path) + } + + /// Case-insensitive substring match on the path without allocation. + /// Uses `eq_ignore_ascii_case` on each character for matching. + pub fn path_match_ci(&self, needle_lower: &str) -> bool { + let path_bytes = self.path.as_bytes(); + let needle_bytes = needle_lower.as_bytes(); + if needle_bytes.is_empty() { + return true; + } + if needle_bytes.len() > path_bytes.len() { + return false; + } + // Sliding window: check if any substring of path matches needle + // case-insensitively (ASCII only). + for i in 0..=(path_bytes.len() - needle_bytes.len()) { + let mut found = true; + for (j, &nb) in needle_bytes.iter().enumerate() { + let pb = path_bytes[i + j]; + // Convert both to lowercase ASCII for comparison + let pb_lower = pb.to_ascii_lowercase(); + if pb_lower != nb { + found = false; + break; + } + } + if found { + return true; + } + } + false + } +} + +/// Name index for the flat structure: maps interned filenames → entry indices. +#[derive(Debug, Clone, Default)] +pub struct FlatNameIndex { + map: BTreeMap<&'static str, Vec>, +} + +impl FlatNameIndex { + pub fn get(&self, name: &str) -> Option<&[SlabIndex]> { + self.map.get(name).map(|v| v.as_slice()) + } + + pub fn len(&self) -> usize { + self.map.len() + } + + pub fn is_empty(&self) -> bool { + self.map.is_empty() + } +} + +/// The flat index: a sorted array of entries plus derived indexes. +#[derive(Debug, Clone, Default)] +pub struct FlatIndex { + entries: Vec, + /// Maps interned filenames → entry indices (for *.ext / word search). 
+ pub name_index: FlatNameIndex, + /// Maps interned full paths → entry index (for path: filter lookups). + path_map: BTreeMap<&'static str, SlabIndex>, + /// Maps slab index → entry index in `entries`. + slab_map: BTreeMap, +} + +impl FlatIndex { + pub fn new() -> Self { + Self::default() + } + + pub fn len(&self) -> usize { + self.entries.len() + } + + pub fn is_empty(&self) -> bool { + self.entries.is_empty() + } + + pub fn get(&self, slab_index: SlabIndex) -> Option<&FlatEntry> { + self.slab_map + .get(&slab_index) + .and_then(|&i| self.entries.get(i)) + } + + /// Get an entry by its position in the sorted entries array. + pub fn get_by_pos(&self, pos: usize) -> Option<&FlatEntry> { + self.entries.get(pos) + } + + pub fn get_mut(&mut self, slab_index: SlabIndex) -> Option<&mut FlatEntry> { + self.slab_map + .get(&slab_index) + .copied() + .and_then(move |i| self.entries.get_mut(i)) + } + + pub fn iter(&self) -> impl Iterator { + self.entries + .iter() + .enumerate() + .map(|(i, e)| (SlabIndex::new(i), e)) + } + + pub fn all_indices(&self) -> Vec { + (0..self.entries.len()).map(SlabIndex::new).collect() + } + + /// Build from entries already sorted by path. + pub fn build_from_entries(entries: Vec) -> Self { + let mut name_map: BTreeMap<&'static str, Vec> = BTreeMap::new(); + let mut path_map: BTreeMap<&'static str, SlabIndex> = BTreeMap::new(); + let mut slab_map: BTreeMap = BTreeMap::new(); + for (i, entry) in entries.iter().enumerate() { + let idx = SlabIndex::new(i); + name_map.entry(entry.name).or_default().push(idx); + path_map.insert(entry.path, idx); + slab_map.insert(entry.slab_index, i); + } + Self { + entries, + name_index: FlatNameIndex { map: name_map }, + path_map, + slab_map, + } + } + + /// Range of entries whose path starts with `prefix` — O(log n). 
+    pub fn prefix_range(&self, prefix: &str) -> std::ops::Range<usize> {
+        // Both searches rely on `entries` being sorted by path bytes: first
+        // everything ordered before `prefix`, then the contiguous run of
+        // paths that start with it. An empty index yields `0..0`.
+        let start = self
+            .entries
+            .partition_point(|e| e.path.as_bytes() < prefix.as_bytes());
+        let end = start + self.entries[start..].partition_point(|e| e.path.starts_with(prefix));
+        start..end
+    }
+
+    /// Slab indices of every entry whose path starts with `prefix`.
+    pub fn prefix_indices(&self, prefix: &str) -> Vec<SlabIndex> {
+        self.entries[self.prefix_range(prefix)]
+            .iter()
+            .map(|e| e.slab_index)
+            .collect()
+    }
+
+    /// Full path of the entry registered for slab index `index`, if any.
+    pub fn node_path(&self, index: SlabIndex) -> Option<PathBuf> {
+        self.get(index).map(|e| PathBuf::from(e.path))
+    }
+
+    /// Interned file name of the entry registered for slab index `index`.
+    pub fn node_name(&self, index: SlabIndex) -> Option<&'static str> {
+        self.get(index).map(|e| e.name)
+    }
+
+    /// Insert `entry` at its sorted position and keep all derived indexes
+    /// consistent.
+    ///
+    /// Every entry at or after the insertion point moves one slot to the
+    /// right, so the positions stored for those entries are bumped before
+    /// the new entry is registered.
+    pub fn insert(&mut self, entry: FlatEntry) {
+        let pos = self
+            .entries
+            .partition_point(|e| e.path.as_bytes() < entry.path.as_bytes());
+        self.remap_positions_from(pos, |p| p + 1);
+        self.name_index
+            .map
+            .entry(entry.name)
+            .or_default()
+            .push(SlabIndex::new(pos));
+        self.path_map.insert(entry.path, SlabIndex::new(pos));
+        self.slab_map.insert(entry.slab_index, pos);
+        self.entries.insert(pos, entry);
+    }
+
+    /// Remove the entry registered for `slab_index` and return it.
+    ///
+    /// Entries after the removed one move one slot to the left; their stored
+    /// positions are adjusted accordingly. A file name whose last entry is
+    /// removed disappears from the name index instead of lingering as an
+    /// empty list.
+    pub fn remove(&mut self, slab_index: SlabIndex) -> Option<FlatEntry> {
+        let pos = self.slab_map.remove(&slab_index)?;
+        let entry = self.entries.remove(pos);
+        if let Some(positions) = self.name_index.map.get_mut(entry.name) {
+            positions.retain(|p| p.get() != pos);
+            if positions.is_empty() {
+                self.name_index.map.remove(entry.name);
+            }
+        }
+        self.path_map.remove(entry.path);
+        // `pos + 1 >= 1`, so the decrement cannot underflow.
+        self.remap_positions_from(pos + 1, |p| p - 1);
+        Some(entry)
+    }
+
+    /// Remove every entry whose path starts with `prefix`; returns how many
+    /// entries were removed.
+    pub fn remove_prefix(&mut self, prefix: &str) -> usize {
+        let range = self.prefix_range(prefix);
+        let count = range.len();
+        if count > 0 {
+            self.entries.drain(range);
+            // A bulk removal shifts an arbitrary number of positions, so the
+            // derived indexes are rebuilt from scratch rather than patched.
+            self.rebuild_indexes();
+        }
+        count
+    }
+
+    /// Recompute `name_index`, `path_map` and `slab_map` from `entries`.
+    fn rebuild_indexes(&mut self) {
+        let mut name_map: BTreeMap<&'static str, Vec<SlabIndex>> = BTreeMap::new();
+        let mut path_map: BTreeMap<&'static str, SlabIndex> = BTreeMap::new();
+        let mut slab_map: BTreeMap<SlabIndex, usize> = BTreeMap::new();
+        for (i, entry) in self.entries.iter().enumerate() {
+            let idx = SlabIndex::new(i);
+            name_map.entry(entry.name).or_default().push(idx);
+            path_map.insert(entry.path, idx);
+            slab_map.insert(entry.slab_index, i);
+        }
+        self.name_index = FlatNameIndex { map: name_map };
+        self.path_map = path_map;
+        self.slab_map = slab_map;
+    }
+
+    /// Apply `shift` to every stored entry position that is `>= from`.
+    ///
+    /// `name_index` and `path_map` keep positions wrapped in `SlabIndex`,
+    /// `slab_map` keeps them as plain `usize`; all three are updated.
+    fn remap_positions_from(&mut self, from: usize, shift: impl Fn(usize) -> usize) {
+        let wrapped = self
+            .name_index
+            .map
+            .values_mut()
+            .flatten()
+            .chain(self.path_map.values_mut());
+        for stored in wrapped {
+            if stored.get() >= from {
+                *stored = SlabIndex::new(shift(stored.get()));
+            }
+        }
+        for pos in self.slab_map.values_mut() {
+            if *pos >= from {
+                *pos = shift(*pos);
+            }
+        }
+    }
+
+    /// Look up the slab index for an interned full path.
+    pub fn get_by_path(&self, path: &str) -> Option<SlabIndex> {
+        let pos = self.path_map.get(path)?;
+        self.entries.get(pos.get()).map(|e| e.slab_index)
+    }
+}
+
+/// Build a `FlatEntry` from a full path, slab index, and optional metadata.
+pub fn make_flat_entry( + path: &Path, + slab_index: SlabIndex, + metadata: Option, +) -> FlatEntry { + let path_str = path.to_string_lossy(); + let interned_path = PATH_POOL.push(path_str.as_ref()); + let name = path + .file_name() + .map(|n| NAME_POOL.push(n.to_string_lossy().as_ref())) + .unwrap_or_else(|| NAME_POOL.push("")); + let metadata = match metadata { + Some(m) => SlabNodeMetadataCompact::some(m), + None => SlabNodeMetadataCompact::none(), + }; + FlatEntry { + path: interned_path, + name, + slab_index, + metadata, + } +} diff --git a/search-cache/src/lib.rs b/search-cache/src/lib.rs index a426ce42..a1cfe6c7 100644 --- a/search-cache/src/lib.rs +++ b/search-cache/src/lib.rs @@ -1,6 +1,7 @@ #![feature(str_from_raw_parts)] mod cache; mod file_nodes; +mod flat_index; mod highlight; mod metadata_cache; mod name_index; @@ -14,6 +15,7 @@ mod type_and_size; pub use cache::*; pub use file_nodes::*; +pub use flat_index::*; pub use fswalk::WalkData; pub use metadata_cache::*; pub use name_index::*; diff --git a/search-cache/src/persistent.rs b/search-cache/src/persistent.rs index 3f08a639..730f7fc6 100644 --- a/search-cache/src/persistent.rs +++ b/search-cache/src/persistent.rs @@ -12,7 +12,7 @@ use std::{ use tracing::info; use typed_num::Num; -const LSF_VERSION: i64 = 6; +const LSF_VERSION: i64 = 8; #[derive(Serialize, Deserialize)] pub struct PersistentStorage { diff --git a/search-cache/src/query.rs b/search-cache/src/query.rs index a2c16510..466195ba 100644 --- a/search-cache/src/query.rs +++ b/search-cache/src/query.rs @@ -439,6 +439,13 @@ impl SearchCache { .ok_or_else(|| anyhow!("infolder: requires a folder path"))?; self.evaluate_infolder_filter(argument, base, options, token) } + FilterKind::Path => { + let argument = filter + .argument + .as_ref() + .ok_or_else(|| anyhow!("path: requires a path fragment"))?; + self.evaluate_path_filter(argument, base, options, token) + } FilterKind::NoSubfolders => { let argument = filter .argument @@ -616,6 +623,85 @@ 
impl SearchCache { } }
+    /// `path:` keeps items whose full absolute path contains the argument as
+    /// a substring. The match runs against the whole path string, so a
+    /// fragment may span a separator (e.g. `path:repos/src`). Leading `/`
+    /// characters of the argument are ignored. Matching respects the UI
+    /// case-sensitivity toggle (ASCII case folding only). Multiple `path:`
+    /// filters are combined with AND by the query optimizer, each narrowing
+    /// the result set further.
+    ///
+    /// Evaluation strategy:
+    /// - with a base set, only those nodes are tested;
+    /// - without one, the flat index is scanned linearly over its stored
+    ///   full paths;
+    /// - without one and with an empty flat index (a cache restored from
+    ///   disk does not build it), every live node is tested instead, using
+    ///   the path reconstructed by `node_path`.
+    ///
+    /// Returns `Ok(None)` when the search is cancelled, and an error when
+    /// the argument is empty after stripping leading slashes.
+    fn evaluate_path_filter(
+        &self,
+        argument: &FilterArgument,
+        base: Option<Vec<SlabIndex>>,
+        options: SearchOptions,
+        token: CancellationToken,
+    ) -> Result<Option<Vec<SlabIndex>>> {
+        let needle = argument.raw.trim_start_matches('/');
+        if needle.is_empty() {
+            bail!("path: requires a non-empty path fragment");
+        }
+
+        let needle_lower = options
+            .case_insensitive
+            .then(|| needle.to_ascii_lowercase());
+
+        // Node sets that are tested one by one: the caller's base set, or,
+        // when there is no flat index to scan, every node currently in the
+        // cache. Without this fallback an empty flat index would make the
+        // filter return no results at all.
+        let candidates = match base {
+            Some(base_nodes) => Some(base_nodes),
+            None if self.flat_index.is_empty() => match self.search_empty(token) {
+                Some(all_nodes) => Some(all_nodes),
+                None => return Ok(None),
+            },
+            None => None,
+        };
+        if let Some(nodes) = candidates {
+            return Ok(filter_nodes(nodes, token, |index| {
+                self.path_contains_component(index, needle, needle_lower.as_deref())
+            }));
+        }
+
+        // No base set: scan the flat index entries directly. Each entry
+        // stores the full path as an interned &'static str, so this is a
+        // simple linear scan with no allocation.
+        // NOTE(review): the flat index is not updated on FS events, so this
+        // scan reflects the tree as of the last full walk — confirm that is
+        // acceptable for nodes created or removed since then.
+        if token.is_cancelled().is_none() {
+            return Ok(None);
+        }
+        let mut results = Vec::new();
+        for (counter, (_, entry)) in self.flat_index.iter().enumerate() {
+            // Poll the token only every 0x10000 entries to keep the loop tight.
+            if counter.is_multiple_of(0x10000) && token.is_cancelled().is_none() {
+                return Ok(None);
+            }
+            let matches = match &needle_lower {
+                Some(lower) => entry.path_match_ci(lower),
+                None => entry.path.contains(needle),
+            };
+            if matches {
+                results.push(entry.slab_index);
+            }
+        }
+        Ok(Some(results))
+    }
+
+    /// Check if the full path of `index` contains `needle`.
+ /// Uses the flat index entry's path directly (O(1)) when available, + /// falls back to node_path (parent-chain walk) otherwise. + fn path_contains_component( + &self, + index: SlabIndex, + needle: &str, + needle_lower: Option<&str>, + ) -> bool { + let path_str = if let Some(entry) = self.flat_index.get(index) { + entry.path + } else if let Some(path) = self.node_path(index) { + return match needle_lower { + Some(lower) => path.to_string_lossy().to_ascii_lowercase().contains(lower), + None => path.to_string_lossy().contains(needle), + }; + } else { + return false; + }; + match needle_lower { + Some(lower) => path_str.to_ascii_lowercase().contains(lower), + None => path_str.contains(needle), + } + } + fn evaluate_nosubfolders_filter( &self, argument: &FilterArgument, diff --git a/search-cache/tests/e2e_search_flow.rs b/search-cache/tests/e2e_search_flow.rs new file mode 100644 index 00000000..a17106bb --- /dev/null +++ b/search-cache/tests/e2e_search_flow.rs @@ -0,0 +1,182 @@ +//! Integration test simulating the app's search-then-cancel flow. +//! +//! Simulates what happens when a user types "path:repos" (which may be slow), +//! then types a new query before the first finishes. The first search should +//! be cancelled and return promptly, and the second search should succeed. + +use search_cache::SearchCache; +use search_cancel::{ACTIVE_SEARCH_VERSION, CancellationToken}; +use std::sync::atomic::Ordering; +use tempdir::TempDir; + +fn build_wide_cache() -> SearchCache { + let temp_dir = TempDir::new("e2e_search_cancel").unwrap(); + let root_path = temp_dir.path().to_path_buf(); + std::mem::forget(temp_dir); + + // Create a tree large enough that a full scan is non-trivial: + // root/ + // repos/ + // sub_0/ file_0.js .. file_49.js + // ... + // sub_49/ file_0.js .. 
file_49.js + // Downloads/ + // repos/ + // main.js + // other/ + // main.js + // docs/ + // readme.md + let repos = root_path.join("repos"); + std::fs::create_dir_all(&repos).unwrap(); + for d in 0..50 { + let dir = repos.join(format!("sub_{d}")); + std::fs::create_dir_all(&dir).unwrap(); + for f in 0..50 { + std::fs::File::create(dir.join(format!("file_{f}.js"))).unwrap(); + } + } + let downloads = root_path.join("Downloads"); + std::fs::create_dir_all(downloads.join("repos")).unwrap(); + std::fs::File::create(downloads.join("repos/main.js")).unwrap(); + std::fs::create_dir_all(downloads.join("other")).unwrap(); + std::fs::File::create(downloads.join("other/main.js")).unwrap(); + std::fs::create_dir_all(root_path.join("docs")).unwrap(); + std::fs::File::create(root_path.join("docs/readme.md")).unwrap(); + + SearchCache::walk_fs(&root_path) +} + +#[test] +fn e2e_search_then_cancel_returns_promptly() { + let mut cache = build_wide_cache(); + + // Simulate the app: a search for "path:repos" is started (token created). + let slow_token = CancellationToken::new_search(); + + // Immediately, the user types a new query. The app calls new_search() + // which bumps ACTIVE_SEARCH_VERSION, making the old token stale. + let _fast_token = CancellationToken::new_search(); + + // The old token is now cancelled. + assert!( + slow_token.is_cancelled().is_none(), + "Slow search token should be cancelled by the new search" + ); + + // Running the slow search with the cancelled token should return promptly + // (None = cancelled) rather than scanning all nodes. + let result = cache.query_files("path:repos", slow_token); + assert!(result.is_ok(), "Cancelled search should not error"); + // filter_nodes returns None when cancelled (is_cancelled_sparse returns None). + // The search returns Ok(None) — cancelled, not an error. 
+} + +#[test] +fn e2e_main_js_path_downloads_path_repos_returns_correct_results() { + let temp_dir = TempDir::new("e2e_path_query").unwrap(); + let root_path = temp_dir.path().to_path_buf(); + std::mem::forget(temp_dir); + + // Replicate the user's exact filesystem structure: + // root/ + // source/ + // repos/ + // Downloads/ + // main.js + // other.js + // cardinal/ + // main.js + // other/ + // main.js + let source = root_path.join("source/repos"); + std::fs::create_dir_all(source.join("Downloads")).unwrap(); + std::fs::File::create(source.join("Downloads/main.js")).unwrap(); + std::fs::File::create(source.join("Downloads/other.js")).unwrap(); + std::fs::create_dir_all(source.join("cardinal")).unwrap(); + std::fs::File::create(source.join("cardinal/main.js")).unwrap(); + std::fs::create_dir_all(root_path.join("other")).unwrap(); + std::fs::File::create(root_path.join("other/main.js")).unwrap(); + + let mut cache = SearchCache::walk_fs(&root_path); + + // User's query: main.js path:Downloads path:repos + let result = cache + .query_files( + "main.js path:Downloads path:repos", + CancellationToken::noop(), + ) + .expect("Query should succeed"); + + let nodes = result.expect("Should return results"); + // Only source/repos/Downloads/main.js matches all three filters. + assert_eq!( + nodes.len(), + 1, + "main.js path:Downloads path:repos should find exactly 1 file" + ); + let path = nodes[0].path.to_string_lossy().to_string(); + assert!(path.contains("Downloads")); + assert!(path.contains("repos")); + assert!(path.ends_with("main.js")); +} + +#[test] +fn e2e_star_js_is_fast_and_path_repos_works() { + let mut cache = build_wide_cache(); + + // *.js should return quickly + let result = cache + .query_files("*.js", CancellationToken::noop()) + .expect("*.js should succeed"); + let js_nodes = result.expect("*.js should return results"); + // 50 dirs × 50 files + Downloads/repos/main.js + Downloads/other/main.js... 
wait other.js + // Actually: repos/sub_*/file_*.js = 2500, Downloads/repos/main.js = 1, Downloads/other/main.js = 1 + assert!(js_nodes.len() >= 2500, "*.js should find many files"); + + // path:repos should also work + let result = cache + .query_files("path:repos", CancellationToken::noop()) + .expect("path:repos should succeed"); + let repos_nodes = result.expect("path:repos should return results"); + assert!( + repos_nodes.len() >= 2500, + "path:repos should find many files" + ); + + // All path:repos results should have "repos" in their path + for node in &repos_nodes { + assert!( + node.path.to_string_lossy().contains("repos"), + "All results should contain 'repos' in path" + ); + } +} + +#[test] +fn e2e_cancellation_via_version_bump() { + let mut cache = build_wide_cache(); + + // Create a token, then bump the version to cancel it. + let token = CancellationToken::new_search(); + ACTIVE_SEARCH_VERSION.fetch_add(1, Ordering::SeqCst); + + // The search with the cancelled token should return None (cancelled). + let result = cache.query_files("path:repos", token); + assert!(result.is_ok(), "Cancelled search should not error"); + + // Verify the token is indeed cancelled. 
+ assert!(token.is_cancelled().is_none()); +} + +#[test] +fn e2e_empty_query_returns_all_files() { + let mut cache = build_wide_cache(); + + let result = cache + .query_files("", CancellationToken::noop()) + .expect("Empty query should succeed"); + + let nodes = result.expect("Should return results"); + assert!(!nodes.is_empty(), "Empty query should return all files"); +} diff --git a/search-cache/tests/flat_index.rs b/search-cache/tests/flat_index.rs new file mode 100644 index 00000000..4dbc9bee --- /dev/null +++ b/search-cache/tests/flat_index.rs @@ -0,0 +1,141 @@ +use search_cache::{FlatEntry, FlatIndex, SlabIndex, SlabNodeMetadataCompact}; +use std::path::Path; + +fn entry(path: &str, slab_idx: usize) -> FlatEntry { + let path = Path::new(path); + let path_str = path.to_string_lossy(); + let interned_path = search_cache::PATH_POOL.push(path_str.as_ref()); + let name = path + .file_name() + .map(|n| search_cache::NAME_POOL.push(n.to_string_lossy().as_ref())) + .unwrap_or_else(|| search_cache::NAME_POOL.push("")); + FlatEntry { + path: interned_path, + name, + slab_index: SlabIndex::new(slab_idx), + metadata: SlabNodeMetadataCompact::none(), + } +} + +fn build_test_index() -> FlatIndex { + // Sorted by path: + let entries = vec![ + entry("/Users/demo", 0), + entry("/Users/demo/file1.txt", 1), + entry("/Users/demo/src", 2), + entry("/Users/demo/src/main.rs", 3), + entry("/Users/demo/src/lib.rs", 4), + entry("/Users/demo/src/utils", 5), + entry("/Users/demo/src/utils/helper.rs", 6), + entry("/Users/demo/tests", 7), + entry("/Users/demo/tests/test1.rs", 8), + entry("/Users/other/readme.md", 9), + ]; + FlatIndex::build_from_entries(entries) +} + +#[test] +fn prefix_range_finds_descendants() { + let index = build_test_index(); + + let range = index.prefix_range("/Users/demo/src"); + // Should match: src, src/main.rs, src/lib.rs, src/utils, src/utils/helper.rs + assert_eq!(range.end - range.start, 5); + + let indices = index.prefix_indices("/Users/demo/src"); + for idx 
in &indices { + let path = index.node_path(*idx).unwrap(); + assert!(path.starts_with("/Users/demo/src")); + } +} + +#[test] +fn prefix_range_exact_match_included() { + let index = build_test_index(); + + // /Users/demo itself starts with /Users/demo + let range = index.prefix_range("/Users/demo"); + // All 9 entries under /Users/demo (including /Users/demo itself) + assert_eq!(range.end - range.start, 9); +} + +#[test] +fn prefix_range_no_match() { + let index = build_test_index(); + let range = index.prefix_range("/Users/nonexistent"); + assert_eq!(range.end - range.start, 0); +} + +#[test] +fn name_index_lookups() { + let index = build_test_index(); + + // "main.rs" should map to exactly one entry + let indices = index.name_index.get("main.rs").unwrap(); + assert_eq!(indices.len(), 1); + let path = index.node_path(indices[0]).unwrap(); + assert_eq!(path, std::path::PathBuf::from("/Users/demo/src/main.rs")); + + // No match + assert!(index.name_index.get("nonexistent.rs").is_none()); +} + +#[test] +fn all_indices_returns_everything() { + let index = build_test_index(); + let all = index.all_indices(); + assert_eq!(all.len(), 10); +} + +#[test] +fn node_path_is_o1() { + let index = build_test_index(); + let path = index.node_path(SlabIndex::new(3)).unwrap(); + assert_eq!(path, std::path::PathBuf::from("/Users/demo/src/main.rs")); +} + +#[test] +fn node_name_is_o1() { + let index = build_test_index(); + let name = index.node_name(SlabIndex::new(3)).unwrap(); + assert_eq!(name, "main.rs"); +} + +#[test] +fn remove_prefix_removes_subtree() { + let mut index = build_test_index(); + let removed = index.remove_prefix("/Users/demo/src"); + // src, src/main.rs, src/lib.rs, src/utils, src/utils/helper.rs = 5 + assert_eq!(removed, 5); + assert_eq!(index.len(), 5); + + // Remaining: /Users/demo, /Users/demo/file1.txt, /Users/demo/tests, + // /Users/demo/tests/test1.rs, /Users/other/readme.md + let remaining: Vec<_> = index.iter().map(|(_, e)| e.path.to_string()).collect(); + 
assert!(remaining.contains(&"/Users/demo".to_string())); + assert!(remaining.contains(&"/Users/other/readme.md".to_string())); + assert!(!remaining.iter().any(|p| p.contains("src"))); +} + +#[test] +fn insert_maintains_sort_order() { + let mut index = build_test_index(); + index.insert(entry("/Users/demo/src/new.rs", 10)); + + // Should be inserted between src/lib.rs and src/utils + let range = index.prefix_range("/Users/demo/src/"); + let paths: Vec<_> = (range.start..range.end) + .map(|i| index.get_by_pos(i).unwrap().path.to_string()) + .collect(); + assert!(paths.contains(&"/Users/demo/src/new.rs".to_string())); +} + +#[test] +fn prefix_range_handles_trailing_slash() { + let index = build_test_index(); + + // "/Users/demo/src/" should match descendants but not "src" itself + let range = index.prefix_range("/Users/demo/src/"); + // src/main.rs, src/lib.rs, src/utils, src/utils/helper.rs = 4 + assert_eq!(range.end - range.start, 4); +} diff --git a/search-cache/tests/path_filter.rs b/search-cache/tests/path_filter.rs new file mode 100644 index 00000000..996adc98 --- /dev/null +++ b/search-cache/tests/path_filter.rs @@ -0,0 +1,198 @@ +//! Tests for the `path:` filter, which keeps items whose full absolute path +//! contains the argument as a substring of any path component. Multiple +//! `path:` filters combine with AND, each narrowing the result set further. 
+
+use search_cache::{SearchCache, SearchOptions};
+use search_cancel::CancellationToken;
+use std::path::PathBuf;
+use tempdir::TempDir;
+
+/// Build a test cache over a freshly created temp tree with this layout:
+/// root/
+///   main.js
+///   Downloads/
+///     repos/
+///       main.js
+///       other.js
+///     other/
+///       main.js
+///   repos/
+///     main.js
+fn build_path_cache() -> (SearchCache, PathBuf) {
+    let temp_dir = TempDir::new("path_filter_test").unwrap();
+    let root_path = temp_dir.path().to_path_buf();
+    std::mem::forget(temp_dir);
+
+    let files = [
+        "main.js",
+        "Downloads/repos/main.js",
+        "Downloads/repos/other.js",
+        "Downloads/other/main.js",
+        "repos/main.js",
+    ];
+
+    for file in files {
+        let full = root_path.join(file);
+        if let Some(parent) = full.parent() {
+            std::fs::create_dir_all(parent).unwrap();
+        }
+        std::fs::File::create(full).unwrap();
+    }
+
+    let cache = SearchCache::walk_fs(&root_path);
+    (cache, root_path)
+}
+
+#[test]
+fn path_filter_single_fragment_matches_descendants() {
+    let (mut cache, _root) = build_path_cache();
+
+    let query = "main.js path:Downloads";
+    let result = cache
+        .query_files(query, CancellationToken::noop())
+        .expect("Query should succeed");
+    let nodes = result.expect("Should return results");
+
+    // Only the main.js files whose path contains "Downloads" survive (2):
+    // Downloads/repos/main.js and Downloads/other/main.js; root/main.js and repos/main.js do not.
+    assert_eq!(
+        nodes.len(),
+        2,
+        "path:Downloads should narrow to files under Downloads"
+    );
+    for node in &nodes {
+        assert!(
+            node.path.to_string_lossy().contains("Downloads"),
+            "all results should live under a Downloads directory"
+        );
+    }
+}
+
+#[test]
+fn path_filter_multiple_fragments_narrow_with_and() {
+    let (mut cache, _root) = build_path_cache();
+
+    // main.js path:Downloads path:repos -> only Downloads/repos/main.js
+    let query = "main.js path:Downloads path:repos";
+    let result = cache
+        .query_files(query, CancellationToken::noop())
+        .expect("Query should succeed");
+    let nodes = result.expect("Should return results");
+
+    assert_eq!(
+        nodes.len(),
+        1,
+        "two path: fragments should AND together to a single match"
+    );
+    let path = nodes[0].path.to_string_lossy().to_string();
+    assert!(path.contains("Downloads"));
+    assert!(path.contains("repos"));
+    assert!(path.ends_with("main.js"));
+}
+
+#[test]
+fn path_filter_without_word_matches_all_under_fragment() {
+    let (mut cache, _root) = build_path_cache();
+
+    let query = "path:repos";
+    let result = cache
+        .query_files(query, CancellationToken::noop())
+        .expect("Query should succeed");
+    let nodes = result.expect("Should return results");
+
+    // Nodes whose path contains "repos": the repos dirs themselves plus their
+    // contents -> repos, repos/main.js, Downloads/repos, Downloads/repos/main.js,
+    // Downloads/repos/other.js (5).
+    assert_eq!(
+        nodes.len(),
+        5,
+        "path:repos should match dirs and files under repos"
+    );
+    for node in &nodes {
+        assert!(node.path.to_string_lossy().contains("repos"));
+    }
+}
+
+#[test]
+fn path_filter_is_case_insensitive_when_enabled() {
+    let (mut cache, _root) = build_path_cache();
+
+    // With case-insensitive matching, lowercase "downloads" should match "Downloads".
+    let query = "main.js path:downloads";
+    let case_insensitive = SearchOptions {
+        case_insensitive: true,
+    };
+    let result = cache
+        .search_with_options(query, case_insensitive, CancellationToken::noop())
+        .expect("Query should succeed");
+    let nodes = result.nodes.expect("Should return results");
+    let expanded = cache.expand_file_nodes(&nodes);
+
+    assert_eq!(
+        expanded.len(),
+        2,
+        "case-insensitive path:downloads should match Downloads"
+    );
+}
+
+#[test]
+fn path_filter_is_case_sensitive_by_default() {
+    let (mut cache, _root) = build_path_cache();
+
+    // query_files uses SearchOptions::default() which is case-sensitive, so
+    // lowercase "downloads" must not match the "Downloads" directory.
+    let query = "main.js path:downloads";
+    let result = cache
+        .query_files(query, CancellationToken::noop())
+        .expect("Query should succeed");
+
+    match result {
+        None => {}
+        Some(nodes) => assert!(
+            nodes.is_empty(),
+            "case-sensitive path:downloads should not match Downloads"
+        ),
+    }
+}
+
+#[test]
+fn path_filter_strips_leading_slash() {
+    let (mut cache, _root) = build_path_cache();
+
+    // A leading slash is meaningless for a substring path filter; trim it so
+    // "path:/Downloads" behaves the same as "path:Downloads".
+    let query = "main.js path:/Downloads";
+    let result = cache
+        .query_files(query, CancellationToken::noop())
+        .expect("Query should succeed");
+    let nodes = result.expect("Should return results");
+
+    assert_eq!(nodes.len(), 2, "leading slash should be ignored");
+}
+
+#[test]
+fn path_filter_requires_argument() {
+    let (mut cache, _root) = build_path_cache();
+
+    let result = cache.query_files("main.js path:", CancellationToken::noop());
+    assert!(result.is_err(), "path: without an argument should error");
+}
+
+#[test]
+fn path_filter_uses_expanded_node_paths() {
+    let (mut cache, _root) = build_path_cache();
+
+    let query = "path:Downloads path:repos";
+    let result = cache
+        .query_files(query, CancellationToken::noop())
+        .expect("Query should succeed");
+    let nodes = result.expect("Should return results");
+
+    // Downloads/repos, Downloads/repos/main.js, Downloads/repos/other.js (3)
+    assert_eq!(nodes.len(), 3);
+    for node in &nodes {
+        let path = node.path.to_string_lossy();
+        assert!(path.contains("Downloads"));
+        assert!(path.contains("repos"));
+    }
+}
diff --git a/search-cache/tests/path_filter_cancel.rs b/search-cache/tests/path_filter_cancel.rs
new file mode 100644
index 00000000..0d6c0d77
--- /dev/null
+++ b/search-cache/tests/path_filter_cancel.rs
@@ -0,0 +1,124 @@
+//! Tests that the path: filter respects cancellation and doesn't hang on
+//! large subtrees. Creates a deep/wide tree and verifies that a cancelled
+//! search returns promptly.
+
+use search_cache::SearchCache;
+use search_cancel::{ACTIVE_SEARCH_VERSION, CancellationToken};
+use std::{path::PathBuf, sync::atomic::Ordering};
+use tempdir::TempDir;
+
+fn build_deep_cache() -> (SearchCache, PathBuf) {
+    let temp_dir = TempDir::new("path_cancel_test").unwrap();
+    let root_path = temp_dir.path().to_path_buf();
+    std::mem::forget(temp_dir);
+
+    // Create a tree with many files under a "repos" directory:
+    // root/
+    //   repos/
+    //     dir_0/ file_0.txt .. file_99.txt
+    //     dir_1/ file_0.txt .. file_99.txt
+    //     ...
+    //     dir_99/ file_0.txt .. file_99.txt
+    //   other/
+    //     file_0.txt .. file_99.txt
+    let repos = root_path.join("repos");
+    std::fs::create_dir_all(&repos).unwrap();
+    for d in 0..100 {
+        let dir = repos.join(format!("dir_{d}"));
+        std::fs::create_dir_all(&dir).unwrap();
+        for f in 0..100 {
+            std::fs::File::create(dir.join(format!("file_{f}.txt"))).unwrap();
+        }
+    }
+    let other = root_path.join("other");
+    std::fs::create_dir_all(&other).unwrap();
+    for f in 0..100 {
+        std::fs::File::create(other.join(format!("file_{f}.txt"))).unwrap();
+    }
+
+    let cache = SearchCache::walk_fs(&root_path);
+    (cache, root_path)
+}
+
+#[test]
+fn path_filter_returns_results_on_large_tree() {
+    let (mut cache, _root) = build_deep_cache();
+
+    let result = cache
+        .query_files("path:repos", CancellationToken::noop())
+        .expect("Query should succeed");
+
+    let nodes = result.expect("Should return results");
+    // repos dir + 100 subdirs + 10000 files = 10101
+    assert_eq!(
+        nodes.len(),
+        10101,
+        "path:repos should match all items under repos"
+    );
+}
+
+#[test]
+fn path_filter_with_word_narrows_results() {
+    let (mut cache, _root) = build_deep_cache();
+
+    let result = cache
+        .query_files("file_0.txt path:repos", CancellationToken::noop())
+        .expect("Query should succeed");
+
+    let nodes = result.expect("Should return results");
+    // 100 files named file_0.txt under repos/dir_*/ = 100
+    assert_eq!(
+        nodes.len(),
+        100,
+        "file_0.txt path:repos should match 100 files"
+    );
+}
+
+#[test]
+fn path_filter_multiple_fragments_narrow() {
+    let (mut cache, _root) = build_deep_cache();
+
+    let result = cache
+        .query_files(
+            "file_0.txt path:repos path:dir_5",
+            CancellationToken::noop(),
+        )
+        .expect("Query should succeed");
+
+    let nodes = result.expect("Should return results");
+    // "dir_5" matches dir_5, dir_50..dir_59 (substring) = 11 dirs, each with 1 file_0.txt = 11
+    assert_eq!(nodes.len(), 11, "path:dir_5 matches dir_5 and dir_50-59");
+}
+
+#[test]
+fn path_filter_does_not_match_unrelated() {
+    let (mut cache, _root) = build_deep_cache();
+
+    let result = cache
+        .query_files("file_0.txt path:nonexistent", CancellationToken::noop())
+        .expect("Query should succeed");
+
+    match result {
+        None => {}
+        Some(nodes) => assert!(nodes.is_empty(), "path:nonexistent should match nothing"),
+    }
+}
+
+#[test]
+fn path_filter_cancellation_aborts_long_scan() {
+    let (mut cache, _root) = build_deep_cache();
+
+    // new_search() is expected to bump the active search version and return a token for it.
+    let token = CancellationToken::new_search();
+    // Bump the version once more so that token is stale (cancelled) before the query starts.
+    ACTIVE_SEARCH_VERSION.fetch_add(1, Ordering::SeqCst);
+
+    // The search should return promptly with None (cancelled) rather than
+    // scanning all nodes.
+    let result = cache.query_files("path:repos", token);
+    // Pin Ok(None) exactly: a bare is_ok() would also accept a complete, uncancelled result set.
+    assert!(
+        matches!(result, Ok(None)),
+        "Cancelled search should not error, it should return None"
+    );
+}