Skip to content
Closed
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/glob-ignore-files.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@moonshot-ai/kimi-code": patch
---

Fix glob file searches so all patterns continue to respect ignore files. A positive ripgrep `--glob` always overrides ignore logic, so any non-broad user pattern (e.g. `*.ts`, `dist/**/*.js`) could re-include files excluded by `.gitignore`. The pattern is now filtered in-process after `rg --files` enumerates non-ignored files.
231 changes: 215 additions & 16 deletions packages/agent-core/src/tools/builtin/file/glob.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
* files (e.g. build outputs, `node_modules`). Sensitive files such
* as `.env` are always filtered out.
* - Brace expansion (`*.{ts,tsx}`, `{src,test}/**`) is handled by
* ripgrep's glob engine.
* picomatch in-process.
* - `path` is validated by `resolvePathAccess` in `absolute-outside-allowed`
* mode. Explicit absolute paths outside the workspace are allowed; relative
* paths that escape the workspace stay rejected.
Expand All @@ -25,7 +25,8 @@
*/

import type { Kaos } from '@moonshot-ai/kaos';
import { normalize, resolve } from 'pathe';
import { dirname, isAbsolute, join, normalize, relative, resolve } from 'pathe';
import picomatch from 'picomatch';
import { z } from 'zod';

import type { BuiltinTool } from '../../../agent/tool';
Expand Down Expand Up @@ -160,6 +161,11 @@ export class GlobTool implements BuiltinTool<GlobInput> {
): Promise<ExecutableToolResult> {
const searchRoot = searchRoots[0] ?? this.workspace.workspaceDir;

const patternError = validateGlobPattern(args.pattern);

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Preprocess comment globs before validating

When the pattern starts with #, ripgrep applies .gitignore parsing first (rg --help says --glob uses .gitignore globs), so a commented glob like #[ is ignored and behaves like a broad search; I checked rg --files -g '#[' returns the normal file list. This validation runs on the raw pattern before preprocessGitignoreGlobPattern, so the same input is rejected as an unclosed character class instead of searching, even though the later matcher already treats #... as broad. Skip validation for comment globs or validate the preprocessed form.

Useful? React with 👍 / 👎.

if (patternError !== undefined) {
return { isError: true, output: patternError };
}

// Validate the search root is a directory. `rg --files <file>` exits 0
// and lists the file itself, so without this check a file root would be
// returned as its own match instead of rejected. A missing root surfaces
Expand Down Expand Up @@ -201,19 +207,23 @@ export class GlobTool implements BuiltinTool<GlobInput> {
// Running from the search root makes glob matching relative to it.
const execKaos = this.kaos.withCwd(searchRoot);

let runResult = await runRipgrepOnce(
execKaos,
buildRgArgs(rgPath, args),
signal,
{ abortedMessage: 'Glob aborted' },
);
const insideGitRepo =
args.include_ignored === true ? true : await isInsideGitRepo(this.kaos, searchRoot);

const pathClass = this.kaos.pathClass();
const lineFilter = makeLineFilter(args.pattern, pathClass, searchRoot);

let runResult = await runRipgrepOnce(execKaos, buildRgArgs(rgPath, args, insideGitRepo), signal, {
abortedMessage: 'Glob aborted',
lineFilter,
});
if (runResult.kind === 'tool-error') return runResult.result;
if (shouldRetryRipgrepEagain(runResult)) {
runResult = await runRipgrepOnce(
execKaos,
buildRgArgs(rgPath, args, true),
buildRgArgs(rgPath, args, insideGitRepo, true),
signal,
{ abortedMessage: 'Glob aborted' },
{ abortedMessage: 'Glob aborted', lineFilter },
);
if (runResult.kind === 'tool-error') return runResult.result;
}
Expand Down Expand Up @@ -250,10 +260,19 @@ export class GlobTool implements BuiltinTool<GlobInput> {
resolve(searchRoot, p),
);

// Filter by the user's positive glob pattern in-process. A positive
// --glob would override ignore-file logic, so the pattern is not passed
// to rg; instead rg --files enumerates non-ignored files and we filter
// here to preserve both the pattern match and the ignore-file respect.
// The streaming lineFilter already applied this filter before the cap,
// but we re-check here as a safety net for any paths that slipped
// through (e.g. broad patterns where the filter is skipped).
const patternMatched = filterByPattern(rawPaths, searchRoot, args.pattern, pathClass);

// Authoritative sensitive-file check (the rg prefilter is conservative).
const kept: string[] = [];
let filteredSensitive = 0;
for (const p of rawPaths) {
for (const p of patternMatched) {
if (isSensitiveFile(p)) {
filteredSensitive++;
} else {
Expand All @@ -277,7 +296,6 @@ export class GlobTool implements BuiltinTool<GlobInput> {
// save tokens, but only for the primary workspace. Relative paths are
// later resolved against workspaceDir, so additionalDir matches stay
// absolute to keep follow-up Read/Edit calls on the same file.
const pathClass = this.kaos.pathClass();
const shouldRelativize = isWithinDirectory(searchRoot, this.workspace.workspaceDir, pathClass);
const displayLines = limited.map((p) =>
shouldRelativize ? relativizeIfUnder(p, searchRoot, pathClass) : p,
Expand Down Expand Up @@ -312,16 +330,26 @@ export class GlobTool implements BuiltinTool<GlobInput> {
}
}

function buildRgArgs(rgPath: string, args: GlobInput, singleThreaded = false): string[] {
function buildRgArgs(
rgPath: string,
args: GlobInput,
insideGitRepo: boolean,
singleThreaded = false,
): string[] {
const cmd: string[] = [rgPath];
if (singleThreaded) cmd.push('-j', '1');
cmd.push('--files', '--hidden', '--sortr=modified');
if (!insideGitRepo && args.include_ignored !== true) {
cmd.push('--no-require-git');
}
for (const dir of VCS_DIRECTORIES_TO_EXCLUDE) {
cmd.push('--glob', `!${dir}`);
}
// Positive pattern first, then sensitive-file exclusions so a broad
// pattern cannot re-include a sensitive path.
cmd.push('--glob', args.pattern);
// The user's positive pattern is NOT passed as --glob. A positive --glob
// "always overrides any other ignore logic" (ripgrep docs), so it would
// re-include files excluded by .gitignore/.ignore/.rgignore. Instead, let
// rg --files enumerate non-ignored files and filter the results
// in-process via matchUserPattern().
Comment thread
morluto marked this conversation as resolved.
for (const glob of SENSITIVE_GLOBS_TO_EXCLUDE) {
cmd.push('--glob', `!${glob}`);
}
Expand All @@ -332,6 +360,177 @@ function buildRgArgs(rgPath: string, args: GlobInput, singleThreaded = false): s
return cmd;
}

function isBroadPattern(pattern: string): boolean {
return pattern === '*' || pattern === '**' || pattern === '**/*';
Comment thread
morluto marked this conversation as resolved.
Outdated
}

/**
* Compile a glob matcher for the user's pattern, using the same semantics
* ripgrep's `--glob` would have applied:
* - Patterns without `/` match the basename at any depth.
* - Patterns with `/` match the relative path from the search root.
* - A leading `/` is stripped — ripgrep treats `/src/*.ts` as rooted at
* the search root, equivalent to `src/*.ts`.
* - `**` matches zero or more directory levels.
* - Dotfiles are matched (rg runs with `--hidden`).
* - Matching is case-sensitive, matching ripgrep's default (use
* `--glob-case-insensitive` / `--iglob` for case-insensitive mode).
* - Brace expansion (`*.{ts,tsx}`) is handled by picomatch natively.
*
* Returns a function that takes a relative path and returns whether it
* matches. The picomatch matcher is compiled once so large trees don't
* reparse the pattern on every line.
*/
function compileGlobMatcher(pattern: string): (relPath: string) => boolean {
const opts = { dot: true };
const rooted = pattern.startsWith('/');
const normalizedPattern = rooted ? pattern.slice(1) : pattern;
// A pattern without `/` matches the basename at any depth — unless the
// original pattern was rooted with a leading `/`, in which case it
// matches only at the search root (gitignore-style rooted globs).
if (!normalizedPattern.includes('/') && !rooted) {
const matcher = picomatch(normalizedPattern, opts);
return (relPath: string) => matcher(relPath.split('/').pop()!);
}
const matcher = picomatch(normalizedPattern, opts);
Comment thread
morluto marked this conversation as resolved.
Outdated
return (relPath: string) => matcher(relPath);
}

/**
* Filter absolute paths from `rg --files` against the user's positive glob
* pattern. Broad patterns (star, double-star, star-slash-star) match
* everything, so the filter is skipped for those. Returns the filtered list
* in the same order. On Windows, the search root and absolute paths are
* case-normalized only to compute the relative path boundary — the original-
* case relative path is then used for case-sensitive pattern matching.
*/
function filterByPattern(
absPaths: string[],
searchRoot: string,
pattern: string,
pathClass: PathClass,
): string[] {
if (isBroadPattern(pattern)) return absPaths;
const matches = compileGlobMatcher(pattern);
const result: string[] = [];
for (const absPath of absPaths) {
const rel = relativePath(searchRoot, absPath, pathClass);
if (rel === undefined) continue;
if (matches(rel)) result.push(absPath);
}
return result;
}

/**
* Compute the relative path from `searchRoot` to `absPath`, preserving the
* original casing of `absPath`. On Windows, the root and path are compared
* case-insensitively to find the boundary, but the returned relative path
* keeps the original case so case-sensitive glob matching works correctly.
* Returns `undefined` if `absPath` is not under `searchRoot`.
*/
function relativePath(searchRoot: string, absPath: string, pathClass: PathClass): string | undefined {
const normAbs = normalize(absPath);
const normRoot = normalize(searchRoot);
if (pathClass !== 'win32') {
const rel = relative(normRoot, normAbs);
return rel.startsWith('..') ? undefined : rel;
Comment thread
morluto marked this conversation as resolved.
Outdated
}
const lowerAbs = normAbs.toLowerCase();
const lowerRoot = normRoot.toLowerCase();
if (lowerAbs === lowerRoot) return '.';
const prefix = lowerRoot.endsWith('/') ? lowerRoot : lowerRoot + '/';
if (!lowerAbs.startsWith(prefix)) return undefined;
return normAbs.slice(prefix.length);
}

/**
* Build a streaming line filter for `runRipgrepOnce` that applies the user's
* glob pattern to each path line before it counts toward the output cap.
* Returns `undefined` for broad patterns (no filtering needed) so the
* original byte-level cap path is used.
*
* rg runs with cwd pinned to the search root, so each line is normally a
* relative path like `./src/a.ts` (POSIX) or `.\src\a.ts` (Windows). The
* filter strips the leading `./` or `.\` and normalizes backslashes to
* forward slashes on Windows before matching. If the line is an absolute
* path (e.g. from a test mock or an external root), it is made relative to
* the search root first, preserving original case for pattern matching.
*/
function makeLineFilter(
pattern: string,
pathClass: PathClass,
searchRoot: string,
): ((line: string) => boolean) | undefined {
if (isBroadPattern(pattern)) return undefined;
const matches = compileGlobMatcher(pattern);
return (line: string): boolean => {
let relPath = line;
if (pathClass === 'win32') relPath = relPath.replace(/\\/g, '/');
if (relPath.startsWith('./') || relPath.startsWith('.\\')) {
relPath = relPath.slice(2);
} else if (isAbsolute(relPath)) {
const rel = relativePath(searchRoot, relPath, pathClass);
if (rel === undefined) return false;
relPath = rel;
}
return matches(relPath);
};
}

/**
* Validate a glob pattern for common malformed syntax (unclosed `[` or `{`).
* Returns an error message if the pattern is invalid, or `undefined` if it
* is well-formed. This mirrors ripgrep's globset parser, which rejects
* unclosed character classes and brace expansions with a hard error —
* picomatch would silently treat them as literals and report "No matches
* found" instead. Braces inside a character class (`[{]foo`) are literal
* characters and do not count toward brace depth.
*/
function validateGlobPattern(pattern: string): string | undefined {
let inBracket = false;
let bracketDepth = 0;
let braceDepth = 0;
for (let i = 0; i < pattern.length; i++) {
const ch = pattern[i];
if (ch === '\\') {
i++;
continue;
}
Comment thread
morluto marked this conversation as resolved.
if (ch === '[') {
inBracket = true;
bracketDepth++;
Comment thread
morluto marked this conversation as resolved.
Outdated
} else if (ch === ']' && bracketDepth > 0) {
inBracket = false;
bracketDepth--;
} else if (!inBracket) {
if (ch === '{') braceDepth++;
else if (ch === '}' && braceDepth > 0) braceDepth--;
}
}
if (bracketDepth > 0) return `Invalid glob pattern: unclosed '[' in "${pattern}"`;
if (braceDepth > 0) return `Invalid glob pattern: unclosed '{' in "${pattern}"`;
return undefined;
}

async function isInsideGitRepo(kaos: Kaos, searchRoot: string): Promise<boolean> {
let current = kaos.normpath(searchRoot);
for (;;) {
if (await pathExists(kaos, join(current, '.git'))) return true;
const parent = dirname(current);
if (parent === current) return false;
current = parent;
}
}

async function pathExists(kaos: Kaos, path: string): Promise<boolean> {
try {
await kaos.stat(path);
return true;
} catch {
return false;
}
}

function formatGlobError(searchRoot: string, stderr: string): string {
const trimmed = stderr.trim();
if (/no such file or directory/i.test(trimmed)) {
Expand Down
Loading