Skip to content
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/glob-ignore-files.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@moonshot-ai/kimi-code": patch
---

Fix glob file searches so all patterns continue to respect ignore files. A positive ripgrep `--glob` always overrides ignore logic, so any non-broad user pattern (e.g. `*.ts`, `dist/**/*.js`) could re-include files excluded by `.gitignore`. The pattern is now filtered in-process after `rg --files` enumerates non-ignored files.
100 changes: 87 additions & 13 deletions packages/agent-core/src/tools/builtin/file/glob.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@
*/

import type { Kaos } from '@moonshot-ai/kaos';
import { normalize, resolve } from 'pathe';
import { dirname, join, normalize, relative, resolve } from 'pathe';
import picomatch from 'picomatch';
import { z } from 'zod';

import type { BuiltinTool } from '../../../agent/tool';
Expand Down Expand Up @@ -201,17 +202,17 @@ export class GlobTool implements BuiltinTool<GlobInput> {
// Running from the search root makes glob matching relative to it.
const execKaos = this.kaos.withCwd(searchRoot);

let runResult = await runRipgrepOnce(
execKaos,
buildRgArgs(rgPath, args),
signal,
{ abortedMessage: 'Glob aborted' },
);
const insideGitRepo =
args.include_ignored === true ? true : await isInsideGitRepo(this.kaos, searchRoot);

let runResult = await runRipgrepOnce(execKaos, buildRgArgs(rgPath, args, insideGitRepo), signal, {
abortedMessage: 'Glob aborted',
});
if (runResult.kind === 'tool-error') return runResult.result;
if (shouldRetryRipgrepEagain(runResult)) {
runResult = await runRipgrepOnce(
execKaos,
buildRgArgs(rgPath, args, true),
buildRgArgs(rgPath, args, insideGitRepo, true),
signal,
{ abortedMessage: 'Glob aborted' },
);
Expand Down Expand Up @@ -250,10 +251,16 @@ export class GlobTool implements BuiltinTool<GlobInput> {
resolve(searchRoot, p),
);

// Filter by the user's positive glob pattern in-process. A positive
// --glob would override ignore-file logic, so the pattern is not passed
// to rg; instead rg --files enumerates non-ignored files and we filter
// here to preserve both the pattern match and the ignore-file respect.
const patternMatched = filterByPattern(rawPaths, searchRoot, args.pattern);
Comment thread
morluto marked this conversation as resolved.
Outdated

// Authoritative sensitive-file check (the rg prefilter is conservative).
const kept: string[] = [];
let filteredSensitive = 0;
for (const p of rawPaths) {
for (const p of patternMatched) {
if (isSensitiveFile(p)) {
filteredSensitive++;
} else {
Expand Down Expand Up @@ -312,16 +319,26 @@ export class GlobTool implements BuiltinTool<GlobInput> {
}
}

function buildRgArgs(rgPath: string, args: GlobInput, singleThreaded = false): string[] {
function buildRgArgs(
rgPath: string,
args: GlobInput,
insideGitRepo: boolean,
singleThreaded = false,
): string[] {
const cmd: string[] = [rgPath];
if (singleThreaded) cmd.push('-j', '1');
cmd.push('--files', '--hidden', '--sortr=modified');
if (!insideGitRepo && args.include_ignored !== true) {
cmd.push('--no-require-git');
}
for (const dir of VCS_DIRECTORIES_TO_EXCLUDE) {
cmd.push('--glob', `!${dir}`);
}
// Positive pattern first, then sensitive-file exclusions so a broad
// pattern cannot re-include a sensitive path.
cmd.push('--glob', args.pattern);
// The user's positive pattern is NOT passed as --glob. A positive --glob
// "always overrides any other ignore logic" (ripgrep docs), so it would
// re-include files excluded by .gitignore/.ignore/.rgignore. Instead, let
// rg --files enumerate non-ignored files and filter the results
// in-process via matchUserPattern().
Comment thread
morluto marked this conversation as resolved.
for (const glob of SENSITIVE_GLOBS_TO_EXCLUDE) {
cmd.push('--glob', `!${glob}`);
}
Expand All @@ -332,6 +349,63 @@ function buildRgArgs(rgPath: string, args: GlobInput, singleThreaded = false): s
return cmd;
}

function isBroadPattern(pattern: string): boolean {
return pattern === '*' || pattern === '**' || pattern === '**/*';
Comment thread
morluto marked this conversation as resolved.
Outdated
}

/**
* Match a file path against the user's glob pattern, using the same
* semantics ripgrep's `--glob` would have applied:
* - Patterns without `/` match the basename at any depth.
* - Patterns with `/` match the relative path from the search root.
* - `**` matches zero or more directory levels.
* - Dotfiles are matched (rg runs with `--hidden`).
* - Brace expansion (`*.{ts,tsx}`) is handled by picomatch natively.
*/
function matchUserPattern(relPath: string, pattern: string): boolean {
const opts = { dot: true, nocase: true };
Comment thread
morluto marked this conversation as resolved.
Outdated
if (!pattern.includes('/')) {
const basename = relPath.split('/').pop()!;
return picomatch.isMatch(basename, pattern, opts);
Comment thread
morluto marked this conversation as resolved.
Outdated
}
return picomatch.isMatch(relPath, pattern, opts);
}

/**
* Filter absolute paths from `rg --files` against the user's positive glob
* pattern. Broad patterns (star, double-star, star-slash-star) match
* everything, so the filter is skipped for those. Returns the filtered list
* in the same order.
*/
function filterByPattern(absPaths: string[], searchRoot: string, pattern: string): string[] {
if (isBroadPattern(pattern)) return absPaths;
const result: string[] = [];
for (const absPath of absPaths) {
const rel = relative(searchRoot, absPath);
Comment thread
morluto marked this conversation as resolved.
Outdated
if (matchUserPattern(rel, pattern)) result.push(absPath);
}
return result;
}

async function isInsideGitRepo(kaos: Kaos, searchRoot: string): Promise<boolean> {
let current = kaos.normpath(searchRoot);
for (;;) {
if (await pathExists(kaos, join(current, '.git'))) return true;
const parent = dirname(current);
if (parent === current) return false;
current = parent;
}
}

async function pathExists(kaos: Kaos, path: string): Promise<boolean> {
try {
await kaos.stat(path);
return true;
} catch {
return false;
}
}

function formatGlobError(searchRoot: string, stderr: string): string {
const trimmed = stderr.trim();
if (/no such file or directory/i.test(trimmed)) {
Expand Down
132 changes: 118 additions & 14 deletions packages/agent-core/test/tools/glob.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ describe('GlobTool', () => {
additionalDirs: [],
});

const result = await executeTool(tool, context({ pattern: 'src/**/*.ts', path: 'C:\\WORKSPACE' }));
const result = await executeTool(tool, context({ pattern: 'src/**/*.ts', path: 'C:\\workspace' }));

// pathe.normalize renders Windows paths with forward slashes, so the
// relativized result keeps `/` regardless of the backend path class.
Expand All @@ -183,31 +183,37 @@ describe('GlobTool', () => {
expect(result.output).toContain(`[Truncated at ${String(MAX_MATCHES)} matches`);
});

it('passes a brace pattern through to a single rg --glob', async () => {
const exec = execReturning('/workspace/a.ts\n/workspace/shared.ts\n/workspace/shared.tsx\n');
it('filters brace patterns in-process without passing them as a positive --glob', async () => {
const exec = execReturning(
'/workspace/a.ts\n/workspace/shared.ts\n/workspace/shared.tsx\n/workspace/b.js\n',
);
const tool = new GlobTool(kaosWithExec(exec), workspace);

const result = await executeTool(tool, context({ pattern: '*.{ts,tsx}' }));

expect(result.isError).toBeFalsy();
expect(execArgs(exec)).toContain('*.{ts,tsx}');
// The pattern must NOT be passed as a positive --glob — that would
// override ignore-file logic. Filtering happens in-process.
expect(execArgs(exec)).not.toContain('*.{ts,tsx}');
expect(result.output).toContain('a.ts');
expect(result.output).toContain('shared.ts');
expect(result.output).toContain('shared.tsx');
expect(result.output).not.toContain('b.js');
});

it('passes an escaped-brace pattern through unchanged so literal-brace files stay matchable', async () => {
it('matches an escaped-brace pattern in-process so literal-brace files stay matchable', async () => {
// `\{a,b\}.ts` opts out of brace expansion — the user wants a file
// literally named `{a,b}.ts`. The pattern must reach rg with the escapes
// intact (the tool must not strip or reinterpret the backslashes).
const exec = execReturning('/workspace/{a,b}.ts\n');
// literally named `{a,b}.ts`. The pattern is matched in-process, so the
// escapes are handled by picomatch, not rg.
const exec = execReturning('/workspace/{a,b}.ts\n/workspace/other.ts\n');
const tool = new GlobTool(kaosWithExec(exec), workspace);

const result = await executeTool(tool, context({ pattern: '\\{a,b\\}.ts' }));

expect(result.isError).toBeFalsy();
expect(execArgs(exec)).toContain('\\{a,b\\}.ts');
expect(execArgs(exec)).not.toContain('\\{a,b\\}.ts');
expect(result.output).toContain('{a,b}.ts');
expect(result.output).not.toContain('other.ts');
});

it('searches only the current workspace when path is omitted', async () => {
Expand Down Expand Up @@ -250,6 +256,46 @@ describe('GlobTool', () => {
expect(execArgs(exec)).not.toContain('--no-ignore');
});

it('does not emit a positive --glob for broad all-file patterns', async () => {
for (const pattern of ['*', '**', '**/*'] as const) {
const exec = execReturning('/workspace/a.ts\n');
const tool = new GlobTool(kaosWithExec(exec), workspace);

await executeTool(tool, context({ pattern }));

const args = execArgs(exec);
expect(args).not.toContain(pattern);
expect(args).toContain('--glob');
expect(args.some((arg) => arg.startsWith('!'))).toBe(true);
}
});

it('filters anchored patterns in-process without a positive --glob', async () => {
const exec = execReturning('/workspace/src/a.ts\n/workspace/other/b.ts\n');
const tool = new GlobTool(kaosWithExec(exec), workspace);

const result = await executeTool(tool, context({ pattern: 'src/**/*.ts' }));

expect(execArgs(exec)).not.toContain('src/**/*.ts');
expect(result.output).toContain('src/a.ts');
expect(result.output).not.toContain('other/b.ts');
});

it('adds --no-require-git when the search root is outside a git repo', async () => {
const exec = execReturning('/workspace/a.ts\n');
const stat = vi.fn(async (candidate: string) => {
if (candidate.endsWith('/.git')) {
throw Object.assign(new Error('ENOENT: no such file or directory'), { code: 'ENOENT' });
}
return dirStat();
});
const tool = new GlobTool(createFakeKaos({ exec, stat }), workspace);

await executeTool(tool, context({ pattern: '*.ts', path: '/workspace' }));

expect(execArgs(exec)).toContain('--no-require-git');
});

it('caps returned matches and surfaces the truncation header', async () => {
const stdout =
Array.from({ length: MAX_MATCHES + 1 }, (_, i) => `/workspace/${String(i)}.ts`).join('\n') +
Expand Down Expand Up @@ -291,7 +337,7 @@ describe('GlobTool', () => {
});

it('filters sensitive files from results', async () => {
const exec = execReturning('/workspace/.env\n/workspace/src/a.ts\n');
const exec = execReturning('/workspace/src/.env\n/workspace/src/a.ts\n');
const tool = new GlobTool(kaosWithExec(exec), workspace);

const result = await executeTool(tool, context({ pattern: 'src/**' }));
Expand Down Expand Up @@ -358,15 +404,16 @@ describe('GlobTool', () => {
});

it('walks "**/" prefix patterns with a literal anchor', async () => {
const exec = execReturning('/workspace/a.py\n/workspace/sub/b.py\n');
const exec = execReturning('/workspace/a.py\n/workspace/sub/b.py\n/workspace/other.txt\n');
const tool = new GlobTool(kaosWithExec(exec), workspace);

const result = await executeTool(tool, context({ pattern: '**/*.py' }));

expect(result.isError).toBeFalsy();
expect(execArgs(exec)).toContain('**/*.py');
expect(execArgs(exec)).not.toContain('**/*.py');
expect(result.output).toContain('a.py');
expect(result.output).toContain('sub/b.py');
expect(result.output).not.toContain('other.txt');
});

it('walks safe recursive patterns with a literal subdirectory anchor', async () => {
Expand Down Expand Up @@ -456,14 +503,15 @@ describe('GlobTool', () => {
});

it('walks "**/" patterns with literal subdirectory anchors after the prefix', async () => {
const exec = execReturning('/workspace/src/main/app.py\n');
const exec = execReturning('/workspace/src/main/app.py\n/workspace/other/x.py\n');
const tool = new GlobTool(kaosWithExec(exec), workspace);

const result = await executeTool(tool, context({ pattern: '**/main/*.py' }));

expect(result.isError).toBeFalsy();
expect(execArgs(exec)).toContain('**/main/*.py');
expect(execArgs(exec)).not.toContain('**/main/*.py');
expect(result.output).toContain('src/main/app.py');
expect(result.output).not.toContain('other/x.py');
});

it('matches dotfiles like .gitlab-ci.yml under a simple "*.yml" pattern', async () => {
Expand Down Expand Up @@ -727,4 +775,60 @@ describe('GlobTool integration (real ripgrep)', () => {
await fs.rm(externalDir, { recursive: true, force: true });
}
});

it('respects .gitignore by default for broad patterns in a git repo', async () => {
await touch('kept.ts', new Date('2024-01-01T00:00:00Z'));
await touch('ignored.log', new Date('2024-01-01T00:00:00Z'));
await fs.writeFile(path.join(tmpDir!, '.gitignore'), '*.log\n');
await fs.mkdir(path.join(tmpDir!, '.git'), { recursive: true });
const tool = new GlobTool(kaos, ws());

const result = await executeTool(tool, context({ pattern: '*', path: tmpDir! }));

expect(result.output).toContain('kept.ts');
expect(result.output).not.toContain('ignored.log');
});

it('respects .gitignore by default in a non-git directory', async () => {
await touch('kept.ts', new Date('2024-01-01T00:00:00Z'));
await touch('ignored.log', new Date('2024-01-01T00:00:00Z'));
await fs.writeFile(path.join(tmpDir!, '.gitignore'), '*.log\n');
const tool = new GlobTool(kaos, ws());

const result = await executeTool(tool, context({ pattern: '*', path: tmpDir! }));

expect(result.output).toContain('kept.ts');
expect(result.output).not.toContain('ignored.log');
});

it('respects .gitignore for specific patterns that would re-include ignored files', async () => {
// A positive --glob overrides ignore logic in ripgrep, so
// Glob({ pattern: '*.ts' }) in a repo with .gitignore containing
// *.ts would surface ignored.ts. The in-process filter avoids this
// by letting rg --files enumerate non-ignored files first.
await touch('kept.ts', new Date('2024-01-01T00:00:00Z'));
await touch('ignored.ts', new Date('2024-01-01T00:00:00Z'));
await fs.writeFile(path.join(tmpDir!, '.gitignore'), '*.ts\n');
await fs.mkdir(path.join(tmpDir!, '.git'), { recursive: true });
const tool = new GlobTool(kaos, ws());

const result = await executeTool(tool, context({ pattern: '*.ts', path: tmpDir! }));

expect(result.output).toContain('No matches');
expect(result.output).not.toContain('kept.ts');
expect(result.output).not.toContain('ignored.ts');
});

it('respects .gitignore for specific patterns in a non-git directory', async () => {
await touch('kept.ts', new Date('2024-01-01T00:00:00Z'));
await touch('ignored.ts', new Date('2024-01-01T00:00:00Z'));
await fs.writeFile(path.join(tmpDir!, '.gitignore'), '*.ts\n');
const tool = new GlobTool(kaos, ws());

const result = await executeTool(tool, context({ pattern: '*.ts', path: tmpDir! }));

expect(result.output).toContain('No matches');
expect(result.output).not.toContain('kept.ts');
expect(result.output).not.toContain('ignored.ts');
});
});