diff --git a/CLAUDE.md b/CLAUDE.md index a6a77ea..45d4d08 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -127,13 +127,15 @@ npm run prettier:write 3. Generator reads YAML configs and Jinja2 templates 4. Transforms content through markdown pipeline 5. Outputs MDX files and JSON configs -6. Generates `public/llms.txt` with all AEP contents for LLM consumption -7. Astro builds static site from generated content +6. Generates `public/llms.txt` with AEP summaries and rules for LLM consumption +7. Generates per-AEP `public/{id}/llms.txt` files with full Markdown content +8. Astro builds static site from generated content ## Generated Files - `src/content/docs/` - Site content (MDX files) - `src/content/docs/aep-2026/` - AEP Edition 2026 content (MDX files, if `AEP_EDITION_2026` is set) - `generated/` - JSON configuration files (sidebar, redirects, etc.) -- `public/llms.txt` - Consolidated AEP content for LLM training/reference +- `public/llms.txt` - Site-wide AEP summaries and rules for LLM consumption +- `public/{id}/llms.txt` - Per-AEP full Markdown content for LLM consumption - `public/json-schema/` - JSON schemas from components repo diff --git a/scripts/generate.ts b/scripts/generate.ts index 70de2b5..c806f40 100644 --- a/scripts/generate.ts +++ b/scripts/generate.ts @@ -274,30 +274,159 @@ function buildRedirects(aeps: AEP[]): object { return Object.fromEntries(aeps.map((aep) => [`/${aep.slug}`, `/${aep.id}`])); } +/** + * Cleans transformed MDX/JSX content back into plain Markdown + * suitable for LLM consumption. + */ +export function cleanContentForLLMs(content: string): string { + let cleaned = content; + + // Remove import statements + cleaned = cleaned.replace(/import\s+.*from\s+['"].*['"];?\n?/g, ""); + + // Replace keyword with **keyword** (restore rule identifiers) + cleaned = cleaned.replace(/(.*?)<\/b>/g, "**$1**"); + + // Replace text with [text](href) + cleaned = cleaned.replace( + /(.*?)<\/AepLink>/g, + "[$2]($1)", + ); + cleaned = cleaned.replace(/(.*?)<\/AepLink>/g, "$1"); + + // Replace