From 8dce597eba6edc81e86e9d58b65407dfbe6f2861 Mon Sep 17 00:00:00 2001 From: 0xDevNinja Date: Fri, 29 May 2026 23:36:21 +0530 Subject: [PATCH] fix(make-pdf): assign body heading ids so --toc anchors resolve - buildTocBlock minted `toc-N` ids for its `#toc-N` anchors and `data-toc-target` spans, but no body heading ever received those ids, so every TOC link was a dead fragment and Paged.js had no target to count page numbers against - Add annotateHeadingIds: walk H1-H3 in document order, inject `id="toc-N"` on headings that lack one, preserve any heading that already declares an id and point the TOC at it - Share the resolved heading/id list between body and TOC so anchors and targets always agree - 3 regression tests: anchor-resolves-to-heading, existing-id preserved, no id injection when toc is off Fixes #1689. --- make-pdf/src/render.ts | 78 ++++++++++++++++++++++++++---------- make-pdf/test/render.test.ts | 42 +++++++++++++++++++ 2 files changed, 98 insertions(+), 22 deletions(-) diff --git a/make-pdf/src/render.ts b/make-pdf/src/render.ts index ae5228f42d..3057e92541 100644 --- a/make-pdf/src/render.ts +++ b/make-pdf/src/render.ts @@ -106,14 +106,22 @@ export function render(opts: RenderOptions): RenderResult { }) : ""; + // Assign stable ids to body headings so the TOC's `#toc-N` anchors and + // `data-toc-target` spans resolve to a real element. Headings that already + // declare an id keep it; the TOC points at whatever id the heading carries. + // Only worth doing when a TOC is requested (the ids exist solely for it). + const { html: bodyHtml, headings: tocHeadings } = opts.toc + ? annotateHeadingIds(typographicHtml) + : { html: typographicHtml, headings: [] }; + const tocBlock = opts.toc - ? buildTocBlock(typographicHtml) + ? buildTocBlock(tocHeadings) : ""; // Wrap body in .chapter sections at H1 boundaries if chapter breaks are on. const chapterHtml = opts.noChapterBreaks - ? `
${typographicHtml}
` - : wrapChaptersByH1(typographicHtml); + ? `
${bodyHtml}
` + : wrapChaptersByH1(bodyHtml); const watermarkBlock = opts.watermark ? `
${escapeHtml(opts.watermark)}
` @@ -251,23 +259,29 @@ function buildCoverBlock(opts: { ].filter(Boolean).join("\n"); } +interface TocHeading { + level: number; + text: string; + id: string; +} + /** - * Scan HTML for H1/H2/H3 headings and emit a TOC placeholder. - * Page numbers are filled in by Paged.js (when --toc is passed and Paged.js - * polyfill is injected). + * Emit a TOC placeholder from headings that already carry ids (assigned by + * annotateHeadingIds). Each entry's `#id` anchor and `data-toc-target` span + * resolve to the matching body heading. Page numbers are filled in by Paged.js + * (when --toc is passed and the Paged.js polyfill is injected), which needs the + * target heading to exist with the referenced id before it can count pages. */ -function buildTocBlock(html: string): string { - const headings = extractHeadings(html); +function buildTocBlock(headings: TocHeading[]): string { if (headings.length === 0) return ""; - const items = headings.map((h, i) => { + const items = headings.map((h) => { const level = h.level >= 2 ? "level-2" : "level-1"; - const id = `toc-${i}`; return [ `
  • `, - ` ${escapeHtml(h.text)}`, + ` ${escapeHtml(h.text)}`, ` `, - ` `, + ` `, `
  • `, ].join("\n"); }).join("\n"); @@ -282,16 +296,36 @@ function buildTocBlock(html: string): string { ].join("\n"); } -function extractHeadings(html: string): Array<{ level: number; text: string }> { - const re = /<(h[1-3])[^>]*>([\s\S]*?)<\/\1>/gi; - const headings: Array<{ level: number; text: string }> = []; - let match; - while ((match = re.exec(html)) !== null) { - const level = parseInt(match[1].slice(1), 10); - const text = decodeTextEntities(stripTags(match[2]).trim()); - if (text) headings.push({ level, text }); - } - return headings; +/** + * Walk H1-H3 headings in document order, assigning each a stable id the TOC can + * link to. A heading that already declares an `id` keeps it (the TOC points at + * the existing id); a heading with no id gets `id="toc-N"` injected, where N is + * its document-order index. Returns the rewritten HTML plus the heading list + * (level, decoded text, resolved id) for buildTocBlock to consume, so anchors + * and targets are guaranteed to agree. + */ +function annotateHeadingIds(html: string): { html: string; headings: TocHeading[] } { + const headings: TocHeading[] = []; + let i = 0; + const out = html.replace( + /<(h[1-3])([^>]*)>([\s\S]*?)<\/\1>/gi, + (whole, tag: string, attrs: string, inner: string) => { + const level = parseInt(tag.slice(1), 10); + const text = decodeTextEntities(stripTags(inner).trim()); + // Empty headings carry no TOC entry; leave them untouched. + if (!text) return whole; + const idx = i++; + const existing = attrs.match(/\bid\s*=\s*["']([^"']*)["']/i); + if (existing) { + headings.push({ level, text, id: existing[1] }); + return whole; + } + const id = `toc-${idx}`; + headings.push({ level, text, id }); + return `<${tag}${attrs} id="${id}">${inner}`; + }, + ); + return { html: out, headings }; } /** diff --git a/make-pdf/test/render.test.ts b/make-pdf/test/render.test.ts index a61dea5040..1ed75733c0 100644 --- a/make-pdf/test/render.test.ts +++ b/make-pdf/test/render.test.ts @@ -227,6 +227,48 @@ describe("render (end-to-end)", () => { expect(result.html).toContain("Two"); }); + // Issue #1689: every TOC anchor (`#toc-N`) and page-number target + // (`data-toc-target="toc-N"`) must resolve to a body heading that actually + // carries that id. Before the fix, the TOC minted ids no heading ever + // received, so anchors were dead and Paged.js had no target to count pages + // against. + test("TOC anchors resolve to body heading ids (issue #1689)", () => { + const result = render({ + markdown: `# One\n\n## Sub\n\nbody\n\n# Two\n\nbody\n`, + toc: true, + }); + const hrefs = [...result.html.matchAll(/href="#([^"]+)"/g)].map((m) => m[1]); + const targets = [...result.html.matchAll(/data-toc-target="([^"]+)"/g)].map((m) => m[1]); + const headingIds = [...result.html.matchAll(/]*\bid="([^"]+)"/g)].map((m) => m[1]); + + expect(hrefs.length).toBe(3); + expect(targets).toEqual(hrefs); + // Every anchor + target points at a real heading id. + for (const ref of [...hrefs, ...targets]) { + expect(headingIds).toContain(ref); + } + }); + + test("TOC keeps a heading's pre-existing id instead of overwriting it (issue #1689)", () => { + const result = render({ + markdown: `

    Intro

    \n\n# Two\n`, + toc: true, + }); + // The heading's own id is preserved and the TOC links to it. + expect(result.html).toContain(`id="intro"`); + expect(result.html).toContain(`href="#intro"`); + expect(result.html).toContain(`data-toc-target="intro"`); + // The id-less second heading still gets a minted id its entry points at. + const headingIds = [...result.html.matchAll(/]*\bid="([^"]+)"/g)].map((m) => m[1]); + const hrefs = [...result.html.matchAll(/href="#([^"]+)"/g)].map((m) => m[1]); + for (const ref of hrefs) expect(headingIds).toContain(ref); + }); + + test("no toc-id injection when toc is off (issue #1689)", () => { + const result = render({ markdown: `# One\n\n## Sub\n`, toc: false }); + expect(result.html).not.toContain(`id="toc-`); + }); + test("strips dangerous HTML from untrusted markdown", () => { const result = render({ markdown: `# Safe\n\n\n\nBody.`,