diff --git a/make-pdf/src/render.ts b/make-pdf/src/render.ts
index ae5228f42d..3057e92541 100644
--- a/make-pdf/src/render.ts
+++ b/make-pdf/src/render.ts
@@ -106,14 +106,22 @@ export function render(opts: RenderOptions): RenderResult {
})
: "";
+ // Assign stable ids to body headings so the TOC's `#toc-N` anchors and
+ // `data-toc-target` spans resolve to a real element. Headings that already
+ // declare an id keep it; the TOC points at whatever id the heading carries.
+ // Only worth doing when a TOC is requested (the ids exist solely for it).
+ const { html: bodyHtml, headings: tocHeadings } = opts.toc
+ ? annotateHeadingIds(typographicHtml)
+ : { html: typographicHtml, headings: [] };
+
const tocBlock = opts.toc
- ? buildTocBlock(typographicHtml)
+ ? buildTocBlock(tocHeadings)
: "";
// Wrap body in .chapter sections at H1 boundaries if chapter breaks are on.
const chapterHtml = opts.noChapterBreaks
- ? ``
- : wrapChaptersByH1(typographicHtml);
+ ? ``
+ : wrapChaptersByH1(bodyHtml);
const watermarkBlock = opts.watermark
? `
${escapeHtml(opts.watermark)}
`
@@ -251,23 +259,29 @@ function buildCoverBlock(opts: {
].filter(Boolean).join("\n");
}
+interface TocHeading {
+ level: number;
+ text: string;
+ id: string;
+}
+
/**
- * Scan HTML for H1/H2/H3 headings and emit a TOC placeholder.
- * Page numbers are filled in by Paged.js (when --toc is passed and Paged.js
- * polyfill is injected).
+ * Emit a TOC placeholder from headings that already carry ids (assigned by
+ * annotateHeadingIds). Each entry's `#id` anchor and `data-toc-target` span
+ * resolve to the matching body heading. Page numbers are filled in by Paged.js
+ * (when --toc is passed and the Paged.js polyfill is injected), which needs the
+ * target heading to exist with the referenced id before it can count pages.
*/
-function buildTocBlock(html: string): string {
- const headings = extractHeadings(html);
+function buildTocBlock(headings: TocHeading[]): string {
if (headings.length === 0) return "";
- const items = headings.map((h, i) => {
+ const items = headings.map((h) => {
const level = h.level >= 2 ? "level-2" : "level-1";
- const id = `toc-${i}`;
return [
` `,
- ` ${escapeHtml(h.text)}`,
+ ` ${escapeHtml(h.text)}`,
` `,
- ` `,
+ ` `,
` `,
].join("\n");
}).join("\n");
@@ -282,16 +296,36 @@ function buildTocBlock(html: string): string {
].join("\n");
}
-function extractHeadings(html: string): Array<{ level: number; text: string }> {
- const re = /<(h[1-3])[^>]*>([\s\S]*?)<\/\1>/gi;
- const headings: Array<{ level: number; text: string }> = [];
- let match;
- while ((match = re.exec(html)) !== null) {
- const level = parseInt(match[1].slice(1), 10);
- const text = decodeTextEntities(stripTags(match[2]).trim());
- if (text) headings.push({ level, text });
- }
- return headings;
+/**
+ * Walk H1-H3 headings in document order, assigning each a stable id the TOC can
+ * link to. A heading that already declares an `id` keeps it (the TOC points at
+ * the existing id); a heading with no id gets `id="toc-N"` injected, where N is
+ * its document-order index. Returns the rewritten HTML plus the heading list
+ * (level, decoded text, resolved id) for buildTocBlock to consume, so anchors
+ * and targets are guaranteed to agree.
+ */
+function annotateHeadingIds(html: string): { html: string; headings: TocHeading[] } {
+ const headings: TocHeading[] = [];
+ let i = 0;
+ const out = html.replace(
+ /<(h[1-3])([^>]*)>([\s\S]*?)<\/\1>/gi,
+ (whole, tag: string, attrs: string, inner: string) => {
+ const level = parseInt(tag.slice(1), 10);
+ const text = decodeTextEntities(stripTags(inner).trim());
+ // Empty headings carry no TOC entry; leave them untouched.
+ if (!text) return whole;
+ const idx = i++;
+ const existing = attrs.match(/\bid\s*=\s*["']([^"']*)["']/i);
+ if (existing) {
+ headings.push({ level, text, id: existing[1] });
+ return whole;
+ }
+ const id = `toc-${idx}`;
+ headings.push({ level, text, id });
+ return `<${tag}${attrs} id="${id}">${inner}${tag}>`;
+ },
+ );
+ return { html: out, headings };
}
/**
diff --git a/make-pdf/test/render.test.ts b/make-pdf/test/render.test.ts
index a61dea5040..1ed75733c0 100644
--- a/make-pdf/test/render.test.ts
+++ b/make-pdf/test/render.test.ts
@@ -227,6 +227,48 @@ describe("render (end-to-end)", () => {
expect(result.html).toContain("Two");
});
+ // Issue #1689: every TOC anchor (`#toc-N`) and page-number target
+ // (`data-toc-target="toc-N"`) must resolve to a body heading that actually
+ // carries that id. Before the fix, the TOC minted ids no heading ever
+ // received, so anchors were dead and Paged.js had no target to count pages
+ // against.
+ test("TOC anchors resolve to body heading ids (issue #1689)", () => {
+ const result = render({
+ markdown: `# One\n\n## Sub\n\nbody\n\n# Two\n\nbody\n`,
+ toc: true,
+ });
+ const hrefs = [...result.html.matchAll(/href="#([^"]+)"/g)].map((m) => m[1]);
+ const targets = [...result.html.matchAll(/data-toc-target="([^"]+)"/g)].map((m) => m[1]);
+ const headingIds = [...result.html.matchAll(/]*\bid="([^"]+)"/g)].map((m) => m[1]);
+
+ expect(hrefs.length).toBe(3);
+ expect(targets).toEqual(hrefs);
+ // Every anchor + target points at a real heading id.
+ for (const ref of [...hrefs, ...targets]) {
+ expect(headingIds).toContain(ref);
+ }
+ });
+
+ test("TOC keeps a heading's pre-existing id instead of overwriting it (issue #1689)", () => {
+ const result = render({
+ markdown: `Intro
\n\n# Two\n`,
+ toc: true,
+ });
+ // The heading's own id is preserved and the TOC links to it.
+ expect(result.html).toContain(`id="intro"`);
+ expect(result.html).toContain(`href="#intro"`);
+ expect(result.html).toContain(`data-toc-target="intro"`);
+ // The id-less second heading still gets a minted id its entry points at.
+ const headingIds = [...result.html.matchAll(/]*\bid="([^"]+)"/g)].map((m) => m[1]);
+ const hrefs = [...result.html.matchAll(/href="#([^"]+)"/g)].map((m) => m[1]);
+ for (const ref of hrefs) expect(headingIds).toContain(ref);
+ });
+
+ test("no toc-id injection when toc is off (issue #1689)", () => {
+ const result = render({ markdown: `# One\n\n## Sub\n`, toc: false });
+ expect(result.html).not.toContain(`id="toc-`);
+ });
+
test("strips dangerous HTML from untrusted markdown", () => {
const result = render({
markdown: `# Safe\n\n\n\nBody.`,