Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 56 additions & 22 deletions make-pdf/src/render.ts
Original file line number Diff line number Diff line change
Expand Up @@ -106,14 +106,22 @@ export function render(opts: RenderOptions): RenderResult {
})
: "";

// Assign stable ids to body headings so the TOC's `#toc-N` anchors and
// `data-toc-target` spans resolve to a real element. Headings that already
// declare an id keep it; the TOC points at whatever id the heading carries.
// Only worth doing when a TOC is requested (the ids exist solely for it).
const { html: bodyHtml, headings: tocHeadings } = opts.toc
? annotateHeadingIds(typographicHtml)
: { html: typographicHtml, headings: [] };

const tocBlock = opts.toc
? buildTocBlock(typographicHtml)
? buildTocBlock(tocHeadings)
: "";

// Wrap body in .chapter sections at H1 boundaries if chapter breaks are on.
const chapterHtml = opts.noChapterBreaks
? `<section class="chapter">${typographicHtml}</section>`
: wrapChaptersByH1(typographicHtml);
? `<section class="chapter">${bodyHtml}</section>`
: wrapChaptersByH1(bodyHtml);

const watermarkBlock = opts.watermark
? `<div class="watermark">${escapeHtml(opts.watermark)}</div>`
Expand Down Expand Up @@ -251,23 +259,29 @@ function buildCoverBlock(opts: {
].filter(Boolean).join("\n");
}

/**
 * One table-of-contents entry describing a body heading.
 * Produced by annotateHeadingIds and consumed by buildTocBlock.
 */
interface TocHeading {
/** Heading depth parsed from the tag name (`h1` -> 1, `h2` -> 2, `h3` -> 3). */
level: number;
/** Heading text with inner tags stripped and entities decoded; buildTocBlock HTML-escapes it on output. */
text: string;
/** Id carried by the body heading: its own declared id, or one minted by annotateHeadingIds. */
id: string;
}

/**
* Scan HTML for H1/H2/H3 headings and emit a TOC placeholder.
* Page numbers are filled in by Paged.js (when --toc is passed and Paged.js
* polyfill is injected).
* Emit a TOC placeholder from headings that already carry ids (assigned by
* annotateHeadingIds). Each entry's `#id` anchor and `data-toc-target` span
* resolve to the matching body heading. Page numbers are filled in by Paged.js
* (when --toc is passed and the Paged.js polyfill is injected), which needs the
* target heading to exist with the referenced id before it can count pages.
*/
function buildTocBlock(html: string): string {
const headings = extractHeadings(html);
function buildTocBlock(headings: TocHeading[]): string {
if (headings.length === 0) return "";

const items = headings.map((h, i) => {
const items = headings.map((h) => {
const level = h.level >= 2 ? "level-2" : "level-1";
const id = `toc-${i}`;
return [
` <li class="${level}">`,
` <span class="toc-title"><a href="#${id}">${escapeHtml(h.text)}</a></span>`,
` <span class="toc-title"><a href="#${h.id}">${escapeHtml(h.text)}</a></span>`,
` <span class="toc-dots"></span>`,
` <span class="toc-page" data-toc-target="${id}"></span>`,
` <span class="toc-page" data-toc-target="${h.id}"></span>`,
` </li>`,
].join("\n");
}).join("\n");
Expand All @@ -282,16 +296,36 @@ function buildTocBlock(html: string): string {
].join("\n");
}

function extractHeadings(html: string): Array<{ level: number; text: string }> {
const re = /<(h[1-3])[^>]*>([\s\S]*?)<\/\1>/gi;
const headings: Array<{ level: number; text: string }> = [];
let match;
while ((match = re.exec(html)) !== null) {
const level = parseInt(match[1].slice(1), 10);
const text = decodeTextEntities(stripTags(match[2]).trim());
if (text) headings.push({ level, text });
}
return headings;
/**
 * Walk H1-H3 headings in document order and make sure each one carries an id
 * the TOC can link to.
 *
 * - A heading that already declares a non-empty `id` keeps it, and the TOC
 *   points at that id. Only a real `id` attribute counts: `data-id="x"` or
 *   `aria-id="x"` are not mistaken for one. Double-quoted, single-quoted and
 *   unquoted values are all recognised.
 * - A heading with no id (or a blank `id=""`) gets a minted `id="toc-N"`, where
 *   N is its document-order index among headings that have text. If that id is
 *   already used anywhere in the document, a numeric suffix (`toc-N-1`, ...) is
 *   appended so the anchor stays unambiguous. A blank `id` attribute is dropped
 *   before the minted one is added, because HTML parsers keep only the first of
 *   two duplicate attributes.
 * - Headings whose text is empty after tag stripping are left untouched and
 *   produce no TOC entry.
 *
 * @param html Body HTML to scan.
 * @returns The rewritten HTML plus the heading list (level, decoded text,
 *   resolved id) in document order. Each returned `id` is attribute-source text
 *   that is safe to interpolate inside a double-quoted attribute.
 */
function annotateHeadingIds(html: string): { html: string; headings: TocHeading[] } {
  // `id` must start the attribute list or follow whitespace, so `data-id` and
  // friends do not match. Groups: 2 = "double", 3 = 'single', 4 = unquoted.
  const idAttrRe = /(^|\s)id\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'=<>`]+))/i;

  // Every id already present in the document, so minted ids never collide.
  // Over-matching here (e.g. in text content) only causes a harmless suffix.
  const usedIds = new Set<string>();
  for (const m of html.matchAll(new RegExp(idAttrRe.source, "gi"))) {
    const value = m[2] ?? m[3] ?? m[4];
    if (value) usedIds.add(value);
  }

  const headings: TocHeading[] = [];
  let i = 0;
  const out = html.replace(
    /<(h[1-3])([^>]*)>([\s\S]*?)<\/\1>/gi,
    (whole: string, tag: string, attrs: string, inner: string) => {
      const level = parseInt(tag.slice(1), 10);
      const text = decodeTextEntities(stripTags(inner).trim());
      // Empty headings carry no TOC entry; leave them untouched.
      if (!text) return whole;
      const idx = i++;

      const declared = idAttrRe.exec(attrs);
      const declaredId = declared ? (declared[2] ?? declared[3] ?? declared[4] ?? "") : "";
      if (declaredId.trim() !== "") {
        // A single-quoted source value may contain a raw `"`; encode it so the
        // id can be placed inside the TOC's double-quoted attributes.
        headings.push({ level, text, id: declaredId.replace(/"/g, "&quot;") });
        return whole;
      }

      let id = `toc-${idx}`;
      for (let n = 1; usedIds.has(id); n++) id = `toc-${idx}-${n}`;
      usedIds.add(id);
      headings.push({ level, text, id });

      // Drop a blank `id` attribute so the minted one is the only id present.
      const keptAttrs = declared ? attrs.replace(idAttrRe, "") : attrs;
      return `<${tag}${keptAttrs} id="${id}">${inner}</${tag}>`;
    },
  );
  return { html: out, headings };
}

/**
Expand Down
42 changes: 42 additions & 0 deletions make-pdf/test/render.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,48 @@ describe("render (end-to-end)", () => {
expect(result.html).toContain("Two");
});

// Issue #1689: every TOC anchor (`#toc-N`) and page-number target
// (`data-toc-target="toc-N"`) must resolve to a body heading that actually
// carries that id. Before the fix, the TOC minted ids no heading ever
// received, so anchors were dead and Paged.js had no target to count pages
// against.
test("TOC anchors resolve to body heading ids (issue #1689)", () => {
  const { html } = render({
    markdown: `# One\n\n## Sub\n\nbody\n\n# Two\n\nbody\n`,
    toc: true,
  });

  // Collect the first capture group of every match of a global pattern.
  const capture = (pattern: RegExp) => Array.from(html.matchAll(pattern), (m) => m[1]);

  const anchorRefs = capture(/href="#([^"]+)"/g);
  const pageTargets = capture(/data-toc-target="([^"]+)"/g);
  const bodyIds = capture(/<h[1-3][^>]*\bid="([^"]+)"/g);

  // Three headings in the fixture -> three TOC entries, anchors and targets in lockstep.
  expect(anchorRefs.length).toBe(3);
  expect(pageTargets).toEqual(anchorRefs);

  // Neither an anchor nor a page-number target may reference a missing heading id.
  anchorRefs.concat(pageTargets).forEach((ref) => {
    expect(bodyIds).toContain(ref);
  });
});

test("TOC keeps a heading's pre-existing id instead of overwriting it (issue #1689)", () => {
  const { html } = render({
    markdown: `<h1 id="intro">Intro</h1>\n\n# Two\n`,
    toc: true,
  });

  // The author-supplied id survives, and both TOC hooks reference it.
  for (const needle of [`id="intro"`, `href="#intro"`, `data-toc-target="intro"`]) {
    expect(html).toContain(needle);
  }

  // The second heading had no id; whatever was minted for it must exist on a heading.
  const bodyIds = Array.from(html.matchAll(/<h[1-3][^>]*\bid="([^"]+)"/g), (m) => m[1]);
  const anchorRefs = Array.from(html.matchAll(/href="#([^"]+)"/g), (m) => m[1]);
  anchorRefs.forEach((ref) => {
    expect(bodyIds).toContain(ref);
  });
});

test("no toc-id injection when toc is off (issue #1689)", () => {
  // With the TOC disabled, headings must not pick up minted `toc-N` ids.
  const { html } = render({ markdown: `# One\n\n## Sub\n`, toc: false });
  expect(html).not.toContain(`id="toc-`);
});

test("strips dangerous HTML from untrusted markdown", () => {
const result = render({
markdown: `# Safe\n\n<script>alert('xss')</script>\n\nBody.`,
Expand Down
Loading