Add a `Custom` variant to `TagKind` for arbitrary role-mapped structure tags.

Introduces `StandardRole` and `CustomTag` (built via `Tag::custom`), teaches the
tag codegen, the generated `TagKind` enum and the `Output` impl about the new
variant, and records each custom name in `GlobalObjects::custom_roles` so it is
written to the role map / namespace role map during serialization.

NOTE(review): this patch was reconstructed from a whitespace-collapsed copy in
which `<...>` generic arguments had been stripped. Indentation and the position
of blank context lines are best-effort. `Option<String>` on `with_alt_text` and
`Option<TagId>` on `with_id` are presumed; confirm against `AnyTag`. The three
`GlobalObjects` context lines in the last hunk, the hunk-header context text
and the leading spaces inside the codegen string literals are left as found.
Re-diff against the tree before applying.

diff --git a/codegen/src/main.rs b/codegen/src/main.rs
index 675fa0be5..15ab17dac 100644
--- a/codegen/src/main.rs
+++ b/codegen/src/main.rs
@@ -325,6 +325,12 @@ fn write_tag_kind(f: &mut impl std::fmt::Write) {
         }
         writeln!(f, " {name}(Tag),").ok();
     }
+    writeln!(
+        f,
+        " /// An arbitrary custom tag with role mapping to a standard PDF role."
+    )
+    .ok();
+    writeln!(f, " Custom(CustomTag),").ok();
     writeln!(f, "}}").ok();
     writeln!(f).ok();
 
@@ -336,6 +342,7 @@ fn write_tag_kind(f: &mut impl std::fmt::Write) {
     for TagVariant { name, .. } in TAG.variants.iter() {
         writeln!(f, " Self::{name}(tag) => tag.as_any(),").ok();
     }
+    writeln!(f, " Self::Custom(ct) => &ct.inner,").ok();
     writeln!(f, " }}").ok();
     writeln!(f, " }}").ok();
     writeln!(f).ok();
@@ -347,6 +354,7 @@ fn write_tag_kind(f: &mut impl std::fmt::Write) {
     for TagVariant { name, .. } in TAG.variants.iter() {
         writeln!(f, " Self::{name}(tag) => tag.as_any_mut(),").ok();
     }
+    writeln!(f, " Self::Custom(ct) => &mut ct.inner,").ok();
     writeln!(f, " }}").ok();
     writeln!(f, " }}").ok();
 
diff --git a/crates/krilla/src/interchange/tagging/fmt.rs b/crates/krilla/src/interchange/tagging/fmt.rs
index 66c9bdbdc..5ba92e67b 100644
--- a/crates/krilla/src/interchange/tagging/fmt.rs
+++ b/crates/krilla/src/interchange/tagging/fmt.rs
@@ -158,6 +158,7 @@ impl Output for TagKind {
             TagKind::Title(_) => write!(f, "Title"),
             TagKind::Strong(_) => write!(f, "Strong"),
             TagKind::Em(_) => write!(f, "Em"),
+            TagKind::Custom(ct) => write!(f, "{}", ct.name()),
         }?;
         writeln!(f)?;
 
diff --git a/crates/krilla/src/interchange/tagging/generated.rs b/crates/krilla/src/interchange/tagging/generated.rs
index c4c237178..b8d7cb949 100644
--- a/crates/krilla/src/interchange/tagging/generated.rs
+++ b/crates/krilla/src/interchange/tagging/generated.rs
@@ -132,6 +132,8 @@ pub enum TagKind {
     Strong(Tag),
     /// Encloses content that is emphasized, most commonly *italic* text.
     Em(Tag),
+    /// An arbitrary custom tag with role mapping to a standard PDF role.
+    Custom(CustomTag),
 }
 
 impl TagKind {
@@ -177,6 +179,7 @@ impl TagKind {
             Self::Title(tag) => tag.as_any(),
             Self::Strong(tag) => tag.as_any(),
             Self::Em(tag) => tag.as_any(),
+            Self::Custom(ct) => &ct.inner,
         }
     }
 
@@ -223,6 +226,7 @@ impl TagKind {
             Self::Title(tag) => tag.as_any_mut(),
             Self::Strong(tag) => tag.as_any_mut(),
             Self::Em(tag) => tag.as_any_mut(),
+            Self::Custom(ct) => &mut ct.inner,
         }
     }
 
diff --git a/crates/krilla/src/interchange/tagging/mod.rs b/crates/krilla/src/interchange/tagging/mod.rs
index 9fcba3f87..6f209c1f9 100644
--- a/crates/krilla/src/interchange/tagging/mod.rs
+++ b/crates/krilla/src/interchange/tagging/mod.rs
@@ -486,6 +486,11 @@ impl TagKind {
                     struct_elem.kind_2(StructRole2::Em, sc.pdf2_ns.ssn_ref);
                 }
             }
+            // Arbitrary custom role-mapped tags.
+            Self::Custom(ct) => {
+                write_kind_custom(sc, struct_elem, Name(ct.name.as_bytes()));
+                sc.register_custom_role(&ct.name, ct.maps_to.into());
+            }
         };
     }
 
@@ -533,6 +538,7 @@ impl TagKind {
             Self::Title(_) => PdfVersion::Pdf14,
             Self::Strong(_) => PdfVersion::Pdf14,
             Self::Em(_) => PdfVersion::Pdf14,
+            Self::Custom(_) => PdfVersion::Pdf14,
         }
     }
 
diff --git a/crates/krilla/src/interchange/tagging/tag.rs b/crates/krilla/src/interchange/tagging/tag.rs
index 7d03ed439..3a5020a20 100644
--- a/crates/krilla/src/interchange/tagging/tag.rs
+++ b/crates/krilla/src/interchange/tagging/tag.rs
@@ -89,6 +89,202 @@ impl Tag {
     }
 }
 
+/// Standard PDF 1.7 structure roles.
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
+#[allow(missing_docs)]
+pub enum StandardRole {
+    Document,
+    Part,
+    Art,
+    Sect,
+    Div,
+    BlockQuote,
+    Caption,
+    TOC,
+    TOCI,
+    Index,
+    NonStruct,
+    Private,
+    P,
+    StructuredHeading,
+    H1,
+    H2,
+    H3,
+    H4,
+    H5,
+    H6,
+    L,
+    LI,
+    Lbl,
+    LBody,
+    Table,
+    TR,
+    TH,
+    TD,
+    THead,
+    TBody,
+    TFoot,
+    Span,
+    Quote,
+    Note,
+    Reference,
+    BibEntry,
+    Code,
+    Link,
+    Annot,
+    Ruby,
+    RB,
+    RT,
+    RP,
+    Warichu,
+    WT,
+    WP,
+    Figure,
+    Formula,
+    Form,
+}
+
+impl From<StandardRole> for pdf_writer::types::StructRole {
+    fn from(role: StandardRole) -> Self {
+        match role {
+            StandardRole::Document => Self::Document,
+            StandardRole::Part => Self::Part,
+            StandardRole::Art => Self::Art,
+            StandardRole::Sect => Self::Sect,
+            StandardRole::Div => Self::Div,
+            StandardRole::BlockQuote => Self::BlockQuote,
+            StandardRole::Caption => Self::Caption,
+            StandardRole::TOC => Self::TOC,
+            StandardRole::TOCI => Self::TOCI,
+            StandardRole::Index => Self::Index,
+            StandardRole::NonStruct => Self::NonStruct,
+            StandardRole::Private => Self::Private,
+            StandardRole::P => Self::P,
+            StandardRole::StructuredHeading => Self::StructuredHeading,
+            StandardRole::H1 => Self::H1,
+            StandardRole::H2 => Self::H2,
+            StandardRole::H3 => Self::H3,
+            StandardRole::H4 => Self::H4,
+            StandardRole::H5 => Self::H5,
+            StandardRole::H6 => Self::H6,
+            StandardRole::L => Self::L,
+            StandardRole::LI => Self::LI,
+            StandardRole::Lbl => Self::Lbl,
+            StandardRole::LBody => Self::LBody,
+            StandardRole::Table => Self::Table,
+            StandardRole::TR => Self::TR,
+            StandardRole::TH => Self::TH,
+            StandardRole::TD => Self::TD,
+            StandardRole::THead => Self::THead,
+            StandardRole::TBody => Self::TBody,
+            StandardRole::TFoot => Self::TFoot,
+            StandardRole::Span => Self::Span,
+            StandardRole::Quote => Self::Quote,
+            StandardRole::Note => Self::Note,
+            StandardRole::Reference => Self::Reference,
+            StandardRole::BibEntry => Self::BibEntry,
+            StandardRole::Code => Self::Code,
+            StandardRole::Link => Self::Link,
+            StandardRole::Annot => Self::Annot,
+            StandardRole::Ruby => Self::Ruby,
+            StandardRole::RB => Self::RB,
+            StandardRole::RT => Self::RT,
+            StandardRole::RP => Self::RP,
+            StandardRole::Warichu => Self::Warichu,
+            StandardRole::WT => Self::WT,
+            StandardRole::WP => Self::WP,
+            StandardRole::Figure => Self::Figure,
+            StandardRole::Formula => Self::Formula,
+            StandardRole::Form => Self::Form,
+        }
+    }
+}
+
+/// An arbitrary custom tag with role mapping to a standard PDF role.
+///
+/// Custom tags are emitted with a custom `/S` name and registered in the
+/// PDF's `/RoleMap` (PDF 1.7) or namespace role map (PDF 2.0), mapping them
+/// to a standard structure role.
+///
+/// # Example
+/// ```
+/// use krilla::tagging::{Tag, StandardRole};
+///
+/// let tag = Tag::custom("Slide", StandardRole::NonStruct)
+///     .with_lang(Some("en".to_string()));
+/// ```
+#[derive(Clone, Debug, PartialEq)]
+pub struct CustomTag {
+    /// The raw PDF tag name (e.g., "Slide", "Textbox").
+    pub(crate) name: String,
+    /// The standard PDF 1.7 role this maps to.
+    pub(crate) maps_to: StandardRole,
+    /// Global attributes (lang, alt, id, etc.)
+    pub(crate) inner: AnyTag,
+}
+
+impl CustomTag {
+    /// The raw PDF tag name.
+    pub fn name(&self) -> &str {
+        &self.name
+    }
+
+    /// The standard role this custom tag maps to.
+    pub fn maps_to(&self) -> StandardRole {
+        self.maps_to
+    }
+
+    /// A raw tag, which allows reading all attributes.
+    pub fn as_any(&self) -> &AnyTag {
+        &self.inner
+    }
+
+    /// A raw tag, which allows reading all attributes and additionally writing
+    /// all global ones.
+    pub fn as_any_mut(&mut self) -> &mut AnyTag {
+        &mut self.inner
+    }
+
+    /// Set the language.
+    pub fn with_lang(mut self, lang: Option<String>) -> Self {
+        self.inner.set_lang(lang);
+        self
+    }
+
+    /// Set the alt text.
+    pub fn with_alt_text(mut self, alt_text: Option<String>) -> Self {
+        self.inner.set_alt_text(alt_text);
+        self
+    }
+
+    /// Set the tag id.
+    pub fn with_id(mut self, id: Option<TagId>) -> Self {
+        self.inner.set_id(id);
+        self
+    }
+}
+
+impl From<CustomTag> for TagKind {
+    fn from(value: CustomTag) -> Self {
+        Self::Custom(value)
+    }
+}
+
+// Constructor for custom tags via `Tag::custom()`.
+impl Tag<()> {
+    /// Create a custom tag with the given name, role-mapped to a standard role.
+    ///
+    /// The tag name will be used as-is in the PDF structure tree's `/S` entry,
+    /// and registered in the `/RoleMap` to map to the given standard role.
+    pub fn custom(name: impl Into<String>, maps_to: StandardRole) -> CustomTag {
+        CustomTag {
+            name: name.into(),
+            maps_to,
+            inner: AnyTag::new(),
+        }
+    }
+}
+
 /// A raw tag, which allows reading all attributes and additionally writing all
 /// global ones.
 #[derive(Clone, Debug, PartialEq)]
diff --git a/crates/krilla/src/serialize.rs b/crates/krilla/src/serialize.rs
index 6853e5f59..2623fd81d 100644
--- a/crates/krilla/src/serialize.rs
+++ b/crates/krilla/src/serialize.rs
@@ -489,6 +489,14 @@ impl SerializeContext {
         self.global_objects.named_destinations.insert(nd, dest_ref);
     }
 
+    /// Register a custom role-mapped tag for inclusion in the PDF's RoleMap.
+    pub(crate) fn register_custom_role(&mut self, name: &str, maps_to: StructRole) {
+        self.global_objects
+            .custom_roles
+            .entry(name.to_string())
+            .or_insert(maps_to);
+    }
+
     pub(crate) fn register_page(&mut self, page: InternalPage) {
         let ref_ = self.new_ref();
         self.page_infos.push(PageInfo::Krilla {
@@ -754,6 +762,11 @@ impl SerializeContext {
                     let role2 = StructRole2::Heading(*level);
                     role_map.insert(role2.to_name(&mut [0; 6]), StructRole::P);
                 }
+
+                // Dynamic custom role-mapped tags.
+                for (name, role) in self.global_objects.custom_roles.iter() {
+                    role_map.insert(Name(name.as_bytes()), *role);
+                }
             } else {
                 let mut namespaces = tree.namespaces();
 
@@ -770,10 +783,19 @@ impl SerializeContext {
                     ns.ns(TextStr("https://github.com/LaurenzV/krilla"));
 
                     // Custom structure elements.
-                    ns.role_map_ns()
+                    let mut role_map_ns = ns.role_map_ns();
+                    role_map_ns
                         .to_pdf_2_0(Name(b"Datetime"), StructRole2::Span, self.pdf2_ns.ssn_ref)
                         .to_pdf_2_0(Name(b"Terms"), StructRole2::Part, self.pdf2_ns.ssn_ref);
 
+                    // Dynamic custom role-mapped tags.
+                    for (name, role) in self.global_objects.custom_roles.iter() {
+                        if let Some(role2) = role.into_pdf_2_0() {
+                            role_map_ns.to_pdf_2_0(Name(name.as_bytes()), role2, self.pdf2_ns.ssn_ref);
+                        }
+                    }
+                    role_map_ns.finish();
+
                     ns.finish();
                     sub_chunks.push(ns_chunk);
                 }
@@ -968,6 +990,8 @@ pub(crate) struct GlobalObjects {
     pub(crate) embedded_files: MaybeTaken>,
     /// A list of custom headings numbers used in the document.
     pub(crate) custom_heading_roles: BTreeSet,
+    /// Dynamically registered custom role-mapped tags.
+    pub(crate) custom_roles: BTreeMap<String, StructRole>,
     /// The context tracking all of the pdfs and their pages that have been inserted.
     #[cfg(feature = "pdf")]
     pub(crate) pdf_ctx: MaybeTaken,