diff --git a/crates/krilla-tests/src/main.rs b/crates/krilla-tests/src/main.rs index caa649a3e..f8fa4306f 100644 --- a/crates/krilla-tests/src/main.rs +++ b/crates/krilla-tests/src/main.rs @@ -1085,12 +1085,26 @@ pub fn settings_30() -> SerializeSettings { } } +pub fn settings_31() -> SerializeSettings { + SerializeSettings { + configuration: Configuration::new_with(Validator::UA1, PdfVersion::Pdf14).unwrap(), + ..settings_1() + } +} + pub fn metadata_1() -> Metadata { Metadata::new() .language("en".to_string()) .creation_date(DateTime::new(2001)) } +pub fn metadata_2() -> Metadata { + Metadata::new() + .language("en".to_string()) + .title("Test Document".to_string()) + .creation_date(DateTime::new(2001)) +} + fn main() { panic!("This crate shouldn't be run directly."); } diff --git a/crates/krilla-tests/src/validate.rs b/crates/krilla-tests/src/validate.rs index 8b2ccdea4..7cde69b00 100644 --- a/crates/krilla-tests/src/validate.rs +++ b/crates/krilla-tests/src/validate.rs @@ -1,5 +1,6 @@ use krilla::action::LinkAction; use krilla::annotation::{Annotation, LinkAnnotation, Target}; +use krilla::configure::validate::PdfFeature; use krilla::configure::ValidationError; use krilla::embed::EmbedError; use krilla::error::KrillaError; @@ -17,9 +18,9 @@ use krilla_macros::snapshot; use crate::embed::{embedded_file_impl, file_1}; use crate::{ blue_fill, cmyk_fill, dummy_text_with_spans, green_fill, load_jpg_image, load_png_image, loc, - metadata_1, rect_to_path, red_fill, settings_13, settings_15, settings_19, settings_20, - settings_23, settings_24, settings_7, settings_8, settings_9, stops_with_2_solid_1, - youtube_link, NOTO_SANS, + metadata_1, metadata_2, rect_to_path, red_fill, settings_13, settings_15, settings_19, + settings_20, settings_23, settings_24, settings_31, settings_7, settings_8, settings_9, + stops_with_2_solid_1, youtube_link, NOTO_SANS, }; use crate::{Document, SerializeSettings}; @@ -930,3 +931,117 @@ fn validate_deduplicate_errors() { ])) ); } + +#[test] +fn validate_pdf14_ua1_header_footer_artifact_subtypes() { + let mut document = Document::new_with(settings_31()); + let mut page = document.start_page(); + let mut surface = page.surface(); + + let id = surface.start_tagged(ContentTag::Artifact(ArtifactType::Header)); + surface.set_fill(Some(red_fill(1.0))); + surface.draw_path(&rect_to_path(30.0, 30.0, 70.0, 70.0)); + surface.end_tagged(); + + surface.finish(); + page.finish(); + + let mut tag_tree = TagTree::new(); + tag_tree.push(id); + document.set_tag_tree(tag_tree); + + document.set_metadata(metadata_2()); + + let outline = Outline::new(); + document.set_outline(outline); + + assert_eq!( + document.finish(), + Err(KrillaError::Validation(vec![ + ValidationError::RequiresLaterPdfVersion( + PdfFeature::HeaderFooterArtifactSubtypes, + None + ) + ])) + ); +} + +#[test] +fn validate_pdf14_ua1_structure_order_tabbing() { + let mut document = Document::new_with(settings_31()); + let mut page = document.start_page(); + + let annot = page.add_tagged_annotation(Annotation::new_link( + LinkAnnotation::new( + Rect::from_xywh(50.0, 50.0, 100.0, 100.0).unwrap(), + Target::Action(LinkAction::new("https://www.youtube.com".to_string()).into()), + ), + Some("Link to YouTube".to_string()), + )); + + page.finish(); + + let mut tag_tree = TagTree::new(); + tag_tree.push(annot); + document.set_tag_tree(tag_tree); + + document.set_metadata(metadata_2()); + + let outline = Outline::new(); + document.set_outline(outline); + + assert_eq!( + document.finish(), + Err(KrillaError::Validation(vec![ + ValidationError::RequiresLaterPdfVersion(PdfFeature::StructureOrderTabbing, None) + ])) + ); +} + +#[test] +fn validate_pdf14_ua1_table_header_scope() { + let mut document = Document::new_with(settings_31()); + let mut page = document.start_page(); + let mut surface = page.surface(); + + let font_data = NOTO_SANS.clone(); + let font = Font::new(font_data, 0).unwrap(); + + let text_id = surface.start_tagged(ContentTag::Span(SpanTag::empty())); + surface.draw_text( + Point::from_xy(0.0, 100.0), + font, + 20.0, + "header", + false, + TextDirection::Auto, + ); + surface.end_tagged(); + + surface.finish(); + page.finish(); + + let mut row = TagGroup::new(Tag::TR); + let mut th = TagGroup::new(Tag::TH(TableHeaderScope::Row)); + th.push(text_id); + row.push(th); + + let mut table = TagGroup::new(Tag::Table); + table.push(row); + + let mut tag_tree = TagTree::new(); + tag_tree.push(table); + document.set_tag_tree(tag_tree); + + document.set_metadata(metadata_2()); + + let outline = Outline::new(); + document.set_outline(outline); + + assert_eq!( + document.finish(), + Err(KrillaError::Validation(vec![ + ValidationError::RequiresLaterPdfVersion(PdfFeature::TableHeaderScope, None) + ])) + ); +} diff --git a/crates/krilla/src/configure/validate.rs b/crates/krilla/src/configure/validate.rs index 0baf1443c..8ea72ffd9 100644 --- a/crates/krilla/src/configure/validate.rs +++ b/crates/krilla/src/configure/validate.rs @@ -132,6 +132,30 @@ pub enum ValidationError { /// This is currently forbidden in validated export because we cannot manually verify /// whether the file actually fulfills all the criteria for the export mode. EmbeddedPDF(Option), + /// A feature only available in a later PDF version was required. + RequiresLaterPdfVersion(PdfFeature, Option), +} + +/// Features that may require a later PDF version than the current one. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum PdfFeature { + /// Tabbing through the document according to the structure order. + StructureOrderTabbing, + /// Header and footer artifact subtypes. + HeaderFooterArtifactSubtypes, + /// Scope attribute for table header cells. + TableHeaderScope, +} + +impl PdfFeature { + /// Get the minimum PDF version required for this feature. + pub fn minimum_pdf_version(&self) -> PdfVersion { + match self { + PdfFeature::StructureOrderTabbing => PdfVersion::Pdf15, + PdfFeature::HeaderFooterArtifactSubtypes => PdfVersion::Pdf17, + PdfFeature::TableHeaderScope => PdfVersion::Pdf15, + } + } } /// A validator for exporting PDF documents to a specific subset of PDF. @@ -342,6 +366,7 @@ impl Validator { ValidationError::MissingTagging => *self == Validator::A1_A, ValidationError::MissingDocumentDate => true, ValidationError::EmbeddedPDF(_) => true, + ValidationError::RequiresLaterPdfVersion(_, _) => false, }, Validator::A2_A | Validator::A2_B | Validator::A2_U => match validation_error { ValidationError::TooLongString => true, @@ -382,6 +407,7 @@ impl Validator { ValidationError::MissingTagging => *self == Validator::A2_A, ValidationError::MissingDocumentDate => true, ValidationError::EmbeddedPDF(_) => true, + ValidationError::RequiresLaterPdfVersion(_, _) => false, }, Validator::A3_A | Validator::A3_B | Validator::A3_U => match validation_error { ValidationError::TooLongString => true, @@ -417,6 +443,7 @@ impl Validator { ValidationError::MissingTagging => *self == Validator::A3_A, ValidationError::MissingDocumentDate => true, ValidationError::EmbeddedPDF(_) => true, + ValidationError::RequiresLaterPdfVersion(_, _) => false, }, Validator::A4 | Validator::A4F | Validator::A4E => match validation_error { ValidationError::TooLongString => false, @@ -458,6 +485,7 @@ impl Validator { ValidationError::MissingTagging => false, ValidationError::MissingDocumentDate => true, ValidationError::EmbeddedPDF(_) => true, + ValidationError::RequiresLaterPdfVersion(_, _) => false, }, Validator::UA1 => match validation_error { ValidationError::TooLongString => false, @@ -493,6 +521,12 @@ impl Validator { ValidationError::MissingTagging => true, ValidationError::MissingDocumentDate => false, ValidationError::EmbeddedPDF(_) => true, + ValidationError::RequiresLaterPdfVersion( + PdfFeature::HeaderFooterArtifactSubtypes + | PdfFeature::StructureOrderTabbing + | PdfFeature::TableHeaderScope, + _, + ) => true, }, } } diff --git a/crates/krilla/src/interchange/tagging/mod.rs b/crates/krilla/src/interchange/tagging/mod.rs index 324b71abd..8aa416478 100644 --- a/crates/krilla/src/interchange/tagging/mod.rs +++ b/crates/krilla/src/interchange/tagging/mod.rs @@ -135,6 +135,7 @@ use pdf_writer::writers::{PropertyList, StructElement}; use pdf_writer::{Chunk, Finish, Name, Ref, Str, TextStr}; use smallvec::SmallVec; +use crate::configure::validate::PdfFeature; use crate::configure::{PdfVersion, ValidationError}; use crate::error::{KrillaError, KrillaResult}; use crate::page::page_root_transform; @@ -224,15 +225,27 @@ impl ContentTag<'_> { ArtifactType::Other => unreachable!(), }; - if sc.serialize_settings().pdf_version() >= PdfVersion::Pdf17 { - if *at == ArtifactType::Header { - artifact.attached([pdf_writer::types::ArtifactAttachment::Top]); - artifact.subtype(ArtifactSubtype::Header); - } + let attachments_and_subtype = match &at { + ArtifactType::Header => Some(( + [pdf_writer::types::ArtifactAttachment::Top], + ArtifactSubtype::Header, + )), + ArtifactType::Footer => Some(( + [pdf_writer::types::ArtifactAttachment::Bottom], + ArtifactSubtype::Footer, + )), + _ => None, + }; - if *at == ArtifactType::Footer { - artifact.attached([pdf_writer::types::ArtifactAttachment::Bottom]); - artifact.subtype(ArtifactSubtype::Footer); + if let Some((attachments, subtype)) = attachments_and_subtype { + if sc.serialize_settings().pdf_version() >= PdfVersion::Pdf17 { + artifact.attached(attachments); + artifact.subtype(subtype); + } else { + sc.register_validation_error(ValidationError::RequiresLaterPdfVersion( + PdfFeature::HeaderFooterArtifactSubtypes, + sc.location, + )); } } @@ -605,6 +618,41 @@ impl Node { }, } } + + // For tables to be valid in PDF versions before 1.5, we need to ensure that + // each table header cell (`TH`) is inside a table header (`THead`). + // Otherwise, the unsupported `Scope` attribute would be required to express + // the correct semantics. + fn validate_table_structure_before_pdf15(&self, in_header: bool, sc: &mut SerializeContext) { + match self { + Node::Group(g) => { + let in_header = in_header || matches!(g.tag, TagKind::THead(_)); + if !in_header && matches!(g.tag, TagKind::TH(_)) { + sc.register_validation_error(ValidationError::RequiresLaterPdfVersion( + PdfFeature::TableHeaderScope, + g.tag.as_any().location, + )); + } + + match g.tag { + TagKind::TBody(_) + | TagKind::TFoot(_) + | TagKind::THead(_) + | TagKind::TR(_) + | TagKind::TD(_) + | TagKind::TH(_) => { + // We want to recurse into table-related tags. + for child in &g.children { + child.validate_table_structure_before_pdf15(in_header, sc); + } + } + // Other tags serve as a boundary for table structure. + _ => {} + } + } + Node::Leaf(_) => {} + } + } } impl From for Node { @@ -719,6 +767,12 @@ impl TagGroup { sc.register_validation_error(ValidationError::MissingAltText(tag.location)); } + if matches!(self.tag, TagKind::Table(_)) && pdf_version < PdfVersion::Pdf15 { + for child in &self.children { + child.validate_table_structure_before_pdf15(false, sc); + } + } + for attr in tag.attrs.iter() { let Attr::Struct(attr) = attr else { continue; @@ -784,6 +838,12 @@ impl TagGroup { TableAttr::HeaderScope(scope) => { if pdf_version >= PdfVersion::Pdf15 { table_attributes.get().scope(scope.to_pdf()); + } else if scope == &TableHeaderScope::Row { + // Without `Scope`, the correct cell to point to is ambiguous. + sc.register_validation_error(ValidationError::RequiresLaterPdfVersion( + PdfFeature::TableHeaderScope, + tag.location, + )); } } TableAttr::CellHeaders(headers) => { diff --git a/crates/krilla/src/page.rs b/crates/krilla/src/page.rs index e04b38f66..8ae9fb204 100644 --- a/crates/krilla/src/page.rs +++ b/crates/krilla/src/page.rs @@ -8,7 +8,8 @@ use pdf_writer::types::TabOrder; use pdf_writer::writers::NumberTree; use pdf_writer::{Chunk, Finish, Ref, TextStr}; -use crate::configure::PdfVersion; +use crate::configure::validate::PdfFeature; +use crate::configure::{PdfVersion, ValidationError}; use crate::content::ContentBuilder; use crate::error::KrillaResult; use crate::geom::{Rect, Size, Transform}; @@ -424,11 +425,15 @@ impl InternalPage { } // Only required for PDF/UA, but might as well always set it. - if !self.annotations.is_empty() - && sc.serialize_settings().enable_tagging - && sc.serialize_settings().pdf_version() >= PdfVersion::Pdf15 - { - page.tab_order(TabOrder::StructureOrder); + if !self.annotations.is_empty() && sc.serialize_settings().enable_tagging { + if sc.serialize_settings().pdf_version() >= PdfVersion::Pdf15 { + page.tab_order(TabOrder::StructureOrder); + } else { + sc.register_validation_error(ValidationError::RequiresLaterPdfVersion( + PdfFeature::StructureOrderTabbing, + sc.location, + )); + } } page.parent(sc.page_tree_ref());