-
Notifications
You must be signed in to change notification settings - Fork 39
Check that features required by UA-1 are available in the current PDF version #278
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,5 +1,6 @@ | ||
| use krilla::action::LinkAction; | ||
| use krilla::annotation::{Annotation, LinkAnnotation, Target}; | ||
| use krilla::configure::validate::PdfFeature; | ||
| use krilla::configure::ValidationError; | ||
| use krilla::embed::EmbedError; | ||
| use krilla::error::KrillaError; | ||
|
|
@@ -17,9 +18,9 @@ use krilla_macros::snapshot; | |
| use crate::embed::{embedded_file_impl, file_1}; | ||
| use crate::{ | ||
| blue_fill, cmyk_fill, dummy_text_with_spans, green_fill, load_jpg_image, load_png_image, loc, | ||
| metadata_1, rect_to_path, red_fill, settings_13, settings_15, settings_19, settings_20, | ||
| settings_23, settings_24, settings_7, settings_8, settings_9, stops_with_2_solid_1, | ||
| youtube_link, NOTO_SANS, | ||
| metadata_1, metadata_2, rect_to_path, red_fill, settings_13, settings_15, settings_19, | ||
| settings_20, settings_23, settings_24, settings_31, settings_7, settings_8, settings_9, | ||
| stops_with_2_solid_1, youtube_link, NOTO_SANS, | ||
| }; | ||
| use crate::{Document, SerializeSettings}; | ||
|
|
||
|
|
@@ -930,3 +931,117 @@ fn validate_deduplicate_errors() { | |
| ])) | ||
| ); | ||
| } | ||
|
|
||
| #[test] | ||
| fn validate_pdf14_ua1_header_footer_artifact_subtypes() { | ||
| let mut document = Document::new_with(settings_31()); | ||
| let mut page = document.start_page(); | ||
| let mut surface = page.surface(); | ||
|
|
||
| let id = surface.start_tagged(ContentTag::Artifact(ArtifactType::Header)); | ||
| surface.set_fill(Some(red_fill(1.0))); | ||
| surface.draw_path(&rect_to_path(30.0, 30.0, 70.0, 70.0)); | ||
| surface.end_tagged(); | ||
|
|
||
| surface.finish(); | ||
| page.finish(); | ||
|
|
||
| let mut tag_tree = TagTree::new(); | ||
| tag_tree.push(id); | ||
| document.set_tag_tree(tag_tree); | ||
|
|
||
| document.set_metadata(metadata_2()); | ||
|
|
||
| let outline = Outline::new(); | ||
| document.set_outline(outline); | ||
|
|
||
| assert_eq!( | ||
| document.finish(), | ||
| Err(KrillaError::Validation(vec![ | ||
| ValidationError::RequiresLaterPdfVersion( | ||
| PdfFeature::HeaderFooterArtifactSubtypes, | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe |
||
| None | ||
| ) | ||
| ])) | ||
| ); | ||
| } | ||
|
|
||
| #[test] | ||
| fn validate_pdf14_ua1_structure_order_tabbing() { | ||
| let mut document = Document::new_with(settings_31()); | ||
| let mut page = document.start_page(); | ||
|
|
||
| let annot = page.add_tagged_annotation(Annotation::new_link( | ||
| LinkAnnotation::new( | ||
| Rect::from_xywh(50.0, 50.0, 100.0, 100.0).unwrap(), | ||
| Target::Action(LinkAction::new("https://www.youtube.com".to_string()).into()), | ||
| ), | ||
| Some("Link to YouTube".to_string()), | ||
| )); | ||
|
|
||
| page.finish(); | ||
|
|
||
| let mut tag_tree = TagTree::new(); | ||
| tag_tree.push(annot); | ||
| document.set_tag_tree(tag_tree); | ||
|
|
||
| document.set_metadata(metadata_2()); | ||
|
|
||
| let outline = Outline::new(); | ||
| document.set_outline(outline); | ||
|
|
||
| assert_eq!( | ||
| document.finish(), | ||
| Err(KrillaError::Validation(vec![ | ||
| ValidationError::RequiresLaterPdfVersion(PdfFeature::StructureOrderTabbing, None) | ||
| ])) | ||
| ); | ||
| } | ||
|
|
||
| #[test] | ||
| fn validate_pdf14_ua1_table_header_scope() { | ||
| let mut document = Document::new_with(settings_31()); | ||
| let mut page = document.start_page(); | ||
| let mut surface = page.surface(); | ||
|
|
||
| let font_data = NOTO_SANS.clone(); | ||
| let font = Font::new(font_data, 0).unwrap(); | ||
|
|
||
| let text_id = surface.start_tagged(ContentTag::Span(SpanTag::empty())); | ||
| surface.draw_text( | ||
| Point::from_xy(0.0, 100.0), | ||
| font, | ||
| 20.0, | ||
| "header", | ||
| false, | ||
| TextDirection::Auto, | ||
| ); | ||
| surface.end_tagged(); | ||
|
|
||
| surface.finish(); | ||
| page.finish(); | ||
|
|
||
| let mut row = TagGroup::new(Tag::TR); | ||
| let mut th = TagGroup::new(Tag::TH(TableHeaderScope::Row)); | ||
| th.push(text_id); | ||
| row.push(th); | ||
|
|
||
| let mut table = TagGroup::new(Tag::Table); | ||
| table.push(row); | ||
|
|
||
| let mut tag_tree = TagTree::new(); | ||
| tag_tree.push(table); | ||
| document.set_tag_tree(tag_tree); | ||
|
|
||
| document.set_metadata(metadata_2()); | ||
|
|
||
| let outline = Outline::new(); | ||
| document.set_outline(outline); | ||
|
|
||
| assert_eq!( | ||
| document.finish(), | ||
| Err(KrillaError::Validation(vec![ | ||
| ValidationError::RequiresLaterPdfVersion(PdfFeature::TableHeaderScope, None) | ||
| ])) | ||
| ); | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -132,6 +132,30 @@ pub enum ValidationError { | |
| /// This is currently forbidden in validated export because we cannot manually verify | ||
| /// whether the file actually fulfills all the criteria for the export mode. | ||
| EmbeddedPDF(Option<Location>), | ||
| /// A feature only available in a later PDF version was required. | ||
| RequiresLaterPdfVersion(PdfFeature, Option<Location>), | ||
| } | ||
|
|
||
| /// Features that may require a later PDF version than the current one. | ||
| #[derive(Debug, Clone, PartialEq, Eq, Hash)] | ||
| pub enum PdfFeature { | ||
| /// Tabbing through the document according to the structure order. | ||
| StructureOrderTabbing, | ||
| /// Header and footer artifact subtypes. | ||
| HeaderFooterArtifactSubtypes, | ||
| /// Scope attribute for table header cells. | ||
| TableHeaderScope, | ||
| } | ||
|
|
||
| impl PdfFeature { | ||
| /// Get the minimum PDF version required for this feature. | ||
| pub fn minimum_pdf_version(&self) -> PdfVersion { | ||
| match self { | ||
| PdfFeature::StructureOrderTabbing => PdfVersion::Pdf15, | ||
| PdfFeature::HeaderFooterArtifactSubtypes => PdfVersion::Pdf17, | ||
| PdfFeature::TableHeaderScope => PdfVersion::Pdf15, | ||
| } | ||
| } | ||
| } | ||
|
|
||
| /// A validator for exporting PDF documents to a specific subset of PDF. | ||
|
|
@@ -342,6 +366,7 @@ impl Validator { | |
| ValidationError::MissingTagging => *self == Validator::A1_A, | ||
| ValidationError::MissingDocumentDate => true, | ||
| ValidationError::EmbeddedPDF(_) => true, | ||
| ValidationError::RequiresLaterPdfVersion(_, _) => false, | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we should explicitly spell out the three variants in all of these branches, so in case there is a new variant which requires validation in other standards, we don't overlook it. |
||
| }, | ||
| Validator::A2_A | Validator::A2_B | Validator::A2_U => match validation_error { | ||
| ValidationError::TooLongString => true, | ||
|
|
@@ -382,6 +407,7 @@ impl Validator { | |
| ValidationError::MissingTagging => *self == Validator::A2_A, | ||
| ValidationError::MissingDocumentDate => true, | ||
| ValidationError::EmbeddedPDF(_) => true, | ||
| ValidationError::RequiresLaterPdfVersion(_, _) => false, | ||
| }, | ||
| Validator::A3_A | Validator::A3_B | Validator::A3_U => match validation_error { | ||
| ValidationError::TooLongString => true, | ||
|
|
@@ -417,6 +443,7 @@ impl Validator { | |
| ValidationError::MissingTagging => *self == Validator::A3_A, | ||
| ValidationError::MissingDocumentDate => true, | ||
| ValidationError::EmbeddedPDF(_) => true, | ||
| ValidationError::RequiresLaterPdfVersion(_, _) => false, | ||
| }, | ||
| Validator::A4 | Validator::A4F | Validator::A4E => match validation_error { | ||
| ValidationError::TooLongString => false, | ||
|
|
@@ -458,6 +485,7 @@ impl Validator { | |
| ValidationError::MissingTagging => false, | ||
| ValidationError::MissingDocumentDate => true, | ||
| ValidationError::EmbeddedPDF(_) => true, | ||
| ValidationError::RequiresLaterPdfVersion(_, _) => false, | ||
| }, | ||
| Validator::UA1 => match validation_error { | ||
| ValidationError::TooLongString => false, | ||
|
|
@@ -493,6 +521,12 @@ impl Validator { | |
| ValidationError::MissingTagging => true, | ||
| ValidationError::MissingDocumentDate => false, | ||
| ValidationError::EmbeddedPDF(_) => true, | ||
| ValidationError::RequiresLaterPdfVersion( | ||
| PdfFeature::HeaderFooterArtifactSubtypes | ||
| | PdfFeature::StructureOrderTabbing | ||
| | PdfFeature::TableHeaderScope, | ||
| _, | ||
| ) => true, | ||
| }, | ||
| } | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -135,6 +135,7 @@ use pdf_writer::writers::{PropertyList, StructElement}; | |
| use pdf_writer::{Chunk, Finish, Name, Ref, Str, TextStr}; | ||
| use smallvec::SmallVec; | ||
|
|
||
| use crate::configure::validate::PdfFeature; | ||
| use crate::configure::{PdfVersion, ValidationError}; | ||
| use crate::error::{KrillaError, KrillaResult}; | ||
| use crate::page::page_root_transform; | ||
|
|
@@ -224,15 +225,27 @@ impl ContentTag<'_> { | |
| ArtifactType::Other => unreachable!(), | ||
| }; | ||
|
|
||
| if sc.serialize_settings().pdf_version() >= PdfVersion::Pdf17 { | ||
| if *at == ArtifactType::Header { | ||
| artifact.attached([pdf_writer::types::ArtifactAttachment::Top]); | ||
| artifact.subtype(ArtifactSubtype::Header); | ||
| } | ||
| let attachments_and_subtype = match &at { | ||
| ArtifactType::Header => Some(( | ||
| [pdf_writer::types::ArtifactAttachment::Top], | ||
| ArtifactSubtype::Header, | ||
| )), | ||
| ArtifactType::Footer => Some(( | ||
| [pdf_writer::types::ArtifactAttachment::Bottom], | ||
| ArtifactSubtype::Footer, | ||
| )), | ||
| _ => None, | ||
| }; | ||
|
|
||
| if *at == ArtifactType::Footer { | ||
| artifact.attached([pdf_writer::types::ArtifactAttachment::Bottom]); | ||
| artifact.subtype(ArtifactSubtype::Footer); | ||
| if let Some((attachments, subtype)) = attachments_and_subtype { | ||
| if sc.serialize_settings().pdf_version() >= PdfVersion::Pdf17 { | ||
| artifact.attached(attachments); | ||
| artifact.subtype(subtype); | ||
| } else { | ||
| sc.register_validation_error(ValidationError::RequiresLaterPdfVersion( | ||
| PdfFeature::HeaderFooterArtifactSubtypes, | ||
| sc.location, | ||
| )); | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -605,6 +618,41 @@ impl Node { | |
| }, | ||
| } | ||
| } | ||
|
|
||
| // For tables to be valid in PDF versions before 1.5, we need to ensure that | ||
| // each table header cell (`TH`) is inside a table header (`THead`). | ||
| // Otherwise, the unsupported `Scope` attribute would be required to express | ||
| // the correct semantics. | ||
| fn validate_table_structure_before_pdf15(&self, in_header: bool, sc: &mut SerializeContext) { | ||
|
Comment on lines
+621
to
+626
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm... Can we remove this for now and add a TODO instead? I do agree it would be nice to have tag tree validation, but I want to do it properly so that we can validate other constraints as well (for example the oens outlined in WCAG). Would like to avoid adding a method just for this specific case now in this PR. |
||
| match self { | ||
| Node::Group(g) => { | ||
| let in_header = in_header || matches!(g.tag, TagKind::THead(_)); | ||
| if !in_header && matches!(g.tag, TagKind::TH(_)) { | ||
| sc.register_validation_error(ValidationError::RequiresLaterPdfVersion( | ||
| PdfFeature::TableHeaderScope, | ||
| g.tag.as_any().location, | ||
| )); | ||
| } | ||
|
|
||
| match g.tag { | ||
| TagKind::TBody(_) | ||
| | TagKind::TFoot(_) | ||
| | TagKind::THead(_) | ||
| | TagKind::TR(_) | ||
| | TagKind::TD(_) | ||
| | TagKind::TH(_) => { | ||
| // We want to recurse into table-related tags. | ||
| for child in &g.children { | ||
| child.validate_table_structure_before_pdf15(in_header, sc); | ||
| } | ||
| } | ||
| // Other tags serve as a boundary for table structure. | ||
| _ => {} | ||
| } | ||
| } | ||
| Node::Leaf(_) => {} | ||
| } | ||
| } | ||
| } | ||
|
|
||
| impl From<TagGroup> for Node { | ||
|
|
@@ -719,6 +767,12 @@ impl TagGroup { | |
| sc.register_validation_error(ValidationError::MissingAltText(tag.location)); | ||
| } | ||
|
|
||
| if matches!(self.tag, TagKind::Table(_)) && pdf_version < PdfVersion::Pdf15 { | ||
| for child in &self.children { | ||
| child.validate_table_structure_before_pdf15(false, sc); | ||
| } | ||
|
Comment on lines
+770
to
+773
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same as above. |
||
| } | ||
|
|
||
| for attr in tag.attrs.iter() { | ||
| let Attr::Struct(attr) = attr else { | ||
| continue; | ||
|
|
@@ -784,6 +838,12 @@ impl TagGroup { | |
| TableAttr::HeaderScope(scope) => { | ||
| if pdf_version >= PdfVersion::Pdf15 { | ||
| table_attributes.get().scope(scope.to_pdf()); | ||
| } else if scope == &TableHeaderScope::Row { | ||
| // Without `Scope`, the correct cell to point to is ambiguous. | ||
| sc.register_validation_error(ValidationError::RequiresLaterPdfVersion( | ||
| PdfFeature::TableHeaderScope, | ||
| tag.location, | ||
| )); | ||
| } | ||
| } | ||
| TableAttr::CellHeaders(headers) => { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,7 +8,8 @@ use pdf_writer::types::TabOrder; | |
| use pdf_writer::writers::NumberTree; | ||
| use pdf_writer::{Chunk, Finish, Ref, TextStr}; | ||
|
|
||
| use crate::configure::PdfVersion; | ||
| use crate::configure::validate::PdfFeature; | ||
| use crate::configure::{PdfVersion, ValidationError}; | ||
| use crate::content::ContentBuilder; | ||
| use crate::error::KrillaResult; | ||
| use crate::geom::{Rect, Size, Transform}; | ||
|
|
@@ -424,11 +425,15 @@ impl InternalPage { | |
| } | ||
|
|
||
| // Only required for PDF/UA, but might as well always set it. | ||
| if !self.annotations.is_empty() | ||
| && sc.serialize_settings().enable_tagging | ||
| && sc.serialize_settings().pdf_version() >= PdfVersion::Pdf15 | ||
| { | ||
| page.tab_order(TabOrder::StructureOrder); | ||
| if !self.annotations.is_empty() && sc.serialize_settings().enable_tagging { | ||
| if sc.serialize_settings().pdf_version() >= PdfVersion::Pdf15 { | ||
| page.tab_order(TabOrder::StructureOrder); | ||
| } else { | ||
| sc.register_validation_error(ValidationError::RequiresLaterPdfVersion( | ||
| PdfFeature::StructureOrderTabbing, | ||
| sc.location, | ||
| )); | ||
| } | ||
|
Comment on lines
+428
to
+436
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Am I understanding correctly that this will fail any PDF 1.4 export with tagging enabled as soon as there is at least one annotation. Since the above comment suggests it's only needed for PDF/UA, let's only raise this error for that mode? Adding a simple |
||
| } | ||
|
|
||
| page.parent(sc.page_tree_ref()); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe
RequriesNewerPdfVersionwould be a bit better?