Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions crates/krilla-tests/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1085,12 +1085,26 @@ pub fn settings_30() -> SerializeSettings {
}
}

pub fn settings_31() -> SerializeSettings {
SerializeSettings {
configuration: Configuration::new_with(Validator::UA1, PdfVersion::Pdf14).unwrap(),
..settings_1()
}
}

pub fn metadata_1() -> Metadata {
Metadata::new()
.language("en".to_string())
.creation_date(DateTime::new(2001))
}

pub fn metadata_2() -> Metadata {
Metadata::new()
.language("en".to_string())
.title("Test Document".to_string())
.creation_date(DateTime::new(2001))
}

fn main() {
panic!("This crate shouldn't be run directly.");
}
121 changes: 118 additions & 3 deletions crates/krilla-tests/src/validate.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use krilla::action::LinkAction;
use krilla::annotation::{Annotation, LinkAnnotation, Target};
use krilla::configure::validate::PdfFeature;
use krilla::configure::ValidationError;
use krilla::embed::EmbedError;
use krilla::error::KrillaError;
Expand All @@ -17,9 +18,9 @@ use krilla_macros::snapshot;
use crate::embed::{embedded_file_impl, file_1};
use crate::{
blue_fill, cmyk_fill, dummy_text_with_spans, green_fill, load_jpg_image, load_png_image, loc,
metadata_1, rect_to_path, red_fill, settings_13, settings_15, settings_19, settings_20,
settings_23, settings_24, settings_7, settings_8, settings_9, stops_with_2_solid_1,
youtube_link, NOTO_SANS,
metadata_1, metadata_2, rect_to_path, red_fill, settings_13, settings_15, settings_19,
settings_20, settings_23, settings_24, settings_31, settings_7, settings_8, settings_9,
stops_with_2_solid_1, youtube_link, NOTO_SANS,
};
use crate::{Document, SerializeSettings};

Expand Down Expand Up @@ -930,3 +931,117 @@ fn validate_deduplicate_errors() {
]))
);
}

#[test]
fn validate_pdf14_ua1_header_footer_artifact_subtypes() {
let mut document = Document::new_with(settings_31());
let mut page = document.start_page();
let mut surface = page.surface();

let id = surface.start_tagged(ContentTag::Artifact(ArtifactType::Header));
surface.set_fill(Some(red_fill(1.0)));
surface.draw_path(&rect_to_path(30.0, 30.0, 70.0, 70.0));
surface.end_tagged();

surface.finish();
page.finish();

let mut tag_tree = TagTree::new();
tag_tree.push(id);
document.set_tag_tree(tag_tree);

document.set_metadata(metadata_2());

let outline = Outline::new();
document.set_outline(outline);

assert_eq!(
document.finish(),
Err(KrillaError::Validation(vec![
ValidationError::RequiresLaterPdfVersion(
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe RequriesNewerPdfVersion would be a bit better?

PdfFeature::HeaderFooterArtifactSubtypes,
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe VersionedFeature? PdfFeature seems a bit too generic for me.

None
)
]))
);
}

#[test]
fn validate_pdf14_ua1_structure_order_tabbing() {
let mut document = Document::new_with(settings_31());
let mut page = document.start_page();

let annot = page.add_tagged_annotation(Annotation::new_link(
LinkAnnotation::new(
Rect::from_xywh(50.0, 50.0, 100.0, 100.0).unwrap(),
Target::Action(LinkAction::new("https://www.youtube.com".to_string()).into()),
),
Some("Link to YouTube".to_string()),
));

page.finish();

let mut tag_tree = TagTree::new();
tag_tree.push(annot);
document.set_tag_tree(tag_tree);

document.set_metadata(metadata_2());

let outline = Outline::new();
document.set_outline(outline);

assert_eq!(
document.finish(),
Err(KrillaError::Validation(vec![
ValidationError::RequiresLaterPdfVersion(PdfFeature::StructureOrderTabbing, None)
]))
);
}

#[test]
fn validate_pdf14_ua1_table_header_scope() {
let mut document = Document::new_with(settings_31());
let mut page = document.start_page();
let mut surface = page.surface();

let font_data = NOTO_SANS.clone();
let font = Font::new(font_data, 0).unwrap();

let text_id = surface.start_tagged(ContentTag::Span(SpanTag::empty()));
surface.draw_text(
Point::from_xy(0.0, 100.0),
font,
20.0,
"header",
false,
TextDirection::Auto,
);
surface.end_tagged();

surface.finish();
page.finish();

let mut row = TagGroup::new(Tag::TR);
let mut th = TagGroup::new(Tag::TH(TableHeaderScope::Row));
th.push(text_id);
row.push(th);

let mut table = TagGroup::new(Tag::Table);
table.push(row);

let mut tag_tree = TagTree::new();
tag_tree.push(table);
document.set_tag_tree(tag_tree);

document.set_metadata(metadata_2());

let outline = Outline::new();
document.set_outline(outline);

assert_eq!(
document.finish(),
Err(KrillaError::Validation(vec![
ValidationError::RequiresLaterPdfVersion(PdfFeature::TableHeaderScope, None)
]))
);
}
34 changes: 34 additions & 0 deletions crates/krilla/src/configure/validate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,30 @@ pub enum ValidationError {
/// This is currently forbidden in validated export because we cannot manually verify
/// whether the file actually fulfills all the criteria for the export mode.
EmbeddedPDF(Option<Location>),
/// A feature only available in a later PDF version was required.
RequiresLaterPdfVersion(PdfFeature, Option<Location>),
}

/// Features that may require a later PDF version than the current one.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum PdfFeature {
/// Tabbing through the document according to the structure order.
StructureOrderTabbing,
/// Header and footer artifact subtypes.
HeaderFooterArtifactSubtypes,
/// Scope attribute for table header cells.
TableHeaderScope,
}

impl PdfFeature {
/// Get the minimum PDF version required for this feature.
pub fn minimum_pdf_version(&self) -> PdfVersion {
match self {
PdfFeature::StructureOrderTabbing => PdfVersion::Pdf15,
PdfFeature::HeaderFooterArtifactSubtypes => PdfVersion::Pdf17,
PdfFeature::TableHeaderScope => PdfVersion::Pdf15,
}
}
}

/// A validator for exporting PDF documents to a specific subset of PDF.
Expand Down Expand Up @@ -342,6 +366,7 @@ impl Validator {
ValidationError::MissingTagging => *self == Validator::A1_A,
ValidationError::MissingDocumentDate => true,
ValidationError::EmbeddedPDF(_) => true,
ValidationError::RequiresLaterPdfVersion(_, _) => false,
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should explicitly spell out the three variants in all of these branches, so in case there is a new variant which requires validation in other standards, we don't overlook it.

},
Validator::A2_A | Validator::A2_B | Validator::A2_U => match validation_error {
ValidationError::TooLongString => true,
Expand Down Expand Up @@ -382,6 +407,7 @@ impl Validator {
ValidationError::MissingTagging => *self == Validator::A2_A,
ValidationError::MissingDocumentDate => true,
ValidationError::EmbeddedPDF(_) => true,
ValidationError::RequiresLaterPdfVersion(_, _) => false,
},
Validator::A3_A | Validator::A3_B | Validator::A3_U => match validation_error {
ValidationError::TooLongString => true,
Expand Down Expand Up @@ -417,6 +443,7 @@ impl Validator {
ValidationError::MissingTagging => *self == Validator::A3_A,
ValidationError::MissingDocumentDate => true,
ValidationError::EmbeddedPDF(_) => true,
ValidationError::RequiresLaterPdfVersion(_, _) => false,
},
Validator::A4 | Validator::A4F | Validator::A4E => match validation_error {
ValidationError::TooLongString => false,
Expand Down Expand Up @@ -458,6 +485,7 @@ impl Validator {
ValidationError::MissingTagging => false,
ValidationError::MissingDocumentDate => true,
ValidationError::EmbeddedPDF(_) => true,
ValidationError::RequiresLaterPdfVersion(_, _) => false,
},
Validator::UA1 => match validation_error {
ValidationError::TooLongString => false,
Expand Down Expand Up @@ -493,6 +521,12 @@ impl Validator {
ValidationError::MissingTagging => true,
ValidationError::MissingDocumentDate => false,
ValidationError::EmbeddedPDF(_) => true,
ValidationError::RequiresLaterPdfVersion(
PdfFeature::HeaderFooterArtifactSubtypes
| PdfFeature::StructureOrderTabbing
| PdfFeature::TableHeaderScope,
_,
) => true,
},
}
}
Expand Down
76 changes: 68 additions & 8 deletions crates/krilla/src/interchange/tagging/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ use pdf_writer::writers::{PropertyList, StructElement};
use pdf_writer::{Chunk, Finish, Name, Ref, Str, TextStr};
use smallvec::SmallVec;

use crate::configure::validate::PdfFeature;
use crate::configure::{PdfVersion, ValidationError};
use crate::error::{KrillaError, KrillaResult};
use crate::page::page_root_transform;
Expand Down Expand Up @@ -224,15 +225,27 @@ impl ContentTag<'_> {
ArtifactType::Other => unreachable!(),
};

if sc.serialize_settings().pdf_version() >= PdfVersion::Pdf17 {
if *at == ArtifactType::Header {
artifact.attached([pdf_writer::types::ArtifactAttachment::Top]);
artifact.subtype(ArtifactSubtype::Header);
}
let attachments_and_subtype = match &at {
ArtifactType::Header => Some((
[pdf_writer::types::ArtifactAttachment::Top],
ArtifactSubtype::Header,
)),
ArtifactType::Footer => Some((
[pdf_writer::types::ArtifactAttachment::Bottom],
ArtifactSubtype::Footer,
)),
_ => None,
};

if *at == ArtifactType::Footer {
artifact.attached([pdf_writer::types::ArtifactAttachment::Bottom]);
artifact.subtype(ArtifactSubtype::Footer);
if let Some((attachments, subtype)) = attachments_and_subtype {
if sc.serialize_settings().pdf_version() >= PdfVersion::Pdf17 {
artifact.attached(attachments);
artifact.subtype(subtype);
} else {
sc.register_validation_error(ValidationError::RequiresLaterPdfVersion(
PdfFeature::HeaderFooterArtifactSubtypes,
sc.location,
));
}
}

Expand Down Expand Up @@ -605,6 +618,41 @@ impl Node {
},
}
}

// For tables to be valid in PDF versions before 1.5, we need to ensure that
// each table header cell (`TH`) is inside a table header (`THead`).
// Otherwise, the unsupported `Scope` attribute would be required to express
// the correct semantics.
fn validate_table_structure_before_pdf15(&self, in_header: bool, sc: &mut SerializeContext) {
Comment on lines +621 to +626
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm... Can we remove this for now and add a TODO instead? I do agree it would be nice to have tag tree validation, but I want to do it properly so that we can validate other constraints as well (for example the oens outlined in WCAG). Would like to avoid adding a method just for this specific case now in this PR.

match self {
Node::Group(g) => {
let in_header = in_header || matches!(g.tag, TagKind::THead(_));
if !in_header && matches!(g.tag, TagKind::TH(_)) {
sc.register_validation_error(ValidationError::RequiresLaterPdfVersion(
PdfFeature::TableHeaderScope,
g.tag.as_any().location,
));
}

match g.tag {
TagKind::TBody(_)
| TagKind::TFoot(_)
| TagKind::THead(_)
| TagKind::TR(_)
| TagKind::TD(_)
| TagKind::TH(_) => {
// We want to recurse into table-related tags.
for child in &g.children {
child.validate_table_structure_before_pdf15(in_header, sc);
}
}
// Other tags serve as a boundary for table structure.
_ => {}
}
}
Node::Leaf(_) => {}
}
}
}

impl From<TagGroup> for Node {
Expand Down Expand Up @@ -719,6 +767,12 @@ impl TagGroup {
sc.register_validation_error(ValidationError::MissingAltText(tag.location));
}

if matches!(self.tag, TagKind::Table(_)) && pdf_version < PdfVersion::Pdf15 {
for child in &self.children {
child.validate_table_structure_before_pdf15(false, sc);
}
Comment on lines +770 to +773
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as above.

}

for attr in tag.attrs.iter() {
let Attr::Struct(attr) = attr else {
continue;
Expand Down Expand Up @@ -784,6 +838,12 @@ impl TagGroup {
TableAttr::HeaderScope(scope) => {
if pdf_version >= PdfVersion::Pdf15 {
table_attributes.get().scope(scope.to_pdf());
} else if scope == &TableHeaderScope::Row {
// Without `Scope`, the correct cell to point to is ambiguous.
sc.register_validation_error(ValidationError::RequiresLaterPdfVersion(
PdfFeature::TableHeaderScope,
tag.location,
));
}
}
TableAttr::CellHeaders(headers) => {
Expand Down
17 changes: 11 additions & 6 deletions crates/krilla/src/page.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ use pdf_writer::types::TabOrder;
use pdf_writer::writers::NumberTree;
use pdf_writer::{Chunk, Finish, Ref, TextStr};

use crate::configure::PdfVersion;
use crate::configure::validate::PdfFeature;
use crate::configure::{PdfVersion, ValidationError};
use crate::content::ContentBuilder;
use crate::error::KrillaResult;
use crate::geom::{Rect, Size, Transform};
Expand Down Expand Up @@ -424,11 +425,15 @@ impl InternalPage {
}

// Only required for PDF/UA, but might as well always set it.
if !self.annotations.is_empty()
&& sc.serialize_settings().enable_tagging
&& sc.serialize_settings().pdf_version() >= PdfVersion::Pdf15
{
page.tab_order(TabOrder::StructureOrder);
if !self.annotations.is_empty() && sc.serialize_settings().enable_tagging {
if sc.serialize_settings().pdf_version() >= PdfVersion::Pdf15 {
page.tab_order(TabOrder::StructureOrder);
} else {
sc.register_validation_error(ValidationError::RequiresLaterPdfVersion(
PdfFeature::StructureOrderTabbing,
sc.location,
));
}
Comment on lines +428 to +436
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Am I understanding correctly that this will fail any PDF 1.4 export with tagging enabled as soon as there is at least one annotation. Since the above comment suggests it's only needed for PDF/UA, let's only raise this error for that mode? Adding a simple require_structure_order_tabbing method to Validators should be enough. Let's also add a PDF 1.4 test with tagging and an annotation just to ensure that export succeeds (not a snapshot test, just check that finish.is_ok) so that this regression is covered.

}

page.parent(sc.page_tree_ref());
Expand Down