Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 25 additions & 5 deletions crates/krilla-tests/src/tagging.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ use krilla::page::PageSettings;
use krilla::paint::{Fill, Stroke};
use krilla::surface::Surface;
use krilla::tagging::{
ArtifactType, BBox, ColumnDimensions, ContentTag, NaiveRgbColor, Node, Sides, SpanTag,
TagGroup, TagTree,
Artifact, ArtifactType, BBox, ColumnDimensions, ContentTag, NaiveRgbColor, Node, Sides,
SpanTag, TagGroup, TagTree,
};
use krilla::tagging::{ListNumbering, Placement, TableHeaderScope, Tag, TagId, WritingMode};
use krilla::text::{Font, TextDirection};
Expand Down Expand Up @@ -178,7 +178,9 @@ fn tagging_multiple_content_tags(document: &mut Document) {
let id1 = surface.start_tagged(ContentTag::Span(SpanTag::empty()));
surface.fill_text_(25.0, "a span");
surface.end_tagged();
let id2 = surface.start_tagged(ContentTag::Artifact(ArtifactType::Header));
let id2 = surface.start_tagged(ContentTag::Artifact(Artifact::with_kind(
ArtifactType::Header,
)));
surface.fill_text_(50.0, "a header artifact");
surface.end_tagged();
let id3 = surface.start_tagged(ContentTag::Other);
Expand All @@ -204,7 +206,9 @@ fn tagging_multiple_content_tags(document: &mut Document) {
surface.pop();
surface.end_tagged();

let id6 = surface.start_tagged(ContentTag::Artifact(ArtifactType::Other));
let id6 = surface.start_tagged(ContentTag::Artifact(Artifact::with_kind(
ArtifactType::Other,
)));
surface.fill_text_(75.0, "a different type of artifact");
surface.end_tagged();

Expand Down Expand Up @@ -494,7 +498,7 @@ fn tagging_tag_attributes(document: &mut Document) {
let mut page = document.start_page();
let mut surface = page.surface();

let logo = surface.start_tagged(ContentTag::Artifact(ArtifactType::Other));
let logo = surface.start_tagged(ContentTag::Artifact(Artifact::default()));
surface.outline_text_(100.0, "NASA");
surface.end_tagged();

Expand All @@ -511,6 +515,22 @@ fn tagging_tag_attributes(document: &mut Document) {
document.set_tag_tree(tag_tree);
}

// Snapshot test: tags a piece of outlined text as a watermark artifact that
// carries an explicit bounding box.
#[snapshot(document)]
fn tagging_artifact_subtypes(document: &mut Document) {
    let mut page = document.start_page();
    let mut surface = page.surface();

    // Build the artifact up front so the tagging call itself stays short.
    let bounds = Rect::from_xywh(1.0, 88.0, 21.0, 10.0).unwrap();
    let watermark = Artifact::new(ArtifactType::Watermark, Some(bounds));

    surface.start_tagged(ContentTag::Artifact(watermark));
    surface.outline_text_(100.0, "++");
    surface.end_tagged();

    surface.finish();
    page.finish();
}

#[snapshot(document, settings_15)]
fn tagging_figure_bounds(document: &mut Document) {
document.set_metadata(Metadata::new().title("Figure".into()).language("en".into()));
Expand Down
6 changes: 4 additions & 2 deletions crates/krilla-tests/src/validate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use krilla::num::NormalizedF32;
use krilla::outline::Outline;
use krilla::page::Page;
use krilla::paint::{Fill, FillRule, LinearGradient, SpreadMethod};
use krilla::tagging::{ArtifactType, ContentTag, SpanTag, TagGroup, TagKind, TagTree};
use krilla::tagging::{Artifact, ArtifactType, ContentTag, SpanTag, TagGroup, TagKind, TagTree};
use krilla::tagging::{ListNumbering, TableHeaderScope, Tag};
use krilla::text::{Font, TextDirection};
use krilla::text::{GlyphId, KrillaGlyph};
Expand Down Expand Up @@ -322,7 +322,9 @@ pub(crate) fn validate_pdf_tagged_full_example(document: &mut Document) {
);
surface.end_tagged();

let id2 = surface.start_tagged(ContentTag::Artifact(ArtifactType::Header));
let id2 = surface.start_tagged(ContentTag::Artifact(Artifact::with_kind(
ArtifactType::Header,
)));
surface.set_fill(Some(red_fill(1.0)));
surface.draw_path(&rect_to_path(30.0, 30.0, 70.0, 70.0));
surface.end_tagged();
Expand Down
5 changes: 4 additions & 1 deletion crates/krilla/src/content.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,10 @@ impl ContentBuilder {
properties.pairs([(Name(b"MCID"), mcid)]);
}

tag.write_properties(sc, properties);
// Page height extracted from transform and passed to function to allow
// its dependants to flip the y-axis, mirroring Krilla conventions.
let page_height = self.root_transform.ty();
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this really correct? This function will be called on the last sub_builder, which doesn't necessarily have the page root transform right?
I think to be correct we should use the root_transform of the root_builder inside Surface::start_tagged.
Also instead of extracting the page_height we could just pass through the entire transform.

tag.write_properties(sc, properties, page_height);
}

pub(crate) fn end_marked_content(&mut self) {
Expand Down
160 changes: 132 additions & 28 deletions crates/krilla/src/interchange/tagging/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,14 +131,15 @@ use std::collections::btree_map::Entry;
use std::collections::{BTreeMap, HashMap};
use std::io::Write as _;

use pdf_writer::types::{ArtifactSubtype, RoleMapOpts, StructRole, StructRole2};
use pdf_writer::types::{RoleMapOpts, StructRole, StructRole2};
use pdf_writer::writers::{PropertyList, StructElement};
use pdf_writer::{Chunk, Finish, Name, Ref, Str, TextStr};
use smallvec::SmallVec;

use crate::chunk_container::ChunkContainer;
use crate::configure::{PdfVersion, ValidationError};
use crate::error::{KrillaError, KrillaResult};
use crate::geom::Rect;
use crate::page::page_root_transform;
use crate::serialize::SerializeContext;

Expand All @@ -147,26 +148,120 @@ pub use tag::*;
pub mod fmt;
mod tag;

/// An artifact that should not be part of the accessible structure.
///
/// Both fields are public, so the bounding-box requirement for background
/// artifacts is only enforced when going through [`Artifact::new`].
#[derive(Copy, Clone, Debug, PartialEq, Default)]
pub struct Artifact {
/// The type of the artifact.
pub kind: ArtifactType,
/// The bounding box that encloses the artifact's visual content. Required
/// for background artifacts.
pub bbox: Option<Rect>,
}

impl Artifact {
    /// Build an artifact from its type and an optional bounding box.
    ///
    /// # Panics
    ///
    /// Panics if `kind` is [`ArtifactType::Background`] and `bbox` is `None`.
    pub fn new(kind: ArtifactType, bbox: Option<Rect>) -> Self {
        assert!(
            kind != ArtifactType::Background || bbox.is_some(),
            "Background artifacts must have a bounding box"
        );

        Self { kind, bbox }
    }

    /// Build an artifact of the given type without a bounding box.
    ///
    /// # Panics
    ///
    /// This forwards to [`Artifact::new`] with no bounding box, so it panics
    /// when `kind` is [`ArtifactType::Background`].
    pub fn with_kind(kind: ArtifactType) -> Self {
        Self::new(kind, None)
    }

    /// Whether the artifact requires a property list.
    ///
    /// That is the case when a bounding box is set, or when the artifact type
    /// (after being mapped for `pdf_version`) has a corresponding PDF
    /// artifact type.
    pub(crate) fn requires_properties(self, pdf_version: PdfVersion) -> bool {
        if self.bbox.is_some() {
            return true;
        }

        let effective_kind = self.kind.map_pdf_version(pdf_version);
        effective_kind.to_pdf_artifact_type().is_some()
    }
}

/// A type of artifact.
#[derive(Copy, Clone, Debug, PartialEq)]
#[derive(Copy, Clone, Debug, PartialEq, Default)]
pub enum ArtifactType {
/// The header of a page.
Header,
/// The footer of the page.
Footer,
/// For text in the back- or foreground of all pages.
Watermark,
/// Page numbers.
PageNumber,
/// Numbering artifacts before lines.
LineNumber,
/// Areas where content was formerly present but has since been removed.
Redaction,
/// Bates numbering.
Bates,
/// Other artifacts arising from pagination not covered by the above variants.
PaginationOther,
/// Purely cosmetic typographical or design elements.
Layout,
/// Page artifacts, such as for example cut marks or color bars.
Page,
/// Any other type of artifact (e.g. table strokes).
/// The background of a page or a graphical element.
Background,
/// Any other type of artifact.
#[default]
Other,
}

impl ArtifactType {
pub(crate) fn requires_properties(&self) -> bool {
pub(crate) fn map_pdf_version(self, version: PdfVersion) -> Self {
match self {
Self::PageNumber | Self::LineNumber | Self::Redaction | Self::Bates
if version < PdfVersion::Pdf20 =>
{
ArtifactType::PaginationOther
}
Self::Header | Self::Footer | Self::Watermark if version < PdfVersion::Pdf17 => {
ArtifactType::PaginationOther
}
Self::Background if version < PdfVersion::Pdf17 => ArtifactType::Other,
_ => self,
}
}

pub(crate) fn to_pdf_artifact_type(self) -> Option<pdf_writer::types::ArtifactType> {
match self {
ArtifactType::Header => Some(pdf_writer::types::ArtifactType::Pagination),
ArtifactType::Footer => Some(pdf_writer::types::ArtifactType::Pagination),
ArtifactType::Watermark => Some(pdf_writer::types::ArtifactType::Pagination),
ArtifactType::PageNumber => Some(pdf_writer::types::ArtifactType::Pagination),
ArtifactType::LineNumber => Some(pdf_writer::types::ArtifactType::Pagination),
ArtifactType::Redaction => Some(pdf_writer::types::ArtifactType::Pagination),
ArtifactType::Bates => Some(pdf_writer::types::ArtifactType::Pagination),
ArtifactType::PaginationOther => Some(pdf_writer::types::ArtifactType::Pagination),
ArtifactType::Layout => Some(pdf_writer::types::ArtifactType::Layout),
ArtifactType::Page => Some(pdf_writer::types::ArtifactType::Page),
ArtifactType::Background => Some(pdf_writer::types::ArtifactType::Background),
ArtifactType::Other => None,
}
}

pub(crate) fn to_pdf_artifact_subtype(
self,
) -> Option<pdf_writer::types::ArtifactSubtype<'static>> {
match self {
ArtifactType::Header => true,
ArtifactType::Footer => true,
ArtifactType::Page => true,
ArtifactType::Other => false,
ArtifactType::Header => Some(pdf_writer::types::ArtifactSubtype::Header),
ArtifactType::Footer => Some(pdf_writer::types::ArtifactSubtype::Footer),
ArtifactType::Watermark => Some(pdf_writer::types::ArtifactSubtype::Watermark),
ArtifactType::PageNumber => Some(pdf_writer::types::ArtifactSubtype::PageNumber),
ArtifactType::LineNumber => Some(pdf_writer::types::ArtifactSubtype::LineNumber),
ArtifactType::Redaction => Some(pdf_writer::types::ArtifactSubtype::Redaction),
ArtifactType::Bates => Some(pdf_writer::types::ArtifactSubtype::Bates),
_ => None,
}
}
}
Expand All @@ -189,7 +284,7 @@ pub enum ContentTag<'a> {
/// Artifacts represent pieces of content that are not really part of the logical structure
/// of a document and should be excluded in the logical tree. These include for example headers,
/// footers, page background and similar.
Artifact(ArtifactType),
Artifact(Artifact),
/// A content tag that wraps some text with specific properties.
///
/// Spans should not be too long. At most, they should contain a single line of text, but they
Expand All @@ -211,33 +306,42 @@ impl ContentTag<'_> {
}
}

pub(crate) fn write_properties(&self, sc: &mut SerializeContext, mut properties: PropertyList) {
pub(crate) fn write_properties(
&self,
sc: &mut SerializeContext,
mut properties: PropertyList,
page_height: f32,
) {
match self {
ContentTag::Artifact(at) => {
let mut artifact = properties.artifact();

let artifact_type = match at {
ArtifactType::Header => pdf_writer::types::ArtifactType::Pagination,
ArtifactType::Footer => pdf_writer::types::ArtifactType::Pagination,
ArtifactType::Page => pdf_writer::types::ArtifactType::Page,
// This method should only be called with artifacts that actually
// require a property.
ArtifactType::Other => unreachable!(),
};
ContentTag::Artifact(artifact) => {
let at = artifact
.kind
.map_pdf_version(sc.serialize_settings().pdf_version());
let mut artifact_props = properties.artifact();

if let Some(bbox) = artifact.bbox {
let transform = page_root_transform(page_height);
let actual_rect = bbox.transform(transform).unwrap();
artifact_props.bounding_box(actual_rect.to_pdf_rect());
}

if sc.serialize_settings().pdf_version() >= PdfVersion::Pdf17 {
if *at == ArtifactType::Header {
artifact.attached([pdf_writer::types::ArtifactAttachment::Top]);
artifact.subtype(ArtifactSubtype::Header);
if at == ArtifactType::Header {
artifact_props.attached([pdf_writer::types::ArtifactAttachment::Top]);
}

if *at == ArtifactType::Footer {
artifact.attached([pdf_writer::types::ArtifactAttachment::Bottom]);
artifact.subtype(ArtifactSubtype::Footer);
if at == ArtifactType::Footer {
artifact_props.attached([pdf_writer::types::ArtifactAttachment::Bottom]);
}

if let Some(subtype) = at.to_pdf_artifact_subtype() {
artifact_props.subtype(subtype);
}
}

artifact.kind(artifact_type);
if let Some(artifact_type) = at.to_pdf_artifact_type() {
artifact_props.kind(artifact_type);
}
}
ContentTag::Span(SpanTag {
lang,
Expand Down
4 changes: 2 additions & 2 deletions crates/krilla/src/surface.rs
Original file line number Diff line number Diff line change
Expand Up @@ -180,8 +180,8 @@ impl<'a> Surface<'a> {
// the API of krilla that conflates artifacts with tagged content,
// for the sake of simplicity. But the user of the library does not need to know
// about this.
ContentTag::Artifact(at) => {
if at.requires_properties() {
ContentTag::Artifact(artifact) => {
if artifact.requires_properties(self.sc.serialize_settings().pdf_version()) {
self.bd
.get_mut()
.start_marked_content_with_properties(self.sc, None, tag);
Expand Down
Loading
Loading