diff --git a/src/codecs/tiff.rs b/src/codecs/tiff.rs
index d945619cae..6f5ffbb3cd 100644
--- a/src/codecs/tiff.rs
+++ b/src/codecs/tiff.rs
@@ -22,6 +22,8 @@ use crate::{utils, ImageDecoder, ImageEncoder, ImageFormat};
 const TAG_XML_PACKET: Tag = Tag::Unknown(700);
 const TAG_YCBCR_COEFFICIENTS: Tag = Tag::Unknown(529);
 const TAG_YCBCR_SUBSAMPLING: Tag = Tag::Unknown(530);
+const TAG_RICHTIFFIPTC: Tag = Tag::Unknown(33723);
+const TAG_PHOTOSHOP: Tag = Tag::Unknown(34377);
 
 /// Decoder for TIFF images.
 pub struct TiffDecoder<R>
@@ -431,6 +433,44 @@ impl<R: BufRead + Seek> ImageDecoder for TiffDecoder<R> {
         Ok(())
     }
 
+    /// Returns the raw IPTC metadata stored in the image, if any.
+    ///
+    /// The Photoshop tag (34377) is checked first: when its Image Resource Blocks contain an
+    /// IPTC resource (ID `0x0404`), the complete tag payload is returned. Otherwise the
+    /// RichTIFFIPTC tag (33723) is returned as bytes. `Ok(None)` means neither tag is usable.
+    fn iptc_metadata(&mut self) -> ImageResult<Option<Vec<u8>>> {
+        let Some(decoder) = &mut self.inner else {
+            return Ok(None);
+        };
+
+        // Try Photoshop tag. The whole IRB payload is returned, not only the IPTC block.
+        if let Ok(data) = decoder.get_tag_u8_vec(TAG_PHOTOSHOP) {
+            if extract_iptc_from_photoshop_irb(&data).is_some() {
+                return Ok(Some(data));
+            }
+        }
+
+        // Try RichTIFFIPTC tag. Standard representation: defined as UNDEFINED or BYTE.
+        if let Ok(bytes) = decoder.get_tag_u8_vec(TAG_RICHTIFFIPTC) {
+            if !bytes.is_empty() {
+                return Ok(Some(bytes));
+            }
+        }
+
+        // Fallback: Adobe software sometimes incorrectly writes this as LONG (u32).
+        // We convert the u32 integers back to raw little-endian bytes to recover the payload.
+        // NOTE(review): this presumably only restores the original byte sequence for
+        // little-endian files; confirm the behavior for big-endian (`MM`) TIFFs.
+        if let Ok(longs) = decoder.get_tag_u32_vec(TAG_RICHTIFFIPTC) {
+            if !longs.is_empty() {
+                let bytes: Vec<u8> = longs.into_iter().flat_map(|v| v.to_le_bytes()).collect();
+                return Ok(Some(bytes));
+            }
+        }
+
+        Ok(None)
+    }
+
     fn read_image(mut self, buf: &mut [u8]) -> ImageResult<()> {
         assert_eq!(u64::try_from(buf.len()), Ok(self.total_bytes()));
 
@@ -692,3 +732,82 @@ impl<W: Write + Seek> ImageEncoder for TiffEncoder<W> {
         Ok(())
     }
 }
+
+/// Cursor over a Photoshop Image Resource Block (IRB) byte stream.
+///
+/// Every read consumes bytes from the front of `data`; multi-byte integers are big-endian.
+struct IrbReader<'a> {
+    data: &'a [u8],
+}
+
+impl<'a> IrbReader<'a> {
+    fn new(data: &'a [u8]) -> Self {
+        Self { data }
+    }
+
+    /// Consumes and returns the next `len` bytes, or `None` if fewer remain.
+    fn read_slice(&mut self, len: usize) -> Option<&'a [u8]> {
+        if self.data.len() < len {
+            return None;
+        }
+        let (head, tail) = self.data.split_at(len);
+        self.data = tail;
+        Some(head)
+    }
+
+    /// Consumes a big-endian `u16`.
+    fn read_u16(&mut self) -> Option<u16> {
+        let bytes = self.read_slice(2)?;
+        Some(u16::from_be_bytes([bytes[0], bytes[1]]))
+    }
+
+    /// Consumes a big-endian `u32`.
+    fn read_u32(&mut self) -> Option<u32> {
+        let bytes = self.read_slice(4)?;
+        Some(u32::from_be_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]))
+    }
+
+    /// Skips one pad byte when `size` is odd and data remains.
+    fn skip_padding(&mut self, size: usize) {
+        if !size.is_multiple_of(2) && !self.data.is_empty() {
+            self.data = &self.data[1..];
+        }
+    }
+}
+
+/// Returns the data of the first IPTC resource (ID `0x0404`) in a Photoshop IRB stream.
+///
+/// Parsing stops at the first block that does not start with the `8BIM` signature; a
+/// truncated block yields `None`.
+fn extract_iptc_from_photoshop_irb(data: &[u8]) -> Option<&[u8]> {
+    const SIGNATURE: &[u8] = b"8BIM";
+    const IPTC_ID: u16 = 0x0404;
+    // Signature (4) + resource ID (2) + padded empty name (2) + data size (4).
+    const MIN_IRB_BLOCK_SIZE: usize = 12;
+
+    let mut reader = IrbReader::new(data);
+
+    while reader.data.len() >= MIN_IRB_BLOCK_SIZE {
+        let sig = reader.read_slice(SIGNATURE.len())?;
+        if sig != SIGNATURE {
+            break;
+        }
+
+        let id = reader.read_u16()?;
+
+        // Resource name: length byte plus name, padded to an even total size.
+        let name_len = reader.read_slice(1)?[0] as usize;
+        reader.read_slice(name_len)?;
+        reader.skip_padding(1 + name_len);
+
+        let size = reader.read_u32()? as usize;
+        let block_data = reader.read_slice(size)?;
+
+        if id == IPTC_ID {
+            return Some(block_data);
+        }
+
+        reader.skip_padding(size);
+    }
+    None
+}
diff --git a/tests/images/tiff/testsuite/iptc_both_tags_malformed.tiff b/tests/images/tiff/testsuite/iptc_both_tags_malformed.tiff
new file mode 100644
index 0000000000..c7a59710a7
Binary files /dev/null and b/tests/images/tiff/testsuite/iptc_both_tags_malformed.tiff differ
diff --git a/tests/images/tiff/testsuite/iptc_photoshop.tiff b/tests/images/tiff/testsuite/iptc_photoshop.tiff
new file mode 100644
index 0000000000..611decc890
Binary files /dev/null and b/tests/images/tiff/testsuite/iptc_photoshop.tiff differ
diff --git a/tests/images/tiff/testsuite/iptc_standard.tiff b/tests/images/tiff/testsuite/iptc_standard.tiff
new file mode 100644
index 0000000000..fea1703a24
Binary files /dev/null and b/tests/images/tiff/testsuite/iptc_standard.tiff differ
diff --git a/tests/images/tiff/testsuite/iptc_standard_normal.tiff b/tests/images/tiff/testsuite/iptc_standard_normal.tiff
new file mode 100644
index 0000000000..a3475c5380
Binary files /dev/null and b/tests/images/tiff/testsuite/iptc_standard_normal.tiff differ
diff --git a/tests/metadata.rs b/tests/metadata.rs
index 21c623033f..b04233c198 100644
--- a/tests/metadata.rs
+++ b/tests/metadata.rs
@@ -159,3 +159,84 @@ fn test_read_avif_compatible_brands() -> Result<(), image::ImageError> {
 
     Ok(())
 }
+
+#[test]
+#[cfg(feature = "tiff")]
+fn test_read_iptc_tiff_no_iptc_metadata() -> Result<(), image::ImageError> {
+    const PATH: &str = "tests/images/tiff/testsuite/ycbcr_lzw_bt709.tif";
+    let img_path = PathBuf::from_str(PATH).unwrap();
+
+    let data = fs::read(img_path)?;
+    let mut decoder = TiffDecoder::new(std::io::Cursor::new(data))?;
+    let metadata = decoder.iptc_metadata()?;
+    assert!(metadata.is_none());
+
+    Ok(())
+}
+
+#[test]
+#[cfg(feature = "tiff")]
+fn test_read_iptc_tiff_both_tags_malformed() -> Result<(), image::ImageError> {
+    // The testfile contains the photoshop tag with invalid IRB data and RichTIFFIPTC tag with type UNDEFINED but count 0
+    const PATH: &str = "tests/images/tiff/testsuite/iptc_both_tags_malformed.tiff";
+    let img_path = PathBuf::from_str(PATH).unwrap();
+
+    let data = fs::read(img_path)?;
+    let mut decoder = TiffDecoder::new(std::io::Cursor::new(data))?;
+    let metadata = decoder.iptc_metadata()?;
+    assert!(metadata.is_none());
+
+    Ok(())
+}
+
+#[test]
+#[cfg(feature = "tiff")]
+fn test_read_iptc_tiff_standard_normal() -> Result<(), image::ImageError> {
+    const PATH: &str = "tests/images/tiff/testsuite/iptc_standard_normal.tiff";
+    let img_path = PathBuf::from_str(PATH).unwrap();
+
+    let data = fs::read(img_path)?;
+    let mut decoder = TiffDecoder::new(std::io::Cursor::new(data))?;
+    let metadata = decoder.iptc_metadata()?;
+    assert!(metadata.is_some());
+
+    let expected_iptc_metadata = &[1, 2, 3, 4];
+    assert_eq!(expected_iptc_metadata, metadata.unwrap().as_slice());
+
+    Ok(())
+}
+
+#[test]
+#[cfg(feature = "tiff")]
+fn test_read_iptc_tiff_standard() -> Result<(), image::ImageError> {
+    // The testfile was generated with the RichTIFFIPTC tag stored as LONG instead of UNDEFINED/BYTE to test the fallback recovery logic
+    const PATH: &str = "tests/images/tiff/testsuite/iptc_standard.tiff";
+    let img_path = PathBuf::from_str(PATH).unwrap();
+
+    let data = fs::read(img_path)?;
+    let mut decoder = TiffDecoder::new(std::io::Cursor::new(data))?;
+    let metadata = decoder.iptc_metadata()?;
+    assert!(metadata.is_some());
+
+    let expected_iptc_metadata = &[1, 2, 3, 4];
+    assert_eq!(expected_iptc_metadata, metadata.unwrap().as_slice());
+
+    Ok(())
+}
+
+#[test]
+#[cfg(feature = "tiff")]
+fn test_read_iptc_tiff_photoshop() -> Result<(), image::ImageError> {
+    const PATH: &str = "tests/images/tiff/testsuite/iptc_photoshop.tiff";
+    let img_path = PathBuf::from_str(PATH).unwrap();
+
+    let data = fs::read(img_path)?;
+    let mut decoder = TiffDecoder::new(std::io::Cursor::new(data))?;
+    let metadata = decoder.iptc_metadata()?;
+    assert!(metadata.is_some());
+
+    let expected_iptc_metadata = vec![b'8', b'B', b'I', b'M', 4, 4, 0, 0, 0, 0, 0, 4, 5, 6, 7, 8];
+    assert_eq!(expected_iptc_metadata, metadata.unwrap());
+
+    Ok(())
+}