From 248974c64d2536ffaa71c0e35dc5ac0221459ff2 Mon Sep 17 00:00:00 2001 From: Daniel M Date: Tue, 7 Mar 2023 19:50:46 +0100 Subject: [PATCH] Gracefully handle errors during subsetting - This is still not optimal since errors are handled silently and simply cause a fallback to not using subsetting --- src/font.rs | 53 ++++++++++++++++++++++++++++++++++------------- src/subsetting.rs | 31 +++++++++++++++++---------- 2 files changed, 59 insertions(+), 25 deletions(-) diff --git a/src/font.rs b/src/font.rs index 956c555..596db34 100644 --- a/src/font.rs +++ b/src/font.rs @@ -198,11 +198,15 @@ impl ExternalFont { for (operator, operands) in operations { match operator.as_str() { "Tf" => { - let font_name = operands[0].as_name_str().unwrap(); + let font_name = operands[0] + .as_name_str() + .expect("PDF Command 'Tf' not followed by Name operand"); font_active = font_name == &self.face_name; } "Tj" if font_active => { - let gid_stream = operands[0].as_str().unwrap(); + let gid_stream = operands[0] + .as_str() + .expect("PDF Command 'Tj' not followed by String operand"); for b in gid_stream.chunks_exact(2) { let gid = b[1] as u16 | ((b[0] as u16) << 8); used_glyphs.insert(gid); @@ -211,7 +215,7 @@ impl ExternalFont { "TJ" if font_active => { let text_sections = operands[0] .as_array() - .unwrap() + .expect("PDF Command 'TJ' not followed by Array operand") .into_iter() .filter_map(|obj| obj.as_str().ok()); @@ -250,15 +254,19 @@ impl ExternalFont { for (operator, operands) in operations { match operator.as_str() { "Tf" => { - let font_name = operands[0].as_name_str().unwrap(); + let font_name = operands[0] + .as_name_str() + .expect("PDF Command 'Tf' not followed by Name operand"); font_active = font_name == &self.face_name; } "Tj" if font_active => { - let gid_stream = operands[0].as_str_mut().unwrap(); + let gid_stream = operands[0] + .as_str_mut() + .expect("PDF Command 'Tj' not followed by String operand"); for b in gid_stream.chunks_exact_mut(2) { let gid = b[1] as u16 | ((b[0] as u16) << 8); - let new_gid = gid_mapping.get(&gid).unwrap(); + let new_gid = gid_mapping.get(&gid).unwrap_or(&0); b[0] = (new_gid >> 8) as u8; b[1] = (new_gid & 0xff) as u8; } @@ -266,7 +274,7 @@ impl ExternalFont { "TJ" if font_active => { let text_sections = operands[0] .as_array_mut() - .unwrap() + .expect("PDF Command 'TJ' not followed by Array operand") .into_iter() .filter_map(|obj| obj.as_str_mut().ok()); @@ -274,7 +282,7 @@ impl ExternalFont { for b in gid_stream.chunks_exact_mut(2) { let gid = b[1] as u16 | ((b[0] as u16) << 8); - let new_gid = gid_mapping.get(&gid).unwrap(); + let new_gid = gid_mapping.get(&gid).unwrap_or(&0); b[0] = (new_gid >> 8) as u8; b[1] = (new_gid & 0xff) as u8; } @@ -311,12 +319,29 @@ impl ExternalFont { return None; } - let font_subset = crate::subsetting::subset(&self.font_bytes, &mut used_glyphs); - self.replace_glyphs(_pages, &font_subset.gid_mapping); - let font: Box = - Box::new(TtfFace::from_vec(font_subset.new_font_bytes.clone()).unwrap()); - font_data = font; - font_bytes = font_subset.new_font_bytes; + // TODO: This is ugly and will silently fall back to no subsetting on errors + match crate::subsetting::subset(&self.font_bytes, &mut used_glyphs) { + Ok(font_subset) => { + match TtfFace::from_vec(font_subset.new_font_bytes.clone()) { + Ok(face) => { + let font: Box = Box::new(face); + + self.replace_glyphs(_pages, &font_subset.gid_mapping); + + font_data = font; + font_bytes = font_subset.new_font_bytes; + } + Err(_) => { + font_data = self.font_data; + font_bytes = self.font_bytes; + } + } + } + Err(_) => { + font_data = self.font_data; + font_bytes = self.font_bytes; + } + } } else { font_data = self.font_data; font_bytes = self.font_bytes; diff --git a/src/subsetting.rs b/src/subsetting.rs index cfe09f2..e723846 100644 --- a/src/subsetting.rs +++ b/src/subsetting.rs @@ -5,7 +5,10 @@ use allsorts::{ tables::{cmap::Cmap, FontTableProvider}, tag, }; -use std::collections::{HashMap, HashSet}; +use std::{ + collections::{HashMap, HashSet}, + error::Error, +}; pub(crate) struct FontSubset { pub(crate) new_font_bytes: Vec, @@ -13,15 +16,21 @@ pub(crate) struct FontSubset { pub(crate) gid_mapping: HashMap, } -pub(crate) fn subset(font_bytes: &[u8], used_glyphs: &mut HashSet) -> FontSubset { - let font_file = ReadScope::new(font_bytes).read::>().unwrap(); - let provider = font_file.table_provider(0).unwrap(); - let cmap_data = provider.read_table_data(tag::CMAP).unwrap(); - let cmap = ReadScope::new(&cmap_data).read::>().unwrap(); - let (_, cmap_subtable) = read_cmap_subtable(&cmap).unwrap().unwrap(); +pub(crate) fn subset( + font_bytes: &[u8], + used_glyphs: &mut HashSet, +) -> Result> { + let font_file = ReadScope::new(font_bytes).read::>()?; + let provider = font_file.table_provider(0)?; + let cmap_data = provider.read_table_data(tag::CMAP)?; + let cmap = ReadScope::new(&cmap_data).read::>()?; + let (_, cmap_subtable) = + read_cmap_subtable(&cmap)?.ok_or(allsorts::error::ParseError::MissingValue)?; // Prevent `allsorts` from using MacRoman encoding by using a non supported character - let gid_eur = cmap_subtable.map_glyph('€' as u32).unwrap().unwrap(); + let gid_eur = cmap_subtable + .map_glyph('€' as u32)? + .ok_or(allsorts::error::ParseError::MissingValue)?; used_glyphs.insert(0); used_glyphs.insert(gid_eur); @@ -29,15 +38,15 @@ pub(crate) fn subset(font_bytes: &[u8], used_glyphs: &mut HashSet) -> FontS glyph_ids.sort_unstable(); - let new_font_bytes = allsorts::subset::subset(&provider, &glyph_ids).unwrap(); + let new_font_bytes = allsorts::subset::subset(&provider, &glyph_ids)?; let mut gid_mapping = HashMap::new(); for (idx, old_gid) in glyph_ids.into_iter().enumerate() { gid_mapping.insert(old_gid, idx as u16); } - FontSubset { + Ok(FontSubset { new_font_bytes, gid_mapping, - } + }) }