Skip to content

Commit

Permalink
Gracefully handle errors during subsetting
Browse files Browse the repository at this point in the history
- This is still not optimal: errors are swallowed silently and simply
  cause a fallback to using the original font without subsetting
  • Loading branch information
dnlmlr committed Mar 8, 2023
1 parent b32cbac commit 248974c
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 25 deletions.
53 changes: 39 additions & 14 deletions src/font.rs
Original file line number Diff line number Diff line change
Expand Up @@ -198,11 +198,15 @@ impl ExternalFont {
for (operator, operands) in operations {
match operator.as_str() {
"Tf" => {
let font_name = operands[0].as_name_str().unwrap();
let font_name = operands[0]
.as_name_str()
.expect("PDF Command 'Tf' not followed by Name operand");
font_active = font_name == &self.face_name;
}
"Tj" if font_active => {
let gid_stream = operands[0].as_str().unwrap();
let gid_stream = operands[0]
.as_str()
.expect("PDF Command 'Tj' not followed by String operand");
for b in gid_stream.chunks_exact(2) {
let gid = b[1] as u16 | ((b[0] as u16) << 8);
used_glyphs.insert(gid);
Expand All @@ -211,7 +215,7 @@ impl ExternalFont {
"TJ" if font_active => {
let text_sections = operands[0]
.as_array()
.unwrap()
.expect("PDF Command 'TJ' not followed by Array operand")
.into_iter()
.filter_map(|obj| obj.as_str().ok());

Expand Down Expand Up @@ -250,31 +254,35 @@ impl ExternalFont {
for (operator, operands) in operations {
match operator.as_str() {
"Tf" => {
let font_name = operands[0].as_name_str().unwrap();
let font_name = operands[0]
.as_name_str()
.expect("PDF Command 'Tf' not followed by Name operand");
font_active = font_name == &self.face_name;
}
"Tj" if font_active => {
let gid_stream = operands[0].as_str_mut().unwrap();
let gid_stream = operands[0]
.as_str_mut()
.expect("PDF Command 'Tj' not followed by String operand");
for b in gid_stream.chunks_exact_mut(2) {
let gid = b[1] as u16 | ((b[0] as u16) << 8);

let new_gid = gid_mapping.get(&gid).unwrap();
let new_gid = gid_mapping.get(&gid).unwrap_or(&0);
b[0] = (new_gid >> 8) as u8;
b[1] = (new_gid & 0xff) as u8;
}
}
"TJ" if font_active => {
let text_sections = operands[0]
.as_array_mut()
.unwrap()
.expect("PDF Command 'TJ' not followed by Array operand")
.into_iter()
.filter_map(|obj| obj.as_str_mut().ok());

for gid_stream in text_sections {
for b in gid_stream.chunks_exact_mut(2) {
let gid = b[1] as u16 | ((b[0] as u16) << 8);

let new_gid = gid_mapping.get(&gid).unwrap();
let new_gid = gid_mapping.get(&gid).unwrap_or(&0);
b[0] = (new_gid >> 8) as u8;
b[1] = (new_gid & 0xff) as u8;
}
Expand Down Expand Up @@ -311,12 +319,29 @@ impl ExternalFont {
return None;
}

let font_subset = crate::subsetting::subset(&self.font_bytes, &mut used_glyphs);
self.replace_glyphs(_pages, &font_subset.gid_mapping);
let font: Box<dyn FontData> =
Box::new(TtfFace::from_vec(font_subset.new_font_bytes.clone()).unwrap());
font_data = font;
font_bytes = font_subset.new_font_bytes;
// TODO: This is ugly and will silently fall back to no subsetting on errors
match crate::subsetting::subset(&self.font_bytes, &mut used_glyphs) {
Ok(font_subset) => {
match TtfFace::from_vec(font_subset.new_font_bytes.clone()) {
Ok(face) => {
let font: Box<dyn FontData> = Box::new(face);

self.replace_glyphs(_pages, &font_subset.gid_mapping);

font_data = font;
font_bytes = font_subset.new_font_bytes;
}
Err(_) => {
font_data = self.font_data;
font_bytes = self.font_bytes;
}
}
}
Err(_) => {
font_data = self.font_data;
font_bytes = self.font_bytes;
}
}
} else {
font_data = self.font_data;
font_bytes = self.font_bytes;
Expand Down
31 changes: 20 additions & 11 deletions src/subsetting.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,39 +5,48 @@ use allsorts::{
tables::{cmap::Cmap, FontTableProvider},
tag,
};
use std::collections::{HashMap, HashSet};
use std::{
collections::{HashMap, HashSet},
error::Error,
};

/// Outcome of subsetting a font: the bytes of the reduced font together with
/// the table needed to translate glyph IDs from the original font.
pub(crate) struct FontSubset {
    /// Raw bytes of the newly generated subset font.
    pub(crate) new_font_bytes: Vec<u8>,
    /// Lookup table keyed by a glyph ID of the original font; the value is the
    /// ID that the same glyph has in the subset font.
    pub(crate) gid_mapping: HashMap<u16, u16>,
}

pub(crate) fn subset(font_bytes: &[u8], used_glyphs: &mut HashSet<u16>) -> FontSubset {
let font_file = ReadScope::new(font_bytes).read::<FontData<'_>>().unwrap();
let provider = font_file.table_provider(0).unwrap();
let cmap_data = provider.read_table_data(tag::CMAP).unwrap();
let cmap = ReadScope::new(&cmap_data).read::<Cmap<'_>>().unwrap();
let (_, cmap_subtable) = read_cmap_subtable(&cmap).unwrap().unwrap();
pub(crate) fn subset(
font_bytes: &[u8],
used_glyphs: &mut HashSet<u16>,
) -> Result<FontSubset, Box<dyn Error>> {
let font_file = ReadScope::new(font_bytes).read::<FontData<'_>>()?;
let provider = font_file.table_provider(0)?;
let cmap_data = provider.read_table_data(tag::CMAP)?;
let cmap = ReadScope::new(&cmap_data).read::<Cmap<'_>>()?;
let (_, cmap_subtable) =
read_cmap_subtable(&cmap)?.ok_or(allsorts::error::ParseError::MissingValue)?;

// Prevent `allsorts` from using the MacRoman encoding by always including an unsupported character ('€')
let gid_eur = cmap_subtable.map_glyph('€' as u32).unwrap().unwrap();
let gid_eur = cmap_subtable
.map_glyph('€' as u32)?
.ok_or(allsorts::error::ParseError::MissingValue)?;
used_glyphs.insert(0);
used_glyphs.insert(gid_eur);

let mut glyph_ids: Vec<u16> = used_glyphs.iter().copied().collect();

glyph_ids.sort_unstable();

let new_font_bytes = allsorts::subset::subset(&provider, &glyph_ids).unwrap();
let new_font_bytes = allsorts::subset::subset(&provider, &glyph_ids)?;

let mut gid_mapping = HashMap::new();
for (idx, old_gid) in glyph_ids.into_iter().enumerate() {
gid_mapping.insert(old_gid, idx as u16);
}

FontSubset {
Ok(FontSubset {
new_font_bytes,
gid_mapping,
}
})
}

0 comments on commit 248974c

Please sign in to comment.