Skip to content

Commit

Permalink
adds PDFBox2 thumbnail renderer; deletes JPedal
Browse files Browse the repository at this point in the history
  • Loading branch information
jeremybmerrill committed Apr 14, 2017
1 parent b340017 commit 33c8588
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 201 deletions.
165 changes: 0 additions & 165 deletions lib/jars/JPedal-LICENSE.txt

This file was deleted.

Binary file removed lib/jars/jpedal_lgpl.jar
Binary file not shown.
2 changes: 1 addition & 1 deletion lib/tabula_job_executor/jobs/generate_thumbnails.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def perform
output_dir = options[:output_dir]
thumbnail_sizes = options[:thumbnail_sizes]

generator = JPedalThumbnailGenerator.new(filepath, output_dir, thumbnail_sizes)
generator = PDFBox2ThumbnailGenerator.new(filepath, output_dir, thumbnail_sizes)
generator.add_observer(self, :at)
generator.generate_thumbnails!

Expand Down
70 changes: 35 additions & 35 deletions lib/thumbnail_generator.rb
Original file line number Diff line number Diff line change
@@ -1,18 +1,20 @@
require 'java'
require 'observer'

java.lang.System.setProperty('org.jpedal.jai', 'true')
require_relative './jars/jpedal_lgpl.jar'
require_relative '../lib/jars/tabula-1.0.0-SNAPSHOT-jar-with-dependencies.jar'

This comment has been minimized.

Copy link
@jazzido

jazzido Apr 14, 2017

Contributor

I don't think we need this require.

This comment has been minimized.

Copy link
@jeremybmerrill

jeremybmerrill Apr 14, 2017

Author Member

Yes, you're right. I needed it to test the command line thingy, but it belongs in the `if __FILE__ == $0` bit at the bottom of the file.


java_import javax.imageio.ImageIO
java_import java.awt.image.BufferedImage
java_import java.awt.Image

java_import org.jpedal.PdfDecoder
java_import org.jpedal.fonts.FontMappings
java_import org.apache.pdfbox.rendering.PDFRenderer
java_import org.apache.pdfbox.pdmodel.PDDocument
java_import java.io.ByteArrayOutputStream


class AbstractThumbnailGenerator
include Observable
SIZE = 800

def initialize(pdf_filename, output_directory, sizes=[2048, 560])
raise Errno::ENOENT unless File.directory?(output_directory)
Expand Down Expand Up @@ -48,41 +50,38 @@ def generate_thumbnails!
end
end

# NOTE(review): this span is a rendered diff without +/- markers. The
# JPedal-based lines (@decoder, PdfDecoder, FontMappings) appear to be the
# removed version and the PDFBox-based lines (@pdf_document, PDFRenderer) the
# added version -- confirm against the commit before treating it as one class.
class JPedalThumbnailGenerator < AbstractThumbnailGenerator
# Thumbnail generator that renders PDF pages with PDFBox's PDFRenderer and
# writes them as PNG files into the output directory.
class PDFBox2ThumbnailGenerator < AbstractThumbnailGenerator
# Forwards all three arguments to the superclass, then opens the PDF.
# pdf_filename::     path of the PDF to open
# output_directory:: directory the PNG files are written to
# sizes::            thumbnail widths; consumed by the JPedal loop below, but
#                    not referenced by the PDFBox loop (see note there)
def initialize(pdf_filename, output_directory, sizes=[2048, 560])
super(pdf_filename, output_directory, sizes)
@decoder = PdfDecoder.new(true)
FontMappings.setFontReplacements
@decoder.openPdfFile(pdf_filename)
@decoder.setExtractionMode(0, 1.0)
@decoder.useHiResScreenDisplay(true)
# PDFBox version: keep the loaded document so generate_thumbnails! can render from it.
@pdf_document = PDDocument.load(java.io.File.new(pdf_filename))
end

# Renders every page of the document, writes one PNG per page and reports
# progress to observers after each page.
def generate_thumbnails!
total_pages = @decoder.getPageCount

total_pages.times do |i|

begin
# JPedal pages are addressed 1-based, hence i+1.
image = @decoder.getPageAsImage(i+1);
image_w, image_h = image.getWidth, image.getHeight

# One output file per requested width; height is scaled by the same factor.
@sizes.each do |s|
scale = s.to_f / image_w.to_f
bi = BufferedImage.new(s, image_h * scale, image.getType)
bi.getGraphics.drawImage(image.getScaledInstance(s, image_h * scale, Image::SCALE_SMOOTH), 0, 0, nil)
ImageIO.write(bi,
'png',
java.io.File.new(File.join(@output_directory,
"document_#{s}_#{i+1}.png")))
# Observable only delivers a notification after `changed` has been called.
changed
notify_observers(i+1, total_pages, "generating page thumbnails...")
end
rescue java.lang.RuntimeException
# TODO What?
end
# PDFBox version starts here: a single renderer is reused for all pages.
renderer = PDFRenderer.new(@pdf_document);
total_pages = @pdf_document.get_number_of_pages

total_pages.times do |pi|
# The page index is passed 0-based; the page is rendered at 75 DPI.
image = renderer.render_image_with_dpi(pi, 75);
imageWidth = image.width # was get_width
imageHeight = image.height # was get_height
# NOTE(review): only the SIZE constant is used as the target width here;
# @sizes / the `sizes` argument is not referenced in this loop, unlike the
# JPedal loop above. Confirm this is intended.
scale = SIZE / imageWidth.to_f

# Scale to SIZE pixels wide, keeping the aspect ratio (height is rounded).
bi = BufferedImage.new(SIZE, (imageHeight * scale).round, image.type);
bi.get_graphics.draw_image(image.get_scaled_instance(SIZE, (imageHeight * scale).round, Image::SCALE_SMOOTH), 0, 0, nil);

# NOTE(review): `out` is written to but never read in the visible code, so
# the PNG is encoded twice per page. Looks like leftover code -- confirm.
out = ByteArrayOutputStream.new
ImageIO.write(bi, "png", out);

# File names use a 1-based page number: document_<SIZE>_<page>.png
filename = "document_#{SIZE}_#{pi + 1}.png"
ImageIO.write(bi,
'png',
java.io.File.new(File.join(@output_directory,
filename)))
STDERR.puts "Writing page thumbnail #{filename}"
# NOTE(review): `changed` is not called before notify_observers in this loop
# (the JPedal loop above did call it). Ruby's Observable#notify_observers
# does nothing unless the changed flag is set, so observers presumably
# receive no progress updates -- verify.
notify_observers(pi+1, total_pages, "generating page thumbnails...")
end
@decoder.closePdfFile

# NOTE(review): not wrapped in begin/ensure, so an exception raised while
# rendering or writing would leave the document open.
@pdf_document.close();

end
end

Expand All @@ -95,7 +94,8 @@ def update(page, total_pages)
end

#pdftg = JPedalThumbnailGenerator.new(ARGV[0], '/tmp', [560])
pdftg = MUDrawThumbnailGenerator.new(ARGV[0], '/tmp', [560])
# pdftg = MUDrawThumbnailGenerator.new(ARGV[0], '/tmp', [560])
pdftg = PDFBox2ThumbnailGenerator.new(ARGV[0], '/tmp', [560])
pdftg.add_observer(STDERRProgressReporter.new)
pdftg.generate_thumbnails!
end

0 comments on commit 33c8588

Please sign in to comment.