diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..079741f
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,62 @@
+# Set the default behavior, in case people don't have core.autocrlf set.
+* text=auto
+
+# Explicitly declare text files you want to always be normalized and converted
+# to LF line endings on checkout.
+*.afm text eol=lf
+*.cmap text eol=lf
+*.cs text eol=lf ident
+*.css text eol=lf
+*.htm text eol=lf
+*.html text eol=lf
+*.java text eol=lf ident
+*.lng text eol=lf
+*.md text eol=lf
+*.pom text eol=lf
+*.properties text eol=lf
+*.svg text eol=lf
+*.txt text eol=lf
+*.xfdf text eol=lf
+*.xht text eol=lf
+*.xhtml text eol=lf
+*.xml text eol=lf
+port-hash text eol=lf
+
+# Declare files that will always have CRLF line endings on checkout.
+*.bat text eol=crlf
+*.csproj text eol=crlf
+*.sln text eol=crlf
+
+# Denote all files that are truly binary and should not be modified.
+*.aif binary
+*.aiff binary
+*.bmp binary
+*.cer binary
+*.cmp binary
+*.crt binary
+*.dib binary
+*.gif binary
+*.icc binary
+*.j2k binary
+*.jb2 binary
+*.jp2 binary
+*.jpc binary
+*.jpg binary
+*.key binary
+*.otf binary
+*.p12 binary
+*.pdf binary
+*.pfb binary
+*.pfm binary
+*.png binary
+*.snd binary
+*.tif binary
+*.tiff binary
+*.ttc binary
+*.ttf binary
+*.u3d binary
+*.wav binary
+*.wmf binary
+*.woff binary
+*.woff2 binary
+*.dat binary
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..3352d5a
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,157 @@
+# Created by https://www.gitignore.io
+
+### Java ###
+*.class
+
+# Mobile Tools for Java (J2ME)
+.mtj.tmp/
+
+# Package Files #
+*.jar
+*.war
+*.ear
+
+# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
+hs_err_pid*
+
+
+### Eclipse ###
+*.pydevproject
+.metadata
+.gradle
+bin/
+tmp/
+*.tmp
+*.bak
+*.swp
+*~.nib
+local.properties
+.settings/
+.loadpath
+
+# Eclipse Core
+.project
+
+# External tool builders
+.externalToolBuilders/
+
+# Locally stored "Eclipse launch configurations"
+*.launch
+
+# CDT-specific
+.cproject
+
+# JDT-specific (Eclipse Java Development Tools)
+.classpath
+
+# PDT-specific
+.buildpath
+
+# sbteclipse plugin
+.target
+
+# TeXlipse plugin
+.texlipse
+
+
+### Intellij ###
+# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm
+
+*.iml
+
+## Directory-based project format:
+.idea/
+# if you remove the above rule, at least ignore the following:
+
+# User-specific stuff:
+# .idea/workspace.xml
+# .idea/tasks.xml
+# .idea/dictionaries
+
+# Sensitive or high-churn files:
+# .idea/dataSources.ids
+# .idea/dataSources.xml
+# .idea/sqlDataSources.xml
+# .idea/dynamic.xml
+# .idea/uiDesigner.xml
+
+# Gradle:
+# .idea/gradle.xml
+# .idea/libraries
+
+# Mongo Explorer plugin:
+# .idea/mongoSettings.xml
+
+## File-based project format:
+*.ipr
+*.iws
+
+## Plugin-specific files:
+
+# IntelliJ
+out/
+
+# mpeltonen/sbt-idea plugin
+.idea_modules/
+
+# JIRA plugin
+atlassian-ide-plugin.xml
+
+# Crashlytics plugin (for Android Studio and IntelliJ)
+com_crashlytics_export_strings.xml
+crashlytics.properties
+crashlytics-build.properties
+
+
+### NetBeans ###
+nbproject/private/
+build/
+nbbuild/
+dist/
+nbdist/
+nbactions.xml
+nb-configuration.xml
+.nb-gradle/
+
+
+### Linux ###
+*~
+
+# KDE directory preferences
+.directory
+
+# Linux trash folder which might appear on any partition or disk
+.Trash-*
+
+
+### Windows ###
+# Windows image file caches
+Thumbs.db
+ehthumbs.db
+
+# Folder config file
+Desktop.ini
+
+# Recycle Bin used on file shares
+$RECYCLE.BIN/
+
+# Windows Installer files
+*.cab
+*.msi
+*.msm
+*.msp
+
+# Windows shortcuts
+*.lnk
+
+target/
+nbactions*.xml
+.checkstyle
+.pmd
+.pmdruleset.xml
+
+# Ignore generated files
+*.log
+
+.vagrant/
+.vscode/
diff --git a/.mailmap b/.mailmap
new file mode 100644
index 0000000..df7e385
--- /dev/null
+++ b/.mailmap
@@ -0,0 +1,79 @@
+Alan Goo <alangoo@hancom.com>                                 <behumble@hanjava.net>
+Alexander Chingarev <alexander.chingarev@duallab.com>         <alexander.chingarev@itextpdf.com>
+Alexander Chingarev <alexander.chingarev@svaps.com>           <alexander.chingarev@gmail.com>
+Alexander Chingarev <alexander.chingarev@svaps.com>           <alexander.chingarev@svaps.com>
+Alexey Subach <alexey.subach@duallab.com>                     <alexey.subach@gmail.com>
+Alexey Subach <alexey.subach@duallab.com>                     <alexey.subach@itextpdf.com>
+Amedee Van Gasse <amedee.vangasse@itextpdf.com>               <amedee-github@amedee.be>
+Amedee Van Gasse <amedee.vangasse@itextpdf.com>               <amedee@vangasse.eu>
+Andrew Panfilov <andrew@panfilov.tel>                         <andrew@panfilov.tel>
+Bart De Meyer <bart@bartdemeyer.be>                           <bart@itextpdf.com>
+Benoît Lagae <benoit.lagae@itextpdf.com>                      <benoit@iText-blagae>
+Benoît Lagae <benoit.lagae@itextpdf.com>                      <benoit.lagae@itextpdf.com>
+Benoît Lagae <benoit.lagae@itextpdf.com>                      <benoit.lagae.s6547@student.hogent.be>
+Bruno Lowagie <bruno.lowagie@itextpdf.com>                    <bruno_000@192.168.1.3>
+Bruno Lowagie <bruno.lowagie@itextpdf.com>                    <bruno.lowagie@itextpdf.com>
+Bruno Lowagie <bruno.lowagie@itextpdf.com>                    <iText@Catullus>
+Bryan <bryan@hancom.com>                                      <bryan@192.168.152.124>
+Dimitry Alexandrov <dimzon541@gmail.com>                      <dimzon541@gmail.com>
+Dmitry Trusevich <dmitry.trusevich@duallab.com>               <dmitry.trusevich@duallab.com>
+Dmitry Trusevich <dmitry.trusevich@duallab>                   <dmitry.trusevich@duallab>
+Dominik Helm <dominik.helm@stud.tu-darmstadt.de>              <dominik.helm@stud.tu-darmstadt.de>
+gothinkfree <tyoh@hancom.com>                                 <tyoh@hancom.com>
+Ilya Idamkin <ilya.idamkin@duallab.com>                       <ilya.idamkin@TeamCity>
+iText Software <development@itextpdf.com>                     <community@itextpdf.com>
+iText Software <development@itextpdf.com>                     <development@itextpdf.com>
+iText Software <development@itextpdf.com>                     <jenkins@itextpdf.com>
+iText Software <development@itextpdf.com>                     <jenkins@jenkins.itextsupport.com>
+iText Software <development@itextpdf.com>                     <leeroy@itextpdf.com>
+iText Software <development@itextpdf.com>                     <teamcity.bot@TeamCity>
+iText Software <development@itextpdf.com>                     <teamcity@duallab.com>
+iText Software <development@itextpdf.com>                     <teamcity@itextpdf.com>
+Jeff Monson <jeff@advizr.co>                                  <jeff@advizr.co>
+Joris Schellekens <joris.schellekens@itextpdf.com>            <joris.schellekens@itextpdf.com>
+Kevin Day <kevin@trumpetinc.com>                              <kevin@melville.trumpetinc.local>
+Kevin Day <kevin@trumpetinc.com>                              <kevin@trumpetinc.com>
+Kevin Willems <kevin.willems@itextpdf.com>                    <kevin.willems1993@gmail.com>
+LaughingMan <LingMan@users.noreply.github.com>                <LingMan@users.noreply.github.com>
+Markus Wernig <markus.wernig@xfer.ch>                         <markus.wernig@xfer.ch>
+Marvin Wichmann <marvin@fam-wichmann.de>                      <marvin@fam-wichmann.de>
+Marvin Wichmann <marvin.wichmann@fusion-hub.com>              <marvin.wichmann@fusion-hub.com>
+Marvin Wichmann <marvin.wichmann@unic.com>                    <marvin.wichmann@unic.com>
+Michaël Demey <michael.demey@itextpdf.com>                    michael.demey <>
+Michaël Demey <michael.demey@itextpdf.com>                    <michael.demey@itextpdf.com>
+Michaël Demey <michael.demey@itextpdf.com>                    <michael.demey@TeamCity>
+Michaël Demey <michael.demey@itextpdf.com>                    <mikkeldemey@gmail.com>
+Michael Glazunoff <michael.glazunoff@gmail.com>               <michael.glazunoff@gmail.com>
+Michael Klink <klink@csi.com>                                 <klink@csi.com>
+Michael Klink <mkl@wir-sind-cool.org>                         <mkl@wir-sind-cool.org>
+Nadia Ivaniukovich <nadia.ivaniuckovich@duallab.com>          <nadia.ivaniuckovich@duallab.com>
+Nadia Ivaniukovich <nadia.ivaniukovich@duallab.com>           <nadia.ivaniukovich@duallab.com>
+Nadja Sych <nadja.sych@duallab.com>                           <nadja.sych@duallab.com>
+Natalia Zgirovskaya <natalia.zgirovskaya@svaps.com>           <natalia.zgirovskaya@datylon.svaps.com>
+Natalia Zgirovskaya <natalia.zgirovskaya@svaps.com>           <natalia.zgirovskaya@svaps.com>
+Olivier Blaise <olivier@blaise.name>                          <olivier@blaise.name>
+Orabi Nakhla <orabi.nakhla@itextpdf.com>                      <orabi.nakhla@itextpdf.com>
+Orabi Nakhla <orabi.nakhla@itextpdf.com>                      <orabi.nakhleh@gmail.com>
+Paulo Soares <pgpsoares@gmail.com>                            <pgpsoares@gmail.com>
+Paulo Soares <psoares@gmail.com>                              <psoares@gmail.com>
+Pavel Alay <pavel.alay@duallab.com>                           pavel.alay <>
+Pavel Alay <pavel.alay@duallab.com>                           <pavel.alay@gmail.com>
+Pavel Alay <pavel.alay@duallab.com>                           <pavel.alay@TeamCity>
+Pavel Morozov <pavel.morozov@duallab.com>                     <pavel.morozov@duallab.com>
+Pavel Morozov <pavel.morozov@duallab.com>                     <p.morozov@i-pnt.ru>
+Peter Goodman <pete@petegoo.com>                              <github@petegoo.com>
+Peter Goodman <pete@petegoo.com>                              <pete@petegoo.com>
+Peter Goodman <pete@petegoo.com>                              <peter.goodman@pushpay.com>
+Peter Kjuak <peter.kjuak@itextpdf.com>                        <peter.kjuak@itextpdf.com>
+Richard Schwark <richard.schwark@planet.de>                   <richard.schwark@planet.de>
+Roman Leonov <roman.leonov@duallab.com>                       <roman.leonov@duallab.com>
+Roman Nadvodny <roman.nadvodniuk@duallab.com>                 <roman.nadvodniuk@duallab.com>
+Sasha Kalykhan <alexander.kalykhan@duallab.com>               <alexander.kalykhan@duallab.com>
+Sasha Kalykhan <alexander.kalykhan@duallab.com>               <alexanderkalykhan@gmail.com>
+Semen Yakushev <semen.yakushev@duallab.com>                   <semen.yakushev@duallab.com>
+Valera <7691262@mail.ru>                                      <7691262@mail.ru>
+Veronika Lisovskaya <veronika.lisovskaya@duallab.com>         <veronika.lisovskaya@TeamCity>
+Vit Nemecky <vit.nemecky@asseco-ce.com>                       <vit.nemecky@asseco-ce.com>
+Yanina Cheremisina <yanina.cheremisina@svaps.com>             <yanina.cheremisina@svaps.com>
+Yulian Gaponenko <yulian.gaponenko@duallab.com>               <duallab@DESKTOP-PG4L5J1>
+Yulian Gaponenko <yulian.gaponenko@duallab.com>               <yulian.gaponenko@TeamCity>
diff --git a/Jenkinsfile b/Jenkinsfile
new file mode 100644
index 0000000..d821d4d
--- /dev/null
+++ b/Jenkinsfile
@@ -0,0 +1,298 @@
+#!/usr/bin/env groovy
+@Library('pipeline-library')_
+
+def schedule, sonarBranchName, sonarBranchTarget // cron spec for triggers{} plus the two SonarQube branch flags appended to the mvn call in 'Run Tests'
+switch (env.BRANCH_NAME) { // cases are tried in order; the first matching pattern wins
+    case ~/.*master.*/: // any branch whose name contains "master"
+        schedule = '@monthly'
+        sonarBranchName = '-Dsonar.branch.name=master'
+        sonarBranchTarget = '' // no target flag is passed for master
+        break
+    case ~/.*develop.*/: // any branch whose name contains "develop"
+        schedule = '@midnight'
+        sonarBranchName = '-Dsonar.branch.name=develop'
+        sonarBranchTarget = '-Dsonar.branch.target=master'
+        break
+    default: // every other branch
+        schedule = '' // empty cron spec -- presumably means no scheduled builds; confirm against the cron trigger docs
+        sonarBranchName = '-Dsonar.branch.name=' + env.BRANCH_NAME
+        sonarBranchTarget = '-Dsonar.branch.target=develop'
+        break
+}
+
+pipeline {
+
+    agent { label '!master' }
+
+    environment {
+        JDK_VERSION = 'jdk-8-oracle' // JDK tool name, reused by tools{} and by every withMaven call
+        tesseractDir = tool name: 'Tesseract', type: 'com.cloudbees.jenkins.plugins.customtools.CustomTool' // value of the 'Tesseract' custom tool; handed to the tests as -DtesseractDir
+    }
+
+    options {
+        ansiColor('xterm')
+        buildDiscarder logRotator(artifactNumToKeepStr: '1') // artifact retention is limited to one build
+        parallelsAlwaysFailFast()
+        skipStagesAfterUnstable()
+        timeout time: 1, unit: 'HOURS' // limit for the whole run; stages declare their own timeouts as well
+        timestamps()
+    }
+
+    triggers {
+        cron(schedule) // 'schedule' is chosen from the branch name by the switch at the top of the file
+    }
+
+    tools {
+        maven 'M3'
+        jdk "${JDK_VERSION}"
+    }
+
+    stages {
+	    stage('Abort possible previous builds') { // abortPreviousBuilds() is not defined in this file -- presumably supplied by the 'pipeline-library' shared library; verify
+            steps {
+                script {
+                    abortPreviousBuilds()
+                }
+            }
+        }
+        stage('Wait for blocking jobs') { // registers a Build Blocker job property through the 'properties' step
+            steps {
+                script {
+                    properties([[ // must be a step call: 'properties[[...]]' is a subscript on the script's 'properties' and never invokes the step
+                            $class         : 'BuildBlockerProperty',
+                            blockLevel     : 'GLOBAL',
+                            blockingJobs   : "^iText_7_Java/itextcore/$env.JOB_BASE_NAME\$", // regex; the escaped \$ is a literal end anchor, not GString interpolation
+                            scanQueueFor   : 'ALL',
+                            useBuildBlocker: true
+                    ]])
+                }
+            }
+        }
+        stage('Build') {
+            options {
+                retry(2)
+            }
+            stages {
+                stage('Clean workspace') { // mvn clean plus a purge of com.itextpdf artifacts from the workspace-local repository, then removal of downloads/
+                    options {
+                        timeout time: 5, unit: 'MINUTES'
+                    }
+                    steps {
+                        withMaven(jdk: "${JDK_VERSION}", maven: 'M3') {
+                            sh 'mvn --threads 2C --no-transfer-progress clean dependency:purge-local-repository ' +
+                                    '-Dinclude=com.itextpdf -DresolutionFuzziness=groupId -DreResolve=false ' +
+                                    "-Dmaven.repo.local=${env.WORKSPACE.replace('\\','/')}/.repository" // backslashes in the workspace path are turned into forward slashes
+                        }
+                        script {
+                            try {sh "rm -rf ${env.WORKSPACE.replace('\\','/')}/downloads"} catch (Exception ignored) {} // best effort: a failing rm is swallowed and does not fail the stage
+                        }
+                    }
+                }
+                stage('Install branch dependencies') { // non-master/develop branches only: download this branch's artifacts and install them into the workspace-local Maven repository
+                    options {
+                        timeout time: 5, unit: 'MINUTES'
+                    }
+                    when {
+                        not {
+                            anyOf {
+                                branch "master"
+                                branch "develop"
+                            }
+                        }
+                    }
+                    steps {
+                        script {
+                            getAndConfigureJFrogCLI() // not defined in this file -- presumably from the shared pipeline library; verify
+                            sh "./jfrog rt dl branch-artifacts/${env.JOB_BASE_NAME}/**/java/ downloads/"
+                            if (fileExists("downloads")) { // nothing to install when the download produced no directory
+                                dir ("downloads") {
+                                    def mainPomFiles = findFiles glob: '**/main.pom' // main.pom files are installed first, before the remaining poms
+                                    mainPomFiles.each { pomFile ->
+                                        def pomPath = pomFile.path.replace("\\", "/") // 'def' keeps the variable local instead of leaking into the script binding
+                                        sh "mvn org.apache.maven.plugins:maven-install-plugin:3.0.0-M1:install-file --quiet " +
+                                                "-Dmaven.repo.local=${env.WORKSPACE.replace('\\','/')}/.repository " +
+                                                "-Dpackaging=pom -Dfile=\"${pomPath}\" -DpomFile=\"${pomPath}\"" // paths are quoted so a space in a file name cannot split the argument
+                                    }
+                                    def pomFiles = findFiles glob: '**/*.pom'
+                                    pomFiles.each { pomFile ->
+                                        if (pomFile.name != "main.pom") { // main.pom was already handled by the loop above
+                                            def pomPath = pomFile.path.replace("\\", "/")
+                                            sh "mvn org.apache.maven.plugins:maven-install-plugin:3.0.0-M1:install-file --quiet " +
+                                                    "-Dmaven.repo.local=${env.WORKSPACE.replace('\\', '/')}/.repository " +
+                                                    "-Dpackaging=pom -Dfile=\"${pomPath}\" -DpomFile=\"${pomPath}\""
+                                        }
+                                    }
+                                    def jarFiles = findFiles glob: '**/*.jar' // jars are installed last, after all poms
+                                    jarFiles.each { jarFile ->
+                                        def jarPath = jarFile.path.replace("\\", "/")
+                                        sh "mvn org.apache.maven.plugins:maven-install-plugin:3.0.0-M1:install-file --quiet " +
+                                                "-Dmaven.repo.local=${env.WORKSPACE.replace('\\', '/')}/.repository " +
+                                                "-Dfile=\"${jarPath}\""
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+                stage('Compile') { // mvn package with tests skipped, resolving against the workspace-local repository
+                    options {
+                        timeout time: 10, unit: 'MINUTES'
+                    }
+                    steps {
+                        withMaven(jdk: "${JDK_VERSION}", maven: 'M3') {
+                            sh 'mvn --threads 2C --no-transfer-progress package -Dmaven.test.skip=true ' + // tests are skipped here; they run later in the 'Run Tests' stage
+                                "-Dmaven.repo.local=${env.WORKSPACE.replace('\\','/')}/.repository"
+                        }
+                    }
+                }
+            }
+            post { // post-conditions of the enclosing 'Build' stage, which is declared with retry(2)
+                failure {
+                    sleep time: 2, unit: 'MINUTES' // pause after a failed build -- presumably a back-off before the retry; confirm
+                }
+                success {
+                    script { currentBuild.result = 'SUCCESS' } // explicitly sets the build result to SUCCESS once the build stages pass
+                }
+            }
+        }
+        stage('Static Code Analysis') { // runs 'mvn verify' with the qa profile, then publishes the analysis reports
+            options {
+                timeout time: 1, unit: 'HOURS'
+            }
+            steps {
+                withMaven(jdk: "${JDK_VERSION}", maven: 'M3', mavenLocalRepo: '.repository') { // NOTE(review): only this stage sets mavenLocalRepo in addition to -Dmaven.repo.local; confirm both are needed
+                    sh 'mvn --no-transfer-progress verify --activate-profiles qa ' +
+                            '-Dpmd.analysisCache=true ' +
+                            "-Dmaven.repo.local=${env.WORKSPACE.replace('\\','/')}/.repository"
+                }
+                recordIssues(tools: [ // records Checkstyle, PMD and SpotBugs findings
+                        checkStyle(),
+                        pmdParser(),
+                        spotBugs(useRankAsPriority: true)
+                ])
+                dependencyCheckPublisher pattern: 'target/dependency-check-report.xml'
+            }
+        }
+        stage('Run Tests') { // one mvn invocation: JaCoCo agent, verify with the test profile, JaCoCo report, then sonar:sonar
+            options {
+                timeout time: 30, unit: 'MINUTES'
+            }
+            steps {
+                withMaven(jdk: "${JDK_VERSION}", maven: 'M3') {
+                    withSonarQubeEnv('Sonar') {
+                        sh 'mvn --no-transfer-progress --activate-profiles test ' +
+                                '-DgsExec="${gsExec}" -DcompareExec="${compareExec}" ' + // single-quoted Groovy strings: ${...} is left for the shell to expand
+                                '-DtesseractDir="${tesseractDir}" ' + // tesseractDir is set in the pipeline's environment block
+                                '-Dmaven.main.skip=true -Dmaven.test.failure.ignore=false ' +
+                                'org.jacoco:jacoco-maven-plugin:prepare-agent verify org.jacoco:jacoco-maven-plugin:report ' +
+                                '-Dsonar.java.spotbugs.reportPaths="target/spotbugs.xml" ' +
+                                "-Dmaven.repo.local=${env.WORKSPACE.replace('\\','/')}/.repository " +
+                                'sonar:sonar ' + sonarBranchName + ' ' + sonarBranchTarget // branch flags chosen by the switch at the top of the file
+                    }
+                }
+            }
+        }
+        stage("Quality Gate") { // waits (up to one hour) for the SonarQube quality gate result
+            options {
+                timeout time: 1, unit: 'HOURS'
+            }
+            steps {
+                waitForQualityGate abortPipeline: true // abortPipeline: true -- presumably fails the run when the gate is not passed; confirm in the SonarQube plugin docs
+            }
+        }
+        stage('Artifactory Deploy') { // master/develop only: mvn install via the Artifactory Maven build wrapper, then publish the build info
+            options {
+                timeout time: 5, unit: 'MINUTES'
+            }
+            when {
+                anyOf {
+                    branch "master"
+                    branch "develop"
+                }
+            }
+            steps {
+                withMaven(jdk: "${JDK_VERSION}", maven: 'M3') {
+                    script {
+                        def server = Artifactory.server 'itext-artifactory'
+                        def rtMaven = Artifactory.newMavenBuild()
+                        rtMaven.deployer server: server, releaseRepo: 'releases', snapshotRepo: 'snapshot' // target repositories for release and snapshot artifacts
+                        rtMaven.tool = 'M3'
+                        def buildInfo = rtMaven.run pom: 'pom.xml', goals: '--threads 2C --no-transfer-progress install --activate-profiles artifactory ' +
+                            "-Dmaven.repo.local=${env.WORKSPACE.replace('\\','/')}/.repository".toString() // toString() applies to the GString operand only, so 'goals' is a plain String
+                        server.publishBuildInfo buildInfo
+                    }
+                }
+            }
+        }
+        stage('Branch Artifactory Deploy') { // non-master/develop branches only: upload built jars and poms to branch-artifacts/<branch>/<repo>/java/
+            options {
+                timeout time: 5, unit: 'MINUTES'
+            }
+            when {
+                not {
+                    anyOf {
+                        branch "master"
+                        branch "develop"
+                    }
+                }
+            }
+            steps {
+                script {
+                    if (env.GIT_URL) { // nothing is uploaded when the SCM URL is not available
+                        def repoName = ("${env.GIT_URL}" =~ /(.*\/)(.*)(\.git)/)[ 0 ][ 2 ] // group 2: text between the last '/' and ".git"; 'def' keeps it local. NOTE(review): throws if the URL has no ".git" -- confirm all remotes use it
+                        findFiles(glob: '*/target/*.jar').each { item ->
+                            if (!(item ==~ /.*\/[fs]b-contrib-.*?.jar/) && !(item ==~ /.*\/findsecbugs-plugin-.*?.jar/) && !(item ==~ /.*-sources.jar/) && !(item ==~ /.*-javadoc.jar/)) { // skip analysis-plugin, sources and javadoc jars
+                                sh "./jfrog rt u \"${item.path}\" branch-artifacts/${env.BRANCH_NAME}/${repoName}/java/ --recursive=false --build-name ${env.BRANCH_NAME} --build-number ${env.BUILD_NUMBER} --props \"vcs.revision=${env.GIT_COMMIT};repo.name=${repoName}\""
+                            }
+                        }
+                        findFiles(glob: '**/pom.xml').each { item ->
+                            def pomPath = item.path.replace('\\', '/')
+                            if (!(pomPath ==~ /.*target.*/)) { // ignore any pom whose path contains "target"
+                                def resPomName = "main.pom" // name used for a pom.xml that has no parent directory in its path
+                                def subDirMatcher = (pomPath =~ /^.*(?<=\/|^)(.*)\/pom\.xml/) // captures the directory that directly contains pom.xml
+                                if (subDirMatcher.matches()) {
+                                    resPomName = "${subDirMatcher[0][1]}.pom" // module poms are uploaded as <directory>.pom
+                                }
+                                sh "./jfrog rt u \"${item.path}\" branch-artifacts/${env.BRANCH_NAME}/${repoName}/java/${resPomName} --recursive=false --build-name ${env.BRANCH_NAME} --build-number ${env.BUILD_NUMBER} --props \"vcs.revision=${env.GIT_COMMIT};repo.name=${repoName}\""
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    post { // end-of-run reporting; Slack is notified only on result changes for master/develop branches
+        always {
+            echo 'One way or another, I have finished \uD83E\uDD16'
+        }
+        success {
+            echo 'I succeeded! \u263A'
+            cleanWs deleteDirs: true // the workspace is wiped only after a successful run
+        }
+        unstable {
+            echo 'I am unstable \uD83D\uDE2E'
+        }
+        failure {
+            echo 'I failed \uD83D\uDCA9'
+        }
+        changed {
+            echo 'Things were different before... \uD83E\uDD14'
+        }
+        fixed {
+            script {
+                if (env.BRANCH_NAME?.contains('master') || env.BRANCH_NAME?.contains('develop')) { // ?. : an unset BRANCH_NAME yields false instead of a NullPointerException
+                    slackNotifier "#ci", currentBuild.currentResult, "${env.BRANCH_NAME} - Back to normal"
+                }
+            }
+        }
+        regression {
+            script {
+                if (env.BRANCH_NAME?.contains('master') || env.BRANCH_NAME?.contains('develop')) { // same null-safe branch filter as in 'fixed'
+                    slackNotifier "#ci", currentBuild.currentResult, "${env.BRANCH_NAME} - First failure"
+                }
+            }
+        }
+    }
+
+}
diff --git a/LICENSE.md b/LICENSE.md
new file mode 100644
index 0000000..39d74c2
--- /dev/null
+++ b/LICENSE.md
@@ -0,0 +1,16 @@
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
diff --git a/gnu-agpl-v3.0.md b/gnu-agpl-v3.0.md
new file mode 100644
index 0000000..4ef32f0
--- /dev/null
+++ b/gnu-agpl-v3.0.md
@@ -0,0 +1,651 @@
+GNU Affero General Public License
+=================================
+
+_Version 3, 19 November 2007_
+_Copyright © 2007 Free Software Foundation, Inc. &lt;<http://fsf.org/>&gt;_
+
+Everyone is permitted to copy and distribute verbatim copies
+of this license document, but changing it is not allowed.
+
+## Preamble
+
+The GNU Affero General Public License is a free, copyleft license for
+software and other kinds of works, specifically designed to ensure
+cooperation with the community in the case of network server software.
+
+The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works.  By contrast,
+our General Public Licenses are intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users.
+
+When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+Developers that use our General Public Licenses protect your rights
+with two steps: **(1)** assert copyright on the software, and **(2)** offer
+you this License which gives you legal permission to copy, distribute
+and/or modify the software.
+
+A secondary benefit of defending all users' freedom is that
+improvements made in alternate versions of the program, if they
+receive widespread use, become available for other developers to
+incorporate.  Many developers of free software are heartened and
+encouraged by the resulting cooperation.  However, in the case of
+software used on network servers, this result may fail to come about.
+The GNU General Public License permits making a modified version and
+letting the public access it on a server without ever releasing its
+source code to the public.
+
+The GNU Affero General Public License is designed specifically to
+ensure that, in such cases, the modified source code becomes available
+to the community.  It requires the operator of a network server to
+provide the source code of the modified version running there to the
+users of that server.  Therefore, public use of a modified version, on
+a publicly accessible server, gives the public access to the source
+code of the modified version.
+
+An older license, called the Affero General Public License and
+published by Affero, was designed to accomplish similar goals.  This is
+a different license, not a version of the Affero GPL, but Affero has
+released a new version of the Affero GPL which permits relicensing under
+this license.
+
+The precise terms and conditions for copying, distribution and
+modification follow.
+
+## TERMS AND CONDITIONS
+
+### 0. Definitions
+
+“This License” refers to version 3 of the GNU Affero General Public License.
+
+“Copyright” also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+“The Program” refers to any copyrightable work licensed under this
+License.  Each licensee is addressed as “you”.  “Licensees” and
+“recipients” may be individuals or organizations.
+
+To “modify” a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy.  The resulting work is called a “modified version” of the
+earlier work or a work “based on” the earlier work.
+
+A “covered work” means either the unmodified Program or a work based
+on the Program.
+
+To “propagate” a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy.  Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+To “convey” a work means any kind of propagation that enables other
+parties to make or receive copies.  Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+An interactive user interface displays “Appropriate Legal Notices”
+to the extent that it includes a convenient and prominently visible
+feature that **(1)** displays an appropriate copyright notice, and **(2)**
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License.  If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+### 1. Source Code
+
+The “source code” for a work means the preferred form of the work
+for making modifications to it.  “Object code” means any non-source
+form of a work.
+
+A “Standard Interface” means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+The “System Libraries” of an executable work include anything, other
+than the work as a whole, that **(a)** is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and **(b)** serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form.  A
+“Major Component”, in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+The “Corresponding Source” for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities.  However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work.  For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+The Corresponding Source for a work in source code form is that
+same work.
+
+### 2. Basic Permissions
+
+All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met.  This License explicitly affirms your unlimited
+permission to run the unmodified Program.  The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work.  This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force.  You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright.  Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+Conveying under any other circumstances is permitted solely under
+the conditions stated below.  Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+### 3. Protecting Users' Legal Rights From Anti-Circumvention Law
+
+No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+### 4. Conveying Verbatim Copies
+
+You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+### 5. Conveying Modified Source Versions
+
+You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+* **a)** The work must carry prominent notices stating that you modified
+it, and giving a relevant date.
+* **b)** The work must carry prominent notices stating that it is
+released under this License and any conditions added under section 7.
+This requirement modifies the requirement in section 4 to
+“keep intact all notices”.
+* **c)** You must license the entire work, as a whole, under this
+License to anyone who comes into possession of a copy.  This
+License will therefore apply, along with any applicable section 7
+additional terms, to the whole of the work, and all its parts,
+regardless of how they are packaged.  This License gives no
+permission to license the work in any other way, but it does not
+invalidate such permission if you have separately received it.
+* **d)** If the work has interactive user interfaces, each must display
+Appropriate Legal Notices; however, if the Program has interactive
+interfaces that do not display Appropriate Legal Notices, your
+work need not make them do so.
+
+A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+“aggregate” if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit.  Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+### 6. Conveying Non-Source Forms
+
+You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+* **a)** Convey the object code in, or embodied in, a physical product
+(including a physical distribution medium), accompanied by the
+Corresponding Source fixed on a durable physical medium
+customarily used for software interchange.
+* **b)** Convey the object code in, or embodied in, a physical product
+(including a physical distribution medium), accompanied by a
+written offer, valid for at least three years and valid for as
+long as you offer spare parts or customer support for that product
+model, to give anyone who possesses the object code either **(1)** a
+copy of the Corresponding Source for all the software in the
+product that is covered by this License, on a durable physical
+medium customarily used for software interchange, for a price no
+more than your reasonable cost of physically performing this
+conveying of source, or **(2)** access to copy the
+Corresponding Source from a network server at no charge.
+* **c)** Convey individual copies of the object code with a copy of the
+written offer to provide the Corresponding Source.  This
+alternative is allowed only occasionally and noncommercially, and
+only if you received the object code with such an offer, in accord
+with subsection 6b.
+* **d)** Convey the object code by offering access from a designated
+place (gratis or for a charge), and offer equivalent access to the
+Corresponding Source in the same way through the same place at no
+further charge.  You need not require recipients to copy the
+Corresponding Source along with the object code.  If the place to
+copy the object code is a network server, the Corresponding Source
+may be on a different server (operated by you or a third party)
+that supports equivalent copying facilities, provided you maintain
+clear directions next to the object code saying where to find the
+Corresponding Source.  Regardless of what server hosts the
+Corresponding Source, you remain obligated to ensure that it is
+available for as long as needed to satisfy these requirements.
+* **e)** Convey the object code using peer-to-peer transmission, provided
+you inform other peers where the object code and Corresponding
+Source of the work are being offered to the general public at no
+charge under subsection 6d.
+
+A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+A “User Product” is either **(1)** a “consumer product”, which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or **(2)** anything designed or sold for incorporation
+into a dwelling.  In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage.  For a particular
+product received by a particular user, “normally used” refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product.  A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+“Installation Information” for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source.  The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information.  But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed.  Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+### 7. Additional Terms
+
+“Additional permissions” are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law.  If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it.  (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.)  You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+* **a)** Disclaiming warranty or limiting liability differently from the
+terms of sections 15 and 16 of this License; or
+* **b)** Requiring preservation of specified reasonable legal notices or
+author attributions in that material or in the Appropriate Legal
+Notices displayed by works containing it; or
+* **c)** Prohibiting misrepresentation of the origin of that material, or
+requiring that modified versions of such material be marked in
+reasonable ways as different from the original version; or
+* **d)** Limiting the use for publicity purposes of names of licensors or
+authors of the material; or
+* **e)** Declining to grant rights under trademark law for use of some
+trade names, trademarks, or service marks; or
+* **f)** Requiring indemnification of licensors and authors of that
+material by anyone who conveys the material (or modified versions of
+it) with contractual assumptions of liability to the recipient, for
+any liability that these contractual assumptions directly impose on
+those licensors and authors.
+
+All other non-permissive additional terms are considered “further
+restrictions” within the meaning of section 10.  If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term.  If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+### 8. Termination
+
+You may not propagate or modify a covered work except as expressly
+provided under this License.  Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated **(a)**
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and **(b)** permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License.  If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+### 9. Acceptance Not Required for Having Copies
+
+You are not required to accept this License in order to receive or
+run a copy of the Program.  Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance.  However,
+nothing other than this License grants you permission to propagate or
+modify any covered work.  These actions infringe copyright if you do
+not accept this License.  Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+### 10. Automatic Licensing of Downstream Recipients
+
+Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License.  You are not responsible
+for enforcing compliance by third parties with this License.
+
+An “entity transaction” is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations.  If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License.  For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+### 11. Patents
+
+A “contributor” is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based.  The
+work thus licensed is called the contributor's “contributor version”.
+
+A contributor's “essential patent claims” are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version.  For
+purposes of this definition, “control” includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+In the following three paragraphs, a “patent license” is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement).  To “grant” such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either **(1)** cause the Corresponding Source to be so
+available, or **(2)** arrange to deprive yourself of the benefit of the
+patent license for this particular work, or **(3)** arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients.  “Knowingly relying” means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+A patent license is “discriminatory” if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License.  You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license **(a)** in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or **(b)** primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+### 12. No Surrender of Others' Freedom
+
+If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all.  For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+### 13. Remote Network Interaction; Use with the GNU General Public License
+
+Notwithstanding any other provision of this License, if you modify the
+Program, your modified version must prominently offer all users
+interacting with it remotely through a computer network (if your version
+supports such interaction) an opportunity to receive the Corresponding
+Source of your version by providing access to the Corresponding Source
+from a network server at no charge, through some standard or customary
+means of facilitating copying of software.  This Corresponding Source
+shall include the Corresponding Source for any work covered by version 3
+of the GNU General Public License that is incorporated pursuant to the
+following paragraph.
+
+Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU General Public License into a single
+combined work, and to convey the resulting work.  The terms of this
+License will continue to apply to the part which is the covered work,
+but the work with which it is combined will remain governed by version
+3 of the GNU General Public License.
+
+### 14. Revised Versions of this License
+
+The Free Software Foundation may publish revised and/or new versions of
+the GNU Affero General Public License from time to time.  Such new versions
+will be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the
+Program specifies that a certain numbered version of the GNU Affero General
+Public License “or any later version” applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation.  If the Program does not specify a version number of the
+GNU Affero General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+If the Program specifies that a proxy can decide which future
+versions of the GNU Affero General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+Later license versions may give you additional or different
+permissions.  However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+### 15. Disclaimer of Warranty
+
+THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM “AS IS” WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+### 16. Limitation of Liability
+
+IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+### 17. Interpretation of Sections 15 and 16
+
+If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+_END OF TERMS AND CONDITIONS_
+
+## How to Apply These Terms to Your New Programs
+
+If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the “copyright” line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If your software can interact with users remotely through a computer
+network, you should also make sure that it provides a way for users to
+get its source.  For example, if your program is a web application, its
+interface could display a “Source” link that leads users to an archive
+of the code.  There are many ways you could offer source, and different
+solutions will be better for different programs; see section 13 for the
+specific requirements.
+
+You should also get your employer (if you work as a programmer) or school,
+if any, to sign a “copyright disclaimer” for the program, if necessary.
+For more information on this, and how to apply and follow the GNU AGPL, see
+&lt;<http://www.gnu.org/licenses/>&gt;.
diff --git a/pdfocr-api/pom.xml b/pdfocr-api/pom.xml
new file mode 100644
index 0000000..87f6927
--- /dev/null
+++ b/pdfocr-api/pom.xml
@@ -0,0 +1,50 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent> <!-- this module declares no groupId or version of its own -->
+    <groupId>com.itextpdf</groupId>
+    <artifactId>pdfocr-root</artifactId>
+    <version>1.0.0</version>
+  </parent>
+
+  <artifactId>pdfocr-api</artifactId>
+
+  <name>pdfOCR API</name>
+  <description>pdfOCR is an iText 7 add-on for Java to recognize and extract text in scanned documents and images. It can also convert them into fully ISO-compliant PDF or PDF/A-3u files that are accessible, searchable, and suitable for archiving</description>
+
+  <dependencies> <!-- itext.version is not defined in this file; presumably it comes from the parent POM (verify) -->
+    <dependency>
+      <groupId>com.itextpdf</groupId>
+      <artifactId>layout</artifactId>
+      <version>${itext.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>com.itextpdf</groupId>
+      <artifactId>pdfa</artifactId>
+      <version>${itext.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.commons</groupId>
+      <artifactId>commons-imaging</artifactId>
+      <version>1.0-alpha1</version> <!-- pinned explicitly, independent of itext.version -->
+    </dependency>
+    <dependency>
+      <groupId>com.itextpdf</groupId>
+      <artifactId>pdftest</artifactId>
+      <version>${itext.version}</version>
+      <scope>test</scope> <!-- available to tests only -->
+    </dependency>
+  </dependencies>
+
+  <build>
+    <resources>
+      <resource>
+        <directory>src/main/resources</directory>
+        <includes>
+          <include>**/*.ttf</include> <!-- only .ttf files of this directory are included as resources -->
+        </includes>
+      </resource>
+    </resources>
+  </build>
+</project>
diff --git a/pdfocr-api/src/main/java/com/itextpdf/pdfocr/IMetaInfoWrapper.java b/pdfocr-api/src/main/java/com/itextpdf/pdfocr/IMetaInfoWrapper.java
new file mode 100644
index 0000000..e702cb9
--- /dev/null
+++ b/pdfocr-api/src/main/java/com/itextpdf/pdfocr/IMetaInfoWrapper.java
@@ -0,0 +1,37 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr;
+
+import com.itextpdf.kernel.counter.event.IMetaInfo;
+
+/**
+ * The meta info wrapper that holds some meta info
+ */
+public interface IMetaInfoWrapper {
+
+    /**
+     * Returns the meta info held by this wrapper.
+     * @return the {@link IMetaInfo} instance wrapped by this object
+     */
+    IMetaInfo getWrappedMetaInfo();
+}
diff --git a/pdfocr-api/src/main/java/com/itextpdf/pdfocr/IOcrEngine.java b/pdfocr-api/src/main/java/com/itextpdf/pdfocr/IOcrEngine.java
new file mode 100644
index 0000000..878fc57
--- /dev/null
+++ b/pdfocr-api/src/main/java/com/itextpdf/pdfocr/IOcrEngine.java
@@ -0,0 +1,61 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr;
+
+import java.io.File;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * {@link IOcrEngine} interface is used for instantiating new OcrReader
+ * objects.
+ * {@link IOcrEngine} interface provides possibility to perform OCR,
+ * to read data from input files and to return the contained text in the
+ * required format.
+ */
+public interface IOcrEngine {
+
+    /**
+     * Runs OCR on a single input image file and returns the recognized
+     * text grouped by page.
+     *
+     * @param input the image {@link java.io.File} to be processed
+     * @return a {@link java.util.Map} keyed by page number
+     * ({@link java.lang.Integer}); each value is a
+     * {@link java.util.List} of {@link TextInfo} items, every item
+     * holding one word or one line together with the 4 coordinates
+     * of its bounding box (bbox)
+     */
+    Map<Integer, List<TextInfo>> doImageOcr(File input);
+
+    /**
+     * Runs OCR on every image of the given list and writes the recognized
+     * text to a text file at the given location.
+     * Human reading order of the output is not guaranteed, because input
+     * images may have a specific layout (multiple columns, tables etc).
+     *
+     * @param inputImages {@link java.util.List} of image files to be OCRed
+     * @param txtFile the text file to be created
+     */
+    void createTxtFile(List<File> inputImages, File txtFile);
+}
diff --git a/pdfocr-api/src/main/java/com/itextpdf/pdfocr/OcrEngineProperties.java b/pdfocr-api/src/main/java/com/itextpdf/pdfocr/OcrEngineProperties.java
new file mode 100644
index 0000000..fd41dea
--- /dev/null
+++ b/pdfocr-api/src/main/java/com/itextpdf/pdfocr/OcrEngineProperties.java
@@ -0,0 +1,76 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+public class OcrEngineProperties {
+
+    /**
+     * Languages required for OCR; always held as an unmodifiable list.
+     */
+    private List<String> languages = Collections.<String>emptyList();
+
+    /**
+     * Creates a new {@link OcrEngineProperties} instance.
+     */
+    public OcrEngineProperties() {
+    }
+
+    /**
+     * Creates a new {@link OcrEngineProperties} instance
+     * based on another {@link OcrEngineProperties} instance (copy
+     * constructor).
+     *
+     * @param other the other {@link OcrEngineProperties} instance
+     */
+    public OcrEngineProperties(OcrEngineProperties other) {
+        this.languages = other.languages; // sharing is safe: the list is an unmodifiable private snapshot
+    }
+
+    /**
+     * Gets list of languages required for provided images.
+     *
+     * @return a new modifiable {@link List} holding the languages
+     */
+    public final List<String> getLanguages() {
+        return new ArrayList<String>(languages);
+    }
+
+    /**
+     * Sets list of languages to be recognized in provided images.
+     * Consult with documentation of specific engine implementations
+     * to check on which format to give the language in.
+     * The list is copied, so later changes to it do not affect this object.
+     *
+     * @param requiredLanguages {@link List} of languages in string format
+     * @return the {@link OcrEngineProperties} instance
+     */
+    public final OcrEngineProperties setLanguages(final List<String> requiredLanguages) {
+        languages = Collections.<String>unmodifiableList(
+                new ArrayList<String>(requiredLanguages));
+        return this;
+    }
+}
diff --git a/pdfocr-api/src/main/java/com/itextpdf/pdfocr/OcrException.java b/pdfocr-api/src/main/java/com/itextpdf/pdfocr/OcrException.java
new file mode 100644
index 0000000..be8486b
--- /dev/null
+++ b/pdfocr-api/src/main/java/com/itextpdf/pdfocr/OcrException.java
@@ -0,0 +1,98 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr;
+
+import com.itextpdf.io.util.MessageFormatUtil;
+
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Exception class for custom exceptions.
+ */
+public class OcrException extends RuntimeException {
+
+    public static final String CANNOT_READ_INPUT_IMAGE =
+            "Cannot read input image";
+    public static final String CANNOT_RESOLVE_PROVIDED_FONTS = "Cannot resolve "
+            + "any of provided fonts. Please check provided FontProvider.";
+    public static final String CANNOT_CREATE_PDF_DOCUMENT = "Cannot create "
+            + "PDF document: {0}";
+    private List<String> messageParams; // stays null until setMessageParams is called
+
+    /**
+     * Creates a new OcrException.
+     *
+     * @param msg the detail message.
+     * @param e   the cause
+     *            (which is saved for later retrieval
+     *            by {@link #getCause()} method).
+     */
+    public OcrException(String msg, Throwable e) {
+        super(msg, e);
+    }
+
+    /**
+     * Creates a new OcrException.
+     *
+     * @param msg the detail message.
+     */
+    public OcrException(String msg) {
+        super(msg);
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public String getMessage() {
+        return this.messageParams != null && this.messageParams.size() != 0
+                ? MessageFormatUtil
+                        .format(super.getMessage(), this.getMessageParams())
+                : super.getMessage();
+    }
+
+    /**
+     * Gets additional params for Exception message.
+     *
+     * @return the message params as a new array; empty if none were set
+     */
+    protected Object[] getMessageParams() {
+        if (this.messageParams == null) {
+            // setMessageParams was never called: nothing to substitute
+            return new Object[0];
+        }
+        return this.messageParams.toArray(new Object[0]);
+    }
+
+    /**
+     * Sets additional params for Exception message.
+     *
+     * @param messageParams additional params.
+     * @return object itself.
+     */
+    public OcrException setMessageParams(String... messageParams) {
+        this.messageParams = Arrays.<String>asList(messageParams);
+        return this;
+    }
+}
diff --git a/pdfocr-api/src/main/java/com/itextpdf/pdfocr/OcrPdfCreator.java b/pdfocr-api/src/main/java/com/itextpdf/pdfocr/OcrPdfCreator.java
new file mode 100644
index 0000000..1196cec
--- /dev/null
+++ b/pdfocr-api/src/main/java/com/itextpdf/pdfocr/OcrPdfCreator.java
@@ -0,0 +1,621 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr;
+
+import com.itextpdf.io.font.otf.ActualTextIterator;
+import com.itextpdf.io.font.otf.Glyph;
+import com.itextpdf.io.font.otf.GlyphLine;
+import com.itextpdf.io.image.ImageData;
+import com.itextpdf.io.util.MessageFormatUtil;
+import com.itextpdf.kernel.counter.event.IMetaInfo;
+import com.itextpdf.kernel.font.PdfFont;
+import com.itextpdf.kernel.font.PdfTrueTypeFont;
+import com.itextpdf.kernel.font.PdfType0Font;
+import com.itextpdf.kernel.font.PdfType1Font;
+import com.itextpdf.kernel.font.PdfType3Font;
+import com.itextpdf.kernel.geom.PageSize;
+import com.itextpdf.kernel.pdf.DocumentProperties;
+import com.itextpdf.kernel.pdf.PdfAConformanceLevel;
+import com.itextpdf.kernel.pdf.PdfDocument;
+import com.itextpdf.kernel.pdf.PdfDocumentInfo;
+import com.itextpdf.kernel.pdf.PdfOutputIntent;
+import com.itextpdf.kernel.pdf.PdfPage;
+import com.itextpdf.kernel.pdf.PdfString;
+import com.itextpdf.kernel.pdf.PdfViewerPreferences;
+import com.itextpdf.kernel.pdf.PdfWriter;
+import com.itextpdf.kernel.pdf.canvas.PdfCanvas;
+import com.itextpdf.kernel.pdf.canvas.PdfCanvasConstants.TextRenderingMode;
+import com.itextpdf.kernel.pdf.layer.PdfLayer;
+import com.itextpdf.layout.Canvas;
+import com.itextpdf.layout.Document;
+import com.itextpdf.layout.element.Paragraph;
+import com.itextpdf.layout.element.Text;
+import com.itextpdf.layout.font.FontProvider;
+import com.itextpdf.layout.property.TextAlignment;
+import com.itextpdf.pdfa.PdfADocument;
+import com.itextpdf.pdfocr.OcrPdfCreatorMetaInfo.PdfDocumentType;
+import com.itextpdf.pdfocr.events.IThreadLocalMetaInfoAware;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.UUID;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * {@link OcrPdfCreator} is the class that creates PDF documents containing input
+ * images and text that was recognized using provided {@link IOcrEngine}.
+ *
+ * {@link OcrPdfCreator} provides possibilities to set list of input images to
+ * be used for OCR, to set scaling mode for images, to set color of text in
+ * output PDF document, to set fixed size of the PDF document's page and to
+ * perform OCR using given images and to return
+ * {@link com.itextpdf.kernel.pdf.PdfDocument} as result.
+ * OCR is based on the provided {@link IOcrEngine}
+ * (e.g. tesseract reader). This parameter is obligatory and it should be
+ * provided in constructor
+ * or using setter.
+ */
+public class OcrPdfCreator {
+
+    /**
+     * The logger.
+     */
+    private static final Logger LOGGER = LoggerFactory
+            .getLogger(OcrPdfCreator.class);
+
+    /**
+     * Selected {@link IOcrEngine}.
+     */
+    private IOcrEngine ocrEngine;
+
+    /**
+     * Set of properties.
+     */
+    private OcrPdfCreatorProperties ocrPdfCreatorProperties;
+
+    /**
+     * Creates a new {@link OcrPdfCreator} instance.
+     *
+     * @param ocrEngine {@link IOcrEngine} selected OCR Reader
+     */
+    public OcrPdfCreator(final IOcrEngine ocrEngine) {
+        this(ocrEngine, new OcrPdfCreatorProperties()); // delegates with a newly constructed properties object
+    }
+
+    /**
+     * Creates a new {@link OcrPdfCreator} instance.
+     *
+     * @param ocrEngine selected OCR Reader {@link IOcrEngine}
+     * @param ocrPdfCreatorProperties set of properties for {@link OcrPdfCreator}
+     */
+    public OcrPdfCreator(final IOcrEngine ocrEngine, final OcrPdfCreatorProperties ocrPdfCreatorProperties) {
+        // both values are stored through the final setters of this class
+        this.setOcrPdfCreatorProperties(ocrPdfCreatorProperties);
+        this.setOcrEngine(ocrEngine);
+    }
+
+    /**
+     * Gets properties for {@link OcrPdfCreator}.
+     *
+     * @return set properties {@link OcrPdfCreatorProperties}
+     */
+    public final OcrPdfCreatorProperties getOcrPdfCreatorProperties() {
+        return this.ocrPdfCreatorProperties;
+    }
+
+    /**
+     * Sets properties for {@link OcrPdfCreator}.
+     *
+     * @param ocrPdfCreatorProperties set of properties
+     * {@link OcrPdfCreatorProperties} for {@link OcrPdfCreator}
+     */
+    public final void setOcrPdfCreatorProperties(final OcrPdfCreatorProperties ocrPdfCreatorProperties) {
+        // the instance is stored as given; no copy is made
+        this.ocrPdfCreatorProperties = ocrPdfCreatorProperties;
+    }
+
+    /**
+     * Performs OCR with set parameters using provided {@link IOcrEngine} and
+     * creates PDF using provided {@link com.itextpdf.kernel.pdf.PdfWriter} and
+     * {@link com.itextpdf.kernel.pdf.PdfOutputIntent}.
+     * PDF/A-3u document will be created if
+     * provided {@link com.itextpdf.kernel.pdf.PdfOutputIntent} is not null.
+     *
+     * @param inputImages {@link java.util.List} of images to be OCRed
+     * @param pdfWriter the {@link com.itextpdf.kernel.pdf.PdfWriter} object
+     *                  to write final PDF document to
+     * @param pdfOutputIntent {@link com.itextpdf.kernel.pdf.PdfOutputIntent}
+     *                        for PDF/A-3u document
+     * @return result PDF/A-3u {@link com.itextpdf.kernel.pdf.PdfDocument}
+     * object
+     * @throws OcrException if it was not possible to read provided or
+     * default font
+     */
+    public final PdfDocument createPdfA(final List<File> inputImages,
+            final PdfWriter pdfWriter,
+            final PdfOutputIntent pdfOutputIntent)
+            throws OcrException {
+        final String startMessage = MessageFormatUtil.format(
+                PdfOcrLogMessageConstant.START_OCR_FOR_IMAGES,
+                inputImages.size());
+        LOGGER.info(startMessage);
+
+        // remember the engine's current meta info and replace it with a wrapper for the OCR run
+        IMetaInfo previousMetaInfo = null;
+        if (ocrEngine instanceof IThreadLocalMetaInfoAware) {
+            final IThreadLocalMetaInfoAware awareEngine = (IThreadLocalMetaInfoAware) ocrEngine;
+            previousMetaInfo = awareEngine.getThreadLocalMetaInfo();
+            final PdfDocumentType documentType =
+                    pdfOutputIntent == null ? PdfDocumentType.PDF : PdfDocumentType.PDFA;
+            awareEngine.setThreadLocalMetaInfo(new OcrPdfCreatorMetaInfo(
+                    awareEngine.getThreadLocalMetaInfo(), UUID.randomUUID(), documentType));
+        }
+
+        // per input image (insertion order kept): page number -> recognized
+        // text items, as returned by the engine's doImageOcr
+        final Map<File, Map<Integer, List<TextInfo>>> ocrResults =
+                new LinkedHashMap<File, Map<Integer, List<TextInfo>>>();
+        try {
+            for (final File image : inputImages) {
+                final Map<Integer, List<TextInfo>> pages = ocrEngine.doImageOcr(image);
+                ocrResults.put(image, pages);
+            }
+        } finally {
+            // put the previously stored meta info back, even if OCR failed
+            if (ocrEngine instanceof IThreadLocalMetaInfoAware) {
+                ((IThreadLocalMetaInfoAware) ocrEngine).setThreadLocalMetaInfo(previousMetaInfo);
+            }
+        }
+
+        return createPdfDocument(pdfWriter, pdfOutputIntent, ocrResults);
+    }
+
+    /**
+     * Performs OCR with set parameters using provided {@link IOcrEngine} and
+     * creates PDF using provided {@link com.itextpdf.kernel.pdf.PdfWriter}.
+     *
+     * @param inputImages {@link java.util.List} of images to be OCRed
+     * @param pdfWriter the {@link com.itextpdf.kernel.pdf.PdfWriter} object
+     *                  to write final PDF document to
+     * @return result {@link com.itextpdf.kernel.pdf.PdfDocument} object
+     * @throws OcrException if provided font is incorrect
+     */
+    public final PdfDocument createPdf(final List<File> inputImages, final PdfWriter pdfWriter)
+            throws OcrException {
+        final PdfOutputIntent noOutputIntent = null; // createPdfA produces PDF/A only for a non-null intent
+        return createPdfA(inputImages, pdfWriter, noOutputIntent);
+    }
+
+    /**
+     * Gets used {@link IOcrEngine}.
+     *
+     * Returns {@link IOcrEngine} reader object to perform OCR.
+     * @return selected {@link IOcrEngine} instance
+     */
+    public final IOcrEngine getOcrEngine() {
+        return this.ocrEngine;
+    }
+
+    /**
+     * Sets {@link IOcrEngine} reader object to perform OCR.
+     * @param reader selected {@link IOcrEngine} instance
+     */
+    public final void setOcrEngine(final IOcrEngine reader) {
+        this.ocrEngine = reader;
+    }
+
+    /**
+     * Adds image (or its one page) and text that was found there to canvas.
+     *
+     * @param pdfDocument result {@link com.itextpdf.kernel.pdf.PdfDocument}
+     * @param imageSize size of the image according to the selected
+     *                  {@link ScaleMode}
+     * @param pageText text that was found on this image (or on this page)
+     * @param imageData input image if it is a single page or its one page if
+     *                 this is a multi-page image
+     * @param createPdfA3u true if PDF/A3u document is being created
+     * @throws OcrException if PDF/A3u document is being created and provided
+     * font contains notdef glyphs
+     */
+    private void addToCanvas(final PdfDocument pdfDocument,
+            final com.itextpdf.kernel.geom.Rectangle imageSize,
+            final List<TextInfo> pageText, final ImageData imageData,
+            final boolean createPdfA3u) throws OcrException {
+        com.itextpdf.kernel.geom.Rectangle rectangleSize =
+                ocrPdfCreatorProperties.getPageSize() == null
+                        ? imageSize : ocrPdfCreatorProperties.getPageSize();
+        PageSize size = new PageSize(rectangleSize);
+        PdfPage pdfPage = pdfDocument.addNewPage(size);
+        PdfCanvas canvas = new NotDefCheckingPdfCanvas(pdfPage, createPdfA3u);
+
+        PdfLayer[] layers = createPdfLayers(ocrPdfCreatorProperties.getImageLayerName(),
+                ocrPdfCreatorProperties.getTextLayerName(),
+                pdfDocument);
+        // layers[0] is used for the image, layers[1] for the text; either may be null
+        if (layers[0] != null) {
+            canvas.beginLayer(layers[0]);
+        }
+        addImageToCanvas(imageData, imageSize, canvas);
+        if (layers[0] != null && layers[0] != layers[1]) {
+            canvas.endLayer();
+        }
+
+        // how much the original image size changed (1 when there is no image)
+        float multiplier = imageData == null
+                ? 1 : imageSize.getWidth()
+                / PdfCreatorUtil.getPoints(imageData.getWidth());
+        if (layers[1] != null && layers[0] != layers[1]) {
+            canvas.beginLayer(layers[1]);
+        }
+        try {
+            addTextToCanvas(imageSize, pageText, canvas, multiplier,
+                    pdfPage.getMediaBox());
+        } catch (OcrException e) {
+            LOGGER.error(MessageFormatUtil.format(
+                    OcrException.CANNOT_CREATE_PDF_DOCUMENT,
+                    e.getMessage()));
+            // keep the original exception as the cause so its stack trace is not lost
+            throw new OcrException(OcrException.CANNOT_CREATE_PDF_DOCUMENT, e)
+                    .setMessageParams(e.getMessage());
+        }
+        if (layers[1] != null) {
+            canvas.endLayer();
+        }
+    }
+
+    /**
+     * Creates a new PDF document using provided properties, adds images with
+     * recognized text.
+     *
+     * @param pdfWriter the {@link com.itextpdf.kernel.pdf.PdfWriter} object
+     *                  to write final PDF document to
+     * @param pdfOutputIntent {@link com.itextpdf.kernel.pdf.PdfOutputIntent}
+     *                        for PDF/A-3u document
+     * @param imagesTextData map that contains input image files as keys,
+     *                       and as value: map pageNumber -> text for the page
+     * @return result {@link com.itextpdf.kernel.pdf.PdfDocument} object
+     */
+    private PdfDocument createPdfDocument(final PdfWriter pdfWriter,
+            final PdfOutputIntent pdfOutputIntent,
+            final Map<File, Map<Integer, List<TextInfo>>> imagesTextData) {
+        boolean createPdfA3u = pdfOutputIntent != null;
+        // pdfLang should be set in PDF/A mode; it is checked before the document is
+        // created on the writer, so a failure does not leave an unclosed document
+        boolean hasPdfLangProperty = ocrPdfCreatorProperties.getPdfLang() != null
+                && !ocrPdfCreatorProperties.getPdfLang().equals("");
+        if (createPdfA3u && !hasPdfLangProperty) {
+            LOGGER.error(MessageFormatUtil.format(
+                    OcrException.CANNOT_CREATE_PDF_DOCUMENT,
+                    PdfOcrLogMessageConstant.PDF_LANGUAGE_PROPERTY_IS_NOT_SET));
+            throw new OcrException(OcrException.CANNOT_CREATE_PDF_DOCUMENT)
+                    .setMessageParams(PdfOcrLogMessageConstant.PDF_LANGUAGE_PROPERTY_IS_NOT_SET);
+        }
+
+        PdfDocument pdfDocument;
+        if (createPdfA3u) {
+            pdfDocument = new PdfADocument(pdfWriter,
+                    PdfAConformanceLevel.PDF_A_3U, pdfOutputIntent,
+                    new DocumentProperties().setEventCountingMetaInfo(new PdfOcrMetaInfo()));
+        } else {
+            pdfDocument = new PdfDocument(pdfWriter,
+                    new DocumentProperties().setEventCountingMetaInfo(new PdfOcrMetaInfo()));
+        }
+
+        // add metadata
+        if (hasPdfLangProperty) {
+            pdfDocument.getCatalog()
+                    .setLang(new PdfString(ocrPdfCreatorProperties.getPdfLang()));
+        }
+
+        // set title if it is not empty
+        if (ocrPdfCreatorProperties.getTitle() != null) {
+            pdfDocument.getCatalog().setViewerPreferences(
+                    new PdfViewerPreferences().setDisplayDocTitle(true));
+            PdfDocumentInfo info = pdfDocument.getDocumentInfo();
+            info.setTitle(ocrPdfCreatorProperties.getTitle());
+        }
+
+        // reset passed font provider
+        ocrPdfCreatorProperties.getFontProvider().reset();
+
+        addDataToPdfDocument(imagesTextData, pdfDocument, createPdfA3u);
+        return pdfDocument;
+    }
+
+    /**
+     * Places provided images and recognized text to the result PDF document.
+     *
+     * @param imagesTextData map that contains input image
+     *                       files as keys, and as value:
+     *                       map pageNumber -> text for the page
+     * @param pdfDocument result {@link com.itextpdf.kernel.pdf.PdfDocument}
+     * @param createPdfA3u true if PDF/A3u document is being created
+     * @throws OcrException if input image cannot be read or provided font
+     * contains NOTDEF glyphs
+     */
+    private void addDataToPdfDocument(
+            final Map<File, Map<Integer, List<TextInfo>>> imagesTextData,
+            final PdfDocument pdfDocument,
+            final boolean createPdfA3u) throws OcrException {
+        for (Map.Entry<File, Map<Integer, List<TextInfo>>> imageEntry
+                : imagesTextData.entrySet()) {
+            final File sourceImage = imageEntry.getKey();
+            try {
+                final List<ImageData> pageImages =
+                        PdfCreatorUtil.getImageData(sourceImage);
+                LOGGER.info(MessageFormatUtil.format(
+                        PdfOcrLogMessageConstant.NUMBER_OF_PAGES_IN_IMAGE,
+                        sourceImage.toString(), pageImages.size()));
+                final Map<Integer, List<TextInfo>> textByPage = imageEntry.getValue();
+                if (textByPage.keySet().size() == 0) {
+                    // no text data at all for this image: no pages are added
+                    continue;
+                }
+                // the text map is looked up with list index + 1
+                for (int index = 0; index < pageImages.size(); ++index) {
+                    final int pageNumber = index + 1;
+                    final ImageData pageImage = pageImages.get(index);
+                    final com.itextpdf.kernel.geom.Rectangle scaledSize =
+                            PdfCreatorUtil.calculateImageSize(pageImage,
+                                    ocrPdfCreatorProperties.getScaleMode(),
+                                    ocrPdfCreatorProperties.getPageSize());
+                    if (textByPage.containsKey(pageNumber)) {
+                        addToCanvas(pdfDocument, scaledSize,
+                                textByPage.get(pageNumber), pageImage, createPdfA3u);
+                    }
+                }
+            } catch (IOException e) {
+                LOGGER.error(MessageFormatUtil.format(
+                        PdfOcrLogMessageConstant.CANNOT_ADD_DATA_TO_PDF_DOCUMENT,
+                        e.getMessage()));
+            }
+        }
+    }
+
+    /**
+     * Places given image to canvas to background to a separate layer.
+     *
+     * @param imageData input image as {@link com.itextpdf.io.image.ImageData}
+     * @param imageSize size of the image according to the selected
+     *                  {@link ScaleMode}
+     * @param pdfCanvas canvas to place the image
+     */
+    private void addImageToCanvas(final ImageData imageData,
+            final com.itextpdf.kernel.geom.Rectangle imageSize,
+            final PdfCanvas pdfCanvas) {
+        if (imageData == null) {
+            return; // no image to draw
+        }
+        if (ocrPdfCreatorProperties.getPageSize() == null) {
+            // no page size is set: the image is added with imageSize as its rectangle
+            pdfCanvas.addImage(imageData, imageSize, false);
+            return;
+        }
+        // a page size is set: place the image at the coordinates computed for that page size
+        final com.itextpdf.kernel.geom.Point origin = PdfCreatorUtil
+                .calculateImageCoordinates(ocrPdfCreatorProperties.getPageSize(), imageSize);
+        final com.itextpdf.kernel.geom.Rectangle target = new com.itextpdf.kernel.geom.Rectangle(
+                (float) origin.x, (float) origin.y, imageSize.getWidth(), imageSize.getHeight());
+        pdfCanvas.addImage(imageData, target, false);
+    }
+
+    /**
+     * Places recognized text on the canvas, fitting each item to its OCR bbox.
+     *
+     * @param imageSize size of the image according to the selected {@link ScaleMode}
+     * @param pageText text items found on this image (or on this page);
+     *                 nothing is drawn if it is null or empty
+     * @param pdfCanvas canvas to place the text
+     * @param multiplier factor applied to the bbox coordinates of each item
+     * @param pageMediaBox area passed to the {@link Canvas} the text is drawn on
+     * @throws OcrException if the fonts cannot be resolved, or if a PDF/A3u
+     * document is being created and the font contains notdef glyphs
+     */
+    private void addTextToCanvas(
+            final com.itextpdf.kernel.geom.Rectangle imageSize,
+            final List<TextInfo> pageText,
+            final PdfCanvas pdfCanvas,
+            final float multiplier,
+            final com.itextpdf.kernel.geom.Rectangle pageMediaBox)
+            throws OcrException {
+        if (pageText != null && pageText.size() > 0) {
+            com.itextpdf.kernel.geom.Point imageCoordinates =
+                    PdfCreatorUtil.calculateImageCoordinates(
+                    ocrPdfCreatorProperties.getPageSize(), imageSize);
+            for (TextInfo item : pageText) {
+                String line = item.getText();
+                List<Float> coordinates = item.getBbox(); // left, top, right, bottom
+                final float left = coordinates.get(0) * multiplier;
+                final float right = (coordinates.get(2) + 1) * multiplier - 1;
+                final float top = coordinates.get(1) * multiplier;
+                final float bottom = (coordinates.get(3) + 1) * multiplier - 1;
+                // bbox size; getPoints presumably converts pixels to points
+                float bboxWidthPt = PdfCreatorUtil
+                        .getPoints(right - left);
+                float bboxHeightPt = PdfCreatorUtil
+                        .getPoints(bottom - top);
+                FontProvider fontProvider = getOcrPdfCreatorProperties()
+                        .getFontProvider();
+                String fontFamily = getOcrPdfCreatorProperties()
+                        .getDefaultFontFamily();
+                if (!line.isEmpty() && bboxHeightPt > 0 && bboxWidthPt > 0) {
+                    Document document = new Document(pdfCanvas.getDocument());
+                    document.setFontProvider(fontProvider);
+                    // NOTE(review): this Document is created per item and is not closed here
+                    // Find the font size at which the text fits the enlarged OCR bbox
+                    float fontSize = PdfCreatorUtil.calculateFontSize(
+                            document, line, fontFamily,
+                            bboxHeightPt, bboxWidthPt);
+                    // width of the line at that font size, used for the scaling below
+                    float lineWidth = PdfCreatorUtil.getRealLineWidth(document,
+                            line, fontFamily, fontSize);
+                    // offset of the item inside the image: y is image height minus bottom
+                    float deltaX = PdfCreatorUtil.getPoints(left);
+                    float deltaY = imageSize.getHeight()
+                            - PdfCreatorUtil.getPoints(bottom);
+
+                    Canvas canvas = new Canvas(pdfCanvas, pageMediaBox);
+                    canvas.setFontProvider(fontProvider);
+                    // horizontal scaling makes the line span the bbox width
+                    Text text = new Text(line)
+                            .setHorizontalScaling(bboxWidthPt / lineWidth);
+
+                    Paragraph paragraph = new Paragraph(text)
+                            .setMargin(0)
+                            .setMultipliedLeading(1.2f);
+                    paragraph.setFontFamily(fontFamily)
+                            .setFontSize(fontSize);
+                    paragraph.setWidth(bboxWidthPt * 1.5f);
+                    // without a configured text color the text is rendered invisible
+                    if (ocrPdfCreatorProperties.getTextColor() != null) {
+                        paragraph.setFontColor(
+                                ocrPdfCreatorProperties.getTextColor());
+                    } else {
+                        paragraph.setTextRenderingMode(
+                                TextRenderingMode.INVISIBLE);
+                    }
+                    // position = offset inside the image + offset of the image on the page
+                    canvas.showTextAligned(paragraph,
+                            deltaX + (float)imageCoordinates.x,
+                            deltaY + (float)imageCoordinates.y,
+                            TextAlignment.LEFT);
+                    canvas.close();
+                }
+            }
+        }
+    }
+
+    /**
+     * Creates layers for image and text according rules set in {@link OcrPdfCreatorProperties}.
+     *
+     * @param imageLayerName name of the image layer
+     * @param textLayerName name of the text layer
+     * @param pdfDocument document to add layers to
+     *
+     * @return array of two layers: first layer is for image, second layer is for text.
+     * Elements may be null meaning that layer creation is not requested
+     */
+    private static PdfLayer[] createPdfLayers(
+            String imageLayerName,
+            String textLayerName,
+            PdfDocument pdfDocument) {
+        if (imageLayerName == null && textLayerName == null) {
+            return new PdfLayer[] {null, null};
+        } else if (imageLayerName == null) {
+            return new PdfLayer[]{null, new PdfLayer(textLayerName, pdfDocument)};
+        } else if (textLayerName == null) {
+            return new PdfLayer[]{new PdfLayer(imageLayerName, pdfDocument), null};
+        } else if (imageLayerName.equals(textLayerName)) {
+            PdfLayer pdfLayer = new PdfLayer(imageLayerName, pdfDocument);
+            return new PdfLayer[] {pdfLayer, pdfLayer};
+        } else {
+            return new PdfLayer[] {new PdfLayer(imageLayerName, pdfDocument), new PdfLayer(textLayerName, pdfDocument)};
+        }
+    }
+
+    /**
+     * A PDF canvas that checks every shown glyph line for notdef glyphs.
+     */
+    private static class NotDefCheckingPdfCanvas extends PdfCanvas {
+        private static final long serialVersionUID = 708713860707664107L;
+        private final boolean createPdfA3u;
+        public NotDefCheckingPdfCanvas(PdfPage page, boolean createPdfA3u) {
+            super(page);
+            this.createPdfA3u = createPdfA3u;
+        }
+
+        @Override
+        public PdfCanvas showText(GlyphLine text) {
+            final ActualTextCheckingGlyphLine checkedLine =
+                    new ActualTextCheckingGlyphLine(text);
+            final PdfFont font = getGraphicsState().getFont();
+            // stays null while every glyph is defined, otherwise holds the
+            // message built for the most recently found notdef glyph
+            String warning = null;
+            for (int pos = checkedLine.start; pos < checkedLine.end; pos++) {
+                final Glyph current = checkedLine.get(pos);
+                if (!isNotDefGlyph(font, current)) {
+                    continue;
+                }
+                warning = MessageFormatUtil.format(PdfOcrLogMessageConstant
+                                .COULD_NOT_FIND_CORRESPONDING_GLYPH_TO_UNICODE_CHARACTER,
+                        current.getUnicode());
+                if (this.createPdfA3u) {
+                    // exception is thrown only if PDF/A document is
+                    // being created
+                    throw new OcrException(warning);
+                }
+                // the notdef glyph gets its own characters as actual text
+                checkedLine.setActualTextToGlyph(pos,
+                        checkedLine.toUnicodeString(pos, pos + 1));
+                // a copy with a fake unicode replaces the glyph deliberately,
+                // to pass further checks for actual text necessity while
+                // ActualTextIterator iterates over the glyph line chunks
+                final Glyph substitute = new Glyph(current);
+                substitute.setUnicode(-1);
+                checkedLine.set(pos, substitute);
+            }
+            // reached with a message only when no PDF/A document is created
+            if (warning != null) {
+                LOGGER.warn(warning);
+            }
+            return this.showText(checkedLine,
+                    new ActualTextIterator(checkedLine));
+        }
+
+        private static boolean isNotDefGlyph(PdfFont font, Glyph glyph) {
+            final boolean zeroCodeFont = font instanceof PdfType0Font
+                    || font instanceof PdfTrueTypeFont;
+            if (zeroCodeFont) {
+                return glyph.getCode() == 0;
+            }
+            final boolean minusOneCodeFont = font instanceof PdfType1Font
+                    || font instanceof PdfType3Font;
+            return minusOneCodeFont && glyph.getCode() == -1;
+        }
+    }
+
+    /**
+     * A glyph line that sets actual text for a glyph only when the glyph has
+     * none yet, so that existing actual text is not overwritten.
+     */
+    private static class ActualTextCheckingGlyphLine extends GlyphLine {
+        private static final long serialVersionUID = -946356392098459518L;
+
+        public ActualTextCheckingGlyphLine(GlyphLine other) {
+            super(other);
+        }
+
+        public void setActualTextToGlyph(int i, String text) {
+            final boolean alreadySet = this.actualText != null
+                    && i < this.actualText.size() && this.actualText.get(i) != null;
+            if (!alreadySet) {
+                super.setActualText(i, i + 1, text);
+            }
+        }
+    }
+}
diff --git a/pdfocr-api/src/main/java/com/itextpdf/pdfocr/OcrPdfCreatorMetaInfo.java b/pdfocr-api/src/main/java/com/itextpdf/pdfocr/OcrPdfCreatorMetaInfo.java
new file mode 100644
index 0000000..47081ea
--- /dev/null
+++ b/pdfocr-api/src/main/java/com/itextpdf/pdfocr/OcrPdfCreatorMetaInfo.java
@@ -0,0 +1,83 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr;
+
+import com.itextpdf.kernel.counter.event.IMetaInfo;
+
+import java.util.UUID;
+
+/**
+ * The meta info that is used internally by pdfOcr to pass a wrapped custom meta data.
+ */
+public class OcrPdfCreatorMetaInfo implements IMetaInfo, IMetaInfoWrapper {
+    private static final long serialVersionUID = 7047674343175216537L;
+
+    private final IMetaInfo wrappedMetaInfo;
+    private final UUID uuid;
+    private final PdfDocumentType pdfDocumentType;
+
+    /**
+     * Creates an inner meta info wrapper.
+     *
+     * @param wrappedMetaInfo the meta info to be wrapped
+     * @param uuid a unique {@link UUID} which corresponds to the ocr event for which this meta info is passed
+     * @param pdfDocumentType a type of the document which is created during the corresponding ocr event
+     */
+    public OcrPdfCreatorMetaInfo(IMetaInfo wrappedMetaInfo, UUID uuid, PdfDocumentType pdfDocumentType) {
+        this.wrappedMetaInfo = wrappedMetaInfo;
+        this.uuid = uuid;
+        this.pdfDocumentType = pdfDocumentType;
+    }
+
+    /**
+     * Gets the unique {@link UUID} which corresponds to the ocr event for which this meta info is passed.
+     * @return the unique {@link UUID} which corresponds to the ocr event for which this meta info is passed
+     */
+    public UUID getDocumentId() {
+        return uuid;
+    }
+
+    /**
+     * Gets the type of the document which is created during the corresponding ocr event.
+     * @return the type of the document which is created during the corresponding ocr event
+     */
+    public PdfDocumentType getPdfDocumentType() {
+        return pdfDocumentType;
+    }
+
+    /**
+     * Gets the wrapped meta info.
+     * @return the wrapped meta info
+     */
+    @Override
+    public IMetaInfo getWrappedMetaInfo() {
+        return wrappedMetaInfo;
+    }
+
+    /**
+     * The enum which represents types of documents, for which pdfOcr sends different events.
+     */
+    public enum PdfDocumentType {
+        PDF, PDFA;
+    }
+}
\ No newline at end of file
diff --git a/pdfocr-api/src/main/java/com/itextpdf/pdfocr/OcrPdfCreatorProperties.java b/pdfocr-api/src/main/java/com/itextpdf/pdfocr/OcrPdfCreatorProperties.java
new file mode 100644
index 0000000..187d768
--- /dev/null
+++ b/pdfocr-api/src/main/java/com/itextpdf/pdfocr/OcrPdfCreatorProperties.java
@@ -0,0 +1,340 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr;
+
+import com.itextpdf.layout.font.FontProvider;
+
+/**
+ * Properties that configure the {@link OcrPdfCreator}.
+ */
+public class OcrPdfCreatorProperties {
+
+    /**
+     * Font provider.
+     * When it is not set, {@link #getFontProvider()} creates a
+     * {@link PdfOcrFontProvider} on first access.
+     */
+    private FontProvider fontProvider;
+
+    /**
+     * Default font family.
+     * When null or empty, the default family of the font provider is used.
+     */
+    private String defaultFontFamily;
+
+    /**
+     * Color of the text in the output PDF document.
+     * <code>null</code> by default, which stands for no explicit text color.
+     */
+    private com.itextpdf.kernel.colors.Color textColor;
+
+    /**
+     * Scale mode for input images.
+     * {@link ScaleMode#SCALE_TO_FIT} by default. This value is used
+     * only if {@link #pageSize} is not null.
+     */
+    private ScaleMode scaleMode = ScaleMode.SCALE_TO_FIT;
+
+    /**
+     * Size of the PDF document pages.
+     * <code>null</code> by default.
+     * With a null value the page takes the size of the input image;
+     * otherwise the input image is scaled according to the selected
+     * {@link ScaleMode}.
+     */
+    private com.itextpdf.kernel.geom.Rectangle pageSize;
+
+    /**
+     * Name of the image layer.
+     * <code>null</code> by default.
+     * With a null value the image is placed directly on the canvas, not in a layer.
+     * If it equals the text layer name, image and text are placed in one layer.
+     */
+    private String imageLayerName;
+
+    /**
+     * Name of the text layer.
+     * <code>null</code> by default.
+     * With a null value the text is placed directly on the canvas, not in a layer.
+     * If it equals the image layer name, text and image are placed in one layer.
+     */
+    private String textLayerName;
+
+    /**
+     * PDF language, an empty string by default.
+     */
+    private String pdfLang = "";
+
+    /**
+     * Title of the created document.
+     * <code>null</code> (not set) by default.
+     */
+    private String title;
+
+    /**
+     * Creates a new {@link OcrPdfCreatorProperties} instance with default values.
+     */
+    public OcrPdfCreatorProperties() {
+    }
+
+    /**
+     * Copy constructor: creates a new {@link OcrPdfCreatorProperties}
+     * instance that takes over every setting of another instance.
+     * Object references are copied as they are (no deep copy).
+     *
+     * @param other the other {@link OcrPdfCreatorProperties} instance
+     */
+    public OcrPdfCreatorProperties(OcrPdfCreatorProperties other) {
+        this.fontProvider = other.fontProvider;
+        this.defaultFontFamily = other.defaultFontFamily;
+        this.textColor = other.textColor;
+        this.scaleMode = other.scaleMode;
+        this.pageSize = other.pageSize;
+        this.imageLayerName = other.imageLayerName;
+        this.textLayerName = other.textLayerName;
+        this.pdfLang = other.pdfLang;
+        this.title = other.title;
+    }
+
+    /**
+     * Returns the color used for text in the output PDF document.
+     *
+     * @return the configured text {@link com.itextpdf.kernel.colors.Color}
+     */
+    public final com.itextpdf.kernel.colors.Color getTextColor() {
+        return this.textColor;
+    }
+
+    /**
+     * Defines the color used for text in the output PDF document.
+     * No color is set by default.
+     *
+     * @param textColor required text {@link com.itextpdf.kernel.colors.Color}
+     * @return the {@link OcrPdfCreatorProperties} instance
+     */
+    public final OcrPdfCreatorProperties setTextColor(
+            final com.itextpdf.kernel.colors.Color textColor) {
+        this.textColor = textColor;
+        return this;
+    }
+
+    /**
+     * Returns the scale mode applied to input images, one of the options
+     * of the {@link ScaleMode} enumeration.
+     *
+     * @return selected {@link ScaleMode}
+     */
+    public final ScaleMode getScaleMode() {
+        return this.scaleMode;
+    }
+
+    /**
+     * Defines the scale mode applied to input images, one of the options
+     * of the {@link ScaleMode} enumeration.
+     *
+     * @param scaleMode selected {@link ScaleMode}
+     * @return the {@link OcrPdfCreatorProperties} instance
+     */
+    public final OcrPdfCreatorProperties setScaleMode(
+            final ScaleMode scaleMode) {
+        this.scaleMode = scaleMode;
+        return this;
+    }
+
+    /**
+     * Returns the size requested for pages of the output PDF document. The
+     * real page size is calculated according to the selected {@link ScaleMode}.
+     *
+     * @return required page size as {@link com.itextpdf.kernel.geom.Rectangle}
+     */
+    public final com.itextpdf.kernel.geom.Rectangle getPageSize() {
+        return this.pageSize;
+    }
+
+    /**
+     * Defines the size requested for pages of the output PDF document.
+     *
+     * @param pageSize requested page
+     *                size as {@link com.itextpdf.kernel.geom.Rectangle}
+     * @return the {@link OcrPdfCreatorProperties} instance
+     */
+    public final OcrPdfCreatorProperties setPageSize(
+            final com.itextpdf.kernel.geom.Rectangle pageSize) {
+        this.pageSize = pageSize;
+        return this;
+    }
+
+    /**
+     * Returns the name of the image layer.
+     *
+     * @return image layer's name as {@link java.lang.String} if it was
+     * manually set, otherwise - <code>null</code>
+     */
+    public final String getImageLayerName() {
+        return this.imageLayerName;
+    }
+
+    /**
+     * Defines the name of the image layer.
+     * <code>null</code> by default.
+     * With a null name the image is placed directly on the canvas, not in a layer.
+     * If it equals the text layer name, text and image are placed in one layer.
+     *
+     * @param layerName name of the image layer
+     *                       as {@link java.lang.String}
+     * @return the {@link OcrPdfCreatorProperties} instance
+     */
+    public final OcrPdfCreatorProperties setImageLayerName(
+            final String layerName) {
+        this.imageLayerName = layerName;
+        return this;
+    }
+
+    /**
+     * Returns the name of the text layer.
+     *
+     * @return text layer's name as {@link java.lang.String} if it was
+     * manually set, otherwise - <code>null</code>
+     */
+    public final String getTextLayerName() {
+        return this.textLayerName;
+    }
+
+    /**
+     * Defines the name of the text layer.
+     * <code>null</code> by default.
+     * With a null name the text is placed directly on the canvas, not in a layer.
+     * If it equals the image layer name, text and image are placed in one layer.
+     *
+     * @param layerName name of the text layer as {@link java.lang.String}
+     * @return the {@link OcrPdfCreatorProperties} instance
+     */
+    public final OcrPdfCreatorProperties setTextLayerName(
+            final String layerName) {
+        this.textLayerName = layerName;
+        return this;
+    }
+
+    /**
+     * Returns the PDF language.
+     *
+     * @return PDF document language as {@link java.lang.String}
+     */
+    public final String getPdfLang() {
+        return this.pdfLang;
+    }
+
+    /**
+     * Defines the natural language of the PDF and, optionally, the locale.
+     * The identifier is either an empty string, meaning the language is unknown,
+     * or a Language-Tag as defined in BCP 47 (2009), Tags for the Identification of Languages.
+     *
+     * @param language PDF document language as {@link java.lang.String},
+     *                 e.g. "en-US", etc.
+     * @return the {@link OcrPdfCreatorProperties} instance
+     */
+    public final OcrPdfCreatorProperties setPdfLang(
+            final String language) {
+        this.pdfLang = language;
+        return this;
+    }
+
+    /**
+     * Returns the title of the PDF document.
+     *
+     * @return PDF title as {@link java.lang.String}
+     */
+    public final String getTitle() {
+        return this.title;
+    }
+
+    /**
+     * Defines the title of the PDF document.
+     *
+     * @param title PDF title as {@link java.lang.String}
+     * @return the {@link OcrPdfCreatorProperties} instance
+     */
+    public final OcrPdfCreatorProperties setTitle(
+            final String title) {
+        this.title = title;
+        return this;
+    }
+
+    /**
+     * Returns the font provider that was set previously; if none was set,
+     * a new {@link PdfOcrFontProvider} is created, remembered and
+     * returned.
+     * @return {@link com.itextpdf.layout.font.FontProvider} object
+     */
+    public FontProvider getFontProvider() {
+        if (this.fontProvider == null) {
+            this.fontProvider = new PdfOcrFontProvider();
+        }
+        return this.fontProvider;
+    }
+
+    /**
+     * Defines the font provider.
+     * Please note that passed FontProvider is not to be used in multithreaded
+     * environments or for any parallel processing.
+     * The default font family is not touched by this method; use
+     * {@link #setFontProvider(FontProvider, String)} to set both at once.
+     * @param fontProvider selected
+     * {@link com.itextpdf.layout.font.FontProvider} instance
+     * @return the {@link OcrPdfCreatorProperties} instance
+     */
+    public OcrPdfCreatorProperties setFontProvider(FontProvider fontProvider) {
+        this.fontProvider = fontProvider;
+        return this;
+    }
+
+    /**
+     * Defines the font provider together with the default font family.
+     * Please note that passed FontProvider is not to be used in multithreaded
+     * environments or for any parallel processing.
+     * @param fontProvider selected
+     * {@link com.itextpdf.layout.font.FontProvider} instance
+     * @param defaultFontFamily preferred font family to be used when selecting
+     *                          font from
+     *                          {@link com.itextpdf.layout.font.FontProvider}.
+     * @return the {@link OcrPdfCreatorProperties} instance
+     */
+    public OcrPdfCreatorProperties setFontProvider(FontProvider fontProvider,
+            String defaultFontFamily) {
+        this.fontProvider = fontProvider;
+        this.defaultFontFamily = defaultFontFamily;
+        return this;
+    }
+
+    /**
+     * Returns the preferred font family to be used when selecting font from
+     * {@link com.itextpdf.layout.font.FontProvider}.
+     *
+     * @return the default font family that was set; if it is null or empty,
+     * the default font family of {@link #getFontProvider()} is returned
+     */
+    public String getDefaultFontFamily() {
+        final boolean unset = this.defaultFontFamily == null || this.defaultFontFamily.isEmpty();
+        return unset ? getFontProvider().getDefaultFontFamily() : this.defaultFontFamily;
+    }
+}
diff --git a/pdfocr-api/src/main/java/com/itextpdf/pdfocr/PdfCreatorUtil.java b/pdfocr-api/src/main/java/com/itextpdf/pdfocr/PdfCreatorUtil.java
new file mode 100644
index 0000000..5398b2e
--- /dev/null
+++ b/pdfocr-api/src/main/java/com/itextpdf/pdfocr/PdfCreatorUtil.java
@@ -0,0 +1,291 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr;
+
+import com.itextpdf.io.image.ImageData;
+import com.itextpdf.io.image.ImageDataFactory;
+import com.itextpdf.io.image.TiffImageData;
+import com.itextpdf.io.source.RandomAccessFileOrArray;
+import com.itextpdf.io.source.RandomAccessSourceFactory;
+import com.itextpdf.io.util.MessageFormatUtil;
+import com.itextpdf.kernel.geom.Rectangle;
+import com.itextpdf.layout.Document;
+import com.itextpdf.layout.element.Paragraph;
+import com.itextpdf.layout.layout.LayoutArea;
+import com.itextpdf.layout.layout.LayoutContext;
+import com.itextpdf.layout.layout.LayoutResult;
+import com.itextpdf.layout.renderer.IRenderer;
+import com.itextpdf.layout.renderer.ParagraphRenderer;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.util.ArrayList;
+import java.util.List;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+class PdfCreatorUtil {
+
+    /**
+     * Factor to convert pixels to points (0.75), presumably for 96 pixels per inch.
+     */
+    static final float PX_TO_PT = 3f / 4f;
+
+    /**
+     * Number of points per inch.
+     */
+    private static final float POINTS_PER_INCH = 72.0f;
+
+    /**
+     * Logger of this utility class.
+     */
+    private static final Logger LOGGER = LoggerFactory
+            .getLogger(PdfCreatorUtil.class);
+
+    /**
+     * Calculates font size according to given bbox height, width and selected
+     * font.
+     *
+     * @param document PDF document as a {@link com.itextpdf.layout.Document}
+     *                object
+     * @param line text line
+     * @param fontFamily default font family
+     * @param bboxHeightPt height of bbox calculated by OCR Reader
+     * @param bboxWidthPt width of bbox calculated by OCR Reader
+     * @return font size
+     * @throws OcrException if set font provider is invalid and/or fonts that
+     * it contains are invalid
+     */
+    static float calculateFontSize(final Document document, final String line,
+            final String fontFamily, final float bboxHeightPt,
+            final float bboxWidthPt) throws OcrException {
+        Rectangle bbox = new Rectangle(bboxWidthPt * 1.5f,
+                bboxHeightPt * 1.5f);
+        // setting minimum and maximum (approx.) values for font size
+        float fontSize = 1;
+        float maxFontSize = bbox.getHeight();
+
+        try {
+            Paragraph paragraph = new Paragraph(line);
+            paragraph.setWidth(bbox.getWidth());
+            paragraph.setFontFamily(fontFamily);
+
+            while (Math.abs(fontSize - maxFontSize) > 1e-1) {
+                float curFontSize = (fontSize + maxFontSize) / 2;
+                paragraph.setFontSize(curFontSize);
+                ParagraphRenderer renderer = (ParagraphRenderer) paragraph.createRendererSubTree()
+                        .setParent(document.getRenderer());
+                LayoutContext context = new LayoutContext(
+                        new LayoutArea(1, bbox));
+                if (renderer.layout(context).getStatus() == LayoutResult.FULL && renderer.getLines().size() == 1) {
+                    fontSize = curFontSize;
+                } else {
+                    maxFontSize = curFontSize;
+                }
+            }
+        } catch (IllegalStateException e) {
+            LOGGER.error(PdfOcrLogMessageConstant
+                    .PROVIDED_FONT_PROVIDER_IS_INVALID);
+            throw new OcrException(
+                    OcrException.CANNOT_RESOLVE_PROVIDED_FONTS, e);
+        }
+        return fontSize;
+    }
+
+    /**
+     * Calculated real width of a paragraph with given text line, font provider
+     * and font size.
+     *
+     * @param document PDF document as a {@link com.itextpdf.layout.Document}
+     *                 object
+     * @param line text line
+     * @param fontFamily default font family
+     * @param fontSize calculated font size
+     * @return real width of text line in paragraph
+     */
+    static float getRealLineWidth(Document document, final String line,
+            final String fontFamily, float fontSize) {
+        Paragraph paragraph = new Paragraph(line);
+        paragraph.setFontFamily(fontFamily);
+        paragraph.setFontSize(fontSize);
+        IRenderer renderer = paragraph.createRendererSubTree()
+                .setParent(document.getRenderer());
+        return ((ParagraphRenderer) renderer).getMinMaxWidth().getMaxWidth();
+    }
+
+    /**
+     * Calculates image coordinates on the page.
+     *
+     * @param size size of the page
+     * @param imageSize size of the image
+     * @return list of two elements (coordinates): first - x, second - y.
+     */
+    static com.itextpdf.kernel.geom.Point calculateImageCoordinates(
+            final com.itextpdf.kernel.geom.Rectangle size,
+            final com.itextpdf.kernel.geom.Rectangle imageSize) {
+        float x = 0;
+        float y = 0;
+        if (size != null) {
+            if (imageSize.getHeight() < size.getHeight()) {
+                y = (size.getHeight() - imageSize.getHeight()) / 2;
+            }
+            if (imageSize.getWidth() < size.getWidth()) {
+                x = (size.getWidth() - imageSize.getWidth()) / 2;
+            }
+        }
+        return new com.itextpdf.kernel.geom.Point(x, y);
+    }
+
+    /**
+     * Reads the input {@link java.io.File} and wraps its content into
+     * {@link com.itextpdf.io.image.ImageData} objects.
+     *
+     * <p>
+     * A file whose extension is "tif" or "tiff" (compared case-insensitively)
+     * produces one {@link com.itextpdf.io.image.ImageData} per TIFF page.
+     * Any other file that has an extension produces exactly one element.
+     * If no '.' is found after the first character of the absolute path,
+     * the returned list is empty.
+     *
+     * @param inputImage input image as {@link java.io.File}
+     * @return list of {@link com.itextpdf.io.image.ImageData} objects
+     * (more than one element in the list if it is a multipage tiff)
+     * @throws OcrException if a non-TIFF image cannot be read by
+     * {@link com.itextpdf.io.image.ImageDataFactory}
+     * @throws IOException if error occurred during reading a file
+     */
+    static List<ImageData> getImageData(final File inputImage)
+            throws OcrException, IOException {
+        List<ImageData> images = new ArrayList<ImageData>();
+
+        final String absolutePath = inputImage.getAbsolutePath();
+        int index = absolutePath.lastIndexOf('.');
+        if (index > 0) {
+            String ext = absolutePath.substring(index + 1);
+
+            // equalsIgnoreCase does not depend on the default locale, unlike
+            // toLowerCase() which maps 'I' to a dotless 'i' under e.g. "tr"
+            if ("tiff".equalsIgnoreCase(ext)
+                    || "tif".equalsIgnoreCase(ext)) {
+                int tiffPages = getNumberOfPageTiff(inputImage);
+
+                if (tiffPages > 0) {
+                    // the file content is the same for every page, so it is
+                    // read from disk only once and reused
+                    byte[] bytes = Files.readAllBytes(inputImage.toPath());
+                    for (int page = 0; page < tiffPages; page++) {
+                        // TIFF page numbers passed to the factory are 1-based
+                        ImageData imageData = ImageDataFactory
+                                .createTiff(bytes, true,
+                                        page + 1, true);
+                        images.add(imageData);
+                    }
+                }
+            } else {
+                try {
+                    ImageData imageData = ImageDataFactory
+                            .create(absolutePath);
+                    images.add(imageData);
+                } catch (com.itextpdf.io.IOException e) {
+                    LOGGER.error(MessageFormatUtil.format(
+                            PdfOcrLogMessageConstant.CANNOT_READ_INPUT_IMAGE,
+                            e.getMessage()));
+                    throw new OcrException(
+                            OcrException.CANNOT_READ_INPUT_IMAGE, e);
+                }
+            }
+        }
+        return images;
+    }
+
+    /**
+     * Determines the PDF page size for an image according to the provided
+     * {@link ScaleMode}.
+     *
+     * <p>
+     * When there is no image, the required size is returned as is. When
+     * either the required size or the scale mode is missing, the page takes
+     * the size of the image converted to points. Otherwise a new rectangle
+     * based on the required size is adjusted according to the scale mode.
+     *
+     * @param imageData input image or its one page as
+     *                  {@link com.itextpdf.io.image.ImageData}
+     * @param scaleMode required {@link ScaleMode} that could be
+     *                  set using {@link OcrPdfCreatorProperties#setScaleMode}
+     *                  method
+     * @param requiredSize size of the page that could be set using
+     *                     {@link OcrPdfCreatorProperties#setPageSize} method
+     * @return {@link com.itextpdf.kernel.geom.Rectangle} describing the
+     * resulting page size
+     */
+    static com.itextpdf.kernel.geom.Rectangle calculateImageSize(
+            final ImageData imageData,
+            final ScaleMode scaleMode,
+            final com.itextpdf.kernel.geom.Rectangle requiredSize) {
+        // nothing to measure: fall back to whatever size was requested
+        if (imageData == null) {
+            return requiredSize;
+        }
+
+        final float widthPt = getPoints(imageData.getWidth());
+        final float heightPt = getPoints(imageData.getHeight());
+
+        // without a page size or a scale mode the page mirrors the image
+        if (requiredSize == null || scaleMode == null) {
+            return new com.itextpdf.kernel.geom.Rectangle(widthPt, heightPt);
+        }
+
+        final float pageWidth = requiredSize.getWidth();
+        final float pageHeight = requiredSize.getHeight();
+        final com.itextpdf.kernel.geom.Rectangle result =
+                new com.itextpdf.kernel.geom.Rectangle(pageWidth, pageHeight);
+
+        switch (scaleMode) {
+            case SCALE_HEIGHT:
+                // width stays at the page width, height keeps aspect ratio
+                result.setHeight(heightPt * pageWidth / widthPt);
+                break;
+            case SCALE_WIDTH:
+                // height stays at the page height, width keeps aspect ratio
+                result.setWidth(widthPt * pageHeight / heightPt);
+                break;
+            case SCALE_TO_FIT:
+                // the smaller ratio guarantees both dimensions fit the page
+                final float ratio = Math.min(
+                        pageWidth / widthPt,
+                        pageHeight / heightPt);
+                result.setWidth(widthPt * ratio);
+                result.setHeight(heightPt * ratio);
+                break;
+            default:
+                break;
+        }
+        return result;
+    }
+
+    /**
+     * Translates a length expressed in pixels into points by multiplying it
+     * with the {@code PX_TO_PT} factor.
+     *
+     * @param pixels input value in pixels
+     * @return the same length expressed in points
+     */
+    static float getPoints(final float pixels) {
+        final float points = PX_TO_PT * pixels;
+        return points;
+    }
+
+    /**
+     * Counts number of pages in the provided tiff image.
+     *
+     * <p>
+     * The underlying {@link RandomAccessFileOrArray} is closed even when
+     * counting the pages fails.
+     *
+     * @param inputImage input image {@link java.io.File}
+     * @return number of pages in the provided TIFF image
+     * @throws IOException if error occurred during creating a
+     * {@link com.itextpdf.io.source.IRandomAccessSource} based on a filename
+     * string or during closing it
+     */
+    private static int getNumberOfPageTiff(final File inputImage)
+            throws IOException {
+        RandomAccessFileOrArray raf = new RandomAccessFileOrArray(
+                new RandomAccessSourceFactory()
+                        .createBestSource(
+                                inputImage.getAbsolutePath()));
+        try {
+            return TiffImageData.getNumberOfPages(raf);
+        } finally {
+            // release the file handle on both the normal and the failure path
+            raf.close();
+        }
+    }
+}
diff --git a/pdfocr-api/src/main/java/com/itextpdf/pdfocr/PdfOcrFontProvider.java b/pdfocr-api/src/main/java/com/itextpdf/pdfocr/PdfOcrFontProvider.java
new file mode 100644
index 0000000..9742120
--- /dev/null
+++ b/pdfocr-api/src/main/java/com/itextpdf/pdfocr/PdfOcrFontProvider.java
@@ -0,0 +1,92 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr;
+
+import com.itextpdf.io.font.PdfEncodings;
+import com.itextpdf.io.util.MessageFormatUtil;
+import com.itextpdf.io.util.ResourceUtil;
+import com.itextpdf.io.util.StreamUtil;
+import com.itextpdf.layout.font.FontProvider;
+import com.itextpdf.layout.font.FontSet;
+
+import java.io.IOException;
+import java.io.InputStream;
+import org.slf4j.LoggerFactory;
+
+/**
+ * {@link FontProvider} that, when created with the no-argument constructor,
+ * registers the font bundled at {@code DEFAULT_FONT_PATH} and uses
+ * {@code DEFAULT_FONT_FAMILY} as its default font family.
+ */
+public class PdfOcrFontProvider extends FontProvider {
+
+    /**
+     * Path to the default font.
+     */
+    private static final String DEFAULT_FONT_PATH = "com/itextpdf/pdfocr/fonts/LiberationSans-Regular.ttf";
+
+    /**
+     * Default font family.
+     */
+    private static final String DEFAULT_FONT_FAMILY = "LiberationSans";
+
+    /**
+     * Creates a new {@link PdfOcrFontProvider} instance with the default font
+     * and the default font family.
+     */
+    public PdfOcrFontProvider() {
+        super(DEFAULT_FONT_FAMILY);
+        this.addFont(getDefaultFont(), PdfEncodings.IDENTITY_H);
+    }
+
+    /**
+     * Creates a new {@link PdfOcrFontProvider} instance on top of the given
+     * font set. The bundled default font is not added by this constructor.
+     *
+     * @param fontSet predefined set of fonts passed to {@link FontProvider}
+     * @param defaultFontFamily default font family passed to
+     *                          {@link FontProvider}
+     */
+    public PdfOcrFontProvider(FontSet fontSet,
+            String defaultFontFamily) {
+        super(fontSet, defaultFontFamily);
+    }
+
+    /**
+     * Gets default font family.
+     *
+     * <p>
+     * The value is the one handed to the {@link FontProvider} constructor:
+     * {@code DEFAULT_FONT_FAMILY} for the no-argument constructor, or the
+     * family supplied to
+     * {@link #PdfOcrFontProvider(FontSet, String)}.
+     *
+     * @return default font family as a string
+     */
+    @Override
+    public String getDefaultFontFamily() {
+        // delegate so that a custom family passed to the two-argument
+        // constructor is not silently replaced by the bundled one
+        return super.getDefaultFontFamily();
+    }
+
+    /**
+     * Gets default font as a byte array.
+     *
+     * @return default font as byte[], or an empty array if the font resource
+     * could not be read
+     */
+    private byte[] getDefaultFont() {
+        try (InputStream stream = ResourceUtil
+                .getResourceStream(DEFAULT_FONT_PATH)) {
+            if (stream == null) {
+                // guard against a missing resource instead of failing with a
+                // NullPointerException while reading the stream
+                LoggerFactory.getLogger(getClass())
+                        .error(MessageFormatUtil.format(
+                                PdfOcrLogMessageConstant.CANNOT_READ_DEFAULT_FONT,
+                                DEFAULT_FONT_PATH));
+                return new byte[0];
+            }
+            return StreamUtil.inputStreamToArray(stream);
+        } catch (IOException e) {
+            LoggerFactory.getLogger(getClass())
+                    .error(MessageFormatUtil.format(
+                            PdfOcrLogMessageConstant.CANNOT_READ_DEFAULT_FONT,
+                            e.getMessage()));
+            return new byte[0];
+        }
+    }
+}
diff --git a/pdfocr-api/src/main/java/com/itextpdf/pdfocr/PdfOcrLogMessageConstant.java b/pdfocr-api/src/main/java/com/itextpdf/pdfocr/PdfOcrLogMessageConstant.java
new file mode 100644
index 0000000..e324ae5
--- /dev/null
+++ b/pdfocr-api/src/main/java/com/itextpdf/pdfocr/PdfOcrLogMessageConstant.java
@@ -0,0 +1,47 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr;
+
+/**
+ * Holder of the log message templates used by the pdfOCR API module.
+ * Placeholders such as <code>{0}</code> are positional arguments of the
+ * message formatter.
+ */
+public class PdfOcrLogMessageConstant {
+    /** Template with one argument, e.g. the reason an image is unreadable. */
+    public static final String CANNOT_READ_INPUT_IMAGE =
+            "Cannot read input image {0}";
+    /** Message without arguments. */
+    public static final String PROVIDED_FONT_PROVIDER_IS_INVALID =
+            "Provided FontProvider is invalid. Please check that it contains "
+                    + "valid fonts and default font family name.";
+    /** Template with one argument describing why the font is unreadable. */
+    public static final String CANNOT_READ_DEFAULT_FONT =
+            "Cannot read default font: {0}";
+    /** Template with one argument describing the failure. */
+    public static final String CANNOT_ADD_DATA_TO_PDF_DOCUMENT =
+            "Cannot add data to PDF document: {0}";
+    /** Template with one argument: the number of images. */
+    public static final String START_OCR_FOR_IMAGES =
+            "Starting ocr for {0} image(s)";
+    /** Template with two arguments: the image and its page count. */
+    public static final String NUMBER_OF_PAGES_IN_IMAGE =
+            "Image {0} contains {1} page(s)";
+    /** Template with one argument: the Unicode character without a glyph. */
+    public static final String COULD_NOT_FIND_CORRESPONDING_GLYPH_TO_UNICODE_CHARACTER =
+            "Could not find a glyph corresponding to Unicode character {0} "
+                    + "in any of the fonts";
+    /** Message without arguments. */
+    public static final String PDF_LANGUAGE_PROPERTY_IS_NOT_SET =
+            "PDF language property is not set";
+
+    /**
+     * Not meant to be instantiated: the class only exposes constants.
+     */
+    private PdfOcrLogMessageConstant() {
+    }
+}
diff --git a/pdfocr-api/src/main/java/com/itextpdf/pdfocr/PdfOcrMetaInfo.java b/pdfocr-api/src/main/java/com/itextpdf/pdfocr/PdfOcrMetaInfo.java
new file mode 100644
index 0000000..e405906
--- /dev/null
+++ b/pdfocr-api/src/main/java/com/itextpdf/pdfocr/PdfOcrMetaInfo.java
@@ -0,0 +1,29 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr;
+
+import com.itextpdf.kernel.counter.event.IMetaInfo;
+
+/**
+ * {@link IMetaInfo} implementation that lives in the
+ * {@code com.itextpdf.pdfocr} package. It declares no state or behavior of
+ * its own besides a serialization id.
+ *
+ * <p>
+ * NOTE(review): presumably used to mark events as originating from pdfOCR;
+ * confirm against the callers that pass it around.
+ */
+public class PdfOcrMetaInfo implements IMetaInfo {
+    // explicit serialization id of this class
+    private static final long serialVersionUID = 7047674343175216537L;
+}
\ No newline at end of file
diff --git a/pdfocr-api/src/main/java/com/itextpdf/pdfocr/ScaleMode.java b/pdfocr-api/src/main/java/com/itextpdf/pdfocr/ScaleMode.java
new file mode 100644
index 0000000..40d0c19
--- /dev/null
+++ b/pdfocr-api/src/main/java/com/itextpdf/pdfocr/ScaleMode.java
@@ -0,0 +1,56 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr;
+
+import com.itextpdf.kernel.geom.Rectangle;
+import com.itextpdf.pdfocr.OcrPdfCreatorProperties;
+
+/**
+ * Enumeration of the possible scale modes for input images. A scale mode
+ * tells how the page size requested through
+ * {@link OcrPdfCreatorProperties#setPageSize(Rectangle)} is combined with the
+ * size of the image.
+ */
+public enum ScaleMode {
+    /**
+     * The resulting height equals the page height that was set using
+     * {@link OcrPdfCreatorProperties#setPageSize(Rectangle)} method.
+     * Only the width is recalculated: it is scaled proportionally so that
+     * the original aspect ratio of the image is kept.
+     */
+    SCALE_WIDTH,
+    /**
+     * The resulting width equals the page width that was set using
+     * {@link OcrPdfCreatorProperties#setPageSize(Rectangle)} method.
+     * Only the height is recalculated: it is scaled proportionally so that
+     * the original aspect ratio of the image is kept.
+     */
+    SCALE_HEIGHT,
+    /**
+     * The image is scaled to fit within both the page width and the page
+     * height that are set using
+     * {@link OcrPdfCreatorProperties#setPageSize(Rectangle)} method.
+     * Original aspect ratio of the image stays unchanged.
+     */
+    SCALE_TO_FIT
+}
diff --git a/pdfocr-api/src/main/java/com/itextpdf/pdfocr/TextInfo.java b/pdfocr-api/src/main/java/com/itextpdf/pdfocr/TextInfo.java
new file mode 100644
index 0000000..73ce258
--- /dev/null
+++ b/pdfocr-api/src/main/java/com/itextpdf/pdfocr/TextInfo.java
@@ -0,0 +1,99 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * This class describes how recognized text is positioned on the image
+ * providing bbox for each text item (could be a line or a word).
+ *
+ * <p>
+ * The bbox is stored as a private, unmodifiable copy: lists passed in are
+ * copied, and {@link #getBbox()} hands out a fresh copy, so callers cannot
+ * alter the state of an instance through a list they hold.
+ */
+public class TextInfo {
+
+    /**
+     * Contains any text.
+     */
+    private String text;
+
+    /**
+     * Contains 4 float coordinates: bbox parameters.
+     */
+    private List<Float> bbox;
+
+    /**
+     * Creates a new {@link TextInfo} instance with no text and an empty bbox.
+     */
+    public TextInfo() {
+        text = null;
+        bbox = Collections.<Float>emptyList();
+    }
+
+    /**
+     * Creates a new {@link TextInfo} instance.
+     *
+     * @param text any text
+     * @param bbox {@link java.util.List} of bbox parameters; it is copied,
+     *             and {@code null} is treated as an empty list
+     */
+    public TextInfo(final String text, final List<Float> bbox) {
+        this.text = text;
+        this.bbox = immutableCopy(bbox);
+    }
+
+    /**
+     * Gets text element.
+     *
+     * @return String
+     */
+    public String getText() {
+        return text;
+    }
+
+    /**
+     * Sets text element.
+     *
+     * @param newText retrieved text
+     */
+    public void setText(final String newText) {
+        text = newText;
+    }
+
+    /**
+     * Gets bbox coordinates.
+     *
+     * @return a new modifiable {@link java.util.List} holding the bbox
+     * parameters; changing it does not affect this instance
+     */
+    public List<Float> getBbox() {
+        return new ArrayList<Float>(bbox);
+    }
+
+    /**
+     * Sets bbox coordinates.
+     *
+     * @param bbox {@link java.util.List} of bbox parameters; it is copied,
+     *             and {@code null} is treated as an empty list
+     */
+    public void setBbox(final List<Float> bbox) {
+        this.bbox = immutableCopy(bbox);
+    }
+
+    /**
+     * Creates an unmodifiable snapshot of the given list so that later
+     * changes made by the caller to the list are not visible here.
+     *
+     * @param source list to copy, may be {@code null}
+     * @return unmodifiable copy, or an empty list for {@code null}
+     */
+    private static List<Float> immutableCopy(final List<Float> source) {
+        if (source == null) {
+            return Collections.<Float>emptyList();
+        }
+        return Collections.<Float>unmodifiableList(
+                new ArrayList<Float>(source));
+    }
+}
diff --git a/pdfocr-api/src/main/java/com/itextpdf/pdfocr/events/IThreadLocalMetaInfoAware.java b/pdfocr-api/src/main/java/com/itextpdf/pdfocr/events/IThreadLocalMetaInfoAware.java
new file mode 100644
index 0000000..5542b25
--- /dev/null
+++ b/pdfocr-api/src/main/java/com/itextpdf/pdfocr/events/IThreadLocalMetaInfoAware.java
@@ -0,0 +1,45 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr.events;
+
+import com.itextpdf.kernel.counter.event.IMetaInfo;
+
+/**
+ * Contract for objects that hold an {@link IMetaInfo} on a per-thread basis:
+ * implementations are expected to keep the meta info thread local, meaning
+ * different threads operate with independent and different meta infos.
+ */
+public interface IThreadLocalMetaInfoAware {
+
+    /**
+     * Gets the meta info which is held by the implementing object.
+     *
+     * @return the held thread local meta info
+     */
+    IMetaInfo getThreadLocalMetaInfo();
+
+    /**
+     * Sets a thread local meta info.
+     *
+     * @param metaInfo a thread local meta info to be held
+     * @return this {@link IThreadLocalMetaInfoAware}, which allows the call
+     * to be chained
+     */
+    IThreadLocalMetaInfoAware setThreadLocalMetaInfo(IMetaInfo metaInfo);
+}
diff --git a/pdfocr-api/src/main/java/com/itextpdf/pdfocr/package-info.java b/pdfocr-api/src/main/java/com/itextpdf/pdfocr/package-info.java
new file mode 100644
index 0000000..004df18
--- /dev/null
+++ b/pdfocr-api/src/main/java/com/itextpdf/pdfocr/package-info.java
@@ -0,0 +1 @@
+package com.itextpdf.pdfocr;
diff --git a/pdfocr-api/src/main/resources/com/itextpdf/pdfocr/NOTICE.txt b/pdfocr-api/src/main/resources/com/itextpdf/pdfocr/NOTICE.txt
new file mode 100644
index 0000000..2658931
--- /dev/null
+++ b/pdfocr-api/src/main/resources/com/itextpdf/pdfocr/NOTICE.txt
@@ -0,0 +1,103 @@
+This software uses the following font under the following license:
+| Liberation Sans font | OFL-1.1 |
+
+------------------------------------------------------------------------------------------------------------------------
+
+Liberation Sans font is used under the following license agreement:
+
+Digitized data copyright (c) 2010 Google Corporation
+	with Reserved Font Arimo, Tinos and Cousine.
+Copyright (c) 2012 Red Hat, Inc.
+	with Reserved Font Name Liberation.
+
+This Font Software is licensed under the SIL Open Font License, Version 1.1.
+This license is copied below, and is also available with a FAQ at:
+http://scripts.sil.org/OFL
+
+
+-----------------------------------------------------------
+SIL OPEN FONT LICENSE Version 1.1 - 26 February 2007
+-----------------------------------------------------------
+
+PREAMBLE
+The goals of the Open Font License (OFL) are to stimulate worldwide
+development of collaborative font projects, to support the font creation
+efforts of academic and linguistic communities, and to provide a free and
+open framework in which fonts may be shared and improved in partnership
+with others.
+
+The OFL allows the licensed fonts to be used, studied, modified and
+redistributed freely as long as they are not sold by themselves. The
+fonts, including any derivative works, can be bundled, embedded,
+redistributed and/or sold with any software provided that any reserved
+names are not used by derivative works. The fonts and derivatives,
+however, cannot be released under any other type of license. The
+requirement for fonts to remain under this license does not apply
+to any document created using the fonts or their derivatives.
+
+DEFINITIONS
+"Font Software" refers to the set of files released by the Copyright
+Holder(s) under this license and clearly marked as such. This may
+include source files, build scripts and documentation.
+
+"Reserved Font Name" refers to any names specified as such after the
+copyright statement(s).
+
+"Original Version" refers to the collection of Font Software components as
+distributed by the Copyright Holder(s).
+
+"Modified Version" refers to any derivative made by adding to, deleting,
+or substituting -- in part or in whole -- any of the components of the
+Original Version, by changing formats or by porting the Font Software to a
+new environment.
+
+"Author" refers to any designer, engineer, programmer, technical
+writer or other person who contributed to the Font Software.
+
+PERMISSION & CONDITIONS
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of the Font Software, to use, study, copy, merge, embed, modify,
+redistribute, and sell modified and unmodified copies of the Font
+Software, subject to the following conditions:
+
+1) Neither the Font Software nor any of its individual components,
+in Original or Modified Versions, may be sold by itself.
+
+2) Original or Modified Versions of the Font Software may be bundled,
+redistributed and/or sold with any software, provided that each copy
+contains the above copyright notice and this license. These can be
+included either as stand-alone text files, human-readable headers or
+in the appropriate machine-readable metadata fields within text or
+binary files as long as those fields can be easily viewed by the user.
+
+3) No Modified Version of the Font Software may use the Reserved Font
+Name(s) unless explicit written permission is granted by the corresponding
+Copyright Holder. This restriction only applies to the primary font name as
+presented to the users.
+
+4) The name(s) of the Copyright Holder(s) or the Author(s) of the Font
+Software shall not be used to promote, endorse or advertise any
+Modified Version, except to acknowledge the contribution(s) of the
+Copyright Holder(s) and the Author(s) or with their explicit written
+permission.
+
+5) The Font Software, modified or unmodified, in part or in whole,
+must be distributed entirely under this license, and must not be
+distributed under any other license. The requirement for fonts to
+remain under this license does not apply to any document created
+using the Font Software.
+
+TERMINATION
+This license becomes null and void if any of the above conditions are
+not met.
+
+DISCLAIMER
+THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
+OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL THE
+COPYRIGHT HOLDER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL
+DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM
+OTHER DEALINGS IN THE FONT SOFTWARE.
diff --git a/pdfocr-api/src/main/resources/com/itextpdf/pdfocr/fonts/LiberationSans-Regular.ttf b/pdfocr-api/src/main/resources/com/itextpdf/pdfocr/fonts/LiberationSans-Regular.ttf
new file mode 100644
index 0000000..626dd93
Binary files /dev/null and b/pdfocr-api/src/main/resources/com/itextpdf/pdfocr/fonts/LiberationSans-Regular.ttf differ
diff --git a/pdfocr-api/src/test/java/com/itextpdf/metainfo/TestMetaInfo.java b/pdfocr-api/src/test/java/com/itextpdf/metainfo/TestMetaInfo.java
new file mode 100644
index 0000000..1f10d9f
--- /dev/null
+++ b/pdfocr-api/src/test/java/com/itextpdf/metainfo/TestMetaInfo.java
@@ -0,0 +1,34 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.metainfo;
+
+import com.itextpdf.kernel.counter.event.IMetaInfo;
+
+/**
+ * {@link IMetaInfo} implementation that is used for test purposes only.
+ * Please be aware that it is deliberately placed in the
+ * {@code com.itextpdf.metainfo} package, so that it belongs neither to the
+ * {@code com.itextpdf.pdfocr} nor to the
+ * {@code com.itextpdf.pdfocr.tesseract4} package.
+ */
+public class TestMetaInfo implements IMetaInfo {
+    // explicit serialization id of this class
+    private static final long serialVersionUID = 5521060335175170386L;
+}
diff --git a/pdfocr-api/src/test/java/com/itextpdf/pdfocr/ApiTest.java b/pdfocr-api/src/test/java/com/itextpdf/pdfocr/ApiTest.java
new file mode 100644
index 0000000..65d3ea8
--- /dev/null
+++ b/pdfocr-api/src/test/java/com/itextpdf/pdfocr/ApiTest.java
@@ -0,0 +1,83 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr;
+
+import com.itextpdf.kernel.colors.DeviceRgb;
+import com.itextpdf.kernel.font.PdfFont;
+import com.itextpdf.pdfocr.helpers.CustomOcrEngine;
+import com.itextpdf.pdfocr.helpers.ExtractionStrategy;
+import com.itextpdf.pdfocr.helpers.PdfHelper;
+import com.itextpdf.test.ExtendedITextTest;
+import com.itextpdf.test.annotations.LogMessage;
+import com.itextpdf.test.annotations.LogMessages;
+import com.itextpdf.test.annotations.type.IntegrationTest;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+/**
+ * Integration tests for the {@link TextInfo} container and for the font that
+ * ends up in a PDF created from an image with Thai text.
+ */
+@Category(IntegrationTest.class)
+public class ApiTest extends ExtendedITextTest {
+
+    /**
+     * Runs {@link CustomOcrEngine} on the default image, then adds a manually
+     * built {@link TextInfo} for a second page and checks that the text and
+     * the bbox size read back from the map match the added element.
+     */
+    @Test
+    public void testTextInfo() {
+        File image = new File(PdfHelper.getDefaultImagePath());
+        Map<Integer, List<TextInfo>> ocrResult =
+                new CustomOcrEngine().doImageOcr(image);
+        Assert.assertEquals(1, ocrResult.size());
+
+        TextInfo expected = new TextInfo();
+        expected.setText("text");
+        expected.setBbox(Arrays.<Float>asList(204.0f, 158.0f, 742.0f, 294.0f));
+        int pageNumber = 2;
+        ocrResult.put(pageNumber, Collections.<TextInfo>singletonList(expected));
+
+        Assert.assertEquals(2, ocrResult.size());
+        TextInfo actual = ocrResult.get(pageNumber).get(0);
+        Assert.assertEquals(expected.getText(), actual.getText());
+        Assert.assertEquals(expected.getBbox().size(), actual.getBbox().size());
+    }
+
+    /**
+     * Creates a PDF from the Thai image with black text and verifies that the
+     * font found in the result is LiberationSans. The missing-glyph message
+     * is expected to be logged 7 times.
+     *
+     * @throws IOException if the PDF cannot be created or read
+     */
+    @LogMessages(messages = {
+        @LogMessage(messageTemplate = PdfOcrLogMessageConstant.COULD_NOT_FIND_CORRESPONDING_GLYPH_TO_UNICODE_CHARACTER, count = 7)
+    })
+    @Test
+    public void testThaiImageWithNotDefGlyphs() throws IOException {
+        String testName = "testThaiImageWithNotdefGlyphs";
+        File thaiImage = new File(PdfHelper.getThaiImagePath());
+        String outputPdf = PdfHelper.getTargetDirectory() + testName + ".pdf";
+
+        OcrPdfCreatorProperties properties =
+                new OcrPdfCreatorProperties().setTextColor(DeviceRgb.BLACK);
+        PdfHelper.createPdf(outputPdf, thaiImage, properties);
+
+        ExtractionStrategy extractionStrategy =
+                PdfHelper.getExtractionStrategy(outputPdf);
+        PdfFont usedFont = extractionStrategy.getPdfFont();
+        String usedFontName =
+                usedFont.getFontProgram().getFontNames().getFontName();
+        Assert.assertTrue(usedFontName.contains("LiberationSans"));
+    }
+}
diff --git a/pdfocr-api/src/test/java/com/itextpdf/pdfocr/PdfA3uTest.java b/pdfocr-api/src/test/java/com/itextpdf/pdfocr/PdfA3uTest.java
new file mode 100644
index 0000000..c52abdc
--- /dev/null
+++ b/pdfocr-api/src/test/java/com/itextpdf/pdfocr/PdfA3uTest.java
@@ -0,0 +1,226 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr;
+
+import com.itextpdf.io.font.PdfEncodings;
+import com.itextpdf.io.util.MessageFormatUtil;
+import com.itextpdf.kernel.colors.DeviceCmyk;
+import com.itextpdf.kernel.colors.DeviceRgb;
+import com.itextpdf.kernel.font.PdfFont;
+import com.itextpdf.kernel.font.PdfFontFactory;
+import com.itextpdf.kernel.pdf.PdfAConformanceLevel;
+import com.itextpdf.kernel.pdf.PdfDocument;
+import com.itextpdf.kernel.pdf.PdfReader;
+import com.itextpdf.layout.font.FontProvider;
+import com.itextpdf.layout.font.FontSelector;
+import com.itextpdf.pdfa.PdfAConformanceException;
+import com.itextpdf.pdfocr.helpers.ExtractionStrategy;
+import com.itextpdf.pdfocr.helpers.PdfHelper;
+import com.itextpdf.test.ExtendedITextTest;
+import com.itextpdf.test.annotations.LogMessage;
+import com.itextpdf.test.annotations.LogMessages;
+import com.itextpdf.test.annotations.type.IntegrationTest;
+
+import java.io.File;
+import java.io.IOException;
+import org.junit.Assert;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.ExpectedException;
+import org.verapdf.gf.model.factory.fonts.FontFactory;
+
+@Category(IntegrationTest.class)
+public class PdfA3uTest extends ExtendedITextTest {
+
+    @Rule
+    public ExpectedException junitExpectedException = ExpectedException.none();
+
+    @Test
+    public void testPdfA3uWithNullIntent() throws IOException {
+        File image = new File(PdfHelper.getDefaultImagePath());
+        String outputPdf = PdfHelper.getTargetDirectory() + "testPdfA3uWithNullIntent" + ".pdf";
+
+        OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties();
+        creatorProperties.setTextColor(DeviceCmyk.BLACK);
+        creatorProperties.setScaleMode(ScaleMode.SCALE_TO_FIT);
+
+        // The output intent argument is null; the text must still be extractable afterwards.
+        PdfHelper.createPdfA(outputPdf, image, creatorProperties, null);
+        String extractedText = PdfHelper.getTextFromPdfLayer(outputPdf, null);
+        Assert.assertEquals(PdfHelper.DEFAULT_TEXT, extractedText);
+        Assert.assertEquals(ScaleMode.SCALE_TO_FIT, creatorProperties.getScaleMode());
+    }
+
+    @Test
+    public void testIncompatibleOutputIntentAndFontColorSpaceException() throws IOException {
+        // A CMYK text colour is combined with an RGB output intent; a PdfException is expected.
+        junitExpectedException.expect(com.itextpdf.kernel.PdfException.class);
+        junitExpectedException.expectMessage(PdfAConformanceException.DEVICECMYK_MAY_BE_USED_ONLY_IF_THE_FILE_HAS_A_CMYK_PDFA_OUTPUT_INTENT_OR_DEFAULTCMYK_IN_USAGE_CONTEXT);
+
+        File image = new File(PdfHelper.getDefaultImagePath());
+        String outputPdf = PdfHelper.getTargetDirectory()
+                + "testIncompatibleOutputIntentAndFontColorSpaceException" + ".pdf";
+
+        OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties();
+        creatorProperties.setPdfLang("en-US");
+        creatorProperties.setTextColor(DeviceCmyk.BLACK);
+
+        // Expected to throw, so no assertions follow the call.
+        PdfHelper.createPdfA(outputPdf, image, creatorProperties,
+                PdfHelper.getRGBPdfOutputIntent());
+    }
+
+    @Test
+    public void testPdfA3DefaultMetadata() throws IOException {
+        String testName = "testPdfA3DefaultMetadata";
+        String path = PdfHelper.getDefaultImagePath();
+        String pdfPath = PdfHelper.getTargetDirectory() + testName + ".pdf";
+        File file = new File(path);
+
+        OcrPdfCreatorProperties ocrPdfCreatorProperties = new OcrPdfCreatorProperties();
+        ocrPdfCreatorProperties.setPdfLang("en-US");
+        ocrPdfCreatorProperties.setTextColor(DeviceRgb.BLACK);
+
+        PdfHelper.createPdfA(pdfPath, file, ocrPdfCreatorProperties,
+                PdfHelper.getRGBPdfOutputIntent());
+
+        // Re-open the result; try/finally closes the document even when an assertion fails.
+        PdfDocument pdfDocument = new PdfDocument(new PdfReader(pdfPath));
+        try {
+            Assert.assertEquals("en-US", pdfDocument.getCatalog().getLang().toString());
+            // No title was set on the properties, so the document info must not carry one.
+            Assert.assertNull(pdfDocument.getDocumentInfo().getTitle());
+            Assert.assertEquals(PdfAConformanceLevel.PDF_A_3U,
+                    pdfDocument.getReader().getPdfAConformanceLevel());
+        } finally {
+            pdfDocument.close();
+        }
+    }
+
+    @Test
+    public void testPdfCustomMetadata() throws IOException {
+        String testName = "testPdfCustomMetadata";
+        String path = PdfHelper.getDefaultImagePath();
+        String pdfPath = PdfHelper.getTargetDirectory() + testName + ".pdf";
+        File file = new File(path);
+
+        OcrPdfCreatorProperties properties = new OcrPdfCreatorProperties();
+        String locale = "nl-BE";
+        properties.setPdfLang(locale);
+        String title = "Title";
+        properties.setTitle(title);
+
+        PdfHelper.createPdfA(pdfPath, file, new OcrPdfCreatorProperties(properties),
+                PdfHelper.getCMYKPdfOutputIntent());
+
+        // Re-open the result; try/finally closes the document even when an assertion fails.
+        PdfDocument pdfDocument = new PdfDocument(new PdfReader(pdfPath));
+        try {
+            Assert.assertEquals(locale, pdfDocument.getCatalog().getLang().toString());
+            Assert.assertEquals(title, pdfDocument.getDocumentInfo().getTitle());
+            Assert.assertEquals(PdfAConformanceLevel.PDF_A_3U,
+                    pdfDocument.getReader().getPdfAConformanceLevel());
+        } finally {
+            pdfDocument.close();
+        }
+    }
+
+    @LogMessages(messages = {
+        @LogMessage(messageTemplate = OcrException.CANNOT_CREATE_PDF_DOCUMENT, count = 1)
+    })
+    @Test
+    public void testNonCompliantThaiPdfA() throws IOException {
+        junitExpectedException.expect(OcrException.class);
+        // 3611 equals 0x0E1B, a value inside the Unicode Thai block.
+        String missingGlyphMessage = MessageFormatUtil.format(
+                PdfOcrLogMessageConstant.COULD_NOT_FIND_CORRESPONDING_GLYPH_TO_UNICODE_CHARACTER, 3611);
+        junitExpectedException.expectMessage(
+                MessageFormatUtil.format(OcrException.CANNOT_CREATE_PDF_DOCUMENT, missingGlyphMessage));
+
+        File thaiImage = new File(PdfHelper.getThaiImagePath());
+        String outputPdf = PdfHelper.getTargetDirectory() + "testNonCompliantThaiPdfA" + ".pdf";
+
+        OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties();
+        creatorProperties.setPdfLang("en-US");
+        creatorProperties.setTextColor(DeviceRgb.BLACK);
+
+        // No font provider is configured; creation is expected to fail with the message above.
+        PdfHelper.createPdfA(outputPdf, thaiImage, creatorProperties, PdfHelper.getRGBPdfOutputIntent());
+    }
+
+    @Test
+    public void testCompliantThaiPdfA() throws IOException {
+        final String fontFamily = "Kanit";
+        File thaiImage = new File(PdfHelper.getThaiImagePath());
+        String outputPdf = PdfHelper.getTargetDirectory() + "testCompliantThaiPdfA" + ".pdf";
+
+        OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties();
+        creatorProperties.setPdfLang("en-US");
+        creatorProperties.setTextColor(DeviceRgb.BLACK);
+
+        // Build a font provider around the Kanit font file and hand it to the properties.
+        FontProvider kanitFonts = new FontProvider(fontFamily);
+        kanitFonts.addFont(PdfHelper.getKanitFontPath());
+        creatorProperties.setFontProvider(
+                new PdfOcrFontProvider(kanitFonts.getFontSet(), fontFamily));
+
+        PdfHelper.createPdfA(outputPdf, thaiImage, creatorProperties,
+                PdfHelper.getRGBPdfOutputIntent());
+
+        // Extract the text twice: through the UseActualText helper and through the plain one.
+        String withActualText = PdfHelper.getTextFromPdfLayerUseActualText(outputPdf, null);
+        Assert.assertEquals(PdfHelper.THAI_TEXT, withActualText);
+
+        String withoutActualText = PdfHelper.getTextFromPdfLayer(outputPdf, null);
+        Assert.assertEquals(PdfHelper.THAI_TEXT, withoutActualText);
+        Assert.assertEquals(withoutActualText, withActualText);
+
+        // The font reported by the extraction strategy must be Kanit and embedded.
+        ExtractionStrategy strategy = PdfHelper.getExtractionStrategy(outputPdf);
+        PdfFont usedFont = strategy.getPdfFont();
+        String usedFontName = usedFont.getFontProgram().getFontNames().getFontName();
+        Assert.assertTrue(usedFontName.contains(fontFamily));
+        Assert.assertTrue(usedFont.isEmbedded());
+    }
+
+    @LogMessages(messages = {
+        @LogMessage(messageTemplate = OcrException.CANNOT_CREATE_PDF_DOCUMENT, count = 1)
+    })
+    @Test
+    public void testPdfACreateWithoutPdfLangProperty() throws IOException {
+        junitExpectedException.expect(OcrException.class);
+        String expectedMessage = MessageFormatUtil.format(OcrException.CANNOT_CREATE_PDF_DOCUMENT,
+                PdfOcrLogMessageConstant.PDF_LANGUAGE_PROPERTY_IS_NOT_SET);
+        junitExpectedException.expectMessage(expectedMessage);
+
+        File thaiImage = new File(PdfHelper.getThaiImagePath());
+        String outputPdf = PdfHelper.getTargetDirectory()
+                + "testPdfACreateWithoutPdfLangProperty" + ".pdf";
+
+        // The properties are used as constructed: setPdfLang is never called on them.
+        OcrPdfCreatorProperties defaultProperties = new OcrPdfCreatorProperties();
+        PdfHelper.createPdfA(outputPdf, thaiImage, defaultProperties,
+                PdfHelper.getRGBPdfOutputIntent());
+    }
+}
diff --git a/pdfocr-api/src/test/java/com/itextpdf/pdfocr/PdfFontTest.java b/pdfocr-api/src/test/java/com/itextpdf/pdfocr/PdfFontTest.java
new file mode 100644
index 0000000..8b1abde
--- /dev/null
+++ b/pdfocr-api/src/test/java/com/itextpdf/pdfocr/PdfFontTest.java
@@ -0,0 +1,230 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr;
+
+import com.itextpdf.io.font.PdfEncodings;
+import com.itextpdf.io.util.MessageFormatUtil;
+import com.itextpdf.kernel.colors.DeviceCmyk;
+import com.itextpdf.kernel.colors.DeviceRgb;
+import com.itextpdf.kernel.font.PdfFont;
+import com.itextpdf.layout.font.FontProvider;
+import com.itextpdf.pdfocr.helpers.ExtractionStrategy;
+import com.itextpdf.pdfocr.helpers.PdfHelper;
+import com.itextpdf.test.ExtendedITextTest;
+import com.itextpdf.test.annotations.LogMessage;
+import com.itextpdf.test.annotations.LogMessages;
+import com.itextpdf.test.annotations.type.IntegrationTest;
+
+import java.io.File;
+import java.io.IOException;
+import org.junit.Assert;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.ExpectedException;
+
+@Category(IntegrationTest.class)
+public class PdfFontTest extends ExtendedITextTest {
+
+    @Rule
+    public ExpectedException junitExpectedException = ExpectedException.none();
+
+    @Test
+    public void testFontColor() throws IOException {
+        final String textLayer = "Text1";
+        final com.itextpdf.kernel.colors.Color expectedColor = DeviceCmyk.CYAN;
+        File tiff = new File(PdfHelper.getImagesTestDirectory() + "multipage.tiff");
+        String outputPdf = PdfHelper.getTargetDirectory() + "testFontColor" + ".pdf";
+
+        OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties();
+        creatorProperties.setScaleMode(ScaleMode.SCALE_TO_FIT);
+        creatorProperties.setTextLayerName(textLayer);
+        creatorProperties.setTextColor(expectedColor);
+
+        PdfHelper.createPdf(outputPdf, tiff, creatorProperties);
+
+        // The fill colour seen while extracting the named text layer must be the configured one.
+        ExtractionStrategy strategy = PdfHelper.getExtractionStrategy(outputPdf, textLayer);
+        com.itextpdf.kernel.colors.Color actualColor = strategy.getFillColor();
+        Assert.assertEquals(expectedColor, actualColor);
+    }
+
+    @LogMessages(messages = {
+        @LogMessage(messageTemplate = PdfOcrLogMessageConstant.PROVIDED_FONT_PROVIDER_IS_INVALID, count = 1),
+        @LogMessage(messageTemplate = OcrException.CANNOT_CREATE_PDF_DOCUMENT, count = 1)
+    })
+    @Test
+    public void testInvalidFontWithInvalidDefaultFontFamily()
+            throws IOException {
+        junitExpectedException.expect(OcrException.class);
+        junitExpectedException.expectMessage(MessageFormatUtil.format(
+                OcrException.CANNOT_CREATE_PDF_DOCUMENT,
+                OcrException.CANNOT_RESOLVE_PROVIDED_FONTS));
+
+        String testName = "testInvalidFontWithInvalidDefaultFontFamily";
+        String path = PdfHelper.getDefaultImagePath();
+        String pdfPath = PdfHelper.getTargetDirectory() + testName + ".pdf";
+        File file = new File(path);
+
+        OcrPdfCreatorProperties properties = new OcrPdfCreatorProperties();
+        FontProvider pdfOcrFontProvider = new FontProvider("Font");
+        pdfOcrFontProvider.getFontSet().addFont("font.ttf", PdfEncodings.IDENTITY_H, "Font");
+
+        properties.setFontProvider(pdfOcrFontProvider, "Font");
+        properties.setScaleMode(ScaleMode.SCALE_TO_FIT);
+
+        // The provider only references "font.ttf", which this test treats as unresolvable:
+        // createPdf is expected to throw the OcrException declared above. Statements placed
+        // after the call would never run, so no assertions follow it.
+        PdfHelper.createPdf(pdfPath, file, properties);
+    }
+
+    @Test
+    public void testDefaultFontInPdfARgb() throws IOException {
+        // The output name matches the test name so no other test writes to the same file.
+        String testName = "testDefaultFontInPdfARgb";
+        String path = PdfHelper.getDefaultImagePath();
+        String pdfPath = PdfHelper.getTargetDirectory() + testName + ".pdf";
+        File file = new File(path);
+
+        OcrPdfCreatorProperties ocrPdfCreatorProperties = new OcrPdfCreatorProperties();
+        ocrPdfCreatorProperties.setPdfLang("en-US");
+        ocrPdfCreatorProperties.setTextColor(DeviceRgb.BLACK);
+
+        PdfHelper.createPdfA(pdfPath, file, ocrPdfCreatorProperties, PdfHelper.getRGBPdfOutputIntent());
+
+        // No font provider is set, so the embedded font is expected to be LiberationSans.
+        ExtractionStrategy strategy = PdfHelper.getExtractionStrategy(pdfPath);
+        PdfFont font = strategy.getPdfFont();
+        String fontName = font.getFontProgram().getFontNames().getFontName();
+        Assert.assertTrue(fontName.contains("LiberationSans"));
+        Assert.assertTrue(font.isEmbedded());
+    }
+
+    @Test
+    public void testInvalidCustomFontInPdfACMYK() throws IOException {
+        File image = new File(PdfHelper.getDefaultImagePath());
+        String outputPdf = PdfHelper.getTargetDirectory() + "testInvalidCustomFontInPdf" + ".pdf";
+
+        // A PdfOcrFontProvider built with its no-argument constructor is the only font setting.
+        OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties();
+        creatorProperties.setPdfLang("en-US");
+        creatorProperties.setFontProvider(new PdfOcrFontProvider());
+
+        PdfHelper.createPdfA(outputPdf, image, creatorProperties,
+                PdfHelper.getCMYKPdfOutputIntent());
+
+        // The text layer is expected to end up with an embedded LiberationSans font.
+        ExtractionStrategy strategy = PdfHelper.getExtractionStrategy(outputPdf);
+        PdfFont usedFont = strategy.getPdfFont();
+        String usedFontName = usedFont.getFontProgram().getFontNames().getFontName();
+        boolean isLiberationSans = usedFontName.contains("LiberationSans");
+        Assert.assertTrue(isLiberationSans);
+        Assert.assertTrue(usedFont.isEmbedded());
+    }
+
+    @Test
+    public void testCustomFontInPdf() throws IOException {
+        // Name the output after this test so it does not overwrite another test's PDF.
+        String testName = "testCustomFontInPdf";
+        String path = PdfHelper.getDefaultImagePath();
+        String pdfPath = PdfHelper.getTargetDirectory() + testName + ".pdf";
+        File file = new File(path);
+
+        FontProvider fontProvider = new FontProvider("FreeSans");
+        fontProvider.getFontSet().addFont(PdfHelper.getFreeSansFontPath(), PdfEncodings.IDENTITY_H, "FreeSans");
+
+        OcrPdfCreatorProperties ocrPdfCreatorProperties = new OcrPdfCreatorProperties();
+        ocrPdfCreatorProperties.setPdfLang("en-US");
+        ocrPdfCreatorProperties.setFontProvider(fontProvider, "FreeSans");
+
+        PdfHelper.createPdfA(pdfPath, file, ocrPdfCreatorProperties, PdfHelper.getCMYKPdfOutputIntent());
+
+        // The custom FreeSans font must be the one found, embedded, in the text layer.
+        ExtractionStrategy strategy = PdfHelper.getExtractionStrategy(pdfPath);
+        PdfFont font = strategy.getPdfFont();
+        String fontName = font.getFontProgram().getFontNames().getFontName();
+        Assert.assertTrue(fontName.contains("FreeSans"));
+        Assert.assertTrue(font.isEmbedded());
+    }
+
+    @LogMessages(messages = {
+        @LogMessage(messageTemplate = PdfOcrLogMessageConstant.COULD_NOT_FIND_CORRESPONDING_GLYPH_TO_UNICODE_CHARACTER, count = 7)
+    })
+    @Test
+    public void testThaiImageWithNotDefGlyphs() throws IOException {
+        File thaiImage = new File(PdfHelper.getThaiImagePath());
+        String outputPdf = PdfHelper.getTargetDirectory() + "testThaiImageWithNotDefGlyphs" + ".pdf";
+
+        // Only the text colour is set on the properties; no font provider is configured.
+        PdfHelper.createPdf(outputPdf, thaiImage,
+                new OcrPdfCreatorProperties().setTextColor(DeviceRgb.BLACK));
+
+        // Through the UseActualText helper the Thai text is matched once spaces are stripped.
+        String withActualText = PdfHelper.getTextFromPdfLayerUseActualText(outputPdf, null);
+        String expectedNoSpaces = PdfHelper.THAI_TEXT.replace(" ", "");
+        Assert.assertEquals(expectedNoSpaces, withActualText.replace(" ", ""));
+
+        // The plain helper is expected to return something different from both.
+        String withoutActualText = PdfHelper.getTextFromPdfLayer(outputPdf, null);
+        Assert.assertNotEquals(PdfHelper.THAI_TEXT, withoutActualText);
+        Assert.assertNotEquals(withoutActualText, withActualText);
+    }
+
+    @Test
+    public void testReusingFontProvider() throws IOException {
+        final String fontFamily = "FreeSans";
+        File image = new File(PdfHelper.getDefaultImagePath());
+        String pdfA3uOutput = PdfHelper.getTargetDirectory()
+                + "testReusingFontProvider" + "_a3u.pdf";
+        String plainOutput = PdfHelper.getTargetDirectory()
+                + "testReusingFontProvider" + ".pdf";
+
+        // One PdfOcrFontProvider instance, built on the FreeSans font set, is shared below.
+        FontProvider freeSansFonts = new FontProvider(fontFamily);
+        freeSansFonts.addFont(PdfHelper.getFreeSansFontPath());
+        PdfOcrFontProvider sharedProvider =
+                new PdfOcrFontProvider(freeSansFonts.getFontSet(), fontFamily);
+
+        OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties();
+        creatorProperties.setPdfLang("en-US");
+        creatorProperties.setFontProvider(sharedProvider);
+
+        // The same properties object (and therefore the same provider) drives both creations:
+        // first through createPdfA, then through createPdf.
+        PdfHelper.createPdfA(pdfA3uOutput, image, creatorProperties,
+                PdfHelper.getCMYKPdfOutputIntent());
+
+        PdfHelper.createPdf(plainOutput, image, creatorProperties);
+
+        // Both results are checked identically, the createPdfA output first.
+        for (String producedPdf : new String[] {pdfA3uOutput, plainOutput}) {
+            ExtractionStrategy strategy = PdfHelper.getExtractionStrategy(producedPdf);
+            PdfFont usedFont = strategy.getPdfFont();
+            String usedFontName = usedFont.getFontProgram().getFontNames().getFontName();
+            Assert.assertTrue(usedFontName.contains(fontFamily));
+            Assert.assertTrue(usedFont.isEmbedded());
+            Assert.assertEquals(PdfHelper.DEFAULT_TEXT, strategy.getResultantText());
+        }
+    }
+}
diff --git a/pdfocr-api/src/test/java/com/itextpdf/pdfocr/PdfInputImageTest.java b/pdfocr-api/src/test/java/com/itextpdf/pdfocr/PdfInputImageTest.java
new file mode 100644
index 0000000..2bfb84a
--- /dev/null
+++ b/pdfocr-api/src/test/java/com/itextpdf/pdfocr/PdfInputImageTest.java
@@ -0,0 +1,70 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr;
+
+import com.itextpdf.pdfocr.helpers.PdfHelper;
+import com.itextpdf.test.ExtendedITextTest;
+import com.itextpdf.test.annotations.LogMessage;
+import com.itextpdf.test.annotations.LogMessages;
+import com.itextpdf.test.annotations.type.IntegrationTest;
+
+import java.io.File;
+import org.junit.Assert;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.ExpectedException;
+
+@Category(IntegrationTest.class)
+public class PdfInputImageTest extends ExtendedITextTest {
+    @Rule
+    public ExpectedException junitExpectedException = ExpectedException.none();
+
+    @LogMessages(messages = {
+            @LogMessage(messageTemplate = PdfOcrLogMessageConstant.CANNOT_READ_INPUT_IMAGE,
+                    count = 1)
+    })
+    @Test
+    public void testCorruptedImage() {
+        junitExpectedException.expect(OcrException.class);
+        File file = new File(PdfHelper.getImagesTestDirectory()
+                + "corrupted.jpg");
+        // Expected to throw OcrException; assertions on a return value here would be
+        // unreachable and would wrongly suggest an empty string is an accepted outcome.
+        PdfHelper.getTextFromPdf(file, "testCorruptedImage");
+    }
+
+    @LogMessages(messages = {
+            @LogMessage(messageTemplate = PdfOcrLogMessageConstant.CANNOT_READ_INPUT_IMAGE, count = 1)
+    })
+    @Test
+    public void testCorruptedImageWithoutExtension() {
+        junitExpectedException.expect(OcrException.class);
+
+        File file = new File(PdfHelper.getImagesTestDirectory()
+                + "corrupted");
+        // Expected to throw OcrException; assertions on a return value here would be
+        // unreachable and would wrongly suggest an empty string is an accepted outcome.
+        PdfHelper.getTextFromPdf(file, "testCorruptedImageWithoutExtension");
+    }
+}
diff --git a/pdfocr-api/src/test/java/com/itextpdf/pdfocr/PdfLayersTest.java b/pdfocr-api/src/test/java/com/itextpdf/pdfocr/PdfLayersTest.java
new file mode 100644
index 0000000..7c20503
--- /dev/null
+++ b/pdfocr-api/src/test/java/com/itextpdf/pdfocr/PdfLayersTest.java
@@ -0,0 +1,211 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr;
+
+import com.itextpdf.kernel.pdf.PdfDocument;
+import com.itextpdf.kernel.pdf.PdfName;
+import com.itextpdf.kernel.pdf.layer.PdfLayer;
+import com.itextpdf.pdfocr.helpers.CustomOcrEngine;
+import com.itextpdf.pdfocr.helpers.PdfHelper;
+import com.itextpdf.test.ExtendedITextTest;
+import com.itextpdf.test.annotations.type.IntegrationTest;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Collections;
+import java.util.List;
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+@Category(IntegrationTest.class)
+public class PdfLayersTest extends ExtendedITextTest {
+
+    @Test
+    public void testPdfLayersWithDefaultNames() {
+        File image = new File(PdfHelper.getDefaultImagePath());
+
+        OcrEngineProperties engineProperties = new OcrEngineProperties();
+        engineProperties.setLanguages(Collections.<String>singletonList("eng"));
+        CustomOcrEngine engine = new CustomOcrEngine(engineProperties);
+
+        // The creator gets only the engine: no layer names are configured.
+        OcrPdfCreator creator = new OcrPdfCreator(engine);
+        List<File> inputImages = Collections.<File>singletonList(image);
+        PdfDocument document =
+                creator.createPdf(inputImages, PdfHelper.getPdfWriter());
+        Assert.assertNotNull(document);
+
+        // Without configured names no layers are expected in the catalog's OC properties.
+        List<PdfLayer> layers =
+                document.getCatalog().getOCProperties(true).getLayers();
+        Assert.assertEquals(0, layers.size());
+        document.close();
+
+        // The creator must report the engine it was built with, languages unchanged.
+        Assert.assertEquals(engine, creator.getOcrEngine());
+        Assert.assertEquals(1, engine.getOcrEngineProperties().getLanguages().size());
+        Assert.assertEquals("eng", engine.getOcrEngineProperties().getLanguages().get(0));
+    }
+
+    @Test
+    public void testPdfLayersWithCustomNames() {
+        final String imageLayerName = "name image 1";
+        final String textLayerName = "name text 1";
+        File image = new File(PdfHelper.getDefaultImagePath());
+
+        OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties();
+        creatorProperties.setImageLayerName(imageLayerName);
+        creatorProperties.setTextLayerName(textLayerName);
+
+        OcrPdfCreator creator = new OcrPdfCreator(new CustomOcrEngine(), creatorProperties);
+        List<File> inputImages = Collections.<File>singletonList(image);
+        PdfDocument document = creator.createPdf(inputImages, PdfHelper.getPdfWriter());
+        Assert.assertNotNull(document);
+
+        // Two layers are expected: the image layer at index 0, the text layer at index 1.
+        List<PdfLayer> layers = document.getCatalog().getOCProperties(true).getLayers();
+        Assert.assertEquals(2, layers.size());
+        PdfLayer imageLayer = layers.get(0);
+        Assert.assertEquals(imageLayerName, imageLayer.getPdfObject().get(PdfName.Name).toString());
+        Assert.assertTrue(imageLayer.isOn());
+
+        PdfLayer textLayer = layers.get(1);
+        Assert.assertEquals(textLayerName, textLayer.getPdfObject().get(PdfName.Name).toString());
+        Assert.assertTrue(textLayer.isOn());
+
+        document.close();
+    }
+
+    @Test
+    public void testTextFromPdfLayers() throws IOException {
+        final String imageLayerName = "Image Layer";
+        final String textLayerName = "Text Layer";
+        File image = new File(PdfHelper.getDefaultImagePath());
+        String outputPdf = PdfHelper.getTargetDirectory() + "testTextFromPdfLayers" + ".pdf";
+
+        OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties();
+        creatorProperties.setImageLayerName(imageLayerName);
+        creatorProperties.setTextLayerName(textLayerName);
+
+        OcrPdfCreator creator = new OcrPdfCreator(new CustomOcrEngine(), creatorProperties);
+        List<File> inputImages = Collections.<File>singletonList(image);
+        PdfDocument document = creator.createPdf(inputImages, PdfHelper.getPdfWriter(outputPdf));
+        Assert.assertNotNull(document);
+
+        List<PdfLayer> layers = document.getCatalog().getOCProperties(true).getLayers();
+        Assert.assertEquals(2, layers.size());
+        PdfLayer imageLayer = layers.get(0);
+        Assert.assertEquals(imageLayerName, imageLayer.getPdfObject().get(PdfName.Name).toString());
+        Assert.assertTrue(imageLayer.isOn());
+        PdfLayer textLayer = layers.get(1);
+        Assert.assertEquals(textLayerName, textLayer.getPdfObject().get(PdfName.Name).toString());
+        Assert.assertTrue(textLayer.isOn());
+
+        document.close();
+
+        // Read the written file back: the text layer holds the text, the image layer none.
+        String textLayerContent = PdfHelper.getTextFromPdfLayer(outputPdf, textLayerName);
+        Assert.assertEquals(PdfHelper.DEFAULT_TEXT, textLayerContent);
+        String imageLayerContent = PdfHelper.getTextFromPdfLayer(outputPdf, imageLayerName);
+        Assert.assertEquals("", imageLayerContent);
+    }
+
+    @Test
+    public void testPdfLayersWithImageLayerOnly() {
+        final String imageLayerName = "Image Layer";
+        File image = new File(PdfHelper.getDefaultImagePath());
+
+        // Only the image layer name is set; no text layer name is configured.
+        OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties();
+        creatorProperties.setImageLayerName(imageLayerName);
+
+        OcrPdfCreator creator = new OcrPdfCreator(new CustomOcrEngine(), creatorProperties);
+        List<File> inputImages = Collections.<File>singletonList(image);
+        PdfDocument document = creator.createPdf(inputImages, PdfHelper.getPdfWriter());
+        Assert.assertNotNull(document);
+
+        // Exactly one layer is expected, carrying the configured name and switched on.
+        List<PdfLayer> layers = document.getCatalog().getOCProperties(true).getLayers();
+        Assert.assertEquals(1, layers.size());
+        PdfLayer onlyLayer = layers.get(0);
+        String actualName = onlyLayer.getPdfObject().get(PdfName.Name).toString();
+        Assert.assertEquals(imageLayerName, actualName);
+        Assert.assertTrue(onlyLayer.isOn());
+
+        document.close();
+    }
+
+    @Test
+    public void testPdfLayersWithTextLayerOnly() {
+        final String textLayerName = "Text Layer";
+        File image = new File(PdfHelper.getDefaultImagePath());
+
+        // Only the text layer name is set; no image layer name is configured.
+        OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties();
+        creatorProperties.setTextLayerName(textLayerName);
+
+        OcrPdfCreator creator = new OcrPdfCreator(new CustomOcrEngine(), creatorProperties);
+        List<File> inputImages = Collections.<File>singletonList(image);
+        PdfDocument document = creator.createPdf(inputImages, PdfHelper.getPdfWriter());
+        Assert.assertNotNull(document);
+
+        // Exactly one layer is expected, carrying the configured name and switched on.
+        List<PdfLayer> layers = document.getCatalog().getOCProperties(true).getLayers();
+        Assert.assertEquals(1, layers.size());
+        PdfLayer onlyLayer = layers.get(0);
+        String actualName = onlyLayer.getPdfObject().get(PdfName.Name).toString();
+        Assert.assertEquals(textLayerName, actualName);
+        Assert.assertTrue(onlyLayer.isOn());
+
+        document.close();
+    }
+
+    @Test
+    public void testPdfLayersWithTextAndImageLayerWithTheSameName() {
+        final String sharedLayerName = "Mixed Layer";
+        File image = new File(PdfHelper.getDefaultImagePath());
+
+        // Text and image content are given the same layer name (the text name is set first).
+        OcrPdfCreatorProperties creatorProperties = new OcrPdfCreatorProperties();
+        creatorProperties.setTextLayerName(sharedLayerName);
+        creatorProperties.setImageLayerName(sharedLayerName);
+
+        OcrPdfCreator creator = new OcrPdfCreator(new CustomOcrEngine(), creatorProperties);
+        List<File> inputImages = Collections.<File>singletonList(image);
+        PdfDocument document = creator.createPdf(inputImages, PdfHelper.getPdfWriter());
+        Assert.assertNotNull(document);
+
+        // A single layer with the shared name is expected, and it must be switched on.
+        List<PdfLayer> layers = document.getCatalog().getOCProperties(true).getLayers();
+        Assert.assertEquals(1, layers.size());
+        PdfLayer onlyLayer = layers.get(0);
+        String actualName = onlyLayer.getPdfObject().get(PdfName.Name).toString();
+        Assert.assertEquals(sharedLayerName, actualName);
+        Assert.assertTrue(onlyLayer.isOn());
+
+        document.close();
+    }
+
+
+}
diff --git a/pdfocr-api/src/test/java/com/itextpdf/pdfocr/ScaleModeTest.java b/pdfocr-api/src/test/java/com/itextpdf/pdfocr/ScaleModeTest.java
new file mode 100644
index 0000000..d1d5396
--- /dev/null
+++ b/pdfocr-api/src/test/java/com/itextpdf/pdfocr/ScaleModeTest.java
@@ -0,0 +1,150 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr;
+
+import com.itextpdf.io.image.ImageData;
+import com.itextpdf.io.image.ImageDataFactory;
+import com.itextpdf.kernel.geom.Rectangle;
+import com.itextpdf.kernel.pdf.PdfDocument;
+import com.itextpdf.pdfocr.helpers.CustomOcrEngine;
+import com.itextpdf.pdfocr.helpers.ExtractionStrategy;
+import com.itextpdf.pdfocr.helpers.PdfHelper;
+import com.itextpdf.test.ExtendedITextTest;
+import com.itextpdf.test.annotations.type.IntegrationTest;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Collections;
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+@Category(IntegrationTest.class)
+public class ScaleModeTest extends ExtendedITextTest {
+
+    private static final float DELTA = 1e-4f;
+
+    @Test
+    public void testScaleWidthMode() throws IOException {
+        String testName = "testScaleWidthMode";
+        String path = PdfHelper.getDefaultImagePath();
+        String pdfPath = PdfHelper.getTargetDirectory() + testName + ".pdf";
+        File file = new File(path);
+
+        float pageWidthPt = 400f;
+        float pageHeightPt = 400f;
+
+        com.itextpdf.kernel.geom.Rectangle pageSize =
+                new com.itextpdf.kernel.geom.Rectangle(pageWidthPt, pageHeightPt);
+
+        OcrPdfCreatorProperties properties = new OcrPdfCreatorProperties();
+        properties.setScaleMode(ScaleMode.SCALE_WIDTH);
+        properties.setPageSize(pageSize);
+
+        PdfHelper.createPdf(pdfPath, file, properties);
+
+        com.itextpdf.kernel.geom.Rectangle rect = getImageBBoxRectangleFromPdf(pdfPath);
+        ImageData originalImageData = ImageDataFactory.create(file.getAbsolutePath());
+
+        // page size should be equal to the result image size;
+        // result image height should be equal to the value that
+        // was set as page height, and result image width should be
+        // scaled proportionally according to that height and the
+        // original image size
+        Assert.assertEquals(pageHeightPt, rect.getHeight(), DELTA);
+        Assert.assertEquals(originalImageData.getWidth() / originalImageData.getHeight(),
+                rect.getWidth() / rect.getHeight(), DELTA);
+    }
+
+    @Test
+    public void testScaleHeightMode() throws IOException {
+        String testName = "testScaleHeightMode";
+        String path = PdfHelper.getDefaultImagePath();
+        String pdfPath = PdfHelper.getTargetDirectory() + testName + ".pdf";
+        File file = new File(path);
+
+        float pageWidthPt = 400f;
+        float pageHeightPt = 400f;
+
+        com.itextpdf.kernel.geom.Rectangle pageSize =
+                new com.itextpdf.kernel.geom.Rectangle(pageWidthPt, pageHeightPt);
+
+        OcrPdfCreatorProperties properties = new OcrPdfCreatorProperties();
+        properties.setScaleMode(ScaleMode.SCALE_HEIGHT);
+        properties.setPageSize(pageSize);
+
+        PdfHelper.createPdf(pdfPath, file, properties);
+
+        com.itextpdf.kernel.geom.Rectangle rect = getImageBBoxRectangleFromPdf(pdfPath);
+        ImageData originalImageData = ImageDataFactory.create(file.getAbsolutePath());
+
+        Assert.assertEquals(pageWidthPt, rect.getWidth(), DELTA);
+        Assert.assertEquals(originalImageData.getWidth() / originalImageData.getHeight(),
+                rect.getWidth() / rect.getHeight(), DELTA);
+    }
+
+    @Test
+    public void testOriginalSizeScaleMode() throws IOException {
+        String path = PdfHelper.getDefaultImagePath();
+        File file = new File(path);
+
+        OcrPdfCreator ocrPdfCreator = new OcrPdfCreator(new CustomOcrEngine());
+        PdfDocument doc =
+                ocrPdfCreator.createPdf(Collections.<File>singletonList(file),
+                        PdfHelper.getPdfWriter());
+
+        Assert.assertNotNull(doc);
+
+        ImageData imageData = ImageDataFactory.create(file.getAbsolutePath());
+
+        float imageWidth = getPoints(imageData.getWidth());
+        float imageHeight = getPoints(imageData.getHeight());
+        float realWidth = doc.getFirstPage().getPageSize().getWidth();
+        float realHeight = doc.getFirstPage().getPageSize().getHeight();
+
+        Assert.assertEquals(imageWidth, realWidth, DELTA);
+        Assert.assertEquals(imageHeight, realHeight, DELTA);
+
+        doc.close();
+    }
+
+    /**
+     * Converts value from pixels to points.
+     *
+     * @param pixels input value in pixels
+     * @return result value in points
+     */
+    protected float getPoints(final float pixels) {
+        return pixels * 3f / 4f;
+    }
+
+    /**
+     * Retrieve image BBox rectangle from the first page from given PDF document.
+     */
+    public static Rectangle getImageBBoxRectangleFromPdf(String path)
+            throws IOException {
+        ExtractionStrategy extractionStrategy =
+                PdfHelper.getExtractionStrategy(path);
+        return extractionStrategy.getImageBBoxRectangle();
+    }
+}
diff --git a/pdfocr-api/src/test/java/com/itextpdf/pdfocr/events/EventCountingTest.java b/pdfocr-api/src/test/java/com/itextpdf/pdfocr/events/EventCountingTest.java
new file mode 100644
index 0000000..a1aa89a
--- /dev/null
+++ b/pdfocr-api/src/test/java/com/itextpdf/pdfocr/events/EventCountingTest.java
@@ -0,0 +1,117 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr.events;
+
+import com.itextpdf.kernel.pdf.PdfOutputIntent;
+import com.itextpdf.kernel.pdf.PdfWriter;
+import com.itextpdf.metainfo.TestMetaInfo;
+import com.itextpdf.pdfocr.IOcrEngine;
+import com.itextpdf.pdfocr.OcrPdfCreator;
+import com.itextpdf.pdfocr.OcrPdfCreatorProperties;
+import com.itextpdf.pdfocr.helpers.CustomOcrEngine;
+import com.itextpdf.test.ExtendedITextTest;
+import com.itextpdf.test.annotations.type.IntegrationTest;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.InputStream;
+import java.util.Arrays;
+import org.junit.Assert;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.ExpectedException;
+
+@Category(IntegrationTest.class)
+public class EventCountingTest extends ExtendedITextTest {
+
+    protected static final String PROFILE_FOLDER = "./src/test/resources/com/itextpdf/pdfocr/profiles/";
+    protected static final String SOURCE_FOLDER = "./src/test/resources/com/itextpdf/pdfocr/events/";
+
+    @Rule
+    public ExpectedException junitExpectedException = ExpectedException.none();
+
+    private IOcrEngine tesseractReader;
+
+    public EventCountingTest() {
+        tesseractReader = new CustomOcrEngine();
+    }
+
+    @Test
+    public void testEventCountingPdfEvent() {
+        ((CustomOcrEngine) tesseractReader).setThreadLocalMetaInfo(new TestMetaInfo());
+
+        doImageToPdfOcr(tesseractReader, getTestImageFile());
+
+        Assert.assertTrue(((CustomOcrEngine) tesseractReader).getThreadLocalMetaInfo() instanceof TestMetaInfo);
+    }
+
+    @Test
+    public void testEventCountingPdfAEvent() {
+        ((CustomOcrEngine) tesseractReader).setThreadLocalMetaInfo(new TestMetaInfo());
+
+        doImageToPdfAOcr(tesseractReader, getTestImageFile());
+
+        Assert.assertTrue(((CustomOcrEngine) tesseractReader).getThreadLocalMetaInfo() instanceof TestMetaInfo);
+    }
+
+    @Test
+    public void testEventCountingImageEvent() {
+        ((CustomOcrEngine) tesseractReader).setThreadLocalMetaInfo(new TestMetaInfo());
+
+        doImageOcr(tesseractReader, getTestImageFile());
+
+        Assert.assertTrue(((CustomOcrEngine) tesseractReader).getThreadLocalMetaInfo() instanceof TestMetaInfo);
+    }
+
+    private static void doImageOcr(IOcrEngine tesseractReader, File imageFile) {
+        tesseractReader.doImageOcr(imageFile);
+    }
+
+    private static void doImageToPdfOcr(IOcrEngine tesseractReader, File imageFile) {
+        OcrPdfCreator ocrPdfCreator = new OcrPdfCreator(tesseractReader);
+        ocrPdfCreator.createPdf(Arrays.asList(imageFile), new PdfWriter(new ByteArrayOutputStream()));
+    }
+
+    private static void doImageToPdfAOcr(IOcrEngine tesseractReader, File imageFile) {
+        // Runs image-to-PDF/A OCR; the ICC profile stream is closed when the try block is left.
+        OcrPdfCreator ocrPdfCreator = new OcrPdfCreator(tesseractReader,
+                new OcrPdfCreatorProperties().setPdfLang("en-US"));
+        try (InputStream is = new FileInputStream(PROFILE_FOLDER + "sRGB_CS_profile.icm")) {
+            PdfOutputIntent outputIntent = new PdfOutputIntent("Custom", "", "http://www.color.org",
+                    "sRGB IEC61966-2.1", is);
+            ocrPdfCreator.createPdfA(Arrays.asList(imageFile), new PdfWriter(new ByteArrayOutputStream()),
+                    outputIntent);
+        } catch (java.io.IOException e) {
+            // A missing/unreadable profile means a broken test setup: fail instead of passing a null stream on.
+            throw new IllegalStateException("Cannot read ICC profile: " + PROFILE_FOLDER + "sRGB_CS_profile.icm", e);
+        }
+    }
+
+    private static File getTestImageFile() {
+        String imgPath = SOURCE_FOLDER + "numbers_01.jpg";
+        return new File(imgPath);
+    }
+}
diff --git a/pdfocr-api/src/test/java/com/itextpdf/pdfocr/helpers/CustomOcrEngine.java b/pdfocr-api/src/test/java/com/itextpdf/pdfocr/helpers/CustomOcrEngine.java
new file mode 100644
index 0000000..c165a1a
--- /dev/null
+++ b/pdfocr-api/src/test/java/com/itextpdf/pdfocr/helpers/CustomOcrEngine.java
@@ -0,0 +1,82 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr.helpers;
+
+import com.itextpdf.kernel.counter.event.IMetaInfo;
+import com.itextpdf.pdfocr.IOcrEngine;
+import com.itextpdf.pdfocr.OcrEngineProperties;
+import com.itextpdf.pdfocr.TextInfo;
+import com.itextpdf.pdfocr.events.IThreadLocalMetaInfoAware;
+
+import java.io.File;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+public class CustomOcrEngine implements IOcrEngine, IThreadLocalMetaInfoAware {
+
+    private OcrEngineProperties ocrEngineProperties;
+    private IMetaInfo threadLocalMetaInfo;
+
+    public CustomOcrEngine() {
+    }
+
+    public CustomOcrEngine(OcrEngineProperties ocrEngineProperties) {
+        this.ocrEngineProperties = new OcrEngineProperties(ocrEngineProperties);
+    }
+
+    @Override
+    public Map<Integer, List<TextInfo>> doImageOcr(File input) {
+        // Test stub: no real OCR is performed. The returned text depends only on
+        // whether the input path contains the Thai image name.
+        boolean thaiImage = input.getAbsolutePath().contains(PdfHelper.THAI_IMAGE_NAME);
+        String recognizedText = thaiImage ? PdfHelper.THAI_TEXT : PdfHelper.DEFAULT_TEXT;
+        // The same hard-coded coordinates are reported for every image.
+        List<Float> bbox = Arrays.<Float>asList(204.0f, 158.0f, 742.0f, 294.0f);
+        TextInfo stubInfo = new TextInfo(recognizedText, bbox);
+        Map<Integer, List<TextInfo>> ocrResult = new HashMap<Integer, List<TextInfo>>();
+        ocrResult.put(1, Collections.<TextInfo>singletonList(stubInfo)); // key 1: presumably the page number
+        return ocrResult;
+    }
+
+    @Override
+    public void createTxtFile(List<File> inputImages, File txtFile) {
+    }
+
+    @Override
+    public IMetaInfo getThreadLocalMetaInfo() {
+        return threadLocalMetaInfo;
+    }
+
+    @Override
+    public IThreadLocalMetaInfoAware setThreadLocalMetaInfo(IMetaInfo metaInfo) {
+        this.threadLocalMetaInfo = metaInfo;
+        return this;
+    }
+
+    public OcrEngineProperties getOcrEngineProperties() {
+        return ocrEngineProperties;
+    }
+}
diff --git a/pdfocr-api/src/test/java/com/itextpdf/pdfocr/helpers/ExtractionStrategy.java b/pdfocr-api/src/test/java/com/itextpdf/pdfocr/helpers/ExtractionStrategy.java
new file mode 100644
index 0000000..0313dfd
--- /dev/null
+++ b/pdfocr-api/src/test/java/com/itextpdf/pdfocr/helpers/ExtractionStrategy.java
@@ -0,0 +1,127 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr.helpers;
+
+import com.itextpdf.kernel.font.PdfFont;
+import com.itextpdf.kernel.pdf.PdfDictionary;
+import com.itextpdf.kernel.pdf.PdfName;
+import com.itextpdf.kernel.pdf.canvas.CanvasTag;
+import com.itextpdf.kernel.pdf.canvas.parser.EventType;
+import com.itextpdf.kernel.pdf.canvas.parser.data.IEventData;
+import com.itextpdf.kernel.pdf.canvas.parser.data.ImageRenderInfo;
+import com.itextpdf.kernel.pdf.canvas.parser.data.TextRenderInfo;
+import com.itextpdf.kernel.pdf.canvas.parser.listener.ITextChunkLocation;
+import com.itextpdf.kernel.pdf.canvas.parser.listener.LocationTextExtractionStrategy;
+import com.itextpdf.kernel.pdf.canvas.parser.listener.TextChunk;
+
+public class ExtractionStrategy extends LocationTextExtractionStrategy {
+    private com.itextpdf.kernel.geom.Rectangle imageBBoxRectangle;
+    private com.itextpdf.kernel.colors.Color fillColor;
+    private String layerName;
+    private PdfFont pdfFont;
+
+    public ExtractionStrategy(String name) {
+        super();
+        layerName = name;
+    }
+
+    public com.itextpdf.kernel.colors.Color getFillColor() {
+        return fillColor;
+    }
+
+    public void setFillColor(com.itextpdf.kernel.colors.Color color) {
+        fillColor = color;
+    }
+
+    public PdfFont getPdfFont() {
+        return pdfFont;
+    }
+
+    public void setPdfFont(PdfFont font) {
+        pdfFont = font;
+    }
+
+    public com.itextpdf.kernel.geom.Rectangle getImageBBoxRectangle() {
+        return this.imageBBoxRectangle;
+    }
+
+    public void setImageBBoxRectangle(com.itextpdf.kernel.geom.Rectangle imageBBoxRectangle) {
+        this.imageBBoxRectangle = imageBBoxRectangle;
+    }
+
+    @Override
+    public void eventOccurred(IEventData data, EventType type) {
+        boolean isText = type.equals(EventType.RENDER_TEXT);
+        if (!isText && !type.equals(EventType.RENDER_IMAGE)) {
+            return;
+        }
+        // Handle the event only when its tag name matches the configured layer name (null matches null).
+        String tagName = getTagName(data, type);
+        if (layerName == null ? tagName != null : !layerName.equals(tagName)) {
+            return;
+        }
+        if (isText) {
+            TextRenderInfo textInfo = (TextRenderInfo) data;
+            setFillColor(textInfo.getGraphicsState().getFillColor());
+            setPdfFont(textInfo.getGraphicsState().getFont());
+            super.eventOccurred(data, type);
+        } else {
+            com.itextpdf.kernel.geom.Matrix ctm = ((ImageRenderInfo) data).getImageCtm();
+            setImageBBoxRectangle(new com.itextpdf.kernel.geom.Rectangle(ctm.get(6), ctm.get(7), ctm.get(0), ctm.get(4)));
+        }
+    }
+
+    @Override
+    protected boolean isChunkAtWordBoundary(TextChunk chunk,
+            TextChunk previousChunk) {
+        ITextChunkLocation curLoc = chunk.getLocation();
+        ITextChunkLocation prevLoc = previousChunk.getLocation();
+
+        if (curLoc.getStartLocation().equals(curLoc.getEndLocation()) ||
+                prevLoc.getEndLocation()
+                        .equals(prevLoc.getStartLocation())) {
+            return false;
+        }
+
+        return curLoc.distParallelEnd() - prevLoc.distParallelStart() >
+                (curLoc.getCharSpaceWidth() + prevLoc.getCharSpaceWidth())
+                        / 2.0f;
+    }
+
+    private String getTagName(IEventData data, EventType type) {
+        // Name of the first tag in the hierarchy; null if the tag, its properties or its /Name is missing.
+        java.util.List<CanvasTag> tagHierarchy = null;
+        if (type.equals(EventType.RENDER_TEXT)) {
+            tagHierarchy = ((TextRenderInfo) data).getCanvasTagHierarchy();
+        } else if (type.equals(EventType.RENDER_IMAGE)) {
+            tagHierarchy = ((ImageRenderInfo) data).getCanvasTagHierarchy();
+        }
+        if (tagHierarchy == null || tagHierarchy.isEmpty()) {
+            return null;
+        }
+        PdfDictionary properties = tagHierarchy.get(0).getProperties();
+        return (properties == null || properties.get(PdfName.Name) == null)
+                ? null : properties.get(PdfName.Name).toString();
+    }
+
+}
diff --git a/pdfocr-api/src/test/java/com/itextpdf/pdfocr/helpers/PdfHelper.java b/pdfocr-api/src/test/java/com/itextpdf/pdfocr/helpers/PdfHelper.java
new file mode 100644
index 0000000..3658901
--- /dev/null
+++ b/pdfocr-api/src/test/java/com/itextpdf/pdfocr/helpers/PdfHelper.java
@@ -0,0 +1,252 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr.helpers;
+
+import com.itextpdf.kernel.pdf.PdfDocument;
+import com.itextpdf.kernel.pdf.PdfOutputIntent;
+import com.itextpdf.kernel.pdf.PdfReader;
+import com.itextpdf.kernel.pdf.PdfWriter;
+import com.itextpdf.kernel.pdf.WriterProperties;
+import com.itextpdf.kernel.pdf.canvas.parser.PdfCanvasProcessor;
+import com.itextpdf.pdfocr.OcrPdfCreator;
+import com.itextpdf.pdfocr.OcrPdfCreatorProperties;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Files;
+import java.util.Collections;
+
+import com.itextpdf.test.ExtendedITextTest;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class PdfHelper {
+
+    public static final String DEFAULT_IMAGE_NAME = "numbers_01.jpg";
+    public static final String DEFAULT_TEXT = "619121";
+    public static final String THAI_IMAGE_NAME = "thai.PNG";
+    public static final String THAI_TEXT = "ป ระ เท ศ ไ";
+    // directory with test files
+    public static final String TEST_DIRECTORY = "./src/test/resources/com/itextpdf/pdfocr/";
+    public static final String TARGET_DIRECTORY = "./target/test/resources/com/itextpdf/pdfocr/";
+
+    private static final Logger LOGGER = LoggerFactory
+            .getLogger(PdfHelper.class);
+
+    /**
+     * Returns images test directory.
+     */
+    public static String getImagesTestDirectory() {
+        return TEST_DIRECTORY + "images/";
+    }
+
+    /**
+     * Returns path to default test image.
+     */
+    public static String getDefaultImagePath() {
+        return getImagesTestDirectory() + DEFAULT_IMAGE_NAME;
+    }
+
+    /**
+     * Returns path to thai test image.
+     */
+    public static String getThaiImagePath() {
+        return getImagesTestDirectory() + THAI_IMAGE_NAME;
+    }
+
+    /**
+     * Returns path to test font.
+     */
+    public static String getFreeSansFontPath() {
+        return TEST_DIRECTORY + "fonts/FreeSans.ttf";
+    }
+
+    /**
+     * Returns path to test font.
+     */
+    public static String getKanitFontPath() {
+        return TEST_DIRECTORY + "fonts/Kanit-Regular.ttf";
+    }
+
+    /**
+     * Returns target directory (because target/test could not exist).
+     */
+    public static String getTargetDirectory() {
+        if (!Files.exists(java.nio.file.Paths.get(TARGET_DIRECTORY))) {
+            ExtendedITextTest.createDestinationFolder(TARGET_DIRECTORY);
+        }
+        return TARGET_DIRECTORY;
+    }
+
+    /**
+     *
+     * Create pdfWriter using provided path to destination file.
+     */
+    public static PdfWriter getPdfWriter(String pdfPath) throws FileNotFoundException {
+        return new PdfWriter(pdfPath,
+                new WriterProperties().addUAXmpMetadata());
+    }
+
+    /**
+     * Create pdfWriter.
+     */
+    public static PdfWriter getPdfWriter() {
+        return new PdfWriter(new ByteArrayOutputStream(), new WriterProperties().addUAXmpMetadata());
+    }
+
+    /**
+     * Creates PDF rgb output intent for tests.
+     */
+    public static PdfOutputIntent getRGBPdfOutputIntent() throws FileNotFoundException {
+        String defaultRGBColorProfilePath = TEST_DIRECTORY + "profiles"
+                        + "/sRGB_CS_profile.icm";
+        InputStream is = new FileInputStream(defaultRGBColorProfilePath);
+        return new PdfOutputIntent("", "",
+                "", "sRGB IEC61966-2.1", is);
+    }
+
+    /**
+     * Creates PDF cmyk output intent for tests.
+     */
+    public static PdfOutputIntent getCMYKPdfOutputIntent() throws FileNotFoundException {
+        String defaultCMYKColorProfilePath = TEST_DIRECTORY
+                + "profiles/CoatedFOGRA27.icc";
+        InputStream is = new FileInputStream(defaultCMYKColorProfilePath);
+        return new PdfOutputIntent("Custom",
+                "","http://www.color.org",
+                "Coated FOGRA27 (ISO 12647 - 2:2004)", is);
+    }
+
+    /**
+     * Get text from layer specified by name from the first page.
+     */
+    public static String getTextFromPdfLayer(String pdfPath,
+            String layerName)
+            throws IOException {
+        ExtractionStrategy textExtractionStrategy = getExtractionStrategy(pdfPath, layerName, false);
+        return textExtractionStrategy.getResultantText();
+    }
+
+    /**
+     * Get text from layer specified by name from the first page, with "use actual text" enabled.
+     */
+    public static String getTextFromPdfLayerUseActualText(String pdfPath,
+            String layerName)
+            throws IOException {
+        ExtractionStrategy textExtractionStrategy = getExtractionStrategy(pdfPath, layerName, true);
+        return textExtractionStrategy.getResultantText();
+    }
+
+    /**
+     * Perform OCR with custom ocr engine using provided input image and set
+     * of properties and save to the given path. I/O failures are logged, not rethrown.
+     */
+    public static void createPdf(String pdfPath, File inputFile,
+            OcrPdfCreatorProperties properties) {
+        OcrPdfCreator ocrPdfCreator = new OcrPdfCreator(new CustomOcrEngine(),
+                properties);
+        try (PdfWriter pdfWriter = getPdfWriter(pdfPath)) {
+            ocrPdfCreator.createPdf(Collections.<File>singletonList(inputFile),
+                    pdfWriter).close();
+        } catch (IOException e) {
+            LOGGER.error(e.getMessage(), e);
+        }
+    }
+
+    /**
+     * Perform OCR with custom ocr engine using provided input image, set of properties and
+     * output intent, and save as PDF/A to the given path. I/O failures are logged, not rethrown.
+     */
+    public static void createPdfA(String pdfPath, File inputFile,
+            OcrPdfCreatorProperties properties, PdfOutputIntent outputIntent) {
+        OcrPdfCreator ocrPdfCreator = new OcrPdfCreator(new CustomOcrEngine(),
+                properties);
+        try (PdfWriter pdfWriter = getPdfWriter(pdfPath)) {
+            ocrPdfCreator.createPdfA(Collections.<File>singletonList(inputFile),
+                    pdfWriter, outputIntent).close();
+        } catch (IOException e) {
+            LOGGER.error(e.getMessage(), e);
+        }
+    }
+
+    /**
+     * Create PDF from given image in the target directory and retrieve text of its "Text Layer".
+     */
+    public static String getTextFromPdf(File file, String testName) {
+        String result = null;
+        String pdfPath = null;
+        try {
+            pdfPath = getTargetDirectory() + testName + ".pdf";
+            createPdf(pdfPath, file, new OcrPdfCreatorProperties());
+            result = getTextFromPdfLayer(pdfPath, "Text Layer");
+        } catch (IOException e) {
+            LOGGER.error(e.getMessage());
+        }
+
+        return result;
+    }
+
+    /**
+     * Get extraction strategy for given document.
+     */
+    public static ExtractionStrategy getExtractionStrategy(String pdfPath)
+            throws IOException {
+        return getExtractionStrategy(pdfPath, null);
+    }
+
+    /**
+     * Get extraction strategy for given document.
+     */
+    public static ExtractionStrategy getExtractionStrategy(String pdfPath,
+            boolean useActualText)
+            throws IOException {
+        return getExtractionStrategy(pdfPath, "Text Layer", useActualText);
+    }
+
+    /**
+     * Get extraction strategy for given document.
+     */
+    public static ExtractionStrategy getExtractionStrategy(String pdfPath,
+            String layerName) throws IOException {
+        return getExtractionStrategy(pdfPath, layerName, false);
+    }
+
+    /**
+     * Get extraction strategy for the first page of given document; the document is always closed.
+     */
+    public static ExtractionStrategy getExtractionStrategy(String pdfPath,
+            String layerName, boolean useActualText)
+            throws IOException {
+        ExtractionStrategy strategy = new ExtractionStrategy(layerName);
+        strategy.setUseActualText(useActualText);
+        // try-with-resources closes the document even when content processing throws
+        try (PdfDocument pdfDocument = new PdfDocument(new PdfReader(pdfPath))) {
+            new PdfCanvasProcessor(strategy).processPageContent(pdfDocument.getFirstPage());
+        }
+        return strategy;
+    }
+}
diff --git a/pdfocr-api/src/test/resources/com/itextpdf/pdfocr/events/numbers_01.jpg b/pdfocr-api/src/test/resources/com/itextpdf/pdfocr/events/numbers_01.jpg
new file mode 100644
index 0000000..f384caa
Binary files /dev/null and b/pdfocr-api/src/test/resources/com/itextpdf/pdfocr/events/numbers_01.jpg differ
diff --git a/pdfocr-api/src/test/resources/com/itextpdf/pdfocr/fonts/FreeSans.ttf b/pdfocr-api/src/test/resources/com/itextpdf/pdfocr/fonts/FreeSans.ttf
new file mode 100644
index 0000000..2072cda
Binary files /dev/null and b/pdfocr-api/src/test/resources/com/itextpdf/pdfocr/fonts/FreeSans.ttf differ
diff --git a/pdfocr-api/src/test/resources/com/itextpdf/pdfocr/fonts/Kanit-Regular.ttf b/pdfocr-api/src/test/resources/com/itextpdf/pdfocr/fonts/Kanit-Regular.ttf
new file mode 100644
index 0000000..8ca24fa
Binary files /dev/null and b/pdfocr-api/src/test/resources/com/itextpdf/pdfocr/fonts/Kanit-Regular.ttf differ
diff --git a/pdfocr-api/src/test/resources/com/itextpdf/pdfocr/fonts/LICENSE_GNU.txt b/pdfocr-api/src/test/resources/com/itextpdf/pdfocr/fonts/LICENSE_GNU.txt
new file mode 100644
index 0000000..f288702
--- /dev/null
+++ b/pdfocr-api/src/test/resources/com/itextpdf/pdfocr/fonts/LICENSE_GNU.txt
@@ -0,0 +1,674 @@
+                    GNU GENERAL PUBLIC LICENSE
+                       Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                            Preamble
+
+  The GNU General Public License is a free, copyleft license for
+software and other kinds of works.
+
+  The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works.  By contrast,
+the GNU General Public License is intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users.  We, the Free Software Foundation, use the
+GNU General Public License for most of our software; it applies also to
+any other work released this way by its authors.  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+  To protect your rights, we need to prevent others from denying you
+these rights or asking you to surrender the rights.  Therefore, you have
+certain responsibilities if you distribute copies of the software, or if
+you modify it: responsibilities to respect the freedom of others.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must pass on to the recipients the same
+freedoms that you received.  You must make sure that they, too, receive
+or can get the source code.  And you must show them these terms so they
+know their rights.
+
+  Developers that use the GNU GPL protect your rights with two steps:
+(1) assert copyright on the software, and (2) offer you this License
+giving you legal permission to copy, distribute and/or modify it.
+
+  For the developers' and authors' protection, the GPL clearly explains
+that there is no warranty for this free software.  For both users' and
+authors' sake, the GPL requires that modified versions be marked as
+changed, so that their problems will not be attributed erroneously to
+authors of previous versions.
+
+  Some devices are designed to deny users access to install or run
+modified versions of the software inside them, although the manufacturer
+can do so.  This is fundamentally incompatible with the aim of
+protecting users' freedom to change the software.  The systematic
+pattern of such abuse occurs in the area of products for individuals to
+use, which is precisely where it is most unacceptable.  Therefore, we
+have designed this version of the GPL to prohibit the practice for those
+products.  If such problems arise substantially in other domains, we
+stand ready to extend this provision to those domains in future versions
+of the GPL, as needed to protect the freedom of users.
+
+  Finally, every program is threatened constantly by software patents.
+States should not allow patents to restrict development and use of
+software on general-purpose computers, but in those that do, we wish to
+avoid the special danger that patents applied to a free program could
+make it effectively proprietary.  To prevent this, the GPL assures that
+patents cannot be used to render the program non-free.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+                       TERMS AND CONDITIONS
+
+  0. Definitions.
+
+  "This License" refers to version 3 of the GNU General Public License.
+
+  "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+  "The Program" refers to any copyrightable work licensed under this
+License.  Each licensee is addressed as "you".  "Licensees" and
+"recipients" may be individuals or organizations.
+
+  To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy.  The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+  A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+  To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy.  Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+  To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies.  Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+  An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License.  If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+  1. Source Code.
+
+  The "source code" for a work means the preferred form of the work
+for making modifications to it.  "Object code" means any non-source
+form of a work.
+
+  A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+  The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form.  A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+  The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities.  However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work.  For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+  The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+  The Corresponding Source for a work in source code form is that
+same work.
+
+  2. Basic Permissions.
+
+  All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met.  This License explicitly affirms your unlimited
+permission to run the unmodified Program.  The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work.  This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+  You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force.  You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright.  Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+  Conveying under any other circumstances is permitted solely under
+the conditions stated below.  Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+  3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+  No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+  When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+  4. Conveying Verbatim Copies.
+
+  You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+  You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+  5. Conveying Modified Source Versions.
+
+  You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+    a) The work must carry prominent notices stating that you modified
+    it, and giving a relevant date.
+
+    b) The work must carry prominent notices stating that it is
+    released under this License and any conditions added under section
+    7.  This requirement modifies the requirement in section 4 to
+    "keep intact all notices".
+
+    c) You must license the entire work, as a whole, under this
+    License to anyone who comes into possession of a copy.  This
+    License will therefore apply, along with any applicable section 7
+    additional terms, to the whole of the work, and all its parts,
+    regardless of how they are packaged.  This License gives no
+    permission to license the work in any other way, but it does not
+    invalidate such permission if you have separately received it.
+
+    d) If the work has interactive user interfaces, each must display
+    Appropriate Legal Notices; however, if the Program has interactive
+    interfaces that do not display Appropriate Legal Notices, your
+    work need not make them do so.
+
+  A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit.  Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+  6. Conveying Non-Source Forms.
+
+  You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+    a) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by the
+    Corresponding Source fixed on a durable physical medium
+    customarily used for software interchange.
+
+    b) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by a
+    written offer, valid for at least three years and valid for as
+    long as you offer spare parts or customer support for that product
+    model, to give anyone who possesses the object code either (1) a
+    copy of the Corresponding Source for all the software in the
+    product that is covered by this License, on a durable physical
+    medium customarily used for software interchange, for a price no
+    more than your reasonable cost of physically performing this
+    conveying of source, or (2) access to copy the
+    Corresponding Source from a network server at no charge.
+
+    c) Convey individual copies of the object code with a copy of the
+    written offer to provide the Corresponding Source.  This
+    alternative is allowed only occasionally and noncommercially, and
+    only if you received the object code with such an offer, in accord
+    with subsection 6b.
+
+    d) Convey the object code by offering access from a designated
+    place (gratis or for a charge), and offer equivalent access to the
+    Corresponding Source in the same way through the same place at no
+    further charge.  You need not require recipients to copy the
+    Corresponding Source along with the object code.  If the place to
+    copy the object code is a network server, the Corresponding Source
+    may be on a different server (operated by you or a third party)
+    that supports equivalent copying facilities, provided you maintain
+    clear directions next to the object code saying where to find the
+    Corresponding Source.  Regardless of what server hosts the
+    Corresponding Source, you remain obligated to ensure that it is
+    available for as long as needed to satisfy these requirements.
+
+    e) Convey the object code using peer-to-peer transmission, provided
+    you inform other peers where the object code and Corresponding
+    Source of the work are being offered to the general public at no
+    charge under subsection 6d.
+
+  A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+  A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling.  In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage.  For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product.  A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+  "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source.  The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+  If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information.  But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+  The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed.  Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+  Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+  7. Additional Terms.
+
+  "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law.  If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+  When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it.  (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.)  You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+  Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+    a) Disclaiming warranty or limiting liability differently from the
+    terms of sections 15 and 16 of this License; or
+
+    b) Requiring preservation of specified reasonable legal notices or
+    author attributions in that material or in the Appropriate Legal
+    Notices displayed by works containing it; or
+
+    c) Prohibiting misrepresentation of the origin of that material, or
+    requiring that modified versions of such material be marked in
+    reasonable ways as different from the original version; or
+
+    d) Limiting the use for publicity purposes of names of licensors or
+    authors of the material; or
+
+    e) Declining to grant rights under trademark law for use of some
+    trade names, trademarks, or service marks; or
+
+    f) Requiring indemnification of licensors and authors of that
+    material by anyone who conveys the material (or modified versions of
+    it) with contractual assumptions of liability to the recipient, for
+    any liability that these contractual assumptions directly impose on
+    those licensors and authors.
+
+  All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10.  If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term.  If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+  If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+  Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+  8. Termination.
+
+  You may not propagate or modify a covered work except as expressly
+provided under this License.  Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+  However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+  Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+  Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License.  If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+  9. Acceptance Not Required for Having Copies.
+
+  You are not required to accept this License in order to receive or
+run a copy of the Program.  Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance.  However,
+nothing other than this License grants you permission to propagate or
+modify any covered work.  These actions infringe copyright if you do
+not accept this License.  Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+  10. Automatic Licensing of Downstream Recipients.
+
+  Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License.  You are not responsible
+for enforcing compliance by third parties with this License.
+
+  An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations.  If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+  You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License.  For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+  11. Patents.
+
+  A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based.  The
+work thus licensed is called the contributor's "contributor version".
+
+  A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version.  For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+  Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+  In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement).  To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+  If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients.  "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+  If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+  A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License.  You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+  Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+  12. No Surrender of Others' Freedom.
+
+  If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all.  For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+  13. Use with the GNU Affero General Public License.
+
+  Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU Affero General Public License into a single
+combined work, and to convey the resulting work.  The terms of this
+License will continue to apply to the part which is the covered work,
+but the special requirements of the GNU Affero General Public License,
+section 13, concerning interaction through a network will apply to the
+combination as such.
+
+  14. Revised Versions of this License.
+
+  The Free Software Foundation may publish revised and/or new versions of
+the GNU General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+  Each version is given a distinguishing version number.  If the
+Program specifies that a certain numbered version of the GNU General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation.  If the Program does not specify a version number of the
+GNU General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+  If the Program specifies that a proxy can decide which future
+versions of the GNU General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+  Later license versions may give you additional or different
+permissions.  However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+  15. Disclaimer of Warranty.
+
+  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+  16. Limitation of Liability.
+
+  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+  17. Interpretation of Sections 15 and 16.
+
+  If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+                     END OF TERMS AND CONDITIONS
+
+            How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+  If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+
+    <program>  Copyright (C) <year>  <name of author>
+    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+
+  You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<https://www.gnu.org/licenses/>.
+
+  The GNU General Public License does not permit incorporating your program
+into proprietary programs.  If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.  But first, please read
+<https://www.gnu.org/licenses/why-not-lgpl.html>.
diff --git a/pdfocr-api/src/test/resources/com/itextpdf/pdfocr/fonts/LICENSE_OFL.txt b/pdfocr-api/src/test/resources/com/itextpdf/pdfocr/fonts/LICENSE_OFL.txt
new file mode 100644
index 0000000..77b1731
--- /dev/null
+++ b/pdfocr-api/src/test/resources/com/itextpdf/pdfocr/fonts/LICENSE_OFL.txt
@@ -0,0 +1,91 @@
+This Font Software is licensed under the SIL Open Font License, Version 1.1.
+This license is copied below, and is also available with a FAQ at:
+http://scripts.sil.org/OFL
+
+
+-----------------------------------------------------------
+SIL OPEN FONT LICENSE Version 1.1 - 26 February 2007
+-----------------------------------------------------------
+
+PREAMBLE
+The goals of the Open Font License (OFL) are to stimulate worldwide
+development of collaborative font projects, to support the font creation
+efforts of academic and linguistic communities, and to provide a free and
+open framework in which fonts may be shared and improved in partnership
+with others.
+
+The OFL allows the licensed fonts to be used, studied, modified and
+redistributed freely as long as they are not sold by themselves. The
+fonts, including any derivative works, can be bundled, embedded, 
+redistributed and/or sold with any software provided that any reserved
+names are not used by derivative works. The fonts and derivatives,
+however, cannot be released under any other type of license. The
+requirement for fonts to remain under this license does not apply
+to any document created using the fonts or their derivatives.
+
+DEFINITIONS
+"Font Software" refers to the set of files released by the Copyright
+Holder(s) under this license and clearly marked as such. This may
+include source files, build scripts and documentation.
+
+"Reserved Font Name" refers to any names specified as such after the
+copyright statement(s).
+
+"Original Version" refers to the collection of Font Software components as
+distributed by the Copyright Holder(s).
+
+"Modified Version" refers to any derivative made by adding to, deleting,
+or substituting -- in part or in whole -- any of the components of the
+Original Version, by changing formats or by porting the Font Software to a
+new environment.
+
+"Author" refers to any designer, engineer, programmer, technical
+writer or other person who contributed to the Font Software.
+
+PERMISSION & CONDITIONS
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of the Font Software, to use, study, copy, merge, embed, modify,
+redistribute, and sell modified and unmodified copies of the Font
+Software, subject to the following conditions:
+
+1) Neither the Font Software nor any of its individual components,
+in Original or Modified Versions, may be sold by itself.
+
+2) Original or Modified Versions of the Font Software may be bundled,
+redistributed and/or sold with any software, provided that each copy
+contains the above copyright notice and this license. These can be
+included either as stand-alone text files, human-readable headers or
+in the appropriate machine-readable metadata fields within text or
+binary files as long as those fields can be easily viewed by the user.
+
+3) No Modified Version of the Font Software may use the Reserved Font
+Name(s) unless explicit written permission is granted by the corresponding
+Copyright Holder. This restriction only applies to the primary font name as
+presented to the users.
+
+4) The name(s) of the Copyright Holder(s) or the Author(s) of the Font
+Software shall not be used to promote, endorse or advertise any
+Modified Version, except to acknowledge the contribution(s) of the
+Copyright Holder(s) and the Author(s) or with their explicit written
+permission.
+
+5) The Font Software, modified or unmodified, in part or in whole,
+must be distributed entirely under this license, and must not be
+distributed under any other license. The requirement for fonts to
+remain under this license does not apply to any document created
+using the Font Software.
+
+TERMINATION
+This license becomes null and void if any of the above conditions are
+not met.
+
+DISCLAIMER
+THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
+OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL THE
+COPYRIGHT HOLDER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL
+DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM
+OTHER DEALINGS IN THE FONT SOFTWARE.
diff --git a/pdfocr-api/src/test/resources/com/itextpdf/pdfocr/fonts/NOTICE.txt b/pdfocr-api/src/test/resources/com/itextpdf/pdfocr/fonts/NOTICE.txt
new file mode 100644
index 0000000..496bc47
--- /dev/null
+++ b/pdfocr-api/src/test/resources/com/itextpdf/pdfocr/fonts/NOTICE.txt
@@ -0,0 +1,4 @@
+Please note that the following fonts are used under the licenses mentioned below.
+
+* FreeSans - GPL license, which can be found at https://www.gnu.org/licenses
+* Kanit-Regular - SIL Open Font License, Version 1.1
diff --git a/pdfocr-api/src/test/resources/com/itextpdf/pdfocr/images/corrupted.jpg b/pdfocr-api/src/test/resources/com/itextpdf/pdfocr/images/corrupted.jpg
new file mode 100644
index 0000000..2c0d56a
Binary files /dev/null and b/pdfocr-api/src/test/resources/com/itextpdf/pdfocr/images/corrupted.jpg differ
diff --git a/pdfocr-api/src/test/resources/com/itextpdf/pdfocr/images/multipage.tiff b/pdfocr-api/src/test/resources/com/itextpdf/pdfocr/images/multipage.tiff
new file mode 100644
index 0000000..e8cc630
Binary files /dev/null and b/pdfocr-api/src/test/resources/com/itextpdf/pdfocr/images/multipage.tiff differ
diff --git a/pdfocr-api/src/test/resources/com/itextpdf/pdfocr/images/numbers_01.jpg b/pdfocr-api/src/test/resources/com/itextpdf/pdfocr/images/numbers_01.jpg
new file mode 100644
index 0000000..f384caa
Binary files /dev/null and b/pdfocr-api/src/test/resources/com/itextpdf/pdfocr/images/numbers_01.jpg differ
diff --git a/pdfocr-api/src/test/resources/com/itextpdf/pdfocr/images/thai.PNG b/pdfocr-api/src/test/resources/com/itextpdf/pdfocr/images/thai.PNG
new file mode 100644
index 0000000..7823203
Binary files /dev/null and b/pdfocr-api/src/test/resources/com/itextpdf/pdfocr/images/thai.PNG differ
diff --git a/pdfocr-api/src/test/resources/com/itextpdf/pdfocr/profiles/CoatedFOGRA27.icc b/pdfocr-api/src/test/resources/com/itextpdf/pdfocr/profiles/CoatedFOGRA27.icc
new file mode 100644
index 0000000..086ac9d
Binary files /dev/null and b/pdfocr-api/src/test/resources/com/itextpdf/pdfocr/profiles/CoatedFOGRA27.icc differ
diff --git a/pdfocr-api/src/test/resources/com/itextpdf/pdfocr/profiles/sRGB_CS_profile.icm b/pdfocr-api/src/test/resources/com/itextpdf/pdfocr/profiles/sRGB_CS_profile.icm
new file mode 100644
index 0000000..7f9d18d
Binary files /dev/null and b/pdfocr-api/src/test/resources/com/itextpdf/pdfocr/profiles/sRGB_CS_profile.icm differ
diff --git a/pdfocr-tesseract4/pom.xml b/pdfocr-tesseract4/pom.xml
new file mode 100644
index 0000000..c09cdfe
--- /dev/null
+++ b/pdfocr-tesseract4/pom.xml
@@ -0,0 +1,57 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <groupId>com.itextpdf</groupId>
+    <artifactId>pdfocr-root</artifactId>
+    <version>1.0.0</version>
+  </parent>
+
+  <artifactId>pdfocr-tesseract4</artifactId>
+
+  <name>pdfOCR-Tesseract4</name>
+  <description>pdfOCR-Tesseract4 is an iText 7 add-on for Java to recognize and extract text in scanned documents and images. It can also convert them into fully ISO-compliant PDF or PDF/A-3u files that are accessible, searchable, and suitable for archiving</description>
+
+  <dependencies>
+    <dependency>
+      <groupId>com.itextpdf</groupId>
+      <artifactId>pdfocr-api</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>com.itextpdf</groupId>
+      <artifactId>styled-xml-parser</artifactId>
+      <version>${itext.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>net.sourceforge.tess4j</groupId>
+      <artifactId>tess4j</artifactId>
+      <version>4.5.1</version>
+      <exclusions>
+        <exclusion>
+          <groupId>log4j</groupId>
+          <artifactId>log4j</artifactId>
+        </exclusion>
+        <exclusion>
+          <artifactId>ghost4j</artifactId>
+          <groupId>org.ghost4j</groupId>
+        </exclusion>
+        <exclusion>
+          <artifactId>slf4j-api</artifactId>
+          <groupId>org.slf4j</groupId>
+        </exclusion>
+        <exclusion>
+          <artifactId>log4j-over-slf4j</artifactId>
+          <groupId>org.slf4j</groupId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>com.itextpdf</groupId>
+      <artifactId>pdftest</artifactId>
+      <version>${itext.version}</version>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+</project>
diff --git a/pdfocr-tesseract4/src/main/java/com/itextpdf/pdfocr/tesseract4/AbstractTesseract4OcrEngine.java b/pdfocr-tesseract4/src/main/java/com/itextpdf/pdfocr/tesseract4/AbstractTesseract4OcrEngine.java
new file mode 100644
index 0000000..feb1e9b
--- /dev/null
+++ b/pdfocr-tesseract4/src/main/java/com/itextpdf/pdfocr/tesseract4/AbstractTesseract4OcrEngine.java
@@ -0,0 +1,485 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr.tesseract4;
+
+import com.itextpdf.io.util.MessageFormatUtil;
+import com.itextpdf.kernel.counter.EventCounterHandler;
+import com.itextpdf.kernel.counter.event.IMetaInfo;
+import com.itextpdf.pdfocr.IOcrEngine;
+import com.itextpdf.pdfocr.OcrPdfCreatorMetaInfo;
+import com.itextpdf.pdfocr.OcrPdfCreatorMetaInfo.PdfDocumentType;
+import com.itextpdf.pdfocr.TextInfo;
+import com.itextpdf.pdfocr.events.IThreadLocalMetaInfoAware;
+import com.itextpdf.pdfocr.tesseract4.events.PdfOcrTesseract4Event;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.UUID;
+import org.slf4j.LoggerFactory;
+
+/**
+ * The implementation of {@link IOcrEngine}.
+ *
+ * This class provides possibilities to perform OCR, to read data from input
+ * files and to return contained text in the required format.
+ * Also there are possibilities to use features of "tesseract"
+ * (optical character recognition engine for various operating systems).
+ */
+public abstract class AbstractTesseract4OcrEngine implements IOcrEngine, IThreadLocalMetaInfoAware {
+
+    /**
+     * Lower-case file extensions (without the dot) of the supported image formats.
+     */
+    private static final Set<String> SUPPORTED_IMAGE_FORMATS =
+            Collections.unmodifiableSet(new HashSet<>(
+                    Arrays.<String>asList("bmp", "png", "tiff", "tif", "jpeg",
+                            "jpg", "jpe", "jfif")));
+
+    Set<UUID> processedUUID = new HashSet<>(); // document ids for which onEvent() already reported an event
+
+    /**
+     * Properties of this engine, passed to the constructor or set later via the setter.
+     */
+    private Tesseract4OcrEngineProperties tesseract4OcrEngineProperties;
+
+    private ThreadLocal<IMetaInfo> threadLocalMetaInfo = new ThreadLocal<>(); // meta info is kept per thread
+
+    public AbstractTesseract4OcrEngine(
+            Tesseract4OcrEngineProperties tesseract4OcrEngineProperties) {
+        this.tesseract4OcrEngineProperties = tesseract4OcrEngineProperties; // kept by reference, not copied
+    }
+
+    /**
+     * Runs tesseract OCR for the first (or for the only) page of the image.
+     *
+     * @param inputImage input image {@link java.io.File}
+     * @param outputFile file that receives the result for the first page
+     * @param outputFormat selected {@link OutputFormat} for tesseract
+     */
+    public void doTesseractOcr(File inputImage, File outputFile,
+            OutputFormat outputFormat) {
+        List<File> singleOutput = Collections.<File>singletonList(outputFile);
+        doTesseractOcr(inputImage, singleOutput, outputFormat, 1);
+    }
+
+    /**
+     * Performs OCR for every image of the given list and writes the
+     * concatenated text output to a single text file at the provided path.
+     *
+     * @param inputImages {@link java.util.List} of images to be OCRed
+     * @param txtFile file to be created
+     */
+    public void createTxtFile(final List<File> inputImages, final File txtFile) {
+        String startMessage = MessageFormatUtil.format(
+                Tesseract4LogMessageConstant.START_OCR_FOR_IMAGES,
+                inputImages.size());
+        LoggerFactory.getLogger(getClass()).info(startMessage);
+
+        StringBuilder ocrText = new StringBuilder();
+        for (File image : inputImages) {
+            String imageText = doImageOcr(image, OutputFormat.TXT);
+            ocrText.append(imageText);
+        }
+
+        String targetPath = txtFile.getAbsolutePath();
+        TesseractHelper.writeToTextFile(targetPath, ocrText.toString());
+    }
+
+    /**
+     * Gets the properties currently used by this {@link AbstractTesseract4OcrEngine}.
+     *
+     * @return the {@link Tesseract4OcrEngineProperties} instance held by this engine
+     */
+    public final Tesseract4OcrEngineProperties getTesseract4OcrEngineProperties() {
+        return tesseract4OcrEngineProperties;
+    }
+
+    /**
+     * Replaces the properties used by this {@link AbstractTesseract4OcrEngine}.
+     *
+     * @param tesseract4OcrEngineProperties new {@link Tesseract4OcrEngineProperties}
+     * instance; it is stored as is, without being copied
+     */
+    public final void setTesseract4OcrEngineProperties(
+            final Tesseract4OcrEngineProperties tesseract4OcrEngineProperties) {
+        this.tesseract4OcrEngineProperties = tesseract4OcrEngineProperties;
+    }
+
+    /**
+     * Builds the language string in the format required by tesseract:
+     * the set languages joined with "+", or the default language if none is set.
+     * @return {@link java.lang.String} of concatenated languages
+     */
+    public final String getLanguagesAsString() {
+        Tesseract4OcrEngineProperties properties =
+                getTesseract4OcrEngineProperties();
+        if (properties.getLanguages().size() == 0) {
+            return properties.getDefaultLanguage();
+        }
+        return String.join("+", properties.getLanguages());
+    }
+
+    /**
+     * Reads data from the provided input image file and returns the
+     * recognized text together with its position on each page.
+     *
+     * @param input input image {@link java.io.File}
+     * @return {@link java.util.Map} where key is the {@link java.lang.Integer}
+     * page number and value is the {@link java.util.List} of {@link TextInfo}
+     * elements (a word or a line with its bbox) of that page; an empty map
+     * if no OCR result was produced (the I/O error is logged in that case)
+     */
+    public final Map<Integer, List<TextInfo>> doImageOcr(final File input) {
+        verifyImageFormatValidity(input);
+        ITesseractOcrResult ocrResult = processInputFiles(input, OutputFormat.HOCR);
+        return ocrResult == null ? new LinkedHashMap<Integer, List<TextInfo>>()
+                : ((TextInfoTesseractOcrResult) ocrResult).getTextInfos();
+    }
+
+    /**
+     * Reads data from the provided input image file and returns retrieved
+     * data as string.
+     *
+     * For {@link OutputFormat#TXT} the text read from the OCR output is returned
+     * as is. For any other format the text of every recognized item is
+     * emitted on its own line and each page is closed by an extra line break.
+     *
+     * @param input input image {@link java.io.File}
+     * @param outputFormat return {@link OutputFormat} result
+     * @return OCR result as a {@link java.lang.String}, or an empty string
+     * if no result was produced for the given image
+     */
+    public final String doImageOcr(final File input,
+            final OutputFormat outputFormat) {
+        verifyImageFormatValidity(input);
+        ITesseractOcrResult ocrResult = processInputFiles(input, outputFormat);
+        if (ocrResult == null) {
+            return "";
+        }
+        if (outputFormat.equals(OutputFormat.TXT)) {
+            return ((StringTesseractOcrResult) ocrResult).getData();
+        }
+
+        // every text item ends with a line separator, every page with one more
+        StringBuilder text = new StringBuilder();
+        Map<Integer, List<TextInfo>> pages =
+                ((TextInfoTesseractOcrResult) ocrResult).getTextInfos();
+        for (List<TextInfo> pageItems : pages.values()) {
+            for (TextInfo item : pageItems) {
+                text.append(item.getText()).append(System.lineSeparator());
+            }
+            text.append(System.lineSeparator());
+        }
+        return text.toString();
+    }
+
+    /**
+     * Checks whether the current os is windows.
+     * @return boolean true if current os is windows, otherwise - false
+     */
+    public boolean isWindows() {
+        String osType = identifyOsType();
+        return osType.toLowerCase().contains("win");
+    }
+
+    /**
+     * Identifies type of current OS and returns it lower-cased with Locale.ROOT (win, linux).
+     *
+     * @return type of current os as {@link java.lang.String}, or an empty string
+     * if neither the "os.name" nor the "OS" system property is set
+     */
+    public String identifyOsType() {
+        String os = System.getProperty("os.name", System.getProperty("OS"));
+        return os == null ? "" : os.toLowerCase(java.util.Locale.ROOT);
+    }
+
+    /**
+     * Checks that trained data exists in the tess data directory for every
+     * provided language, or for the default language if the list is empty.
+     *
+     * @param languagesList {@link java.util.List} of provided languages
+     * @throws Tesseract4OcrException if tess data wasn't found for one of the
+     * languages from the provided list
+     */
+    public void validateLanguages(final List<String> languagesList)
+            throws Tesseract4OcrException {
+        if (languagesList.size() == 0) {
+            // no explicit languages: only the default language is checked
+            checkTrainedDataExists(
+                    getTesseract4OcrEngineProperties().getDefaultLanguage());
+        } else {
+            for (String language : languagesList) {
+                checkTrainedDataExists(language);
+            }
+        }
+    }
+
+    /**
+     * Checks that the trained data file of the language exists in tess data directory.
+     *
+     * @param language language whose ".traineddata" file is looked up
+     * @throws Tesseract4OcrException if the trained data file doesn't exist
+     */
+    private void checkTrainedDataExists(final String language) {
+        String trainedDataName = language + ".traineddata";
+        File trainedData = new File(
+                getTessData() + java.io.File.separatorChar + trainedDataName);
+        if (!trainedData.exists()) {
+            throw new Tesseract4OcrException(
+                    Tesseract4OcrException.INCORRECT_LANGUAGE)
+                    .setMessageParams(trainedDataName, getTessData());
+        }
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public IMetaInfo getThreadLocalMetaInfo() {
+        return threadLocalMetaInfo.get(); // null if the current thread has not set a meta info
+    }
+
+    /**
+     * {@inheritDoc}
+     */
+    @Override
+    public IThreadLocalMetaInfoAware setThreadLocalMetaInfo(IMetaInfo metaInfo) {
+        this.threadLocalMetaInfo.set(metaInfo); // stored for the current thread only
+        return this; // this instance is returned so that calls can be chained
+    }
+
+    /**
+     * Performs tesseract OCR using command line tool
+     * or a wrapper for Tesseract OCR API.
+     *
+     * Please note that a list of output files is accepted instead of a single file because
+     * the page number parameter is not respected in case of TIFF images not requiring preprocessing.
+     * In other words, if the passed image is a TIFF image and according to the {@link Tesseract4OcrEngineProperties}
+     * no preprocessing is needed, each page of the TIFF image is OCRed and the number of output files in the list
+     * is expected to be the same as the number of pages in the image; otherwise, only one file is expected.
+     *
+     * @param inputImage input image {@link java.io.File}
+     * @param outputFiles {@link java.util.List} of output files
+     *                                          (one per page)
+     * @param outputFormat selected {@link OutputFormat} for tesseract
+     * @param pageNumber number of the page to be processed (callers in this class count from 1)
+     */
+    abstract void doTesseractOcr(File inputImage,
+            List<File> outputFiles, OutputFormat outputFormat,
+            int pageNumber);
+
+    /**
+     * Gets path to provided tess data directory.
+     *
+     * @return absolute path to provided tess data directory as
+     * {@link java.lang.String}
+     * @throws Tesseract4OcrException if path to tess data directory is not set
+     */
+    String getTessData() {
+        if (getTesseract4OcrEngineProperties().getPathToTessData() != null) {
+            return getTesseract4OcrEngineProperties().getPathToTessData()
+                    .getAbsolutePath();
+        }
+        throw new Tesseract4OcrException(
+                Tesseract4OcrException.PATH_TO_TESS_DATA_IS_NOT_SET);
+    }
+
+    void scheduledCheck() {
+        ReflectionUtils.scheduledCheck(); // plain delegation, the check itself is defined in ReflectionUtils
+    }
+
+    void onEvent() {
+        IMetaInfo metaInfo = this.getThreadLocalMetaInfo();
+        if (metaInfo instanceof OcrPdfCreatorMetaInfo) {
+            OcrPdfCreatorMetaInfo creatorMetaInfo = (OcrPdfCreatorMetaInfo) metaInfo;
+            // add() returns false for an already registered document id: one event per document
+            if (processedUUID.add(creatorMetaInfo.getDocumentId())) {
+                boolean isPdfA = PdfDocumentType.PDFA.equals(creatorMetaInfo.getPdfDocumentType());
+                EventCounterHandler.getInstance().onEvent(
+                        isPdfA ? PdfOcrTesseract4Event.TESSERACT4_IMAGE_TO_PDFA
+                                : PdfOcrTesseract4Event.TESSERACT4_IMAGE_TO_PDF,
+                        creatorMetaInfo.getWrappedMetaInfo(), getClass());
+            }
+        } else {
+            EventCounterHandler.getInstance().onEvent(
+                    PdfOcrTesseract4Event.TESSERACT4_IMAGE_OCR,
+                    this.getThreadLocalMetaInfo(), getClass());
+        }
+    }
+
+    /**
+     * Reads data from the provided input image file.
+     *
+     * @param input input image {@link java.io.File}
+     * @param outputFormat {@link OutputFormat} for the result returned
+     *                                         by {@link IOcrEngine}
+     * @return {@link TextInfoTesseractOcrResult} if the output format is HOCR, {@link StringTesseractOcrResult}
+     *     otherwise; {@code null} if an {@link IOException} was caught before any result was created
+     */
+    private ITesseractOcrResult processInputFiles(
+            final File input, final OutputFormat outputFormat) {
+        Map<Integer, List<TextInfo>> imageData =
+                new LinkedHashMap<Integer, List<TextInfo>>();
+        StringBuilder data = new StringBuilder();
+        List<File> tempFiles = new ArrayList<File>();
+        ITesseractOcrResult result = null;
+        try {
+            // with preprocessing pages are OCRed one by one (only a TIFF can have several);
+            // without it a single call is made with one output file per page
+            int realNumOfPages = !ImagePreprocessingUtil.isTiffImage(input)
+                    ? 1 : ImagePreprocessingUtil.getNumberOfPageTiff(input);
+            int numOfPages =
+                    getTesseract4OcrEngineProperties().isPreprocessingImages()
+                            ? realNumOfPages : 1;
+            int numOfFiles =
+                    getTesseract4OcrEngineProperties().isPreprocessingImages()
+                            ? 1 : realNumOfPages;
+
+            for (int page = 1; page <= numOfPages; page++) {
+                String extension = outputFormat.equals(OutputFormat.HOCR)
+                        ? ".hocr" : ".txt";
+                for (int i = 0; i < numOfFiles; i++) {
+                    tempFiles.add(createTempFile(extension));
+                }
+
+                doTesseractOcr(input, tempFiles, outputFormat, page); // NOTE(review): list still holds files of previous pages
+                if (outputFormat.equals(OutputFormat.HOCR)) {
+                    Map<Integer, List<TextInfo>> pageData = TesseractHelper
+                            .parseHocrFile(tempFiles,
+                                    getTesseract4OcrEngineProperties()
+                                            .getTextPositioning());
+
+                    if (getTesseract4OcrEngineProperties()
+                            .isPreprocessingImages()) {
+                        imageData.put(page, pageData.get(1));
+                    } else {
+                        imageData = pageData;
+                    }
+                    result = new TextInfoTesseractOcrResult(imageData);
+                } else {
+                    for (File tmpFile : tempFiles) {
+                        if (Files.exists(
+                                java.nio.file.Paths
+                                        .get(tmpFile.getAbsolutePath()))) {
+                            data.append(TesseractHelper.readTxtFile(tmpFile));
+                        }
+                    }
+                    result = new StringTesseractOcrResult(data.toString());
+                }
+            }
+        } catch (IOException e) {
+            LoggerFactory.getLogger(getClass())
+                    .error(MessageFormatUtil.format(
+                            Tesseract4LogMessageConstant.CANNOT_OCR_INPUT_FILE,
+                            e.getMessage()));
+        } finally {
+            for (File file : tempFiles) {
+                TesseractHelper.deleteFile(file.getAbsolutePath());
+            }
+        }
+        return result;
+    }
+
+    /**
+     * Builds a {@link java.io.File} for the path that TesseractOcrUtil#getTempFilePath
+     * returns for a random UUID and the given extension.
+     *
+     * @param extension file extension for a new file {@link java.lang.String}
+     * @return a new created {@link java.io.File} instance
+     */
+    private File createTempFile(final String extension) {
+        String uniqueName = UUID.randomUUID().toString();
+        return new File(TesseractOcrUtil.getTempFilePath(uniqueName, extension));
+    }
+
+    /**
+     * Validates input image format.
+     * Allowed image formats are listed
+     * in {@link AbstractTesseract4OcrEngine#SUPPORTED_IMAGE_FORMATS}
+     *
+     * The extension is the part of the absolute path after the last '.'
+     * and it is compared case-insensitively.
+     *
+     * @param image input image {@link java.io.File}
+     * @throws Tesseract4OcrException if image format is invalid
+     */
+    private void verifyImageFormatValidity(final File image)
+            throws Tesseract4OcrException {
+        String path = image.getAbsolutePath();
+        String extension = "incorrect extension";
+        boolean isValid = false;
+        int index = path.lastIndexOf('.');
+        if (index > 0) {
+            extension = path.substring(index + 1);
+            // Locale.ROOT: lower-casing with the default locale (e.g. Turkish)
+            // maps 'I' to a dotless i, so "TIF" would not match "tif"
+            isValid = SUPPORTED_IMAGE_FORMATS.contains(
+                    extension.toLowerCase(java.util.Locale.ROOT));
+        }
+        if (!isValid) {
+            LoggerFactory.getLogger(getClass()).error(MessageFormatUtil
+                    .format(Tesseract4LogMessageConstant
+                                    .CANNOT_READ_INPUT_IMAGE,
+                            path));
+            throw new Tesseract4OcrException(
+                    Tesseract4OcrException.INCORRECT_INPUT_IMAGE_FORMAT)
+                    .setMessageParams(extension);
+        }
+    }
+
+    interface ITesseractOcrResult { // declares no members: common type of the OCR result holders
+    }
+
+    static class StringTesseractOcrResult implements ITesseractOcrResult { // OCR result held as one String
+        private String data;
+
+        StringTesseractOcrResult(String data) {
+            this.data = data;
+        }
+
+        String getData() {
+            return data;
+        }
+    }
+
+    static class TextInfoTesseractOcrResult implements ITesseractOcrResult { // OCR result held as a map of TextInfo lists
+        private Map<Integer, List<TextInfo>> textInfos;
+
+        TextInfoTesseractOcrResult(Map<Integer, List<TextInfo>> textInfos) {
+            this.textInfos = textInfos; // the passed map is kept by reference
+        }
+
+        Map<Integer, List<TextInfo>> getTextInfos() {
+            return this.textInfos;
+        }
+    }
+}
diff --git a/pdfocr-tesseract4/src/main/java/com/itextpdf/pdfocr/tesseract4/ImagePreprocessingUtil.java b/pdfocr-tesseract4/src/main/java/com/itextpdf/pdfocr/tesseract4/ImagePreprocessingUtil.java
new file mode 100644
index 0000000..9abc13d
--- /dev/null
+++ b/pdfocr-tesseract4/src/main/java/com/itextpdf/pdfocr/tesseract4/ImagePreprocessingUtil.java
@@ -0,0 +1,226 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr.tesseract4;
+
+import com.itextpdf.io.image.TiffImageData;
+import com.itextpdf.io.source.RandomAccessFileOrArray;
+import com.itextpdf.io.source.RandomAccessSourceFactory;
+import com.itextpdf.io.util.MessageFormatUtil;
+
+import java.awt.image.BufferedImage;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import javax.imageio.ImageIO;
+import net.sourceforge.lept4j.Leptonica;
+import net.sourceforge.lept4j.Pix;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Utility class to work with images.
+ * Class provides tools for basic image preprocessing.
+ */
+class ImagePreprocessingUtil {
+
+    /**
+     * Private constructor: every method of this class is static, so no instance is needed.
+     */
+    private ImagePreprocessingUtil() {
+    }
+
+    /**
+     * Counts number of pages in the provided tiff image.
+     *
+     * @param inputImage input image {@link java.io.File}
+     * @return number of pages in the provided TIFF image
+     * @throws IOException if the file cannot be opened as a random access
+     * source, or if closing that source fails
+     */
+    static int getNumberOfPageTiff(final File inputImage) throws IOException {
+        RandomAccessFileOrArray raf = new RandomAccessFileOrArray(
+                new RandomAccessSourceFactory()
+                        .createBestSource(inputImage.getAbsolutePath()));
+        try {
+            return TiffImageData.getNumberOfPages(raf);
+        } finally {
+            // release the underlying source even when page counting fails
+            raf.close();
+        }
+    }
+
+    /**
+     * Checks whether image format is TIFF, judging by the file extension only.
+     *
+     * @param inputImage input image {@link java.io.File}
+     * @return true if provided image has 'tiff' or 'tif' extension (any case)
+     */
+    static boolean isTiffImage(final File inputImage) {
+        // Only the last path segment is inspected, so a dot inside a
+        // directory name is never mistaken for the start of the extension.
+        String name = inputImage.getName();
+        int index = name.lastIndexOf('.');
+        String extension = index < 0 ? "" : name.substring(index + 1);
+        // exact, locale-independent match instead of a substring test
+        return "tif".equalsIgnoreCase(extension)
+                || "tiff".equalsIgnoreCase(extension);
+    }
+
+    /**
+     * Reads provided image file using a stream that is always closed on exit.
+     *
+     * @param inputFile input image {@link java.io.File}
+     * @return decoded image, or null if {@link ImageIO} has no suitable reader
+     * @throws IllegalArgumentException if error occurred during reading a file
+     * @throws IOException if the file cannot be opened, read or closed
+     */
+    static BufferedImage readImageFromFile(final File inputFile)
+            throws IllegalArgumentException, IOException {
+        try (FileInputStream is =
+                new FileInputStream(inputFile.getAbsolutePath())) {
+            return ImageIO.read(is);
+        }
+    }
+
+    /**
+     * Loads the file through Leptonica as a
+     * {@link net.sourceforge.lept4j.Pix} and hands that Pix to
+     * {@link TesseractOcrUtil#convertPixToImage} to obtain a
+     * {@link java.awt.image.BufferedImage}.
+     *
+     * @param inputImage input image {@link java.io.File}
+     * @return the {@link java.awt.image.BufferedImage} produced by the conversion
+     * @throws IOException if the Pix to BufferedImage conversion fails
+     */
+    static BufferedImage readAsPixAndConvertToBufferedImage(
+            final File inputImage)
+            throws IOException {
+        final Leptonica leptonica = Leptonica.INSTANCE;
+        final Pix pix = leptonica.pixRead(inputImage.getAbsolutePath());
+        return TesseractOcrUtil.convertPixToImage(pix);
+    }
+
+    /**
+     * Reads the input file as a {@link net.sourceforge.lept4j.Pix} and runs
+     * {@link TesseractOcrUtil#preprocessPix} on it.
+     * TIFF files (see {@link #isTiffImage}) are read via
+     * {@link TesseractOcrUtil#readPixPageFromTiff}, any other file via
+     * {@link #readPix}.
+     *
+     * @param inputFile input image {@link java.io.File}
+     * @param pageNumber page to process; for TIFF input
+     * {@code pageNumber - 1} is passed on as the page index
+     * @return created preprocessed image as {@link net.sourceforge.lept4j.Pix}
+     * @throws Tesseract4OcrException if no Pix could be read from the input file
+     */
+    static Pix preprocessImage(final File inputFile,
+            final int pageNumber) throws Tesseract4OcrException {
+        // TIFF files are read page-wise, everything else goes via readPix
+        final Pix source = isTiffImage(inputFile)
+                ? TesseractOcrUtil.readPixPageFromTiff(inputFile,
+                        pageNumber - 1)
+                : readPix(inputFile);
+        if (source != null) {
+            return TesseractOcrUtil.preprocessPix(source);
+        }
+        throw new Tesseract4OcrException(
+                Tesseract4OcrException.CANNOT_READ_PROVIDED_IMAGE)
+                .setMessageParams(inputFile.getAbsolutePath());
+    }
+
+    /**
+     * Reads the input file as a {@link java.awt.image.BufferedImage} and
+     * converts it to a {@link net.sourceforge.lept4j.Pix}; if that yields
+     * nothing or throws, falls back to reading the file directly with
+     * Leptonica. Failures of either attempt are logged at info level.
+     *
+     * @param inputFile input image {@link java.io.File}
+     * @return {@link net.sourceforge.lept4j.Pix} read from the input file,
+     * or null when neither attempt produced one
+     */
+    static Pix readPix(final File inputFile) {
+        // first attempt: decode with ImageIO and convert the result
+        try {
+            final BufferedImage decoded = readImageFromFile(inputFile);
+            final Pix converted = decoded == null
+                    ? null : TesseractOcrUtil.convertImageToPix(decoded);
+            if (converted != null) {
+                return converted;
+            }
+        } catch (Exception e) { // NOSONAR
+            LoggerFactory.getLogger(ImagePreprocessingUtil.class)
+                    .info(MessageFormatUtil.format(
+                            Tesseract4LogMessageConstant
+                                    .CANNOT_CONVERT_IMAGE_TO_PIX,
+                            inputFile.getAbsolutePath(),
+                            e.getMessage()));
+        }
+        // second attempt: let Leptonica read the file directly
+        try {
+            return Leptonica.INSTANCE.pixRead(inputFile.getAbsolutePath());
+        } catch (Exception e) { // NOSONAR
+            LoggerFactory.getLogger(ImagePreprocessingUtil.class)
+                    .info(MessageFormatUtil.format(
+                            Tesseract4LogMessageConstant
+                                    .CANNOT_CONVERT_IMAGE_TO_PIX,
+                            inputFile.getAbsolutePath(),
+                            e.getMessage()));
+            return null;
+        }
+    }
+
+    /**
+     * Reads input image as a {@link java.awt.image.BufferedImage}.
+     * When {@link #readImageFromFile} returns null or throws an exception
+     * handled here, the image is read as a Pix and converted instead.
+     *
+     * @param inputImage original input image
+     * @return input image as a {@link java.awt.image.BufferedImage}; may be
+     * null, e.g. when the fallback threw an {@link IOException}
+     */
+    static BufferedImage readImage(File inputImage) {
+        // preferred path: plain ImageIO decoding
+        try {
+            final BufferedImage decoded = readImageFromFile(inputImage);
+            if (decoded != null) {
+                return decoded;
+            }
+        } catch (IllegalArgumentException | IOException ex) {
+            LoggerFactory.getLogger(ImagePreprocessingUtil.class).info(
+                    MessageFormatUtil.format(
+                            Tesseract4LogMessageConstant
+                                    .CANNOT_CREATE_BUFFERED_IMAGE,
+                            ex.getMessage()));
+        }
+        // fallback: read via Leptonica and convert the resulting Pix
+        try {
+            return readAsPixAndConvertToBufferedImage(inputImage);
+        } catch (IOException ex) {
+            LoggerFactory.getLogger(ImagePreprocessingUtil.class)
+                    .info(MessageFormatUtil.format(
+                            Tesseract4LogMessageConstant
+                                    .CANNOT_READ_INPUT_IMAGE,
+                            ex.getMessage()));
+            return null;
+        }
+    }
+}
diff --git a/pdfocr-tesseract4/src/main/java/com/itextpdf/pdfocr/tesseract4/OutputFormat.java b/pdfocr-tesseract4/src/main/java/com/itextpdf/pdfocr/tesseract4/OutputFormat.java
new file mode 100644
index 0000000..30f47db
--- /dev/null
+++ b/pdfocr-tesseract4/src/main/java/com/itextpdf/pdfocr/tesseract4/OutputFormat.java
@@ -0,0 +1,44 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr.tesseract4;
+
+import com.itextpdf.pdfocr.TextInfo;
+
+/**
+ * Enumeration of the available output formats.
+ * It is used when the selected reader is able to process the input
+ * file and to return the result in the required output format.
+ */
+public enum OutputFormat {
+    /**
+     * Reader will produce XHTML output compliant
+     * with the hOCR specification.
+     * Output will be parsed and represented as {@link java.util.List} of
+     * {@link TextInfo} objects.
+     */
+    HOCR,
+    /**
+     * Reader will produce a plain text (txt) file.
+     */
+    TXT
+}
diff --git a/pdfocr-tesseract4/src/main/java/com/itextpdf/pdfocr/tesseract4/PdfOcrTesseract4ProductInfo.java b/pdfocr-tesseract4/src/main/java/com/itextpdf/pdfocr/tesseract4/PdfOcrTesseract4ProductInfo.java
new file mode 100644
index 0000000..7ae48d2
--- /dev/null
+++ b/pdfocr-tesseract4/src/main/java/com/itextpdf/pdfocr/tesseract4/PdfOcrTesseract4ProductInfo.java
@@ -0,0 +1,38 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr.tesseract4;
+
+/**
+ * Product info about this iText add-on: name and version constants only.
+ */
+public class PdfOcrTesseract4ProductInfo {
+
+    /** The product name; matches the product named in the "no license loaded" message. */
+    public static final String PRODUCT_NAME = "pdfOcr-Tesseract4";
+
+    /** The major version number. */
+    public static final int MAJOR_VERSION = 1;
+
+    /** The minor version number. */
+    public static final int MINOR_VERSION = 0;
+}
diff --git a/pdfocr-tesseract4/src/main/java/com/itextpdf/pdfocr/tesseract4/ReflectionUtils.java b/pdfocr-tesseract4/src/main/java/com/itextpdf/pdfocr/tesseract4/ReflectionUtils.java
new file mode 100644
index 0000000..07c1c53
--- /dev/null
+++ b/pdfocr-tesseract4/src/main/java/com/itextpdf/pdfocr/tesseract4/ReflectionUtils.java
@@ -0,0 +1,209 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr.tesseract4;
+
+import com.itextpdf.io.util.MessageFormatUtil;
+import com.itextpdf.kernel.Version;
+import com.itextpdf.kernel.counter.ContextManager;
+
+import java.lang.reflect.AccessibleObject;
+import java.lang.reflect.Array;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.Method;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+final class ReflectionUtils {
+
+    private static final Logger logger = LoggerFactory.getLogger(ReflectionUtils.class);
+    // Package prefixes; the class names below are appended to one of them before lookup.
+    private static final String KERNEL_PACKAGE = "com.itextpdf.kernel.";
+    private static final String LICENSEKEY_PACKAGE = "com.itextpdf.licensekey.";
+    // Class names relative to the package prefixes above.
+    private static final String CONTEXT_MANAGER = "counter.ContextManager";
+    private static final String LICENSEKEY = "LicenseKey";
+    private static final String LICENSEKEY_PRODUCT = "LicenseKeyProduct";
+    private static final String LICENSEKEY_FEATURE = "LicenseKeyProductFeature";
+    // Names of the methods that are resolved reflectively.
+    private static final String REGISTER_GENERIC_CONTEXT = "registerGenericContext";
+    private static final String SCHEDULED_CHECK = "scheduledCheck";
+    // Cause message that scheduledCheck() always rethrows, regardless of the AGPL check.
+    private static final String NO_PDFOCR_TESSERACT4 = "No license loaded for product pdfOcr-Tesseract4. Please use LicenseKey.loadLicense(...) to load one.";
+    // Lookup caches used by findClass/findMethod. NOTE(review): plain HashMaps, no synchronization visible here.
+    private static Map<String, Class<?>> cachedClasses = new HashMap<>();
+    private static Map<MethodSignature, AccessibleObject> cachedMethods = new HashMap<>();
+
+    static {
+        // Two reflective registerGenericContext calls on the ContextManager; only the first collection differs.
+        try {
+            ContextManager contextManager = ContextManager.getInstance();
+            final String tesseract4Package = "com.itextpdf.pdfocr.tesseract4";
+            for (String pkg : new String[] {"com.itextpdf.pdfocr", tesseract4Package}) {
+                callMethod(KERNEL_PACKAGE + CONTEXT_MANAGER, REGISTER_GENERIC_CONTEXT, contextManager,
+                        new Class[] {Collection.class, Collection.class},
+                        Collections.singletonList(pkg),
+                        Collections.singletonList(tesseract4Package));
+            }
+        } catch (Exception e) {
+            logger.error(e.getMessage());
+        }
+    }
+
+    private ReflectionUtils() { // not instantiable: the class exposes static members only
+    }
+
+    /**
+     * Reflectively calls {@code LicenseKey.scheduledCheck} for this product.
+     * A "no license loaded" failure is always rethrown; any other failure is
+     * rethrown only when {@link Version#isAGPLVersion()} is false.
+     */
+    public static void scheduledCheck() {
+        try {
+            Class<?> licenseKeyClass = getClass(LICENSEKEY_PACKAGE + LICENSEKEY);
+            Class<?> licenseKeyProductClass = getClass(LICENSEKEY_PACKAGE + LICENSEKEY_PRODUCT);
+            Class<?> licenseKeyProductFeatureClass = getClass(LICENSEKEY_PACKAGE + LICENSEKEY_FEATURE);
+
+            // empty feature array: needed both as constructor argument and for its array class
+            Object licenseKeyProductFeatureArray = Array.newInstance(licenseKeyProductFeatureClass, 0);
+            Constructor<?> licenseKeyProductConstructor = licenseKeyProductClass.getConstructor(
+                    String.class, Integer.TYPE, Integer.TYPE, licenseKeyProductFeatureArray.getClass());
+            Object licenseKeyProductObject = licenseKeyProductConstructor.newInstance(
+                    PdfOcrTesseract4ProductInfo.PRODUCT_NAME,
+                    PdfOcrTesseract4ProductInfo.MAJOR_VERSION,
+                    PdfOcrTesseract4ProductInfo.MINOR_VERSION,
+                    licenseKeyProductFeatureArray
+            );
+
+            Method method = licenseKeyClass.getMethod(SCHEDULED_CHECK, licenseKeyProductClass);
+            method.invoke(null, licenseKeyProductObject);
+        } catch (Exception e) {
+            // Reflection wraps the real failure in a cause; lookup errors such
+            // as ClassNotFoundException may carry no cause at all.
+            Throwable cause = e.getCause();
+            if (cause != null && NO_PDFOCR_TESSERACT4.equals(cause.getMessage())) {
+                throw new RuntimeException(cause.getMessage(), cause);
+            }
+            if (!Version.isAGPLVersion()) {
+                // fall back to the caught exception so the failure is never
+                // reported as a RuntimeException without any cause or message
+                throw new RuntimeException(cause != null ? cause : e);
+            }
+        }
+    }
+
+    /**
+     * Looks up className#methodName through findMethod and invokes it on target with args.
+     * Returns the invocation result, or null when a lookup or argument problem was only logged as a warning.
+     */
+    private static Object callMethod(String className, String methodName, Object target, Class[] parameterTypes,
+            Object... args) {
+        String warning;
+        try {
+            return findMethod(className, methodName, parameterTypes).invoke(target, args);
+        } catch (NoSuchMethodException e) {
+            warning = MessageFormatUtil.format("Cannot find method {0} for class {1}", methodName, className);
+        } catch (ClassNotFoundException e) {
+            warning = MessageFormatUtil.format("Cannot find class {0}", className);
+        } catch (IllegalArgumentException e) {
+            warning = MessageFormatUtil.format("Illegal arguments passed to {0}#{1} method call: {2}",
+                    className, methodName, e.getMessage());
+        } catch (Exception e) {
+            // Anything else (including checked reflection exceptions) is treated as unrecoverable for the
+            // pdfOcr callers and converted to an unchecked exception right here, for convenience.
+            throw new RuntimeException(e.toString(), e);
+        }
+        logger.warn(warning);
+        return null;
+    }
+
+    private static Method findMethod(String className, String methodName, Class[] parameterTypes)
+            throws NoSuchMethodException, ClassNotFoundException {
+        final MethodSignature key = new MethodSignature(className, parameterTypes, methodName);
+        Method resolved = (Method) cachedMethods.get(key); // served from the cache when already known
+        if (null == resolved) { // cache miss: resolve the declared method once and remember it
+            resolved = findClass(className).getDeclaredMethod(methodName, parameterTypes);
+            resolved.setAccessible(true); // the method is invoked reflectively regardless of its visibility
+            cachedMethods.put(key, resolved);
+        }
+        return resolved;
+    }
+
+    // Resolves a class by name through getClass, remembering the result in cachedClasses so that
+    // repeated lookups of the same name skip the class loading step.
+    private static Class<?> findClass(String className) throws ClassNotFoundException {
+        if (!cachedClasses.containsKey(className)) {
+            cachedClasses.put(className, getClass(className));
+        }
+        return cachedClasses.get(className);
+    }
+
+    private static Class<?> getClass(String className) throws ClassNotFoundException {
+        return Class.forName(className); // plain uncached lookup; findClass adds the caching
+    }
+
+    /**
+     * Key of the method cache: class name, method name and parameter types.
+     */
+    private static class MethodSignature {
+        protected final String className;
+        private final String methodName;
+        protected Class[] parameterTypes;
+
+        MethodSignature(String className, Class[] parameterTypes, String methodName) {
+            this.className = className;
+            this.parameterTypes = parameterTypes;
+            this.methodName = methodName;
+        }
+
+        /** Combines the hashes of all three components; a null method name contributes 0. */
+        @Override
+        public int hashCode() {
+            // same 31-based combination order: class name, parameter types, method name
+            int nameHash = methodName == null ? 0 : methodName.hashCode();
+            return 31 * (31 * className.hashCode() + Arrays.hashCode(parameterTypes)) + nameHash;
+        }
+
+        /** Two signatures are equal when class name, parameter types and method name all match. */
+        @Override
+        public boolean equals(Object o) {
+            if (this == o) {
+                return true;
+            }
+            if (o == null || getClass() != o.getClass()) {
+                return false;
+            }
+            MethodSignature other = (MethodSignature) o;
+            boolean sameName = methodName == null
+                    ? other.methodName == null
+                    : methodName.equals(other.methodName);
+            return className.equals(other.className)
+                    && Arrays.equals(parameterTypes, other.parameterTypes)
+                    && sameName;
+        }
+    }
+}
diff --git a/pdfocr-tesseract4/src/main/java/com/itextpdf/pdfocr/tesseract4/Tesseract4ExecutableOcrEngine.java b/pdfocr-tesseract4/src/main/java/com/itextpdf/pdfocr/tesseract4/Tesseract4ExecutableOcrEngine.java
new file mode 100644
index 0000000..705d6a6
--- /dev/null
+++ b/pdfocr-tesseract4/src/main/java/com/itextpdf/pdfocr/tesseract4/Tesseract4ExecutableOcrEngine.java
@@ -0,0 +1,494 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr.tesseract4;
+
+import com.itextpdf.io.util.MessageFormatUtil;
+
+import java.awt.image.BufferedImage;
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.UUID;
+import net.sourceforge.lept4j.Pix;
+import org.slf4j.LoggerFactory;
+
+/**
+ * The implementation of {@link AbstractTesseract4OcrEngine} for tesseract OCR.
+ *
+ * This class provides possibilities to use features of "tesseract" CL tool
+ * (optical character recognition engine for various operating systems).
+ * Please note that it's assumed that "tesseract" has already been
+ * installed locally.
+ */
+public class Tesseract4ExecutableOcrEngine extends AbstractTesseract4OcrEngine {
+
+    /**
+     * Path to the tesseract executable; the properties-only constructor sets it
+     * to "tesseract", which assumes that "tesseract" already exists in the "PATH".
+     */
+    private String pathToExecutable;
+
+    /**
+     * Creates a new {@link Tesseract4ExecutableOcrEngine} instance that uses
+     * "tesseract" as the path to the executable.
+     *
+     * @param tesseract4OcrEngineProperties set of properties
+     */
+    public Tesseract4ExecutableOcrEngine(
+            final Tesseract4OcrEngineProperties tesseract4OcrEngineProperties) {
+        this("tesseract", tesseract4OcrEngineProperties);
+    }
+
+    /**
+     * Creates a new {@link Tesseract4ExecutableOcrEngine} for the given executable.
+     *
+     * @param executablePath path to tesseract executable, stored without validation
+     * @param tesseract4OcrEngineProperties set of properties, handed to the superclass
+     */
+    public Tesseract4ExecutableOcrEngine(final String executablePath,
+            final Tesseract4OcrEngineProperties tesseract4OcrEngineProperties) {
+        super(tesseract4OcrEngineProperties);
+        this.pathToExecutable = executablePath;
+    }
+
+    /**
+     * Returns the currently configured path to the tesseract executable.
+     *
+     * @return value last stored by a constructor or {@link #setPathToExecutable}
+     */
+    public final String getPathToExecutable() {
+        return this.pathToExecutable;
+    }
+
+    /**
+     * Stores the path to the tesseract executable without validating it.
+     * By default it's assumed that "tesseract" already exists in the "PATH".
+     *
+     * @param path path to tesseract executable
+     */
+    public final void setPathToExecutable(final String path) {
+        this.pathToExecutable = path;
+    }
+
+    /**
+     * Performs tesseract OCR using command line tool for the selected page
+     * of input image (by default 1st).
+     *
+     * Please note that list of output files is accepted instead of a single file because
+     * page number parameter is not respected in case of TIFF images not requiring preprocessing.
+     * In other words, if the passed image is the TIFF image and according to the {@link Tesseract4OcrEngineProperties}
+     * no preprocessing is needed, each page of the TIFF image is OCRed and the number of output files in the list
+     * is expected to be the same as the number of pages in the image; otherwise, only one file is expected.
+     *
+     * @param inputImage input image {@link java.io.File}
+     * @param outputFiles {@link java.util.List} of output files
+     *                                          (one per each page)
+     * @param outputFormat selected {@link OutputFormat} for tesseract
+     * @param pageNumber number of page to be processed
+     */
+    void doTesseractOcr(final File inputImage,
+            final List<File> outputFiles, final OutputFormat outputFormat,
+            final int pageNumber) {
+        scheduledCheck();
+        List<String> params = new ArrayList<String>();
+        String execPath = null;
+        String imagePath = null;
+        try {
+            imagePath = inputImage.getAbsolutePath();
+            // path to tesseract executable (passed through addQuotes on Windows only)
+            if (getPathToExecutable() == null
+                    || getPathToExecutable().isEmpty()) {
+                throw new Tesseract4OcrException(
+                        Tesseract4OcrException
+                                .CANNOT_FIND_PATH_TO_TESSERACT_EXECUTABLE);
+            } else {
+                if (isWindows()) {
+                    execPath = addQuotes(getPathToExecutable());
+                } else {
+                    execPath = getPathToExecutable();
+                }
+                params.add(execPath);
+            }
+            checkTesseractInstalled(execPath);
+            // path to tess data
+            addTessData(params);
+
+            // validate languages before preprocessing started
+            validateLanguages(getTesseract4OcrEngineProperties()
+                    .getLanguages());
+
+            // preprocess input file if needed and add it
+            imagePath = preprocessImage(inputImage, pageNumber);
+            addInputFile(params, imagePath);
+            // move to image directory as tesseract cannot parse non ascii
+            // characters in input path
+            List<String> moveToDirectoryParams = moveToImageDirectory(
+                    imagePath);
+            // output file (only the first entry of the list is passed on)
+            addOutputFile(params, outputFiles.get(0), outputFormat,
+                    imagePath);
+            // page segmentation mode
+            addPageSegMode(params);
+            // add user words if needed
+            addUserWords(params, imagePath);
+            // required languages
+            addLanguages(params);
+            if (outputFormat.equals(OutputFormat.HOCR)) {
+                // request hocr output via a "-c" config option
+                setHocrOutput(params);
+            }
+            // set default user defined dpi
+            addDefaultDpi(params);
+            onEvent();
+            TesseractHelper.runCommand(isWindows() ? "cmd" : "bash",
+                    createCommandList(moveToDirectoryParams, params));
+        } catch (Tesseract4OcrException e) {
+            LoggerFactory.getLogger(getClass())
+                    .error(e.getMessage());
+            throw new Tesseract4OcrException(e.getMessage(), e);
+        } finally { // cleanup below runs on success and on failure alike
+            try { // delete the image only if its path differs from the input image path
+                if (imagePath != null
+                        && !inputImage.getAbsolutePath().equals(imagePath)) {
+                    TesseractHelper.deleteFile(imagePath);
+                }
+            } catch (SecurityException e) {
+                LoggerFactory.getLogger(getClass())
+                        .error(MessageFormatUtil.format(
+                                Tesseract4LogMessageConstant.CANNOT_DELETE_FILE,
+                                imagePath, e.getMessage()));
+            }
+            try { // delete the user words file only when the properties flag it as temporary
+                if (getTesseract4OcrEngineProperties()
+                        .getPathToUserWordsFile() != null
+                        && getTesseract4OcrEngineProperties().isUserWordsFileTemporary()) {
+                    TesseractHelper.deleteFile(
+                            getTesseract4OcrEngineProperties()
+                                    .getPathToUserWordsFile());
+                }
+            } catch (SecurityException e) {
+                LoggerFactory.getLogger(getClass())
+                        .error(MessageFormatUtil.format(
+                                Tesseract4LogMessageConstant.CANNOT_DELETE_FILE,
+                                getTesseract4OcrEngineProperties()
+                                        .getPathToUserWordsFile(),
+                                e.getMessage()));
+            }
+        }
+    }
+
+    /**
+     * Creates joint command list of two commands passed as parameters:
+     * shell option ("/c" on Windows, "-c" otherwise), opening quote, first
+     * command, "&&", second command, closing quote.
+     * @param moveToDirectoryParams first command is responsible for moving
+     *                              to the directory
+     * @param tesseractParams second command is responsible for tesseract
+     *                        parameters
+     * @return joint command list
+     */
+    private List<String> createCommandList(
+            final List<String> moveToDirectoryParams,
+            final List<String> tesseractParams) {
+        final List<String> command = new ArrayList<String>();
+        // option that precedes the command string handed to cmd or bash
+        command.add(isWindows() ? "/c" : "-c");
+        // opening quote around the joint "cd ... && tesseract ..." command
+        command.add(isWindows() ? "\"" : "'");
+        command.addAll(moveToDirectoryParams);
+        command.add("&&");
+        command.addAll(tesseractParams);
+        // matching closing quote
+        command.add(isWindows() ? "\"" : "'");
+        return command;
+    }
+
+    /**
+     * Creates the command moving to the parent directory of the image. The
+     * "file:///" and "file:/" parts of the directory path are replaced first.
+     *
+     * @param imagePath path to input image
+     * @return command list: "cd" ("cd /d" on Windows) followed by the
+     * quoted parent directory
+     */
+    private List<String> moveToImageDirectory(final String imagePath) {
+        final boolean windows = isWindows();
+        // "file:///" and "file:/" become "/" on non-Windows OS, "" on Windows
+        final String prefixReplacement = windows ? "" : "/";
+        final String imageDirectory = TesseractOcrUtil
+                .getParentDirectory(imagePath)
+                .replace("file:///", prefixReplacement)
+                .replace("file:/", prefixReplacement);
+
+        List<String> command = new ArrayList<String>();
+        // "/d" handles the case when the current directory on Windows is
+        // located on a different drive compared to the directory we move to
+        command.add(windows ? "cd /d" : "cd");
+        command.add(addQuotes(imageDirectory));
+        return command;
+    }
+
+    /**
+     * Sets hocr output format by adding the corresponding config variable.
+     *
+     * @param command result command as list of strings
+     */
+    private void setHocrOutput(final List<String> command) {
+        // "-c" is followed by the variable in the "name=value" form
+        Collections.addAll(command, "-c", "tessedit_create_hocr=1");
+    }
+
+    /**
+     * Adds path to the user-words file (if it is set) followed by the
+     * "--oem 0" option to the command.
+     *
+     * @param command result command as list of strings
+     * @param imgPath path to the input image
+     */
+    private void addUserWords(final List<String> command,
+            final String imgPath) {
+        final String userWordsPath = getTesseract4OcrEngineProperties()
+                .getPathToUserWordsFile();
+        if (userWordsPath == null || userWordsPath.isEmpty()) {
+            return;
+        }
+        final File userWordsFile = new File(userWordsPath);
+        // Workaround for non-ASCII characters in the path: only the file name
+        // is used if the user words reside in the same directory as the input
+        // image, otherwise - the absolute path to the user-words file
+        final String filePath = areEqualParentDirectories(imgPath,
+                userWordsFile.getAbsolutePath())
+                ? userWordsFile.getName() : userWordsFile.getAbsolutePath();
+        command.add("--user-words");
+        command.add(addQuotes(filePath));
+        command.add("--oem");
+        command.add("0");
+    }
+
+    /**
+     * Sets the default DPI (300) for the image through a config variable.
+     *
+     * @param command result command as list of strings
+     */
+    private void addDefaultDpi(final List<String> command) {
+        // "-c" is followed by the variable in the "name=value" form
+        Collections.addAll(command, "-c", "user_defined_dpi=300");
+    }
+
+    /**
+     * Adds the "--tessdata-dir" option with the quoted path to tess data.
+     * @param command result command as list of strings
+     */
+    private void addTessData(final List<String> command) {
+        command.add("--tessdata-dir");
+        final String tessDataPath = getTessData();
+        command.add(addQuotes(tessDataPath));
+    }
+
+    /**
+     * Adds selected Page Segmentation Mode as config variable, if it is set.
+     * @param command result command as list of strings
+     */
+    private void addPageSegMode(final List<String> command) {
+        if (getTesseract4OcrEngineProperties().getPageSegMode() == null) {
+            return;
+        }
+        Collections.addAll(command, "-c", "tessedit_pageseg_mode="
+                + getTesseract4OcrEngineProperties().getPageSegMode());
+    }
+
+    /**
+     * Adds the "-l" option with the selected languages, if there are any.
+     * @param command result command as list of strings
+     */
+    private void addLanguages(final List<String> command) {
+        if (getTesseract4OcrEngineProperties().getLanguages().isEmpty()) {
+            return;
+        }
+        command.add("-l");
+        command.add(getLanguagesAsString());
+    }
+
+    /**
+     * Adds the quoted name of the input image file (without directories).
+     * @param command result command as list of strings
+     * @param imagePath path to the input image file as string
+     */
+    private void addInputFile(final List<String> command,
+            final String imagePath) {
+        final String imageName = new File(imagePath).getName();
+        command.add(addQuotes(imageName));
+    }
+
+    /**
+     * Adds the output base (path to the output file without extension).
+     *
+     * @param command result command as list of strings
+     * @param outputFile output file with result
+     * @param outputFormat selected {@link OutputFormat} for tesseract
+     * @param inputImagePath path to the input image
+     * @throws Tesseract4OcrException if the output base cannot be retrieved
+     */
+    private void addOutputFile(final List<String> command,
+            final File outputFile, final OutputFormat outputFormat,
+            final String inputImagePath) {
+        String extension = outputFormat.equals(OutputFormat.HOCR)
+                ? ".hocr" : ".txt";
+        try {
+            // Workaround for non-ASCII characters in the path: only the file name
+            // is left if it resides in the input image directory, otherwise - absolute path
+            String filePath = areEqualParentDirectories(inputImagePath,
+                    outputFile.getAbsolutePath())
+                    ? outputFile.getName()
+                    : outputFile.getAbsolutePath();
+            // cut at the LAST occurrence: the path may contain the extension earlier
+            String fileName = filePath.substring(0, filePath.lastIndexOf(extension));
+            LoggerFactory.getLogger(getClass()).info(
+                    MessageFormatUtil.format(
+                            Tesseract4LogMessageConstant.CREATED_TEMPORARY_FILE,
+                            outputFile.getAbsolutePath()));
+            command.add(addQuotes(fileName));
+        } catch (Exception e) { // NOSONAR
+            throw new Tesseract4OcrException(
+                    Tesseract4OcrException.TESSERACT_FAILED, e);
+        }
+    }
+
+    /**
+     * Surrounds given string with quotes: double quotes on Windows,
+     * single quotes otherwise.
+     *
+     * @param value string to be wrapped into quotes
+     * @return wrapped string
+     */
+    private String addQuotes(final String value) {
+        // choose the quote character matching the OS;
+        // NOTE: quote characters already present in the value
+        // are not escaped here
+        final String quote = isWindows() ? "\"" : "'";
+        return quote + value + quote;
+    }
+
+    /**
+     * Saves the input image (preprocessed, if required) to a temporary file.
+     * @param inputImage original input image {@link java.io.File}
+     * @param pageNumber number of page to be OCRed
+     * @return path to the temporary file if it exists, otherwise absolute
+     * path to the original input image
+     * @throws Tesseract4OcrException if preprocessing cannot be done or file
+     * is invalid
+     */
+    private String preprocessImage(final File inputImage,
+            final int pageNumber) throws Tesseract4OcrException {
+        String tmpFileName = TesseractOcrUtil
+                .getTempFilePath(UUID.randomUUID().toString(),
+                        getExtension(inputImage));
+        String path = inputImage.getAbsolutePath();
+        try {
+            if (getTesseract4OcrEngineProperties().isPreprocessingImages()) {
+                Pix pix = ImagePreprocessingUtil
+                        .preprocessImage(inputImage, pageNumber);
+                TesseractOcrUtil.savePixToTempPngFile(tmpFileName, pix);
+                if (!Files.exists(Paths.get(tmpFileName))) { // pix was not saved, retry through a BufferedImage
+                    BufferedImage img = TesseractOcrUtil.convertPixToImage(pix);
+                    if (img != null) {
+                        TesseractOcrUtil.saveImageToTempPngFile(tmpFileName,
+                                img);
+                    }
+                }
+            }
+            if (!getTesseract4OcrEngineProperties().isPreprocessingImages()
+                    || !Files.exists(Paths.get(tmpFileName))) { // no preprocessed file: use the original image
+                TesseractOcrUtil.createTempFileCopy(path, tmpFileName);
+            }
+            if (Files.exists(Paths.get(tmpFileName))) {
+                path = tmpFileName;
+            }
+        } catch (IOException e) { // logged only: "path" still refers to the original image
+            LoggerFactory.getLogger(getClass())
+                    .error(MessageFormatUtil.format(
+                            Tesseract4LogMessageConstant
+                                    .CANNOT_READ_INPUT_IMAGE,
+                            e.getMessage()));
+        }
+        return path;
+    }
+
+    /**
+     * Verifies that tesseract can be launched using the provided path by
+     * running it with the "--version" argument.
+     * @param execPath path to tesseract executable
+     * @throws Tesseract4OcrException with
+     * {@link Tesseract4OcrException#TESSERACT_NOT_FOUND} message if running
+     * the command failed
+     */
+    private void checkTesseractInstalled(String execPath)
+            throws Tesseract4OcrException {
+        final List<String> versionArgument = Collections.singletonList("--version");
+        try {
+            TesseractHelper.runCommand(execPath, versionArgument);
+        } catch (Tesseract4OcrException cause) {
+            throw new Tesseract4OcrException(
+                    Tesseract4OcrException.TESSERACT_NOT_FOUND, cause);
+        }
+    }
+
+    /**
+     * Gets input image file extension in lower case, including the dot.
+     * @param inputImage input file
+     * @return file extension as a {@link java.lang.String}, ".png" if the
+     * file is null or its name contains no dot
+     */
+    private String getExtension(File inputImage) {
+        if (inputImage != null) {
+            // only the file name is examined: a dot in one of the parent
+            // directories must not be mistaken for the extension start
+            String name = inputImage.getName();
+            int index = name.lastIndexOf('.');
+            if (index >= 0) {
+                return name.substring(index).toLowerCase(java.util.Locale.ROOT);
+            }
+        }
+        return ".png";
+    }
+
+    /**
+     * Checks whether parent directories are equal for the passed file paths.
+     * @param firstPath path to the first file
+     * @param secondPath path to the second file
+     * @return true if parent directories are equal, otherwise - false
+     */
+    private boolean areEqualParentDirectories(final String firstPath,
+            final String secondPath) {
+        final String firstParent = TesseractOcrUtil.getParentDirectory(firstPath);
+        final String otherParent = TesseractOcrUtil.getParentDirectory(secondPath);
+        if (firstParent == null) {
+            return false;
+        }
+        return firstParent.equals(otherParent);
+    }
+}
diff --git a/pdfocr-tesseract4/src/main/java/com/itextpdf/pdfocr/tesseract4/Tesseract4LibOcrEngine.java b/pdfocr-tesseract4/src/main/java/com/itextpdf/pdfocr/tesseract4/Tesseract4LibOcrEngine.java
new file mode 100644
index 0000000..1c90c9c
--- /dev/null
+++ b/pdfocr-tesseract4/src/main/java/com/itextpdf/pdfocr/tesseract4/Tesseract4LibOcrEngine.java
@@ -0,0 +1,299 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr.tesseract4;
+
+import com.itextpdf.io.util.MessageFormatUtil;
+
+import java.awt.image.BufferedImage;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.List;
+import net.sourceforge.tess4j.ITesseract;
+import net.sourceforge.tess4j.TesseractException;
+import org.slf4j.LoggerFactory;
+
+/**
+ * The implementation of {@link AbstractTesseract4OcrEngine} for tesseract OCR.
+ *
+ * This class provides possibilities to use features of "tesseract"
+ * using tess4j.
+ *
+ * Please note that this class is not thread-safe, in other words this Tesseract engine cannot
+ * be used for multithreaded processing. You should create one instance per thread.
+ */
+public class Tesseract4LibOcrEngine extends AbstractTesseract4OcrEngine {
+
+    /**
+     * {@link net.sourceforge.tess4j.ITesseract} instance used to perform OCR
+     * (its creation depends on OS type).
+     */
+    private ITesseract tesseractInstance = null;
+
+    /**
+     * Creates a new {@link Tesseract4LibOcrEngine} instance.
+     *
+     * @param tesseract4OcrEngineProperties set of properties
+     */
+    public Tesseract4LibOcrEngine(
+            final Tesseract4OcrEngineProperties tesseract4OcrEngineProperties) {
+        super(tesseract4OcrEngineProperties);
+        // tess data, languages and user words are set later, in initializeTesseract
+        this.tesseractInstance = TesseractOcrUtil
+                .initializeTesseractInstance(isWindows(), null, null, null);
+    }
+
+    /**
+     * Gets the tesseract instance currently held by this engine.
+     *
+     * @return {@link net.sourceforge.tess4j.ITesseract} instance
+     */
+    public ITesseract getTesseractInstance() {
+        return this.tesseractInstance;
+    }
+
+    /**
+     * Initializes instance of tesseract if it hasn't been already
+     * initialized or it has been disposed and sets all the required
+     * properties: output format, DPI, user words (if the path to the user
+     * words file is set), tess data, languages and page segmentation mode.
+     *
+     * @param outputFormat selected {@link OutputFormat} for tesseract
+     */
+    public void initializeTesseract(final OutputFormat outputFormat) {
+        // evaluated once, the values are used several times below
+        final Tesseract4OcrEngineProperties properties =
+                getTesseract4OcrEngineProperties();
+        final String userWordsPath = properties.getPathToUserWordsFile();
+        // (re)create the instance when it is missing or has been disposed
+        if (getTesseractInstance() == null || TesseractOcrUtil
+                .isTesseractInstanceDisposed(getTesseractInstance())) {
+            tesseractInstance = TesseractOcrUtil.initializeTesseractInstance(
+                    isWindows(), getTessData(), getLanguagesAsString(),
+                    userWordsPath);
+        }
+
+        final ITesseract tesseract = getTesseractInstance();
+        // "1" turns hocr output on, "0" turns it off
+        final String createHocr =
+                outputFormat.equals(OutputFormat.HOCR) ? "1" : "0";
+        tesseract.setTessVariable("tessedit_create_hocr", createHocr);
+        tesseract.setTessVariable("user_defined_dpi", "300");
+        if (userWordsPath != null) {
+            // variables set only when the user words file is provided
+            tesseract.setTessVariable("load_system_dawg", "0");
+            tesseract.setTessVariable("load_freq_dawg", "0");
+            tesseract.setTessVariable("user_words_suffix",
+                    properties.getDefaultUserWordsSuffix());
+            tesseract.setTessVariable("user_words_file", userWordsPath);
+        }
+
+        // the rest of the properties is applied by the utility method
+        TesseractOcrUtil.setTesseractProperties(tesseract,
+                getTessData(), getLanguagesAsString(),
+                properties.getPageSegMode(),
+                userWordsPath);
+    }
+
+    /**
+     * Performs tesseract OCR using wrapper for Tesseract OCR API for the selected page
+     * of input image (by default 1st).
+     *
+     * Please note that list of output files is accepted instead of a single file because
+     * page number parameter is not respected in case of TIFF images not requiring preprocessing.
+     * In other words, if the passed image is the TIFF image and according to the {@link Tesseract4OcrEngineProperties}
+     * no preprocessing is needed, each page of the TIFF image is OCRed and the number of output files in the list
+     * is expected to be same as number of pages in the image, otherwise, only one file is expected.
+     * Results having no corresponding output file in the list are skipped.
+     *
+     * @param inputImage input image {@link java.io.File}
+     * @param outputFiles {@link java.util.List} of output files (one per each page)
+     * @param outputFormat selected {@link OutputFormat} for tesseract
+     * @param pageNumber number of page to be processed
+     * @throws Tesseract4OcrException if OCR failed or a result cannot be written
+     */
+    void doTesseractOcr(final File inputImage,
+            final List<File> outputFiles, final OutputFormat outputFormat,
+            final int pageNumber) {
+        scheduledCheck();
+        try {
+            validateLanguages(getTesseract4OcrEngineProperties().getLanguages());
+            initializeTesseract(outputFormat);
+            onEvent();
+            // if preprocessing is not needed and provided image is tiff,
+            // the image will be paginated and separate pages will be OCRed
+            List<String> resultList = new ArrayList<String>();
+            if (!getTesseract4OcrEngineProperties().isPreprocessingImages()
+                    && ImagePreprocessingUtil.isTiffImage(inputImage)) {
+                resultList = getOcrResultForMultiPage(inputImage,
+                        outputFormat);
+            } else {
+                resultList.add(getOcrResultForSinglePage(inputImage,
+                        outputFormat, pageNumber));
+            }
+
+            // list of result strings is written to separate files
+            // (one for each page)
+            for (int i = 0; i < resultList.size(); i++) {
+                String result = resultList.get(i);
+                File outputFile = i >= outputFiles.size() ? null : outputFiles.get(i);
+                if (result != null && outputFile != null) {
+                    try (Writer writer = new OutputStreamWriter(
+                            new FileOutputStream(outputFile.getAbsolutePath()),
+                            StandardCharsets.UTF_8)) {
+                        writer.write(result);
+                    } catch (IOException e) {
+                        // the template expects the file path ({0}) and the reason ({1})
+                        LoggerFactory.getLogger(getClass()).error(
+                                MessageFormatUtil.format(
+                                        Tesseract4LogMessageConstant
+                                                .CANNOT_WRITE_TO_FILE,
+                                        outputFile.getAbsolutePath(), e.getMessage()));
+                        throw new Tesseract4OcrException(
+                                Tesseract4OcrException.TESSERACT_FAILED, e);
+                    }
+                }
+            }
+        } catch (Tesseract4OcrException e) {
+            LoggerFactory.getLogger(getClass())
+                    .error(e.getMessage());
+            throw new Tesseract4OcrException(e.getMessage(), e);
+        } finally {
+            if (tesseractInstance != null) {
+                TesseractOcrUtil.disposeTesseractInstance(tesseractInstance);
+            }
+            if (getTesseract4OcrEngineProperties().getPathToUserWordsFile()
+                    != null
+                    && getTesseract4OcrEngineProperties().isUserWordsFileTemporary()) {
+                TesseractHelper.deleteFile(
+                        getTesseract4OcrEngineProperties()
+                                .getPathToUserWordsFile());
+            }
+        }
+    }
+
+    /**
+     * Gets OCR result from provided multi-page image and returns result as
+     * list of strings for each page. This method is used for tiff images
+     * when preprocessing is not needed.
+     *
+     * @param inputImage input image {@link java.io.File}
+     * @param outputFormat selected {@link OutputFormat} for tesseract
+     * @return list of result strings (one per page) that will be written
+     * to temporary files later
+     * @throws Tesseract4OcrException if tesseract failed
+     */
+    private List<String> getOcrResultForMultiPage(final File inputImage,
+            final OutputFormat outputFormat) {
+        List<String> resultList = new ArrayList<String>();
+        try {
+            initializeTesseract(outputFormat);
+            TesseractOcrUtil util = new TesseractOcrUtil();
+            util.initializeImagesListFromTiff(inputImage);
+            int numOfPages = util.getListOfPages().size();
+            for (int i = 0; i < numOfPages; i++) {
+                String result = util.getOcrResultAsString(
+                        getTesseractInstance(),
+                        util.getListOfPages().get(i),
+                        outputFormat);
+                resultList.add(result);
+            }
+        } catch (TesseractException e) {
+            String msg = MessageFormatUtil
+                    .format(Tesseract4LogMessageConstant.TESSERACT_FAILED,
+                            e.getMessage());
+            LoggerFactory.getLogger(getClass()).error(msg);
+            // keep the original exception as the cause
+            throw new Tesseract4OcrException(
+                    Tesseract4OcrException.TESSERACT_FAILED, e);
+        } finally {
+            TesseractOcrUtil
+                    .disposeTesseractInstance(getTesseractInstance());
+        }
+        return resultList;
+    }
+
+    /**
+     * Gets OCR result from provided single page image and preprocesses it if
+     * it is needed.
+     *
+     * @param inputImage input image {@link java.io.File}
+     * @param outputFormat selected {@link OutputFormat} for tesseract
+     * @param pageNumber number of page to be OCRed
+     * @return result as string that will be written to a temporary file later
+     * @throws Tesseract4OcrException if tesseract failed
+     */
+    private String getOcrResultForSinglePage(final File inputImage,
+            final OutputFormat outputFormat,
+            final int pageNumber) {
+        String result = null;
+        try {
+            // preprocess if required
+            if (getTesseract4OcrEngineProperties().isPreprocessingImages()) {
+                // preprocess and try to ocr
+                result = new TesseractOcrUtil().getOcrResultAsString(
+                        getTesseractInstance(),
+                        ImagePreprocessingUtil
+                                .preprocessImage(inputImage, pageNumber),
+                        outputFormat);
+            }
+            // no result yet: preprocessing is off or it produced null
+            if (result == null) {
+                BufferedImage bufferedImage = ImagePreprocessingUtil
+                        .readImage(inputImage);
+                if (bufferedImage != null) {
+                    try {
+                        result = new TesseractOcrUtil()
+                                .getOcrResultAsString(getTesseractInstance(),
+                                        bufferedImage, outputFormat);
+                    } catch (Exception e) { // NOSONAR
+                        // logged only: the original input image is tried below
+                        LoggerFactory.getLogger(getClass())
+                                .info(MessageFormatUtil.format(
+                                        Tesseract4LogMessageConstant
+                                                .CANNOT_PROCESS_IMAGE,
+                                        e.getMessage()));
+                    }
+                }
+                if (result == null) {
+                    // perform ocr using original input image
+                    result = new TesseractOcrUtil()
+                            .getOcrResultAsString(getTesseractInstance(),
+                                    inputImage, outputFormat);
+                }
+            }
+        } catch (Exception e) { // NOSONAR
+            LoggerFactory.getLogger(getClass()).error(MessageFormatUtil.format(
+                    Tesseract4LogMessageConstant.TESSERACT_FAILED,
+                    e.getMessage()));
+            throw new Tesseract4OcrException(
+                    Tesseract4OcrException.TESSERACT_FAILED, e);
+        }
+
+        return result;
+    }
+}
diff --git a/pdfocr-tesseract4/src/main/java/com/itextpdf/pdfocr/tesseract4/Tesseract4LogMessageConstant.java b/pdfocr-tesseract4/src/main/java/com/itextpdf/pdfocr/tesseract4/Tesseract4LogMessageConstant.java
new file mode 100644
index 0000000..90bf76b
--- /dev/null
+++ b/pdfocr-tesseract4/src/main/java/com/itextpdf/pdfocr/tesseract4/Tesseract4LogMessageConstant.java
@@ -0,0 +1,65 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr.tesseract4;
+
+/**
+ * Templates of log messages, placeholders are zero-based: "{0}", "{1}", ...
+ */
+public class Tesseract4LogMessageConstant {
+    public static final String TESSERACT_FAILED = "Tesseract failed: {0}";
+    public static final String COMMAND_FAILED = "Command failed: {0}";
+    public static final String CANNOT_READ_FILE = "Cannot read file {0}: {1}";
+    public static final String CANNOT_OCR_INPUT_FILE =
+            "Cannot ocr input file: {0}";
+    public static final String CANNOT_USE_USER_WORDS =
+            "Cannot use custom user words: {0}";
+    public static final String CANNOT_RETRIEVE_PAGES_FROM_IMAGE =
+            "Cannot get pages from image {0}: {1}";
+    public static final String PAGE_NUMBER_IS_INCORRECT =
+            "Provided number of page ({0}) is incorrect for {1}";
+    public static final String CANNOT_DELETE_FILE =
+            "File {0} cannot be deleted: {1}";
+    public static final String CANNOT_PROCESS_IMAGE = "Cannot process "
+            + "image: {0}";
+    public static final String CANNOT_WRITE_TO_FILE =
+            "Cannot write to file {0}: {1}";
+    public static final String CREATED_TEMPORARY_FILE =
+            "Created temp file {0}";
+    public static final String CANNOT_CONVERT_IMAGE_TO_GRAYSCALE =
+            "Cannot convert to gray image with depth {0}";
+    public static final String CANNOT_BINARIZE_IMAGE =
+            "Cannot binarize image with depth {0}";
+    public static final String CANNOT_CREATE_BUFFERED_IMAGE =
+            "Cannot create a buffered image from the input image: {0}";
+    public static final String START_OCR_FOR_IMAGES =
+            "Starting ocr for {0} image(s)";
+    public static final String CANNOT_READ_INPUT_IMAGE =
+            "Cannot read input image {0}";
+    public static final String CANNOT_GET_TEMPORARY_DIRECTORY = "Cannot get "
+            + "temporary directory: {0}";
+    public static final String CANNOT_CONVERT_IMAGE_TO_PIX =
+            "Cannot convert image to pix: {0}";
+
+    private Tesseract4LogMessageConstant() {
+    }
+}
diff --git a/pdfocr-tesseract4/src/main/java/com/itextpdf/pdfocr/tesseract4/Tesseract4OcrEngineProperties.java b/pdfocr-tesseract4/src/main/java/com/itextpdf/pdfocr/tesseract4/Tesseract4OcrEngineProperties.java
new file mode 100644
index 0000000..de24193
--- /dev/null
+++ b/pdfocr-tesseract4/src/main/java/com/itextpdf/pdfocr/tesseract4/Tesseract4OcrEngineProperties.java
@@ -0,0 +1,390 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr.tesseract4;
+
+import com.itextpdf.io.util.FileUtil;
+import com.itextpdf.io.util.MessageFormatUtil;
+import com.itextpdf.pdfocr.IOcrEngine;
+import com.itextpdf.pdfocr.OcrEngineProperties;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.io.Reader;
+import java.nio.charset.StandardCharsets;
+import java.util.List;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Properties that will be used by the {@link IOcrEngine}.
+ */
+public class Tesseract4OcrEngineProperties extends OcrEngineProperties {
+
+    /**
+     * Default suffix for user-word file.
+     * (e.g. name: 'eng.user-words')
+     */
+    static final String DEFAULT_USER_WORDS_SUFFIX = "user-words";
+
+    /**
+     * Default language for OCR.
+     */
+    private static final String DEFAULT_LANGUAGE = "eng";
+
+    /**
+     * Path to directory with tess data.
+     */
+    private File tessDataDir;
+
+    /**
+     * Page Segmentation Mode.
+     */
+    private Integer pageSegMode = 3;
+
+    /**
+     * "True" - if images need to be preprocessed, otherwise - false.
+     * True by default.
+     */
+    private boolean preprocessingImages = true;
+
+    /**
+     * Defines the way text is retrieved from tesseract output.
+     * Default text positioning is by lines.
+     */
+    private TextPositioning textPositioning = TextPositioning.BY_LINES;
+
+    /**
+     * Path to the file containing user words.
+     * Each word should be on a new line,
+     * file should end with a newline character.
+     */
+    private String pathToUserWordsFile = null;
+
+    /**
+     * Indicates if user words file is temporary and has to be removed.
+     */
+    private boolean isUserWordsFileTemporary = false;
+
+    /**
+     * Creates an instance with the defaults: PSM 3, preprocessing on, BY_LINES positioning.
+     */
+    public Tesseract4OcrEngineProperties() {
+    }
+
+    /**
+     * Copy constructor: copies the base settings via {@code super(other)} and
+     * every field declared here, including the temporary flag of the user words file.
+     *
+     * @param other the {@link Tesseract4OcrEngineProperties} to copy from
+     */
+    public Tesseract4OcrEngineProperties(Tesseract4OcrEngineProperties other) {
+        super(other);
+        this.tessDataDir = other.tessDataDir;
+        this.pageSegMode = other.pageSegMode;
+        this.preprocessingImages = other.preprocessingImages;
+        this.textPositioning = other.textPositioning;
+        this.pathToUserWordsFile = other.pathToUserWordsFile;
+        this.isUserWordsFileTemporary = other.isUserWordsFileTemporary;
+    }
+
+    /**
+     * Returns the language code that is treated as the default for OCR.
+     *
+     * @return the default language code, {@code "eng"}
+     */
+    public final String getDefaultLanguage() {
+        return Tesseract4OcrEngineProperties.DEFAULT_LANGUAGE;
+    }
+
+    /**
+     * Returns the suffix used for the names of user words files.
+     *
+     * @return the default user words suffix, {@code "user-words"}
+     */
+    public final String getDefaultUserWordsSuffix() {
+        return Tesseract4OcrEngineProperties.DEFAULT_USER_WORDS_SUFFIX;
+    }
+
+    /**
+     * Returns the tess data directory configured for this instance.
+     *
+     * @return tess data directory, or {@code null} if none has been set
+     */
+    public final File getPathToTessData() {
+        return this.tessDataDir;
+    }
+
+    /**
+     * Stores the directory that holds the tess data files.
+     *
+     * @param tessData tess data directory as {@link java.io.File}
+     * @return the {@link Tesseract4OcrEngineProperties} instance
+     * @throws Tesseract4OcrException if {@code tessData} is {@code null} or
+     * {@code FileUtil.directoryExists} does not confirm its absolute path
+     * as an existing directory
+     */
+    public final Tesseract4OcrEngineProperties setPathToTessData(
+            final File tessData) {
+        final boolean isValidDirectory = tessData != null
+                && FileUtil.directoryExists(tessData.getAbsolutePath());
+        if (!isValidDirectory) {
+            throw new Tesseract4OcrException(Tesseract4OcrException
+                    .PATH_TO_TESS_DATA_DIRECTORY_IS_INVALID);
+        }
+
+        this.tessDataDir = tessData;
+        return this;
+    }
+
+    /**
+     * Returns the Page Segmentation Mode currently configured.
+     *
+     * @return page segmentation mode as {@link java.lang.Integer}
+     */
+    public final Integer getPageSegMode() {
+        return this.pageSegMode;
+    }
+
+    /**
+     * Sets the Page Segmentation Mode (PSM).
+     * The available modes are explained in the tesseract documentation:
+     * https://github.com/tesseract-ocr/tesseract/blob/master/doc/tesseract.1.asc#options
+     * That documentation names 3 as the default PSM, which holds for the
+     * tesseract executable; the tesseract library defaults to -1 instead,
+     * which has a negative impact on some documents.
+     * For that reason this class initializes the mode explicitly to 3.
+     *
+     * @param mode page segmentation mode as {@link java.lang.Integer}
+     * @return the {@link Tesseract4OcrEngineProperties} instance
+     */
+    public final Tesseract4OcrEngineProperties setPageSegMode(
+            final Integer mode) {
+        this.pageSegMode = mode;
+        return this;
+    }
+
+    /**
+     * Tells whether image preprocessing is enabled.
+     *
+     * @return {@code true} if images need to be preprocessed, otherwise {@code false}
+     */
+    public final boolean isPreprocessingImages() {
+        return this.preprocessingImages;
+    }
+
+    /**
+     * Enables or disables image preprocessing.
+     *
+     * @param preprocess {@code true} if images need to be preprocessed,
+     *                   otherwise {@code false}
+     * @return the {@link Tesseract4OcrEngineProperties} instance
+     */
+    public final Tesseract4OcrEngineProperties setPreprocessingImages(
+            final boolean preprocess) {
+        this.preprocessingImages = preprocess;
+        return this;
+    }
+
+    /**
+     * Returns the {@link TextPositioning} that defines how text is
+     * retrieved from tesseract output.
+     *
+     * @return the configured way of retrieving text
+     */
+    public final TextPositioning getTextPositioning() {
+        return this.textPositioning;
+    }
+
+    /**
+     * Chooses, by means of {@link TextPositioning}, how text is
+     * retrieved from tesseract output.
+     *
+     * @param positioning the way text is to be retrieved
+     * @return the {@link Tesseract4OcrEngineProperties} instance
+     */
+    public final Tesseract4OcrEngineProperties setTextPositioning(
+            final TextPositioning positioning) {
+        this.textPositioning = positioning;
+        return this;
+    }
+
+    /**
+     * Writes the provided words, UTF-8 encoded and one per line, into a
+     * temporary user words file by delegating to
+     * {@link #setUserWords(String, InputStream)}. Any previously set user
+     * words file path is reset first; a null or empty list only resets it.
+     *
+     * NOTE:
+     * User words dictionary doesn't work properly in tesseract4
+     * and is hidden from public usage until a fix is available.
+     *
+     * @param language language as {@link java.lang.String}; tessdata for
+     *                 this language has to exist in the tess data directory
+     * @param userWords {@link java.util.List} of custom words
+     * @return the {@link Tesseract4OcrEngineProperties} instance
+     * @throws Tesseract4OcrException if the language is neither "eng" nor in
+     * the list set with {@link Tesseract4OcrEngineProperties#setLanguages(List)}
+     */
+    Tesseract4OcrEngineProperties setUserWords(final String language,
+            final List<String> userWords)
+            throws Tesseract4OcrException {
+        setPathToUserWordsFile(null);
+        if (userWords != null && userWords.size() > 0) {
+            try {
+                ByteArrayOutputStream baos = new ByteArrayOutputStream();
+                // UTF-8 on purpose: the InputStream overload decodes as UTF-8
+                byte[] bytesSeparator = System.lineSeparator()
+                        .getBytes(StandardCharsets.UTF_8);
+                for (String word : userWords) {
+                    byte[] bytesWord = word.getBytes(StandardCharsets.UTF_8);
+                    baos.write(bytesWord, 0, bytesWord.length);
+                    baos.write(bytesSeparator, 0, bytesSeparator.length);
+                }
+                baos.close();
+                setUserWords(language,
+                        new ByteArrayInputStream(baos.toByteArray()));
+            } catch (IOException e) {
+                LoggerFactory.getLogger(getClass())
+                        .warn(MessageFormatUtil.format(
+                                Tesseract4LogMessageConstant.CANNOT_USE_USER_WORDS,
+                                e.getMessage()));
+            }
+        }
+        return this;
+    }
+
+    /**
+     * Copies the user words from the provided stream (decoded as UTF-8) into
+     * a UTF-8 file at {@code TesseractOcrUtil.getTempFilePath(language,
+     * ".user-words")}, appends a line separator and registers that file as
+     * temporary. On an I/O error the path is reset and a warning is logged.
+     *
+     * NOTE:
+     * User words dictionary doesn't work properly in tesseract4
+     * and is hidden from public usage until a fix is available.
+     *
+     * @param language language as {@link java.lang.String}; tessdata for
+     *                 this language has to exist in the tess data directory
+     * @param inputStream user words, one per line; not closed by this method
+     * @throws Tesseract4OcrException if the language is neither "eng" nor in
+     * the list set with {@link Tesseract4OcrEngineProperties#setLanguages(List)}
+     * @return the {@link Tesseract4OcrEngineProperties} instance
+     */
+    Tesseract4OcrEngineProperties setUserWords(final String language,
+            final InputStream inputStream) throws Tesseract4OcrException {
+        setPathToUserWordsFile(null);
+        if (!getLanguages().contains(language)) {
+            // equalsIgnoreCase: null-safe and independent of the default locale
+            if (DEFAULT_LANGUAGE.equalsIgnoreCase(language)) {
+                List<String> languagesList = getLanguages();
+                languagesList.add(language);
+                setLanguages(languagesList);
+            } else {
+                throw new Tesseract4OcrException(Tesseract4OcrException
+                        .LANGUAGE_IS_NOT_IN_THE_LIST).setMessageParams(language);
+            }
+        }
+        String userWordsFileName = TesseractOcrUtil.getTempFilePath(language,
+                "." + DEFAULT_USER_WORDS_SUFFIX);
+        // Write as UTF-8 to match the decoding of the input below; FileWriter
+        // would silently encode with the platform default charset.
+        try (OutputStreamWriter writer = new OutputStreamWriter(
+                new java.io.FileOutputStream(userWordsFileName),
+                StandardCharsets.UTF_8)) {
+            Reader reader = new InputStreamReader(inputStream, StandardCharsets.UTF_8);
+            int data;
+            while ((data = reader.read()) != -1) {
+                writer.write(data);
+            }
+            writer.write(System.lineSeparator());
+            setPathToUserWordsFile(userWordsFileName, true);
+        } catch (IOException e) {
+            setPathToUserWordsFile(null);
+            LoggerFactory.getLogger(getClass())
+                    .warn(MessageFormatUtil.format(
+                            Tesseract4LogMessageConstant.CANNOT_USE_USER_WORDS,
+                            e.getMessage()));
+        }
+        return this;
+    }
+
+    /**
+     * Returns the path of the user words file, if one is set.
+     *
+     * NOTE:
+     * User words dictionary doesn't work properly in tesseract4
+     * and is hidden from public usage until a fix is available.
+     *
+     * @return path to the user words file as {@link java.lang.String},
+     * or {@code null} when no such file is set
+     */
+    final String getPathToUserWordsFile() {
+        return this.pathToUserWordsFile;
+    }
+
+    /**
+     * Sets path to the user words file, marking it as not temporary.
+     *
+     * NOTE:
+     * User words dictionary doesn't work properly in tesseract4
+     * and is hidden from public usage until a fix is available.
+     *
+     * @param pathToUserWordsFile path to user words file as {@link java.lang.String}
+     * @return the {@link Tesseract4OcrEngineProperties} instance
+     */
+    final Tesseract4OcrEngineProperties setPathToUserWordsFile(
+            String pathToUserWordsFile) {
+        final boolean isTempFile = false;
+        return setPathToUserWordsFile(pathToUserWordsFile, isTempFile);
+    }
+
+    /**
+     * Stores the path to the user words file together with its temporary flag.
+     *
+     * @param pathToUserWordsFile path to user words file
+     *                        as {@link java.lang.String}
+     * @param isTempFile true if the file is temporary and has to be removed
+     * @return the {@link Tesseract4OcrEngineProperties} instance
+     */
+    final Tesseract4OcrEngineProperties setPathToUserWordsFile(
+            String pathToUserWordsFile, boolean isTempFile) {
+        this.isUserWordsFileTemporary = isTempFile;
+        this.pathToUserWordsFile = pathToUserWordsFile;
+        return this;
+    }
+
+    /**
+     * Tells whether the user words file is temporary and has to be removed.
+     *
+     * @return {@code true} for a temporary file, otherwise {@code false}
+     */
+    final boolean isUserWordsFileTemporary() {
+        return this.isUserWordsFileTemporary;
+    }
+
+}
diff --git a/pdfocr-tesseract4/src/main/java/com/itextpdf/pdfocr/tesseract4/Tesseract4OcrException.java b/pdfocr-tesseract4/src/main/java/com/itextpdf/pdfocr/tesseract4/Tesseract4OcrException.java
new file mode 100644
index 0000000..21b7845
--- /dev/null
+++ b/pdfocr-tesseract4/src/main/java/com/itextpdf/pdfocr/tesseract4/Tesseract4OcrException.java
@@ -0,0 +1,74 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr.tesseract4;
+
+import com.itextpdf.pdfocr.OcrException;
+
+/**
+ * Exception of the tesseract4 module; its String constants are message templates.
+ */
+public class Tesseract4OcrException extends OcrException {
+    public static final String INCORRECT_INPUT_IMAGE_FORMAT =
+            "{0} format is not supported.";
+    public static final String INCORRECT_LANGUAGE = "{0} does not exist in {1}";
+    public static final String LANGUAGE_IS_NOT_IN_THE_LIST =
+            "Provided list of languages doesn't contain {0} language";
+    public static final String CANNOT_READ_PROVIDED_IMAGE = "Cannot read input image {0}";
+    public static final String TESSERACT_FAILED =
+            "Tesseract failed. Please check provided parameters";
+    public static final String TESSERACT_LIB_NOT_INSTALLED =
+            "Tesseract failed. Please ensure you have tesseract library installed";
+    public static final String TESSERACT_LIB_NOT_INSTALLED_WIN =
+            "Tesseract failed. Please ensure you have latest "
+                    + "Visual C++ Redistributable installed";
+    public static final String TESSERACT_NOT_FOUND =
+            "Tesseract failed. Please check that tesseract is installed and "
+                    + "provided path to tesseract executable directory is correct";
+    public static final String CANNOT_FIND_PATH_TO_TESSERACT_EXECUTABLE =
+            "Cannot find path to tesseract executable.";
+    public static final String PATH_TO_TESS_DATA_DIRECTORY_IS_INVALID =
+            "Provided path to tess data directory does not exist "
+                    + "or it is an invalid directory";
+    public static final String PATH_TO_TESS_DATA_IS_NOT_SET =
+            "Path to tess data directory cannot be null "
+                    + "and must be set to a valid directory";
+
+    /**
+     * Creates a new {@link Tesseract4OcrException} with a cause.
+     *
+     * @param msg the detail message
+     * @param e   the cause, retrievable later via {@link #getCause()}
+     */
+    public Tesseract4OcrException(String msg, Throwable e) {
+        super(msg, e);
+    }
+
+    /**
+     * Creates a new {@link Tesseract4OcrException} without a cause.
+     *
+     * @param msg the detail message
+     */
+    public Tesseract4OcrException(String msg) {
+        super(msg);
+    }
+}
diff --git a/pdfocr-tesseract4/src/main/java/com/itextpdf/pdfocr/tesseract4/TesseractHelper.java b/pdfocr-tesseract4/src/main/java/com/itextpdf/pdfocr/tesseract4/TesseractHelper.java
new file mode 100644
index 0000000..c700f25
--- /dev/null
+++ b/pdfocr-tesseract4/src/main/java/com/itextpdf/pdfocr/tesseract4/TesseractHelper.java
@@ -0,0 +1,254 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr.tesseract4;
+
+import com.itextpdf.io.util.MessageFormatUtil;
+import com.itextpdf.io.util.SystemUtil;
+import com.itextpdf.pdfocr.TextInfo;
+import com.itextpdf.styledxmlparser.jsoup.Jsoup;
+import com.itextpdf.styledxmlparser.jsoup.nodes.Document;
+import com.itextpdf.styledxmlparser.jsoup.nodes.Element;
+import com.itextpdf.styledxmlparser.jsoup.select.Elements;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Helper class.
+ */
+public class TesseractHelper {
+
+    /**
+     * The logger.
+     */
+    private static final Logger LOGGER = LoggerFactory
+            .getLogger(TesseractHelper.class);
+
+    /**
+     * Private constructor: this class only offers static helper methods.
+     */
+    private TesseractHelper() {
+    }
+
+    /**
+     * Parses each existing hocr file from the provided list, retrieves text,
+     * and returns data in the format described below.
+     *
+     * @param inputFiles list of input files
+     * @param textPositioning {@link TextPositioning}
+     * @return {@link java.util.Map} where key is {@link java.lang.Integer}
+     * representing the number of the page and value is
+     * {@link java.util.List} of {@link TextInfo} elements where each
+     * {@link TextInfo} element contains a word or a line and its 4
+     * coordinates(bbox)
+     * @throws IOException if an error occurred while reading one of the
+     * provided files
+     */
+    public static Map<Integer, List<TextInfo>> parseHocrFile(
+            final List<File> inputFiles,
+            final TextPositioning textPositioning)
+            throws IOException {
+        Map<Integer, List<TextInfo>> imageData =
+                new LinkedHashMap<Integer, List<TextInfo>>();
+        // Patterns and class names do not depend on the file: build them once
+        Pattern bboxPattern = Pattern.compile(".*bbox(\\s+\\d+){4}.*");
+        Pattern bboxCoordinatePattern = Pattern
+                .compile(".*\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+).*");
+        List<String> searchedClasses = TextPositioning.BY_LINES
+                .equals(textPositioning)
+                ? Arrays.<String>asList("ocr_line", "ocr_caption")
+                : Collections.<String>singletonList("ocrx_word");
+
+        for (File inputFile : inputFiles) {
+            if (inputFile == null || !Files.exists(
+                    java.nio.file.Paths.get(inputFile.getAbsolutePath()))) {
+                continue;
+            }
+            // try-with-resources: the stream is closed even if parsing throws
+            try (FileInputStream fileInputStream =
+                    new FileInputStream(inputFile.getAbsolutePath())) {
+                Document doc = Jsoup.parse(fileInputStream,
+                        java.nio.charset.StandardCharsets.UTF_8.name(),
+                        inputFile.getAbsolutePath());
+                for (Element page : doc.getElementsByClass("ocr_page")) {
+                    String[] pageNum = page.id().split("page_");
+                    int pageNumber = Integer
+                            .parseInt(pageNum[pageNum.length - 1]);
+                    List<TextInfo> textData = collectTextInfo(page,
+                            searchedClasses, bboxPattern,
+                            bboxCoordinatePattern);
+                    if (textData.size() > 0) {
+                        // number already used: store under max key + 1
+                        if (imageData.containsKey(pageNumber)) {
+                            pageNumber = Collections.max(imageData.keySet())
+                                    + 1;
+                        }
+                        imageData.put(pageNumber, textData);
+                    }
+                }
+            }
+        }
+        return imageData;
+    }
+
+    /**
+     * Collects text and bbox of the page elements having the given classes.
+     * Elements whose title holds no "bbox" with 4 integers are skipped.
+     *
+     * @return found {@link TextInfo} elements, possibly empty
+     */
+    private static List<TextInfo> collectTextInfo(final Element page,
+            final List<String> searchedClasses, final Pattern bboxPattern,
+            final Pattern bboxCoordinatePattern) {
+        List<TextInfo> textData = new ArrayList<TextInfo>();
+        Elements objects = page.getElementsByClass(searchedClasses.get(0));
+        for (int i = 1; i < searchedClasses.size(); i++) {
+            objects.addAll(page.getElementsByClass(searchedClasses.get(i)));
+        }
+        for (Element obj : objects) {
+            Matcher bboxMatcher = bboxPattern.matcher(obj.attr("title"));
+            if (!bboxMatcher.matches()) {
+                continue;
+            }
+            Matcher bboxCoordinateMatcher = bboxCoordinatePattern
+                    .matcher(bboxMatcher.group());
+            if (bboxCoordinateMatcher.matches()) {
+                List<Float> coordinates = new ArrayList<Float>();
+                for (int i = 1; i <= 4; i++) {
+                    coordinates.add(Float.parseFloat(
+                            bboxCoordinateMatcher.group(i)));
+                }
+                textData.add(new TextInfo(obj.text(), coordinates));
+            }
+        }
+        return textData;
+    }
+
+    /**
+     * Deletes file using provided path; a failure is only logged.
+     *
+     * @param pathToFile path to the file to be deleted
+     */
+    static void deleteFile(final String pathToFile) {
+        if (pathToFile == null || pathToFile.isEmpty()) {
+            return;
+        }
+        try {
+            // single call, no exists() pre-check: avoids a check-then-delete race
+            Files.deleteIfExists(java.nio.file.Paths.get(pathToFile));
+        } catch (IOException | SecurityException e) {
+            LOGGER.info(MessageFormatUtil.format(
+                    Tesseract4LogMessageConstant.CANNOT_DELETE_FILE,
+                    pathToFile, e.getMessage()));
+        }
+    }
+
+    /**
+     * Loads the whole content of a text file, decoding it as UTF-8.
+     *
+     * @param txtFile input {@link java.io.File} to be read
+     * @return content of the file as {@link java.lang.String}, or
+     * {@code null} if the file could not be read
+     */
+    static String readTxtFile(final File txtFile) {
+        try {
+            final byte[] rawBytes = Files.readAllBytes(txtFile.toPath());
+            return new String(rawBytes, StandardCharsets.UTF_8);
+        } catch (IOException e) {
+            // reading failed: log the reason and signal it to the caller as null
+            LOGGER.error(MessageFormatUtil.format(
+                    Tesseract4LogMessageConstant.CANNOT_READ_FILE,
+                    txtFile.getAbsolutePath(),
+                    e.getMessage()));
+            return null;
+        }
+    }
+
+    /**
+     * Stores the given text, encoded as UTF-8, in the file at the given path.
+     * An I/O failure is logged as an error and not propagated.
+     *
+     * @param path path as {@link java.lang.String} to file to be created
+     * @param data text data in required format as {@link java.lang.String}
+     */
+    static void writeToTextFile(final String path,
+            final String data) {
+        try (Writer utf8Writer = new OutputStreamWriter(
+                new FileOutputStream(path), StandardCharsets.UTF_8)) {
+            utf8Writer.write(data);
+        } catch (IOException ioException) {
+            final String logMessage = MessageFormatUtil.format(
+                    Tesseract4LogMessageConstant.CANNOT_WRITE_TO_FILE,
+                    path, ioException.getMessage());
+            LOGGER.error(logMessage);
+        }
+    }
+
+    /**
+     * Runs given command; on interruption the thread's interrupt flag is restored.
+     *
+     * @param execPath path to the executable
+     * @param paramsList {@link java.util.List} of command line arguments
+     * @throws Tesseract4OcrException if provided command failed
+     */
+    static void runCommand(final String execPath,
+            final List<String> paramsList) throws Tesseract4OcrException {
+        try {
+            String params = String.join(" ", paramsList);
+            if (!SystemUtil.runProcessAndWait(execPath, params)) {
+                LOGGER.error(MessageFormatUtil
+                        .format(Tesseract4LogMessageConstant.COMMAND_FAILED,
+                                execPath + " " + params));
+                throw new Tesseract4OcrException(
+                        Tesseract4OcrException.TESSERACT_FAILED);
+            }
+        } catch (IOException | InterruptedException e) { // NOSONAR
+            if (e instanceof InterruptedException) {
+                // restore the interrupt flag so callers can still observe it
+                Thread.currentThread().interrupt();
+            }
+            LOGGER.error(MessageFormatUtil
+                    .format(Tesseract4LogMessageConstant.COMMAND_FAILED,
+                            e.getMessage()));
+            // chain the original failure as the cause to keep it diagnosable
+            throw new Tesseract4OcrException(
+                    Tesseract4OcrException.TESSERACT_FAILED, e);
+        }
+    }
+}
diff --git a/pdfocr-tesseract4/src/main/java/com/itextpdf/pdfocr/tesseract4/TesseractOcrUtil.java b/pdfocr-tesseract4/src/main/java/com/itextpdf/pdfocr/tesseract4/TesseractOcrUtil.java
new file mode 100644
index 0000000..ffca409
--- /dev/null
+++ b/pdfocr-tesseract4/src/main/java/com/itextpdf/pdfocr/tesseract4/TesseractOcrUtil.java
@@ -0,0 +1,574 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr.tesseract4;
+
+import com.itextpdf.io.util.MessageFormatUtil;
+
+import com.ochafik.lang.jnaerator.runtime.NativeSize;
+import com.ochafik.lang.jnaerator.runtime.NativeSizeByReference;
+import com.sun.jna.ptr.PointerByReference;
+import java.awt.image.BufferedImage;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.ByteBuffer;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.nio.file.StandardCopyOption;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import javax.imageio.ImageIO;
+import net.sourceforge.lept4j.ILeptonica;
+import net.sourceforge.lept4j.Leptonica;
+import net.sourceforge.lept4j.Pix;
+import net.sourceforge.tess4j.ITesseract;
+import net.sourceforge.tess4j.Tesseract;
+import net.sourceforge.tess4j.Tesseract1;
+import net.sourceforge.tess4j.TesseractException;
+import org.apache.commons.imaging.ImageReadException;
+import org.apache.commons.imaging.Imaging;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Utilities class to work with tesseract command line tool and image
+ * preprocessing using {@link net.sourceforge.lept4j.ILeptonica}.
+ * All of these methods have to be ported to .Net manually.
+ */
+class TesseractOcrUtil {
+
+    /**
+     * SLF4J logger shared by all methods of this class.
+     */
+    private static final Logger LOGGER = LoggerFactory
+            .getLogger(TesseractOcrUtil.class);
+
+    /**
+     * Pages of the image being processed (initially an empty list).
+     */
+    private List<BufferedImage> imagePages =
+            Collections.<BufferedImage>emptyList();
+
+    /**
+     * Creates a new {@link TesseractOcrUtil} with an empty list of pages.
+     */
+    TesseractOcrUtil() {
+    }
+
+    /**
+     * Reads the requested page of an image file and converts it to a Pix.
+     * An {@link IOException} raised by the conversion is logged, not thrown.
+     * @param inputFile input image as {@link java.io.File}
+     * @param pageNumber page index handed to {@link #getImagePage}
+     * @return result {@link net.sourceforge.lept4j.Pix} object, or
+     * {@code null} if the page could not be read or converted
+     */
+    static Pix readPixPageFromTiff(final File inputFile,
+            final int pageNumber) {
+        Pix pix = null;
+        try {
+            BufferedImage img = TesseractOcrUtil
+                    .getImagePage(inputFile, pageNumber);
+            pix = convertImageToPix(img);
+        } catch (IOException e) {
+            LOGGER.error(MessageFormatUtil.format(
+                    Tesseract4LogMessageConstant.CANNOT_READ_INPUT_IMAGE,
+                    e.getMessage()));
+        }
+        // pix is still null here if the page was missing or not converted
+        return pix;
+    }
+
+    /**
+     * Performs default image preprocessing.
+     * It includes the following actions:
+     * converting to grayscale,
+     * thresholding.
+     *
+     * @param pix {@link net.sourceforge.lept4j.Pix} object to be processed
+     * @return preprocessed {@link net.sourceforge.lept4j.Pix} object
+     */
+    static Pix preprocessPix(Pix pix) {
+        pix = convertToGrayscale(pix);
+        pix = otsuImageThresholding(pix);
+        return pix;
+    }
+
+    /**
+     * Converts Leptonica {@link net.sourceforge.lept4j.Pix} to grayscale.
+     * In .Net image is converted only if this is 32bpp image. In java image is
+     * converted anyway using different Leptonica methods depending on
+     * image depth.
+     *
+     * @param pix {@link net.sourceforge.lept4j.Pix} object to be processed
+     * @return preprocessed {@link net.sourceforge.lept4j.Pix} object
+     */
+    static Pix convertToGrayscale(final Pix pix) {
+        Leptonica instance = Leptonica.INSTANCE;
+        if (pix != null) {
+            int depth = instance.pixGetDepth(pix);
+
+            if (depth == 32) {
+                return instance.pixConvertRGBToLuminance(pix);
+            } else {
+                return instance.pixRemoveColormap(pix,
+                        instance.REMOVE_CMAP_TO_GRAYSCALE);
+            }
+        } else {
+            return pix;
+        }
+    }
+
+    /**
+     * Performs Leptonica Otsu adaptive image thresholding using
+     * {@link net.sourceforge.lept4j.Leptonica#pixOtsuAdaptiveThreshold}
+     * method. Only images with depth 8 are thresholded.
+     *
+     * @param pix {@link net.sourceforge.lept4j.Pix} object to be processed
+     * @return thresholded Pix, or the unchanged input if it was not binarized
+     */
+    static Pix otsuImageThresholding(final Pix pix) {
+        if (pix != null) {
+            Pix thresholdPix = null;
+            if (pix.d == 8) {
+                PointerByReference pointer = new PointerByReference();
+                Leptonica.INSTANCE
+                        .pixOtsuAdaptiveThreshold(pix, pix.w, pix.h,
+                                0, 0, 0,
+                                null, pointer);
+                thresholdPix = new Pix(pointer.getValue());
+                if (thresholdPix.w > 0 && thresholdPix.h > 0) {
+                    // thresholding worked: the original pix is released
+                    destroyPix(pix);
+                    return thresholdPix;
+                } else {
+                    LOGGER.info(MessageFormatUtil.format(
+                            Tesseract4LogMessageConstant.CANNOT_BINARIZE_IMAGE,
+                            pix.d));
+                    // release the empty result and fall back to the input
+                    destroyPix(thresholdPix);
+                    return pix;
+                }
+            } else {
+                LOGGER.info(MessageFormatUtil.format(
+                        Tesseract4LogMessageConstant.CANNOT_BINARIZE_IMAGE,
+                        pix.d));
+                return pix;
+            }
+        } else {
+            return pix;
+        }
+    }
+
+    /**
+     * Releases a {@link net.sourceforge.lept4j.Pix} via {@code lept_free}.
+     * NOTE(review): confirm lept_free releases everything the Pix owns.
+     * @param pix {@link net.sourceforge.lept4j.Pix} to release, may be null
+     */
+    static void destroyPix(Pix pix) {
+        if (pix != null) {
+            Leptonica.INSTANCE.lept_free(pix.getPointer());
+        }
+    }
+
+    /**
+     * Sets tesseract properties.
+     * The following properties are set in this method:
+     * In java: path to tess data, languages, psm, ocr engine mode
+     * In .Net: psm
+     * This means that other properties have been set during the
+     * initialization of tesseract instance previously or tesseract library
+     * doesn't provide such possibilities in api for .Net or java.
+     *
+     * @param tesseractInstance {@link net.sourceforge.tess4j.ITesseract} object
+     * @param tessData path to tess data directory
+     * @param languages list of languages in required format
+     *                  as {@link java.lang.String}
+     * @param pageSegMode page segmentation mode, not applied if null
+     * @param userWordsFilePath user words file: engine mode 0 if set, else 3
+     */
+    static void setTesseractProperties(
+            final ITesseract tesseractInstance,
+            final String tessData, final String languages,
+            final Integer pageSegMode, final String userWordsFilePath) {
+        tesseractInstance.setDatapath(tessData);
+        tesseractInstance.setLanguage(languages);
+        if (pageSegMode != null) {
+            tesseractInstance.setPageSegMode(pageSegMode);
+        }
+        tesseractInstance.setOcrEngineMode(userWordsFilePath != null ? 0 : 3);
+    }
+
+    /**
+     * Creates tesseract instance with parameters.
+     * Method is used to initialize tesseract instance with parameters if it
+     * hasn't been initialized yet.
+     * In this method in java 'tessData', 'languages' and 'userWordsFilePath'
+     * properties are unused as they will be set using setters in
+     * {@link #setTesseractProperties} method. In .Net all these properties
+     * are needed to be provided in tesseract constructor in order to
+     * initialize tesseract instance. Thus, tesseract initialization takes
+     * place in {@link Tesseract4LibOcrEngine#Tesseract4LibOcrEngine} constructor in
+     * java, but in .Net it happens only after all properties are validated,
+     * i.e. just before OCR process.
+     *
+     * @param isWindows true if current os is windows
+     * @param tessData path to tess data directory
+     * @param languages list of languages as {@link java.lang.String}
+     * @param userWordsFilePath path to a temporary file with user words
+     * @return new Tesseract1 if isWindows is true, new Tesseract otherwise
+     * @throws Tesseract4OcrException if a {@link LinkageError} occurs
+     */
+    static ITesseract initializeTesseractInstance(final boolean isWindows,
+            final String tessData, final String languages,
+            final String userWordsFilePath) {
+        try {
+            if (isWindows) {
+                return new Tesseract1();
+            } else {
+                return new Tesseract();
+            }
+        } catch (LinkageError e) {
+            throw new Tesseract4OcrException(isWindows ?
+                    Tesseract4OcrException.TESSERACT_LIB_NOT_INSTALLED_WIN :
+                    Tesseract4OcrException.TESSERACT_LIB_NOT_INSTALLED, e);
+        }
+    }
+
+    /**
+     * Returns true if tesseract instance has been already disposed.
+     * (used in .net version; the java version ignores its argument)
+     * @param tesseractInstance {@link net.sourceforge.tess4j.ITesseract}
+     *                          object to check
+     * @return always {@code false} in java
+     */
+    static boolean isTesseractInstanceDisposed(
+            final ITesseract tesseractInstance) {
+        return false;
+    }
+
+    /**
+     * Disposes {@link net.sourceforge.tess4j.ITesseract} instance.
+     * (used in .net version; in java this method does nothing)
+     * @param tesseractInstance {@link net.sourceforge.tess4j.ITesseract}
+     *                          object to dispose
+     */
+    static void disposeTesseractInstance(
+            final ITesseract tesseractInstance) {
+    }
+
+    /**
+     * Converts {@link java.awt.image.BufferedImage} to
+     * {@link net.sourceforge.lept4j.Pix} by encoding it as in-memory png.
+     *
+     * @param bufferedImage input image as {@link java.awt.image.BufferedImage}
+     * @return converted Pix, or {@code null} if the input image is null
+     * @throws IOException if it's not possible to convert
+     */
+    static Pix convertImageToPix(
+            final BufferedImage bufferedImage)
+            throws IOException {
+        Pix pix = null;
+        if (bufferedImage != null) {
+            ByteArrayOutputStream baos = new ByteArrayOutputStream();
+            ImageIO.write(bufferedImage, "png", baos);
+            byte[] pngBytes = baos.toByteArray(); // copy the buffer only once
+            ByteBuffer byteBuffer = ByteBuffer.wrap(pngBytes);
+            NativeSize nativeSize = new NativeSize(pngBytes.length);
+            pix = Leptonica.INSTANCE.pixReadMem(byteBuffer, nativeSize);
+        }
+
+        return pix;
+    }
+
+    /**
+     * Converts Leptonica {@link net.sourceforge.lept4j.Pix}
+     * to {@link java.awt.image.BufferedImage} with
+     * {@link net.sourceforge.lept4j.ILeptonica#IFF_PNG} image format.
+     *
+     * @param pix input {@link net.sourceforge.lept4j.Pix} object
+     * @return result BufferedImage, or {@code null} if pix is null
+     * @throws IOException if it is not possible to convert
+     */
+    static BufferedImage convertPixToImage(final Pix pix)
+            throws IOException {
+        if (pix != null) {
+            Leptonica instance = Leptonica.INSTANCE;
+            BufferedImage bi = null;
+            PointerByReference pdata = new PointerByReference();
+            try {
+                NativeSizeByReference psize = new NativeSizeByReference();
+                instance.pixWriteMem(pdata, psize, pix, ILeptonica.IFF_PNG);
+                byte[] b = pdata.getValue().getByteArray(0,
+                        psize.getValue().intValue());
+                try (InputStream in = new ByteArrayInputStream(b)) {
+                    bi = ImageIO.read(in);
+                }
+            } finally { // the buffer referenced by pdata is freed in any case
+                instance.lept_free(pdata.getValue());
+            }
+            return bi;
+        } else {
+            return null;
+        }
+    }
+
+    /**
+     * Creates a temp file with the given name prefix and suffix.
+     * On failure the error is logged and {@code name + suffix} is returned.
+     * @return path to temp file in the system temporary directory
+     */
+    static String getTempFilePath(String name, String suffix) {
+        String tmpFileName = name + suffix;
+        try {
+            Path tempPath = Files.createTempFile(name, suffix);
+            tmpFileName = tempPath.toString();
+        } catch (IOException | IllegalArgumentException e) {
+            LOGGER.info(MessageFormatUtil.format(
+                    Tesseract4LogMessageConstant.CANNOT_GET_TEMPORARY_DIRECTORY,
+                    e.getMessage()));
+        }
+        return tmpFileName;
+    }
+
+    /**
+     * Gets requested image page from the provided image.
+     * @param inputFile input image
+     * @param page zero-based index of the requested image page
+     * @return requested page as a {@link java.awt.image.BufferedImage}, or
+     * {@code null} if the index is out of range or the image is unreadable
+     */
+    static BufferedImage getImagePage(File inputFile, int page)
+    {
+        BufferedImage img = null;
+        try (InputStream is =
+                new FileInputStream(inputFile.getAbsolutePath())) {
+            List<BufferedImage> pages = Imaging.getAllBufferedImages(is,
+                    inputFile.getAbsolutePath());
+            if (page < 0 || page >= pages.size()) { // reject invalid index
+                LOGGER.warn(MessageFormatUtil.format(
+                        Tesseract4LogMessageConstant.PAGE_NUMBER_IS_INCORRECT,
+                        page,
+                        inputFile.getAbsolutePath()));
+                return null;
+            }
+            img = pages.get(page);
+        } catch (ImageReadException | IOException e) {
+            LOGGER.error(MessageFormatUtil.format(
+                    Tesseract4LogMessageConstant
+                            .CANNOT_RETRIEVE_PAGES_FROM_IMAGE,
+                    inputFile.getAbsolutePath(),
+                    e.getMessage()));
+        }
+        return img;
+    }
+
+    /**
+     * Saves passed {@link java.awt.image.BufferedImage} to given path as png.
+     * A {@code null} image is skipped; write errors are logged, not thrown.
+     * @param tmpFileName provided file path to save the
+     * {@link java.awt.image.BufferedImage}
+     * @param image provided {@link java.awt.image.BufferedImage} to be saved
+     */
+    static void saveImageToTempPngFile(final String tmpFileName,
+            final BufferedImage image) {
+        if (image != null) {
+            try {
+                ImageIO.write(image, "png", new File(tmpFileName));
+            } catch (Exception e) { // NOSONAR
+                LOGGER.error(MessageFormatUtil.format(
+                        Tesseract4LogMessageConstant.CANNOT_PROCESS_IMAGE,
+                        e.getMessage()));
+            }
+        }
+    }
+
+    /**
+     * Saves passed {@link net.sourceforge.lept4j.Pix} to given path as png.
+     * A {@code null} pix is skipped; write errors are logged, not thrown.
+     * @param tmpFileName provided file path to save the
+     * {@link net.sourceforge.lept4j.Pix}
+     * @param pix provided {@link net.sourceforge.lept4j.Pix} to be saved
+     */
+    static void savePixToTempPngFile(final String tmpFileName,
+            final Pix pix) {
+        if (pix != null) {
+            try {
+                // 3rd argument is gamma (0 = not defined), not a format id
+                Leptonica.INSTANCE.pixWritePng(tmpFileName, pix, 0.0f);
+            } catch (Exception e) { // NOSONAR
+                LOGGER.info(MessageFormatUtil.format(
+                        Tesseract4LogMessageConstant.CANNOT_PROCESS_IMAGE,
+                        e.getMessage()));
+            }
+        }
+    }
+
+    /**
+     * Create temporary copy of input file to avoid issue with tesseract and
+     * different encodings in the path. An existing destination is replaced.
+     * @param src path to the source image
+     * @param dst destination path
+     * @throws IOException if the file cannot be copied
+     */
+    static void createTempFileCopy(final String src, final String dst)
+            throws IOException {
+        Files.copy(Paths.get(src), Paths.get(dst),
+                StandardCopyOption.REPLACE_EXISTING);
+    }
+
+    /**
+     * Returns parent directory for the passed path.
+     *
+     * @param path path to file
+     * @return parent directory where the file is located
+     */
+    static String getParentDirectory(final String path) {
+        return new File(path).getParent();
+    }
+
+    /**
+     * Retrieves list of pages from provided image as list of
+     * {@link java.awt.image.BufferedImage}, one per page and updates
+     * this list for the image using {@link #setListOfPages} method.
+     * Any failure is logged and leaves the current list unchanged.
+     * @param inputFile input image {@link java.io.File}
+     */
+    void initializeImagesListFromTiff(
+            final File inputFile) {
+        try (InputStream is =
+                new FileInputStream(inputFile.getAbsolutePath())) {
+            setListOfPages(Imaging
+                    .getAllBufferedImages(is,
+                            inputFile.getAbsolutePath()));
+        } catch (Exception e) { // NOSONAR
+            LOGGER.error(MessageFormatUtil.format(
+                    Tesseract4LogMessageConstant
+                            .CANNOT_RETRIEVE_PAGES_FROM_IMAGE,
+                    inputFile.getAbsolutePath(),
+                    e.getMessage()));
+        }
+    }
+
+    /**
+     * Gets the pages of the image being processed as a list of
+     * {@link java.awt.image.BufferedImage}, one per page.
+     *
+     * @return a new {@link java.util.ArrayList} holding the current pages
+     */
+    List<BufferedImage> getListOfPages() {
+        return new ArrayList<BufferedImage>(imagePages);
+    }
+
+    /**
+     * Sets the pages of the image being processed as a list of
+     * {@link java.awt.image.BufferedImage}, one per page.
+     * The list is not copied: an unmodifiable view of it is stored.
+     * @param listOfPages list of {@link java.awt.image.BufferedImage} for
+     *                    each page.
+     */
+    void setListOfPages(final List<BufferedImage> listOfPages) {
+        imagePages = Collections.<BufferedImage>unmodifiableList(listOfPages);
+    }
+
+    /**
+     * Runs ocr on the provided in-memory image
+     * and returns the recognition result as a string.
+     * ({@link OutputFormat} is used in .Net version,
+     * in java output format should already be set)
+     * @param tesseractInstance {@link net.sourceforge.tess4j.ITesseract}
+     *                          object to perform OCR
+     * @param image input {@link java.awt.image.BufferedImage} to be processed
+     * @param outputFormat selected {@link OutputFormat} for tesseract
+     * @return result as {@link java.lang.String}, null if image is null
+     * @throws TesseractException if tesseract recognition failed
+     */
+    String getOcrResultAsString(
+            final ITesseract tesseractInstance,
+            final BufferedImage image, final OutputFormat outputFormat)
+            throws TesseractException {
+        if (image == null) {
+            return null;
+        }
+        // outputFormat is not used by the java implementation
+        return tesseractInstance.doOCR(image);
+    }
+
+    /**
+     * Runs ocr on the provided image file
+     * and returns the recognition result as a string.
+     * ({@link OutputFormat} is used in .Net version, in java output format
+     * should already be set)
+     *
+     * @param tesseractInstance {@link net.sourceforge.tess4j.ITesseract}
+     *                          object to perform OCR
+     * @param image input image as {@link java.io.File} to be
+     *              processed
+     * @param outputFormat selected {@link OutputFormat} for tesseract
+     * @return result as {@link java.lang.String}, null if image is null
+     * @throws TesseractException if tesseract recognition failed
+     */
+    String getOcrResultAsString(
+            final ITesseract tesseractInstance,
+            final File image, final OutputFormat outputFormat)
+            throws TesseractException {
+        if (image == null) {
+            return null;
+        }
+        // outputFormat is not used by the java implementation
+        return tesseractInstance.doOCR(image);
+    }
+
+     /**
+     * Performs ocr for the provided image
+     * and returns result as string in required format.
+     * ({@link OutputFormat} is used in .Net version, in java output format
+      * should already be set)
+     *
+     * @param tesseractInstance {@link net.sourceforge.tess4j.ITesseract}
+     *                          object to perform OCR
+     * @param pix input image as {@link net.sourceforge.lept4j.Pix} to be
+     *              processed
+     * @param outputFormat selected {@link OutputFormat} for tesseract
+     * @return result as {@link java.lang.String} in required format
+     * @throws TesseractException if tesseract recognition failed
+     * @throws IOException if it is not possible to convert input image
+     */
+    String getOcrResultAsString(
+            final ITesseract tesseractInstance,
+            final Pix pix, final OutputFormat outputFormat)
+            throws TesseractException, IOException {
+        if (pix != null) {
+            BufferedImage bufferedImage = convertPixToImage(pix);
+            return getOcrResultAsString(tesseractInstance,
+                    bufferedImage, outputFormat);
+        } else {
+            return null;
+        }
+    }
+}
diff --git a/pdfocr-tesseract4/src/main/java/com/itextpdf/pdfocr/tesseract4/TextPositioning.java b/pdfocr-tesseract4/src/main/java/com/itextpdf/pdfocr/tesseract4/TextPositioning.java
new file mode 100644
index 0000000..c8edb07
--- /dev/null
+++ b/pdfocr-tesseract4/src/main/java/com/itextpdf/pdfocr/tesseract4/TextPositioning.java
@@ -0,0 +1,43 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr.tesseract4;
+
+/**
+ * Enumeration of the possible types of text positioning.
+ * It is used when the selected Reader is able to process the text
+ * either by lines or by words and to return coordinates for the
+ * selected type of item.
+ * For tesseract this value makes sense only if selected
+ * {@link OutputFormat} is {@link OutputFormat#HOCR}.
+ */
+public enum TextPositioning {
+    /**
+     * Text will be located by lines retrieved from hocr file.
+     * (default value)
+     */
+    BY_LINES,
+    /**
+     * Text will be located by words retrieved from hocr file.
+     */
+    BY_WORDS
+}
diff --git a/pdfocr-tesseract4/src/main/java/com/itextpdf/pdfocr/tesseract4/events/PdfOcrTesseract4Event.java b/pdfocr-tesseract4/src/main/java/com/itextpdf/pdfocr/tesseract4/events/PdfOcrTesseract4Event.java
new file mode 100644
index 0000000..40dcf67
--- /dev/null
+++ b/pdfocr-tesseract4/src/main/java/com/itextpdf/pdfocr/tesseract4/events/PdfOcrTesseract4Event.java
@@ -0,0 +1,61 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr.tesseract4.events;
+
+import com.itextpdf.kernel.counter.event.IGenericEvent;
+
+/**
+ * Class for ocr events; its instances are the constants declared below.
+ */
+public class PdfOcrTesseract4Event implements IGenericEvent {
+
+    public static final PdfOcrTesseract4Event TESSERACT4_IMAGE_OCR = new PdfOcrTesseract4Event("tesseract4-image-ocr");
+    public static final PdfOcrTesseract4Event TESSERACT4_IMAGE_TO_PDF = new PdfOcrTesseract4Event("tesseract4-image-to-pdf");
+    public static final PdfOcrTesseract4Event TESSERACT4_IMAGE_TO_PDFA = new PdfOcrTesseract4Event("tesseract4-image-to-pdfa");
+
+    private static final String PDF_OCR_TESSERACT4_ORIGIN_ID = "com.itextpdf.pdfocr.tesseract4";
+
+    private final String subtype;
+
+    private PdfOcrTesseract4Event(String subtype) {
+        this.subtype = subtype;
+    }
+
+    /**
+     * Gets the type of the event
+     * @return the event type: "pdfOcr-" followed by the subtype
+     */
+    @Override
+    public String getEventType() {
+        return "pdfOcr-" + subtype;
+    }
+
+    /**
+     * Gets the origin id of the event
+     * @return the origin id, identical for every event of this class
+     */
+    @Override
+    public String getOriginId() {
+        return PDF_OCR_TESSERACT4_ORIGIN_ID;
+    }
+}
diff --git a/pdfocr-tesseract4/src/main/java/com/itextpdf/pdfocr/tesseract4/package-info.java b/pdfocr-tesseract4/src/main/java/com/itextpdf/pdfocr/tesseract4/package-info.java
new file mode 100644
index 0000000..bfc5482
--- /dev/null
+++ b/pdfocr-tesseract4/src/main/java/com/itextpdf/pdfocr/tesseract4/package-info.java
@@ -0,0 +1 @@
+package com.itextpdf.pdfocr.tesseract4;
diff --git a/pdfocr-tesseract4/src/test/java/com/itextpdf/metainfo/TestMetaInfo.java b/pdfocr-tesseract4/src/test/java/com/itextpdf/metainfo/TestMetaInfo.java
new file mode 100644
index 0000000..1f10d9f
--- /dev/null
+++ b/pdfocr-tesseract4/src/test/java/com/itextpdf/metainfo/TestMetaInfo.java
@@ -0,0 +1,34 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.metainfo;
+
+import com.itextpdf.kernel.counter.event.IMetaInfo;
+
+/**
+ * This class is used for test purposes.
+ * Please be aware that it's put in the com.itextpdf.metainfo package deliberately,
+ * so that it belongs to neither the com.itextpdf.pdfocr nor the com.itextpdf.pdfocr.tesseract4 package.
+ */
+public class TestMetaInfo implements IMetaInfo {
+    private static final long serialVersionUID = 5521060335175170386L;
+}
diff --git a/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/IntegrationTestHelper.java b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/IntegrationTestHelper.java
new file mode 100644
index 0000000..394e3d6
--- /dev/null
+++ b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/IntegrationTestHelper.java
@@ -0,0 +1,525 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr;
+
+import com.itextpdf.io.font.PdfEncodings;
+import com.itextpdf.io.util.MessageFormatUtil;
+import com.itextpdf.kernel.font.PdfFont;
+import com.itextpdf.kernel.pdf.DocumentProperties;
+import com.itextpdf.kernel.pdf.PdfDocument;
+import com.itextpdf.kernel.pdf.PdfName;
+import com.itextpdf.kernel.pdf.PdfReader;
+import com.itextpdf.kernel.pdf.PdfWriter;
+import com.itextpdf.kernel.pdf.WriterProperties;
+import com.itextpdf.kernel.pdf.canvas.CanvasTag;
+import com.itextpdf.kernel.pdf.canvas.parser.EventType;
+import com.itextpdf.kernel.pdf.canvas.parser.PdfCanvasProcessor;
+import com.itextpdf.kernel.pdf.canvas.parser.data.IEventData;
+import com.itextpdf.kernel.pdf.canvas.parser.data.ImageRenderInfo;
+import com.itextpdf.kernel.pdf.canvas.parser.data.TextRenderInfo;
+import com.itextpdf.kernel.pdf.canvas.parser.listener.ITextChunkLocation;
+import com.itextpdf.kernel.pdf.canvas.parser.listener.LocationTextExtractionStrategy;
+import com.itextpdf.kernel.pdf.canvas.parser.listener.TextChunk;
+import com.itextpdf.layout.font.FontProvider;
+import com.itextpdf.pdfocr.tesseract4.AbstractTesseract4OcrEngine;
+import com.itextpdf.pdfocr.tesseract4.Tesseract4ExecutableOcrEngine;
+import com.itextpdf.pdfocr.tesseract4.Tesseract4LibOcrEngine;
+import com.itextpdf.pdfocr.tesseract4.Tesseract4LogMessageConstant;
+import com.itextpdf.pdfocr.tesseract4.Tesseract4OcrEngineProperties;
+import com.itextpdf.test.ExtendedITextTest;
+import com.itextpdf.test.annotations.type.IntegrationTest;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import org.junit.Assert;
+import org.junit.experimental.categories.Category;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+@Category(IntegrationTest.class)
+public class IntegrationTestHelper extends ExtendedITextTest {
+
+    private static final Logger LOGGER = LoggerFactory
+            .getLogger(IntegrationTestHelper.class);
+
+    // directory with test files
+    public static final String TEST_DIRECTORY = "./src/test/resources/com/itextpdf/pdfocr/";
+    private static final String TARGET_FOLDER = "./target/test/resources/com/itextpdf/pdfocr/";
+
+    // directory with trained data for tests
+    protected static final String LANG_TESS_DATA_DIRECTORY = TEST_DIRECTORY + "tessdata";
+    // directory with trained script data for tests
+    protected static final String SCRIPT_TESS_DATA_DIRECTORY = TEST_DIRECTORY + "tessdata" + File.separator + "script";
+    // directory with test image files
+    protected static final String TEST_IMAGES_DIRECTORY = TEST_DIRECTORY + "images" + File.separator;
+    // directory with fonts
+    protected static final String TEST_FONTS_DIRECTORY = TEST_DIRECTORY + "fonts" + File.separator;
+    // directory with test documents
+    protected static final String TEST_DOCUMENTS_DIRECTORY = TEST_DIRECTORY + "documents" + File.separator;
+
+    // path to font for hindi
+    protected static final String NOTO_SANS_FONT_PATH = TEST_FONTS_DIRECTORY + "NotoSans-Regular.ttf";
+    // path to font for japanese
+    protected static final String KOSUGI_FONT_PATH = TEST_FONTS_DIRECTORY + "Kosugi-Regular.ttf";
+    // path to font for chinese
+    protected static final String NOTO_SANS_SC_FONT_PATH = TEST_FONTS_DIRECTORY + "NotoSansSC-Regular.otf";
+    // path to font for arabic
+    protected static final String CAIRO_FONT_PATH = TEST_FONTS_DIRECTORY + "Cairo-Regular.ttf";
+    // path to font for georgian
+    protected static final String FREE_SANS_FONT_PATH = TEST_FONTS_DIRECTORY + "FreeSans.ttf";
+
+    // read-only lookup: test font file path -> font name associated with that file
+    protected static final Map<String, String> FONT_PATH_TO_FONT_NAME_MAP;
+    static {
+        Map<String, String> namesByPath = new HashMap<>();
+        namesByPath.put(NOTO_SANS_FONT_PATH, "NotoSans");
+        namesByPath.put(KOSUGI_FONT_PATH, "Kosugi");
+        namesByPath.put(NOTO_SANS_SC_FONT_PATH, "NotoSansSC");
+        namesByPath.put(CAIRO_FONT_PATH, "Cairo");
+        namesByPath.put(FREE_SANS_FONT_PATH, "FreeSans");
+        FONT_PATH_TO_FONT_NAME_MAP = Collections.unmodifiableMap(namesByPath);
+    }
+    
+    public enum ReaderType {
+        LIB, // getTesseractReader returns the shared Tesseract4LibOcrEngine for this value
+        EXECUTABLE // getTesseractReader returns the shared Tesseract4ExecutableOcrEngine
+    }
+
+    private static Tesseract4LibOcrEngine tesseractLibReader = null;
+    private static Tesseract4ExecutableOcrEngine tesseractExecutableReader = null;
+
+    public IntegrationTestHelper() {
+        // every instantiation rebuilds both shared (static) engines
+        Tesseract4OcrEngineProperties engineProps = new Tesseract4OcrEngineProperties();
+        engineProps.setPathToTessData(getTessDataDirectory());
+        tesseractLibReader = new Tesseract4LibOcrEngine(engineProps);
+        String executablePath = getTesseractDirectory();
+        tesseractExecutableReader = new Tesseract4ExecutableOcrEngine(executablePath, engineProps);
+    }
+
+    protected static AbstractTesseract4OcrEngine getTesseractReader(ReaderType type) {
+        // anything other than LIB resolves to the executable-based engine
+        if (type.equals(ReaderType.LIB)) {
+            return tesseractLibReader;
+        }
+        return tesseractExecutableReader;
+    }
+
+    protected static Tesseract4LibOcrEngine getTesseract4LibOcrEngine() {
+        return tesseractLibReader; // static field, assigned in the IntegrationTestHelper constructor
+    }
+
+    protected static String getTesseractDirectory() {
+        // a configured "tesseractDir" only matters when the OS name contains "win"; otherwise plain "tesseract"
+        String configuredDir = System.getProperty("tesseractDir");
+        String osName = System.getProperty("os.name");
+        osName = (osName == null) ? System.getProperty("OS") : osName;
+        boolean hasConfiguredDir = configuredDir != null && !configuredDir.isEmpty();
+        return (osName.toLowerCase().contains("win") && hasConfiguredDir) ? configuredDir + "\\tesseract.exe" : "tesseract";
+    }
+
+    /** Returns the target directory, creating it first when it does not exist yet. */
+    public static String getTargetDirectory() {
+        if (Files.exists(java.nio.file.Paths.get(TARGET_FOLDER))) {
+            return TARGET_FOLDER;
+        }
+        // target/test may be missing, so the folder is created on demand
+        createDestinationFolder(TARGET_FOLDER);
+        return TARGET_FOLDER;
+    }
+
+    protected static File getTessDataDirectory() {
+        return new File(LANG_TESS_DATA_DIRECTORY); // a new File instance on every call
+    }
+
+    /**
+     * Runs OCR on the given image file, saves the resulting PDF to the target directory
+     * and returns the text extracted from the requested page, or null on an IOException.
+     */
+    protected String getTextFromPdf(AbstractTesseract4OcrEngine tesseractReader,
+            File file, int page, List<String> languages, List<String> fonts) {
+        String extractedText = null;
+        try {
+            String targetDir = getTargetDirectory();
+            String imagePath = file.getAbsolutePath();
+            String pdfPath = targetDir + getImageName(imagePath, languages) + ".pdf";
+            doOcrAndSavePdfToPath(tesseractReader, imagePath, pdfPath, languages, fonts);
+            extractedText = getTextFromPdfLayer(pdfPath, null, page);
+        } catch (IOException e) {
+            LOGGER.error(e.getMessage());
+        }
+        return extractedText;
+    }
+
+    /**
+     * Single-font convenience overload: wraps fontPath into a one-element list and delegates.
+     */
+    protected String getTextFromPdf(AbstractTesseract4OcrEngine tesseractReader,
+            File file, int page, List<String> languages, String fontPath) {
+        List<String> singleFont = Collections.<String>singletonList(fontPath);
+        return getTextFromPdf(tesseractReader, file, page, languages, singleFont);
+    }
+
+    /**
+     * First-page overload: delegates with page 1 and a single font path.
+     */
+    protected String getTextFromPdf(AbstractTesseract4OcrEngine tesseractReader, File file,
+            List<String> languages, String fontPath) {
+        return getTextFromPdf(tesseractReader, file, 1, languages, fontPath);
+    }
+
+    /**
+     * First-page overload that passes a new empty font list.
+     */
+    protected String getTextFromPdf(AbstractTesseract4OcrEngine tesseractReader, File file,
+            List<String> languages) {
+        List<String> noFonts = new ArrayList<String>();
+        return getTextFromPdf(tesseractReader, file, 1, languages, noFonts);
+    }
+
+    /**
+     * Page-aware overload that passes a new empty font list.
+     */
+    protected String getTextFromPdf(AbstractTesseract4OcrEngine tesseractReader, File file, int page,
+            List<String> languages) {
+        return getTextFromPdf(tesseractReader, file, page, languages, new ArrayList<String>());
+    }
+
+    /**
+     * First-page overload with no languages (null) and a new empty font list.
+     */
+    protected String getTextFromPdf(AbstractTesseract4OcrEngine tesseractReader, File file) {
+        return getTextFromPdf(tesseractReader, file, 1, null, new ArrayList<String>());
+    }
+
+    /**
+     * Get text from layer specified by name from page; the document is closed even on failure.
+     */
+    protected String getTextFromPdfLayer(String pdfPath, String layerName,
+            int page, boolean useActualText) throws IOException {
+        ExtractionStrategy textExtractionStrategy = new ExtractionStrategy(
+                layerName);
+        textExtractionStrategy.setUseActualText(useActualText);
+        // try-with-resources: the document is released even if page processing throws
+        try (PdfDocument pdfDocument = new PdfDocument(new PdfReader(pdfPath),
+                new DocumentProperties().setEventCountingMetaInfo(new PdfOcrMetaInfo()))) {
+            PdfCanvasProcessor processor = new PdfCanvasProcessor(
+                    textExtractionStrategy);
+            processor.processPageContent(pdfDocument.getPage(page));
+        }
+
+        return textExtractionStrategy.getResultantText();
+    }
+
+    /**
+     * Same as the four-argument overload with useActualText set to false.
+     */
+    protected String getTextFromPdfLayer(String pdfPath, String layerName,
+            int page) throws IOException {
+        return getTextFromPdfLayer(pdfPath, layerName, page, false);
+    }
+
+    /**
+     * Extracts the layer text with actual text enabled and strips every space
+     * character from it, including those added after each glyph by
+     * {@link LocationTextExtractionStrategy#getResultantText()}.
+     */
+    protected String getTextFromPdfLayerUsingActualText(String pdfPath,
+            String layerName, int page) throws IOException {
+        String spacedText = getTextFromPdfLayer(pdfPath, layerName, page, true);
+        return spacedText.replace(" ", "");
+    }
+
+    /**
+     * Performs OCR on the image at path "input", writes the result to a .txt file in the
+     * target directory and returns that file's content; null if any Exception is caught.
+     */
+    protected String getRecognizedTextFromTextFile(
+            AbstractTesseract4OcrEngine tesseractReader, String input,
+            List<String> languages) {
+        String recognizedText = null;
+        try {
+            String txtPath = getTargetDirectory() + getImageName(input, languages) + ".txt";
+            doOcrAndSaveToTextFile(tesseractReader, input, txtPath, languages);
+            File txtFile = new File(txtPath);
+            recognizedText = getTextFromTextFile(txtFile);
+        } catch (Exception e) {
+            // any Exception is logged and surfaces to the caller as a null result
+            LOGGER.error(e.getMessage());
+        }
+
+        return recognizedText;
+    }
+
+    /**
+     * Overload without explicit languages: delegates to the three-argument
+     * variant, passing null as the language list.
+     */
+    protected String getRecognizedTextFromTextFile(
+            AbstractTesseract4OcrEngine tesseractReader, String input) {
+        return getRecognizedTextFromTextFile(tesseractReader, input, null);
+    }
+
+    /**
+     * Performs OCR on the image at imgPath and writes the recognized text to txtPath.
+     * A non-null language list is applied to the engine properties beforehand.
+     */
+    protected void doOcrAndSaveToTextFile(
+            AbstractTesseract4OcrEngine tesseractReader, String imgPath,
+            String txtPath, List<String> languages) {
+        if (languages != null) {
+            Tesseract4OcrEngineProperties engineProps = tesseractReader.getTesseract4OcrEngineProperties();
+            engineProps.setLanguages(languages);
+            tesseractReader.setTesseract4OcrEngineProperties(engineProps);
+        }
+
+        List<File> inputImages = Collections.<File>singletonList(new File(imgPath));
+        tesseractReader.createTxtFile(inputImages, new File(txtPath));
+
+        if (languages != null) {
+            // after OCR the engine must still report as many languages as were requested
+            int configured = tesseractReader.getTesseract4OcrEngineProperties().getLanguages().size();
+            Assert.assertEquals(languages.size(), configured);
+        }
+    }
+
+    /**
+     * Performs OCR on the image at imgPath and saves the resulting PDF document to pdfPath.
+     * Non-null languages are applied to the engine; non-empty fonts and a non-null color
+     * are applied to the PDF creator properties. (Method is used for compare tool)
+     */
+    protected void doOcrAndSavePdfToPath(
+            AbstractTesseract4OcrEngine tesseractReader, String imgPath,
+            String pdfPath, List<String> languages,
+            List<String> fonts, com.itextpdf.kernel.colors.Color color) {
+        if (languages != null) {
+            Tesseract4OcrEngineProperties engineProps = tesseractReader.getTesseract4OcrEngineProperties();
+            engineProps.setLanguages(languages);
+            tesseractReader.setTesseract4OcrEngineProperties(engineProps);
+        }
+
+        OcrPdfCreatorProperties creatorProps = new OcrPdfCreatorProperties();
+        creatorProps.setPdfLang("en-US");
+        creatorProps.setTitle("");
+
+        if (fonts != null && !fonts.isEmpty()) {
+            FontProvider fontProvider = new FontProvider();
+            for (String fontPath : fonts) {
+                // each font is added together with the name listed in FONT_PATH_TO_FONT_NAME_MAP
+                String fontName = FONT_PATH_TO_FONT_NAME_MAP.get(fontPath);
+                fontProvider.getFontSet().addFont(fontPath, PdfEncodings.IDENTITY_H, fontName);
+            }
+            creatorProps.setFontProvider(fontProvider);
+        }
+        if (color != null) {
+            creatorProps.setTextColor(color);
+        }
+        if (languages != null) {
+            int configured = tesseractReader.getTesseract4OcrEngineProperties().getLanguages().size();
+            Assert.assertEquals(languages.size(), configured);
+        }
+
+        OcrPdfCreator ocrPdfCreator = new OcrPdfCreator(tesseractReader, creatorProps);
+        try (PdfWriter pdfWriter = getPdfWriter(pdfPath)) {
+            List<File> inputImages = Collections.<File>singletonList(new File(imgPath));
+            PdfDocument doc = ocrPdfCreator.createPdf(inputImages, pdfWriter);
+
+            Assert.assertNotNull(doc);
+            doc.close();
+        } catch (IOException e) {
+            // an IOException here is only logged; it does not reach the caller
+            LOGGER.error(e.getMessage());
+        }
+    }
+
+    /**
+     * Performs OCR on the image at imgPath and saves the resulting PDF to pdfPath
+     * using the given text color and no custom fonts.
+     */
+    protected void doOcrAndSavePdfToPath(
+            AbstractTesseract4OcrEngine tesseractReader, String imgPath,
+            String pdfPath, List<String> languages,
+            com.itextpdf.kernel.colors.Color color) {
+        List<String> noFonts = null;
+        doOcrAndSavePdfToPath(tesseractReader, imgPath, pdfPath, languages, noFonts, color);
+    }
+
+    /**
+     * Perform OCR using provided path to image (imgPath)
+     * and save result PDF document to "pdfPath" without setting a text color.
+     * (Text will be invisible)
+     */
+    protected void doOcrAndSavePdfToPath(AbstractTesseract4OcrEngine tesseractReader, String imgPath,
+            String pdfPath, List<String> languages, List<String> fonts) {
+        com.itextpdf.kernel.colors.Color noColor = null;
+        doOcrAndSavePdfToPath(tesseractReader, imgPath, pdfPath, languages, fonts, noColor);
+    }
+
+    /**
+     * Perform OCR using provided path to image (imgPath)
+     * and save result PDF document to "pdfPath",
+     * passing null for languages, fonts and color. (Method is used for compare tool)
+     */
+    protected void doOcrAndSavePdfToPath(
+            AbstractTesseract4OcrEngine tesseractReader, String imgPath,
+            String pdfPath) {
+        doOcrAndSavePdfToPath(tesseractReader, imgPath, pdfPath, null,
+                null, null);
+    }
+
+    /**
+     * Retrieve text from given txt file.
+     */
+    protected String getTextFromTextFile(File file) {
+        String content = null;
+        try {
+            content = new String(
+                    Files.readAllBytes(file.toPath()),
+                    StandardCharsets.UTF_8);
+        } catch (IOException e) {
+            LOGGER.error(MessageFormatUtil.format(
+                    Tesseract4LogMessageConstant.CANNOT_READ_FILE,
+                    file.getAbsolutePath(),
+                    e.getMessage()));
+        }
+        return content;
+    }
+
+    /**
+     * Creates a PdfWriter for pdfPath with writer properties on which addUAXmpMetadata() was called.
+     */
+    protected PdfWriter getPdfWriter(String pdfPath) throws FileNotFoundException {
+        WriterProperties uaProperties = new WriterProperties().addUAXmpMetadata();
+        return new PdfWriter(pdfPath, uaProperties);
+    }
+
+    /**
+     * Builds "<file name with dots replaced by underscores>[_<languages joined>]" from path;
+     * a path without any File.separator is treated as a bare file name.
+     */
+    protected String getImageName(String path, List<String> languages) {
+        String lang = (languages != null && !languages.isEmpty())
+                ? "_" + String.join("", languages) : "";
+        // "+ 1" skips the separator and maps "not found" (-1) to 0 instead of throwing
+        int nameStart = path.lastIndexOf(File.separator) + 1;
+        String img = path.substring(nameStart).replace(".", "_");
+        return img + lang;
+    }
+
+    public static class ExtractionStrategy extends LocationTextExtractionStrategy {
+        private com.itextpdf.kernel.geom.Rectangle imageBBoxRectangle;
+        private com.itextpdf.kernel.colors.Color fillColor;
+        private String layerName;
+        private PdfFont pdfFont;
+
+        public ExtractionStrategy(String name) {
+            super();
+            layerName = name;
+        }
+
+        public void setFillColor(com.itextpdf.kernel.colors.Color color) {
+            fillColor = color;
+        }
+
+        public com.itextpdf.kernel.colors.Color getFillColor() {
+            return fillColor;
+        }
+
+        public void setPdfFont(PdfFont font) {
+            pdfFont = font;
+        }
+
+        public PdfFont getPdfFont() {
+            return pdfFont;
+        }
+
+        public com.itextpdf.kernel.geom.Rectangle getImageBBoxRectangle() { return this.imageBBoxRectangle; }
+
+        public void setImageBBoxRectangle(com.itextpdf.kernel.geom.Rectangle imageBBoxRectangle) {
+            this.imageBBoxRectangle = imageBBoxRectangle;
+        }
+
+        @Override
+        protected boolean isChunkAtWordBoundary(TextChunk chunk,
+                TextChunk previousChunk) {
+            ITextChunkLocation current = chunk.getLocation();
+            ITextChunkLocation previous = previousChunk.getLocation();
+            // if either chunk has coinciding start and end locations, no word boundary is reported
+            boolean currentIsPoint = current.getStartLocation().equals(current.getEndLocation());
+            if (currentIsPoint || previous.getEndLocation().equals(previous.getStartLocation())) {
+                return false;
+            }
+
+            // boundary when (current end - previous start) exceeds the mean of both char-space widths
+            float parallelDelta = current.distParallelEnd() - previous.distParallelStart();
+            float meanSpaceWidth = (current.getCharSpaceWidth() + previous.getCharSpaceWidth()) / 2.0f;
+            return parallelDelta > meanSpaceWidth;
+        }
+
+        @Override
+        public void eventOccurred(IEventData data, EventType type) {
+            if (type.equals(EventType.RENDER_TEXT) || type.equals(EventType.RENDER_IMAGE)) {
+                String tagName = getTagName(data, type);
+                if ((tagName == null && layerName == null) || (layerName != null && layerName.equals(tagName))) {
+                    if (type.equals(EventType.RENDER_TEXT)) {
+                        TextRenderInfo renderInfo = (TextRenderInfo) data;
+                        setFillColor(renderInfo.getGraphicsState()
+                                .getFillColor());
+                        setPdfFont(renderInfo.getGraphicsState().getFont());
+                        super.eventOccurred(data, type);
+                    }
+                    else if (type.equals(EventType.RENDER_IMAGE)) {
+                        ImageRenderInfo renderInfo = (ImageRenderInfo) data;
+                        com.itextpdf.kernel.geom.Matrix ctm = renderInfo.getImageCtm();
+                        setImageBBoxRectangle(new com.itextpdf.kernel.geom.Rectangle(ctm.get(6), ctm.get(7),
+                                ctm.get(0), ctm.get(4)));
+                    }
+                }
+            }
+        }
+
+        private String getTagName(IEventData data, EventType type) {
+            // yields the Name property of element 0 of the canvas tag hierarchy, or null if unavailable
+            java.util.List<CanvasTag> tags = null;
+            if (type.equals(EventType.RENDER_TEXT)) {
+                tags = ((TextRenderInfo) data).getCanvasTagHierarchy();
+            } else if (type.equals(EventType.RENDER_IMAGE)) {
+                tags = ((ImageRenderInfo) data).getCanvasTagHierarchy();
+            }
+
+            if (tags == null || tags.isEmpty()) {
+                return null;
+            }
+            Object name = tags.get(0).getProperties().get(PdfName.Name);
+            return name == null ? null : name.toString();
+        }
+    }
+}
diff --git a/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/TesseractExecutableIntegrationTest.java b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/TesseractExecutableIntegrationTest.java
new file mode 100644
index 0000000..16b1d56
--- /dev/null
+++ b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/TesseractExecutableIntegrationTest.java
@@ -0,0 +1,86 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr;
+
+import com.itextpdf.pdfocr.tesseract4.Tesseract4ExecutableOcrEngine;
+import com.itextpdf.pdfocr.tesseract4.Tesseract4LogMessageConstant;
+import com.itextpdf.pdfocr.tesseract4.Tesseract4OcrEngineProperties;
+import com.itextpdf.pdfocr.tesseract4.Tesseract4OcrException;
+import com.itextpdf.test.annotations.LogMessage;
+import com.itextpdf.test.annotations.LogMessages;
+import com.itextpdf.test.annotations.type.IntegrationTest;
+
+import java.io.File;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.ExpectedException;
+
+@Category(IntegrationTest.class)
+public class TesseractExecutableIntegrationTest extends IntegrationTestHelper {
+
+    @Rule
+    public ExpectedException junitExpectedException = ExpectedException.none();
+
+    @LogMessages(messages = {
+        @LogMessage(messageTemplate = Tesseract4OcrException.CANNOT_FIND_PATH_TO_TESSERACT_EXECUTABLE, count = 1)
+    })
+    @Test
+    public void testNullPathToTesseractExecutable() {
+        junitExpectedException.expect(Tesseract4OcrException.class);
+        junitExpectedException.expectMessage(Tesseract4OcrException.CANNOT_FIND_PATH_TO_TESSERACT_EXECUTABLE);
+
+        File image = new File(TEST_IMAGES_DIRECTORY + "spanish_01.jpg");
+        // build the engine from properties only, then explicitly reset its executable path to null
+        Tesseract4OcrEngineProperties engineProps = new Tesseract4OcrEngineProperties();
+        Tesseract4ExecutableOcrEngine nullPathEngine = new Tesseract4ExecutableOcrEngine(engineProps);
+        nullPathEngine.setPathToExecutable(null);
+        getTextFromPdf(nullPathEngine, image);
+    }
+
+    @LogMessages(messages = {
+        @LogMessage(messageTemplate = Tesseract4OcrException.CANNOT_FIND_PATH_TO_TESSERACT_EXECUTABLE, count = 1)
+    })
+    @Test
+    public void testEmptyPathToTesseractExecutable() {
+        junitExpectedException.expect(Tesseract4OcrException.class);
+        junitExpectedException.expectMessage(Tesseract4OcrException.CANNOT_FIND_PATH_TO_TESSERACT_EXECUTABLE);
+        File image = new File(TEST_IMAGES_DIRECTORY + "spanish_01.jpg");
+        Tesseract4ExecutableOcrEngine emptyPathEngine = new Tesseract4ExecutableOcrEngine("", new Tesseract4OcrEngineProperties());
+        getTextFromPdf(emptyPathEngine, image);
+    }
+
+    @LogMessages(messages = {
+        @LogMessage(messageTemplate = Tesseract4LogMessageConstant.COMMAND_FAILED, count = 1),
+        @LogMessage(messageTemplate = Tesseract4OcrException.TESSERACT_NOT_FOUND, count = 1)
+    })
+    @Test
+    public void testIncorrectPathToTesseractExecutable() {
+        junitExpectedException.expect(Tesseract4OcrException.class);
+        junitExpectedException.expectMessage(Tesseract4OcrException.TESSERACT_NOT_FOUND);
+        File image = new File(TEST_IMAGES_DIRECTORY + "spanish_01.jpg");
+        // incorrect executable location on purpose; the test expects TESSERACT_NOT_FOUND
+        String bogusExecutable = "path\\to\\executable\\";
+        getTextFromPdf(new Tesseract4ExecutableOcrEngine(bogusExecutable, new Tesseract4OcrEngineProperties()), image);
+    }
+}
diff --git a/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/events/EventCountingExecutableTest.java b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/events/EventCountingExecutableTest.java
new file mode 100644
index 0000000..d651e9c
--- /dev/null
+++ b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/events/EventCountingExecutableTest.java
@@ -0,0 +1,53 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr.events;
+
+import com.itextpdf.io.util.MessageFormatUtil;
+import com.itextpdf.pdfocr.tesseract4.Tesseract4LogMessageConstant;
+import com.itextpdf.pdfocr.tesseract4.Tesseract4OcrException;
+import com.itextpdf.test.annotations.LogMessage;
+import com.itextpdf.test.annotations.LogMessages;
+import com.itextpdf.test.annotations.type.IntegrationTest;
+
+import java.io.File;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+@Category(IntegrationTest.class)
+public class EventCountingExecutableTest extends EventCountingTest {
+    public EventCountingExecutableTest() {
+        super(ReaderType.EXECUTABLE);
+    }
+
+    @Test
+    @LogMessages(messages = {@LogMessage(messageTemplate = Tesseract4LogMessageConstant.CANNOT_READ_INPUT_IMAGE)})
+    public void testEventCountingCustomMetaInfoError() {
+        // the expected message is formatted with the absolute path of the image
+        File image = new File(TEST_IMAGES_DIRECTORY + "numbers_101.jpg");
+        String expectedMessage = MessageFormatUtil.format(
+                Tesseract4LogMessageConstant.CANNOT_READ_INPUT_IMAGE, image.getAbsolutePath());
+        junitExpectedException.expect(Tesseract4OcrException.class);
+        junitExpectedException.expectMessage(expectedMessage);
+        super.testEventCountingCustomMetaInfoError();
+    }
+}
diff --git a/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/events/EventCountingLibTest.java b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/events/EventCountingLibTest.java
new file mode 100644
index 0000000..2fce3ba
--- /dev/null
+++ b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/events/EventCountingLibTest.java
@@ -0,0 +1,55 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr.events;
+
+import com.itextpdf.io.util.MessageFormatUtil;
+import com.itextpdf.pdfocr.tesseract4.Tesseract4LogMessageConstant;
+import com.itextpdf.pdfocr.tesseract4.Tesseract4OcrException;
+import com.itextpdf.test.annotations.LogMessage;
+import com.itextpdf.test.annotations.LogMessages;
+import com.itextpdf.test.annotations.type.IntegrationTest;
+
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+@Category(IntegrationTest.class)
+public class EventCountingLibTest extends EventCountingTest {
+    public EventCountingLibTest() {
+        super(ReaderType.LIB);
+    }
+
+    @Test
+    @LogMessages(messages = {
+            @LogMessage(messageTemplate = Tesseract4LogMessageConstant.TESSERACT_FAILED),
+            @LogMessage(messageTemplate = Tesseract4OcrException.TESSERACT_FAILED)
+    })
+    public void testEventCountingCustomMetaInfoError() {
+        // the image path is formatted into the expected message as built (not made absolute)
+        String imagePath = TEST_IMAGES_DIRECTORY + "numbers_101.jpg";
+        String expectedMessage = MessageFormatUtil.format(
+                Tesseract4OcrException.TESSERACT_FAILED, imagePath);
+        junitExpectedException.expect(Tesseract4OcrException.class);
+        junitExpectedException.expectMessage(expectedMessage);
+        super.testEventCountingCustomMetaInfoError();
+    }
+}
diff --git a/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/events/EventCountingTest.java b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/events/EventCountingTest.java
new file mode 100644
index 0000000..1540b42
--- /dev/null
+++ b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/events/EventCountingTest.java
@@ -0,0 +1,290 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr.events;
+
+import com.itextpdf.io.util.MessageFormatUtil;
+import com.itextpdf.kernel.counter.EventCounter;
+import com.itextpdf.kernel.counter.EventCounterHandler;
+import com.itextpdf.kernel.counter.IEventCounterFactory;
+import com.itextpdf.kernel.counter.SimpleEventCounterFactory;
+import com.itextpdf.kernel.counter.event.IEvent;
+import com.itextpdf.kernel.counter.event.IMetaInfo;
+import com.itextpdf.kernel.pdf.PdfOutputIntent;
+import com.itextpdf.kernel.pdf.PdfWriter;
+import com.itextpdf.metainfo.TestMetaInfo;
+import com.itextpdf.pdfocr.IntegrationTestHelper;
+import com.itextpdf.pdfocr.OcrPdfCreator;
+import com.itextpdf.pdfocr.OcrPdfCreatorProperties;
+import com.itextpdf.pdfocr.tesseract4.AbstractTesseract4OcrEngine;
+import com.itextpdf.pdfocr.tesseract4.Tesseract4LogMessageConstant;
+import com.itextpdf.pdfocr.tesseract4.Tesseract4OcrEngineProperties;
+import com.itextpdf.pdfocr.tesseract4.Tesseract4OcrException;
+import com.itextpdf.pdfocr.tesseract4.events.PdfOcrTesseract4Event;
+import com.itextpdf.test.annotations.LogMessage;
+import com.itextpdf.test.annotations.LogMessages;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.ExpectedException;
+
+public abstract class EventCountingTest extends IntegrationTestHelper {
+
+    protected static final String PROFILE_FOLDER = "./src/test/resources/com/itextpdf/pdfocr/events/";
+
+    @Rule
+    public ExpectedException junitExpectedException = ExpectedException.none();
+
+    AbstractTesseract4OcrEngine tesseractReader;
+    String testFileTypeName;
+    private boolean isExecutableReaderType;
+
+    public EventCountingTest(ReaderType type) {
+        isExecutableReaderType = type.equals(ReaderType.EXECUTABLE);
+        if (isExecutableReaderType) {
+            testFileTypeName = "executable";
+        } else {
+            testFileTypeName = "lib";
+        }
+        tesseractReader = getTesseractReader(type);
+    }
+
+    // Before each test: hand the reader fresh properties whose tessdata path comes from getTessDataDirectory().
+    @Before
+    public void initTesseractProperties() {
+        Tesseract4OcrEngineProperties freshProperties = new Tesseract4OcrEngineProperties();
+        freshProperties.setPathToTessData(getTessDataDirectory());
+        tesseractReader.setTesseract4OcrEngineProperties(freshProperties);
+    }
+
+    // A single image converted to PDF must be reported as exactly one image-to-pdf event without meta info.
+    @Test
+    public void testEventCountingPdfEvent() {
+        File image = new File(TEST_IMAGES_DIRECTORY + "numbers_01.jpg");
+        TestEventCounter counter = new TestEventCounter();
+        IEventCounterFactory counterFactory = new SimpleEventCounterFactory(counter);
+        EventCounterHandler.getInstance().register(counterFactory);
+        try {
+            doImageToPdfOcr(tesseractReader, Arrays.asList(image));
+
+            List<IEvent> reported = counter.getEvents();
+            Assert.assertEquals(1, reported.size());
+            Assert.assertSame(PdfOcrTesseract4Event.TESSERACT4_IMAGE_TO_PDF, reported.get(0));
+            Assert.assertNull(counter.getMetaInfos().get(0));
+        } finally {
+            EventCounterHandler.getInstance().unregister(counterFactory);
+        }
+    }
+
+    // The same image passed twice to one createPdf call must still be counted as a single image-to-pdf event.
+    @Test
+    public void testEventCountingSeveralImagesOneImageToPdfEvent() {
+        File image = new File(TEST_IMAGES_DIRECTORY + "numbers_01.jpg");
+        TestEventCounter counter = new TestEventCounter();
+        IEventCounterFactory counterFactory = new SimpleEventCounterFactory(counter);
+        EventCounterHandler.getInstance().register(counterFactory);
+        try {
+            doImageToPdfOcr(tesseractReader, Arrays.asList(image, image));
+
+            List<IEvent> reported = counter.getEvents();
+            Assert.assertEquals(1, reported.size());
+            Assert.assertSame(PdfOcrTesseract4Event.TESSERACT4_IMAGE_TO_PDF, reported.get(0));
+            Assert.assertNull(counter.getMetaInfos().get(0));
+        } finally {
+            EventCounterHandler.getInstance().unregister(counterFactory);
+        }
+    }
+
+    // A single image converted to PDF/A must be reported as exactly one image-to-pdfa event without meta info.
+    @Test
+    public void testEventCountingPdfAEvent() {
+        File image = new File(TEST_IMAGES_DIRECTORY + "numbers_01.jpg");
+        TestEventCounter counter = new TestEventCounter();
+        IEventCounterFactory counterFactory = new SimpleEventCounterFactory(counter);
+        EventCounterHandler.getInstance().register(counterFactory);
+        try {
+            doImageToPdfAOcr(tesseractReader, Arrays.asList(image));
+
+            List<IEvent> reported = counter.getEvents();
+            Assert.assertEquals(1, reported.size());
+            Assert.assertSame(PdfOcrTesseract4Event.TESSERACT4_IMAGE_TO_PDFA, reported.get(0));
+            Assert.assertNull(counter.getMetaInfos().get(0));
+        } finally {
+            EventCounterHandler.getInstance().unregister(counterFactory);
+        }
+    }
+
+    @Test
+    public void testEventCountingTwoPdfEvents() {
+        String imgPath = TEST_IMAGES_DIRECTORY + "numbers_01.jpg";
+        File file = new File(imgPath);
+
+        TestEventCounter eventCounter = new TestEventCounter();
+        IEventCounterFactory factory = new SimpleEventCounterFactory(eventCounter);
+        EventCounterHandler.getInstance().register(factory);
+        try {
+            doImageToPdfOcr(tesseractReader, Arrays.asList(file));
+            doImageToPdfOcr(tesseractReader, Arrays.asList(file));
+
+            Assert.assertEquals(2, eventCounter.getEvents().size());
+            for (int i = 0; i < eventCounter.getEvents().size(); i++) {
+                Assert.assertSame(PdfOcrTesseract4Event.TESSERACT4_IMAGE_TO_PDF, eventCounter.getEvents().get(i));
+                Assert.assertNull(eventCounter.getMetaInfos().get(i));
+            }
+        } finally {
+            EventCounterHandler.getInstance().unregister(factory);
+        }
+    }
+
+    // Plain image OCR (no PDF creation) must be reported as exactly one image-ocr event without meta info.
+    @Test
+    public void testEventCountingImageEvent() {
+        File image = new File(TEST_IMAGES_DIRECTORY + "numbers_01.jpg");
+        TestEventCounter counter = new TestEventCounter();
+        IEventCounterFactory counterFactory = new SimpleEventCounterFactory(counter);
+        EventCounterHandler.getInstance().register(counterFactory);
+        try {
+            doImageOcr(tesseractReader, image);
+
+            List<IEvent> reported = counter.getEvents();
+            Assert.assertEquals(1, reported.size());
+            Assert.assertSame(PdfOcrTesseract4Event.TESSERACT4_IMAGE_OCR, reported.get(0));
+            Assert.assertNull(counter.getMetaInfos().get(0));
+        } finally {
+            EventCounterHandler.getInstance().unregister(counterFactory);
+        }
+    }
+
+    // Meta info set on the reader before plain image OCR must be delivered together with the image-ocr event.
+    @Test
+    public void testEventCountingImageEventCustomMetaInfo() {
+        File image = new File(TEST_IMAGES_DIRECTORY + "numbers_01.jpg");
+        TestEventCounter counter = new TestEventCounter();
+        IEventCounterFactory counterFactory = new SimpleEventCounterFactory(counter);
+        EventCounterHandler.getInstance().register(counterFactory);
+        try {
+            tesseractReader.setThreadLocalMetaInfo(new TestMetaInfo());
+            doImageOcr(tesseractReader, image);
+
+            List<IEvent> reported = counter.getEvents();
+            Assert.assertEquals(1, reported.size());
+            Assert.assertSame(PdfOcrTesseract4Event.TESSERACT4_IMAGE_OCR, reported.get(0));
+            Assert.assertTrue(counter.getMetaInfos().get(0) instanceof TestMetaInfo);
+        } finally {
+            EventCounterHandler.getInstance().unregister(counterFactory);
+            tesseractReader.setThreadLocalMetaInfo(null);
+        }
+    }
+
+    // Meta info set on the reader before a PDF conversion must be delivered together with the image-to-pdf event.
+    @Test
+    public void testEventCountingPdfEventCustomMetaInfo() {
+        File image = new File(TEST_IMAGES_DIRECTORY + "numbers_01.jpg");
+        TestEventCounter counter = new TestEventCounter();
+        IEventCounterFactory counterFactory = new SimpleEventCounterFactory(counter);
+        EventCounterHandler.getInstance().register(counterFactory);
+        try {
+            tesseractReader.setThreadLocalMetaInfo(new TestMetaInfo());
+            doImageToPdfOcr(tesseractReader, Arrays.asList(image));
+
+            List<IEvent> reported = counter.getEvents();
+            Assert.assertEquals(1, reported.size());
+            Assert.assertSame(PdfOcrTesseract4Event.TESSERACT4_IMAGE_TO_PDF, reported.get(0));
+            Assert.assertTrue(counter.getMetaInfos().get(0) instanceof TestMetaInfo);
+        } finally {
+            EventCounterHandler.getInstance().unregister(counterFactory);
+            tesseractReader.setThreadLocalMetaInfo(null);
+        }
+    }
+
+    public void testEventCountingCustomMetaInfoError() {
+        String imgPath = TEST_IMAGES_DIRECTORY + "numbers_101.jpg";
+        File file = new File(imgPath);
+
+        TestEventCounter eventCounter = new TestEventCounter();
+        IEventCounterFactory factory = new SimpleEventCounterFactory(eventCounter);
+        EventCounterHandler.getInstance().register(factory);
+
+        IMetaInfo metaInfo = new TestMetaInfo();
+        try {
+            tesseractReader.setThreadLocalMetaInfo(metaInfo);
+            doImageToPdfOcr(tesseractReader, Arrays.asList(file));
+        } finally {
+            Assert.assertEquals(metaInfo, tesseractReader.getThreadLocalMetaInfo());
+            EventCounterHandler.getInstance().unregister(factory);
+            tesseractReader.setThreadLocalMetaInfo(null);
+        }
+    }
+
+    private static void doImageOcr(AbstractTesseract4OcrEngine tesseractReader, File imageFile) {
+        tesseractReader.doImageOcr(imageFile); // return value is ignored; callers only inspect the recorded events
+    }
+
+    // OCRs the given images into a PDF that is written to an in-memory stream; the result is not inspected here.
+    private static void doImageToPdfOcr(AbstractTesseract4OcrEngine tesseractReader, List<File> imageFiles) {
+        new OcrPdfCreator(tesseractReader).createPdf(imageFiles, new PdfWriter(new ByteArrayOutputStream()));
+    }
+
+    // Runs OCR over the images and assembles a PDF/A in memory, with an output intent built from the
+    // sRGB_CS_profile.icm file in PROFILE_FOLDER.
+    private static void doImageToPdfAOcr(AbstractTesseract4OcrEngine tesseractReader, List<File> imageFiles) {
+        OcrPdfCreator ocrPdfCreator = new OcrPdfCreator(tesseractReader,
+                new OcrPdfCreatorProperties().setPdfLang("en-US"));
+        // try-with-resources closes the profile stream; an unreadable profile fails the test with its real cause
+        try (InputStream is = new FileInputStream(PROFILE_FOLDER + "sRGB_CS_profile.icm")) {
+            PdfOutputIntent outputIntent = new PdfOutputIntent("Custom", "", "http://www.color.org",
+                    "sRGB IEC61966-2.1", is);
+            ocrPdfCreator.createPdfA(imageFiles, new PdfWriter(new ByteArrayOutputStream()), outputIntent);
+        } catch (java.io.IOException e) {
+            throw new RuntimeException(e.getMessage(), e);
+        }
+    }
+
+    private static class TestEventCounter extends EventCounter {
+        private final List<IEvent> recordedEvents = new ArrayList<>();
+        private final List<IMetaInfo> recordedMetaInfos = new ArrayList<>();
+
+        public List<IEvent> getEvents() {
+            return recordedEvents;
+        }
+
+        public List<IMetaInfo> getMetaInfos() {
+            return recordedMetaInfos;
+        }
+        // Appends to both lists on every call, so index i of each list describes the same reported event.
+        @Override
+        protected void onEvent(IEvent event, IMetaInfo metaInfo) {
+            recordedEvents.add(event);
+            recordedMetaInfos.add(metaInfo);
+        }
+    }
+}
diff --git a/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/events/PdfOcrTesseract4EventTest.java b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/events/PdfOcrTesseract4EventTest.java
new file mode 100644
index 0000000..1da58aa
--- /dev/null
+++ b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/events/PdfOcrTesseract4EventTest.java
@@ -0,0 +1,59 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr.events;
+
+import com.itextpdf.pdfocr.IntegrationTestHelper;
+import com.itextpdf.pdfocr.tesseract4.events.PdfOcrTesseract4Event;
+import com.itextpdf.test.annotations.type.UnitTest;
+
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+@Category(UnitTest.class)
+public class PdfOcrTesseract4EventTest extends IntegrationTestHelper {
+
+    private static final String PDF_OCR_TESSERACT4_ORIGIN_ID = "com.itextpdf.pdfocr.tesseract4";
+
+    @Test
+    public void testEventTypes() {
+        String[] expectedTypes = {"pdfOcr-tesseract4-image-ocr", "pdfOcr-tesseract4-image-to-pdf", "pdfOcr-tesseract4-image-to-pdfa"};
+        PdfOcrTesseract4Event[] testedEvents = {PdfOcrTesseract4Event.TESSERACT4_IMAGE_OCR,
+                PdfOcrTesseract4Event.TESSERACT4_IMAGE_TO_PDF, PdfOcrTesseract4Event.TESSERACT4_IMAGE_TO_PDFA};
+        // expectedTypes[i] is the event type string expected from testedEvents[i]
+        for (int i = 0; i < testedEvents.length; i++) {
+            Assert.assertEquals(expectedTypes[i], testedEvents[i].getEventType());
+        }
+    }
+
+    @Test
+    public void testOriginId() {
+        String expected = PDF_OCR_TESSERACT4_ORIGIN_ID;
+        // Each of the three event constants (image-ocr, image-to-pdf, image-to-pdfa) must report the same origin id.
+        PdfOcrTesseract4Event[] testedEvents = {PdfOcrTesseract4Event.TESSERACT4_IMAGE_OCR,
+                PdfOcrTesseract4Event.TESSERACT4_IMAGE_TO_PDF, PdfOcrTesseract4Event.TESSERACT4_IMAGE_TO_PDFA};
+        for (PdfOcrTesseract4Event event : testedEvents) {
+            Assert.assertEquals(expected, event.getOriginId());
+        }
+    }
+}
diff --git a/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/events/multithreading/DoImageOcrRunnable.java b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/events/multithreading/DoImageOcrRunnable.java
new file mode 100644
index 0000000..ea66eaa
--- /dev/null
+++ b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/events/multithreading/DoImageOcrRunnable.java
@@ -0,0 +1,63 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr.events.multithreading;
+
+import com.itextpdf.kernel.counter.event.IMetaInfo;
+import com.itextpdf.kernel.pdf.PdfWriter;
+import com.itextpdf.pdfocr.OcrPdfCreator;
+import com.itextpdf.pdfocr.tesseract4.AbstractTesseract4OcrEngine;
+import com.itextpdf.pdfocr.tesseract4.OutputFormat;
+
+import java.io.File;
+import java.util.Arrays;
+
+// Test task: passes the meta info to setThreadLocalMetaInfo, then OCRs one image into a PDF or a text file.
+public class DoImageOcrRunnable implements Runnable {
+    private final AbstractTesseract4OcrEngine tesseractReader;
+    private final File imgFile;
+    private final File outputFile;
+    private final boolean createPdf;
+    private final IMetaInfo metaInfo;
+
+    DoImageOcrRunnable(AbstractTesseract4OcrEngine tesseractReader, IMetaInfo metaInfo, File imgFile, File outputFile, boolean createPdf) {
+        this.tesseractReader = tesseractReader;
+        this.metaInfo = metaInfo;
+        this.imgFile = imgFile;
+        this.outputFile = outputFile;
+        this.createPdf = createPdf;
+    }
+
+    public void run() {
+        try {
+            tesseractReader.setThreadLocalMetaInfo(metaInfo);
+            if (createPdf) {
+                new OcrPdfCreator(tesseractReader).createPdf(Arrays.asList(imgFile), new PdfWriter(outputFile));
+            } else {
+                tesseractReader.doTesseractOcr(imgFile, outputFile, OutputFormat.TXT);
+            }
+            System.out.println(imgFile.getName()); // for test purposes
+        } catch (Exception e) {
+            throw new RuntimeException(e.getMessage(), e); // keep the original exception as cause for its stack trace
+        }
+    }
+}
diff --git a/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/events/multithreading/MultiThreadingExecutableTest.java b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/events/multithreading/MultiThreadingExecutableTest.java
new file mode 100644
index 0000000..2321017
--- /dev/null
+++ b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/events/multithreading/MultiThreadingExecutableTest.java
@@ -0,0 +1,34 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr.events.multithreading;
+
+import com.itextpdf.test.annotations.type.IntegrationTest;
+
+import org.junit.experimental.categories.Category;
+
+@Category(IntegrationTest.class)
+public class MultiThreadingExecutableTest extends MultiThreadingTest {
+    public MultiThreadingExecutableTest() {
+        super(ReaderType.EXECUTABLE); // run the inherited multithreading test with the EXECUTABLE reader type
+    }
+}
diff --git a/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/events/multithreading/MultiThreadingTest.java b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/events/multithreading/MultiThreadingTest.java
new file mode 100644
index 0000000..3a36dee
--- /dev/null
+++ b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/events/multithreading/MultiThreadingTest.java
@@ -0,0 +1,148 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr.events.multithreading;
+
+import com.itextpdf.kernel.counter.EventCounter;
+import com.itextpdf.kernel.counter.EventCounterHandler;
+import com.itextpdf.kernel.counter.IEventCounterFactory;
+import com.itextpdf.kernel.counter.SimpleEventCounterFactory;
+import com.itextpdf.kernel.counter.event.IEvent;
+import com.itextpdf.kernel.counter.event.IMetaInfo;
+import com.itextpdf.metainfo.TestMetaInfo;
+import com.itextpdf.pdfocr.IntegrationTestHelper;
+import com.itextpdf.pdfocr.tesseract4.AbstractTesseract4OcrEngine;
+import com.itextpdf.pdfocr.tesseract4.Tesseract4OcrEngineProperties;
+import com.itextpdf.pdfocr.tesseract4.events.PdfOcrTesseract4Event;
+import com.itextpdf.test.annotations.type.IntegrationTest;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.List;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.ExpectedException;
+
+@Category(IntegrationTest.class)
+public abstract class MultiThreadingTest extends IntegrationTestHelper {
+    protected static final String destinationFolder = "./target/test/com/itextpdf/pdfocr/events/multithreading/";
+    protected static final String sourceFolder = "./src/test/resources/com/itextpdf/pdfocr/events/multithreading/";
+
+    AbstractTesseract4OcrEngine tesseractReader;
+
+    @Rule
+    public ExpectedException junitExpectedException = ExpectedException.none();
+
+    public MultiThreadingTest(ReaderType type) {
+        tesseractReader = getTesseractReader(type); // this one reader is handed to every runnable the test creates
+    }
+
+    @BeforeClass
+    public static void beforeClass() {
+        createDestinationFolder(destinationFolder); // the test writes its ocr-result-N.txt outputs into this folder
+    }
+
+    // Before each test: hand the reader fresh properties whose tessdata path is built relative to sourceFolder.
+    @Before
+    public void initTesseractProperties() {
+        Tesseract4OcrEngineProperties freshProperties = new Tesseract4OcrEngineProperties();
+        freshProperties.setPathToTessData(new File(sourceFolder + "../../tessdata/"));
+        tesseractReader.setTesseract4OcrEngineProperties(freshProperties);
+    }
+
+    // Runs 16 OCR threads on one shared reader: even indices create a PDF, odd ones call doTesseractOcr.
+    @Test
+    public void testEventCountingPdfEvent() throws InterruptedException {
+        TestEventCounter counter = new TestEventCounter();
+        IEventCounterFactory counterFactory = new SimpleEventCounterFactory(counter);
+        EventCounterHandler.getInstance().register(counterFactory);
+        try {
+            final int threadCount = 16;
+            IMetaInfo sharedMetaInfo = new TestMetaInfo();
+            Thread[] workers = new Thread[threadCount];
+            for (int i = 0; i < threadCount; i++) {
+                boolean createPdf = i % 2 == 0;
+                File result = new File(destinationFolder + "ocr-result-" + (i + 1) + ".txt");
+                // We do not use Runnable as the variable's type because of porting issues
+                DoImageOcrRunnable task = new DoImageOcrRunnable(tesseractReader, sharedMetaInfo,
+                        new File(sourceFolder + "numbers_01.jpg"), result, createPdf);
+                workers[i] = getThread(task);
+            }
+            // All threads are started before any is joined, so the OCR calls can overlap.
+            // The test will pass in sequential mode, i.e. if each thread is joined right after it is started
+            for (Thread worker : workers) {
+                worker.start();
+            }
+            for (Thread worker : workers) {
+                worker.join();
+            }
+
+            List<IEvent> reported = counter.getEvents();
+            Assert.assertEquals(threadCount, reported.size());
+            int foundPdfEvents = 0;
+            int foundImageEvents = 0;
+            for (int i = 0; i < threadCount; i++) {
+                IEvent event = reported.get(i);
+                if (PdfOcrTesseract4Event.TESSERACT4_IMAGE_TO_PDF == event) {
+                    foundPdfEvents++;
+                } else if (PdfOcrTesseract4Event.TESSERACT4_IMAGE_OCR == event) {
+                    foundImageEvents++;
+                }
+                Assert.assertEquals(sharedMetaInfo, counter.getMetaInfos().get(i));
+            }
+            int expectedPdfEvents = threadCount / 2;
+            int expectedImageEvents = threadCount - expectedPdfEvents;
+            Assert.assertEquals(expectedImageEvents, foundImageEvents);
+            Assert.assertEquals(expectedPdfEvents, foundPdfEvents);
+        } finally {
+            EventCounterHandler.getInstance().unregister(counterFactory);
+        }
+    }
+
+    private static Thread getThread(DoImageOcrRunnable runnable) {
+        return new Thread(runnable); // created but not started here; the caller starts and joins it
+    }
+
+    // Records each reported event with its meta info; the test registers one instance while several OCR threads run.
+    public static class TestEventCounter extends EventCounter {
+        private List<IEvent> events = new ArrayList<>();
+        private List<IMetaInfo> metaInfos = new ArrayList<>();
+
+        public List<IEvent> getEvents() {
+            return events;
+        }
+        public List<IMetaInfo> getMetaInfos() {
+            return metaInfos;
+        }
+        // synchronized: ArrayList is not thread-safe, and both lists must grow together so their indices stay aligned
+        @Override
+        protected synchronized void onEvent(IEvent event, IMetaInfo metaInfo) {
+            this.events.add(event);
+            this.metaInfos.add(metaInfo);
+        }
+    }
+
+}
diff --git a/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/general/BasicTesseractIntegrationExecutableTest.java b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/general/BasicTesseractIntegrationExecutableTest.java
new file mode 100644
index 0000000..a6cb20f
--- /dev/null
+++ b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/general/BasicTesseractIntegrationExecutableTest.java
@@ -0,0 +1,34 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr.general;
+
+import com.itextpdf.test.annotations.type.IntegrationTest;
+
+import org.junit.experimental.categories.Category;
+
+@Category(IntegrationTest.class)
+public class BasicTesseractIntegrationExecutableTest extends BasicTesseractIntegrationTest {
+    public BasicTesseractIntegrationExecutableTest() {
+        super(ReaderType.EXECUTABLE); // passes the EXECUTABLE reader type to the base test constructor
+    }
+}
diff --git a/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/general/BasicTesseractIntegrationLibTest.java b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/general/BasicTesseractIntegrationLibTest.java
new file mode 100644
index 0000000..4d2b0f4
--- /dev/null
+++ b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/general/BasicTesseractIntegrationLibTest.java
@@ -0,0 +1,34 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr.general;
+
+import com.itextpdf.test.annotations.type.IntegrationTest;
+
+import org.junit.experimental.categories.Category;
+
+@Category(IntegrationTest.class)
+public class BasicTesseractIntegrationLibTest extends BasicTesseractIntegrationTest {
+    public BasicTesseractIntegrationLibTest() {
+        super(ReaderType.LIB); // passes the LIB reader type to the base test constructor
+    }
+}
diff --git a/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/general/BasicTesseractIntegrationTest.java b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/general/BasicTesseractIntegrationTest.java
new file mode 100644
index 0000000..0bf1cf2
--- /dev/null
+++ b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/general/BasicTesseractIntegrationTest.java
@@ -0,0 +1,465 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr.general;
+
+import com.itextpdf.io.source.ByteArrayOutputStream;
+import com.itextpdf.io.util.MessageFormatUtil;
+import com.itextpdf.kernel.colors.DeviceCmyk;
+import com.itextpdf.kernel.pdf.PdfDocument;
+import com.itextpdf.kernel.pdf.PdfReader;
+import com.itextpdf.kernel.pdf.PdfWriter;
+import com.itextpdf.kernel.pdf.WriterProperties;
+import com.itextpdf.kernel.pdf.canvas.parser.PdfCanvasProcessor;
+import com.itextpdf.pdfocr.IntegrationTestHelper;
+import com.itextpdf.pdfocr.IOcrEngine;
+import com.itextpdf.pdfocr.OcrPdfCreator;
+import com.itextpdf.pdfocr.OcrPdfCreatorProperties;
+import com.itextpdf.pdfocr.TextInfo;
+import com.itextpdf.pdfocr.tesseract4.AbstractTesseract4OcrEngine;
+import com.itextpdf.pdfocr.tesseract4.OutputFormat;
+import com.itextpdf.pdfocr.tesseract4.Tesseract4LogMessageConstant;
+import com.itextpdf.pdfocr.tesseract4.Tesseract4OcrEngineProperties;
+import com.itextpdf.pdfocr.tesseract4.Tesseract4OcrException;
+import com.itextpdf.pdfocr.tesseract4.TesseractHelper;
+import com.itextpdf.test.annotations.LogMessage;
+import com.itextpdf.test.annotations.LogMessages;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.ExpectedException;
+
+public abstract class BasicTesseractIntegrationTest extends IntegrationTestHelper {
+
+    @Rule
+    public ExpectedException junitExpectedException = ExpectedException.none(); // tests opt in via expect(...)
+
+    AbstractTesseract4OcrEngine tesseractReader; // assigned in the constructor; its properties are replaced before each test
+
+    public BasicTesseractIntegrationTest(ReaderType type) {
+        tesseractReader = getTesseractReader(type); // the reader type is supplied by the concrete subclass
+    }
+
+    @Before
+    public void initTesseractProperties() {
+        // Before each test, give the engine a new properties object with only the tess data path set.
+        Tesseract4OcrEngineProperties freshProperties = new Tesseract4OcrEngineProperties();
+        freshProperties.setPathToTessData(getTessDataDirectory());
+        tesseractReader.setTesseract4OcrEngineProperties(freshProperties);
+    }
+
+    /**
+     * OCRs multîpage.tiff into a PDF whose text layer "Text1" uses DeviceCmyk.MAGENTA and
+     * checks that the fill color extracted from the first page equals that color.
+     */
+    @Test
+    public void testFontColorInMultiPagePdf() throws IOException {
+        String testName = "testFontColorInMultiPagePdf";
+        String pdfPath = getTargetDirectory() + testName + ".pdf";
+        File file = new File(TEST_IMAGES_DIRECTORY + "multîpage.tiff");
+
+        tesseractReader.setTesseract4OcrEngineProperties(
+                tesseractReader.getTesseract4OcrEngineProperties().setPreprocessingImages(false));
+        com.itextpdf.kernel.colors.Color color = DeviceCmyk.MAGENTA;
+        OcrPdfCreatorProperties ocrPdfCreatorProperties = new OcrPdfCreatorProperties();
+        ocrPdfCreatorProperties.setTextLayerName("Text1");
+        ocrPdfCreatorProperties.setTextColor(color);
+
+        OcrPdfCreator ocrPdfCreator = new OcrPdfCreator(tesseractReader, ocrPdfCreatorProperties);
+        PdfDocument doc =
+                ocrPdfCreator.createPdf(Collections.<File>singletonList(file), getPdfWriter(pdfPath));
+        Assert.assertNotNull(doc);
+        doc.close();
+
+        PdfDocument pdfDocument = new PdfDocument(new PdfReader(pdfPath));
+        try {
+            ExtractionStrategy strategy = new ExtractionStrategy("Text1");
+            new PdfCanvasProcessor(strategy).processPageContent(pdfDocument.getPage(1));
+
+            // Expected value goes first so that a failure message labels the two colors correctly.
+            Assert.assertEquals(color, strategy.getFillColor());
+        } finally {
+            // Close the reopened document even when processing or the assertion fails.
+            pdfDocument.close();
+        }
+    }
+
+    /** Recognizes tèst/noisy_01.png; both spellings of the first words ("Noisyimage" / "Noisy image") pass. */
+    @Test
+    public void testNoisyImage() {
+        File image = new File(TEST_IMAGES_DIRECTORY + "tèst/noisy_01.png");
+        List<String> accepted = Arrays.<String>asList(
+                "Noisyimage to test Tesseract OCR",
+                "Noisy image to test Tesseract OCR");
+
+        String recognized = getTextUsingTesseractFromImage(tesseractReader, image);
+        Assert.assertTrue("Unexpected OCR text: " + recognized, accepted.contains(recognized));
+    }
+
+    /** OCR of pantone_blue.jpg is expected to produce an empty string. */
+    @Test
+    public void testPantoneImage() {
+        File image = new File(TEST_IMAGES_DIRECTORY + "pantone_blue.jpg");
+
+        String recognized = getTextUsingTesseractFromImage(tesseractReader, image);
+
+        Assert.assertEquals("", recognized);
+    }
+
+    /** The text recognized in example_04.png must contain the expected sentence. */
+    @Test
+    public void testDifferentTextStyles() {
+        String imagePath = TEST_IMAGES_DIRECTORY + "example_04.png";
+
+        testImageOcrText(tesseractReader, imagePath, "How about a bigger font?");
+    }
+
+    /**
+     * Creates a PDF from pantone_blue.jpg and checks that the "Text Layer" extraction
+     * strategy finds no text on the first page.
+     */
+    @Test
+    public void testImageWithoutText() throws IOException {
+        String pdfPath = getTargetDirectory() + "testImageWithoutText" + ".pdf";
+        File file = new File(TEST_IMAGES_DIRECTORY + "pantone_blue.jpg");
+
+        OcrPdfCreator ocrPdfCreator = new OcrPdfCreator(tesseractReader);
+        ocrPdfCreator.createPdf(Collections.<File>singletonList(file), new PdfWriter(pdfPath)).close();
+
+        ExtractionStrategy strategy = new ExtractionStrategy("Text Layer");
+        PdfDocument pdfDocument = new PdfDocument(new PdfReader(pdfPath));
+        try {
+            new PdfCanvasProcessor(strategy).processPageContent(pdfDocument.getFirstPage());
+        } finally {
+            // Close the reopened document even if content processing throws.
+            pdfDocument.close();
+        }
+        Assert.assertEquals("", strategy.getResultantText());
+    }
+
+    /**
+     * Creating a PDF from a list that includes example.txt is expected to fail with
+     * INCORRECT_INPUT_IMAGE_FORMAT formatted for "txt".
+     */
+    @LogMessages(messages = {
+        @LogMessage(messageTemplate = Tesseract4LogMessageConstant.CANNOT_READ_INPUT_IMAGE, count = 1)
+    })
+    @Test
+    public void testInputInvalidImage() {
+        junitExpectedException.expect(Tesseract4OcrException.class);
+        junitExpectedException.expectMessage(
+                MessageFormatUtil.format(Tesseract4OcrException.INCORRECT_INPUT_IMAGE_FORMAT, "txt"));
+
+        File textFile = new File(TEST_IMAGES_DIRECTORY + "example.txt");
+        File corruptedBmp = new File(TEST_IMAGES_DIRECTORY + "example_05_corrupted.bmp");
+        File numbersJpg = new File(TEST_IMAGES_DIRECTORY + "numbers_02.jpg");
+        tesseractReader.setTesseract4OcrEngineProperties(
+                tesseractReader.getTesseract4OcrEngineProperties().setPathToTessData(getTessDataDirectory()));
+
+        List<File> inputImages = Arrays.<File>asList(numbersJpg, textFile, corruptedBmp, numbersJpg);
+        new OcrPdfCreator(tesseractReader).createPdf(inputImages, getPdfWriter());
+    }
+
+    /** OCR must work when a directory in the image path ("tèst") contains a non-ASCII character. */
+    @Test
+    public void testNonAsciiImagePath() {
+        File image = new File(TEST_IMAGES_DIRECTORY + "tèst/noisy_01.png");
+        List<String> accepted = Arrays.<String>asList(
+                "Noisyimage to test Tesseract OCR",
+                "Noisy image to test Tesseract OCR");
+
+        String recognized = getTextUsingTesseractFromImage(tesseractReader, image);
+        Assert.assertTrue("Unexpected OCR text: " + recognized, accepted.contains(recognized));
+    }
+
+    /** OCR must work for an image whose file name ("nümbérs.jpg") contains non-ASCII characters. */
+    @Test
+    public void testNonAsciiImageName() {
+        File image = new File(TEST_IMAGES_DIRECTORY + "nümbérs.jpg");
+
+        String recognized = getTextUsingTesseractFromImage(tesseractReader, image);
+        // assertEquals reports both values on failure, unlike assertTrue(a.equals(b)).
+        Assert.assertEquals("619121", recognized);
+    }
+
+    /** A null tess data path is expected to be rejected with PATH_TO_TESS_DATA_DIRECTORY_IS_INVALID. */
+    @Test
+    public void testNullPathToTessData() {
+        junitExpectedException.expect(Tesseract4OcrException.class);
+        junitExpectedException.expectMessage(Tesseract4OcrException.PATH_TO_TESS_DATA_DIRECTORY_IS_INVALID);
+        File image = new File(TEST_IMAGES_DIRECTORY + "spanish_01.jpg");
+        Tesseract4OcrEngineProperties properties = tesseractReader.getTesseract4OcrEngineProperties().setPathToTessData(null);
+        tesseractReader.setTesseract4OcrEngineProperties(properties);
+        getTextFromPdf(tesseractReader, image, Collections.<String>singletonList("eng"));
+    }
+
+    /** A tess data path of "test/" is expected to be rejected with PATH_TO_TESS_DATA_DIRECTORY_IS_INVALID. */
+    @Test
+    public void testPathToTessDataWithoutData() {
+        junitExpectedException.expect(Tesseract4OcrException.class);
+        junitExpectedException.expectMessage(Tesseract4OcrException.PATH_TO_TESS_DATA_DIRECTORY_IS_INVALID);
+
+        File image = new File(TEST_IMAGES_DIRECTORY + "spanish_01.jpg");
+        Tesseract4OcrEngineProperties properties = tesseractReader.getTesseract4OcrEngineProperties().setPathToTessData(new File("test/"));
+        tesseractReader.setTesseract4OcrEngineProperties(properties);
+        getTextFromPdf(tesseractReader, image, Collections.<String>singletonList("eng"));
+    }
+
+    /**
+     * With the tess data path set to the current directory ("."), OCR is expected to fail
+     * with INCORRECT_LANGUAGE formatted for "eng.traineddata" and that directory.
+     */
+    @LogMessages(messages = {
+        @LogMessage(messageTemplate = Tesseract4OcrException.INCORRECT_LANGUAGE)
+    })
+    @Test
+    public void testEmptyPathToTessData() {
+        File tessDataDirectory = new File(".");
+        junitExpectedException.expect(Tesseract4OcrException.class);
+        junitExpectedException.expectMessage(MessageFormatUtil
+                .format(Tesseract4OcrException.INCORRECT_LANGUAGE,
+                        "eng.traineddata", tessDataDirectory.getAbsolutePath()));
+
+        File file = new File(TEST_IMAGES_DIRECTORY + "spanish_01.jpg");
+        tesseractReader.setTesseract4OcrEngineProperties(
+                tesseractReader.getTesseract4OcrEngineProperties().setPathToTessData(tessDataDirectory));
+        // Must stay the last statement: the expected exception is thrown here, so nothing after it would run.
+        getTextFromPdf(tesseractReader, file);
+    }
+
+    /** Every expected page text must occur in the TXT-format OCR result of multîpage.tiff. */
+    @Test
+    public void testTxtStringOutput() {
+        File file = new File(TEST_IMAGES_DIRECTORY + "multîpage.tiff");
+        List<String> expectedOutput = Arrays.<String>asList(
+                "Multipage\nTIFF\nExample\nPage 1",
+                "Multipage\nTIFF\nExample\nPage 2",
+                "Multipage\nTIFF\nExample\nPage 4",
+                "Multipage\nTIFF\nExample\nPage 5",
+                "Multipage\nTIFF\nExample\nPage 6",
+                "Multipage\nTIFF\nExample\nPage /",
+                "Multipage\nTIFF\nExample\nPage 8",
+                "Multipage\nTIFF\nExample\nPage 9");
+        // Strip carriage returns once instead of on every loop iteration.
+        String result = tesseractReader.doImageOcr(file, OutputFormat.TXT).replace("\r", "");
+        for (String page : expectedOutput) {
+            Assert.assertTrue("Missing in OCR output: " + page, result.contains(page));
+        }
+    }
+
+    /** Every expected page text must occur in the HOCR-format OCR result of multîpage.tiff. */
+    @Test
+    public void testHocrStringOutput() {
+        File file = new File(TEST_IMAGES_DIRECTORY + "multîpage.tiff");
+        List<String> expectedOutput = Arrays.<String>asList(
+                "Multipage\nTIFF\nExample\nPage 1",
+                "Multipage\nTIFF\nExample\nPage 2",
+                "Multipage\nTIFF\nExample\nPage 4",
+                "Multipage\nTIFF\nExample\nPage 5",
+                "Multipage\nTIFF\nExample\nPage 6",
+                "Multipage\nTIFF\nExample\nPage /",
+                "Multipage\nTIFF\nExample\nPage 8",
+                "Multipage\nTIFF\nExample\nPage 9");
+        // Strip carriage returns once instead of on every loop iteration.
+        String result = tesseractReader.doImageOcr(file, OutputFormat.HOCR).replace("\r", "");
+        for (String page : expectedOutput) {
+            Assert.assertTrue("Missing in OCR output: " + page, result.contains(page));
+        }
+    }
+
+    /** Requesting language "spa_new" is expected to fail with INCORRECT_LANGUAGE for "spa_new.traineddata". */
+    @LogMessages(messages = {
+            @LogMessage(messageTemplate = Tesseract4OcrException.INCORRECT_LANGUAGE, count = 1)
+    })
+    @Test
+    public void testIncorrectLanguage() {
+        String tessDataPath = new File(LANG_TESS_DATA_DIRECTORY).getAbsolutePath();
+        String expectedMessage = MessageFormatUtil.format(
+                Tesseract4OcrException.INCORRECT_LANGUAGE, "spa_new.traineddata", tessDataPath);
+        junitExpectedException.expect(Tesseract4OcrException.class);
+        junitExpectedException.expectMessage(expectedMessage);
+        File image = new File(TEST_IMAGES_DIRECTORY + "spanish_01.jpg");
+        getTextFromPdf(tesseractReader, image, Collections.<String>singletonList("spa_new"));
+    }
+
+    /** A language list that includes "spa_new" is expected to fail with INCORRECT_LANGUAGE for "spa_new.traineddata". */
+    @LogMessages(messages = {
+            @LogMessage(messageTemplate = Tesseract4OcrException.INCORRECT_LANGUAGE, count = 1)
+    })
+    @Test
+    public void testListOfLanguagesWithOneIncorrectLanguage() {
+        String tessDataPath = new File(LANG_TESS_DATA_DIRECTORY).getAbsolutePath();
+        String expectedMessage = MessageFormatUtil.format(
+                Tesseract4OcrException.INCORRECT_LANGUAGE, "spa_new.traineddata", tessDataPath);
+        junitExpectedException.expect(Tesseract4OcrException.class);
+        junitExpectedException.expectMessage(expectedMessage);
+        File image = new File(TEST_IMAGES_DIRECTORY + "spanish_01.jpg");
+        getTextFromPdf(tesseractReader, image, Arrays.<String>asList("spa", "spa_new", "spa_old"));
+    }
+
+    /**
+     * With the script tess data directory configured, requesting "English" is expected
+     * to fail with INCORRECT_LANGUAGE for "English.traineddata".
+     */
+    @LogMessages(messages = {
+            @LogMessage(messageTemplate = Tesseract4OcrException.INCORRECT_LANGUAGE, count = 1)
+    })
+    @Test
+    public void testIncorrectScriptsName() {
+        File scriptTessData = new File(SCRIPT_TESS_DATA_DIRECTORY);
+        junitExpectedException.expect(Tesseract4OcrException.class);
+        junitExpectedException.expectMessage(MessageFormatUtil.format(
+                Tesseract4OcrException.INCORRECT_LANGUAGE, "English.traineddata", scriptTessData.getAbsolutePath()));
+        File image = new File(TEST_IMAGES_DIRECTORY + "spanish_01.jpg");
+        tesseractReader.setTesseract4OcrEngineProperties(
+                tesseractReader.getTesseract4OcrEngineProperties().setPathToTessData(scriptTessData));
+        getTextFromPdf(tesseractReader, image, Collections.<String>singletonList("English"));
+    }
+
+    /**
+     * With the script tess data directory configured, a script list that includes "English"
+     * is expected to fail with INCORRECT_LANGUAGE for "English.traineddata".
+     */
+    @LogMessages(messages = {
+            @LogMessage(messageTemplate = Tesseract4OcrException.INCORRECT_LANGUAGE, count = 1)
+    })
+    @Test
+    public void testListOfScriptsWithOneIncorrect() {
+        File scriptTessData = new File(SCRIPT_TESS_DATA_DIRECTORY);
+        junitExpectedException.expect(Tesseract4OcrException.class);
+        junitExpectedException.expectMessage(MessageFormatUtil.format(
+                Tesseract4OcrException.INCORRECT_LANGUAGE, "English.traineddata", scriptTessData.getAbsolutePath()));
+
+        File image = new File(TEST_IMAGES_DIRECTORY + "spanish_01.jpg");
+        tesseractReader.setTesseract4OcrEngineProperties(
+                tesseractReader.getTesseract4OcrEngineProperties().setPathToTessData(scriptTessData));
+        getTextFromPdf(tesseractReader, image, Arrays.<String>asList("Georgian", "Japanese", "English"));
+    }
+
+    /**
+     * Runs OCR on numbers_01.jpg into an hOCR file, parses that file and checks
+     * the text found for page 1.
+     */
+    @Test
+    public void testTesseract4OcrForOnePageWithHocrFormat() throws IOException {
+        String expected = "619121";
+        File imgFile = new File(TEST_IMAGES_DIRECTORY + "numbers_01.jpg");
+        File outputFile = new File(getTargetDirectory() + "testTesseract4OcrForOnePage.hocr");
+
+        tesseractReader.doTesseractOcr(imgFile, outputFile, OutputFormat.HOCR);
+        Map<Integer, List<TextInfo>> pageData = TesseractHelper.parseHocrFile(
+                Collections.<File>singletonList(outputFile),
+                tesseractReader.getTesseract4OcrEngineProperties().getTextPositioning());
+
+        // Fail with a readable message instead of a NullPointerException when page 1 is absent.
+        List<TextInfo> firstPage = pageData.get(1);
+        Assert.assertNotNull("Parsed hOCR data has no entry for page 1", firstPage);
+        Assert.assertEquals(expected, getTextFromPage(firstPage).trim());
+    }
+
+    /**
+     * Runs OCR on numbers_01.jpg with TXT output and checks the text read back from the output file.
+     */
+    @Test
+    public void testTesseract4OcrForOnePageWithTxtFormat() {
+        File imgFile = new File(TEST_IMAGES_DIRECTORY + "numbers_01.jpg");
+        File outputFile = new File(getTargetDirectory() + "testTesseract4OcrForOnePage.txt");
+
+        tesseractReader.doTesseractOcr(imgFile, outputFile, OutputFormat.TXT);
+
+        String recognized = getTextFromTextFile(outputFile);
+        Assert.assertTrue(recognized.contains("619121"));
+    }
+
+    /** The text recognized for numbers_01.jpg via getRecognizedTextFromTextFile must contain "619121". */
+    @Test
+    public void testSimpleTextOutput() {
+        String imgPath = TEST_IMAGES_DIRECTORY + "numbers_01.jpg";
+        String expectedOutput = "619121";
+
+        boolean found = getRecognizedTextFromTextFile(tesseractReader, imgPath).contains(expectedOutput);
+        Assert.assertTrue(found);
+    }
+
+    /**
+     * Recognizes the text of the image at {@code path} and asserts that it contains
+     * {@code expectedOutput}.
+     */
+    private void testImageOcrText(AbstractTesseract4OcrEngine tesseractReader, String path,
+                                  String expectedOutput) {
+        File image = new File(path);
+        String recognized = getTextUsingTesseractFromImage(tesseractReader, image);
+
+        Assert.assertTrue(recognized.contains(expectedOutput));
+    }
+
+    /**
+     * Runs OCR on the given image and returns the text of page 1 as a single string.
+     * When page 1 has no entries, one item with empty text and a zero bbox stands in for it.
+     */
+    private String getTextUsingTesseractFromImage(IOcrEngine tesseractReader, File file) {
+        final int firstPage = 1;
+        List<TextInfo> firstPageItems = tesseractReader.doImageOcr(file).get(firstPage);
+
+        if (firstPageItems == null || firstPageItems.isEmpty()) {
+            // getTextFromPage() inspects the first item, so supply an empty placeholder.
+            TextInfo placeholder = new TextInfo();
+            placeholder.setBbox(Arrays.<Float>asList(0f, 0f, 0f, 0f));
+            placeholder.setText("");
+            firstPageItems = new ArrayList<TextInfo>();
+            firstPageItems.add(placeholder);
+        }
+
+        return getTextFromPage(firstPageItems);
+    }
+
+    /**
+     * Appends each item's text followed by a space and returns the trimmed result.
+     * Also asserts that the first item's bbox has exactly four entries.
+     */
+    private String getTextFromPage(List<TextInfo> pageText) {
+        Assert.assertEquals(4, pageText.get(0).getBbox().size());
+
+        StringBuilder joined = new StringBuilder();
+        for (TextInfo item : pageText) {
+            joined.append(item.getText()).append(" ");
+        }
+
+        return joined.toString().trim();
+    }
+
+    /** Creates a {@link PdfWriter} over a ByteArrayOutputStream with addUAXmpMetadata() applied to its properties. */
+    private PdfWriter getPdfWriter() {
+        WriterProperties writerProperties = new WriterProperties().addUAXmpMetadata();
+
+        return new PdfWriter(new ByteArrayOutputStream(), writerProperties);
+    }
+}
diff --git a/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/imageformats/ImageFormatIntegrationExecutableTest.java b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/imageformats/ImageFormatIntegrationExecutableTest.java
new file mode 100644
index 0000000..7eeaebb
--- /dev/null
+++ b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/imageformats/ImageFormatIntegrationExecutableTest.java
@@ -0,0 +1,34 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr.imageformats;
+
+import com.itextpdf.test.annotations.type.IntegrationTest;
+
+import org.junit.experimental.categories.Category;
+
+@Category(IntegrationTest.class)
+public class ImageFormatIntegrationExecutableTest extends ImageFormatIntegrationTest {
+    public ImageFormatIntegrationExecutableTest() {
+        super(ReaderType.EXECUTABLE); // the base constructor hands this type to getTesseractReader(type)
+    }
+}
diff --git a/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/imageformats/ImageFormatIntegrationLibTest.java b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/imageformats/ImageFormatIntegrationLibTest.java
new file mode 100644
index 0000000..15c21cc
--- /dev/null
+++ b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/imageformats/ImageFormatIntegrationLibTest.java
@@ -0,0 +1,34 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr.imageformats;
+
+import com.itextpdf.test.annotations.type.IntegrationTest;
+
+import org.junit.experimental.categories.Category;
+
+@Category(IntegrationTest.class)
+public class ImageFormatIntegrationLibTest extends ImageFormatIntegrationTest {
+    public ImageFormatIntegrationLibTest() {
+        super(ReaderType.LIB); // the base constructor hands this type to getTesseractReader(type)
+    }
+}
diff --git a/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/imageformats/ImageFormatIntegrationTest.java b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/imageformats/ImageFormatIntegrationTest.java
new file mode 100644
index 0000000..3550bac
--- /dev/null
+++ b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/imageformats/ImageFormatIntegrationTest.java
@@ -0,0 +1,318 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr.imageformats;
+
+import com.itextpdf.io.util.MessageFormatUtil;
+import com.itextpdf.kernel.colors.DeviceCmyk;
+import com.itextpdf.kernel.utils.CompareTool;
+import com.itextpdf.pdfocr.IntegrationTestHelper;
+import com.itextpdf.pdfocr.tesseract4.AbstractTesseract4OcrEngine;
+import com.itextpdf.pdfocr.tesseract4.Tesseract4LogMessageConstant;
+import com.itextpdf.pdfocr.tesseract4.Tesseract4OcrEngineProperties;
+import com.itextpdf.pdfocr.tesseract4.Tesseract4OcrException;
+import com.itextpdf.pdfocr.tesseract4.TextPositioning;
+import com.itextpdf.test.annotations.LogMessage;
+import com.itextpdf.test.annotations.LogMessages;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Collections;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.ExpectedException;
+
+public abstract class ImageFormatIntegrationTest extends IntegrationTestHelper {
+
+    @Rule
+    public ExpectedException junitExpectedException = ExpectedException.none(); // tests opt in via expect(...)
+
+    AbstractTesseract4OcrEngine tesseractReader; // assigned in the constructor; its properties are replaced before each test
+    String testType; // lower-cased reader type name; part of the expected and result PDF file names
+
+    public ImageFormatIntegrationTest(ReaderType type) {
+        tesseractReader = getTesseractReader(type);
+        this.testType = type.toString().toLowerCase(java.util.Locale.ROOT); // locale-independent, e.g. no dotless i under tr_TR
+    }
+
+    @Before
+    public void initTesseractProperties() {
+        // Before each test, give the engine a new properties object with only the tess data path set.
+        Tesseract4OcrEngineProperties freshProperties = new Tesseract4OcrEngineProperties();
+        freshProperties.setPathToTessData(getTessDataDirectory());
+        tesseractReader.setTesseract4OcrEngineProperties(freshProperties);
+    }
+
+    /** OCRs example_01.BMP into a PDF and compares it by content with the expected PDF for this reader type. */
+    @Test
+    public void compareBmp() throws IOException, InterruptedException {
+        String fileName = "example_01";
+        String path = TEST_IMAGES_DIRECTORY + fileName + ".BMP";
+        String expectedPdfPath = TEST_DOCUMENTS_DIRECTORY + fileName + "_" + testType + ".pdf";
+        String resultPdfPath = getTargetDirectory() + fileName + "_compareBmp_" + testType + ".pdf";
+        doOcrAndSavePdfToPath(tesseractReader, path, resultPdfPath,
+                Collections.<String>singletonList("eng"), DeviceCmyk.MAGENTA);
+
+        String differences = new CompareTool().compareByContent(
+                resultPdfPath, expectedPdfPath, getTargetDirectory(), "diff_");
+        Assert.assertNull(differences);
+    }
+
+    /** Text extracted for example_01.BMP, with newlines turned into spaces and "‘" removed, must contain the sentence. */
+    @Test
+    public void testBMPText() {
+        File image = new File(TEST_IMAGES_DIRECTORY + "example_01.BMP");
+        String expectedOutput = "This is a test message for OCR Scanner Test";
+
+        String recognized = getTextFromPdf(tesseractReader, image, Collections.<String>singletonList("eng"));
+        String normalized = recognized.replaceAll("[\n]", " ").replaceAll("[‘]", "");
+
+        Assert.assertTrue(normalized.contains(expectedOutput));
+    }
+
+    /** OCRs englishText.bmp into a PDF and compares it by content with the expected PDF for this reader type. */
+    @Test
+    public void compareBmp02() throws IOException, InterruptedException {
+        String fileName = "englishText";
+        String path = TEST_IMAGES_DIRECTORY + fileName + ".bmp";
+        String expectedPdfPath = TEST_DOCUMENTS_DIRECTORY + fileName + "_" + testType + ".pdf";
+        String resultPdfPath = getTargetDirectory() + fileName + "_compareBmp02_" + testType + ".pdf";
+        doOcrAndSavePdfToPath(tesseractReader, path, resultPdfPath,
+                Collections.<String>singletonList("eng"), DeviceCmyk.MAGENTA);
+
+        String differences = new CompareTool().compareByContent(
+                resultPdfPath, expectedPdfPath, getTargetDirectory(), "diff_");
+        Assert.assertNull(differences);
+    }
+
+    /** Text extracted for englishText.bmp, with newlines turned into spaces, must contain the expected sentence. */
+    @Test
+    public void testBMPText02() {
+        File image = new File(TEST_IMAGES_DIRECTORY + "englishText.bmp");
+        String expectedOutput = "This is a test message for OCR Scanner Test BMPTest";
+
+        String recognized = getTextFromPdf(tesseractReader, image, Collections.<String>singletonList("eng"))
+                .replaceAll("[\n]", " ");
+        Assert.assertTrue(recognized.contains(expectedOutput));
+    }
+
+    /** OCRs example_02.JFIF into a PDF and compares it by content with the expected example_02.pdf. */
+    @Test
+    public void compareJFIF() throws IOException, InterruptedException {
+        String filename = "example_02";
+        String imagePath = TEST_IMAGES_DIRECTORY + filename + ".JFIF";
+        String expectedPdfPath = TEST_DOCUMENTS_DIRECTORY + filename + ".pdf";
+        String resultPdfPath = getTargetDirectory() + filename + "_compareJFIF" + ".pdf";
+
+        doOcrAndSavePdfToPath(tesseractReader, imagePath, resultPdfPath, null, DeviceCmyk.MAGENTA);
+
+        String differences = new CompareTool().compareByContent(
+                resultPdfPath, expectedPdfPath, getTargetDirectory(), "diff_");
+        Assert.assertNull(differences);
+    }
+
+    /** OCRs numbers_02.jpg into a PDF and compares it by content with the expected PDF for this reader type. */
+    @Test
+    public void compareJpg() throws IOException, InterruptedException {
+        String fileName = "numbers_02";
+        String path = TEST_IMAGES_DIRECTORY + fileName + ".jpg";
+        String pdfName = fileName + "_compareJpg_" + testType + ".pdf";
+        String expectedPdfPath = TEST_DOCUMENTS_DIRECTORY + pdfName;
+        String resultPdfPath = getTargetDirectory() + pdfName;
+
+        doOcrAndSavePdfToPath(tesseractReader, path, resultPdfPath, null, DeviceCmyk.BLACK);
+
+        String differences = new CompareTool().compareByContent(
+                resultPdfPath, expectedPdfPath, getTargetDirectory(), "diff_");
+        Assert.assertNull(differences);
+    }
+
+    /** With image preprocessing switched off, the text extracted for numbers_02.jpg must contain "0123456789". */
+    @Test
+    public void testTextFromJPG() {
+        File image = new File(TEST_IMAGES_DIRECTORY + "numbers_02.jpg");
+        String expectedOutput = "0123456789";
+
+        tesseractReader.setTesseract4OcrEngineProperties(
+                tesseractReader.getTesseract4OcrEngineProperties().setPreprocessingImages(false));
+
+        String recognized = getTextFromPdf(tesseractReader, image);
+        Assert.assertTrue(recognized.contains(expectedOutput));
+    }
+
+    /** OCRs numbers_01.jpe into a PDF and compares it by content with the expected PDF for this reader type. */
+    @Test
+    public void compareJpe() throws IOException, InterruptedException {
+        String fileName = "numbers_01";
+        String path = TEST_IMAGES_DIRECTORY + fileName + ".jpe";
+        String pdfName = fileName + "_compareJpe_" + testType + ".pdf";
+        String expectedPdfPath = TEST_DOCUMENTS_DIRECTORY + pdfName;
+        String resultPdfPath = getTargetDirectory() + pdfName;
+
+        doOcrAndSavePdfToPath(tesseractReader, path, resultPdfPath, null, DeviceCmyk.BLACK);
+
+        String differences = new CompareTool().compareByContent(
+                resultPdfPath, expectedPdfPath, getTargetDirectory(), "diff_");
+        Assert.assertNull(differences);
+    }
+
+    /** The text extracted for numbers_01.jpe must contain "619121". */
+    @Test
+    public void testTextFromJPE() {
+        File image = new File(TEST_IMAGES_DIRECTORY + "numbers_01.jpe");
+
+        String recognized = getTextFromPdf(tesseractReader, image);
+        Assert.assertTrue(recognized.contains("619121"));
+    }
+
+    /** OCRs numbers_01.tif into a PDF and compares it by content with the expected PDF for this reader type. */
+    @Test
+    public void compareTif() throws IOException, InterruptedException {
+        String fileName = "numbers_01";
+        String path = TEST_IMAGES_DIRECTORY + fileName + ".tif";
+        String pdfName = fileName + "_compareTif_" + testType + ".pdf";
+        String expectedPdfPath = TEST_DOCUMENTS_DIRECTORY + pdfName;
+        String resultPdfPath = getTargetDirectory() + pdfName;
+
+        doOcrAndSavePdfToPath(tesseractReader, path, resultPdfPath, null, DeviceCmyk.BLACK);
+
+        String differences = new CompareTool().compareByContent(
+                resultPdfPath, expectedPdfPath, getTargetDirectory(), "diff_");
+        Assert.assertNull(differences);
+    }
+
+    /** The text obtained from numbers_01.tif through getTextFromPdf must contain "619121". */
+    @Test
+    public void testTextFromTIF() {
+        File image = new File(TEST_IMAGES_DIRECTORY + "numbers_01.tif");
+
+        String recognized = getTextFromPdf(tesseractReader, image);
+        Assert.assertTrue(recognized.contains("619121"));
+    }
+
+    /** Reads example_03_10MB.tiff ("eng") with preprocessing off and a null page segmentation mode. */
+    @Test
+    public void testBigTiffWithoutPreprocessing() {
+        File image = new File(TEST_IMAGES_DIRECTORY + "example_03_10MB.tiff");
+        String expectedFragment = "Image File Format";
+
+        // Both setters are chained on the engine's current properties object.
+        tesseractReader.setTesseract4OcrEngineProperties(tesseractReader
+                .getTesseract4OcrEngineProperties().setPreprocessingImages(false).setPageSegMode(null));
+        String recognized =
+                getTextFromPdf(tesseractReader, image, Collections.<String>singletonList("eng"));
+        Assert.assertTrue(recognized.contains(expectedFragment));
+    }
+
+    /** OCRs multipage.tiff with the "eng" language and compares the PDF with the reference file. */
+    @Test
+    public void compareMultipagesTIFFWithPreprocessing() throws IOException, InterruptedException {
+        String scenario = "compareMultipagesTIFFWithPreprocessing";
+        String baseName = "multipage";
+        String imagePath = TEST_IMAGES_DIRECTORY + baseName + ".tiff";
+        String referencePdf = TEST_DOCUMENTS_DIRECTORY + baseName + "_" + testType + ".pdf";
+        String producedPdf = getTargetDirectory() + baseName + "_" + scenario + "_" + testType + ".pdf";
+
+        doOcrAndSavePdfToPath(tesseractReader, imagePath, producedPdf,
+                Collections.<String>singletonList("eng"), DeviceCmyk.BLACK);
+        Assert.assertNull(new CompareTool()
+                .compareByContent(producedPdf, referencePdf, getTargetDirectory(), "diff_"));
+    }
+
+    @Test
+    public void testInputMultipagesTIFFWithPreprocessing() {
+        String path = TEST_IMAGES_DIRECTORY + "multîpage.tiff";
+        String expectedOutput = "Multipage\nTIFF\nExample\nPage 5";
+
+        File file = new File(path);
+
+        String realOutputHocr = getTextFromPdf(tesseractReader, file, 5,
+                Collections.<String>singletonList("eng"));
+        Assert.assertNotNull(realOutputHocr);
+        Assert.assertEquals(expectedOutput, realOutputHocr);
+    }
+
+    @Test
+    public void testInputMultipagesTIFFWithoutPreprocessing() {
+        String path = TEST_IMAGES_DIRECTORY + "multîpage.tiff";
+        String expectedOutput = "Multipage\nTIFF\nExample\nPage 3";
+
+        File file = new File(path);
+
+        tesseractReader.setTesseract4OcrEngineProperties(
+                tesseractReader.getTesseract4OcrEngineProperties()
+                        .setPreprocessingImages(false));
+        String realOutputHocr = getTextFromPdf(tesseractReader, file, 3,
+                Collections.<String>singletonList("eng"));
+        Assert.assertNotNull(realOutputHocr);
+        Assert.assertEquals(expectedOutput, realOutputHocr);
+    }
+
+    /** A .txt input must raise Tesseract4OcrException carrying the incorrect-image-format message. */
+    @LogMessages(messages = {
+        @LogMessage(messageTemplate = Tesseract4LogMessageConstant.CANNOT_READ_INPUT_IMAGE, count = 1)
+    })
+    @Test
+    public void testInputWrongFormat() {
+        String expectedMessage =
+                MessageFormatUtil.format(Tesseract4OcrException.INCORRECT_INPUT_IMAGE_FORMAT, "txt");
+        junitExpectedException.expect(Tesseract4OcrException.class);
+        junitExpectedException.expectMessage(expectedMessage);
+        getTextFromPdf(tesseractReader, new File(TEST_IMAGES_DIRECTORY + "example.txt"));
+    }
+
+    /** With preprocessing disabled, the text read from nümbérs.jpg ("eng") must contain "619121". */
+    @Test
+    public void testJpgWithoutPreprocessing() {
+        File image = new File(TEST_IMAGES_DIRECTORY + "nümbérs.jpg");
+        String expectedDigits = "619121";
+
+        tesseractReader.setTesseract4OcrEngineProperties(
+                tesseractReader.getTesseract4OcrEngineProperties().setPreprocessingImages(false));
+        String recognized =
+                getTextFromPdf(tesseractReader, image, Collections.<String>singletonList("eng"));
+        Assert.assertTrue(recognized.contains(expectedDigits));
+    }
+
+    /** OCRs nümbérs.jpg with BY_WORDS text positioning and compares the PDF with numbers_01.pdf. */
+    @Test
+    public void compareNumbersJPG() throws IOException, InterruptedException {
+        String testName = "compareNumbersJPG";
+        String filename = "nümbérs";
+        String expectedPdfPath = TEST_DOCUMENTS_DIRECTORY + "numbers_01.pdf";
+        String resultPdfPath = getTargetDirectory() + "numbers_01_" + testName + ".pdf";
+
+        tesseractReader.setTesseract4OcrEngineProperties(
+                tesseractReader.getTesseract4OcrEngineProperties().setTextPositioning(TextPositioning.BY_WORDS));
+        try {
+            doOcrAndSavePdfToPath(tesseractReader, TEST_IMAGES_DIRECTORY + filename + ".jpg", resultPdfPath);
+        } finally {
+            // Switch back to BY_LINES even when OCR throws, so the reader is not left in BY_WORDS mode.
+            tesseractReader.setTesseract4OcrEngineProperties(
+                    tesseractReader.getTesseract4OcrEngineProperties().setTextPositioning(TextPositioning.BY_LINES));
+        }
+        Assert.assertNull(new CompareTool().compareByContent(resultPdfPath,
+                expectedPdfPath, getTargetDirectory(), "diff_"));
+    }
+}
diff --git a/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/pdfa3u/PdfA3UIntegrationExecutableTest.java b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/pdfa3u/PdfA3UIntegrationExecutableTest.java
new file mode 100644
index 0000000..6b1fe6b
--- /dev/null
+++ b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/pdfa3u/PdfA3UIntegrationExecutableTest.java
@@ -0,0 +1,34 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr.pdfa3u;
+
+import com.itextpdf.test.annotations.type.IntegrationTest;
+
+import org.junit.experimental.categories.Category;
+
+/** Runs the tests inherited from PdfA3UIntegrationTest with the EXECUTABLE reader type. */
+@Category(IntegrationTest.class)
+public class PdfA3UIntegrationExecutableTest extends PdfA3UIntegrationTest {
+    // Only selects the reader type; this class declares no tests of its own.
+    public PdfA3UIntegrationExecutableTest() { super(ReaderType.EXECUTABLE); }
+}
diff --git a/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/pdfa3u/PdfA3UIntegrationLibTest.java b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/pdfa3u/PdfA3UIntegrationLibTest.java
new file mode 100644
index 0000000..5e7cff5
--- /dev/null
+++ b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/pdfa3u/PdfA3UIntegrationLibTest.java
@@ -0,0 +1,34 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr.pdfa3u;
+
+import com.itextpdf.test.annotations.type.IntegrationTest;
+
+import org.junit.experimental.categories.Category;
+
+/** Runs the tests inherited from PdfA3UIntegrationTest with the LIB reader type. */
+@Category(IntegrationTest.class)
+public class PdfA3UIntegrationLibTest extends PdfA3UIntegrationTest {
+    // Only selects the reader type; this class declares no tests of its own.
+    public PdfA3UIntegrationLibTest() { super(ReaderType.LIB); }
+}
diff --git a/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/pdfa3u/PdfA3UIntegrationTest.java b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/pdfa3u/PdfA3UIntegrationTest.java
new file mode 100644
index 0000000..8868609
--- /dev/null
+++ b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/pdfa3u/PdfA3UIntegrationTest.java
@@ -0,0 +1,158 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr.pdfa3u;
+
+import com.itextpdf.kernel.colors.DeviceRgb;
+import com.itextpdf.kernel.pdf.PdfDocument;
+import com.itextpdf.kernel.pdf.PdfOutputIntent;
+import com.itextpdf.kernel.utils.CompareTool;
+import com.itextpdf.pdfocr.IntegrationTestHelper;
+import com.itextpdf.pdfocr.OcrPdfCreator;
+import com.itextpdf.pdfocr.OcrPdfCreatorProperties;
+import com.itextpdf.pdfocr.tesseract4.AbstractTesseract4OcrEngine;
+import com.itextpdf.pdfocr.tesseract4.Tesseract4OcrEngineProperties;
+import com.itextpdf.pdfocr.tesseract4.TextPositioning;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Collections;
+import org.junit.Assert;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.ExpectedException;
+
+/** Integration tests that build PDF documents with OcrPdfCreator.createPdfA and compare them with references. */
+public abstract class PdfA3UIntegrationTest extends IntegrationTestHelper {
+
+    // path to default cmyk color profile
+    private static final String DEFAULT_CMYK_COLOR_PROFILE_PATH = TEST_DIRECTORY + "profiles/CoatedFOGRA27.icc";
+    // path to default rgb color profile
+    private static final String DEFAULT_RGB_COLOR_PROFILE_PATH = TEST_DIRECTORY + "profiles/sRGB_CS_profile.icm";
+
+    // OCR engine used by the tests below; assigned in the constructor
+    AbstractTesseract4OcrEngine tesseractReader;
+
+    @Rule
+    public ExpectedException junitExpectedException = ExpectedException.none();
+
+    public PdfA3UIntegrationTest(ReaderType type) {
+        tesseractReader = getTesseractReader(type);
+    }
+
+    /** Runs createPdfA on numbers_01.jpg with BY_WORDS positioning and the CMYK output intent. */
+    @Test
+    public void comparePdfA3uCMYKColorSpaceJPG() throws IOException, InterruptedException {
+        String testName = "comparePdfA3uCMYKColorSpaceJPG";
+        String filename = "numbers_01";
+        String expectedPdfPath = TEST_DOCUMENTS_DIRECTORY + filename + "_a3u.pdf";
+        String resultPdfPath = getTargetDirectory() + filename + "_" + testName + "_a3u.pdf";
+
+        try {
+            OcrPdfCreatorProperties ocrPdfCreatorProperties = new OcrPdfCreatorProperties();
+            ocrPdfCreatorProperties.setPdfLang("en-US");
+            ocrPdfCreatorProperties.setTitle("");
+
+            OcrPdfCreator ocrPdfCreator = new OcrPdfCreator(tesseractReader, ocrPdfCreatorProperties);
+            tesseractReader.setTesseract4OcrEngineProperties(
+                    tesseractReader.getTesseract4OcrEngineProperties().setTextPositioning(TextPositioning.BY_WORDS));
+            // Exercise both the getter and the setter of the creator's OCR engine.
+            Assert.assertEquals(tesseractReader, ocrPdfCreator.getOcrEngine());
+            ocrPdfCreator.setOcrEngine(tesseractReader);
+            PdfDocument doc = ocrPdfCreator.createPdfA(
+                    Collections.<File>singletonList(new File(TEST_IMAGES_DIRECTORY + filename + ".jpg")),
+                    getPdfWriter(resultPdfPath), getCMYKPdfOutputIntent());
+            Assert.assertNotNull(doc);
+            doc.close();
+
+            Assert.assertNull(new CompareTool()
+                    .compareByContent(resultPdfPath, expectedPdfPath, getTargetDirectory(), "diff_"));
+        } finally {
+            // The engine must still report BY_WORDS here; afterwards it is switched back to BY_LINES.
+            Assert.assertEquals(TextPositioning.BY_WORDS,
+                    tesseractReader.getTesseract4OcrEngineProperties().getTextPositioning());
+            tesseractReader.setTesseract4OcrEngineProperties(
+                    tesseractReader.getTesseract4OcrEngineProperties().setTextPositioning(TextPositioning.BY_LINES));
+        }
+    }
+
+    /** Runs createPdfA on spanish_01.jpg with the "spa" language, black RGB text and the RGB output intent. */
+    @Test
+    public void comparePdfA3uRGBSpanishJPG() throws IOException, InterruptedException {
+        String testName = "comparePdfA3uRGBSpanishJPG";
+        String filename = "spanish_01";
+        String expectedPdfPath = TEST_DOCUMENTS_DIRECTORY + filename + "_a3u.pdf";
+        String resultPdfPath = getTargetDirectory() + filename + "_" + testName + "_a3u.pdf";
+
+        // Build new engine properties from the current ones, adjust them, then install them.
+        Tesseract4OcrEngineProperties properties =
+                new Tesseract4OcrEngineProperties(tesseractReader.getTesseract4OcrEngineProperties());
+        properties.setPathToTessData(getTessDataDirectory());
+        properties.setLanguages(Collections.<String>singletonList("spa"));
+        tesseractReader.setTesseract4OcrEngineProperties(properties);
+
+        OcrPdfCreatorProperties ocrPdfCreatorProperties = new OcrPdfCreatorProperties();
+        ocrPdfCreatorProperties.setPdfLang("en-US");
+        ocrPdfCreatorProperties.setTitle("");
+        ocrPdfCreatorProperties.setTextColor(DeviceRgb.BLACK);
+
+        OcrPdfCreator ocrPdfCreator = new OcrPdfCreator(tesseractReader, ocrPdfCreatorProperties);
+        PdfDocument doc = ocrPdfCreator.createPdfA(
+                Collections.<File>singletonList(new File(TEST_IMAGES_DIRECTORY + filename + ".jpg")),
+                getPdfWriter(resultPdfPath), getRGBPdfOutputIntent());
+        Assert.assertNotNull(doc);
+        doc.close();
+
+        Assert.assertNull(new CompareTool().compareByContent(resultPdfPath,
+                expectedPdfPath, getTargetDirectory(), "diff_"));
+    }
+
+    /** Creates PDF cmyk output intent for tests. */
+    protected PdfOutputIntent getCMYKPdfOutputIntent() throws FileNotFoundException {
+        return createOutputIntent(DEFAULT_CMYK_COLOR_PROFILE_PATH, "Custom", "http://www.color.org",
+                "Coated FOGRA27 (ISO 12647 - 2:2004)");
+    }
+
+    /** Creates PDF rgb output intent for tests. */
+    protected  PdfOutputIntent getRGBPdfOutputIntent() throws FileNotFoundException {
+        return createOutputIntent(DEFAULT_RGB_COLOR_PROFILE_PATH, "", "", "sRGB IEC61966-2.1");
+    }
+
+    /** Builds an output intent from the given ICC profile file and always closes the profile stream. */
+    private static PdfOutputIntent createOutputIntent(String profilePath, String identifier,
+            String registryName, String info) throws FileNotFoundException {
+        InputStream is = new FileInputStream(profilePath);
+        try {
+            // NOTE(review): closing afterwards assumes the constructor reads the whole stream — confirm
+            return new PdfOutputIntent(identifier, "", registryName, info, is);
+        } finally {
+            try {
+                is.close();
+            } catch (IOException ignored) {
+                // best effort: a failed close of the input stream should not fail the test
+            }
+        }
+    }
+}
diff --git a/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/pdflayers/PdfLayersIntegrationExecutableTest.java b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/pdflayers/PdfLayersIntegrationExecutableTest.java
new file mode 100644
index 0000000..f4b8057
--- /dev/null
+++ b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/pdflayers/PdfLayersIntegrationExecutableTest.java
@@ -0,0 +1,34 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr.pdflayers;
+
+import com.itextpdf.test.annotations.type.IntegrationTest;
+
+import org.junit.experimental.categories.Category;
+
+/** Runs the tests inherited from PdfLayersIntegrationTest with the EXECUTABLE reader type. */
+@Category(IntegrationTest.class)
+public class PdfLayersIntegrationExecutableTest extends PdfLayersIntegrationTest {
+    // Only selects the reader type; this class declares no tests of its own.
+    public PdfLayersIntegrationExecutableTest() { super(ReaderType.EXECUTABLE); }
+}
diff --git a/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/pdflayers/PdfLayersIntegrationLibTest.java b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/pdflayers/PdfLayersIntegrationLibTest.java
new file mode 100644
index 0000000..e725807
--- /dev/null
+++ b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/pdflayers/PdfLayersIntegrationLibTest.java
@@ -0,0 +1,34 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr.pdflayers;
+
+import com.itextpdf.test.annotations.type.IntegrationTest;
+
+import org.junit.experimental.categories.Category;
+
+/** Runs the tests inherited from PdfLayersIntegrationTest with the LIB reader type. */
+@Category(IntegrationTest.class)
+public class PdfLayersIntegrationLibTest extends PdfLayersIntegrationTest {
+    // Only selects the reader type; this class declares no tests of its own.
+    public PdfLayersIntegrationLibTest() { super(ReaderType.LIB); }
+}
diff --git a/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/pdflayers/PdfLayersIntegrationTest.java b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/pdflayers/PdfLayersIntegrationTest.java
new file mode 100644
index 0000000..59270e3
--- /dev/null
+++ b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/pdflayers/PdfLayersIntegrationTest.java
@@ -0,0 +1,174 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr.pdflayers;
+
+import com.itextpdf.kernel.pdf.PdfDocument;
+import com.itextpdf.kernel.pdf.PdfName;
+import com.itextpdf.kernel.pdf.layer.PdfLayer;
+import com.itextpdf.pdfocr.IntegrationTestHelper;
+import com.itextpdf.pdfocr.OcrPdfCreator;
+import com.itextpdf.pdfocr.OcrPdfCreatorProperties;
+import com.itextpdf.pdfocr.tesseract4.AbstractTesseract4OcrEngine;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import org.junit.Assert;
+import org.junit.Test;
+
+/** Integration tests for the image and text layers of the PDF documents produced by OcrPdfCreator. */
+public abstract class PdfLayersIntegrationTest extends IntegrationTestHelper {
+
+    // OCR engine used by every test below; assigned in the constructor
+    AbstractTesseract4OcrEngine tesseractReader;
+
+    public PdfLayersIntegrationTest(ReaderType type) {
+        tesseractReader = getTesseractReader(type);
+    }
+
+    /** With both layer names set, expects two layers per page and text only in the text layer. */
+    @Test
+    public void testTextFromPdfLayersFromMultiPageTiff() throws IOException {
+        String testName = "testTextFromPdfLayersFromMultiPageTiff";
+        boolean preprocess =
+                tesseractReader.getTesseract4OcrEngineProperties().isPreprocessingImages();
+        String path = TEST_IMAGES_DIRECTORY + "multîpage.tiff";
+        String pdfPath = getTargetDirectory() + testName + ".pdf";
+        File file = new File(path);
+
+        tesseractReader.setTesseract4OcrEngineProperties(
+                tesseractReader.getTesseract4OcrEngineProperties().setPreprocessingImages(false));
+        try {
+            OcrPdfCreatorProperties properties = new OcrPdfCreatorProperties();
+            properties.setTextLayerName("Text Layer");
+            properties.setImageLayerName("Image Layer");
+            OcrPdfCreator ocrPdfCreator = new OcrPdfCreator(tesseractReader, properties);
+            PdfDocument doc =
+                    ocrPdfCreator.createPdf(Collections.<File>singletonList(file), getPdfWriter(pdfPath));
+
+            Assert.assertNotNull(doc);
+            int numOfPages = doc.getNumberOfPages();
+            List<PdfLayer> layers = doc.getCatalog().getOCProperties(true).getLayers();
+
+            Assert.assertEquals(numOfPages * 2, layers.size());
+            Assert.assertEquals("Image Layer", layers.get(2).getPdfObject().get(PdfName.Name).toString());
+            Assert.assertEquals("Text Layer", layers.get(3).getPdfObject().get(PdfName.Name).toString());
+
+            doc.close();
+
+            // Text layer should contain all text
+            // Image layer shouldn't contain any text
+            String expectedOutput = "Multipage\nTIFF\nExample\nPage 5";
+            Assert.assertEquals(expectedOutput, getTextFromPdfLayer(pdfPath, "Text Layer", 5));
+            Assert.assertEquals("", getTextFromPdfLayer(pdfPath, "Image Layer", 5));
+            Assert.assertFalse(tesseractReader.getTesseract4OcrEngineProperties().isPreprocessingImages());
+        } finally {
+            // Restore the original flag even if OCR or an assertion above fails.
+            tesseractReader.setTesseract4OcrEngineProperties(
+                    tesseractReader.getTesseract4OcrEngineProperties().setPreprocessingImages(preprocess));
+        }
+    }
+
+    /** Without layer names, expects no layers while the "Page 5" text is still returned. */
+    @Test
+    public void testTextFromMultiPageTiff() throws IOException {
+        String testName = "testTextFromMultiPageTiff";
+        boolean preprocess =
+                tesseractReader.getTesseract4OcrEngineProperties().isPreprocessingImages();
+        String path = TEST_IMAGES_DIRECTORY + "multîpage.tiff";
+        String pdfPath = getTargetDirectory() + testName + ".pdf";
+        File file = new File(path);
+
+        tesseractReader.setTesseract4OcrEngineProperties(
+                tesseractReader.getTesseract4OcrEngineProperties().setPreprocessingImages(false));
+        try {
+            OcrPdfCreator ocrPdfCreator = new OcrPdfCreator(tesseractReader);
+            PdfDocument doc =
+                    ocrPdfCreator.createPdf(Collections.<File>singletonList(file), getPdfWriter(pdfPath));
+
+            Assert.assertNotNull(doc);
+            List<PdfLayer> layers = doc.getCatalog().getOCProperties(true).getLayers();
+
+            // No layer names were configured, so the document is expected to have no layers.
+            Assert.assertEquals(0, layers.size());
+
+            doc.close();
+
+            // The text is requested with a null layer name because there is no layer to select.
+            String expectedOutput = "Multipage\nTIFF\nExample\nPage 5";
+            Assert.assertEquals(expectedOutput, getTextFromPdfLayer(pdfPath, null, 5));
+            Assert.assertFalse(tesseractReader.getTesseract4OcrEngineProperties().isPreprocessingImages());
+        } finally {
+            // Restore the original flag even if OCR or an assertion above fails.
+            tesseractReader.setTesseract4OcrEngineProperties(
+                    tesseractReader.getTesseract4OcrEngineProperties().setPreprocessingImages(preprocess));
+        }
+    }
+
+    /** Four image files with custom layer names: expects two layers per page, text only in "text". */
+    @Test
+    public void testTextFromPdfLayersFromMultiPagePdf() throws IOException {
+        String testName = "testTextFromPdfLayersFromMultiPagePdf";
+        String pdfPath = getTargetDirectory() + testName + ".pdf";
+
+        List<File> files = Arrays.<File>asList(
+                new File(TEST_IMAGES_DIRECTORY + "german_01.jpg"),
+                new File(TEST_IMAGES_DIRECTORY + "tèst/noisy_01.png"),
+                new File(TEST_IMAGES_DIRECTORY + "nümbérs.jpg"),
+                new File(TEST_IMAGES_DIRECTORY + "example_04.png")
+        );
+
+        OcrPdfCreatorProperties properties = new OcrPdfCreatorProperties();
+        properties.setImageLayerName("image");
+        properties.setTextLayerName("text");
+
+        OcrPdfCreator ocrPdfCreator = new OcrPdfCreator(tesseractReader, properties);
+        PdfDocument doc = ocrPdfCreator.createPdf(files, getPdfWriter(pdfPath));
+
+        Assert.assertNotNull(doc);
+        int numOfPages = doc.getNumberOfPages();
+        // JUnit's assertEquals takes the expected value first: one page per input file.
+        Assert.assertEquals(files.size(), numOfPages);
+        List<PdfLayer> layers = doc.getCatalog()
+                .getOCProperties(true).getLayers();
+
+        Assert.assertEquals(numOfPages * 2, layers.size());
+        Assert.assertEquals("image",
+                layers.get(2).getPdfObject().get(PdfName.Name).toString());
+        Assert.assertEquals("text",
+                layers.get(3).getPdfObject().get(PdfName.Name).toString());
+
+        doc.close();
+
+        // Text layer should contain all text
+        // Image layer shouldn't contain any text
+        // 3 presumably selects the page built from nümbérs.jpg, the third input file — confirm
+        String expectedOutput = "619121";
+        Assert.assertEquals(expectedOutput,
+                getTextFromPdfLayer(pdfPath, "text", 3));
+        Assert.assertEquals("",
+                getTextFromPdfLayer(pdfPath, "image", 3));
+    }
+}
diff --git a/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/tessdata/TessDataIntegrationExecutableTest.java b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/tessdata/TessDataIntegrationExecutableTest.java
new file mode 100644
index 0000000..c67fc54
--- /dev/null
+++ b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/tessdata/TessDataIntegrationExecutableTest.java
@@ -0,0 +1,34 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr.tessdata;
+
+import com.itextpdf.test.annotations.type.IntegrationTest;
+
+import org.junit.experimental.categories.Category;
+
+/** Runs the tests inherited from TessDataIntegrationTest with the EXECUTABLE reader type. */
+@Category(IntegrationTest.class)
+public class TessDataIntegrationExecutableTest extends TessDataIntegrationTest {
+    // Only selects the reader type; this class declares no tests of its own.
+    public TessDataIntegrationExecutableTest() { super(ReaderType.EXECUTABLE); }
+}
diff --git a/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/tessdata/TessDataIntegrationLibTest.java b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/tessdata/TessDataIntegrationLibTest.java
new file mode 100644
index 0000000..689a058
--- /dev/null
+++ b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/tessdata/TessDataIntegrationLibTest.java
@@ -0,0 +1,112 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr.tessdata;
+
+import com.itextpdf.pdfocr.TextInfo;
+import com.itextpdf.pdfocr.tesseract4.OutputFormat;
+import com.itextpdf.pdfocr.tesseract4.TesseractHelper;
+import com.itextpdf.pdfocr.tesseract4.TextPositioning;
+import com.itextpdf.test.annotations.type.IntegrationTest;
+
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import java.io.File;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+
+@Category(IntegrationTest.class)
+public class TessDataIntegrationLibTest extends TessDataIntegrationTest {
+    public TessDataIntegrationLibTest() {
+        super(ReaderType.LIB);
+    }
+
+    @Test(timeout = 50000)
+    public void textOutputFromHalftoneFile() {
+        String imgPath = TEST_IMAGES_DIRECTORY + "halftone.jpg";
+        String expected01 = "Silliness Enablers";
+        String expected02 = "You dream it, we enable it";
+        String expected03 = "QUANTITY";
+
+        String result = getRecognizedTextFromTextFile(tesseractReader, imgPath,
+                Collections.<String>singletonList("eng"));
+
+        // correct result for a halftone input image
+        Assert.assertTrue(result.contains(expected01));
+        Assert.assertTrue(result.contains(expected02));
+        Assert.assertTrue(result.contains(expected03));
+    }
+
+    @Test(timeout = 50000)
+    public void hocrOutputFromHalftoneFile() throws java.io.IOException {
+        String path = TEST_IMAGES_DIRECTORY + "halftone.jpg";
+        String expected01 = "Silliness";
+        String expected02 = "Enablers";
+        String expected03 = "You";
+        String expected04 = "Middle";
+        String expected05 = "André";
+        String expected06 = "QUANTITY";
+        String expected07 = "DESCRIPTION";
+        String expected08 = "Silliness Enablers";
+        String expected09 = "QUANTITY DESCRIPTION UNIT PRICE TOTAL";
+
+        File imgFile = new File(path);
+        File outputFile = new File(getTargetDirectory()
+                + "hocrOutputFromHalftoneFile.hocr");
+
+        tesseractReader.doTesseractOcr(imgFile, outputFile, OutputFormat.HOCR);
+        Map<Integer, List<TextInfo>> pageData = TesseractHelper
+                .parseHocrFile(Collections.<File>singletonList(outputFile),
+                        TextPositioning.BY_WORDS
+                );
+        Assert.assertTrue(findTextInPageData(pageData, 1, expected01));
+        Assert.assertTrue(findTextInPageData(pageData, 1, expected02));
+        Assert.assertTrue(findTextInPageData(pageData, 1, expected03));
+        Assert.assertTrue(findTextInPageData(pageData, 1, expected04));
+        Assert.assertTrue(findTextInPageData(pageData, 1, expected05));
+        Assert.assertTrue(findTextInPageData(pageData, 1, expected06));
+        Assert.assertTrue(findTextInPageData(pageData, 1, expected07));
+
+        pageData = TesseractHelper
+                .parseHocrFile(Collections.<File>singletonList(outputFile),
+                        TextPositioning.BY_LINES
+                );
+        Assert.assertTrue(findTextInPageData(pageData, 1, expected08));
+        Assert.assertTrue(findTextInPageData(pageData, 1, expected09));
+    }
+
+    /**
+     * Searches for certain text in page data.
+     */
+    private boolean findTextInPageData(Map<Integer, List<TextInfo>> pageData, int page, String textToSearchFor) {
+        for (TextInfo textInfo : pageData.get(page)) {
+            if (textToSearchFor.equals(textInfo.getText())) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+}
diff --git a/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/tessdata/TessDataIntegrationTest.java b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/tessdata/TessDataIntegrationTest.java
new file mode 100644
index 0000000..7ff1b14
--- /dev/null
+++ b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/tessdata/TessDataIntegrationTest.java
@@ -0,0 +1,673 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr.tessdata;
+
+import com.itextpdf.kernel.colors.DeviceCmyk;
+import com.itextpdf.kernel.pdf.PdfWriter;
+import com.itextpdf.kernel.utils.CompareTool;
+import com.itextpdf.pdfocr.IntegrationTestHelper;
+import com.itextpdf.pdfocr.OcrPdfCreator;
+import com.itextpdf.pdfocr.OcrPdfCreatorProperties;
+import com.itextpdf.pdfocr.PdfOcrLogMessageConstant;
+import com.itextpdf.pdfocr.tesseract4.AbstractTesseract4OcrEngine;
+import com.itextpdf.pdfocr.tesseract4.Tesseract4OcrEngineProperties;
+import com.itextpdf.pdfocr.tesseract4.TextPositioning;
+import com.itextpdf.test.annotations.LogMessage;
+import com.itextpdf.test.annotations.LogMessages;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.ExpectedException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public abstract class TessDataIntegrationTest extends IntegrationTestHelper {
+
+    private static final Logger LOGGER = LoggerFactory
+            .getLogger(TessDataIntegrationTest.class);
+
+    @Rule // JUnit rule for declaring expected exceptions; initialised to expect none
+    public ExpectedException junitExpectedException = ExpectedException.none();
+
+    AbstractTesseract4OcrEngine tesseractReader; // engine under test, assigned in the constructor
+    String testFileTypeName; // "executable" or "lib"; used to build test file names
+    private boolean isExecutableReaderType; // true when constructed with ReaderType.EXECUTABLE
+
+    /**
+     * Remembers whether the EXECUTABLE reader type was requested, derives the matching
+     * file-name token ("executable" or "lib") and obtains the engine for that type.
+     */
+    public TessDataIntegrationTest(ReaderType type) {
+        this.isExecutableReaderType = type.equals(ReaderType.EXECUTABLE);
+        this.testFileTypeName = this.isExecutableReaderType ? "executable" : "lib";
+        this.tesseractReader = getTesseractReader(type);
+    }
+
+    /** Before each test: installs fresh engine properties with the tessdata path set. */
+    @Before
+    public void initTesseractProperties() {
+        Tesseract4OcrEngineProperties freshProperties = new Tesseract4OcrEngineProperties();
+        freshProperties.setPathToTessData(getTessDataDirectory());
+        tesseractReader.setTesseract4OcrEngineProperties(freshProperties);
+    }
+
+    /**
+     * Runs getTextFromPdf on greek_01.jpg with the "ell" language and checks that
+     * the returned text contains the expected Greek word.
+     */
+    @Test
+    public void textGreekText() {
+        File greekImage = new File(TEST_IMAGES_DIRECTORY + "greek_01.jpg");
+        if (isExecutableReaderType) {
+            // image preprocessing is switched off for the executable reader only
+            Tesseract4OcrEngineProperties props = tesseractReader.getTesseract4OcrEngineProperties();
+            tesseractReader.setTesseract4OcrEngineProperties(props.setPreprocessingImages(false));
+        }
+        String recognized = getTextFromPdf(tesseractReader, greekImage,
+                Arrays.<String>asList("ell"), NOTO_SANS_FONT_PATH);
+        Assert.assertTrue(recognized.contains("ΟΜΟΛΟΓΙΑ"));
+    }
+
+    /** With the "jpn" language, getTextFromPdf must return exactly the expected Japanese text. */
+    @Test
+    public void textJapaneseText() {
+        File japaneseImage = new File(TEST_IMAGES_DIRECTORY + "japanese_01.png");
+
+        String recognized = getTextFromPdf(tesseractReader, japaneseImage,
+                Arrays.<String>asList("jpn"), KOSUGI_FONT_PATH);
+        // correct result with specified japanese language
+        Assert.assertEquals("日 本 語\n文法", recognized);
+    }
+
+    /**
+     * French sample: the "fra" result must end with the expected text, while "eng",
+     * "spa" and an empty language list must not reproduce it.
+     */
+    @Test
+    public void testFrench() {
+        File frenchImage = new File(TEST_IMAGES_DIRECTORY + "french_01.png");
+        String expectedFr = "RESTEZ\nCALME\nPARLEZ EN\nFRANÇAIS";
+        // correct result with specified french language
+        String fraText = getTextFromPdf(tesseractReader, frenchImage, Collections.<String>singletonList("fra"));
+        Assert.assertTrue(fraText.endsWith(expectedFr));
+
+        // incorrect result when another language or no language is specified
+        String engText = getTextFromPdf(tesseractReader, frenchImage, Collections.<String>singletonList("eng"));
+        Assert.assertFalse(engText.endsWith(expectedFr));
+        String spaText = getTextFromPdf(tesseractReader, frenchImage, Collections.<String>singletonList("spa"));
+        Assert.assertNotEquals(expectedFr, spaText);
+        Assert.assertNotEquals(expectedFr, getTextFromPdf(tesseractReader, frenchImage, new ArrayList<String>()));
+    }
+
+    @Test
+    public void testSpanishPNG() throws IOException {
+        String testName = "compareSpanishPNG";
+        String filename = "scanned_spa_01";
+        String expectedText1 = "¿Y SI ENSAYARA COMO ACTUAR?";
+        String expectedText2 = "¿Y SI ENSAYARA ACTUAR?";
+        String resultPdfPath = getTargetDirectory() + filename + "_" + testName
+                + "_" + testFileTypeName + ".pdf";
+
+        List<String> languages = Arrays.<String>asList("spa", "spa_old");
+        Tesseract4OcrEngineProperties properties =
+                tesseractReader.getTesseract4OcrEngineProperties();
+        if (isExecutableReaderType) {
+            properties.setPreprocessingImages(false);
+        }
+
+        // locate text by words
+        properties.setTextPositioning(TextPositioning.BY_WORDS);
+        properties.setLanguages(languages);
+        tesseractReader.setTesseract4OcrEngineProperties(properties);
+
+        OcrPdfCreatorProperties ocrPdfCreatorProperties = new OcrPdfCreatorProperties();
+        ocrPdfCreatorProperties.setTextColor(DeviceCmyk.BLACK);
+
+        OcrPdfCreator ocrPdfCreator = new OcrPdfCreator(tesseractReader, ocrPdfCreatorProperties);
+        try (PdfWriter pdfWriter = getPdfWriter(resultPdfPath)) {
+            ocrPdfCreator.createPdf(Collections.<File>singletonList(
+                    new File(TEST_IMAGES_DIRECTORY + filename + ".png")),
+                    pdfWriter)
+                    .close();
+        }
+
+        try {
+            String result = getTextFromPdfLayer(resultPdfPath, null, 1)
+                    .replace("\n", " ");
+            Assert.assertTrue(result.contains(expectedText1)
+                    || result.contains(expectedText2));
+        } finally {
+            Assert.assertEquals(TextPositioning.BY_WORDS,
+                    tesseractReader.getTesseract4OcrEngineProperties().getTextPositioning());
+        }
+    }
+
+    /**
+     * Runs getRecognizedTextFromTextFile on greek_01.jpg with the "ell" language and
+     * checks that the returned text contains the expected Greek word.
+     */
+    @Test
+    public void textGreekOutputFromTxtFile() {
+        if (isExecutableReaderType) {
+            // image preprocessing is switched off for the executable reader only
+            Tesseract4OcrEngineProperties props = tesseractReader.getTesseract4OcrEngineProperties();
+            tesseractReader.setTesseract4OcrEngineProperties(props.setPreprocessingImages(false));
+        }
+        String recognized = getRecognizedTextFromTextFile(tesseractReader,
+                TEST_IMAGES_DIRECTORY + "greek_01.jpg", Collections.<String>singletonList("ell"));
+        Assert.assertTrue(recognized.contains("ΟΜΟΛΟΓΙΑ"));
+    }
+
+    /**
+     * Recognizes japanese_01.png with "jpn" and checks that the result, with form
+     * feeds and line feeds removed, contains the expected characters.
+     */
+    @Test
+    public void textJapaneseOutputFromTxtFile() {
+        String rawText = getRecognizedTextFromTextFile(tesseractReader,
+                TEST_IMAGES_DIRECTORY + "japanese_01.png", Collections.<String>singletonList("jpn"));
+        String flattened = rawText.replaceAll("[\f\n]", "");
+        // correct result with specified japanese language
+        Assert.assertTrue(flattened.contains("日本語文法"));
+    }
+
+    /**
+     * French sample via text-file recognition: the normalized "fra" result must end with
+     * the expected text; "eng", "spa" and an empty language list must not reproduce it.
+     */
+    @Test
+    public void testFrenchOutputFromTxtFile() {
+        String imgPath = TEST_IMAGES_DIRECTORY + "french_01.png";
+        String expectedFr = "RESTEZ\nCALME\nPARLEZ EN\nFRANÇAIS";
+
+        // drop "\n\f" runs, then collapse double line feeds, trimming after each step
+        String fraText = getRecognizedTextFromTextFile(tesseractReader, imgPath,
+                Collections.<String>singletonList("fra"))
+                .replaceAll("(?:\\n\\f)+", "").trim()
+                .replaceAll("\\n\\n", "\n").trim();
+        // correct result with specified french language
+        Assert.assertTrue(fraText.endsWith(expectedFr));
+
+        // incorrect result when another language or no language is specified
+        String engText = getRecognizedTextFromTextFile(tesseractReader, imgPath, Collections.<String>singletonList("eng"));
+        Assert.assertFalse(engText.endsWith(expectedFr));
+        String spaText = getRecognizedTextFromTextFile(tesseractReader, imgPath, Collections.<String>singletonList("spa"));
+        Assert.assertNotEquals(expectedFr, spaText);
+        Assert.assertNotEquals(expectedFr, getRecognizedTextFromTextFile(tesseractReader, imgPath, new ArrayList<String>()));
+    }
+
+    /**
+     * Arabic sample via text-file recognition: only the "ara" result may start with
+     * the expected phrase; "eng", "spa" and an empty language list must not.
+     */
+    @Test
+    public void testArabicOutputFromTxtFile() {
+        String imgPath = TEST_IMAGES_DIRECTORY + "arabic_02.png";
+        // First sentence
+        String expected = "اللغة العربية";
+        // correct result with specified arabic language
+        String araText = getRecognizedTextFromTextFile(tesseractReader, imgPath,
+                Collections.<String>singletonList("ara"));
+        Assert.assertTrue(araText.startsWith(expected));
+
+        // incorrect result when another language or no language is specified
+        List<List<String>> otherLanguages = Arrays.<List<String>>asList(
+                Collections.<String>singletonList("eng"),
+                Collections.<String>singletonList("spa"),
+                new ArrayList<String>());
+        for (List<String> languages : otherLanguages) {
+            Assert.assertFalse(getRecognizedTextFromTextFile(tesseractReader, imgPath, languages)
+                    .startsWith(expected));
+        }
+    }
+
+    /** Recognizes german_01.jpg with "deu" and compares the result with the expected text file. */
+    @Test
+    public void testGermanAndCompareTxtFiles() {
+        // NOTE(review): no "_" between "german_01" and the reader-type token - confirm file name
+        String expectedTxt = TEST_DOCUMENTS_DIRECTORY + "german_01" + testFileTypeName + ".txt";
+        boolean sameText = doOcrAndCompareTxtFiles(tesseractReader,
+                TEST_IMAGES_DIRECTORY + "german_01.jpg", expectedTxt, Collections.<String>singletonList("deu"));
+        Assert.assertTrue(sameText);
+    }
+
+    /** Recognizes the multi-page TIFF with "eng" and compares the result with the expected text file. */
+    @Test
+    public void testMultipageTiffAndCompareTxtFiles() {
+        // note: the image file name below contains a non-ASCII character
+        String expectedTxt = TEST_DOCUMENTS_DIRECTORY + "multipage_" + testFileTypeName + ".txt";
+        boolean sameText = doOcrAndCompareTxtFiles(tesseractReader,
+                TEST_IMAGES_DIRECTORY + "multîpage.tiff", expectedTxt, Collections.<String>singletonList("eng"));
+        Assert.assertTrue(sameText);
+    }
+
+    /**
+     * German sample: "deu" must yield exactly the expected text, while "eng", "fra"
+     * and an empty language list must yield something different.
+     */
+    @Test
+    public void testGermanWithTessData() {
+        File germanImage = new File(TEST_IMAGES_DIRECTORY + "german_01.jpg");
+        String expectedGerman = "Das Geheimnis\ndes Könnens\nliegt im Wollen.";
+        // correct result with specified german language
+        Assert.assertEquals(expectedGerman,
+                getTextFromPdf(tesseractReader, germanImage, Collections.<String>singletonList("deu")));
+
+        // incorrect result when another language or no language is specified
+        for (List<String> languages : Arrays.<List<String>>asList(
+                Collections.<String>singletonList("eng"), Collections.<String>singletonList("fra"),
+                new ArrayList<String>())) {
+            Assert.assertNotEquals(expectedGerman, getTextFromPdf(tesseractReader, germanImage, languages));
+        }
+    }
+
+    /**
+     * Mixed Arabic/English sample: "ara" + "eng" must yield the expected text once
+     * "?" characters are stripped; "eng" alone or an empty language list must not.
+     */
+    @Test
+    public void testArabicTextWithEng() {
+        File mixedImage = new File(TEST_IMAGES_DIRECTORY + "arabic_01.jpg");
+        String expected = "الحية. والضحك؛ والحب\nlive, laugh, love";
+
+        // correct result with specified arabic+english languages
+        String bothLanguages = getTextFromPdf(tesseractReader, mixedImage,
+                Arrays.<String>asList("ara", "eng"), CAIRO_FONT_PATH).replaceAll("[?]", "");
+        Assert.assertEquals(expected, bothLanguages);
+
+        // incorrect result when arabic is missing or no language is specified
+        String engOnly = getTextFromPdf(tesseractReader, mixedImage, Collections.<String>singletonList("eng"), CAIRO_FONT_PATH);
+        Assert.assertNotEquals(expected, engOnly);
+        Assert.assertNotEquals(expected, getTextFromPdf(tesseractReader, mixedImage, new ArrayList<String>(), CAIRO_FONT_PATH));
+    }
+
+    /**
+     * Arabic sample: only "ara" may yield exactly the expected phrase; "eng",
+     * "spa" and an empty language list must yield something different.
+     */
+    @Test
+    public void testArabicText() {
+        File arabicImage = new File(TEST_IMAGES_DIRECTORY + "arabic_02.png");
+        // First sentence
+        String expected = "اللغة العربية";
+        // correct result with specified arabic language
+        Assert.assertEquals(expected, getTextFromPdf(tesseractReader, arabicImage,
+                Collections.<String>singletonList("ara"), CAIRO_FONT_PATH));
+
+        // incorrect result when another language or no language is specified
+        for (List<String> languages : Arrays.<List<String>>asList(
+                Collections.<String>singletonList("eng"), Collections.<String>singletonList("spa"),
+                new ArrayList<String>())) {
+            Assert.assertNotEquals(expected, getTextFromPdf(tesseractReader, arabicImage, languages, CAIRO_FONT_PATH));
+        }
+    }
+
+    /**
+     * OCRs multilang.jpg with "eng", "deu" and "spa" (BY_WORDS positioning, page segmentation
+     * mode 3), saves a PDF and compares it by content with the reference document.
+     */
+    @Test
+    public void compareMultiLangImage() throws InterruptedException, java.io.IOException {
+        String testName = "compareMultiLangImage";
+        String filename = "multilang";
+        String expectedPdfPath = TEST_DOCUMENTS_DIRECTORY + filename + "_" + testFileTypeName + ".pdf";
+        String resultPdfPath = getTargetDirectory() + filename + "_" + testName + "_" + testFileTypeName + ".pdf";
+        try {
+            Tesseract4OcrEngineProperties engineProps = tesseractReader.getTesseract4OcrEngineProperties();
+            engineProps.setTextPositioning(TextPositioning.BY_WORDS);
+            engineProps.setPathToTessData(getTessDataDirectory());
+            engineProps.setPageSegMode(3);
+            tesseractReader.setTesseract4OcrEngineProperties(engineProps);
+            doOcrAndSavePdfToPath(tesseractReader, TEST_IMAGES_DIRECTORY + filename + ".jpg",
+                    resultPdfPath, Arrays.<String>asList("eng", "deu", "spa"), DeviceCmyk.BLACK);
+            String differences = new CompareTool().compareByContent(resultPdfPath, expectedPdfPath,
+                    TEST_DOCUMENTS_DIRECTORY, "diff_");
+            Assert.assertNull(differences);
+        } finally {
+            // both settings must still be reported by the engine, even if the run above failed
+            Assert.assertEquals(TextPositioning.BY_WORDS, tesseractReader.getTesseract4OcrEngineProperties().getTextPositioning());
+            Assert.assertEquals(3, tesseractReader.getTesseract4OcrEngineProperties().getPageSegMode().intValue());
+        }
+    }
+
+    /**
+     * OCRs hindi_01.jpg with "hin" and "urd", passing only CAIRO_FONT_PATH: the plain text layer must
+     * contain the Urdu word but not the Hindi one; the actual-text extraction must contain both.
+     */
+    @LogMessages(messages = {
+        @LogMessage(messageTemplate = PdfOcrLogMessageConstant.COULD_NOT_FIND_CORRESPONDING_GLYPH_TO_UNICODE_CHARACTER, count = 12)
+    })
+    @Test
+    public void testHindiTextWithUrdu() throws IOException {
+        String testName = "testHindiTextWithUrdu";
+        String pdfPath = getTargetDirectory() + testName + ".pdf";
+        File hindiImage = new File(TEST_IMAGES_DIRECTORY + "hindi_01.jpg");
+        String expectedHindi = "हिन्दुस्तानी";
+        String expectedUrdu = "وتالی";
+        doOcrAndSavePdfToPath(tesseractReader, hindiImage.getAbsolutePath(), pdfPath,
+                Arrays.asList("hin", "urd"), Collections.singletonList(CAIRO_FONT_PATH));
+
+        // because of provided font only urdu will be displayed correctly
+        String plainLayerText = getTextFromPdfLayer(pdfPath, null, 1);
+        Assert.assertTrue(plainLayerText.contains(expectedUrdu));
+        Assert.assertFalse(plainLayerText.contains(expectedHindi));
+
+        // actual text should contain all text
+        String actualText = getTextFromPdfLayerUsingActualText(pdfPath, null, 1);
+        Assert.assertTrue(actualText.contains(expectedUrdu));
+        Assert.assertTrue(actualText.contains(expectedHindi));
+    }
+
+    /**
+     * OCRs hindi_01.jpg with "hin" and "urd", passing null for the last two helper arguments: the plain
+     * text layer must contain neither expected word; the actual-text extraction must contain both.
+     */
+    @LogMessages(messages = {
+        @LogMessage(messageTemplate = PdfOcrLogMessageConstant.COULD_NOT_FIND_CORRESPONDING_GLYPH_TO_UNICODE_CHARACTER)
+    }, ignore = true)
+    @Test
+    public void testHindiTextWithUrduActualTextWithIncorrectFont() throws IOException {
+        String testName = "testHindiTextWithUrduActualTextWithIncorrectFont";
+        String pdfPath = getTargetDirectory() + testName + ".pdf";
+        String expectedHindi = "हिन्दुस्तानी";
+        String expectedUrdu = "وتالی";
+        doOcrAndSavePdfToPath(tesseractReader, new File(TEST_IMAGES_DIRECTORY + "hindi_01.jpg").getAbsolutePath(),
+                pdfPath, Arrays.asList("hin", "urd"), null, null);
+
+        // no font is passed here, so neither word is expected in the plain text layer
+        String plainLayerText = getTextFromPdfLayer(pdfPath, null, 1);
+        Assert.assertFalse(plainLayerText.contains(expectedUrdu));
+        Assert.assertFalse(plainLayerText.contains(expectedHindi));
+
+        // actual text should contain all text
+        String actualText = getTextFromPdfLayerUsingActualText(pdfPath, null, 1);
+        Assert.assertTrue(actualText.contains(expectedUrdu));
+        Assert.assertTrue(actualText.contains(expectedHindi));
+    }
+
+    /**
+     * Mixed Hindi/English sample: only "hin" + "eng" must yield exactly the expected text;
+     * "hin" alone, "eng" alone, no language argument and an empty list must not.
+     */
+    @Test
+    public void testHindiTextWithEng() {
+        File hindiImage = new File(TEST_IMAGES_DIRECTORY + "hindi_02.jpg");
+        String expected = "मानक हनिदी\nHindi";
+        // correct result with specified hindi+english languages
+        String hinAndEng = getTextFromPdf(tesseractReader, hindiImage,
+                Arrays.<String>asList("hin", "eng"), NOTO_SANS_FONT_PATH);
+        Assert.assertEquals(expected, hinAndEng);
+        // incorrect result without specified english language
+        String hinOnly = getTextFromPdf(tesseractReader, hindiImage,
+                Collections.<String>singletonList("hin"), NOTO_SANS_FONT_PATH);
+        Assert.assertNotEquals(expected, hinOnly);
+
+        // incorrect result when hindi is missing or no language is specified
+        Assert.assertNotEquals(expected, getTextFromPdf(tesseractReader, hindiImage,
+                Collections.<String>singletonList("eng"), NOTO_SANS_FONT_PATH));
+        Assert.assertNotEquals(expected, getTextFromPdf(tesseractReader, hindiImage));
+        Assert.assertNotEquals(expected, getTextFromPdf(tesseractReader, hindiImage, new ArrayList<String>(), NOTO_SANS_FONT_PATH));
+    }
+
+    /** Georgian sample: "kat" alone and "kat" + "kat_old" must both yield exactly the expected word. */
+    @Test
+    public void testGeorgianText() throws IOException {
+        File georgianImage = new File(TEST_IMAGES_DIRECTORY + "georgian_01.jpg");
+        // First sentence
+        String expected = "ღმერთი";
+
+        // correct result with specified georgian language
+        String katOnly = getTextFromPdf(tesseractReader, georgianImage,
+                Collections.<String>singletonList("kat"), FREE_SANS_FONT_PATH);
+        Assert.assertEquals(expected, katOnly);
+        String katWithOld = getTextFromPdf(tesseractReader, georgianImage,
+                Arrays.<String>asList("kat", "kat_old"), FREE_SANS_FONT_PATH);
+        Assert.assertEquals(expected, katWithOld);
+    }
+
+    /**
+     * OCRs georgian_01.jpg with "kat", passing null for the last two helper arguments: the plain
+     * text layer must differ from the expected word; the actual-text extraction must equal it.
+     */
+    @LogMessages(messages = {
+        @LogMessage(messageTemplate = PdfOcrLogMessageConstant.COULD_NOT_FIND_CORRESPONDING_GLYPH_TO_UNICODE_CHARACTER, count = 6)
+    })
+    @Test
+    public void testGeorgianActualTextWithDefaultFont() throws IOException {
+        String testName = "testGeorgianActualTextWithDefaultFont";
+        String pdfPath = getTargetDirectory() + testName + ".pdf";
+        // First sentence
+        String expected = "ღმერთი";
+
+        doOcrAndSavePdfToPath(tesseractReader, new File(TEST_IMAGES_DIRECTORY + "georgian_01.jpg").getAbsolutePath(),
+                pdfPath, Collections.<String>singletonList("kat"), null, null);
+
+        // plain layer text must differ, actual text must match
+        Assert.assertNotEquals(expected, getTextFromPdfLayer(pdfPath, null, 1));
+        Assert.assertEquals(expected, getTextFromPdfLayerUsingActualText(pdfPath, null, 1));
+    }
+
+    /**
+     * Bengali sample with BY_WORDS positioning: "ben" must yield exactly the expected text, both via
+     * the overload taking an int and a two-font list and via the one taking FREE_SANS_FONT_PATH.
+     */
+    @Test
+    public void testBengali() {
+        File bengaliImage = new File(TEST_IMAGES_DIRECTORY + "bengali_01.jpeg");
+        String expected = "ইংরজে\nশখো";
+        Tesseract4OcrEngineProperties props = tesseractReader.getTesseract4OcrEngineProperties();
+        tesseractReader.setTesseract4OcrEngineProperties(props.setTextPositioning(TextPositioning.BY_WORDS));
+
+        // correct result with specified bengali language
+        List<String> bengali = Collections.<String>singletonList("ben");
+        String withFontList = getTextFromPdf(tesseractReader, bengaliImage, 1, bengali,
+                Arrays.<String>asList(FREE_SANS_FONT_PATH, KOSUGI_FONT_PATH));
+        Assert.assertEquals(expected, withFontList);
+        Assert.assertEquals(expected, getTextFromPdf(tesseractReader, bengaliImage, bengali, FREE_SANS_FONT_PATH));
+    }
+
+    /**
+     * OCRs bengali_01.jpeg with "ben" and BY_WORDS positioning, passing null for the last two helper
+     * arguments: the plain text layer must differ from the expected text; the actual text must equal it.
+     */
+    @LogMessages(messages = {
+        @LogMessage(messageTemplate = PdfOcrLogMessageConstant.COULD_NOT_FIND_CORRESPONDING_GLYPH_TO_UNICODE_CHARACTER, count = 8)
+    })
+    @Test
+    public void testBengaliActualTextWithDefaultFont() throws IOException {
+        String testName = "testBengaliActualTextWithDefaultFont";
+        String pdfPath = getTargetDirectory() + testName + ".pdf";
+        String expected = "ইংরজে\nশখো";
+
+        Tesseract4OcrEngineProperties props = tesseractReader.getTesseract4OcrEngineProperties();
+        tesseractReader.setTesseract4OcrEngineProperties(props.setTextPositioning(TextPositioning.BY_WORDS));
+        doOcrAndSavePdfToPath(tesseractReader, new File(TEST_IMAGES_DIRECTORY + "bengali_01.jpeg").getAbsolutePath(),
+                pdfPath, Collections.<String>singletonList("ben"), null, null);
+
+        // the plain text layer must not reproduce the expected text
+        Assert.assertNotEquals(expected, getTextFromPdfLayer(pdfPath, null, 1));
+
+        // the actual-text extraction must reproduce it exactly
+        Assert.assertEquals(expected, getTextFromPdfLayerUsingActualText(pdfPath, null, 1));
+    }
+
+    /**
+     * Chinese sample: with the NOTO_SANS_SC_FONT_PATH argument, "chi_sim" and/or "chi_tra"
+     * must yield exactly the expected text; without that argument, or with an empty
+     * language list, the expected text must not be reproduced.
+     */
+    @LogMessages(messages = {
+        @LogMessage(messageTemplate = PdfOcrLogMessageConstant.COULD_NOT_FIND_CORRESPONDING_GLYPH_TO_UNICODE_CHARACTER, count = 6)
+    })
+    @Test
+    public void testChinese() {
+        File chineseImage = new File(TEST_IMAGES_DIRECTORY + "chinese_01.jpg");
+        String expected = "你 好\nni hao";
+        List<String> both = Arrays.<String>asList("chi_sim", "chi_tra");
+        List<String> simplified = Collections.<String>singletonList("chi_sim");
+        List<String> traditional = Collections.<String>singletonList("chi_tra");
+
+        // correct result with specified chinese languages and the font path
+        for (List<String> languages
+                : Arrays.<List<String>>asList(both, simplified, traditional)) {
+            Assert.assertEquals(expected, getTextFromPdf(tesseractReader, chineseImage,
+                    languages, NOTO_SANS_SC_FONT_PATH));
+        }
+
+        // incorrect result when the font path is not passed
+        for (List<String> languages : Arrays.<List<String>>asList(simplified, traditional, both)) {
+            Assert.assertNotEquals(expected, getTextFromPdf(tesseractReader, chineseImage, languages));
+        }
+
+        // incorrect result when languages are not specified
+        String noLanguages = getTextFromPdf(tesseractReader, chineseImage, new ArrayList<String>());
+        Assert.assertFalse(noLanguages.contains(expected));
+    }
+
+    /**
+     * Spanish sample: "spa" alone or combined with "eng" in either order must yield exactly
+     * the expected text; "eng" alone or an empty language list must yield something different.
+     */
+    @Test
+    public void testSpanishWithTessData() {
+        File spanishImage = new File(TEST_IMAGES_DIRECTORY + "spanish_01.jpg");
+        String expectedSpanish = "Aquí\nhablamos\nespañol";
+
+        // correct result with specified spanish language, regardless of its position
+        for (List<String> languages : Arrays.<List<String>>asList(
+                Collections.<String>singletonList("spa"),
+                Arrays.<String>asList("spa", "eng"),
+                Arrays.<String>asList("eng", "spa"))) {
+            Assert.assertEquals(expectedSpanish, getTextFromPdf(tesseractReader, spanishImage, languages));
+        }
+
+        // incorrect result when spanish is missing or no language is specified
+        Assert.assertNotEquals(expectedSpanish, getTextFromPdf(tesseractReader, spanishImage, Collections.<String>singletonList("eng")));
+        Assert.assertNotEquals(expectedSpanish, getTextFromPdf(tesseractReader, spanishImage, new ArrayList<String>()));
+    }
+
+    /**
+     * Sets the tessdata path to SCRIPT_TESS_DATA_DIRECTORY and checks that recognition with
+     * the "Bengali" data yields text starting with the expected word.
+     */
+    @Test
+    public void testBengaliScript() {
+        File bengaliImage = new File(TEST_IMAGES_DIRECTORY + "bengali_01.jpeg");
+        Tesseract4OcrEngineProperties props = tesseractReader.getTesseract4OcrEngineProperties();
+        tesseractReader.setTesseract4OcrEngineProperties(props.setPathToTessData(new File(SCRIPT_TESS_DATA_DIRECTORY)));
+
+        // correct result with specified bengali script
+        String recognized = getTextFromPdf(tesseractReader, bengaliImage, 1, Collections.<String>singletonList("Bengali"),
+                Arrays.<String>asList(FREE_SANS_FONT_PATH, KOSUGI_FONT_PATH));
+        Assert.assertTrue(recognized.startsWith("ইংরজে"));
+    }
+
+    @Test
+    public void testGeorgianTextWithScript() {
+        File georgianImage = new File(TEST_IMAGES_DIRECTORY + "georgian_01.jpg");
+        // beginning of the first sentence
+        String expectedPrefix = "ღმერთი";
+
+        // recognition below relies on the script-based tess data directory
+        tesseractReader.setTesseract4OcrEngineProperties(
+                tesseractReader.getTesseract4OcrEngineProperties()
+                        .setPathToTessData(new File(SCRIPT_TESS_DATA_DIRECTORY)));
+        // only the "Georgian" script is requested here
+        String recognized = getTextFromPdf(tesseractReader, georgianImage,
+                Collections.<String>singletonList("Georgian"),
+                FREE_SANS_FONT_PATH);
+        Assert.assertTrue(recognized.startsWith(expectedPrefix));
+    }
+
+    @Test
+    public void testJapaneseScript() {
+        File japaneseImage = new File(TEST_IMAGES_DIRECTORY + "japanese_01.png");
+        String expectedText = "日 本 語\n文法";
+
+        // recognition below relies on the script-based tess data directory
+        tesseractReader.setTesseract4OcrEngineProperties(
+                tesseractReader.getTesseract4OcrEngineProperties()
+                        .setPathToTessData(new File(SCRIPT_TESS_DATA_DIRECTORY)));
+        // with the "Japanese" script the whole recognized text must match exactly
+        Assert.assertEquals(expectedText,
+                getTextFromPdf(tesseractReader, japaneseImage,
+                        Arrays.<String>asList("Japanese"), KOSUGI_FONT_PATH));
+    }
+
+    /**
+     * Runs OCR for the image into a text file and compares that file with the expected one.
+     */
+    private boolean doOcrAndCompareTxtFiles(AbstractTesseract4OcrEngine tesseractReader,
+            String imgPath, String expectedPath, List<String> languages) {
+        String actualTxtPath = getTargetDirectory() + getImageName(imgPath, languages) + ".txt";
+        doOcrAndSaveToTextFile(tesseractReader, imgPath, actualTxtPath, languages);
+        return compareTxtFiles(expectedPath, actualTxtPath);
+    }
+
+    /**
+     * Compares two text files line by line.
+     * Only tab, carriage return and printable ASCII characters take part in the
+     * comparison; line feeds, form feeds and all other characters are stripped first.
+     *
+     * @param expectedFilePath path to the file with the expected text
+     * @param resultFilePath path to the file with the actual text
+     * @return {@code true} if both files have the same number of lines and all stripped
+     *         lines are equal; {@code false} otherwise or when a file cannot be read
+     */
+    private boolean compareTxtFiles(String expectedFilePath, String resultFilePath) {
+        try {
+            List<String> expected = Files.readAllLines(java.nio.file.Paths.get(expectedFilePath));
+            List<String> result = Files.readAllLines(java.nio.file.Paths.get(resultFilePath));
+            if (expected.size() != result.size()) {
+                return false;
+            }
+            for (int i = 0; i < expected.size(); i++) {
+                if (!normalizeLine(expected.get(i)).equals(normalizeLine(result.get(i)))) {
+                    return false;
+                }
+            }
+            return true;
+        } catch (IOException e) {
+            // an unreadable file is logged and treated as a mismatch
+            LOGGER.error(e.getMessage());
+            return false;
+        }
+    }
+
+    /**
+     * Removes line feeds, form feeds and every character other than tab, CR and printable ASCII.
+     */
+    private static String normalizeLine(String line) {
+        return line.replace("\n", "").replace("\f", "")
+                .replaceAll("[^\\u0009\\u000A\\u000D\\u0020-\\u007E]", "");
+    }
+}
diff --git a/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/tesseract4/ApiTest.java b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/tesseract4/ApiTest.java
new file mode 100644
index 0000000..1f43e3e
--- /dev/null
+++ b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/tesseract4/ApiTest.java
@@ -0,0 +1,178 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr.tesseract4;
+
+import com.itextpdf.io.util.MessageFormatUtil;
+import com.itextpdf.pdfocr.IntegrationTestHelper;
+import com.itextpdf.test.annotations.LogMessage;
+import com.itextpdf.test.annotations.LogMessages;
+
+import java.awt.image.BufferedImage;
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import net.sourceforge.lept4j.Pix;
+import net.sourceforge.tess4j.TesseractException;
+import org.junit.Assert;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.ExpectedException;
+
+/** API-level checks of the library-based and executable-based Tesseract 4 OCR engines. */
+public class ApiTest extends IntegrationTestHelper {
+
+    @Rule
+    public ExpectedException junitExpectedException = ExpectedException.none();
+
+    @LogMessages(messages = {
+        @LogMessage(messageTemplate = Tesseract4OcrException.PATH_TO_TESS_DATA_IS_NOT_SET)
+    })
+    @Test
+    public void testDefaultTessDataPathValidationForLib() {
+        junitExpectedException.expect(Tesseract4OcrException.class);
+        junitExpectedException.expectMessage(Tesseract4OcrException.PATH_TO_TESS_DATA_IS_NOT_SET);
+        String path = TEST_IMAGES_DIRECTORY + "numbers_01.jpg";
+        File imgFile = new File(path);
+
+        Tesseract4LibOcrEngine engine =
+                new Tesseract4LibOcrEngine(new Tesseract4OcrEngineProperties());
+        engine.doImageOcr(imgFile);
+    }
+
+    @LogMessages(messages = {
+        @LogMessage(messageTemplate = Tesseract4OcrException.PATH_TO_TESS_DATA_IS_NOT_SET)
+    })
+    @Test
+    public void testDefaultTessDataPathValidationForExecutable() {
+        junitExpectedException.expect(Tesseract4OcrException.class);
+        junitExpectedException.expectMessage(Tesseract4OcrException.PATH_TO_TESS_DATA_IS_NOT_SET);
+        String path = TEST_IMAGES_DIRECTORY + "numbers_01.jpg";
+        File imgFile = new File(path);
+
+        Tesseract4ExecutableOcrEngine engine =
+                new Tesseract4ExecutableOcrEngine(getTesseractDirectory(),
+                        new Tesseract4OcrEngineProperties());
+        engine.doImageOcr(imgFile);
+    }
+
+    @LogMessages(messages = {
+        @LogMessage(messageTemplate = Tesseract4LogMessageConstant.CANNOT_READ_INPUT_IMAGE)
+    })
+    @Test
+    public void testDoTesseractOcrForIncorrectImageForExecutable() {
+        junitExpectedException.expect(Tesseract4OcrException.class);
+        junitExpectedException.expectMessage(MessageFormatUtil.format(
+                Tesseract4OcrException.CANNOT_READ_PROVIDED_IMAGE,
+                new File(TEST_IMAGES_DIRECTORY + "numbers_01")
+                        .getAbsolutePath()));
+        String path = TEST_IMAGES_DIRECTORY + "numbers_01";
+        File imgFile = new File(path);
+
+        Tesseract4ExecutableOcrEngine engine =
+                new Tesseract4ExecutableOcrEngine(getTesseractDirectory(),
+                        new Tesseract4OcrEngineProperties()
+                                .setPathToTessData(getTessDataDirectory()));
+        engine.doTesseractOcr(imgFile, null, OutputFormat.HOCR);
+    }
+
+    @LogMessages(messages = {
+        @LogMessage(messageTemplate = Tesseract4OcrException.TESSERACT_FAILED),
+        @LogMessage(messageTemplate = Tesseract4LogMessageConstant.TESSERACT_FAILED)
+    })
+    @Test
+    public void testOcrResultForSinglePageForNullImage() {
+        junitExpectedException.expect(Tesseract4OcrException.class);
+        junitExpectedException.expectMessage(Tesseract4OcrException.TESSERACT_FAILED);
+        Tesseract4LibOcrEngine tesseract4LibOcrEngine = getTesseract4LibOcrEngine();
+        tesseract4LibOcrEngine.setTesseract4OcrEngineProperties(
+                new Tesseract4OcrEngineProperties()
+                        .setPathToTessData(getTessDataDirectory()));
+        tesseract4LibOcrEngine.initializeTesseract(OutputFormat.TXT);
+        tesseract4LibOcrEngine.doTesseractOcr(null, null, OutputFormat.HOCR);
+    }
+
+    @Test
+    public void testDoTesseractOcrForNonAsciiPathForExecutable() {
+        String path = TEST_IMAGES_DIRECTORY + "tèst/noisy_01.png";
+        File imgFile = new File(path);
+        File outputFile = new File(TesseractOcrUtil.getTempFilePath("test", ".hocr"));
+
+        Tesseract4OcrEngineProperties properties = new Tesseract4OcrEngineProperties();
+        properties.setPathToTessData(getTessDataDirectory());
+        properties.setPreprocessingImages(false);
+        Tesseract4ExecutableOcrEngine engine =
+                new Tesseract4ExecutableOcrEngine(getTesseractDirectory(),
+                        properties);
+        engine.doTesseractOcr(imgFile, outputFile, OutputFormat.HOCR);
+        Assert.assertTrue(Files.exists(Paths.get(outputFile.getAbsolutePath())));
+        TesseractHelper.deleteFile(outputFile.getAbsolutePath());
+        Assert.assertFalse(Files.exists(Paths.get(outputFile.getAbsolutePath())));
+    }
+
+    @LogMessages(messages = {
+        @LogMessage(messageTemplate = Tesseract4LogMessageConstant.CANNOT_READ_INPUT_IMAGE),
+        @LogMessage(messageTemplate = Tesseract4OcrException.TESSERACT_FAILED),
+        @LogMessage(messageTemplate = Tesseract4OcrException.TESSERACT_NOT_FOUND),
+        @LogMessage(messageTemplate = Tesseract4LogMessageConstant.COMMAND_FAILED)
+    }, ignore = true)
+    @Test
+    public void testDoTesseractOcrForExecutableForWin() {
+        junitExpectedException.expect(Tesseract4OcrException.class);
+        testSettingOsName("win");
+    }
+
+    @LogMessages(messages = {
+        @LogMessage(messageTemplate = Tesseract4LogMessageConstant.CANNOT_READ_INPUT_IMAGE),
+        @LogMessage(messageTemplate = Tesseract4OcrException.TESSERACT_FAILED),
+        @LogMessage(messageTemplate = Tesseract4OcrException.TESSERACT_NOT_FOUND),
+        @LogMessage(messageTemplate = Tesseract4LogMessageConstant.COMMAND_FAILED)
+    }, ignore = true)
+    @Test
+    public void testDoTesseractOcrForExecutableForLinux() {
+        junitExpectedException.expect(Tesseract4OcrException.class);
+        testSettingOsName("linux");
+    }
+
+    // Runs executable-based OCR while the OS name property is temporarily replaced with osName.
+    private void testSettingOsName(String osName) {
+        File imgFile = new File(TEST_IMAGES_DIRECTORY + "numbers_01.jpg");
+        String tesseractDirectory = getTesseractDirectory();
+        String osPropertyName = System.getProperty("os.name") == null ? "OS" : "os.name";
+        String os = System.getProperty(osPropertyName);
+        System.setProperty(osPropertyName, osName);
+        try {
+            Tesseract4OcrEngineProperties properties = new Tesseract4OcrEngineProperties();
+            properties.setPathToTessData(getTessDataDirectory());
+            new Tesseract4ExecutableOcrEngine(tesseractDirectory, properties)
+                    .doTesseractOcr(imgFile, null, OutputFormat.HOCR);
+        } finally {
+            // a null value would make System.setProperty throw, so clear the key instead
+            if (os == null) {
+                System.clearProperty(osPropertyName);
+            } else {
+                System.setProperty(osPropertyName, os);
+            }
+        }
+    }
+}
diff --git a/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/tesseract4/ImagePreprocessingUtilTest.java b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/tesseract4/ImagePreprocessingUtilTest.java
new file mode 100644
index 0000000..5309494
--- /dev/null
+++ b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/tesseract4/ImagePreprocessingUtilTest.java
@@ -0,0 +1,52 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr.tesseract4;
+
+import com.itextpdf.pdfocr.IntegrationTestHelper;
+
+import java.io.File;
+import org.junit.Assert;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.ExpectedException;
+
+/** Checks how ImagePreprocessingUtil reacts to image paths given without a file extension. */
+public class ImagePreprocessingUtilTest extends IntegrationTestHelper {
+    @Rule
+    public ExpectedException junitExpectedException = ExpectedException.none();
+
+    @Test
+    public void testCheckForInvalidTiff() {
+        // the path carries no extension and must not be reported as a TIFF image
+        File extensionlessFile = new File(TEST_IMAGES_DIRECTORY + "example_03_10MB");
+        Assert.assertFalse(ImagePreprocessingUtil.isTiffImage(extensionlessFile));
+    }
+
+    @Test
+    public void testReadingInvalidImagePath() {
+        File extensionlessFile = new File(TEST_IMAGES_DIRECTORY + "numbers_02");
+        // the call below is expected to throw for this path
+        junitExpectedException.expect(Tesseract4OcrException.class);
+        ImagePreprocessingUtil.preprocessImage(extensionlessFile, 1);
+    }
+}
diff --git a/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/tesseract4/TesseractOcrUtilTest.java b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/tesseract4/TesseractOcrUtilTest.java
new file mode 100644
index 0000000..1287719
--- /dev/null
+++ b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/tesseract4/TesseractOcrUtilTest.java
@@ -0,0 +1,195 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr.tesseract4;
+
+import com.itextpdf.pdfocr.IntegrationTestHelper;
+import com.itextpdf.test.annotations.LogMessage;
+import com.itextpdf.test.annotations.LogMessages;
+
+import java.awt.image.BufferedImage;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import javax.imageio.ImageIO;
+import net.sourceforge.lept4j.Pix;
+import net.sourceforge.tess4j.TesseractException;
+import org.junit.Assert;
+import org.junit.Test;
+
+/** Tests for TesseractOcrUtil: OCR result retrieval, TIFF page reading and PNG saving. */
+public class TesseractOcrUtilTest extends IntegrationTestHelper {
+
+    @Test
+    public void testTesseract4OcrForPix() throws TesseractException, IOException {
+        String path = TEST_IMAGES_DIRECTORY + "numbers_02.jpg";
+        String expected = "0123456789";
+        File imgFile = new File(path);
+
+        Pix pix = ImagePreprocessingUtil.readPix(imgFile);
+        Tesseract4LibOcrEngine tesseract4LibOcrEngine = getTesseract4LibOcrEngine();
+        tesseract4LibOcrEngine.setTesseract4OcrEngineProperties(
+                new Tesseract4OcrEngineProperties()
+                        .setPathToTessData(getTessDataDirectory()));
+        tesseract4LibOcrEngine.initializeTesseract(OutputFormat.TXT);
+
+        String result = new TesseractOcrUtil().getOcrResultAsString(
+                tesseract4LibOcrEngine.getTesseractInstance(),
+                pix, OutputFormat.TXT);
+        Assert.assertTrue(result.contains(expected));
+    }
+
+    @Test
+    public void testGetOcrResultAsStringForFile() throws TesseractException {
+        String path = TEST_IMAGES_DIRECTORY + "numbers_01.jpg";
+        String expected = "619121";
+        File imgFile = new File(path);
+
+        Tesseract4LibOcrEngine tesseract4LibOcrEngine = getTesseract4LibOcrEngine();
+        tesseract4LibOcrEngine.setTesseract4OcrEngineProperties(
+                new Tesseract4OcrEngineProperties()
+                        .setPathToTessData(getTessDataDirectory()));
+        tesseract4LibOcrEngine.initializeTesseract(OutputFormat.TXT);
+
+        String result = new TesseractOcrUtil().getOcrResultAsString(
+                tesseract4LibOcrEngine.getTesseractInstance(),
+                imgFile, OutputFormat.TXT);
+        Assert.assertTrue(result.contains(expected));
+    }
+
+    @LogMessages(messages = {
+        @LogMessage(messageTemplate = Tesseract4LogMessageConstant.PAGE_NUMBER_IS_INCORRECT)
+    })
+    @Test
+    public void testReadingSecondPageFromOnePageTiff() {
+        String path = TEST_IMAGES_DIRECTORY + "example_03_10MB.tiff";
+        File imgFile = new File(path);
+        Pix page = TesseractOcrUtil.readPixPageFromTiff(imgFile, 2);
+        Assert.assertNull(page);
+    }
+
+    @LogMessages(messages = {
+        @LogMessage(messageTemplate = Tesseract4LogMessageConstant.CANNOT_RETRIEVE_PAGES_FROM_IMAGE)
+    })
+    @Test
+    public void testReadingPageFromInvalidTiff() {
+        String path = TEST_IMAGES_DIRECTORY + "example_03.tiff";
+        File imgFile = new File(path);
+        Pix page = TesseractOcrUtil.readPixPageFromTiff(imgFile, 0);
+        Assert.assertNull(page);
+    }
+
+    @LogMessages(messages = {
+        @LogMessage(messageTemplate = Tesseract4LogMessageConstant.CANNOT_RETRIEVE_PAGES_FROM_IMAGE)
+    })
+    @Test
+    public void testInitializeImagesListFromInvalidTiff() {
+        String path = TEST_IMAGES_DIRECTORY + "example_03.tiff";
+        File imgFile = new File(path);
+        TesseractOcrUtil tesseractOcrUtil = new TesseractOcrUtil();
+        tesseractOcrUtil.initializeImagesListFromTiff(imgFile);
+        Assert.assertEquals(0, tesseractOcrUtil.getListOfPages().size());
+    }
+
+    @Test
+    public void testPreprocessingConditions() throws IOException {
+        Pix pix = null;
+        Assert.assertNull(TesseractOcrUtil.convertToGrayscale(pix));
+        Assert.assertNull(TesseractOcrUtil.otsuImageThresholding(pix));
+        Assert.assertNull(TesseractOcrUtil.convertPixToImage(pix));
+        TesseractOcrUtil.destroyPix(pix);
+    }
+
+    @Test
+    public void testOcrResultConditions() throws IOException, TesseractException {
+        Tesseract4LibOcrEngine tesseract4LibOcrEngine = getTesseract4LibOcrEngine();
+        tesseract4LibOcrEngine.setTesseract4OcrEngineProperties(
+                new Tesseract4OcrEngineProperties()
+                        .setPathToTessData(getTessDataDirectory()));
+        tesseract4LibOcrEngine.initializeTesseract(OutputFormat.HOCR);
+
+        Pix pix = null;
+        Assert.assertNull(new TesseractOcrUtil()
+                .getOcrResultAsString(
+                        tesseract4LibOcrEngine.getTesseractInstance(),
+                        pix, OutputFormat.HOCR));
+        File file = null;
+        Assert.assertNull(new TesseractOcrUtil()
+                .getOcrResultAsString(
+                        tesseract4LibOcrEngine.getTesseractInstance(),
+                        file, OutputFormat.HOCR));
+        BufferedImage bi = null;
+        Assert.assertNull(new TesseractOcrUtil()
+                .getOcrResultAsString(
+                        tesseract4LibOcrEngine.getTesseractInstance(),
+                        bi, OutputFormat.HOCR));
+    }
+
+    @Test
+    public void testImageSavingAsPng() throws IOException {
+        String path = TEST_IMAGES_DIRECTORY + "numbers_01.jpg";
+        String tmpFileName = getTargetDirectory() + "testImageSavingAsPng.png";
+        Assert.assertFalse(Files.exists(Paths.get(tmpFileName)));
+        // read through a File: ImageIO.read(InputStream) would leave the stream open
+        BufferedImage bi = ImageIO.read(new File(path));
+        TesseractOcrUtil.saveImageToTempPngFile(tmpFileName, bi);
+        Assert.assertTrue(Files.exists(Paths.get(tmpFileName)));
+        TesseractHelper.deleteFile(tmpFileName);
+        Assert.assertFalse(Files.exists(Paths.get(tmpFileName)));
+    }
+
+    @Test
+    public void testNullSavingAsPng() {
+        String tmpFileName = TesseractOcrUtil.getTempFilePath(
+                getTargetDirectory() + "/testNullSavingAsPng", ".png");
+        TesseractOcrUtil.saveImageToTempPngFile(tmpFileName, null);
+        Assert.assertFalse(Files.exists(Paths.get(tmpFileName)));
+
+        TesseractOcrUtil.savePixToTempPngFile(tmpFileName, null);
+        Assert.assertFalse(Files.exists(Paths.get(tmpFileName)));
+    }
+
+    @Test
+    public void testPixSavingAsPng() {
+        String path = TEST_IMAGES_DIRECTORY + "numbers_01.jpg";
+        String tmpFileName = getTargetDirectory() + "testPixSavingAsPng.png";
+        Assert.assertFalse(Files.exists(Paths.get(tmpFileName)));
+        Pix pix = ImagePreprocessingUtil.readPix(new File(path));
+        TesseractOcrUtil.savePixToTempPngFile(tmpFileName, pix);
+        Assert.assertTrue(Files.exists(Paths.get(tmpFileName)));
+        TesseractHelper.deleteFile(tmpFileName);
+        Assert.assertFalse(Files.exists(Paths.get(tmpFileName)));
+    }
+
+    @LogMessages(messages = {
+        @LogMessage(messageTemplate = Tesseract4LogMessageConstant.CANNOT_PROCESS_IMAGE)
+    })
+    @Test
+    public void testImageSavingAsPngWithError() throws IOException {
+        String path = TEST_IMAGES_DIRECTORY + "numbers_01.jpg";
+        // read through a File: ImageIO.read(InputStream) would leave the stream open
+        BufferedImage bi = ImageIO.read(new File(path));
+        TesseractOcrUtil.saveImageToTempPngFile(null, bi);
+    }
+}
diff --git a/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/tesseract4/UserWordsExecutableTest.java b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/tesseract4/UserWordsExecutableTest.java
new file mode 100644
index 0000000..03b4daf
--- /dev/null
+++ b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/tesseract4/UserWordsExecutableTest.java
@@ -0,0 +1,33 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr.tesseract4;
+
+import com.itextpdf.test.annotations.type.IntegrationTest;
+import org.junit.experimental.categories.Category;
+
+@Category(IntegrationTest.class)
+public class UserWordsExecutableTest extends UserWordsTest {
+    public UserWordsExecutableTest() {
+        super(ReaderType.EXECUTABLE); // run the inherited UserWordsTest cases with the EXECUTABLE reader type
+    }
+}
diff --git a/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/tesseract4/UserWordsLibTest.java b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/tesseract4/UserWordsLibTest.java
new file mode 100644
index 0000000..c0b4541
--- /dev/null
+++ b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/tesseract4/UserWordsLibTest.java
@@ -0,0 +1,33 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr.tesseract4;
+
+import com.itextpdf.test.annotations.type.IntegrationTest;
+import org.junit.experimental.categories.Category;
+
+@Category(IntegrationTest.class)
+public class UserWordsLibTest extends UserWordsTest {
+    public UserWordsLibTest() {
+        super(ReaderType.LIB); // run the inherited UserWordsTest cases with the LIB reader type
+    }
+}
diff --git a/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/tesseract4/UserWordsTest.java b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/tesseract4/UserWordsTest.java
new file mode 100644
index 0000000..8f222e6
--- /dev/null
+++ b/pdfocr-tesseract4/src/test/java/com/itextpdf/pdfocr/tesseract4/UserWordsTest.java
@@ -0,0 +1,157 @@
+/*
+    This file is part of the iText (R) project.
+    Copyright (c) 1998-2020 iText Group NV
+    Authors: iText Software.
+
+    This program is offered under a commercial and under the AGPL license.
+    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.
+
+    AGPL licensing:
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+package com.itextpdf.pdfocr.tesseract4;
+
+import com.itextpdf.io.util.MessageFormatUtil;
+import com.itextpdf.pdfocr.IntegrationTestHelper;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.ExpectedException;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+public abstract class UserWordsTest extends IntegrationTestHelper {
+
+    @Rule
+    public ExpectedException junitExpectedException = ExpectedException.none();
+
+    AbstractTesseract4OcrEngine tesseractReader;
+    String testFileTypeName;
+    private boolean isExecutableReaderType;
+
+    /**
+     * Creates the test for the given reader type and obtains the matching OCR engine.
+     * @param type reader type; EXECUTABLE selects the "executable" file type name, others "lib"
+     */
+    public UserWordsTest(ReaderType type) {
+        isExecutableReaderType = type.equals(ReaderType.EXECUTABLE);
+        testFileTypeName = isExecutableReaderType ? "executable" : "lib";
+        tesseractReader = getTesseractReader(type);
+    }
+
+    @Before
+    public void initTesseractProperties() {
+        // every test starts from fresh properties that only point to the tess data directory
+        Tesseract4OcrEngineProperties freshProperties = new Tesseract4OcrEngineProperties();
+        freshProperties.setPathToTessData(getTessDataDirectory());
+        tesseractReader.setTesseract4OcrEngineProperties(freshProperties);
+    }
+
+    @Test
+    public void testCustomUserWords() {
+        String firstWord = "he23llo";
+        String secondWord = "qwetyrtyqpwe-rty";
+        Tesseract4OcrEngineProperties engineProperties =
+                tesseractReader.getTesseract4OcrEngineProperties();
+        engineProperties.setLanguages(Arrays.<String>asList("fra"));
+        engineProperties.setUserWords("fra", Arrays.<String>asList(firstWord, secondWord));
+        tesseractReader.setTesseract4OcrEngineProperties(engineProperties);
+
+        String recognized = getRecognizedTextFromTextFile(tesseractReader,
+                TEST_IMAGES_DIRECTORY + "wierdwords.png");
+        // at least one custom word must be recognized and a ".user-words" file must be referenced
+        Assert.assertTrue(recognized.contains(firstWord) || recognized.contains(secondWord));
+        Assert.assertTrue(tesseractReader.getTesseract4OcrEngineProperties()
+                .getPathToUserWordsFile().endsWith(".user-words"));
+    }
+
+    @Test
+    public void testCustomUserWordsWithListOfLanguages() {
+        Tesseract4OcrEngineProperties engineProperties =
+                tesseractReader.getTesseract4OcrEngineProperties();
+        engineProperties.setLanguages(Arrays.<String>asList("fra", "eng"));
+        engineProperties.setUserWords("eng", Arrays.<String>asList("b1adeb1ab1a"));
+        tesseractReader.setTesseract4OcrEngineProperties(engineProperties);
+
+        // drop line feeds, form feeds and every character outside tab/CR/printable ASCII
+        String recognized = getRecognizedTextFromTextFile(tesseractReader,
+                TEST_IMAGES_DIRECTORY + "bogusText.jpg")
+                .replace("\n", "")
+                .replace("\f", "")
+                .replaceAll("[^\\u0009\\u000A\\u000D\\u0020-\\u007E]", "");
+        Assert.assertTrue(recognized.startsWith("B1adeb1ab1a"));
+
+        Assert.assertTrue(tesseractReader.getTesseract4OcrEngineProperties()
+                .getPathToUserWordsFile().endsWith(".user-words"));
+    }
+
+    @Test
+    public void testUserWordsWithLanguageNotInList() throws java.io.IOException {
+        junitExpectedException.expect(Tesseract4OcrException.class);
+        junitExpectedException.expectMessage(MessageFormatUtil
+                .format(Tesseract4OcrException.LANGUAGE_IS_NOT_IN_THE_LIST, "spa"));
+        String userWords = TEST_DOCUMENTS_DIRECTORY + "userwords.txt";
+        Tesseract4OcrEngineProperties properties = tesseractReader.getTesseract4OcrEngineProperties();
+        try (FileInputStream stream = new FileInputStream(userWords)) { // closed even on failure
+            properties.setUserWords("spa", stream);
+            properties.setLanguages(new ArrayList<String>());
+        }
+    }
+
+    @Test
+    public void testIncorrectLanguageForUserWordsAsList() {
+        // Expects LANGUAGE_IS_NOT_IN_THE_LIST (formatted with "eng1") to be thrown.
+        junitExpectedException.expect(Tesseract4OcrException.class);
+        junitExpectedException.expectMessage(MessageFormatUtil.format(
+                Tesseract4OcrException.LANGUAGE_IS_NOT_IN_THE_LIST, "eng1"));
+        Tesseract4OcrEngineProperties ocrProps =
+                tesseractReader.getTesseract4OcrEngineProperties();
+        ocrProps.setUserWords("eng1", Arrays.<String>asList("word1", "word2"));
+        ocrProps.setLanguages(new ArrayList<String>());
+    }
+
+    @Test
+    public void testUserWordsWithDefaultLanguageNotInList() throws java.io.IOException {
+        Tesseract4OcrEngineProperties props = tesseractReader.getTesseract4OcrEngineProperties();
+        // Keep the stream open through the OCR call; try-with-resources closes it afterwards.
+        try (FileInputStream userWords =
+                new FileInputStream(TEST_DOCUMENTS_DIRECTORY + "userwords.txt")) {
+            props.setUserWords("eng", userWords);
+            props.setLanguages(new ArrayList<String>());
+            tesseractReader.setTesseract4OcrEngineProperties(props);
+            String result = getRecognizedTextFromTextFile(tesseractReader,
+                    TEST_IMAGES_DIRECTORY + "numbers_01.jpg");
+            Assert.assertTrue(result.startsWith("619121"));
+        }
+    }
+
+    @Test
+    public void testUserWordsFileNotDeleted() {
+        // Runs OCR with a caller-supplied user-words path, then asserts the file still exists.
+        final String wordsPath = TEST_DOCUMENTS_DIRECTORY + "userwords.txt";
+        Tesseract4OcrEngineProperties ocrProps =
+                tesseractReader.getTesseract4OcrEngineProperties();
+        ocrProps.setPathToUserWordsFile(wordsPath);
+        ocrProps.setLanguages(Arrays.<String>asList("eng"));
+        tesseractReader.setTesseract4OcrEngineProperties(ocrProps);
+        tesseractReader.doImageOcr(new File(TEST_IMAGES_DIRECTORY + "numbers_01.jpg"));
+        Assert.assertTrue(new File(wordsPath).exists());
+    }
+}
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/englishText_executable.pdf b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/englishText_executable.pdf
new file mode 100644
index 0000000..5d706d0
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/englishText_executable.pdf differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/englishText_lib.pdf b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/englishText_lib.pdf
new file mode 100644
index 0000000..004e5ef
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/englishText_lib.pdf differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/example_01_executable.pdf b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/example_01_executable.pdf
new file mode 100644
index 0000000..d08a201
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/example_01_executable.pdf differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/example_01_lib.pdf b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/example_01_lib.pdf
new file mode 100644
index 0000000..657e872
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/example_01_lib.pdf differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/example_02.pdf b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/example_02.pdf
new file mode 100644
index 0000000..1a567f2
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/example_02.pdf differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/german_01executable.txt b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/german_01executable.txt
new file mode 100644
index 0000000..c15d2a9
--- /dev/null
+++ b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/german_01executable.txt
@@ -0,0 +1,4 @@
+Das Geheimnis
+des Könnens
+liegt im Wollen.
+
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/german_01lib.txt b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/german_01lib.txt
new file mode 100644
index 0000000..72180eb
--- /dev/null
+++ b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/german_01lib.txt
@@ -0,0 +1,3 @@
+Das Geheimnis
+des Könnens
+liegt im Wollen.
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/multilang_executable.pdf b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/multilang_executable.pdf
new file mode 100644
index 0000000..4441b9e
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/multilang_executable.pdf differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/multilang_lib.pdf b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/multilang_lib.pdf
new file mode 100644
index 0000000..7fa0333
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/multilang_lib.pdf differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/multipage_executable.pdf b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/multipage_executable.pdf
new file mode 100644
index 0000000..35b6538
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/multipage_executable.pdf differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/multipage_executable.txt b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/multipage_executable.txt
new file mode 100644
index 0000000..b73a392
--- /dev/null
+++ b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/multipage_executable.txt
@@ -0,0 +1,37 @@
+Multipage
+TIFF
+Example
+Page 1
+Multipage
+TIFF
+Example
+Page 2
+Multipage
+TIFF
+Example
+Page 3
+Multipage
+TIFF
+Example
+Page 4
+Multipage
+TIFF
+Example
+Page 5
+Multipage
+TIFF
+Example
+Page 6
+Multipage
+TIFF
+Example
+Page /
+Multipage
+TIFF
+Example
+Page 8
+Multipage
+TIFF
+Example
+Page 9
+
\ No newline at end of file
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/multipage_lib.pdf b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/multipage_lib.pdf
new file mode 100644
index 0000000..1a726fe
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/multipage_lib.pdf differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/multipage_lib.txt b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/multipage_lib.txt
new file mode 100644
index 0000000..61b7bbd
--- /dev/null
+++ b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/multipage_lib.txt
@@ -0,0 +1,36 @@
+Multipage
+TIFF
+Example
+Page 1
+Multipage
+TIFF
+Example
+Page 2
+Multipage
+TIFF
+Example
+Page 3
+Multipage
+TIFF
+Example
+Page 4
+Multipage
+TIFF
+Example
+Page 5
+Multipage
+TIFF
+Example
+Page 6
+Multipage
+TIFF
+Example
+Page /
+Multipage
+TIFF
+Example
+Page 8
+Multipage
+TIFF
+Example
+Page 9
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/numbers_01.pdf b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/numbers_01.pdf
new file mode 100644
index 0000000..882a45d
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/numbers_01.pdf differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/numbers_01_a3u.pdf b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/numbers_01_a3u.pdf
new file mode 100644
index 0000000..7657a62
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/numbers_01_a3u.pdf differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/numbers_01_compareJpe_executable.pdf b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/numbers_01_compareJpe_executable.pdf
new file mode 100644
index 0000000..0bbd75b
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/numbers_01_compareJpe_executable.pdf differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/numbers_01_compareJpe_lib.pdf b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/numbers_01_compareJpe_lib.pdf
new file mode 100644
index 0000000..cafdbf5
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/numbers_01_compareJpe_lib.pdf differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/numbers_01_compareTif_executable.pdf b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/numbers_01_compareTif_executable.pdf
new file mode 100644
index 0000000..f74a7df
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/numbers_01_compareTif_executable.pdf differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/numbers_01_compareTif_lib.pdf b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/numbers_01_compareTif_lib.pdf
new file mode 100644
index 0000000..ea13a39
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/numbers_01_compareTif_lib.pdf differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/numbers_02_compareJpg_executable.pdf b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/numbers_02_compareJpg_executable.pdf
new file mode 100644
index 0000000..c86db65
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/numbers_02_compareJpg_executable.pdf differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/numbers_02_compareJpg_lib.pdf b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/numbers_02_compareJpg_lib.pdf
new file mode 100644
index 0000000..867a2f5
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/numbers_02_compareJpg_lib.pdf differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/spanish_01_a3u.pdf b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/spanish_01_a3u.pdf
new file mode 100644
index 0000000..68c24cf
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/spanish_01_a3u.pdf differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/userwords.txt b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/userwords.txt
new file mode 100644
index 0000000..c91b117
--- /dev/null
+++ b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/documents/userwords.txt
@@ -0,0 +1,7 @@
+Items
+hello
+included
+vat
+cash
+change
+word
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/events/multithreading/numbers_01.jpg b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/events/multithreading/numbers_01.jpg
new file mode 100644
index 0000000..f384caa
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/events/multithreading/numbers_01.jpg differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/fonts/Cairo-Regular.ttf b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/fonts/Cairo-Regular.ttf
new file mode 100644
index 0000000..f0920bd
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/fonts/Cairo-Regular.ttf differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/fonts/FreeSans.ttf b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/fonts/FreeSans.ttf
new file mode 100644
index 0000000..2072cda
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/fonts/FreeSans.ttf differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/fonts/Kosugi-Regular.ttf b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/fonts/Kosugi-Regular.ttf
new file mode 100644
index 0000000..67ee170
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/fonts/Kosugi-Regular.ttf differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/fonts/LICENSE_APACHE.txt b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/fonts/LICENSE_APACHE.txt
new file mode 100644
index 0000000..d645695
--- /dev/null
+++ b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/fonts/LICENSE_APACHE.txt
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/fonts/LICENSE_GNU.txt b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/fonts/LICENSE_GNU.txt
new file mode 100644
index 0000000..f288702
--- /dev/null
+++ b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/fonts/LICENSE_GNU.txt
@@ -0,0 +1,674 @@
+                    GNU GENERAL PUBLIC LICENSE
+                       Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                            Preamble
+
+  The GNU General Public License is a free, copyleft license for
+software and other kinds of works.
+
+  The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works.  By contrast,
+the GNU General Public License is intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users.  We, the Free Software Foundation, use the
+GNU General Public License for most of our software; it applies also to
+any other work released this way by its authors.  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+  To protect your rights, we need to prevent others from denying you
+these rights or asking you to surrender the rights.  Therefore, you have
+certain responsibilities if you distribute copies of the software, or if
+you modify it: responsibilities to respect the freedom of others.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must pass on to the recipients the same
+freedoms that you received.  You must make sure that they, too, receive
+or can get the source code.  And you must show them these terms so they
+know their rights.
+
+  Developers that use the GNU GPL protect your rights with two steps:
+(1) assert copyright on the software, and (2) offer you this License
+giving you legal permission to copy, distribute and/or modify it.
+
+  For the developers' and authors' protection, the GPL clearly explains
+that there is no warranty for this free software.  For both users' and
+authors' sake, the GPL requires that modified versions be marked as
+changed, so that their problems will not be attributed erroneously to
+authors of previous versions.
+
+  Some devices are designed to deny users access to install or run
+modified versions of the software inside them, although the manufacturer
+can do so.  This is fundamentally incompatible with the aim of
+protecting users' freedom to change the software.  The systematic
+pattern of such abuse occurs in the area of products for individuals to
+use, which is precisely where it is most unacceptable.  Therefore, we
+have designed this version of the GPL to prohibit the practice for those
+products.  If such problems arise substantially in other domains, we
+stand ready to extend this provision to those domains in future versions
+of the GPL, as needed to protect the freedom of users.
+
+  Finally, every program is threatened constantly by software patents.
+States should not allow patents to restrict development and use of
+software on general-purpose computers, but in those that do, we wish to
+avoid the special danger that patents applied to a free program could
+make it effectively proprietary.  To prevent this, the GPL assures that
+patents cannot be used to render the program non-free.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+                       TERMS AND CONDITIONS
+
+  0. Definitions.
+
+  "This License" refers to version 3 of the GNU General Public License.
+
+  "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+  "The Program" refers to any copyrightable work licensed under this
+License.  Each licensee is addressed as "you".  "Licensees" and
+"recipients" may be individuals or organizations.
+
+  To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy.  The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+  A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+  To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy.  Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+  To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies.  Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+  An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License.  If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+  1. Source Code.
+
+  The "source code" for a work means the preferred form of the work
+for making modifications to it.  "Object code" means any non-source
+form of a work.
+
+  A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+  The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form.  A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+  The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities.  However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work.  For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+  The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+  The Corresponding Source for a work in source code form is that
+same work.
+
+  2. Basic Permissions.
+
+  All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met.  This License explicitly affirms your unlimited
+permission to run the unmodified Program.  The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work.  This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+  You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force.  You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright.  Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+  Conveying under any other circumstances is permitted solely under
+the conditions stated below.  Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+  3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+  No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+  When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+  4. Conveying Verbatim Copies.
+
+  You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+  You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+  5. Conveying Modified Source Versions.
+
+  You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+    a) The work must carry prominent notices stating that you modified
+    it, and giving a relevant date.
+
+    b) The work must carry prominent notices stating that it is
+    released under this License and any conditions added under section
+    7.  This requirement modifies the requirement in section 4 to
+    "keep intact all notices".
+
+    c) You must license the entire work, as a whole, under this
+    License to anyone who comes into possession of a copy.  This
+    License will therefore apply, along with any applicable section 7
+    additional terms, to the whole of the work, and all its parts,
+    regardless of how they are packaged.  This License gives no
+    permission to license the work in any other way, but it does not
+    invalidate such permission if you have separately received it.
+
+    d) If the work has interactive user interfaces, each must display
+    Appropriate Legal Notices; however, if the Program has interactive
+    interfaces that do not display Appropriate Legal Notices, your
+    work need not make them do so.
+
+  A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit.  Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+  6. Conveying Non-Source Forms.
+
+  You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+    a) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by the
+    Corresponding Source fixed on a durable physical medium
+    customarily used for software interchange.
+
+    b) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by a
+    written offer, valid for at least three years and valid for as
+    long as you offer spare parts or customer support for that product
+    model, to give anyone who possesses the object code either (1) a
+    copy of the Corresponding Source for all the software in the
+    product that is covered by this License, on a durable physical
+    medium customarily used for software interchange, for a price no
+    more than your reasonable cost of physically performing this
+    conveying of source, or (2) access to copy the
+    Corresponding Source from a network server at no charge.
+
+    c) Convey individual copies of the object code with a copy of the
+    written offer to provide the Corresponding Source.  This
+    alternative is allowed only occasionally and noncommercially, and
+    only if you received the object code with such an offer, in accord
+    with subsection 6b.
+
+    d) Convey the object code by offering access from a designated
+    place (gratis or for a charge), and offer equivalent access to the
+    Corresponding Source in the same way through the same place at no
+    further charge.  You need not require recipients to copy the
+    Corresponding Source along with the object code.  If the place to
+    copy the object code is a network server, the Corresponding Source
+    may be on a different server (operated by you or a third party)
+    that supports equivalent copying facilities, provided you maintain
+    clear directions next to the object code saying where to find the
+    Corresponding Source.  Regardless of what server hosts the
+    Corresponding Source, you remain obligated to ensure that it is
+    available for as long as needed to satisfy these requirements.
+
+    e) Convey the object code using peer-to-peer transmission, provided
+    you inform other peers where the object code and Corresponding
+    Source of the work are being offered to the general public at no
+    charge under subsection 6d.
+
+  A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+  A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling.  In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage.  For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product.  A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+  "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source.  The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+  If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information.  But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+  The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed.  Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+  Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+  7. Additional Terms.
+
+  "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law.  If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+  When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it.  (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.)  You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+  Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+    a) Disclaiming warranty or limiting liability differently from the
+    terms of sections 15 and 16 of this License; or
+
+    b) Requiring preservation of specified reasonable legal notices or
+    author attributions in that material or in the Appropriate Legal
+    Notices displayed by works containing it; or
+
+    c) Prohibiting misrepresentation of the origin of that material, or
+    requiring that modified versions of such material be marked in
+    reasonable ways as different from the original version; or
+
+    d) Limiting the use for publicity purposes of names of licensors or
+    authors of the material; or
+
+    e) Declining to grant rights under trademark law for use of some
+    trade names, trademarks, or service marks; or
+
+    f) Requiring indemnification of licensors and authors of that
+    material by anyone who conveys the material (or modified versions of
+    it) with contractual assumptions of liability to the recipient, for
+    any liability that these contractual assumptions directly impose on
+    those licensors and authors.
+
+  All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10.  If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term.  If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+  If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+  Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+  8. Termination.
+
+  You may not propagate or modify a covered work except as expressly
+provided under this License.  Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+  However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+  Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+  Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License.  If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+  9. Acceptance Not Required for Having Copies.
+
+  You are not required to accept this License in order to receive or
+run a copy of the Program.  Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance.  However,
+nothing other than this License grants you permission to propagate or
+modify any covered work.  These actions infringe copyright if you do
+not accept this License.  Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+  10. Automatic Licensing of Downstream Recipients.
+
+  Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License.  You are not responsible
+for enforcing compliance by third parties with this License.
+
+  An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations.  If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+  You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License.  For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+  11. Patents.
+
+  A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based.  The
+work thus licensed is called the contributor's "contributor version".
+
+  A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version.  For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+  Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+  In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement).  To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+  If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients.  "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+  If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+  A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License.  You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+  Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+  12. No Surrender of Others' Freedom.
+
+  If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all.  For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+  13. Use with the GNU Affero General Public License.
+
+  Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU Affero General Public License into a single
+combined work, and to convey the resulting work.  The terms of this
+License will continue to apply to the part which is the covered work,
+but the special requirements of the GNU Affero General Public License,
+section 13, concerning interaction through a network will apply to the
+combination as such.
+
+  14. Revised Versions of this License.
+
+  The Free Software Foundation may publish revised and/or new versions of
+the GNU General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+  Each version is given a distinguishing version number.  If the
+Program specifies that a certain numbered version of the GNU General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation.  If the Program does not specify a version number of the
+GNU General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+  If the Program specifies that a proxy can decide which future
+versions of the GNU General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+  Later license versions may give you additional or different
+permissions.  However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+  15. Disclaimer of Warranty.
+
+  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+  16. Limitation of Liability.
+
+  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+  17. Interpretation of Sections 15 and 16.
+
+  If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+                     END OF TERMS AND CONDITIONS
+
+            How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+  If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+
+    <program>  Copyright (C) <year>  <name of author>
+    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+
+  You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<https://www.gnu.org/licenses/>.
+
+  The GNU General Public License does not permit incorporating your program
+into proprietary programs.  If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.  But first, please read
+<https://www.gnu.org/licenses/why-not-lgpl.html>.
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/fonts/LICENSE_OFL.txt b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/fonts/LICENSE_OFL.txt
new file mode 100644
index 0000000..77b1731
--- /dev/null
+++ b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/fonts/LICENSE_OFL.txt
@@ -0,0 +1,91 @@
+This Font Software is licensed under the SIL Open Font License, Version 1.1.
+This license is copied below, and is also available with a FAQ at:
+http://scripts.sil.org/OFL
+
+
+-----------------------------------------------------------
+SIL OPEN FONT LICENSE Version 1.1 - 26 February 2007
+-----------------------------------------------------------
+
+PREAMBLE
+The goals of the Open Font License (OFL) are to stimulate worldwide
+development of collaborative font projects, to support the font creation
+efforts of academic and linguistic communities, and to provide a free and
+open framework in which fonts may be shared and improved in partnership
+with others.
+
+The OFL allows the licensed fonts to be used, studied, modified and
+redistributed freely as long as they are not sold by themselves. The
+fonts, including any derivative works, can be bundled, embedded, 
+redistributed and/or sold with any software provided that any reserved
+names are not used by derivative works. The fonts and derivatives,
+however, cannot be released under any other type of license. The
+requirement for fonts to remain under this license does not apply
+to any document created using the fonts or their derivatives.
+
+DEFINITIONS
+"Font Software" refers to the set of files released by the Copyright
+Holder(s) under this license and clearly marked as such. This may
+include source files, build scripts and documentation.
+
+"Reserved Font Name" refers to any names specified as such after the
+copyright statement(s).
+
+"Original Version" refers to the collection of Font Software components as
+distributed by the Copyright Holder(s).
+
+"Modified Version" refers to any derivative made by adding to, deleting,
+or substituting -- in part or in whole -- any of the components of the
+Original Version, by changing formats or by porting the Font Software to a
+new environment.
+
+"Author" refers to any designer, engineer, programmer, technical
+writer or other person who contributed to the Font Software.
+
+PERMISSION & CONDITIONS
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of the Font Software, to use, study, copy, merge, embed, modify,
+redistribute, and sell modified and unmodified copies of the Font
+Software, subject to the following conditions:
+
+1) Neither the Font Software nor any of its individual components,
+in Original or Modified Versions, may be sold by itself.
+
+2) Original or Modified Versions of the Font Software may be bundled,
+redistributed and/or sold with any software, provided that each copy
+contains the above copyright notice and this license. These can be
+included either as stand-alone text files, human-readable headers or
+in the appropriate machine-readable metadata fields within text or
+binary files as long as those fields can be easily viewed by the user.
+
+3) No Modified Version of the Font Software may use the Reserved Font
+Name(s) unless explicit written permission is granted by the corresponding
+Copyright Holder. This restriction only applies to the primary font name as
+presented to the users.
+
+4) The name(s) of the Copyright Holder(s) or the Author(s) of the Font
+Software shall not be used to promote, endorse or advertise any
+Modified Version, except to acknowledge the contribution(s) of the
+Copyright Holder(s) and the Author(s) or with their explicit written
+permission.
+
+5) The Font Software, modified or unmodified, in part or in whole,
+must be distributed entirely under this license, and must not be
+distributed under any other license. The requirement for fonts to
+remain under this license does not apply to any document created
+using the Font Software.
+
+TERMINATION
+This license becomes null and void if any of the above conditions are
+not met.
+
+DISCLAIMER
+THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
+OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL THE
+COPYRIGHT HOLDER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL
+DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM
+OTHER DEALINGS IN THE FONT SOFTWARE.
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/fonts/NOTICE.txt b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/fonts/NOTICE.txt
new file mode 100644
index 0000000..d340343
--- /dev/null
+++ b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/fonts/NOTICE.txt
@@ -0,0 +1,7 @@
+Please note that the following fonts are used under the licenses listed below.
+
+* Cairo-Regular - SIL Open Font License, Version 1.1
+* FreeSans - GPL license, which can be found at the following link: https://www.gnu.org/licenses
+* Kosugi-Regular - Apache License, Version 2.0.
+* NotoSans-Regular - SIL Open Font License, Version 1.1
+* NotoSansSC-Regular - SIL Open Font License, Version 1.1
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/fonts/NotoSans-Regular.ttf b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/fonts/NotoSans-Regular.ttf
new file mode 100644
index 0000000..10589e2
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/fonts/NotoSans-Regular.ttf differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/fonts/NotoSansSC-Regular.otf b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/fonts/NotoSansSC-Regular.otf
new file mode 100644
index 0000000..ee1e304
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/fonts/NotoSansSC-Regular.otf differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/arabic_01.jpg b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/arabic_01.jpg
new file mode 100644
index 0000000..c41788f
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/arabic_01.jpg differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/arabic_02.png b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/arabic_02.png
new file mode 100644
index 0000000..7b35925
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/arabic_02.png differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/bengali_01.jpeg b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/bengali_01.jpeg
new file mode 100644
index 0000000..d769282
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/bengali_01.jpeg differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/bogusText.jpg b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/bogusText.jpg
new file mode 100644
index 0000000..7ee7f8a
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/bogusText.jpg differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/chinese_01.jpg b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/chinese_01.jpg
new file mode 100644
index 0000000..ea1cb26
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/chinese_01.jpg differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/corrupted.jpg b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/corrupted.jpg
new file mode 100644
index 0000000..2c0d56a
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/corrupted.jpg differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/englishText.bmp b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/englishText.bmp
new file mode 100644
index 0000000..9e35ff4
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/englishText.bmp differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/example_01.BMP b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/example_01.BMP
new file mode 100644
index 0000000..439962e
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/example_01.BMP differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/example_02.JFIF b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/example_02.JFIF
new file mode 100644
index 0000000..b598806
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/example_02.JFIF differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/example_03_10MB.tiff b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/example_03_10MB.tiff
new file mode 100644
index 0000000..c9d37ff
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/example_03_10MB.tiff differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/example_04.png b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/example_04.png
new file mode 100644
index 0000000..d0243e0
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/example_04.png differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/example_05_corrupted.bmp b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/example_05_corrupted.bmp
new file mode 100644
index 0000000..6540ad7
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/example_05_corrupted.bmp differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/french_01.png b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/french_01.png
new file mode 100644
index 0000000..2d35846
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/french_01.png differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/georgian_01.jpg b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/georgian_01.jpg
new file mode 100644
index 0000000..7e42f2d
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/georgian_01.jpg differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/german_01.jpg b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/german_01.jpg
new file mode 100644
index 0000000..6f63dc2
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/german_01.jpg differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/greek_01.jpg b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/greek_01.jpg
new file mode 100644
index 0000000..5e48756
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/greek_01.jpg differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/halftone.jpg b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/halftone.jpg
new file mode 100644
index 0000000..a255024
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/halftone.jpg differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/hindi_01.jpg b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/hindi_01.jpg
new file mode 100644
index 0000000..8c1477a
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/hindi_01.jpg differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/hindi_02.jpg b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/hindi_02.jpg
new file mode 100644
index 0000000..1e2b01f
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/hindi_02.jpg differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/japanese_01.png b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/japanese_01.png
new file mode 100644
index 0000000..9fa9fae
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/japanese_01.png differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/multilang.jpg b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/multilang.jpg
new file mode 100644
index 0000000..9dc0004
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/multilang.jpg differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/multipage.tiff b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/multipage.tiff
new file mode 100644
index 0000000..e8cc630
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/multipage.tiff differ
diff --git "a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/mult\303\256page.tiff" "b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/mult\303\256page.tiff"
new file mode 100644
index 0000000..e8cc630
Binary files /dev/null and "b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/mult\303\256page.tiff" differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/noisy_01.png b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/noisy_01.png
new file mode 100644
index 0000000..2a91a3b
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/noisy_01.png differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/numbers_01.jpe b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/numbers_01.jpe
new file mode 100644
index 0000000..e633b25
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/numbers_01.jpe differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/numbers_01.jpg b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/numbers_01.jpg
new file mode 100644
index 0000000..f384caa
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/numbers_01.jpg differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/numbers_01.tif b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/numbers_01.tif
new file mode 100644
index 0000000..60e70e9
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/numbers_01.tif differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/numbers_02.jpg b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/numbers_02.jpg
new file mode 100644
index 0000000..5da603a
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/numbers_02.jpg differ
diff --git "a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/n\303\274mb\303\251rs.jpg" "b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/n\303\274mb\303\251rs.jpg"
new file mode 100644
index 0000000..f384caa
Binary files /dev/null and "b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/n\303\274mb\303\251rs.jpg" differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/pantone_blue.jpg b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/pantone_blue.jpg
new file mode 100644
index 0000000..431ca52
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/pantone_blue.jpg differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/scanned_spa_01.png b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/scanned_spa_01.png
new file mode 100644
index 0000000..e0b46d5
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/scanned_spa_01.png differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/spanish_01.jpg b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/spanish_01.jpg
new file mode 100644
index 0000000..ffe5bf9
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/spanish_01.jpg differ
diff --git "a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/t\303\250st/noisy_01.png" "b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/t\303\250st/noisy_01.png"
new file mode 100644
index 0000000..2a91a3b
Binary files /dev/null and "b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/t\303\250st/noisy_01.png" differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/wierdwords.png b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/wierdwords.png
new file mode 100644
index 0000000..40cc9d6
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/images/wierdwords.png differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/profiles/CoatedFOGRA27.icc b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/profiles/CoatedFOGRA27.icc
new file mode 100644
index 0000000..086ac9d
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/profiles/CoatedFOGRA27.icc differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/profiles/sRGB_CS_profile.icm b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/profiles/sRGB_CS_profile.icm
new file mode 100644
index 0000000..7f9d18d
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/profiles/sRGB_CS_profile.icm differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/ara.traineddata b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/ara.traineddata
new file mode 100644
index 0000000..4b687c7
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/ara.traineddata differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/ben.traineddata b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/ben.traineddata
new file mode 100644
index 0000000..7e9054d
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/ben.traineddata differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/chi_sim.traineddata b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/chi_sim.traineddata
new file mode 100644
index 0000000..388bac2
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/chi_sim.traineddata differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/chi_tra.traineddata b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/chi_tra.traineddata
new file mode 100644
index 0000000..1955cd8
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/chi_tra.traineddata differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/deu.traineddata b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/deu.traineddata
new file mode 100644
index 0000000..97ed7b2
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/deu.traineddata differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/ell.traineddata b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/ell.traineddata
new file mode 100644
index 0000000..ed98ae1
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/ell.traineddata differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/eng.traineddata b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/eng.traineddata
new file mode 100644
index 0000000..f4744c2
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/eng.traineddata differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/fra.traineddata b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/fra.traineddata
new file mode 100644
index 0000000..250c774
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/fra.traineddata differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/grc.traineddata b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/grc.traineddata
new file mode 100644
index 0000000..a306f3e
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/grc.traineddata differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/hin.traineddata b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/hin.traineddata
new file mode 100644
index 0000000..a8f0aae
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/hin.traineddata differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/jpn.traineddata b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/jpn.traineddata
new file mode 100644
index 0000000..c4178f8
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/jpn.traineddata differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/jpn_vert.traineddata b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/jpn_vert.traineddata
new file mode 100644
index 0000000..43f38de
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/jpn_vert.traineddata differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/kat.traineddata b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/kat.traineddata
new file mode 100644
index 0000000..1a3ae11
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/kat.traineddata differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/kat_old.traineddata b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/kat_old.traineddata
new file mode 100644
index 0000000..f4ae5ab
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/kat_old.traineddata differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/osd.traineddata b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/osd.traineddata
new file mode 100644
index 0000000..527457c
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/osd.traineddata differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/script/Bengali.traineddata b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/script/Bengali.traineddata
new file mode 100644
index 0000000..a1b888e
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/script/Bengali.traineddata differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/script/Georgian.traineddata b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/script/Georgian.traineddata
new file mode 100644
index 0000000..7751150
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/script/Georgian.traineddata differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/script/Japanese.traineddata b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/script/Japanese.traineddata
new file mode 100644
index 0000000..89481f2
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/script/Japanese.traineddata differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/spa.traineddata b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/spa.traineddata
new file mode 100644
index 0000000..72e901f
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/spa.traineddata differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/spa_old.traineddata b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/spa_old.traineddata
new file mode 100644
index 0000000..42b281f
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/spa_old.traineddata differ
diff --git a/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/urd.traineddata b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/urd.traineddata
new file mode 100644
index 0000000..715a159
Binary files /dev/null and b/pdfocr-tesseract4/src/test/resources/com/itextpdf/pdfocr/tessdata/urd.traineddata differ
diff --git a/pom.xml b/pom.xml
index f8907e6..e8ab2c5 100644
--- a/pom.xml
+++ b/pom.xml
@@ -5,21 +5,29 @@
   <parent>
     <groupId>com.itextpdf</groupId>
     <artifactId>root</artifactId>
-    <version>7.1.0</version>
-    <relativePath />
+    <version>7.1.11</version>
+    <relativePath/>
   </parent>
 
-  <artifactId>ocr</artifactId>
-  <version>1.0.0-SNAPSHOT</version>
+  <artifactId>pdfocr-root</artifactId>
+  <version>1.0.0</version>
+  <packaging>pom</packaging>
 
-  <name>OCR</name>
-  <description>OCR is an iText 7 add-on that lets you to parse text from provided images and adds it to PDF.</description>
+  <name>pdfOCR</name>
+  <description>pdfOCR is an iText 7 add-on for Java to recognize and extract text in scanned documents and images. It can also convert them into fully ISO-compliant PDF or PDF/A-3u files that are accessible, searchable, and suitable for archiving.</description>
+
+  <modules>
+    <module>pdfocr-api</module>
+    <module>pdfocr-tesseract4</module>
+  </modules>
 
   <properties>
-    <itext.version>${project.parent.version}</itext.version>
+    <itext.version>7.1.11</itext.version>
     <java.version>1.8</java.version>
-    <maven.compiler.source>1.8</maven.compiler.source>
-    <maven.compiler.target>1.8</maven.compiler.target>
+    <jdkLevel>${java.version}</jdkLevel>
+    <maven.compiler.source>${java.version}</maven.compiler.source>
+    <maven.compiler.target>${java.version}</maven.compiler.target>
+    <skipTests>false</skipTests>
   </properties>
 
   <repositories>
@@ -41,86 +49,54 @@
     </repository>
   </repositories>
 
-  <dependencies>
-    <dependency>
-      <groupId>com.itextpdf</groupId>
-      <artifactId>forms</artifactId>
-      <version>${itext.version}</version>
-    </dependency>
-    <dependency>
-      <groupId>com.itextpdf</groupId>
-      <artifactId>io</artifactId>
-      <version>${itext.version}</version>
-    </dependency>
-    <dependency>
-      <groupId>com.itextpdf</groupId>
-      <artifactId>kernel</artifactId>
-      <version>${itext.version}</version>
-    </dependency>
-    <dependency>
-      <groupId>com.itextpdf</groupId>
-      <artifactId>layout</artifactId>
-      <version>${itext.version}</version>
-    </dependency>
-    <dependency>
-      <groupId>commons-io</groupId>
-      <artifactId>commons-io</artifactId>
-      <version>2.6</version>
-    </dependency>
-    <dependency>
-      <groupId>net.htmlparser.jericho</groupId>
-      <artifactId>jericho-html</artifactId>
-      <version>3.3</version>
-    </dependency>
-    <dependency>
-      <groupId>com.itextpdf</groupId>
-      <artifactId>pdfa</artifactId>
-      <version>${itext.version}</version>
-      <scope>test</scope>
-    </dependency>
-    <dependency>
-      <groupId>com.itextpdf</groupId>
-      <artifactId>pdftest</artifactId>
-      <version>${itext.version}</version>
-      <scope>test</scope>
-    </dependency>
-  </dependencies>
-
   <build>
-
+    <resources>
+      <resource>
+        <directory>src/main/resources</directory>
+        <includes>
+          <include>**/*.ttf</include>
+        </includes>
+      </resource>
+    </resources>
     <plugins>
       <plugin>
-        <groupId>external.atlassian.jgitflow</groupId>
-        <artifactId>jgitflow-maven-plugin</artifactId>
-        <version>1.0-m5.1</version>
+        <artifactId>maven-compiler-plugin</artifactId>
+        <configuration>
+          <source>${java.version}</source>
+          <target>${java.version}</target>
+        </configuration>
       </plugin>
       <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
         <artifactId>maven-failsafe-plugin</artifactId>
-        <version>2.19.1</version>
+        <version>${failsafe.version}</version>
         <configuration>
-          <includes>
-            <include>**/*Test.java</include>
-          </includes>
-          <groups>${integrationtests}</groups>
+          <skipTests>${skipTests}</skipTests>
+          <forkCount>1</forkCount>
+          <reuseForks>false</reuseForks>
+          <useSystemClassLoader>false</useSystemClassLoader>
         </configuration>
       </plugin>
       <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-source-plugin</artifactId>
-        <version>3.0.0</version>
+        <artifactId>maven-javadoc-plugin</artifactId>
         <configuration>
-          <excludes>
-            <exclude>**</exclude>
-          </excludes>
+          <source>${java.version}</source>
+          <doclint>none</doclint>
+          <groups> <!-- Javadoc overview grouping; each pattern must match the project's Java package names -->
+            <group>
+              <title>Ocr</title>
+              <packages>com.itextpdf.pdfocr*</packages> <!-- was com.itextpdf.ocr*, which matches nothing: this project's packages are rooted at com.itextpdf.pdfocr -->
+            </group>
+          </groups>
         </configuration>
       </plugin>
       <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
         <artifactId>maven-surefire-plugin</artifactId>
-        <version>2.19.1</version>
+        <version>${surefire.version}</version>
         <configuration>
-          <groups>${unittests}</groups>
+          <skipTests>${skipTests}</skipTests>
+          <forkCount>1</forkCount>
+          <reuseForks>false</reuseForks>
+          <useSystemClassLoader>false</useSystemClassLoader>
         </configuration>
       </plugin>
       <plugin>
@@ -131,15 +107,42 @@
           <skip>true</skip>
         </configuration>
       </plugin>
-      <plugin>
-        <groupId>org.revapi</groupId>
-        <artifactId>revapi-maven-plugin</artifactId>
-        <version>0.8.2</version>
-        <configuration>
-          <skip>true</skip>
-        </configuration>
-      </plugin>
     </plugins>
-
   </build>
+
+  <profiles>
+    <profile>
+      <id>qa</id>
+      <build>
+        <plugins>
+          <plugin> <!-- japicmp (API compatibility check) settings that apply only when the qa profile is active -->
+            <groupId>com.github.siom79.japicmp</groupId>
+            <artifactId>japicmp-maven-plugin</artifactId>
+            <version>0.14.3</version>
+            <executions>
+              <execution> <!-- NOTE(review): no <id> is given, so Maven treats this as the execution with id "default"; confirm that matches the execution being overridden -->
+                <phase>none</phase> <!-- "none" is not a lifecycle phase, so an execution bound to it never runs -->
+              </execution>
+            </executions>
+          </plugin>
+          <plugin> <!-- OWASP dependency vulnerability scan, configured only for the qa profile -->
+            <groupId>org.owasp</groupId>
+            <artifactId>dependency-check-maven</artifactId> <!-- was dependency-check-core, which is the engine library and not a Maven plugin -->
+            <version>5.3.0</version>
+            <configuration>
+              <assemblyAnalyzerEnabled>false</assemblyAnalyzerEnabled> <!-- turns off the .NET assembly analyzer; NOTE(review): presumably because no .NET artifacts are scanned here - confirm -->
+            </configuration>
+          </plugin>
+          <plugin> <!-- Revapi API-change analysis, configured only for the qa profile -->
+            <groupId>org.revapi</groupId>
+            <artifactId>revapi-maven-plugin</artifactId>
+            <version>0.11.1</version>
+            <configuration>
+              <oldVersion>${project.version}</oldVersion> <!-- NOTE(review): the baseline is set to the very version being built, so the comparison is against the same release - confirm this is intended -->
+            </configuration>
+          </plugin>
+        </plugins>
+      </build>
+    </profile>
+  </profiles>
 </project>