-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPDF_Find_Text.py
44 lines (37 loc) · 1.26 KB
/
PDF_Find_Text.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# Py PDF Find
# created by: Earl Lamier
# python forum
# MAC
# v1.3.0
#
# Uses pdfplumber: it is a newer module, easier to use, and works well.
# status: release
#from cgi import print_directory
from asyncio import coroutines
import enum
from itertools import count
from multiprocessing.sharedctypes import Value
from operator import index
from re import search
from tkinter.messagebox import YES
from typing import Counter
import pdfplumber
import csv
# Search every page of a PDF for a fixed phrase and log each hit.
#
# For every page whose extracted text contains `search_word`, one row
# (record number, start page, page number) is appended to
# Output_Results.csv and the same information is printed to stdout.
pdf_path = r'/Users/doc/sample.pdf'
search_word = 'Page (5)'
header = ['Record','(Py)Start Page No.','Page No.']
# Number of pages subtracted from a matching page number to obtain the
# "(Py)Start Page" value reported next to it.
start_page_offset = 5

# The PDF stream is opened inside the `with` so that it is always closed;
# a file object handed to pdfplumber.open() appears to be left for the
# caller to close, so the previous bare open() leaked the handle.
with open(pdf_path, mode='rb') as pdf_file, \
        pdfplumber.open(pdf_file) as pdf, \
        open('Output_Results.csv', 'w', newline='') as f_out:
    writer = csv.writer(f_out)
    writer.writerow(header)
    rec = 1  # 1-based running count of matches found so far
    for pageNumber, pg in enumerate(pdf.pages, 1):
        # Some pdfplumber versions return None for a page with no
        # extractable text (e.g. a scanned image); treat that as empty
        # instead of crashing on the `in` test below.
        content = pg.extract_text() or ''
        if search_word not in content:
            continue
        pyStartPageN = pageNumber - start_page_offset
        print(f"Index: {rec}, Search: {search_word}, (Py)Start Page#: {pyStartPageN}, Page#: {pageNumber}")
        writer.writerow([rec, pyStartPageN, pageNumber])
        rec += 1
# NOTE(review): the original indentation was lost; this trailing blank
# line is assumed to be emitted once, after the whole search -- confirm.
print()