# Extract the text of the PDF opened as FRN451 and reduce it to a compact,
# case-insensitive form: lower-cased, with every newline and space removed.
# If the PDF yields no text at all, fall back to OCR through textract.
# Inputs read from the surrounding script: FRN451 (the PDF handed to PyPDF2)
# and fileurl (the path handed to textract). Result: the module-level `text`.


def _normalize_extracted_text(raw):
    """Return *raw* lower-cased with all newlines and spaces removed."""
    return raw.lower().replace('\n', '').replace(' ', '')


F451 = PyPDF2.PdfFileReader(FRN451)
num_pages = F451.numPages

# Join the per-page text once and normalize once, instead of growing the
# string with += and re-normalizing the whole accumulated text on every page.
text = _normalize_extracted_text(
    "".join(F451.getPage(page_index).extractText()
            for page_index in range(num_pages))
)
# The original page counter ended at num_pages; keep that state for any later
# code that still reads it.
count = num_pages

if not text:
    # Nothing usable was extracted (only whitespace or nothing at all), so
    # run OCR on the file instead.
    ocr_output = textract.process(fileurl, method='tesseract', language='eng')
    # textract hands back an encoded byte string; decode it so the str
    # operations below work on Python 3.
    if isinstance(ocr_output, bytes):
        ocr_output = ocr_output.decode('utf-8')
    # Apply the same normalization as the PyPDF2 path so both branches
    # produce text in the same form.
    text = _normalize_extracted_text(ocr_output)

# NOTE(review): "™" is presumably the residue of a mis-decoded curly
# apostrophe in the extracted text — confirm against real output.
text = text.replace("™", "'")