Code:
lst_all_text = []
for foldername,subfolders,files in os.walk(r"C:/MY PATH"):
for file in files:
# open the pdf file
object = PyPDF2.PdfFileReader(os.path.join(foldername,file))
# get number of pages
NumPages = object.getNumPages()
text = ""
# extract text and do the search
for i in range(0, NumPages):
PageObj = object.getPage(i)
text += PageObj.extractText()
lst_all_text.append(text)