awacke1 committed on
Commit
12c3a0c
·
1 Parent(s): 22c8575

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -4
app.py CHANGED
@@ -226,12 +226,12 @@ def pdf2txt(pdf_docs):
226
  # You need to replace the following lines with actual file reading
227
  # based on the file_extension
228
  if file_extension in ['txt', 'html', 'htm', 'py', 'xml', 'json']:
229
- # text += textract.process(file_str).decode("utf-8")
230
  text += f"\nExtracted text from {file_extension} file..."
231
  elif file_extension == 'pdf':
232
- # pdf_reader = PdfReader(file_str)
233
- # for page in pdf_reader.pages:
234
- # text += page.extract_text()
235
  text += f"\nExtracted text from PDF file..."
236
 
237
  return text
 
226
  # You need to replace the following lines with actual file reading
227
  # based on the file_extension
228
  if file_extension in ['txt', 'html', 'htm', 'py', 'xml', 'json']:
229
+ text += textract.process(str(file).decode("utf-8")
230
  text += f"\nExtracted text from {file_extension} file..."
231
  elif file_extension == 'pdf':
232
+ pdf_reader = PdfReader(file)
233
+ for page in pdf_reader.pages:
234
+ text += page.extract_text()
235
  text += f"\nExtracted text from PDF file..."
236
 
237
  return text