Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -226,10 +226,10 @@ def pdf2txt(pdf_docs):
|
|
226 |
# You need to replace the following lines with actual file reading
|
227 |
# based on the file_extension
|
228 |
if file_extension in ['txt', 'html', 'htm', 'py', 'xml', 'json']:
|
229 |
-
text += textract.process(str(file))
|
230 |
text += f"\nExtracted text from {file_extension} file..."
|
231 |
elif file_extension == 'pdf':
|
232 |
-
pdf_reader = PdfReader(file)
|
233 |
for page in pdf_reader.pages:
|
234 |
text += page.extract_text()
|
235 |
text += f"\nExtracted text from PDF file..."
|
|
|
226 |
# You need to replace the following lines with actual file reading
|
227 |
# based on the file_extension
|
228 |
if file_extension in ['txt', 'html', 'htm', 'py', 'xml', 'json']:
|
229 |
+
text += textract.process(str(file.name))
|
230 |
text += f"\nExtracted text from {file_extension} file..."
|
231 |
elif file_extension == 'pdf':
|
232 |
+
pdf_reader = PdfReader(file.name)
|
233 |
for page in pdf_reader.pages:
|
234 |
text += page.extract_text()
|
235 |
text += f"\nExtracted text from PDF file..."
|