Spaces:
Runtime error
Runtime error
Commit
·
710b787
1
Parent(s):
b962a46
Update app.py
Browse files
app.py
CHANGED
@@ -6,11 +6,10 @@ import os
|
|
6 |
import tqdm
|
7 |
import tempfile
|
8 |
import re
|
|
|
9 |
|
10 |
print("pdfminer", print(pdfminer.__version__))
|
11 |
|
12 |
-
from pdfminer.high_level import extract_text
|
13 |
-
|
14 |
#from docx import Document
|
15 |
#document = Document()
|
16 |
#document.add_heading('Labels for ', level=1)
|
@@ -57,7 +56,7 @@ def retrieve_lines(filename):
|
|
57 |
extension = filename.split(".")[-1]
|
58 |
|
59 |
if extension == "pdf":
|
60 |
-
text = extract_text(filename)
|
61 |
lines = text.split("\n")
|
62 |
elif extension in ["docx", "doc"]:
|
63 |
with tempfile.TemporaryDirectory() as tmpdirname:
|
|
|
6 |
import tqdm
|
7 |
import tempfile
|
8 |
import re
|
9 |
+
import pdfminer
|
10 |
|
11 |
print("pdfminer", print(pdfminer.__version__))
|
12 |
|
|
|
|
|
13 |
#from docx import Document
|
14 |
#document = Document()
|
15 |
#document.add_heading('Labels for ', level=1)
|
|
|
56 |
extension = filename.split(".")[-1]
|
57 |
|
58 |
if extension == "pdf":
|
59 |
+
text = pdfminer.high_level.extract_text(filename)
|
60 |
lines = text.split("\n")
|
61 |
elif extension in ["docx", "doc"]:
|
62 |
with tempfile.TemporaryDirectory() as tmpdirname:
|