patrickvonplaten committed on
Commit
710b787
·
1 Parent(s): b962a46

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -3
app.py CHANGED
@@ -6,11 +6,10 @@ import os
6
  import tqdm
7
  import tempfile
8
  import re
 
9
 
10
  print("pdfminer", print(pdfminer.__version__))
11
 
12
- from pdfminer.high_level import extract_text
13
-
14
  #from docx import Document
15
  #document = Document()
16
  #document.add_heading('Labels for ', level=1)
@@ -57,7 +56,7 @@ def retrieve_lines(filename):
57
  extension = filename.split(".")[-1]
58
 
59
  if extension == "pdf":
60
- text = extract_text(filename)
61
  lines = text.split("\n")
62
  elif extension in ["docx", "doc"]:
63
  with tempfile.TemporaryDirectory() as tmpdirname:
 
6
  import tqdm
7
  import tempfile
8
  import re
9
+ import pdfminer
10
 
11
  print("pdfminer", print(pdfminer.__version__))
12
 
 
 
13
  #from docx import Document
14
  #document = Document()
15
  #document.add_heading('Labels for ', level=1)
 
56
  extension = filename.split(".")[-1]
57
 
58
  if extension == "pdf":
59
+ text = pdfminer.high_level.extract_text(filename)
60
  lines = text.split("\n")
61
  elif extension in ["docx", "doc"]:
62
  with tempfile.TemporaryDirectory() as tmpdirname: