notabaka committed on
Commit
47fd8f9
·
1 Parent(s): 9bbbf26
Files changed (1) hide show
  1. app.py +2 -4
app.py CHANGED
@@ -37,13 +37,11 @@ def extract_text(doc):
37
  return doc.read().decode('utf-8')
38
 
39
  if doc.name.endswith(".pdf"):
40
- raw = doc.read()
41
 
42
- with pdfplumber.open(raw) as pdf:
43
- pdf.set_doc(raw)
44
  pages = [page.extract_text() for page in pdf.pages]
45
  return "\n".join(pages)
46
-
47
  if doc.name.endswith('.docx'):
48
  raw_text = doc.read()
49
  return docx2txt.process(raw_text)
 
37
  return doc.read().decode('utf-8')
38
 
39
  if doc.name.endswith(".pdf"):
40
+ raw = doc.read().decode('latin-1')
41
 
42
+ with pdfplumber.open(io.BytesIO(raw)) as pdf:
 
43
  pages = [page.extract_text() for page in pdf.pages]
44
  return "\n".join(pages)
 
45
  if doc.name.endswith('.docx'):
46
  raw_text = doc.read()
47
  return docx2txt.process(raw_text)