Spaces:
Runtime error
Runtime error
Commit
·
6263226
1
Parent(s):
9e5941f
Update ResumeReader.py
Browse files- ResumeReader.py +12 -10
ResumeReader.py
CHANGED
@@ -3,6 +3,7 @@ import os
|
|
3 |
import logging
|
4 |
import pdfplumber
|
5 |
import fitz
|
|
|
6 |
|
7 |
class ResumeReader:
|
8 |
|
@@ -45,16 +46,17 @@ class ResumeReader:
|
|
45 |
:rtype: str
|
46 |
"""
|
47 |
|
48 |
-
pdf = pdfplumber.open(pdf_file)
|
49 |
-
raw_text= ""
|
50 |
-
with fitz.open(pdf_file) as doc:
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
# for page in pdf.pages:
|
55 |
-
# raw_text += page.extract_text() + "\n"
|
56 |
-
|
57 |
-
|
|
|
58 |
|
59 |
try:
|
60 |
full_string = re.sub(r'\n+', '\n', raw_text)
|
|
|
3 |
import logging
|
4 |
import pdfplumber
|
5 |
import fitz
|
6 |
+
from pdfminer.high_level import extract_text
|
7 |
|
8 |
class ResumeReader:
|
9 |
|
|
|
46 |
:rtype: str
|
47 |
"""
|
48 |
|
49 |
+
# pdf = pdfplumber.open(pdf_file)
|
50 |
+
# raw_text= ""
|
51 |
+
# with fitz.open(pdf_file) as doc:
|
52 |
+
# for page in doc:
|
53 |
+
# raw_text += page.get_text()
|
54 |
+
# print(raw_text)
|
55 |
+
# # for page in pdf.pages:
|
56 |
+
# # raw_text += page.extract_text() + "\n"
|
57 |
+
|
58 |
+
raw_text = extract_text(pdf_file)
|
59 |
+
|
60 |
|
61 |
try:
|
62 |
full_string = re.sub(r'\n+', '\n', raw_text)
|