dudegladiator committed on
Commit
6263226
·
1 Parent(s): 9e5941f

Update ResumeReader.py

Browse files
Files changed (1) hide show
  1. ResumeReader.py +12 -10
ResumeReader.py CHANGED
@@ -3,6 +3,7 @@ import os
3
  import logging
4
  import pdfplumber
5
  import fitz
 
6
 
7
  class ResumeReader:
8
 
@@ -45,16 +46,17 @@ class ResumeReader:
45
  :rtype: str
46
  """
47
 
48
- pdf = pdfplumber.open(pdf_file)
49
- raw_text= ""
50
- with fitz.open(pdf_file) as doc:
51
- for page in doc:
52
- raw_text += page.get_text()
53
- print(raw_text)
54
- # for page in pdf.pages:
55
- # raw_text += page.extract_text() + "\n"
56
-
57
- pdf.close()
 
58
 
59
  try:
60
  full_string = re.sub(r'\n+', '\n', raw_text)
 
3
  import logging
4
  import pdfplumber
5
  import fitz
6
+ from pdfminer.high_level import extract_text
7
 
8
  class ResumeReader:
9
 
 
46
  :rtype: str
47
  """
48
 
49
+ # pdf = pdfplumber.open(pdf_file)
50
+ # raw_text= ""
51
+ # with fitz.open(pdf_file) as doc:
52
+ # for page in doc:
53
+ # raw_text += page.get_text()
54
+ # print(raw_text)
55
+ # # for page in pdf.pages:
56
+ # # raw_text += page.extract_text() + "\n"
57
+
58
+ raw_text = extract_text("pdf_file")
59
+
60
 
61
  try:
62
  full_string = re.sub(r'\n+', '\n', raw_text)