Jeet Paul committed on
Commit
7423c0b
·
1 Parent(s): 7e0fd2a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -9
app.py CHANGED
@@ -13,8 +13,10 @@ import pdfminer
13
  from pdfminer.high_level import extract_text
14
  import re
15
  import PyPDF2
16
- import docx
17
  import textract
 
 
 
18
 
19
  nltk.download('punkt')
20
  nltk.download('stopwords')
@@ -30,11 +32,6 @@ def preprocess_text(text):
30
 
31
  return ' '.join(words)
32
 
33
- import textract
34
- import tempfile
35
-
36
- import fitz # PyMuPDF
37
-
38
  def extract_text_from_pdf(pdf_content):
39
  pdf_document = fitz.open(stream=pdf_content, filetype="pdf")
40
  text = ""
@@ -44,8 +41,6 @@ def extract_text_from_pdf(pdf_content):
44
  pdf_document.close()
45
  return text
46
 
47
- from docx import Document
48
-
49
  def extract_text_from_docx(docx_content):
50
  doc = Document(BytesIO(docx_content))
51
  text = " ".join(paragraph.text for paragraph in doc.paragraphs)
@@ -56,7 +51,6 @@ def extract_text_from_txt(txt_content):
56
  text = textract.process(input_filename=None, input_bytes=txt_content)
57
  return text
58
 
59
-
60
  def extract_text_from_resume(file_path):
61
  file_extension = file_path.split('.')[-1].lower()
62
 
 
13
  from pdfminer.high_level import extract_text
14
  import re
15
  import PyPDF2
 
16
  import textract
17
+ import tempfile
18
+ import fitz
19
+ from docx import Document
20
 
21
  nltk.download('punkt')
22
  nltk.download('stopwords')
 
32
 
33
  return ' '.join(words)
34
 
 
 
 
 
 
35
  def extract_text_from_pdf(pdf_content):
36
  pdf_document = fitz.open(stream=pdf_content, filetype="pdf")
37
  text = ""
 
41
  pdf_document.close()
42
  return text
43
 
 
 
44
  def extract_text_from_docx(docx_content):
45
  doc = Document(BytesIO(docx_content))
46
  text = " ".join(paragraph.text for paragraph in doc.paragraphs)
 
51
  text = textract.process(input_filename=None, input_bytes=txt_content)
52
  return text
53
 
 
54
  def extract_text_from_resume(file_path):
55
  file_extension = file_path.split('.')[-1].lower()
56