Spaces:
Sleeping
Sleeping
Jeet Paul
committed on
Commit
·
7423c0b
1
Parent(s):
7e0fd2a
Update app.py
Browse files
app.py
CHANGED
@@ -13,8 +13,10 @@ import pdfminer
|
|
13 |
from pdfminer.high_level import extract_text
|
14 |
import re
|
15 |
import PyPDF2
|
16 |
-
import docx
|
17 |
import textract
|
|
|
|
|
|
|
18 |
|
19 |
nltk.download('punkt')
|
20 |
nltk.download('stopwords')
|
@@ -30,11 +32,6 @@ def preprocess_text(text):
|
|
30 |
|
31 |
return ' '.join(words)
|
32 |
|
33 |
-
import textract
|
34 |
-
import tempfile
|
35 |
-
|
36 |
-
import fitz # PyMuPDF
|
37 |
-
|
38 |
def extract_text_from_pdf(pdf_content):
|
39 |
pdf_document = fitz.open(stream=pdf_content, filetype="pdf")
|
40 |
text = ""
|
@@ -44,8 +41,6 @@ def extract_text_from_pdf(pdf_content):
|
|
44 |
pdf_document.close()
|
45 |
return text
|
46 |
|
47 |
-
from docx import Document
|
48 |
-
|
49 |
def extract_text_from_docx(docx_content):
|
50 |
doc = Document(BytesIO(docx_content))
|
51 |
text = " ".join(paragraph.text for paragraph in doc.paragraphs)
|
@@ -56,7 +51,6 @@ def extract_text_from_txt(txt_content):
|
|
56 |
text = textract.process(input_filename=None, input_bytes=txt_content)
|
57 |
return text
|
58 |
|
59 |
-
|
60 |
def extract_text_from_resume(file_path):
|
61 |
file_extension = file_path.split('.')[-1].lower()
|
62 |
|
|
|
13 |
from pdfminer.high_level import extract_text
|
14 |
import re
|
15 |
import PyPDF2
|
|
|
16 |
import textract
|
17 |
+
import tempfile
|
18 |
+
import fitz
|
19 |
+
from docx import Document
|
20 |
|
21 |
nltk.download('punkt')
|
22 |
nltk.download('stopwords')
|
|
|
32 |
|
33 |
return ' '.join(words)
|
34 |
|
|
|
|
|
|
|
|
|
|
|
35 |
def extract_text_from_pdf(pdf_content):
|
36 |
pdf_document = fitz.open(stream=pdf_content, filetype="pdf")
|
37 |
text = ""
|
|
|
41 |
pdf_document.close()
|
42 |
return text
|
43 |
|
|
|
|
|
44 |
def extract_text_from_docx(docx_content):
|
45 |
doc = Document(BytesIO(docx_content))
|
46 |
text = " ".join(paragraph.text for paragraph in doc.paragraphs)
|
|
|
51 |
text = textract.process(input_filename=None, input_bytes=txt_content)
|
52 |
return text
|
53 |
|
|
|
54 |
def extract_text_from_resume(file_path):
|
55 |
file_extension = file_path.split('.')[-1].lower()
|
56 |
|