Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -14,6 +14,13 @@ LABEL_MAP = {
|
|
14 |
"LABEL_1": "Relevant"
|
15 |
}
|
16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
def clean_resume_text(text):
|
18 |
text = re.sub(r'http\S+', ' ', text)
|
19 |
text = re.sub(r'#\S+', '', text)
|
@@ -22,6 +29,7 @@ def clean_resume_text(text):
|
|
22 |
text = re.sub(r'[^\x00-\x7f]', ' ', text)
|
23 |
return re.sub(r'\s+', ' ', text).strip()
|
24 |
|
|
|
25 |
def extract_resume_text(file):
|
26 |
try:
|
27 |
reader = PyPDF2.PdfReader(file)
|
@@ -34,11 +42,12 @@ def extract_resume_text(file):
|
|
34 |
except Exception as e:
|
35 |
return None, f"Error reading PDF: {str(e)}"
|
36 |
|
37 |
-
|
|
|
38 |
predictions = {}
|
39 |
-
|
|
|
40 |
|
41 |
-
# Create temp dir for filtered files
|
42 |
if os.path.exists("filtered_resumes"):
|
43 |
shutil.rmtree("filtered_resumes")
|
44 |
os.makedirs("filtered_resumes", exist_ok=True)
|
@@ -61,29 +70,33 @@ def filter_relevant_resumes(files):
|
|
61 |
"Confidence Score": score
|
62 |
}
|
63 |
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
shutil.copyfileobj(f_in, f_out)
|
68 |
-
relevant_files.append(dest_path)
|
69 |
|
70 |
-
|
71 |
|
|
|
|
|
|
|
|
|
|
|
72 |
|
73 |
|
74 |
# Gradio UI
|
75 |
-
with gr.Blocks(title="Resume
|
76 |
-
gr.Markdown("## π Resume Relevance
|
77 |
|
78 |
file_input = gr.File(file_types=[".pdf"], file_count="multiple", label="Upload Resume PDFs")
|
79 |
-
|
80 |
-
with gr.Row():
|
81 |
-
classify_button = gr.Button("π§ Classify and Filter Relevant Resumes")
|
82 |
|
83 |
relevance_output = gr.JSON(label="Classification Results")
|
84 |
-
relevant_resume_gallery = gr.File(label="Download Relevant Resumes", file_types=[".pdf"], file_count="multiple")
|
85 |
|
86 |
-
|
|
|
|
|
|
|
|
|
87 |
|
88 |
if __name__ == "__main__":
|
89 |
demo.launch()
|
|
|
14 |
"LABEL_1": "Relevant"
|
15 |
}
|
16 |
|
17 |
+
# Module-level registry of resume files grouped by label.
# It starts with a separate empty list for each of the two category names.
classified_files = {label: [] for label in ("Relevant", "Irrelevant")}
|
22 |
+
|
23 |
+
|
24 |
def clean_resume_text(text):
|
25 |
text = re.sub(r'http\S+', ' ', text)
|
26 |
text = re.sub(r'#\S+', '', text)
|
|
|
29 |
text = re.sub(r'[^\x00-\x7f]', ' ', text)
|
30 |
return re.sub(r'\s+', ' ', text).strip()
|
31 |
|
32 |
+
|
33 |
def extract_resume_text(file):
|
34 |
try:
|
35 |
reader = PyPDF2.PdfReader(file)
|
|
|
42 |
except Exception as e:
|
43 |
return None, f"Error reading PDF: {str(e)}"
|
44 |
|
45 |
+
|
46 |
+
def classify_and_store(files):
|
47 |
predictions = {}
|
48 |
+
classified_files["Relevant"] = []
|
49 |
+
classified_files["Irrelevant"] = []
|
50 |
|
|
|
51 |
if os.path.exists("filtered_resumes"):
|
52 |
shutil.rmtree("filtered_resumes")
|
53 |
os.makedirs("filtered_resumes", exist_ok=True)
|
|
|
70 |
"Confidence Score": score
|
71 |
}
|
72 |
|
73 |
+
dest_path = f"filtered_resumes/{file_name}"
|
74 |
+
with open(file.name, "rb") as f_in, open(dest_path, "wb") as f_out:
|
75 |
+
shutil.copyfileobj(f_in, f_out)
|
|
|
|
|
76 |
|
77 |
+
classified_files[status].append(dest_path)
|
78 |
|
79 |
+
return predictions
|
80 |
+
|
81 |
+
|
82 |
+
def get_resumes_by_category(category):
    """Return the stored file list for ``category``.

    The category is looked up in the module-level ``classified_files``
    mapping. A category that is not a key there yields a new empty list.
    """
    if category in classified_files:
        return classified_files[category]
    return []
|
84 |
|
85 |
|
86 |
# Gradio UI: upload PDFs, classify them, then list stored files by category.
with gr.Blocks(title="Resume Classifier & Category Filter") as demo:
    gr.Markdown("## π Resume Relevance Classifier\nUpload resumes and view based on relevance category.")

    # Input side: multi-file PDF upload plus the button that starts a run.
    file_input = gr.File(
        label="Upload Resume PDFs",
        file_count="multiple",
        file_types=[".pdf"],
    )
    classify_button = gr.Button("π§ Classify Resumes")

    # JSON panel that receives whatever classify_and_store returns.
    relevance_output = gr.JSON(label="Classification Results")

    # Category picker and the file list it drives.
    category_dropdown = gr.Dropdown(
        label="Select Resume Category to View",
        choices=["Relevant", "Irrelevant"],
    )
    filtered_files_output = gr.File(
        label="Filtered Resumes",
        file_count="multiple",
        file_types=[".pdf"],
    )

    # Button click: pass the uploads to classify_and_store and show its result.
    classify_button.click(
        fn=classify_and_store,
        inputs=[file_input],
        outputs=[relevance_output],
    )
    # Dropdown change: show the files get_resumes_by_category returns
    # for the selected value.
    category_dropdown.change(
        fn=get_resumes_by_category,
        inputs=[category_dropdown],
        outputs=[filtered_files_output],
    )


if __name__ == "__main__":
    demo.launch()
|