TKM03 committed on
Commit
102fc06
·
verified ·
1 Parent(s): dcb6d86

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +88 -0
app.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import os
3
+ import shutil
4
+ import PyPDF2
5
+ import gradio as gr
6
+ from transformers import pipeline
7
+
8
# Text-classification pipeline, created once at import time and reused for
# every uploaded resume.
text_classifier = pipeline(
    task="text-classification",
    model="saattrupdan/job-listing-filtering-model",
)

# Translates the raw labels emitted by the classifier into the strings shown
# in the results panel.
LABEL_MAP = dict(LABEL_0="Irrelevant", LABEL_1="Relevant")
16
+
17
def clean_resume_text(text):
    """Normalize raw resume text before it is passed to the classifier.

    The substitutions run in a fixed order: URLs, hashtags, @-mentions,
    punctuation and finally non-ASCII characters are removed, after which
    runs of whitespace are collapsed to a single space.

    Args:
        text: Raw text extracted from a resume.

    Returns:
        The cleaned text with no leading or trailing whitespace.
    """
    # (pattern, replacement) pairs; order matters because later patterns
    # operate on the output of earlier ones.
    substitutions = (
        (r'http\S+', ' '),
        (r'#\S+', ''),
        (r'@\S+', ' '),
        (r'[^\w\s]', ' '),
        (r'[^\x00-\x7f]', ' '),
    )
    cleaned = text
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)

    collapsed = re.sub(r'\s+', ' ', cleaned)
    return collapsed.strip()
24
+
25
def extract_resume_text(file):
    """Extract the text of every page of a PDF.

    Args:
        file: The PDF to read; it is passed to ``PyPDF2.PdfReader`` as is.

    Returns:
        A ``(text, error)`` tuple.
        * ``(text, None)`` when some non-whitespace text was extracted.
        * ``(text, "No text found in PDF")`` when the extracted text is
          empty or whitespace only.
        * ``(None, "Error reading PDF: ...")`` when reading or extraction
          raised an exception.
    """
    try:
        pdf = PyPDF2.PdfReader(file)
        chunks = []
        for page in pdf.pages:
            content = page.extract_text()
            # Pages without extractable text contribute nothing.
            if content:
                chunks.append(content + " ")
        text = "".join(chunks)
    except Exception as e:
        # Any failure is reported as a message instead of being raised, so
        # one unreadable file does not abort the whole batch.
        return None, f"Error reading PDF: {str(e)}"

    if text.strip():
        return text, None
    return text, "No text found in PDF"
36
+
37
def filter_relevant_resumes(files):
    """Classify uploaded resume PDFs and collect the ones labelled relevant.

    The ``filtered_resumes`` directory is recreated empty on every call, and
    each resume classified as "Relevant" is copied into it.

    Args:
        files: Iterable of uploaded files. Each item is either an object
            exposing the on-disk path as ``.name`` or a plain path string.
            ``None`` (nothing uploaded) is treated as an empty upload.

    Returns:
        A ``(predictions, relevant_files)`` tuple. ``predictions`` maps each
        file name to ``{"Relevance": ..., "Confidence Score": ...}``, or to
        ``{"error": ...}`` when the PDF text could not be extracted.
        ``relevant_files`` lists the paths of the copied relevant resumes.
    """
    output_dir = "filtered_resumes"
    predictions = {}
    relevant_files = []

    # Start from an empty output folder so files from a previous run are
    # never offered for download again.
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir, exist_ok=True)

    for file in files or []:
        # Resolve the on-disk path whether the upload is a file-like wrapper
        # (path in ``.name``) or already a path string.
        source_path = getattr(file, "name", file)
        file_name = os.path.basename(source_path)

        resume_text, error = extract_resume_text(file)
        if error:
            predictions[file_name] = {"error": error}
            continue

        cleaned_text = clean_resume_text(resume_text)
        # Only the first 512 characters are sent to the classifier.
        result = text_classifier(cleaned_text[:512])
        label = result[0]['label']
        score = round(result[0]['score'], 4)

        status = LABEL_MAP.get(label, "Unknown")
        predictions[file_name] = {
            "Relevance": status,
            "Confidence Score": score
        }

        if status == "Relevant":
            dest_path = os.path.join(output_dir, file_name)
            # Copy from the path on disk rather than calling file.read():
            # the handle has already been consumed by the PDF reader, so
            # reading it again could yield a truncated or empty copy, and
            # path-like uploads have no read() method at all.
            shutil.copy(source_path, dest_path)
            relevant_files.append(dest_path)

    return predictions, relevant_files
72
+
73
# Gradio UI: an upload widget, one button, a JSON panel with the per-file
# classification results, and a download list holding the relevant resumes.
with gr.Blocks(title="Resume Relevance Classifier & Filter") as demo:
    gr.Markdown(
        "## 📂 Resume Relevance Filter using Hugging Face Model\n"
        "Upload PDF resumes and filter out only the relevant ones."
    )

    file_input = gr.File(
        label="Upload Resume PDFs",
        file_count="multiple",
        file_types=[".pdf"],
    )

    with gr.Row():
        classify_button = gr.Button("🧠 Classify and Filter Relevant Resumes")

    relevance_output = gr.JSON(label="Classification Results")
    relevant_resume_gallery = gr.File(
        label="Download Relevant Resumes",
        file_count="multiple",
        file_types=[".pdf"],
    )

    # Clicking the button runs the classifier on the uploads and fills both
    # output components with the returned tuple.
    classify_button.click(
        fn=filter_relevant_resumes,
        inputs=[file_input],
        outputs=[relevance_output, relevant_resume_gallery],
    )

# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()