TKM03 committed on
Commit
8ab2e60
·
verified ·
1 Parent(s): 59bc749

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -91
app.py CHANGED
@@ -5,137 +5,115 @@ import PyPDF2
5
  import gradio as gr
6
  from transformers import pipeline
7
 
8
- # ------------------- Category Mapping -------------------
 
 
 
9
  CATEGORY_MAP = {
10
- "C0": "Administration / Clerical",
11
- "C1": "Agriculture / Environmental / Forestry",
12
- "C2": "Information Technology / Software Engineering",
13
- "C3": "Data Science / Machine Learning / AI",
14
- "C4": "Finance / Accounting / Auditing",
15
- "C5": "Human Resources / Recruitment / Talent Acquisition",
16
- "C6": "Sales / Marketing / Business Development",
17
- "C7": "Engineering / Mechanical / Civil / Electrical",
18
- "C8": "Customer Service / Support",
19
- "C9": "Design / UI-UX / Creative",
20
- "C10": "Healthcare / Medical / Nursing",
21
- "C11": "Education / Training / Teaching",
22
- "C12": "Retail / Merchandising / E-commerce",
23
- "C13": "Telecommunication / Network Engineering",
24
- "C14": "Operations / Logistics / Supply Chain",
25
- "C15": "Entrepreneurship / Startups / Freelancing",
26
- "C16": "Product Management / Project Management",
27
- "C17": "Legal / Compliance",
28
- "C18": "Real Estate / Property Management",
29
- "C19": "Transportation / Automotive / Aviation",
30
- "C20": "Construction / Architecture",
31
- "C21": "Energy / Oil & Gas / Utilities",
32
- "C22": "Security / Safety",
33
- "C23": "Procurement / Purchasing",
34
- "C24": "Manufacturing / Production",
35
- "C25": "Media / Communication / PR / Journalism",
36
- "C26": "Science / Research / R&D",
37
- "C27": "Quality Assurance / Control",
38
- "C28": "Hospitality / Tourism / Travel",
39
- "C29": "Management / Strategy / Consulting",
40
- "C30": "Other / Miscellaneous"
41
  }
42
 
43
- # ------------------- Load Classification Model -------------------
44
- classifier = pipeline("text-classification", model="CleveGreen/JobClassifier_v2")
45
 
46
- # ------------------- Resume Utilities -------------------
47
  def clean_text(text):
48
  text = re.sub(r'http\S+', ' ', text)
49
- text = re.sub(r'[^\x00-\x7f]', ' ', text)
50
  text = re.sub(r'[^\w\s]', ' ', text)
51
- text = re.sub(r'\s+', ' ', text).strip()
52
- return text
53
 
54
  def extract_text_from_pdf(file):
55
  try:
56
  reader = PyPDF2.PdfReader(file)
57
- text = ""
58
- for page in reader.pages:
59
- content = page.extract_text()
60
- if content:
61
- text += content + " "
62
- return text.strip(), None if text else "No text found in PDF."
63
  except Exception as e:
64
  return None, str(e)
65
 
66
- # ------------------- Resume Classification & Organization -------------------
67
  def classify_resumes(files):
68
- predictions = {}
69
- classified_files = {}
70
 
71
  if os.path.exists("classified_resumes"):
72
  shutil.rmtree("classified_resumes")
73
- os.makedirs("classified_resumes")
74
 
75
  for file in files:
76
  file_name = os.path.basename(file.name)
77
- resume_text, error = extract_text_from_pdf(file)
 
78
  if error:
79
- predictions[file_name] = {"error": error}
80
  continue
81
 
82
- cleaned_text = clean_text(resume_text)
83
- result = classifier(cleaned_text[:512])[0] # Truncate to avoid max token
84
-
85
- label = result['label'] # e.g., C2
86
- score = round(result['score'], 4)
87
- category = CATEGORY_MAP.get(label, "Other / Miscellaneous")
88
 
89
- predictions[file_name] = {
90
  "Predicted Job Category": label,
91
  "Category Name": category,
92
  "Confidence Score": score
93
  }
94
 
95
- category_folder = os.path.join("classified_resumes", category)
96
- os.makedirs(category_folder, exist_ok=True)
97
- dest_path = os.path.join(category_folder, file_name)
98
-
99
  with open(file.name, "rb") as f_in, open(dest_path, "wb") as f_out:
100
  shutil.copyfileobj(f_in, f_out)
101
 
102
- if category not in classified_files:
103
- classified_files[category] = []
104
- classified_files[category].append(dest_path)
105
 
106
- return predictions, classified_files
107
 
108
- # ------------------- Gradio App -------------------
109
- def filter_by_category(category, all_classified):
110
- return all_classified.get(category, [])
111
 
112
- with gr.Blocks(title="Resume Screening & Classification") as app:
113
- gr.Markdown("""
114
- # πŸ“„ Resume Screening Tool
115
- Upload resumes in PDF format. The system will classify them into job categories using a pretrained AI model.
116
- """)
117
 
118
- with gr.Row():
119
- uploaded_files = gr.File(file_types=[".pdf"], file_count="multiple", label="Upload Resumes")
120
- classify_button = gr.Button("Classify Resumes")
121
 
122
- classification_results = gr.JSON(label="Classification Output")
123
- category_selector = gr.Dropdown(choices=list(CATEGORY_MAP.values()), label="Filter by Job Category")
124
- filtered_resumes_output = gr.File(file_types=[".pdf"], file_count="multiple", label="Filtered Resumes")
 
125
 
126
- all_classified_state = gr.State({})
 
 
127
 
128
- classify_button.click(
129
- fn=classify_resumes,
130
- inputs=[uploaded_files],
131
- outputs=[classification_results, all_classified_state]
132
- )
133
 
134
- category_selector.change(
135
- fn=filter_by_category,
136
- inputs=[category_selector, all_classified_state],
137
- outputs=[filtered_resumes_output]
138
- )
139
 
140
  if __name__ == "__main__":
141
- app.launch()
 
5
  import gradio as gr
6
  from transformers import pipeline
7
 
8
# Resume classification pipeline, constructed once when the module is imported.
# top_k=1 is passed so that only the best-scoring label is requested.
text_classifier = pipeline(
    task="text-classification",
    model="liberatoratif/BERT-resume-job-recommender",
    top_k=1,
)
10
+
11
# Maps the classifier's raw label ids ("LABEL_0" ... "LABEL_29") to readable
# category names. The tuple position of each name is its label index.
# NOTE(review): the label order is taken as-is from this file; confirm it
# against the id2label mapping in the model's own config.
CATEGORY_MAP = {
    f"LABEL_{index}": name
    for index, name in enumerate((
        "Data Science / Machine Learning / AI",
        "Information Technology / Software Engineering",
        "Sales / Marketing / Business Development",
        "Finance / Accounting / Auditing",
        "Human Resources / Recruitment / Talent Acquisition",
        "Product Management / Project Management",
        "Engineering / Mechanical / Civil / Electrical",
        "Operations / Logistics / Supply Chain",
        "Design / UI-UX / Creative",
        "Legal / Compliance",
        "Healthcare / Medical / Nursing",
        "Customer Service / Support",
        "Education / Training / Teaching",
        "Entrepreneurship / Startups / Freelancing",
        "Retail / Merchandising / E-commerce",
        "Media / Communication / PR / Journalism",
        "Manufacturing / Production",
        "Administration / Clerical",
        "Quality Assurance / Control",
        "Construction / Architecture",
        "Science / Research / R&D",
        "Real Estate / Property Management",
        "Security / Safety",
        "Procurement / Purchasing",
        "Hospitality / Tourism / Travel",
        "Telecommunication / Network Engineering",
        "Transportation / Automotive / Aviation",
        "Energy / Oil & Gas / Utilities",
        "Management / Strategy / Consulting",
        "Other / Miscellaneous",
    ))
}
44
 
 
 
45
 
 
46
  def clean_text(text):
47
  text = re.sub(r'http\S+', ' ', text)
 
48
  text = re.sub(r'[^\w\s]', ' ', text)
49
+ return re.sub(r'\s+', ' ', text).strip()
50
+
51
 
52
  def extract_text_from_pdf(file):
53
  try:
54
  reader = PyPDF2.PdfReader(file)
55
+ text = " ".join(page.extract_text() or "" for page in reader.pages)
56
+ return clean_text(text), None if text.strip() else "No text found."
 
 
 
 
57
  except Exception as e:
58
  return None, str(e)
59
 
60
+
61
  def classify_resumes(files):
62
+ results = {}
63
+ category_to_files = {}
64
 
65
  if os.path.exists("classified_resumes"):
66
  shutil.rmtree("classified_resumes")
67
+ os.makedirs("classified_resumes", exist_ok=True)
68
 
69
  for file in files:
70
  file_name = os.path.basename(file.name)
71
+ text, error = extract_text_from_pdf(file)
72
+
73
  if error:
74
+ results[file_name] = {"error": error}
75
  continue
76
 
77
+ pred = text_classifier(text[:512])[0][0]
78
+ label = pred['label']
79
+ category = CATEGORY_MAP.get(label, "Unknown")
80
+ score = round(pred['score'], 4)
 
 
81
 
82
+ results[file_name] = {
83
  "Predicted Job Category": label,
84
  "Category Name": category,
85
  "Confidence Score": score
86
  }
87
 
88
+ # Save file in category folder
89
+ cat_dir = os.path.join("classified_resumes", category)
90
+ os.makedirs(cat_dir, exist_ok=True)
91
+ dest_path = os.path.join(cat_dir, file_name)
92
  with open(file.name, "rb") as f_in, open(dest_path, "wb") as f_out:
93
  shutil.copyfileobj(f_in, f_out)
94
 
95
+ category_to_files.setdefault(category, []).append(dest_path)
 
 
96
 
97
+ return results, list(category_to_files.keys()), category_to_files
98
 
 
 
 
99
 
100
def show_files_by_category(selected_category, category_to_files):
    """Return the file paths stored for the selected category.

    Looks ``selected_category`` up in ``category_to_files`` and returns the
    stored list, or an empty list when the category has no entry.
    """
    try:
        return category_to_files[selected_category]
    except KeyError:
        return []
 
 
 
102
 
 
 
 
103
 
104
def _classify_and_refresh(files):
    """Run classify_resumes and convert its category list into a dropdown update.

    Returning a bare list to a Dropdown output sets the component's value and
    leaves its choices untouched, so the list is wrapped in ``gr.update`` to
    replace the choices. The selection is reset to None so a category from a
    previous run is not left selected.
    """
    results, categories, category_to_files = classify_resumes(files)
    return results, gr.update(choices=categories, value=None), category_to_files


with gr.Blocks(title="Resume Category Classifier") as demo:
    gr.Markdown("## 📂 Resume Screening by Job Category")
    file_input = gr.File(file_types=[".pdf"], file_count="multiple", label="Upload Resume PDFs")
    classify_btn = gr.Button("🔍 Classify Resumes")

    results_output = gr.JSON(label="Prediction Results")
    # Choices start empty and are filled in after each classification run.
    category_dropdown = gr.Dropdown(label="Select Category to View Resumes", interactive=True)
    file_output = gr.File(label="Download Filtered Resumes", file_types=[".pdf"], file_count="multiple")

    # Holds the category -> file paths mapping between the two event handlers.
    category_files_state = gr.State({})

    classify_btn.click(
        fn=_classify_and_refresh,
        inputs=[file_input],
        outputs=[results_output, category_dropdown, category_files_state],
    )
    category_dropdown.change(
        fn=show_files_by_category,
        inputs=[category_dropdown, category_files_state],
        outputs=[file_output],
    )

if __name__ == "__main__":
    demo.launch()