TKM03 committed on
Commit
3cf8dcc
·
verified ·
1 Parent(s): 9f062d8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -32
app.py CHANGED
@@ -7,26 +7,10 @@ from collections import Counter
7
  # Load NER pipeline
8
  ner_pipeline = pipeline("ner", model="dslim/bert-base-NER", tokenizer="dslim/bert-base-NER", aggregation_strategy="simple")
9
 
10
- # Load Job Category Classifier
11
- text_classifier = pipeline("text-classification", model="serbog/distilbert-jobCategory_410k")
12
-
13
- # Mapping from category code to readable label
14
- CATEGORY_MAP = {
15
- "C1": "Engineering",
16
- "C2": "Information Technology",
17
- "C3": "Sales & Marketing",
18
- "C4": "Accounting & Finance",
19
- "C5": "Healthcare",
20
- "D1": "Education",
21
- "D2": "Human Resources",
22
- "E1": "Operations & Logistics",
23
- "E2": "Legal",
24
- "F1": "Customer Support",
25
- "Other": "General / Undefined"
26
- }
27
 
28
  def clean_resume_text(text):
29
- """Clean text by removing unwanted characters and formatting."""
30
  text = re.sub(r'http\S+', ' ', text)
31
  text = re.sub(r'#\S+', '', text)
32
  text = re.sub(r'@\S+', ' ', text)
@@ -35,7 +19,6 @@ def clean_resume_text(text):
35
  return re.sub(r'\s+', ' ', text).strip()
36
 
37
  def extract_resume_text(file):
38
- """Extract raw text from uploaded PDF."""
39
  try:
40
  reader = PyPDF2.PdfReader(file)
41
  text = ""
@@ -50,7 +33,6 @@ def extract_resume_text(file):
50
  return None, f"Error reading PDF: {str(e)}"
51
 
52
  def classify_resume_ner(entities):
53
- """Basic rule-based NER classification using ORG, LOC, MISC."""
54
  orgs = [e['word'] for e in entities if e['entity_group'] == 'ORG']
55
  locs = [e['word'] for e in entities if e['entity_group'] == 'LOC']
56
  jobs = [e['word'] for e in entities if e['entity_group'] == 'MISC']
@@ -66,7 +48,6 @@ def classify_resume_ner(entities):
66
  }
67
 
68
  def process_resumes(files):
69
- """Extract entities and show classification based on NER."""
70
  all_results = {}
71
  for file in files:
72
  file_name = file.name.split("/")[-1]
@@ -82,7 +63,7 @@ def process_resumes(files):
82
  all_results[file_name] = {
83
  "Persons": list({e["word"] for e in entities if e["entity_group"] == "PER"}),
84
  "Organizations": list({e["word"] for e in entities if e["entity_group"] == "ORG"}),
85
- "Locations": list({e["word"] for e in entities if e["entity_group"] == "LOC"}),
86
  "Other Entities": list({e["word"] for e in entities if e["entity_group"] not in ["PER", "ORG", "LOC"]}),
87
  "Cleaned_Text": cleaned_text,
88
  "Classification (NER)": classification
@@ -90,7 +71,6 @@ def process_resumes(files):
90
  return all_results
91
 
92
  def classify_resumes_with_model(files):
93
- """Use job category model to classify resume into readable job field."""
94
  predictions = {}
95
  for file in files:
96
  file_name = file.name.split("/")[-1]
@@ -99,20 +79,16 @@ def classify_resumes_with_model(files):
99
  predictions[file_name] = {"error": error}
100
  continue
101
  cleaned_text = clean_resume_text(resume_text)
102
- result = text_classifier(cleaned_text[:512]) # Truncate for safety
103
- raw_label = result[0]['label']
104
- readable_label = CATEGORY_MAP.get(raw_label, "Unknown")
105
-
106
  predictions[file_name] = {
107
- "Predicted Job Category": readable_label,
108
- "Raw Label": raw_label,
109
  "Confidence Score": round(result[0]['score'], 4)
110
  }
111
  return predictions
112
 
113
  # Gradio Interface
114
- with gr.Blocks(title="Resume Analyzer") as demo:
115
- gr.Markdown("## πŸ“‚ Multi-Resume Entity Extractor & Job Classifier\nUpload multiple PDF resumes. This tool extracts entities using NER and predicts the job field using a trained classifier model.")
116
 
117
  with gr.Row():
118
  file_input = gr.File(file_types=[".pdf"], label="Upload Resume PDFs", file_count="multiple")
@@ -122,7 +98,7 @@ with gr.Blocks(title="Resume Analyzer") as demo:
122
  classify_button = gr.Button("🧠 Predict Job Category (Model)")
123
 
124
  output_entities = gr.JSON(label="NER Results & Classification")
125
- output_class = gr.JSON(label="Model-Predicted Job Category")
126
 
127
  extract_button.click(fn=process_resumes, inputs=[file_input], outputs=[output_entities])
128
  classify_button.click(fn=classify_resumes_with_model, inputs=[file_input], outputs=[output_class])
 
7
  # Load NER pipeline
8
  ner_pipeline = pipeline("ner", model="dslim/bert-base-NER", tokenizer="dslim/bert-base-NER", aggregation_strategy="simple")
9
 
10
+ # Load NEW job classifier with human-readable labels
11
+ text_classifier = pipeline("text-classification", model="tkuye/job-description-classifier")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  def clean_resume_text(text):
 
14
  text = re.sub(r'http\S+', ' ', text)
15
  text = re.sub(r'#\S+', '', text)
16
  text = re.sub(r'@\S+', ' ', text)
 
19
  return re.sub(r'\s+', ' ', text).strip()
20
 
21
  def extract_resume_text(file):
 
22
  try:
23
  reader = PyPDF2.PdfReader(file)
24
  text = ""
 
33
  return None, f"Error reading PDF: {str(e)}"
34
 
35
  def classify_resume_ner(entities):
 
36
  orgs = [e['word'] for e in entities if e['entity_group'] == 'ORG']
37
  locs = [e['word'] for e in entities if e['entity_group'] == 'LOC']
38
  jobs = [e['word'] for e in entities if e['entity_group'] == 'MISC']
 
48
  }
49
 
50
  def process_resumes(files):
 
51
  all_results = {}
52
  for file in files:
53
  file_name = file.name.split("/")[-1]
 
63
  all_results[file_name] = {
64
  "Persons": list({e["word"] for e in entities if e["entity_group"] == "PER"}),
65
  "Organizations": list({e["word"] for e in entities if e["entity_group"] == "ORG"}),
66
+ "Locations": list({e["word"] for e in entities if e["entity_group"] == "LOC"]),
67
  "Other Entities": list({e["word"] for e in entities if e["entity_group"] not in ["PER", "ORG", "LOC"]}),
68
  "Cleaned_Text": cleaned_text,
69
  "Classification (NER)": classification
 
71
  return all_results
72
 
73
  def classify_resumes_with_model(files):
 
74
  predictions = {}
75
  for file in files:
76
  file_name = file.name.split("/")[-1]
 
79
  predictions[file_name] = {"error": error}
80
  continue
81
  cleaned_text = clean_resume_text(resume_text)
82
+ result = text_classifier(cleaned_text[:512])
 
 
 
83
  predictions[file_name] = {
84
+ "Predicted Job Category": result[0]['label'],
 
85
  "Confidence Score": round(result[0]['score'], 4)
86
  }
87
  return predictions
88
 
89
  # Gradio Interface
90
+ with gr.Blocks(title="Resume Analyzer (Readable Labels)") as demo:
91
+ gr.Markdown("## πŸ“‚ Multi-Resume Entity Extractor & Job Classifier\nUpload multiple PDF resumes. This tool extracts entities using NER and predicts job category using a model with readable labels.")
92
 
93
  with gr.Row():
94
  file_input = gr.File(file_types=[".pdf"], label="Upload Resume PDFs", file_count="multiple")
 
98
  classify_button = gr.Button("🧠 Predict Job Category (Model)")
99
 
100
  output_entities = gr.JSON(label="NER Results & Classification")
101
+ output_class = gr.JSON(label="Predicted Job Category (Model)")
102
 
103
  extract_button.click(fn=process_resumes, inputs=[file_input], outputs=[output_entities])
104
  classify_button.click(fn=classify_resumes_with_model, inputs=[file_input], outputs=[output_class])