TKM03 committed on
Commit
9f062d8
·
verified ·
1 Parent(s): 3453a71

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -14
app.py CHANGED
@@ -4,14 +4,29 @@ import gradio as gr
4
  from transformers import pipeline
5
  from collections import Counter
6
 
7
- # Load NER pipeline for entity extraction
8
  ner_pipeline = pipeline("ner", model="dslim/bert-base-NER", tokenizer="dslim/bert-base-NER", aggregation_strategy="simple")
9
 
10
  # Load Job Category Classifier
11
  text_classifier = pipeline("text-classification", model="serbog/distilbert-jobCategory_410k")
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  def clean_resume_text(text):
14
- """Clean text by removing URLs, punctuation, non-ASCII chars."""
15
  text = re.sub(r'http\S+', ' ', text)
16
  text = re.sub(r'#\S+', '', text)
17
  text = re.sub(r'@\S+', ' ', text)
@@ -35,7 +50,7 @@ def extract_resume_text(file):
35
  return None, f"Error reading PDF: {str(e)}"
36
 
37
  def classify_resume_ner(entities):
38
- """Classify by extracting key orgs and locations from NER output."""
39
  orgs = [e['word'] for e in entities if e['entity_group'] == 'ORG']
40
  locs = [e['word'] for e in entities if e['entity_group'] == 'LOC']
41
  jobs = [e['word'] for e in entities if e['entity_group'] == 'MISC']
@@ -51,12 +66,11 @@ def classify_resume_ner(entities):
51
  }
52
 
53
  def process_resumes(files):
54
- """Process multiple resumes with NER and classification."""
55
  all_results = {}
56
  for file in files:
57
  file_name = file.name.split("/")[-1]
58
  resume_text, error = extract_resume_text(file)
59
-
60
  if error:
61
  all_results[file_name] = {"error": error}
62
  continue
@@ -76,7 +90,7 @@ def process_resumes(files):
76
  return all_results
77
 
78
  def classify_resumes_with_model(files):
79
- """Use job category model to predict the field/role."""
80
  predictions = {}
81
  for file in files:
82
  file_name = file.name.split("/")[-1]
@@ -85,26 +99,30 @@ def classify_resumes_with_model(files):
85
  predictions[file_name] = {"error": error}
86
  continue
87
  cleaned_text = clean_resume_text(resume_text)
88
- result = text_classifier(cleaned_text[:512]) # Truncate if too long
 
 
 
89
  predictions[file_name] = {
90
- "Predicted Job Category": result[0]['label'].replace("_", " ").title(),
 
91
  "Confidence Score": round(result[0]['score'], 4)
92
  }
93
  return predictions
94
 
95
  # Gradio Interface
96
  with gr.Blocks(title="Resume Analyzer") as demo:
97
- gr.Markdown("## πŸ“‚ Multi-Resume Entity Extractor & Job Category Classifier\nUpload multiple PDF resumes. This tool uses NER to extract info and a job classification model to predict job field/category.")
98
 
99
  with gr.Row():
100
- file_input = gr.File(file_types=[".pdf"], label="Upload Resumes (PDF)", file_count="multiple")
101
 
102
  with gr.Row():
103
- extract_button = gr.Button("πŸ” Extract Entities")
104
- classify_button = gr.Button("🧠 Predict Job Category")
105
 
106
- output_entities = gr.JSON(label="Entity Results & NER Classification")
107
- output_class = gr.JSON(label="Predicted Job Category (Model)")
108
 
109
  extract_button.click(fn=process_resumes, inputs=[file_input], outputs=[output_entities])
110
  classify_button.click(fn=classify_resumes_with_model, inputs=[file_input], outputs=[output_class])
 
4
  from transformers import pipeline
5
  from collections import Counter
6
 
7
+ # Load NER pipeline
8
  ner_pipeline = pipeline("ner", model="dslim/bert-base-NER", tokenizer="dslim/bert-base-NER", aggregation_strategy="simple")
9
 
10
  # Load Job Category Classifier
11
  text_classifier = pipeline("text-classification", model="serbog/distilbert-jobCategory_410k")
12
 
13
+ # Mapping from category code to readable label
14
+ CATEGORY_MAP = {
15
+ "C1": "Engineering",
16
+ "C2": "Information Technology",
17
+ "C3": "Sales & Marketing",
18
+ "C4": "Accounting & Finance",
19
+ "C5": "Healthcare",
20
+ "D1": "Education",
21
+ "D2": "Human Resources",
22
+ "E1": "Operations & Logistics",
23
+ "E2": "Legal",
24
+ "F1": "Customer Support",
25
+ "Other": "General / Undefined"
26
+ }
27
+
28
  def clean_resume_text(text):
29
+ """Clean text by removing unwanted characters and formatting."""
30
  text = re.sub(r'http\S+', ' ', text)
31
  text = re.sub(r'#\S+', '', text)
32
  text = re.sub(r'@\S+', ' ', text)
 
50
  return None, f"Error reading PDF: {str(e)}"
51
 
52
  def classify_resume_ner(entities):
53
+ """Basic rule-based NER classification using ORG, LOC, MISC."""
54
  orgs = [e['word'] for e in entities if e['entity_group'] == 'ORG']
55
  locs = [e['word'] for e in entities if e['entity_group'] == 'LOC']
56
  jobs = [e['word'] for e in entities if e['entity_group'] == 'MISC']
 
66
  }
67
 
68
  def process_resumes(files):
69
+ """Extract entities and show classification based on NER."""
70
  all_results = {}
71
  for file in files:
72
  file_name = file.name.split("/")[-1]
73
  resume_text, error = extract_resume_text(file)
 
74
  if error:
75
  all_results[file_name] = {"error": error}
76
  continue
 
90
  return all_results
91
 
92
  def classify_resumes_with_model(files):
93
+ """Use job category model to classify resume into readable job field."""
94
  predictions = {}
95
  for file in files:
96
  file_name = file.name.split("/")[-1]
 
99
  predictions[file_name] = {"error": error}
100
  continue
101
  cleaned_text = clean_resume_text(resume_text)
102
+ result = text_classifier(cleaned_text[:512]) # Truncate for safety
103
+ raw_label = result[0]['label']
104
+ readable_label = CATEGORY_MAP.get(raw_label, "Unknown")
105
+
106
  predictions[file_name] = {
107
+ "Predicted Job Category": readable_label,
108
+ "Raw Label": raw_label,
109
  "Confidence Score": round(result[0]['score'], 4)
110
  }
111
  return predictions
112
 
113
  # Gradio Interface
114
  with gr.Blocks(title="Resume Analyzer") as demo:
115
+ gr.Markdown("## πŸ“‚ Multi-Resume Entity Extractor & Job Classifier\nUpload multiple PDF resumes. This tool extracts entities using NER and predicts the job field using a trained classifier model.")
116
 
117
  with gr.Row():
118
+ file_input = gr.File(file_types=[".pdf"], label="Upload Resume PDFs", file_count="multiple")
119
 
120
  with gr.Row():
121
+ extract_button = gr.Button("πŸ” Extract Entities (NER)")
122
+ classify_button = gr.Button("🧠 Predict Job Category (Model)")
123
 
124
+ output_entities = gr.JSON(label="NER Results & Classification")
125
+ output_class = gr.JSON(label="Model-Predicted Job Category")
126
 
127
  extract_button.click(fn=process_resumes, inputs=[file_input], outputs=[output_entities])
128
  classify_button.click(fn=classify_resumes_with_model, inputs=[file_input], outputs=[output_class])