TKM03 committed on
Commit
cc2242c
·
verified ·
1 Parent(s): 8bfd778

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -7
app.py CHANGED
@@ -4,11 +4,29 @@ import gradio as gr
4
  from transformers import pipeline
5
  from collections import Counter
6
 
7
- # Load pipelines
8
  ner_pipeline = pipeline("ner", model="dslim/bert-base-NER", tokenizer="dslim/bert-base-NER", aggregation_strategy="simple")
9
- text_classifier = pipeline("text-classification", model="tkuye/job-description-classifier")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
  def clean_resume_text(text):
 
12
  text = re.sub(r'http\S+', ' ', text)
13
  text = re.sub(r'#\S+', '', text)
14
  text = re.sub(r'@\S+', ' ', text)
@@ -17,6 +35,7 @@ def clean_resume_text(text):
17
  return re.sub(r'\s+', ' ', text).strip()
18
 
19
  def extract_resume_text(file):
 
20
  try:
21
  reader = PyPDF2.PdfReader(file)
22
  text = ""
@@ -31,6 +50,7 @@ def extract_resume_text(file):
31
  return None, f"Error reading PDF: {str(e)}"
32
 
33
  def classify_resume_ner(entities):
 
34
  orgs = [e['word'] for e in entities if e['entity_group'] == 'ORG']
35
  locs = [e['word'] for e in entities if e['entity_group'] == 'LOC']
36
  jobs = [e['word'] for e in entities if e['entity_group'] == 'MISC']
@@ -46,6 +66,7 @@ def classify_resume_ner(entities):
46
  }
47
 
48
  def process_resumes(files):
 
49
  all_results = {}
50
  for file in files:
51
  file_name = file.name.split("/")[-1]
@@ -69,6 +90,7 @@ def process_resumes(files):
69
  return all_results
70
 
71
  def classify_resumes_with_model(files):
 
72
  predictions = {}
73
  for file in files:
74
  file_name = file.name.split("/")[-1]
@@ -77,16 +99,20 @@ def classify_resumes_with_model(files):
77
  predictions[file_name] = {"error": error}
78
  continue
79
  cleaned_text = clean_resume_text(resume_text)
80
- result = text_classifier(cleaned_text[:512]) # Truncate long resumes
 
 
 
81
  predictions[file_name] = {
82
- "Predicted Job Category": result[0]['label'],
 
83
  "Confidence Score": round(result[0]['score'], 4)
84
  }
85
  return predictions
86
 
87
  # Gradio Interface
88
- with gr.Blocks(title="Resume Analyzer (Readable Labels)") as demo:
89
- gr.Markdown("## 📂 Multi-Resume Entity Extractor & Job Classifier\nUpload multiple PDF resumes. This tool extracts entities using NER and predicts job category using a model with readable labels.")
90
 
91
  with gr.Row():
92
  file_input = gr.File(file_types=[".pdf"], label="Upload Resume PDFs", file_count="multiple")
@@ -96,7 +122,7 @@ with gr.Blocks(title="Resume Analyzer (Readable Labels)") as demo:
96
  classify_button = gr.Button("🧠 Predict Job Category (Model)")
97
 
98
  output_entities = gr.JSON(label="NER Results & Classification")
99
- output_class = gr.JSON(label="Predicted Job Category (Model)")
100
 
101
  extract_button.click(fn=process_resumes, inputs=[file_input], outputs=[output_entities])
102
  classify_button.click(fn=classify_resumes_with_model, inputs=[file_input], outputs=[output_class])
 
4
  from transformers import pipeline
5
  from collections import Counter
6
 
7
+ # Load NER pipeline
8
  ner_pipeline = pipeline("ner", model="dslim/bert-base-NER", tokenizer="dslim/bert-base-NER", aggregation_strategy="simple")
9
+
10
+ # Load Job Category Classifier
11
+ text_classifier = pipeline("text-classification", model="serbog/distilbert-jobCategory_410k")
12
+
13
+ # Mapping from category code to readable label
14
+ CATEGORY_MAP = {
15
+ "C1": "Engineering",
16
+ "C2": "Information Technology",
17
+ "C3": "Sales & Marketing",
18
+ "C4": "Accounting & Finance",
19
+ "C5": "Healthcare",
20
+ "D1": "Education",
21
+ "D2": "Human Resources",
22
+ "E1": "Operations & Logistics",
23
+ "E2": "Legal",
24
+ "F1": "Customer Support",
25
+ "Other": "General / Undefined"
26
+ }
27
 
28
  def clean_resume_text(text):
29
+ """Clean text by removing unwanted characters and formatting."""
30
  text = re.sub(r'http\S+', ' ', text)
31
  text = re.sub(r'#\S+', '', text)
32
  text = re.sub(r'@\S+', ' ', text)
 
35
  return re.sub(r'\s+', ' ', text).strip()
36
 
37
  def extract_resume_text(file):
38
+ """Extract raw text from uploaded PDF."""
39
  try:
40
  reader = PyPDF2.PdfReader(file)
41
  text = ""
 
50
  return None, f"Error reading PDF: {str(e)}"
51
 
52
  def classify_resume_ner(entities):
53
+ """Basic rule-based NER classification using ORG, LOC, MISC."""
54
  orgs = [e['word'] for e in entities if e['entity_group'] == 'ORG']
55
  locs = [e['word'] for e in entities if e['entity_group'] == 'LOC']
56
  jobs = [e['word'] for e in entities if e['entity_group'] == 'MISC']
 
66
  }
67
 
68
  def process_resumes(files):
69
+ """Extract entities and show classification based on NER."""
70
  all_results = {}
71
  for file in files:
72
  file_name = file.name.split("/")[-1]
 
90
  return all_results
91
 
92
  def classify_resumes_with_model(files):
93
+ """Use job category model to classify resume into readable job field."""
94
  predictions = {}
95
  for file in files:
96
  file_name = file.name.split("/")[-1]
 
99
  predictions[file_name] = {"error": error}
100
  continue
101
  cleaned_text = clean_resume_text(resume_text)
102
+ result = text_classifier(cleaned_text[:512]) # Truncate for safety
103
+ raw_label = result[0]['label']
104
+ readable_label = CATEGORY_MAP.get(raw_label, "Unknown")
105
+
106
  predictions[file_name] = {
107
+ "Predicted Job Category": readable_label,
108
+ "Raw Label": raw_label,
109
  "Confidence Score": round(result[0]['score'], 4)
110
  }
111
  return predictions
112
 
113
  # Gradio Interface
114
+ with gr.Blocks(title="Resume Analyzer") as demo:
115
+ gr.Markdown("## 📂 Multi-Resume Entity Extractor & Job Classifier\nUpload multiple PDF resumes. This tool extracts entities using NER and predicts the job field using a trained classifier model.")
116
 
117
  with gr.Row():
118
  file_input = gr.File(file_types=[".pdf"], label="Upload Resume PDFs", file_count="multiple")
 
122
  classify_button = gr.Button("🧠 Predict Job Category (Model)")
123
 
124
  output_entities = gr.JSON(label="NER Results & Classification")
125
+ output_class = gr.JSON(label="Model-Predicted Job Category")
126
 
127
  extract_button.click(fn=process_resumes, inputs=[file_input], outputs=[output_entities])
128
  classify_button.click(fn=classify_resumes_with_model, inputs=[file_input], outputs=[output_class])