# TKM03's picture
# Update app.py
# 239934b verified
import os
import re
import shutil
import PyPDF2
import gradio as gr
from transformers import pipeline
# Load the model once, at import time, as a text-classification pipeline.
# `truncation=True` is passed through to the pipeline; presumably it makes
# over-long inputs get truncated instead of rejected -- confirm against the
# transformers pipeline documentation.
classifier = pipeline("text-classification", model="liberatoratif/BERT-resume-job-recommender", truncation=True)
# Category mapping (expand as needed): raw model label -> readable category.
# Labels are numbered consecutively from LABEL_0, in the order listed here.
CATEGORY_MAP = {
    f"LABEL_{index}": title
    for index, title in enumerate(
        (
            "Information Technology / Software Engineering",
            "Finance / Accounting / Auditing",
            "Marketing / Sales / Business Development",
            "Engineering / Mechanical / Civil / Electrical",
            "Data Science / Machine Learning / AI",
            "Human Resources / Recruitment / Talent Acquisition",
            "Healthcare / Medical / Nursing",
            "Legal / Compliance",
            "Education / Training / Teaching",
            "Customer Service / Support",
            "Operations / Logistics / Supply Chain",
            # Add more if model supports more
        )
    )
}
# Module-level registry: category name -> list of paths of the resume copies
# filed under that category. classify_resumes() rebinds it to a fresh dict on
# every call and fills it; filter_by_category() reads it.
classified_files_by_category = {}
def clean_text(text):
    """Normalise raw text into plain ASCII alphanumeric words.

    The substitutions run in order: URLs (``http`` up to the next
    whitespace) are removed, every character that is not an ASCII letter,
    a digit or whitespace is dropped, and each whitespace run collapses to
    a single space. Leading and trailing whitespace is trimmed at the end.
    """
    substitutions = (
        (r'http\S+', ''),
        (r'[^A-Za-z0-9\s]', ''),
        (r'\s+', ' '),
    )
    cleaned = text
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned.strip()
def extract_text_from_pdf(file):
    """Read every page of a PDF and return its cleaned text.

    Args:
        file: Passed unchanged to ``PyPDF2.PdfReader``.

    Returns:
        A ``(text, error)`` pair. On success ``text`` is the output of
        ``clean_text`` over the space-joined page texts and ``error`` is
        ``None``. If anything raises, ``text`` is ``""`` and ``error`` is
        the exception message.
    """
    try:
        page_texts = []
        for page in PyPDF2.PdfReader(file).pages:
            # A falsy extraction result is replaced by an empty string.
            page_texts.append(page.extract_text() or "")
        return clean_text(" ".join(page_texts)), None
    except Exception as exc:
        return "", str(exc)
def _category_dirname(category):
    """Turn a category name into a single, filesystem-friendly directory name."""
    # Category names contain " / " separators. Used verbatim inside a path
    # they create nested directories with leading/trailing spaces, so the
    # parts are trimmed and joined with "_" instead.
    return "_".join(part.strip() for part in category.split("/"))


def classify_resumes(files):
    """Classify uploaded resume PDFs and file a copy of each by category.

    The ``classified_resumes`` directory is wiped and recreated on every
    call, and the module-level ``classified_files_by_category`` registry is
    rebuilt from scratch.

    Args:
        files: Uploaded file objects exposing the path of the upload as
            ``.name``. ``None`` or an empty list is treated as "nothing
            uploaded".

    Returns:
        A ``(results, dropdown_update)`` pair. ``results`` maps each file's
        base name either to ``{"error": ...}`` or to a dict with the keys
        "Predicted Job Category", "Category Name" and "Confidence Score".
        ``dropdown_update`` is a ``gr.update`` carrying the sorted category
        names that received at least one file, with the first one selected
        (or ``None`` when there are none).
    """
    global classified_files_by_category
    results = {}
    classified_files_by_category = {}

    # Start every run from an empty output tree.
    if os.path.exists("classified_resumes"):
        shutil.rmtree("classified_resumes")
    os.makedirs("classified_resumes", exist_ok=True)

    # `files` is None when the button is clicked with nothing uploaded;
    # iterating it directly would raise TypeError.
    for file in files or []:
        filename = os.path.basename(file.name)
        text, error = extract_text_from_pdf(file)
        if error or not text:
            results[filename] = {"error": error or "No text found in PDF"}
            continue

        # Only the first 512 characters of the cleaned text are classified.
        prediction = classifier(text[:512])[0]
        label = prediction["label"]
        score = round(prediction["score"], 4)
        category = CATEGORY_MAP.get(label, "Other / Miscellaneous")
        results[filename] = {
            "Predicted Job Category": label,
            "Category Name": category,
            "Confidence Score": score
        }

        # Save file by category
        category_dir = os.path.join("classified_resumes", _category_dirname(category))
        os.makedirs(category_dir, exist_ok=True)
        dest_path = os.path.join(category_dir, filename)
        shutil.copyfile(file.name, dest_path)
        classified_files_by_category.setdefault(category, []).append(dest_path)

    available_categories = sorted(classified_files_by_category.keys())
    return results, gr.update(choices=available_categories, value=available_categories[0] if available_categories else None)
def filter_by_category(selected_category):
    """Return the stored resume paths for ``selected_category``.

    An empty list is returned when no category is selected or when the
    category has no entry in ``classified_files_by_category``.
    """
    if not selected_category:
        return []
    return classified_files_by_category.get(selected_category, [])
# Gradio UI
with gr.Blocks(title="Resume Classifier by Job Category") as demo:
    gr.Markdown("## 📄 Resume Screening System\nUpload resumes and classify them into job categories.")

    # Inputs: one or more PDF uploads and the button that starts classification.
    file_input = gr.File(label="Upload Resume PDFs", file_types=[".pdf"], file_count="multiple")
    classify_button = gr.Button("🔍 Classify All Resumes")

    # Outputs: the per-file results, a dropdown of the categories found, and
    # the files belonging to the selected category.
    output_json = gr.JSON(label="Classification Result (JSON)")
    category_dropdown = gr.Dropdown(label="Select a Job Category", choices=[], interactive=True)
    resume_output = gr.File(label="Filtered Resumes", file_types=[".pdf"], file_count="multiple")

    # Classification fills the JSON view and updates the dropdown; changing
    # the dropdown selection then lists that category's files.
    classify_button.click(fn=classify_resumes, inputs=[file_input], outputs=[output_json, category_dropdown])
    category_dropdown.change(fn=filter_by_category, inputs=category_dropdown, outputs=resume_output)

if __name__ == "__main__":
    demo.launch()