# Hugging Face Space (status when captured: Sleeping)
import os | |
import re | |
import shutil | |
import PyPDF2 | |
import gradio as gr | |
from transformers import pipeline | |
# Build the resume classifier once, when the module is first loaded.
# truncation=True is handed to the pipeline together with the task and model.
classifier = pipeline(
    task="text-classification",
    model="liberatoratif/BERT-resume-job-recommender",
    truncation=True,
)
# Display names for the classifier's labels, listed in label order: the entry
# at position i is shown for the label "LABEL_<i>". Append further names here
# if the model emits more labels; unknown labels are handled by the caller.
# NOTE(review): the label-to-name assignment cannot be verified from this
# file -- confirm it against the model's own label configuration.
_CATEGORY_NAMES = (
    "Information Technology / Software Engineering",
    "Finance / Accounting / Auditing",
    "Marketing / Sales / Business Development",
    "Engineering / Mechanical / Civil / Electrical",
    "Data Science / Machine Learning / AI",
    "Human Resources / Recruitment / Talent Acquisition",
    "Healthcare / Medical / Nursing",
    "Legal / Compliance",
    "Education / Training / Teaching",
    "Customer Service / Support",
    "Operations / Logistics / Supply Chain",
)
CATEGORY_MAP = {
    f"LABEL_{position}": display_name
    for position, display_name in enumerate(_CATEGORY_NAMES)
}

# Category name -> list of saved resume paths; starts empty at import time.
classified_files_by_category = {}
def clean_text(text):
    """Normalise raw text before it is handed to the classifier.

    Three passes are applied in order: anything starting with ``http`` up to
    the next whitespace is dropped, every character that is not an ASCII
    letter, a digit or whitespace is removed, and each run of whitespace is
    collapsed to one space.

    Args:
        text: The raw string to clean.

    Returns:
        The cleaned string, without leading or trailing whitespace.
    """
    # Order matters: URLs must go before punctuation is stripped, otherwise
    # the "://" and dots vanish and the URL remnants stay in the text.
    substitutions = (
        (r'http\S+', ''),
        (r'[^A-Za-z0-9\s]', ''),
        (r'\s+', ' '),
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    return text.strip()
def extract_text_from_pdf(file):
    """Pull the text out of a PDF and return it cleaned.

    Args:
        file: The PDF to read, passed straight to ``PyPDF2.PdfReader``.

    Returns:
        A ``(text, error)`` pair. On success ``text`` is the page texts
        joined with spaces and run through ``clean_text``, and ``error`` is
        ``None``. If anything raises along the way, ``text`` is ``""`` and
        ``error`` is the exception's message.
    """
    try:
        page_texts = []
        for page in PyPDF2.PdfReader(file).pages:
            # A page whose extraction yields nothing contributes "".
            page_texts.append(page.extract_text() or "")
        cleaned = clean_text(" ".join(page_texts))
    except Exception as exc:
        # Report the failure to the caller instead of aborting the batch.
        return "", str(exc)
    return cleaned, None
def _category_dirname(category):
    """Turn a display category into one filesystem-safe directory name."""
    # Category names contain " / ", which would otherwise be read as nested
    # path components with leading/trailing spaces.
    return re.sub(r'[^A-Za-z0-9]+', '_', category).strip('_') or "uncategorized"


def _unique_name(filename, taken):
    """Return *filename*, adding a numeric suffix if it is already in *taken*."""
    if filename not in taken:
        return filename
    stem, extension = os.path.splitext(filename)
    counter = 2
    while f"{stem}_{counter}{extension}" in taken:
        counter += 1
    return f"{stem}_{counter}{extension}"


def classify_resumes(files):
    """Classify uploaded resume PDFs and file a copy of each by category.

    Every call starts from scratch: the ``classified_resumes`` directory is
    deleted and recreated, and the module-level
    ``classified_files_by_category`` mapping is reset, so results of an
    earlier call are replaced.

    Args:
        files: The uploaded files, or ``None``/empty when nothing was
            uploaded. Each item is either an object whose ``name`` attribute
            is the file's path, or the path itself.

    Returns:
        A ``(results, dropdown_update)`` pair. ``results`` maps each file
        name to either ``{"error": ...}`` or a dict with the keys
        ``"Predicted Job Category"``, ``"Category Name"`` and
        ``"Confidence Score"``. When several uploads share a base name, the
        later ones get a ``_2``, ``_3``, ... suffix so none is overwritten.
        ``dropdown_update`` is a ``gr.update`` carrying the sorted category
        names found, with the first one selected (or ``None`` if there are
        none).
    """
    global classified_files_by_category
    results = {}
    classified_files_by_category = {}

    # Wipe the output of any previous run before writing new copies.
    if os.path.exists("classified_resumes"):
        shutil.rmtree("classified_resumes")
    os.makedirs("classified_resumes", exist_ok=True)

    # `files` is None when the button is pressed without an upload.
    for file in files or []:
        source_path = getattr(file, "name", file)
        filename = _unique_name(os.path.basename(source_path), results)

        text, error = extract_text_from_pdf(file)
        if error or not text:
            results[filename] = {"error": error or "No text found in PDF"}
            continue

        # Only the first 512 characters of the cleaned text are classified.
        prediction = classifier(text[:512])[0]
        label = prediction["label"]
        score = round(prediction["score"], 4)
        category = CATEGORY_MAP.get(label, "Other / Miscellaneous")
        results[filename] = {
            "Predicted Job Category": label,
            "Category Name": category,
            "Confidence Score": score
        }

        # Save a copy of the file under its category's directory.
        category_dir = os.path.join("classified_resumes", _category_dirname(category))
        os.makedirs(category_dir, exist_ok=True)
        dest_path = os.path.join(category_dir, filename)
        shutil.copyfile(source_path, dest_path)
        classified_files_by_category.setdefault(category, []).append(dest_path)

    available_categories = sorted(classified_files_by_category)
    return results, gr.update(
        choices=available_categories,
        value=available_categories[0] if available_categories else None,
    )
def filter_by_category(selected_category):
    """Look up the saved resume paths for the chosen category.

    Args:
        selected_category: The category name to look up; may be empty or
            ``None`` when nothing is selected.

    Returns:
        The list stored in ``classified_files_by_category`` for that
        category, or an empty list when nothing is selected or the category
        has no entry.
    """
    if not selected_category:
        return []
    return classified_files_by_category.get(selected_category, [])
# Gradio UI: upload PDFs, classify them, then browse the saved copies by
# category.
with gr.Blocks(title="Resume Classifier by Job Category") as demo:
    gr.Markdown("## 📄 Resume Screening System\nUpload resumes and classify them into job categories.")

    # Inputs and outputs, in the order they appear on the page.
    file_input = gr.File(label="Upload Resume PDFs", file_types=[".pdf"], file_count="multiple")
    classify_button = gr.Button("🔍 Classify All Resumes")
    output_json = gr.JSON(label="Classification Result (JSON)")
    category_dropdown = gr.Dropdown(label="Select a Job Category", choices=[], interactive=True)
    resume_output = gr.File(label="Filtered Resumes", file_types=[".pdf"], file_count="multiple")

    # Clicking the button runs the classification and fills both the JSON
    # view and the category dropdown.
    classify_button.click(fn=classify_resumes, inputs=[file_input], outputs=[output_json, category_dropdown])
    # A change of the dropdown value lists the files saved for that category.
    category_dropdown.change(fn=filter_by_category, inputs=category_dropdown, outputs=resume_output)

if __name__ == "__main__":
    demo.launch()