Spaces:

TKM03
/

RESUME_FILTERING

Sleeping

File size: 4,005 Bytes

102fc06
1d2993e
102fc06
 
 
 
1d2993e
239934b
0ea0dd9
8ab2e60
239934b
1d2993e
0ea0dd9
 
239934b
 
 
 
 
 
 
 
 
 
52a747a
 
239934b
59bc749
 
239934b

import os
import re
import shutil
import PyPDF2
import gradio as gr
from transformers import pipeline

# Build the text-classification pipeline once at import time so every request
# reuses the same loaded model. `truncation=True` is forwarded with the call
# so over-long inputs are truncated rather than rejected.
classifier = pipeline(
    task="text-classification",
    model="liberatoratif/BERT-resume-job-recommender",
    truncation=True,
)

# Display names for the classifier's raw labels, listed in label-index order:
# the name at position i is used for the label "LABEL_<i>". Append further
# entries here if the model emits more labels.
# NOTE(review): confirm this ordering matches the model's own label ids.
_CATEGORY_NAMES = (
    "Information Technology / Software Engineering",
    "Finance / Accounting / Auditing",
    "Marketing / Sales / Business Development",
    "Engineering / Mechanical / Civil / Electrical",
    "Data Science / Machine Learning / AI",
    "Human Resources / Recruitment / Talent Acquisition",
    "Healthcare / Medical / Nursing",
    "Legal / Compliance",
    "Education / Training / Teaching",
    "Customer Service / Support",
    "Operations / Logistics / Supply Chain",
)

# Raw label -> human-readable category name.
CATEGORY_MAP = {
    f"LABEL_{index}": name for index, name in enumerate(_CATEGORY_NAMES)
}

# Category name -> list of saved resume paths filed under that category.
classified_files_by_category = dict()

def clean_text(text):
    """Normalize raw text before it is handed to the classifier.

    Removes anything starting with ``http`` up to the next whitespace, drops
    every character that is not an ASCII letter, digit or whitespace, then
    collapses whitespace runs to single spaces and trims both ends.
    """
    without_urls = re.sub(r'http\S+', '', text)
    alnum_only = re.sub(r'[^A-Za-z0-9\s]', '', without_urls)
    single_spaced = re.sub(r'\s+', ' ', alnum_only)
    return single_spaced.strip()

def extract_text_from_pdf(file):
    """Read every page of a PDF and return its cleaned text.

    Args:
        file: The PDF source handed straight to ``PyPDF2.PdfReader``.

    Returns:
        ``(text, None)`` on success, where ``text`` is the page texts joined
        by spaces and passed through ``clean_text``; or ``("", message)``
        when anything raises, with ``message`` being ``str(exception)``.
    """
    try:
        pdf = PyPDF2.PdfReader(file)
        # `or ""` substitutes an empty string when a page yields no text.
        page_texts = (page.extract_text() or "" for page in pdf.pages)
        cleaned = clean_text(" ".join(page_texts))
    except Exception as exc:
        # Best-effort: report the failure to the caller instead of raising.
        return "", str(exc)
    return cleaned, None

def _category_dir_name(category):
    """Return *category* flattened into a single directory name.

    Category names such as "Finance / Accounting / Auditing" contain "/",
    which a path would otherwise interpret as nested sub-directories.
    """
    return " - ".join(part.strip() for part in category.split("/"))


def _unique_filename(filename, taken):
    """Return *filename*, adding a " (n)" suffix if it is already in *taken*."""
    if filename not in taken:
        return filename
    stem, ext = os.path.splitext(filename)
    counter = 2
    while f"{stem} ({counter}){ext}" in taken:
        counter += 1
    return f"{stem} ({counter}){ext}"


def classify_resumes(files):
    """Classify uploaded resume PDFs and copy each into a per-category folder.

    Args:
        files: Iterable of uploaded files, each either a path string or an
            object exposing its path as ``.name``. ``None`` or an empty
            list is treated as an empty batch.

    Returns:
        A tuple ``(results, dropdown_update)``. ``results`` maps each file
        name to either ``{"error": ...}`` or a dict with the keys
        ``"Predicted Job Category"``, ``"Category Name"`` and
        ``"Confidence Score"``. ``dropdown_update`` is a ``gr.update`` whose
        choices are the sorted categories that received at least one file,
        with the first one preselected (or ``None`` when there are none).

    Side effects:
        Deletes and recreates the ``classified_resumes`` directory, and
        rebinds the module-level ``classified_files_by_category`` mapping.
    """
    global classified_files_by_category
    results = {}
    classified_files_by_category = {}

    output_root = "classified_resumes"
    # Start from an empty output tree so earlier runs do not leak into this one.
    if os.path.exists(output_root):
        shutil.rmtree(output_root)
    os.makedirs(output_root, exist_ok=True)

    # `files` may be None when nothing has been uploaded; treat it as empty.
    for file in files or []:
        source_path = file if isinstance(file, str) else file.name
        # Two uploads can share a basename; keep both instead of overwriting.
        filename = _unique_filename(os.path.basename(source_path), results)

        text, error = extract_text_from_pdf(file)
        if error or not text:
            results[filename] = {"error": error or "No text found in PDF"}
            continue

        # NOTE(review): this slice keeps the first 512 *characters*, not
        # tokens -- confirm that limit is intended given the pipeline is
        # already created with truncation=True.
        prediction = classifier(text[:512])[0]
        label = prediction["label"]
        score = round(prediction["score"], 4)
        category = CATEGORY_MAP.get(label, "Other / Miscellaneous")

        results[filename] = {
            "Predicted Job Category": label,
            "Category Name": category,
            "Confidence Score": score,
        }

        # Copy the upload into a flat, per-category directory.
        category_dir = os.path.join(output_root, _category_dir_name(category))
        os.makedirs(category_dir, exist_ok=True)
        dest_path = os.path.join(category_dir, filename)
        shutil.copyfile(source_path, dest_path)

        classified_files_by_category.setdefault(category, []).append(dest_path)

    available_categories = sorted(classified_files_by_category)
    default_category = available_categories[0] if available_categories else None
    return results, gr.update(choices=available_categories, value=default_category)

def filter_by_category(selected_category):
    """Return the stored resume paths for the chosen category.

    Gives back an empty list when no category is selected or when the
    category has no entry in ``classified_files_by_category``.
    """
    if not selected_category:
        return []
    if selected_category not in classified_files_by_category:
        return []
    return classified_files_by_category[selected_category]

# Gradio UI
# Components are created inside the Blocks context in the order written below:
# upload -> classify button -> JSON results -> category picker -> filtered
# files.
with gr.Blocks(title="Resume Classifier by Job Category") as demo:
    gr.Markdown("## 📄 Resume Screening System\nUpload resumes and classify them into job categories.")

    # Input side: multiple PDF uploads plus the button that starts classification.
    file_input = gr.File(label="Upload Resume PDFs", file_types=[".pdf"], file_count="multiple")
    classify_button = gr.Button("🔍 Classify All Resumes")

    # Output side: per-file results, a category picker (its choices start empty
    # and are filled in by classify_resumes), and the files for the picked category.
    output_json = gr.JSON(label="Classification Result (JSON)")
    category_dropdown = gr.Dropdown(label="Select a Job Category", choices=[], interactive=True)
    resume_output = gr.File(label="Filtered Resumes", file_types=[".pdf"], file_count="multiple")

    # Clicking the button runs classification; its two return values feed the
    # JSON view and the dropdown respectively.
    classify_button.click(fn=classify_resumes, inputs=[file_input], outputs=[output_json, category_dropdown])
    # Changing the dropdown value lists the saved files for that category.
    category_dropdown.change(fn=filter_by_category, inputs=category_dropdown, outputs=resume_output)

# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()