File size: 4,005 Bytes
102fc06
1d2993e
102fc06
 
 
 
1d2993e
239934b
0ea0dd9
8ab2e60
239934b
1d2993e
0ea0dd9
 
239934b
 
 
 
 
 
 
 
 
 
52a747a
 
239934b
59bc749
 
239934b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import os
import re
import shutil
import PyPDF2
import gradio as gr
from transformers import pipeline

# Build the text-classification pipeline once, at import time, so every
# request handled by this module reuses the same loaded model.
classifier = pipeline(
    task="text-classification",
    model="liberatoratif/BERT-resume-job-recommender",
    truncation=True,
)

# Maps the classifier's raw "LABEL_<i>" outputs to display names. The names
# below are listed in label order (position i belongs to "LABEL_<i>");
# append more entries if the model supports more labels.
# NOTE(review): the label -> category assignment is taken as given here;
# confirm it against the model card.
CATEGORY_MAP = {
    f"LABEL_{index}": display_name
    for index, display_name in enumerate(
        (
            "Information Technology / Software Engineering",
            "Finance / Accounting / Auditing",
            "Marketing / Sales / Business Development",
            "Engineering / Mechanical / Civil / Electrical",
            "Data Science / Machine Learning / AI",
            "Human Resources / Recruitment / Talent Acquisition",
            "Healthcare / Medical / Nursing",
            "Legal / Compliance",
            "Education / Training / Teaching",
            "Customer Service / Support",
            "Operations / Logistics / Supply Chain",
        )
    )
}

# Category name -> list of copied resume paths. Rebuilt by classify_resumes()
# on every run and read by filter_by_category().
classified_files_by_category = {}

def clean_text(text):
    """Return *text* normalized for classification.

    Three passes are applied in order: tokens starting with ``http`` are
    dropped, every character that is not an ASCII letter, digit or
    whitespace is removed, and runs of whitespace collapse to one space.
    The result has no leading or trailing whitespace.
    """
    substitutions = (
        (r'http\S+', ''),          # strip URLs
        (r'[^A-Za-z0-9\s]', ''),   # strip punctuation / non-ASCII symbols
        (r'\s+', ' '),             # collapse whitespace runs
    )
    cleaned = text
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned.strip()

def extract_text_from_pdf(file):
    """Read a PDF and return its cleaned text.

    Args:
        file: Whatever ``PyPDF2.PdfReader`` accepts as its source.

    Returns:
        A ``(text, error)`` pair. On success ``text`` is the text of all
        pages joined by spaces and passed through ``clean_text``, and
        ``error`` is ``None``. If anything raises, ``text`` is ``""`` and
        ``error`` is the exception message.
    """
    try:
        pdf = PyPDF2.PdfReader(file)
        page_texts = []
        for page in pdf.pages:
            # extract_text() may yield nothing for a page; treat that as "".
            page_texts.append(page.extract_text() or "")
        cleaned = clean_text(" ".join(page_texts))
    except Exception as exc:
        # Deliberately broad: any failure is reported to the caller as a
        # message instead of aborting the whole batch.
        return "", str(exc)
    return cleaned, None

def _uploaded_path(file):
    """Return the filesystem path of an uploaded file.

    Accepts both a plain path (str / os.PathLike) and a temp-file style
    object that exposes the path as ``.name``.
    """
    if isinstance(file, (str, os.PathLike)):
        return os.fspath(file)
    return file.name


def _category_dirname(category):
    """Return *category* as a single directory name without path separators.

    Category names contain " / "; used verbatim in a path they would create
    nested directories whose names start or end with a space.
    """
    return re.sub(r"\s*[\\/]+\s*", " - ", category).strip()


def classify_resumes(files):
    """Classify uploaded resume PDFs and copy each into a per-category folder.

    The ``classified_resumes`` directory is wiped and recreated on every
    call, and the module-level ``classified_files_by_category`` mapping is
    rebuilt from scratch.

    Args:
        files: Uploaded files (paths or objects with a ``.name`` path).
            ``None`` or an empty list means nothing was uploaded.

    Returns:
        A ``(results, dropdown_update)`` pair. ``results`` maps each file's
        base name either to ``{"error": ...}`` or to its predicted label,
        category name and confidence score. ``dropdown_update`` is a
        ``gr.update`` carrying the sorted categories that received at least
        one file, with the first one preselected (``None`` if there are none).
    """
    global classified_files_by_category
    results = {}
    classified_files_by_category = {}

    # Start every run from an empty output tree.
    if os.path.exists("classified_resumes"):
        shutil.rmtree("classified_resumes")
    os.makedirs("classified_resumes", exist_ok=True)

    # `files` is None when the button is clicked with nothing uploaded.
    for file in files or []:
        source_path = _uploaded_path(file)
        filename = os.path.basename(source_path)
        text, error = extract_text_from_pdf(source_path)
        if error or not text:
            results[filename] = {"error": error or "No text found in PDF"}
            continue

        # Only the first 512 characters of the cleaned text are classified.
        prediction = classifier(text[:512])[0]
        label = prediction["label"]
        score = round(prediction["score"], 4)
        category = CATEGORY_MAP.get(label, "Other / Miscellaneous")

        results[filename] = {
            "Predicted Job Category": label,
            "Category Name": category,
            "Confidence Score": score
        }

        # Save file by category, in one flat directory per category.
        category_dir = os.path.join("classified_resumes", _category_dirname(category))
        os.makedirs(category_dir, exist_ok=True)
        dest_path = os.path.join(category_dir, filename)
        shutil.copyfile(source_path, dest_path)

        classified_files_by_category.setdefault(category, []).append(dest_path)

    available_categories = sorted(classified_files_by_category)
    return results, gr.update(
        choices=available_categories,
        value=available_categories[0] if available_categories else None,
    )

def filter_by_category(selected_category):
    """Return the stored resume paths for *selected_category*.

    Looks the category up in the module-level
    ``classified_files_by_category`` mapping. An empty or missing selection,
    or a category with no entry, yields an empty list.
    """
    if not selected_category:
        return []
    return classified_files_by_category.get(selected_category, [])

# Gradio UI: upload PDFs, classify them, then browse the results by category.
with gr.Blocks(title="Resume Classifier by Job Category") as demo:
    gr.Markdown("## 📄 Resume Screening System\nUpload resumes and classify them into job categories.")

    # Inputs
    file_input = gr.File(label="Upload Resume PDFs", file_types=[".pdf"], file_count="multiple")
    classify_button = gr.Button("🔍 Classify All Resumes")

    # Outputs
    output_json = gr.JSON(label="Classification Result (JSON)")
    category_dropdown = gr.Dropdown(label="Select a Job Category", choices=[], interactive=True)
    resume_output = gr.File(label="Filtered Resumes", file_types=[".pdf"], file_count="multiple")

    # Classifying fills the JSON view and repopulates the category dropdown;
    # changing the dropdown selection then lists that category's files.
    classify_button.click(fn=classify_resumes, inputs=[file_input], outputs=[output_json, category_dropdown])
    category_dropdown.change(fn=filter_by_category, inputs=category_dropdown, outputs=resume_output)

if __name__ == "__main__":
    # Start the web app only when run as a script, not when imported.
    demo.launch()