File size: 3,209 Bytes
102fc06
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52a747a
 
 
 
 
 
 
102fc06
 
 
 
 
 
 
 
52a747a
102fc06
 
 
 
 
 
 
 
 
 
 
 
52a747a
 
102fc06
52a747a
 
102fc06
 
 
 
 
 
90f398d
102fc06
 
 
 
 
 
 
 
 
 
90f398d
102fc06
 
 
 
 
52a747a
 
 
102fc06
52a747a
102fc06
52a747a
 
 
 
 
5bda631
90f398d
102fc06
52a747a
 
102fc06
 
52a747a
102fc06
 
 
52a747a
 
 
 
 
102fc06
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import re
import os
import shutil
import PyPDF2
import gradio as gr
from transformers import pipeline

# Load the text-classification pipeline once, at import time, so that every
# call to classify_and_store() reuses the same model instance.
text_classifier = pipeline("text-classification", model="saattrupdan/job-listing-filtering-model")

# Translate the classifier's raw output labels into the two human-readable
# relevance categories shown in the UI.
LABEL_MAP = dict(LABEL_0="Irrelevant", LABEL_1="Relevant")

# Module-level registry of copied resume paths, keyed by relevance category.
# Each category gets its own independent list.
classified_files = {category: [] for category in ("Relevant", "Irrelevant")}


def clean_resume_text(text):
    """Normalize raw resume text before it is handed to the classifier.

    The substitutions run in order: URLs, hashtags, @-mentions, punctuation,
    and finally any non-ASCII character. Runs of whitespace are then collapsed
    to a single space and the result is stripped at both ends.
    """
    # (pattern, replacement) pairs; order matters because later patterns see
    # the output of earlier ones.
    substitutions = (
        (r'http\S+', ' '),
        (r'#\S+', ''),
        (r'@\S+', ' '),
        (r'[^\w\s]', ' '),
        (r'[^\x00-\x7f]', ' '),
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)

    collapsed = re.sub(r'\s+', ' ', text)
    return collapsed.strip()


def extract_resume_text(file):
    """Extract the text of every page of a PDF.

    Returns a ``(text, error)`` pair:

    * ``(text, None)`` when at least some non-whitespace text was extracted;
    * ``(text, "No text found in PDF")`` when the extracted text is empty or
      whitespace only;
    * ``(None, "Error reading PDF: ...")`` when reading the PDF raised.
    """
    try:
        # Pages are read lazily, one at a time; pages that yield no text are
        # skipped, and each kept page is followed by a single space.
        page_texts = (page.extract_text() for page in PyPDF2.PdfReader(file).pages)
        text = "".join(content + " " for content in page_texts if content)

        if text.strip():
            return text, None
        return text, "No text found in PDF"
    except Exception as exc:
        return None, f"Error reading PDF: {str(exc)}"


def classify_and_store(files):
    """Classify uploaded resume PDFs and copy each one into ``filtered_resumes/``.

    Every call starts from a clean slate: the module-level ``classified_files``
    registry is emptied and the ``filtered_resumes`` directory is deleted and
    recreated, so results from a previous call never leak into this one.

    Args:
        files: Iterable of uploaded files. Each item is either an object whose
            ``.name`` attribute is the file's path on disk, or the path itself
            as a string. ``None`` (nothing uploaded) is treated as empty.

    Returns:
        A dict keyed by base file name. Each value is either
        ``{"error": <message>}`` when text extraction failed, or
        ``{"Relevance": <category>, "Confidence Score": <float>}``.
    """
    predictions = {}

    # Reset in place (the registry is shared module state) and drop any stale
    # category left over from a previous run.
    classified_files.clear()
    classified_files["Relevant"] = []
    classified_files["Irrelevant"] = []

    if os.path.exists("filtered_resumes"):
        shutil.rmtree("filtered_resumes")
    os.makedirs("filtered_resumes", exist_ok=True)

    # `files` may be None, e.g. when the button is clicked before anything has
    # been uploaded; iterating None would raise TypeError.
    for file in files or []:
        # Accept both file objects exposing `.name` and plain path strings.
        source_path = getattr(file, "name", file)
        file_name = os.path.basename(source_path)

        resume_text, error = extract_resume_text(file)
        if error:
            predictions[file_name] = {"error": error}
            continue

        cleaned_text = clean_resume_text(resume_text)
        # Only the first 512 characters are sent to the model.
        result = text_classifier(cleaned_text[:512])
        label = result[0]['label']
        score = round(result[0]['score'], 4)
        status = LABEL_MAP.get(label, "Unknown")

        # NOTE: results and copies are keyed by base name, so two uploads
        # sharing a base name overwrite each other.
        predictions[file_name] = {
            "Relevance": status,
            "Confidence Score": score
        }

        dest_path = f"filtered_resumes/{file_name}"
        shutil.copyfile(source_path, dest_path)

        # setdefault: a label missing from LABEL_MAP maps to "Unknown", which
        # has no pre-created list; indexing directly would raise KeyError.
        classified_files.setdefault(status, []).append(dest_path)

    return predictions


def get_resumes_by_category(category):
    """Return the stored resume paths for *category*, or an empty list.

    Looks the category up in the module-level ``classified_files`` registry;
    a category that is not present yields a fresh empty list.
    """
    if category in classified_files:
        return classified_files[category]
    return []


# Gradio UI: upload PDFs, classify them, then browse the copies by category.
with gr.Blocks(title="Resume Classifier & Category Filter") as demo:
    gr.Markdown("## 📂 Resume Relevance Classifier\nUpload resumes and view based on relevance category.")

    file_input = gr.File(file_types=[".pdf"], file_count="multiple", label="Upload Resume PDFs")
    classify_button = gr.Button("🧠 Classify Resumes")

    relevance_output = gr.JSON(label="Classification Results")

    category_dropdown = gr.Dropdown(choices=["Relevant", "Irrelevant"], label="Select Resume Category to View")
    filtered_files_output = gr.File(label="Filtered Resumes", file_types=[".pdf"], file_count="multiple")

    # Clicking the button classifies the uploads and shows the per-file results.
    classify_button.click(fn=classify_and_store, inputs=[file_input], outputs=[relevance_output])
    # The file list is refreshed only when the dropdown selection changes.
    category_dropdown.change(fn=get_resumes_by_category, inputs=[category_dropdown], outputs=[filtered_files_output])

# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()