# Source: Hugging Face file viewer page for app.py (3.21 kB)
# Author: TKM03 | Commit 52a747a (verified): "Update app.py"
import re
import os
import shutil
import PyPDF2
import gradio as gr
from transformers import pipeline
# Text-classification pipeline, built once when the module is imported and
# reused for every resume that gets scored.
text_classifier = pipeline(
    "text-classification",
    model="saattrupdan/job-listing-filtering-model",
)

# Raw classifier label -> human-readable relevance category.
# NOTE(review): assumes the model emits "LABEL_0"/"LABEL_1" - confirm against
# the model card; any other label is not covered by this mapping.
LABEL_MAP = dict(LABEL_0="Irrelevant", LABEL_1="Relevant")

# Module-level state: for each category, the paths of the resumes stored by
# the most recent classification run.
classified_files = {category: [] for category in ("Relevant", "Irrelevant")}
def clean_resume_text(text):
    """Normalise raw resume text before it is handed to the classifier.

    The substitutions run in a fixed order: ``http...`` tokens, ``#...``
    tokens, ``@...`` tokens, punctuation, non-ASCII characters, and finally
    whitespace runs. The result has single spaces only and no leading or
    trailing whitespace.
    """
    substitutions = (
        (r'http\S+', ' '),        # "http" followed by anything up to whitespace
        (r'#\S+', ''),            # "#..." tokens are dropped with no placeholder
        (r'@\S+', ' '),           # "@..." tokens
        (r'[^\w\s]', ' '),        # anything that is neither word char nor whitespace
        (r'[^\x00-\x7f]', ' '),   # anything outside the ASCII range
        (r'\s+', ' '),            # collapse whitespace runs
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    return text.strip()
def extract_resume_text(file):
    """Extract the text of every page of a PDF resume.

    Returns a ``(text, error)`` pair:

    * ``(text, None)`` when some non-blank text was extracted;
    * ``(text, "No text found in PDF")`` when the pages yielded nothing but
      whitespace;
    * ``(None, "Error reading PDF: ...")`` when reading raised an exception.
    """
    try:
        chunks = []
        for page in PyPDF2.PdfReader(file).pages:
            content = page.extract_text()
            # Pages without extractable text contribute nothing.
            if content:
                chunks.append(content + " ")
        combined = "".join(chunks)
    except Exception as exc:
        # Any failure while opening or parsing is reported, not raised.
        return None, f"Error reading PDF: {str(exc)}"

    if combined.strip():
        return combined, None
    return combined, "No text found in PDF"
def classify_and_store(files):
    """Classify uploaded resume PDFs and copy each scored one to ``filtered_resumes/``.

    Every call starts from a clean slate: the buckets in the module-level
    ``classified_files`` are reset and the ``filtered_resumes`` directory is
    deleted and recreated.

    Args:
        files: Uploaded files from the UI. Each item may be a path
            (``str`` / ``os.PathLike``) or an object exposing its path as
            ``.name``. ``None`` (nothing uploaded) is treated as no files.

    Returns:
        dict keyed by each file's base name. The value is
        ``{"Relevance": <category>, "Confidence Score": <float>}`` for a
        scored resume, or ``{"error": <message>}`` when text extraction
        failed (such files are not copied).
    """
    output_dir = "filtered_resumes"
    predictions = {}

    # Drop every bucket from the previous run, including any "Unknown" bucket
    # created below, so no entry points at a file that is about to be deleted.
    classified_files.clear()
    classified_files["Relevant"] = []
    classified_files["Irrelevant"] = []

    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir, exist_ok=True)

    # `files` is None when the button is clicked before anything is uploaded.
    for file in files or []:
        # Support both plain paths and file wrappers that carry the path in `.name`.
        if isinstance(file, (str, os.PathLike)):
            source_path = os.fspath(file)
        else:
            source_path = file.name
        # NOTE: uploads sharing a base name overwrite each other, both in
        # `predictions` and on disk.
        file_name = os.path.basename(source_path)

        resume_text, error = extract_resume_text(file)
        if error:
            predictions[file_name] = {"error": error}
            continue

        cleaned_text = clean_resume_text(resume_text)
        # Only the first 512 characters of the cleaned text are scored.
        top_prediction = text_classifier(cleaned_text[:512])[0]
        status = LABEL_MAP.get(top_prediction['label'], "Unknown")
        predictions[file_name] = {
            "Relevance": status,
            "Confidence Score": round(top_prediction['score'], 4),
        }

        dest_path = os.path.join(output_dir, file_name)
        shutil.copyfile(source_path, dest_path)
        # A label missing from LABEL_MAP yields "Unknown", which has no
        # pre-made bucket; setdefault avoids the KeyError a plain lookup raises.
        classified_files.setdefault(status, []).append(dest_path)

    return predictions
def get_resumes_by_category(category):
    """Return the stored resume paths for ``category``.

    Looks the category up in the module-level ``classified_files`` mapping
    and falls back to an empty list when it is not a key there.
    """
    if category in classified_files:
        return classified_files[category]
    return []
# Gradio UI: upload PDFs, classify them on button click, then browse the
# stored copies by relevance category via the dropdown.
with gr.Blocks(title="Resume Classifier & Category Filter") as demo:
    # The emoji below were mojibake (UTF-8 bytes decoded with the wrong codec).
    gr.Markdown("## 📂 Resume Relevance Classifier\nUpload resumes and view based on relevance category.")

    file_input = gr.File(file_types=[".pdf"], file_count="multiple", label="Upload Resume PDFs")
    classify_button = gr.Button("🧠 Classify Resumes")
    relevance_output = gr.JSON(label="Classification Results")

    category_dropdown = gr.Dropdown(choices=["Relevant", "Irrelevant"], label="Select Resume Category to View")
    filtered_files_output = gr.File(label="Filtered Resumes", file_types=[".pdf"], file_count="multiple")

    # Button click -> classify the uploads and show the per-file results as JSON.
    classify_button.click(fn=classify_and_store, inputs=[file_input], outputs=[relevance_output])
    # Dropdown change -> list the files stored for the chosen category.
    category_dropdown.change(fn=get_resumes_by_category, inputs=[category_dropdown], outputs=[filtered_files_output])

if __name__ == "__main__":
    demo.launch()