"""Gradio app that classifies uploaded resume PDFs into job categories.

Each uploaded PDF is converted to plain text, classified with a Hugging Face
text-classification pipeline, and copied into a per-category folder under
``classified_resumes/`` so the resumes of one category can be listed later.
"""

import os
import re
import shutil

import PyPDF2
import gradio as gr
from transformers import pipeline

# Load the model
classifier = pipeline(
    "text-classification",
    model="liberatoratif/BERT-resume-job-recommender",
    truncation=True,
)

# Category mapping (expand as needed)
CATEGORY_MAP = {
    "LABEL_0": "Information Technology / Software Engineering",
    "LABEL_1": "Finance / Accounting / Auditing",
    "LABEL_2": "Marketing / Sales / Business Development",
    "LABEL_3": "Engineering / Mechanical / Civil / Electrical",
    "LABEL_4": "Data Science / Machine Learning / AI",
    "LABEL_5": "Human Resources / Recruitment / Talent Acquisition",
    "LABEL_6": "Healthcare / Medical / Nursing",
    "LABEL_7": "Legal / Compliance",
    "LABEL_8": "Education / Training / Teaching",
    "LABEL_9": "Customer Service / Support",
    "LABEL_10": "Operations / Logistics / Supply Chain",
    # Add more if model supports more
}

# Category name used when the model returns a label missing from CATEGORY_MAP.
FALLBACK_CATEGORY = "Other / Miscellaneous"

# Root folder (relative to the working directory) that receives the copies.
OUTPUT_DIR = "classified_resumes"

# Number of leading *characters* of the cleaned text handed to the classifier.
# NOTE(review): this is a character slice, not a token limit; the pipeline is
# also created with truncation=True, so this cap may be stricter than needed.
MAX_INPUT_CHARS = 512

# Maps category name -> list of copied file paths from the most recent run.
# Module-level state: it is shared by every caller of classify_resumes().
classified_files_by_category = {}

_URL_RE = re.compile(r'http\S+')
_NON_ALNUM_RE = re.compile(r'[^A-Za-z0-9\s]')
_WHITESPACE_RE = re.compile(r'\s+')
_PATH_SEPARATOR_RE = re.compile(r'\s*[\\/]+\s*')


def clean_text(text):
    """Return *text* with URLs and punctuation removed and whitespace collapsed.

    Args:
        text: Raw text; ``None`` or an empty string yields ``""``.

    Returns:
        A single-line string containing only ASCII letters, digits and single
        spaces, with no leading or trailing whitespace.
    """
    if not text:
        return ""
    text = _URL_RE.sub('', text)
    text = _NON_ALNUM_RE.sub('', text)
    return _WHITESPACE_RE.sub(' ', text).strip()


def extract_text_from_pdf(file):
    """Extract and clean the text of every page of a PDF.

    Args:
        file: Whatever ``PyPDF2.PdfReader`` accepts (the uploaded file object
            or a path).

    Returns:
        A ``(text, error)`` tuple. On success ``error`` is ``None``; on any
        failure ``text`` is ``""`` and ``error`` is the exception message.
    """
    try:
        reader = PyPDF2.PdfReader(file)
        # extract_text() may yield nothing for a page; treat that as empty.
        text = " ".join(page.extract_text() or "" for page in reader.pages)
        return clean_text(text), None
    except Exception as e:
        # Deliberately broad: a single unreadable PDF must be reported in the
        # results instead of aborting the whole batch.
        return "", str(e)


def _upload_path(file):
    """Return the filesystem path of an upload given as a path or an object with ``.name``."""
    if isinstance(file, (str, os.PathLike)):
        return os.fspath(file)
    return file.name


def _category_dirname(category):
    """Turn a category name into a single, flat directory name.

    Category names contain ``/``; used verbatim in a path they would create
    nested folders whose names start or end with spaces.
    """
    return _PATH_SEPARATOR_RE.sub(' - ', category).strip() or "uncategorized"


def _unique_name(filename, taken):
    """Return *filename*, or a ``name (N).ext`` variant not present in *taken*."""
    if filename not in taken:
        return filename
    stem, ext = os.path.splitext(filename)
    counter = 2
    while f"{stem} ({counter}){ext}" in taken:
        counter += 1
    return f"{stem} ({counter}){ext}"


def classify_resumes(files):
    """Classify uploaded resumes and copy each one into its category folder.

    The output folder is wiped and ``classified_files_by_category`` is reset
    on every call, so only the latest batch is kept.

    Args:
        files: Iterable of uploads (objects exposing ``.name`` or plain path
            strings), or ``None`` when nothing was uploaded.

    Returns:
        A ``(results, dropdown_update)`` tuple. ``results`` maps each file
        name to either its prediction details or an ``{"error": ...}`` entry;
        ``dropdown_update`` is a ``gr.update`` carrying the sorted category
        names found, with the first one preselected (or ``None`` if empty).
    """
    global classified_files_by_category

    results = {}
    classified_files_by_category = {}

    # Start from an empty output tree so stale files never leak into a run.
    if os.path.exists(OUTPUT_DIR):
        shutil.rmtree(OUTPUT_DIR)
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # `files` is None when the button is clicked without any upload.
    for file in files or []:
        source_path = _upload_path(file)
        # Two uploads may share a basename; keep both instead of overwriting.
        filename = _unique_name(os.path.basename(source_path), results)

        text, error = extract_text_from_pdf(file)
        if error or not text:
            results[filename] = {"error": error or "No text found in PDF"}
            continue

        prediction = classifier(text[:MAX_INPUT_CHARS])[0]
        label = prediction["label"]
        score = round(float(prediction["score"]), 4)
        category = CATEGORY_MAP.get(label, FALLBACK_CATEGORY)

        results[filename] = {
            "Predicted Job Category": label,
            "Category Name": category,
            "Confidence Score": score,
        }

        # Save file by category
        category_dir = os.path.join(OUTPUT_DIR, _category_dirname(category))
        os.makedirs(category_dir, exist_ok=True)
        dest_path = os.path.join(category_dir, filename)
        shutil.copyfile(source_path, dest_path)

        classified_files_by_category.setdefault(category, []).append(dest_path)

    available_categories = sorted(classified_files_by_category)
    return results, gr.update(
        choices=available_categories,
        value=available_categories[0] if available_categories else None,
    )


def filter_by_category(selected_category):
    """Return the copied file paths recorded for *selected_category*.

    Args:
        selected_category: Category name chosen in the dropdown; may be
            ``None`` or empty.

    Returns:
        The list of file paths for that category from the latest
        classification run, or ``[]`` when the category is empty or unknown.
    """
    if not selected_category:
        return []
    return classified_files_by_category.get(selected_category, [])


# Gradio UI
with gr.Blocks(title="Resume Classifier by Job Category") as demo:
    gr.Markdown("## 📄 Resume Screening System\nUpload resumes and classify them into job categories.")

    file_input = gr.File(label="Upload Resume PDFs", file_types=[".pdf"], file_count="multiple")
    classify_button = gr.Button("🔍 Classify All Resumes")
    output_json = gr.JSON(label="Classification Result (JSON)")
    category_dropdown = gr.Dropdown(label="Select a Job Category", choices=[], interactive=True)
    resume_output = gr.File(label="Filtered Resumes", file_types=[".pdf"], file_count="multiple")

    # Classifying fills the JSON view and repopulates the category dropdown.
    classify_button.click(
        fn=classify_resumes,
        inputs=[file_input],
        outputs=[output_json, category_dropdown],
    )
    # Picking a category lists the resumes that were filed under it.
    category_dropdown.change(
        fn=filter_by_category,
        inputs=category_dropdown,
        outputs=resume_output,
    )

if __name__ == "__main__":
    demo.launch()