Spaces:
Sleeping
Sleeping
| import re | |
| import os | |
| import shutil | |
| import PyPDF2 | |
| import gradio as gr | |
| from transformers import pipeline | |
# Text-classification pipeline, created once at import time and called by
# classify_and_store for every resume.
text_classifier = pipeline(
    task="text-classification",
    model="saattrupdan/job-listing-filtering-model",
)
# Translates the classifier's raw label ids into the relevance names that are
# shown in the results and used as keys of the file registry.
LABEL_MAP = dict(
    LABEL_0="Irrelevant",
    LABEL_1="Relevant",
)
# Module-level registry of the copied resume paths, grouped by relevance
# category. It is shared by every caller in this process.
classified_files = {category: [] for category in ("Relevant", "Irrelevant")}
def clean_resume_text(text):
    """Normalise raw resume text before it is passed to the classifier.

    Applies, in order: removal of ``http...`` URLs, ``#...`` tags and
    ``@...`` mentions, replacement of punctuation and of non-ASCII
    characters with spaces, then collapses every whitespace run to a single
    space and trims both ends.
    """
    # (pattern, replacement) pairs; the order is significant and mirrors the
    # sequence in which the substitutions must be applied.
    substitutions = (
        (r'http\S+', ' '),
        (r'#\S+', ''),
        (r'@\S+', ' '),
        (r'[^\w\s]', ' '),
        (r'[^\x00-\x7f]', ' '),
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)

    collapsed = re.sub(r'\s+', ' ', text)
    return collapsed.strip()
def extract_resume_text(file):
    """Extract the text of every page of a PDF.

    Returns a ``(text, error)`` pair. ``error`` is ``None`` when some
    non-whitespace text was extracted and ``"No text found in PDF"`` when
    none was. If anything raises while reading, the pair is
    ``(None, "Error reading PDF: <message>")``.
    """
    try:
        pdf = PyPDF2.PdfReader(file)
        extracted = (page.extract_text() for page in pdf.pages)
        # Every non-empty page contributes its text followed by one space.
        text = "".join(chunk + " " for chunk in extracted if chunk)
        if text.strip():
            return text, None
        return text, "No text found in PDF"
    except Exception as exc:
        return None, f"Error reading PDF: {exc}"
def classify_and_store(files):
    """Classify uploaded resume PDFs and copy each into ``filtered_resumes/``.

    Every call starts from a clean slate: the module-level
    ``classified_files`` registry is reset and the ``filtered_resumes``
    directory is deleted and recreated, so results of an earlier run are
    discarded.

    Args:
        files: Iterable of uploads, or ``None``/empty when nothing was
            uploaded. Each item is either a path string or an object whose
            ``name`` attribute holds the path of the file on disk.

    Returns:
        A dict keyed by file base name. Each value is
        ``{"Relevance": <category>, "Confidence Score": <float>}`` for a
        classified file, or ``{"error": <message>}`` when its text could not
        be extracted.
    """
    predictions = {}

    # Reset in place so stale categories from an earlier run do not linger.
    classified_files.clear()
    classified_files.update({"Relevant": [], "Irrelevant": []})

    if os.path.exists("filtered_resumes"):
        shutil.rmtree("filtered_resumes")
    os.makedirs("filtered_resumes", exist_ok=True)

    # `files` is None when the handler fires without any upload.
    for file in files or []:
        # Accept both upload objects exposing `.name` and plain path strings.
        source_path = getattr(file, "name", file)
        file_name = os.path.basename(source_path)

        resume_text, error = extract_resume_text(file)
        if error:
            predictions[file_name] = {"error": error}
            continue

        cleaned_text = clean_resume_text(resume_text)
        # Only the first 512 characters are classified.
        # NOTE(review): presumably a guard for the model's input limit - confirm.
        top_result = text_classifier(cleaned_text[:512])[0]
        status = LABEL_MAP.get(top_result["label"], "Unknown")
        predictions[file_name] = {
            "Relevance": status,
            "Confidence Score": round(top_result["score"], 4),
        }

        dest_path = f"filtered_resumes/{file_name}"
        shutil.copyfile(source_path, dest_path)
        # A label missing from LABEL_MAP yields "Unknown", which has no
        # pre-created bucket; setdefault avoids a KeyError in that case.
        classified_files.setdefault(status, []).append(dest_path)

    return predictions
def get_resumes_by_category(category):
    """Return the stored resume paths for ``category``.

    An empty list is returned when the category has no entry in the
    module-level ``classified_files`` registry.
    """
    if category in classified_files:
        return classified_files[category]
    return []
# Gradio UI: upload PDFs, classify them, then browse the copies by category.
with gr.Blocks(title="Resume Classifier & Category Filter") as demo:
    # NOTE(review): the emoji in the heading and button label were mis-decoded
    # ("π" / "π§"). The button glyph matches the byte pattern of 🧠; the
    # heading glyph could not be recovered exactly and 📄 is a best guess.
    gr.Markdown(
        "## 📄 Resume Relevance Classifier\n"
        "Upload resumes and view based on relevance category."
    )

    file_input = gr.File(
        file_types=[".pdf"],
        file_count="multiple",
        label="Upload Resume PDFs",
    )
    classify_button = gr.Button("🧠 Classify Resumes")
    relevance_output = gr.JSON(label="Classification Results")

    category_dropdown = gr.Dropdown(
        choices=["Relevant", "Irrelevant"],
        label="Select Resume Category to View",
    )
    filtered_files_output = gr.File(
        label="Filtered Resumes",
        file_types=[".pdf"],
        file_count="multiple",
    )

    # Event wiring has to happen inside the Blocks context.
    classify_button.click(
        fn=classify_and_store,
        inputs=[file_input],
        outputs=[relevance_output],
    )
    # The file list is refreshed only when the dropdown value changes.
    category_dropdown.change(
        fn=get_resumes_by_category,
        inputs=[category_dropdown],
        outputs=[filtered_files_output],
    )

if __name__ == "__main__":
    demo.launch()