Spaces:

TKM03
/

RESUME_FILTERING

Sleeping

App Files Files Community

TKM03 committed on Jul 2

Commit

102fc06

verified ·

1 Parent(s): dcb6d86

Create app.py

Browse files

Files changed (1) hide show

app.py +88 -0

app.py ADDED Viewed

	@@ -0,0 +1,88 @@

+import re
+import os
+import shutil
+import PyPDF2
+import gradio as gr
+from transformers import pipeline
+# Build the Hugging Face text-classification pipeline once, at import time,
+# so every call to the filter reuses the same loaded model.
+# NOTE(review): the model id suggests a job-listing filter -- confirm it is
+# the intended model for scoring resumes.
+text_classifier = pipeline(
+    "text-classification",
+    model="saattrupdan/job-listing-filtering-model",
+)
+# Translates the classifier's raw labels into the names shown in the results.
+# NOTE(review): assumes LABEL_1 is the positive class -- verify against the
+# model card.
+LABEL_MAP = dict(LABEL_0="Irrelevant", LABEL_1="Relevant")
+def clean_resume_text(text):
+    """Return *text* with URLs, hashtags, mentions and symbols removed.
+
+    The substitutions run in a fixed order; afterwards every run of
+    whitespace is collapsed to one space and the ends are trimmed.
+    """
+    # (pattern, replacement) pairs applied in sequence. Hashtags are deleted
+    # outright, every other match is turned into a single space.
+    substitutions = (
+        (r'http\S+', ' '),        # URLs
+        (r'#\S+', ''),            # hashtags
+        (r'@\S+', ' '),           # mentions / anything starting with "@"
+        (r'[^\w\s]', ' '),        # punctuation and other symbols
+        (r'[^\x00-\x7f]', ' '),   # remaining non-ASCII characters
+    )
+    cleaned = text
+    for pattern, replacement in substitutions:
+        cleaned = re.sub(pattern, replacement, cleaned)
+    collapsed = re.sub(r'\s+', ' ', cleaned)
+    return collapsed.strip()
+def extract_resume_text(file):
+    """Extract the text of every page of a PDF.
+
+    Args:
+        file: Whatever ``PyPDF2.PdfReader`` accepts as its source.
+
+    Returns:
+        A ``(text, error)`` pair. ``error`` is ``None`` when some text was
+        found, ``"No text found in PDF"`` when the extracted text is empty or
+        whitespace only, and an ``"Error reading PDF: ..."`` message (with
+        ``text`` set to ``None``) when reading raised an exception.
+    """
+    try:
+        pdf = PyPDF2.PdfReader(file)
+        chunks = []
+        for pdf_page in pdf.pages:
+            extracted = pdf_page.extract_text()
+            # Pages without extractable text yield nothing; skip them.
+            if extracted:
+                chunks.append(extracted + " ")
+        combined = "".join(chunks)
+        if combined.strip():
+            problem = None
+        else:
+            problem = "No text found in PDF"
+        return combined, problem
+    except Exception as exc:
+        # Any failure is reported to the caller as a message, not raised.
+        return None, f"Error reading PDF: {exc}"
+def filter_relevant_resumes(files):
+    """Classify uploaded resume PDFs and collect the ones labelled relevant.
+
+    Args:
+        files: Uploaded files as delivered by the file input. Each item is
+            either an object whose ``name`` attribute is its path on disk or
+            a plain path string. May be ``None`` or empty when nothing was
+            uploaded.
+
+    Returns:
+        A ``(predictions, relevant_files)`` tuple. ``predictions`` maps each
+        file's base name to either ``{"error": message}`` or a dict with its
+        ``"Relevance"`` label and ``"Confidence Score"``. ``relevant_files``
+        lists the paths of the copies written to ``filtered_resumes/``.
+    """
+    # Nothing uploaded (e.g. the button was clicked straight away): report
+    # empty results instead of failing while iterating over None.
+    if not files:
+        return {}, []
+    predictions = {}
+    relevant_files = []
+    # Start each run from an empty output folder so copies from a previous
+    # run are never mixed in with the current results.
+    if os.path.exists("filtered_resumes"):
+        shutil.rmtree("filtered_resumes")
+    os.makedirs("filtered_resumes", exist_ok=True)
+    for file in files:
+        # Accept both file-like uploads (with .name) and plain path strings.
+        source_path = getattr(file, "name", file)
+        file_name = os.path.basename(source_path)
+        resume_text, error = extract_resume_text(file)
+        if error:
+            predictions[file_name] = {"error": error}
+            continue
+        cleaned_text = clean_resume_text(resume_text)
+        # Only the first 512 characters are classified.
+        # NOTE(review): presumably to stay within the model's input limit --
+        # confirm, since the limit is in tokens rather than characters.
+        result = text_classifier(cleaned_text[:512])
+        label = result[0]['label']
+        score = round(result[0]['score'], 4)
+        status = LABEL_MAP.get(label, "Unknown")
+        predictions[file_name] = {
+            "Relevance": status,
+            "Confidence Score": score
+        }
+        if status == "Relevant":
+            # Copy from the path on disk rather than file.read(): the upload
+            # object was already handed to the PDF reader, so its stream may
+            # be consumed (or it may not be a readable object at all), which
+            # would produce an empty or truncated copy.
+            dest_path = f"filtered_resumes/{file_name}"
+            shutil.copyfile(source_path, dest_path)
+            relevant_files.append(dest_path)
+    return predictions, relevant_files
+# Gradio UI: a multi-file PDF upload, a trigger button, and two outputs that
+# receive the return values of filter_relevant_resumes.
+with gr.Blocks(title="Resume Relevance Classifier & Filter") as demo:
+    gr.Markdown("## 📂 Resume Relevance Filter using Hugging Face Model\nUpload PDF resumes and filter out only the relevant ones.")
+    # Upload widget restricted to .pdf files; several can be selected at once.
+    file_input = gr.File(file_types=[".pdf"], file_count="multiple", label="Upload Resume PDFs")
+    with gr.Row():
+        classify_button = gr.Button("🧠 Classify and Filter Relevant Resumes")
+    # Shows the per-file predictions (or error messages) as JSON.
+    relevance_output = gr.JSON(label="Classification Results")
+    # Despite its name this is a File component, not a gallery: it receives
+    # the list of copied relevant PDFs.
+    relevant_resume_gallery = gr.File(label="Download Relevant Resumes", file_types=[".pdf"], file_count="multiple")
+    # The function's (predictions, relevant_files) tuple is mapped onto the
+    # two outputs in the order listed here.
+    classify_button.click(fn=filter_relevant_resumes, inputs=[file_input], outputs=[relevance_output, relevant_resume_gallery])
+# Launch the app only when this file is executed as a script.
+if __name__ == "__main__":
+    demo.launch()