Spaces:

TKM03
/

RESUME_FILTERING

Sleeping

App Files Community

TKM03 committed on Jul 2

Commit

8ab2e60

verified ·

1 Parent(s): 59bc749

Update app.py

Browse files

Files changed (1) hide show

app.py +69 -91

app.py CHANGED Viewed

@@ -5,137 +5,115 @@ import PyPDF2
 import gradio as gr
 from transformers import pipeline
-# ------------------- Category Mapping -------------------
 CATEGORY_MAP = {
-    "C0": "Administration / Clerical",
-    "C1": "Agriculture / Environmental / Forestry",
-    "C2": "Information Technology / Software Engineering",
-    "C3": "Data Science / Machine Learning / AI",
-    "C4": "Finance / Accounting / Auditing",
-    "C5": "Human Resources / Recruitment / Talent Acquisition",
-    "C6": "Sales / Marketing / Business Development",
-    "C7": "Engineering / Mechanical / Civil / Electrical",
-    "C8": "Customer Service / Support",
-    "C9": "Design / UI-UX / Creative",
-    "C10": "Healthcare / Medical / Nursing",
-    "C11": "Education / Training / Teaching",
-    "C12": "Retail / Merchandising / E-commerce",
-    "C13": "Telecommunication / Network Engineering",
-    "C14": "Operations / Logistics / Supply Chain",
-    "C15": "Entrepreneurship / Startups / Freelancing",
-    "C16": "Product Management / Project Management",
-    "C17": "Legal / Compliance",
-    "C18": "Real Estate / Property Management",
-    "C19": "Transportation / Automotive / Aviation",
-    "C20": "Construction / Architecture",
-    "C21": "Energy / Oil & Gas / Utilities",
-    "C22": "Security / Safety",
-    "C23": "Procurement / Purchasing",
-    "C24": "Manufacturing / Production",
-    "C25": "Media / Communication / PR / Journalism",
-    "C26": "Science / Research / R&D",
-    "C27": "Quality Assurance / Control",
-    "C28": "Hospitality / Tourism / Travel",
-    "C29": "Management / Strategy / Consulting",
-    "C30": "Other / Miscellaneous"
 }
-# ------------------- Load Classification Model -------------------
-classifier = pipeline("text-classification", model="CleveGreen/JobClassifier_v2")
-# ------------------- Resume Utilities -------------------
 def clean_text(text):
     text = re.sub(r'http\S+', ' ', text)
-    text = re.sub(r'[^\x00-\x7f]', ' ', text)
     text = re.sub(r'[^\w\s]', ' ', text)
-    text = re.sub(r'\s+', ' ', text).strip()
-    return text
 def extract_text_from_pdf(file):
     try:
         reader = PyPDF2.PdfReader(file)
-        text = ""
-        for page in reader.pages:
-            content = page.extract_text()
-            if content:
-                text += content + " "
-        return text.strip(), None if text else "No text found in PDF."
     except Exception as e:
         return None, str(e)
-# ------------------- Resume Classification & Organization -------------------
 def classify_resumes(files):
-    predictions = {}
-    classified_files = {}
     if os.path.exists("classified_resumes"):
         shutil.rmtree("classified_resumes")
-    os.makedirs("classified_resumes")
     for file in files:
         file_name = os.path.basename(file.name)
-        resume_text, error = extract_text_from_pdf(file)
         if error:
-            predictions[file_name] = {"error": error}
             continue
-        cleaned_text = clean_text(resume_text)
-        result = classifier(cleaned_text[:512])[0]  # Truncate to avoid max token
-        label = result['label']  # e.g., C2
-        score = round(result['score'], 4)
-        category = CATEGORY_MAP.get(label, "Other / Miscellaneous")
-        predictions[file_name] = {
             "Predicted Job Category": label,
             "Category Name": category,
             "Confidence Score": score
         }
-        category_folder = os.path.join("classified_resumes", category)
-        os.makedirs(category_folder, exist_ok=True)
-        dest_path = os.path.join(category_folder, file_name)
         with open(file.name, "rb") as f_in, open(dest_path, "wb") as f_out:
             shutil.copyfileobj(f_in, f_out)
-        if category not in classified_files:
-            classified_files[category] = []
-        classified_files[category].append(dest_path)
-    return predictions, classified_files
-# ------------------- Gradio App -------------------
-def filter_by_category(category, all_classified):
-    return all_classified.get(category, [])
-with gr.Blocks(title="Resume Screening & Classification") as app:
-    gr.Markdown("""
-    # 📄 Resume Screening Tool
-    Upload resumes in PDF format. The system will classify them into job categories using a pretrained AI model.
-    """)
-    with gr.Row():
-        uploaded_files = gr.File(file_types=[".pdf"], file_count="multiple", label="Upload Resumes")
-        classify_button = gr.Button("Classify Resumes")
-    classification_results = gr.JSON(label="Classification Output")
-    category_selector = gr.Dropdown(choices=list(CATEGORY_MAP.values()), label="Filter by Job Category")
-    filtered_resumes_output = gr.File(file_types=[".pdf"], file_count="multiple", label="Filtered Resumes")
-    all_classified_state = gr.State({})
-    classify_button.click(
-        fn=classify_resumes,
-        inputs=[uploaded_files],
-        outputs=[classification_results, all_classified_state]
-    )
-    category_selector.change(
-        fn=filter_by_category,
-        inputs=[category_selector, all_classified_state],
-        outputs=[filtered_resumes_output]
-    )
 if __name__ == "__main__":
-    app.launch()

 import gradio as gr
 from transformers import pipeline
+# Load the multi-class resume classifier once, at import time. top_k=1 is
 # fixed here at construction; classify_resumes reads each result as [0][0].
+text_classifier = pipeline("text-classification", model="liberatoratif/BERT-resume-job-recommender", top_k=1)
+# Map the label strings emitted by the classifier to readable category names.
 # Labels absent from this table are reported as "Unknown" by classify_resumes.
 # NOTE(review): the LABEL_n -> category pairing cannot be checked from this
 # file; confirm it against the model's own id2label configuration.
 CATEGORY_MAP = {
+    "LABEL_0": "Data Science / Machine Learning / AI",
+    "LABEL_1": "Information Technology / Software Engineering",
+    "LABEL_2": "Sales / Marketing / Business Development",
+    "LABEL_3": "Finance / Accounting / Auditing",
+    "LABEL_4": "Human Resources / Recruitment / Talent Acquisition",
+    "LABEL_5": "Product Management / Project Management",
+    "LABEL_6": "Engineering / Mechanical / Civil / Electrical",
+    "LABEL_7": "Operations / Logistics / Supply Chain",
+    "LABEL_8": "Design / UI-UX / Creative",
+    "LABEL_9": "Legal / Compliance",
+    "LABEL_10": "Healthcare / Medical / Nursing",
+    "LABEL_11": "Customer Service / Support",
+    "LABEL_12": "Education / Training / Teaching",
+    "LABEL_13": "Entrepreneurship / Startups / Freelancing",
+    "LABEL_14": "Retail / Merchandising / E-commerce",
+    "LABEL_15": "Media / Communication / PR / Journalism",
+    "LABEL_16": "Manufacturing / Production",
+    "LABEL_17": "Administration / Clerical",
+    "LABEL_18": "Quality Assurance / Control",
+    "LABEL_19": "Construction / Architecture",
+    "LABEL_20": "Science / Research / R&D",
+    "LABEL_21": "Real Estate / Property Management",
+    "LABEL_22": "Security / Safety",
+    "LABEL_23": "Procurement / Purchasing",
+    "LABEL_24": "Hospitality / Tourism / Travel",
+    "LABEL_25": "Telecommunication / Network Engineering",
+    "LABEL_26": "Transportation / Automotive / Aviation",
+    "LABEL_27": "Energy / Oil & Gas / Utilities",
+    "LABEL_28": "Management / Strategy / Consulting",
+    "LABEL_29": "Other / Miscellaneous"
 }
 def clean_text(text):
     """Normalize raw resume text before it is sent to the classifier.

     URLs are dropped, every punctuation character (underscores included) is
     replaced by a space, and runs of whitespace are collapsed to one space.

     Args:
         text: Raw text; ``None`` or an empty string is accepted.

     Returns:
         The cleaned, single-spaced string ("" when there is nothing to clean).
     """
     if not text:
         return ""
     text = re.sub(r'http\S+', ' ', text)
     # \w counts "_" as a word character, so it must be removed explicitly;
     # otherwise separator rules such as "______" survive the cleaning.
     text = re.sub(r'[^\w\s]|_', ' ', text)
     return re.sub(r'\s+', ' ', text).strip()
 def extract_text_from_pdf(file):
     """Read every page of a PDF and return its cleaned text.

     Args:
         file: Whatever ``PyPDF2.PdfReader`` is given by the caller.

     Returns:
         A ``(text, error)`` pair. ``error`` is ``None`` on success,
         "No text found." when the pages yield only whitespace, and the
         exception message (with ``text`` set to ``None``) when reading fails.
     """
     try:
         pages = PyPDF2.PdfReader(file).pages
         # A page without extractable text contributes an empty string.
         chunks = [page.extract_text() or "" for page in pages]
         raw = " ".join(chunks)
         if raw.strip():
             problem = None
         else:
             problem = "No text found."
         return clean_text(raw), problem
     except Exception as exc:
         # Best effort: report the failure to the caller instead of raising.
         return None, str(exc)
 def classify_resumes(files):
     """Classify uploaded resume PDFs and copy each into a per-category folder.

     The ``classified_resumes`` directory is wiped and recreated on every call.

     Args:
         files: Uploaded file objects exposing a ``.name`` path. ``None``
             (nothing uploaded) is treated as an empty batch.

     Returns:
         A 3-tuple lined up with the click handler's outputs:

         * dict mapping file name -> prediction details, or ``{"error": ...}``
           when text extraction reported a problem;
         * a Gradio update that installs the detected categories as the
           dropdown's choices and clears its selection;
         * dict mapping category name -> list of copied file paths.
     """
     results = {}
     category_to_files = {}
     if os.path.exists("classified_resumes"):
         shutil.rmtree("classified_resumes")
     os.makedirs("classified_resumes", exist_ok=True)
     # With nothing uploaded the handler can receive None; iterate nothing.
     for file in files or []:
         file_name = os.path.basename(file.name)
         text, error = extract_text_from_pdf(file)
         if error:
             results[file_name] = {"error": error}
             continue
         # Only the first 512 characters are scored. The pipeline is built
         # with top_k=1, so the single prediction is read from [0][0].
         pred = text_classifier(text[:512])[0][0]
         label = pred['label']
         category = CATEGORY_MAP.get(label, "Unknown")
         score = round(pred['score'], 4)
         results[file_name] = {
             "Predicted Job Category": label,
             "Category Name": category,
             "Confidence Score": score
         }
         # Category names contain "/", which os.path.join would interpret as
         # directory separators and turn into a chain of nested folders.
         folder_name = category.replace(" / ", " - ").replace("/", "-")
         cat_dir = os.path.join("classified_resumes", folder_name)
         os.makedirs(cat_dir, exist_ok=True)
         dest_path = os.path.join(cat_dir, file_name)
         shutil.copyfile(file.name, dest_path)
         category_to_files.setdefault(category, []).append(dest_path)
     # A bare list sent to a Dropdown output is taken as its *value*; the
     # available options have to be delivered through an update instead.
     dropdown_update = gr.update(choices=list(category_to_files), value=None)
     return results, dropdown_update, category_to_files
def show_files_by_category(selected_category, category_to_files):
    """Return the file paths stored under the selected category.

    Args:
        selected_category: Key to look up in ``category_to_files``.
        category_to_files: Dict mapping category name -> list of file paths.

    Returns:
        The stored list for that category, or an empty list when the
        category has no entry.
    """
    if selected_category in category_to_files:
        return category_to_files[selected_category]
    return []
# Gradio UI: upload PDFs, classify them, then browse the copies by category.
with gr.Blocks(title="Resume Category Classifier") as demo:
    gr.Markdown("## 📂 Resume Screening by Job Category")

    # Upload widget and the button that starts classification.
    file_input = gr.File(
        label="Upload Resume PDFs",
        file_count="multiple",
        file_types=[".pdf"],
    )
    classify_btn = gr.Button("🔍 Classify Resumes")

    # Per-file prediction details.
    results_output = gr.JSON(label="Prediction Results")

    # Category picker (created without any choices) and the files it selects.
    category_dropdown = gr.Dropdown(
        interactive=True,
        label="Select Category to View Resumes",
    )
    file_output = gr.File(
        file_count="multiple",
        file_types=[".pdf"],
        label="Download Filtered Resumes",
    )

    # Carries classify_resumes' third return value over to the change handler.
    category_files_state = gr.State({})

    classify_btn.click(
        fn=classify_resumes,
        inputs=[file_input],
        outputs=[results_output, category_dropdown, category_files_state],
    )
    category_dropdown.change(
        fn=show_files_by_category,
        inputs=[category_dropdown, category_files_state],
        outputs=[file_output],
    )

if __name__ == "__main__":
    demo.launch()