Spaces:

saherPervaiz
/

cv

Running

File size: 3,603 Bytes

94516ce
0ab97c7
94516ce
 
 
9480219
94516ce
0ab97c7
94516ce
 
 
 
 
 
 
e92236a
8d505f4
e693c99
 
 
a01b561
 
 
 
 
 
 
 
 
e92236a
8d505f4
 
c045a8c
0ab97c7
e92236a
8d505f4
 
a01b561
e92236a
8d505f4
 
94516ce
e693c99
0ab97c7
e92236a
 
 
 
0ab97c7
 
e693c99
a01b561
e693c99
 
a01b561
e693c99
0ab97c7
a01b561
 
 
 
 
 
 
 
 
 
e92236a
0ab97c7
e92236a
a01b561
 
e92236a
0ab97c7
 
94516ce
 
 
 
e92236a
 
8467e1e
a01b561
8467e1e
0ab97c7
e693c99
0ab97c7
 
 
a01b561
0ab97c7
e693c99
 
 
0ab97c7
a01b561
0ab97c7
 
 
 
 
 
 
e693c99
0ab97c7
e92236a

import gradio as gr
import numpy as np
from text_extractor import extract_text_from_file
from embedder import get_embeddings
from vector_store import create_faiss_index, search_similar_cvs
from groq_api import summarize_match

# Session state shared by the UI callbacks in this module.
# cv_texts and cv_names are filled from the same list of uploaded files;
# cv_vectors holds whatever get_embeddings returns for cv_texts.
cv_texts, cv_names, cv_vectors = [], [], []
# Search index created from cv_vectors; stays None while nothing is indexed.
faiss_index = None

def upload_cvs(files):
    """Extract, embed and index the uploaded CV files.

    On success the module-level ``cv_texts``, ``cv_names``, ``cv_vectors``
    and ``faiss_index`` are replaced together. On any failure they are left
    exactly as they were, so a failed upload cannot leave the name/text lists
    out of step with a previously built index.

    Args:
        files: Uploaded file objects, each exposing a ``.name`` attribute.
            ``None`` or an empty sequence means nothing was selected.

    Returns:
        A status string intended for display in the UI.
    """
    global cv_texts, cv_names, cv_vectors, faiss_index

    # Nothing selected: report it directly instead of surfacing a raw
    # TypeError message from len(None).
    if not files:
        return "❌ No valid CVs."

    try:
        if len(files) > 10:
            return "❌ Limit exceeded: Upload a maximum of 10 CVs."

        # Keep the first file seen for each name, preserving upload order.
        first_by_name = {}
        for f in files:
            first_by_name.setdefault(f.name, f)
        unique_files = list(first_by_name.values())

        # Work on locals so a failure below cannot corrupt the shared state.
        texts = [extract_text_from_file(f) for f in unique_files]
        names = [f.name for f in unique_files]
        vectors = get_embeddings(texts)

        if vectors is None or np.array(vectors).size == 0:
            return "❌ No valid CVs."

        index = create_faiss_index(vectors)

    except Exception as e:
        return f"❌ Error during upload: {e}"

    # Every step succeeded: publish the new state in one assignment.
    cv_texts, cv_names, cv_vectors, faiss_index = texts, names, vectors, index
    return f"✅ Uploaded and indexed {len(names)} CV(s)."

def match_jd(jd_text, match_mode):
    """Match a job description against the uploaded CVs and summarise the result.

    Args:
        jd_text: Job description text entered by the user.
        match_mode: ``"Top 3 Matches"`` queries the index for the closest
            CVs; any other value selects every uploaded CV.

    Returns:
        A display string listing the matched CV names followed by the text
        returned by ``summarize_match``, or a warning/error message.
    """
    # An index without any stored names cannot be mapped back to CVs.
    if faiss_index is None or not cv_names:
        return "❌ Please upload CVs first."
    # Guard against None as well as blank text so the callback never raises.
    if not jd_text or not jd_text.strip():
        return "⚠️ Job description is empty."

    try:
        total = len(cv_names)

        # Select CVs based on match mode
        if match_mode == "Top 3 Matches":
            # The embedding is only needed for the similarity search.
            jd_vector = get_embeddings([jd_text])[0]
            # Never ask for more neighbours than there are CVs.
            indices = search_similar_cvs(jd_vector, faiss_index, k=min(3, total))
        else:  # All uploaded CVs
            indices = range(total)

        # Filter duplicates by name, keeping the first hit for each.
        seen = set()
        matched = []
        texts = []
        for i in indices:
            # NOTE(review): a search may pad short result lists with sentinel
            # ids such as -1 (confirm against search_similar_cvs); a negative
            # id would otherwise silently wrap around to the end of the list.
            if not 0 <= i < total:
                continue
            name = cv_names[i]
            if name in seen:
                continue
            seen.add(name)
            matched.append(name)
            texts.append(cv_texts[i])

        summary = summarize_match(jd_text, matched, texts)

        title = "✅ Match Result:" if len(matched) == 1 else f"✅ Matching {len(matched)} CVs:"
        return f"{title}\n\n" + "\n".join(matched) + f"\n\n📝 Summary:\n{summary}"

    except Exception as e:
        return f"❌ Error during matching: {e}"

def clear_data():
    """Reset the shared CV state and return a confirmation message.

    Rebinds ``cv_texts``, ``cv_names`` and ``cv_vectors`` to fresh empty
    lists and sets ``faiss_index`` back to ``None``.
    """
    global cv_texts, cv_names, cv_vectors, faiss_index
    cv_texts = []
    cv_names = []
    cv_vectors = []
    faiss_index = None
    return "🧹 Cleared."

# Assemble the single-page UI and wire each button to its callback.
with gr.Blocks() as app:
    gr.Markdown("## 📄 CV Matcher with Groq API (Dynamic Matching)")

    # --- CV upload ---
    file_input = gr.File(
        file_types=[".pdf", ".docx"],
        file_count="multiple",
        label="📤 Upload CVs (Max 10)",
    )
    upload_button = gr.Button("📁 Upload & Index")
    upload_status = gr.Textbox(label="Upload Status")

    # --- Job description and matching mode ---
    jd_input = gr.Textbox(
        label="📋 Paste Job Description",
        lines=8,
        placeholder="Paste job description here...",
    )

    match_mode = gr.Radio(
        choices=["Top 3 Matches", "All Uploaded CVs"],
        value="Top 3 Matches",
        label="Matching Mode",
    )

    match_button = gr.Button("🔍 Match CVs")
    result_output = gr.Textbox(label="Match Results", lines=25)

    # --- Session reset ---
    clear_button = gr.Button("🧹 Clear All")
    clear_status = gr.Textbox(label="Clear Status")

    # --- Event wiring ---
    upload_button.click(
        fn=upload_cvs,
        inputs=[file_input],
        outputs=[upload_status],
    )
    match_button.click(
        fn=match_jd,
        inputs=[jd_input, match_mode],
        outputs=[result_output],
    )
    clear_button.click(
        fn=clear_data,
        inputs=[],
        outputs=[clear_status],
    )

# Start the web server for the app defined above.
app.launch()