# cv / app.py
# saherPervaiz's picture
# Update app.py
# a01b561 verified
import gradio as gr
import numpy as np
from text_extractor import extract_text_from_file
from embedder import get_embeddings
from vector_store import create_faiss_index, search_similar_cvs
from groq_api import summarize_match
# In-memory session state: extracted CV texts, their file names, their
# embedding vectors, and the FAISS index built from those vectors.
cv_texts, cv_names, cv_vectors = [], [], []
faiss_index = None  # stays None until an index has been built
def upload_cvs(files):
    """Extract, embed and index the uploaded CV files.

    On success the module-level ``cv_texts``, ``cv_names``, ``cv_vectors`` and
    ``faiss_index`` are replaced together. On any failure they are left
    untouched, so an index built by an earlier upload never ends up paired
    with names/texts from a later, failed one.

    Args:
        files: Uploaded file objects, each exposing a ``.name`` attribute.
            ``None`` or an empty list means nothing was selected.

    Returns:
        A status message string describing the outcome.
    """
    global cv_texts, cv_names, cv_vectors, faiss_index

    # Nothing selected: report it instead of surfacing a raw TypeError.
    if not files:
        return "❌ No valid CVs."

    try:
        if len(files) > 10:
            return "❌ Limit exceeded: Upload a maximum of 10 CVs."

        # Drop repeated filenames, keeping the first occurrence of each.
        by_name = {}
        for f in files:
            by_name.setdefault(f.name, f)
        unique_files = list(by_name.values())

        # Build everything in locals first; globals are only touched once
        # every step has succeeded.
        texts = [extract_text_from_file(f) for f in unique_files]
        names = list(by_name)
        vectors = get_embeddings(texts)
        if vectors is None or np.asarray(vectors).size == 0:
            return "❌ No valid CVs."
        index = create_faiss_index(vectors)
    except Exception as e:
        return f"❌ Error during upload: {e}"

    cv_texts, cv_names, cv_vectors, faiss_index = texts, names, vectors, index

    # NOTE(review): the emoji prefix below looks mis-encoded (mojibake); it is
    # kept byte-for-byte here -- confirm the file is saved/read as UTF-8.
    return f"βœ… Uploaded and indexed {len(unique_files)} CV(s)."
def match_jd(jd_text, match_mode):
    """Match a job description against the stored CVs and summarise the result.

    Args:
        jd_text: Job description text. ``None``, empty or whitespace-only
            input is rejected with a warning message.
        match_mode: ``"Top 3 Matches"`` runs ``search_similar_cvs`` against
            the index with ``k=3``; any other value selects every stored CV.

    Returns:
        A message string: the matched CV names followed by the text returned
        by ``summarize_match``, or a warning/error message.
    """
    if faiss_index is None:
        return "❌ Please upload CVs first."
    # Guard against None as well as blank text so .strip() cannot raise here,
    # outside the try block.
    if not jd_text or not jd_text.strip():
        return "⚠️ Job description is empty."

    try:
        total = len(cv_names)

        if match_mode == "Top 3 Matches":
            # The embedding is only needed for the similarity search.
            jd_vector = get_embeddings([jd_text])[0]
            candidates = search_similar_cvs(jd_vector, faiss_index, k=3)
        else:  # All uploaded CVs
            candidates = range(total)

        # Keep each CV name once, preserving the order of the candidates.
        seen = set()
        unique_indices = []
        for i in candidates:
            # A FAISS search reports -1 for missing neighbours when fewer than
            # k vectors are indexed. If search_similar_cvs passes those through
            # (TODO confirm), a negative index would silently wrap around to
            # the last CV, so anything out of range is skipped.
            if not 0 <= i < total:
                continue
            name = cv_names[i]
            if name not in seen:
                seen.add(name)
                unique_indices.append(i)

        if not unique_indices:
            return "❌ No valid CVs."

        matched = [cv_names[i] for i in unique_indices]
        texts = [cv_texts[i] for i in unique_indices]
        summary = summarize_match(jd_text, matched, texts)

        # NOTE(review): the emoji in the literals below look mis-encoded
        # (mojibake); they are kept byte-for-byte -- confirm the file encoding.
        title = "βœ… Match Result:" if len(matched) == 1 else f"βœ… Matching {len(matched)} CVs:"
        return f"{title}\n\n" + "\n".join(matched) + f"\n\nπŸ“ Summary:\n{summary}"
    except Exception as e:
        return f"❌ Error during matching: {e}"
def clear_data():
    """Reset the stored CV texts, names and vectors and drop the index.

    Returns:
        A confirmation message string.
    """
    global cv_texts, cv_names, cv_vectors, faiss_index
    cv_texts = []
    cv_names = []
    cv_vectors = []
    faiss_index = None
    return "🧹 Cleared."
# Build the interface: an upload section, a matching section and a reset
# section, each wired to its handler function.
with gr.Blocks() as app:
    gr.Markdown("## πŸ“„ CV Matcher with Groq API (Dynamic Matching)")

    # Upload section
    file_input = gr.File(
        file_types=[".pdf", ".docx"],
        file_count="multiple",
        label="πŸ“€ Upload CVs (Max 10)",
    )
    upload_button = gr.Button("πŸ“ Upload & Index")
    upload_status = gr.Textbox(label="Upload Status")

    # Job description matching section
    jd_input = gr.Textbox(
        label="πŸ“‹ Paste Job Description",
        lines=8,
        placeholder="Paste job description here...",
    )
    matching_modes = ["Top 3 Matches", "All Uploaded CVs"]
    match_mode = gr.Radio(
        matching_modes,
        value=matching_modes[0],
        label="Matching Mode",
    )
    match_button = gr.Button("πŸ” Match CVs")
    result_output = gr.Textbox(label="Match Results", lines=25)

    # Session reset section
    clear_button = gr.Button("🧹 Clear All")
    clear_status = gr.Textbox(label="Clear Status")

    # Event wiring: each button calls its handler and writes the returned
    # message into the corresponding textbox.
    upload_button.click(
        upload_cvs,
        inputs=[file_input],
        outputs=[upload_status],
    )
    match_button.click(
        match_jd,
        inputs=[jd_input, match_mode],
        outputs=[result_output],
    )
    clear_button.click(
        clear_data,
        inputs=[],
        outputs=[clear_status],
    )

app.launch()