import streamlit as st
import os
import fitz  # PyMuPDF
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from spellchecker import SpellChecker  # Import the SpellChecker class from pyspellchecker
import tempfile
import pandas as pd

# ---- Page header and input widgets -----------------------------------------
# Streamlit renders widgets in call order, so the sequence below is also the
# top-to-bottom layout of the page.
st.title("RESUME RANKER")

# Free-text, comma-separated list of skills to look for in each resume.
skills = st.text_input(
    label="Enter Skills (comma-separated):",
)

# Job description that each resume is compared against.
job_description = st.text_area(
    label="Enter Job Description:",
)

# One or more resumes; the uploader itself restricts the picker to PDF files.
pdf_resumes = st.file_uploader(
    label="Upload Resumes/CVs",
    type=["pdf"],
    accept_multiple_files=True,
)

def _extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page of the PDF at *pdf_path*."""
    with fitz.open(pdf_path) as doc:
        return "".join(page.get_text() for page in doc)


def _read_uploaded_pdf(uploaded_file):
    """Return the lower-cased text of an uploaded PDF.

    The upload is written to a temporary file so PyMuPDF can open it by path.
    The temporary file is removed even when writing or text extraction fails.
    """
    # delete=False: the file is closed before being reopened by path, which
    # would otherwise delete it; cleanup is done explicitly in `finally`.
    temp_pdf = tempfile.NamedTemporaryFile(delete=False)
    try:
        with temp_pdf:
            temp_pdf.write(uploaded_file.read())
        return _extract_text_from_pdf(temp_pdf.name).lower()
    finally:
        os.remove(temp_pdf.name)


def _parse_skills(raw_skills):
    """Split a comma-separated string into stripped, non-empty skill names."""
    stripped = (skill.strip() for skill in raw_skills.split(","))
    # Drop empty entries: "" is a substring of every text and would otherwise
    # count as a skill that every resume "matches".
    return [skill for skill in stripped if skill]


def _skill_match(skill_names, resume_text):
    """Return ``(match_percent, missing_skills)`` for one resume.

    A skill counts as present when its lower-cased form is a substring of
    *resume_text* (which is expected to be lower-cased already).
    NOTE: substring matching means a short skill such as "java" also matches
    "javascript".
    """
    missing = [skill for skill in skill_names if skill.lower() not in resume_text]
    if not skill_names:
        # Nothing to match against; avoid dividing by zero.
        return 0.0, missing
    matched = len(skill_names) - len(missing)
    return round(matched / len(skill_names) * 100, 2), missing


def _job_description_matches(description, resume_texts):
    """Return the TF-IDF cosine similarity (in percent) of each resume.

    The vectorizer is fitted once on *description* and reused for all resumes.
    Returns ``None`` when *description* yields no vocabulary (empty text or
    only characters the tokenizer discards), in which case no similarity can
    be computed.
    """
    vectorizer = TfidfVectorizer()
    try:
        description_matrix = vectorizer.fit_transform([description])
    except ValueError:
        # scikit-learn raises ValueError for an empty vocabulary.
        return None
    resume_matrix = vectorizer.transform(resume_texts)
    similarities = cosine_similarity(description_matrix, resume_matrix)[0]
    return [round(float(value) * 100, 2) for value in similarities]


if st.button("Rank Resumes"):
    if not pdf_resumes:
        st.warning("Please upload PDF resumes.")
    else:
        # The module-level names are rebound to their normalized forms.
        skills = _parse_skills(skills)
        job_description = job_description.lower()

        # (file name, lower-cased text) for every uploaded PDF.
        resume_data = [
            (pdf_resume.name, _read_uploaded_pdf(pdf_resume))
            for pdf_resume in pdf_resumes
            if pdf_resume.type == "application/pdf"
        ]

        if not resume_data:
            st.warning("No PDF resumes found in the uploaded files.")
        else:
            if not skills:
                st.warning("No skills entered; the skills match is reported as 0%.")

            resume_texts = [resume_text for _, resume_text in resume_data]
            description_scores = _job_description_matches(job_description, resume_texts)
            if description_scores is None:
                st.warning(
                    "The job description contains no usable words; "
                    "the job description match is reported as 0%."
                )
                description_scores = [0.0] * len(resume_data)

            # Keep the scores numeric until after sorting: sorting the
            # formatted "xx.x%" strings would order them lexicographically
            # (e.g. "9.0%" above "100.0%").
            scored = []
            for (resume_name, resume_text), description_score in zip(
                resume_data, description_scores
            ):
                skill_score, missing_skills = _skill_match(skills, resume_text)
                scored.append((resume_name, skill_score, description_score, missing_skills))

            # Highest skills match first; the job description match breaks ties.
            scored.sort(key=lambda row: (row[1], row[2]), reverse=True)

            resume_rankings = [
                (name, f"{skill_score}%", f"{description_score}%", missing)
                for name, skill_score, description_score, missing in scored
            ]

            # Create a DataFrame to display the results
            df = pd.DataFrame(
                resume_rankings,
                columns=["File Name", "Skills Match ", "Job Description Match ", "Missing Skills"],
            )

            st.subheader("Ranked Resumes:")
            st.dataframe(df)