"""Streamlit app that extracts candidate names and projects from PDF resumes.

Each uploaded PDF is converted to text with PyMuPDF, sent to Claude for
structured extraction, parsed into a name plus a list of projects, and
stored in the ``resumes`` collection of the ``intratalent`` MongoDB database.
"""

import os
import re
import tempfile
import time
from pathlib import Path
from typing import Dict, List, Tuple

import fitz  # PyMuPDF
import pandas as pd
import pymongo
import streamlit as st
from anthropic import Anthropic
from dotenv import load_dotenv
from streamlit.runtime.uploaded_file_manager import UploadedFile

# Load environment variables
load_dotenv()

# Initialize MongoDB client
MONGO_URI = os.getenv('MONGO_URI')
mongo_client = pymongo.MongoClient(MONGO_URI)
db = mongo_client['intratalent']
resume_collection = db['resumes']

# Initialize Anthropic client
anthropic = Anthropic(api_key=os.getenv('ANTHROPIC_API_KEY'))

# Initialize Streamlit app (must run before any other Streamlit command)
st.set_page_config(
    page_title="IntraTalent Resume Processor",
    page_icon="📄",
    layout="wide"
)

CLAUDE_MODEL = "claude-3-haiku-20240307"
MAX_RESUMES = 10

EXTRACTION_PROMPT = """
Extract the following information from the given resume:
1. Full Name
2. List of all experiences with their descriptions (copy exactly from resume)

Please format the output as follows:
Name: [Full Name]
Projects:
1. [Project Name]: [Project Description]
2. [Project Name]: [Project Description]
...

Extract all experiences, including projects, leadership, work experience, research, etc.

Here's the resume text:
{resume_text}
"""

# Leading list marker such as "1. ", "2) ", "- " or "* " in front of a project.
_LIST_MARKER = re.compile(r"^\s*(?:\d+[.)]|[-*\u2022])\s+")


def extract_text_from_pdf(pdf_content: bytes) -> str:
    """Extract the text of every page of a PDF given as raw bytes.

    The document is opened directly from memory, so no temporary file is
    created and nothing is left on disk if extraction fails.

    Returns:
        The concatenated page texts (each followed by a newline), or an
        empty string if the PDF could not be read; the error is shown in
        the Streamlit UI in that case.
    """
    try:
        with fitz.open(stream=pdf_content, filetype="pdf") as doc:
            return "".join(page.get_text() + "\n" for page in doc)
    except Exception as e:
        st.error(f"Error extracting text from PDF: {e}")
        return ""


def extract_info_with_claude(resume_text: str) -> str:
    """Ask Claude to extract the name and experiences from resume text.

    Returns:
        The model's raw text response. If the API call fails, the error is
        shown in the UI and a string of the form ``"An error occurred: ..."``
        is returned instead of raising.
    """
    st.write("🤖 Sending request to Claude API...")

    prompt = EXTRACTION_PROMPT.format(resume_text=resume_text)

    try:
        message = anthropic.messages.create(
            model=CLAUDE_MODEL,
            max_tokens=4096,
            system="You are a helpful assistant that extracts information from resumes.",
            messages=[{
                "role": "user",
                "content": prompt
            }]
        )
        extracted_info = message.content[0].text
        st.write("✅ Received response from Claude:")
        st.code(extracted_info, language="text")
    except Exception as e:
        extracted_info = f"An error occurred: {e}"
        st.error(f"❌ API Error: {e}")

    return extracted_info


def _parse_extracted_info(extracted_info: str) -> Tuple[str, List[Dict]]:
    """Parse the ``Name:`` / ``Projects:`` formatted text into structured data.

    The name is taken from the first line that starts with ``Name:`` rather
    than blindly from the first line, so a preamble or an error message is
    never mistaken for a candidate name. Project lines are only read after
    the ``Projects:`` header and must look like ``<name>: <description>``.

    Returns:
        ``(name, projects)`` where ``name`` falls back to ``"Unknown"`` and
        ``projects`` is a list of ``{"name": ..., "description": ...}`` dicts.
    """
    name = "Unknown"
    projects: List[Dict] = []
    name_found = False
    in_projects = False

    for raw_line in extracted_info.split('\n'):
        line = raw_line.strip()
        if not line:
            continue

        if line.lower() == "projects:":
            in_projects = True
            continue

        if not in_projects:
            if not name_found and line.lower().startswith("name:"):
                name_found = True
                # Split once so names containing ':' are kept intact.
                name = line.split(':', 1)[1].strip() or "Unknown"
            continue

        parts = line.split(': ', 1)
        if len(parts) == 2:
            # Strip only a real list marker; a name such as "Dr. Smith Lab"
            # must not be cut at its first ". ".
            project_name = _LIST_MARKER.sub("", parts[0]).strip()
            projects.append({"name": project_name, "description": parts[1]})

    return name, projects


def _store_resume(name: str, projects: List[Dict], resume_text: str) -> None:
    """Insert one parsed resume into MongoDB, reporting failures in the UI."""
    resume_data = {
        "name": name,
        "projects": projects,
        "full_content": resume_text
    }
    try:
        resume_collection.insert_one(resume_data)
        st.write("💾 Stored data in MongoDB")
    except pymongo.errors.PyMongoError as e:
        # A storage failure must not throw away the parsed result.
        st.error(f"❌ Error storing resume in MongoDB: {e}")


def parse_resume(uploaded_file: UploadedFile) -> Tuple[str, List[Dict]]:
    """Parse one uploaded resume and return ``(name, projects)``.

    The resume is converted to text, sent to Claude, parsed, and stored in
    MongoDB. Any failure is reported in the UI; ``("Unknown", [])`` is
    returned when no usable data could be extracted.
    """
    try:
        st.write(f"📝 Processing resume: {uploaded_file.name}")
        resume_content = uploaded_file.getvalue()

        st.write("📊 Extracting text from PDF...")
        resume_text = extract_text_from_pdf(resume_content)

        if not resume_text.strip():
            # Nothing to analyse: skip the API call and avoid storing an
            # empty record.
            st.warning(f"⚠️ No text could be extracted from {uploaded_file.name}")
            return "Unknown", []

        st.write("📄 Extracted text from PDF:")
        preview = resume_text[:500] + "..." if len(resume_text) > 500 else resume_text
        st.code(preview)

        extracted_info = extract_info_with_claude(resume_text)

        st.write("🔍 Parsing extracted information...")
        name, projects = _parse_extracted_info(extracted_info)
        st.write(f"👤 Extracted name: {name}")

        st.write("📋 Extracted projects:")
        st.json(projects)

        _store_resume(name, projects, resume_text)

        return name, projects
    except Exception as e:
        st.error(f"❌ Error processing resume: {e}")
        return "Unknown", []


def process_resumes(uploaded_files: List[UploadedFile]) -> Dict:
    """Process several uploaded resumes and collect the results.

    Non-PDF files are skipped with a warning. The progress bar advances for
    every file, including skipped and failed ones, so it always completes.

    Returns:
        A dict mapping each processed file name to
        ``{"name": ..., "projects": [...]}``.
    """
    results = {}
    progress_bar = st.progress(0)
    total = len(uploaded_files)

    for position, file in enumerate(uploaded_files, start=1):
        st.write(f"\n---\n### Processing file {position} of {total}")
        try:
            if file.type != "application/pdf":
                st.warning(f"⚠️ Skipping {file.name}: Not a PDF file")
                continue

            name, projects = parse_resume(file)
            results[file.name] = {
                "name": name,
                "projects": projects
            }
            st.write(f"✅ Successfully processed {file.name}")
        except Exception as e:
            st.error(f"❌ Error processing {file.name}: {e}")
        finally:
            # Runs on success, skip (continue) and failure alike.
            progress_bar.progress(position / total)

    return results


def display_results(results: Dict):
    """Render processed resumes, one expander per file; no-op when empty."""
    if not results:
        return

    st.subheader("📊 Processed Resumes")

    for filename, data in results.items():
        with st.expander(f"📄 {data['name']} ({filename})"):
            st.write("🏷️ File details:")
            st.json({
                "filename": filename,
                "name": data['name'],
                "number_of_projects": len(data['projects'])
            })

            if data['projects']:
                st.write("📋 Projects:")
                df = pd.DataFrame(data['projects'])
                st.dataframe(
                    df,
                    column_config={
                        "name": "Project Name",
                        "description": "Description"
                    },
                    hide_index=True
                )
            else:
                st.info("ℹ️ No projects found in this resume")


def main():
    """Build the page: upload and process resumes, then the search section."""
    st.title("🎯 IntraTalent Resume Processor")

    # File uploader section
    st.header("📤 Upload Resumes")
    uploaded_files = st.file_uploader(
        "Upload up to 10 resumes (PDF only)",
        type=['pdf'],
        accept_multiple_files=True,
        key="resume_uploader"
    )

    # Validate number of files
    if uploaded_files and len(uploaded_files) > MAX_RESUMES:
        st.error("⚠️ Maximum 10 files allowed. Please remove some files.")
        return

    # Process button
    if uploaded_files and st.button("🔄 Process Resumes"):
        with st.spinner("Processing resumes..."):
            st.write("🚀 Starting resume processing...")
            results = process_resumes(uploaded_files)
            st.session_state['processed_results'] = results
            st.write("✨ Processing complete!")

    # Render from session state so the results survive the rerun that any
    # later widget interaction (e.g. the Search button) triggers.
    display_results(st.session_state.get('processed_results', {}))

    # Query section
    st.header("🔍 Search Projects")
    query = st.text_area(
        "Enter your project requirements",
        placeholder="Example: Looking for team members with experience in machine learning and computer vision...",
        height=100
    )

    if query and st.button("🔎 Search"):
        if 'processed_results' not in st.session_state:
            st.warning("⚠️ Please process some resumes first!")
            return

        with st.spinner("Searching for matches..."):
            st.write("🔄 Preparing to search...")
            # TODO: implement the embedding and similarity search here.
            st.success("✅ Search completed!")

        # Display results in a nice format
        st.subheader("🎯 Top Matches")

        # Placeholder for search results
        st.info("🔜 Feature coming soon: Will display matching projects and candidates based on similarity search")


if __name__ == "__main__":
    main()