import re
from pathlib import Path
from pdfminer.high_level import extract_text as pdf_extract_text
from docx import Document

class ResumeParser:
    """Heuristic, regex-based extractor for a few fields of a PDF/DOCX resume.

    The parser turns a file into plain text (``extract_text``), guesses the
    candidate's name from the first lines (``extract_name``) and pulls the
    skills / education / experience sections out of the text
    (``extract_sections``).  ``parse_resume`` ties the three together and
    never raises: failures are reported through the ``"name"`` field.

    Everything here is best-effort pattern matching; results depend on how
    the resume is laid out.
    """

    # Caps applied by ``parse_resume`` to each returned list.
    MAX_SKILLS = 10
    MAX_EDUCATION = 3
    MAX_EXPERIENCE = 3

    # Words that mark a line as a document title rather than a person's name.
    _TITLE_WORDS = frozenset(("resume", "cv", "curriculum"))

    # 2-4 capitalised words alone on a line, e.g. "Jane Mary Doe".
    _NAME_RE = re.compile(r'^[A-Z][a-z]+(?:\s+[A-Z][a-z]+){1,3}$')

    # Regex alternations of the keywords that introduce each section.
    _SECTION_KEYWORDS = {
        "skills": r'skills|technologies|expertise',
        "education": r'education|degrees?',
        "experience": r'experience|work history|employment',
    }

    def extract_text(self, file_path: str) -> str:
        """Return the plain text of a PDF or DOCX file, keeping line breaks.

        Args:
            file_path: Path to the file; the format is chosen from its
                suffix (case-insensitive).

        Returns:
            For ``.pdf``: the text produced by ``pdf_extract_text`` with
            whitespace tidied per line (see ``_normalize_text``).
            For ``.docx``: the non-blank paragraphs joined with ``"\\n"``.

        Raises:
            ValueError: If the suffix is neither ``.pdf`` nor ``.docx``.
        """
        suffix = Path(file_path).suffix.lower()

        if suffix == ".pdf":
            # Line structure must survive: name and section extraction both
            # rely on newlines, so whitespace is only collapsed within lines.
            return self._normalize_text(pdf_extract_text(file_path))
        if suffix == ".docx":
            doc = Document(file_path)
            return "\n".join(p.text for p in doc.paragraphs if p.text.strip())
        raise ValueError("Unsupported file format")

    @staticmethod
    def _normalize_text(raw: str) -> str:
        """Collapse whitespace inside each line and squeeze runs of blank lines.

        Form feeds (which PDF extractors commonly emit at page boundaries)
        are treated as a blank line so they still separate sections.
        """
        lines = [" ".join(line.split())
                 for line in raw.replace("\f", "\n\n").splitlines()]
        return re.sub(r'\n{3,}', '\n\n', "\n".join(lines)).strip()

    def _is_title_line(self, line: str) -> bool:
        """Return True if *line* contains a document-title word (resume, cv, ...)."""
        return any(word.lower() in self._TITLE_WORDS for word in line.split())

    def extract_name(self, text: str) -> str:
        """Guess the candidate's name from the first ten lines of *text*.

        Lines containing a title word ("Resume", "CV", "Curriculum") are
        never returned.  Among the remaining non-empty lines the first one
        made of 2-4 capitalised words wins; failing that, the first line of
        2-4 words that starts with an uppercase character.

        Returns:
            The matching line, or ``"Not Found"`` when nothing qualifies.
        """
        first_lines = [line.strip() for line in text.split('\n')[:10] if line.strip()]
        # Filter titles once so the strict pass and the fallback agree.
        candidates = [line for line in first_lines if not self._is_title_line(line)]

        for line in candidates:
            if self._NAME_RE.match(line):
                return line

        # Fallback for names the strict pattern rejects (e.g. "JOHN DOE").
        for line in candidates:
            if 2 <= len(line.split()) <= 4 and line[0].isupper():
                return line

        return "Not Found"

    def _find_section_body(self, text: str, section: str):
        """Return the raw text following the first *section* keyword, or None.

        The body starts after the keyword (plus an optional second keyword
        on the same heading, e.g. "Skills & Technologies", and any colon /
        whitespace) and ends at the first of:

        * a blank line,
        * a line that looks like another section's heading (short prefix,
          keyword, then a colon or end of line) -- needed for text that has
          no blank lines between sections,
        * the end of the text.
        """
        own = self._SECTION_KEYWORDS[section]
        others = "|".join(
            keywords for name, keywords in self._SECTION_KEYWORDS.items()
            if name != section
        )
        pattern = (
            rf'(?:{own})'
            rf'(?:[^\n:]{{0,20}}\b(?:{own})\b)?'   # rest of a compound heading
            rf'[:\s]*'
            rf'(.*?)'
            rf'(?=\n\s*\n'
            rf'|\n[^\n:]{{0,30}}\b(?:{others})\b[ \t]*(?::|\n|\Z)'
            rf'|\Z)'
        )
        # DOTALL lets the body span several lines; without it any section
        # longer than one line could never reach its terminator.
        match = re.search(pattern, text, re.IGNORECASE | re.DOTALL)
        return match.group(1) if match else None

    def extract_sections(self, text: str) -> dict:
        """Extract the skills, education and experience sections of *text*.

        Each section is located by the first case-insensitive occurrence of
        one of its keywords anywhere in the text (so a keyword used in a
        sentence before the real heading will be picked up instead).

        Returns:
            A dict with keys ``"skills"``, ``"education"`` and
            ``"experience"``, each a list of stripped, non-empty strings.
            Skills are split on commas, semicolons and newlines; the other
            sections yield one entry per line.  Missing sections are ``[]``.
        """
        results = {"skills": [], "education": [], "experience": []}

        for section in self._SECTION_KEYWORDS:
            body = self._find_section_body(text, section)
            if body is None:
                continue
            separator = r'[,;\n]' if section == "skills" else r'\n'
            results[section] = [
                item.strip() for item in re.split(separator, body) if item.strip()
            ]

        return results

    @staticmethod
    def _result(name: str, skills=(), education=(), experience=()) -> dict:
        """Build the dict shape returned by ``parse_resume``."""
        return {
            "name": name,
            "skills": list(skills),
            "education": list(education),
            "experience": list(experience),
        }

    def parse_resume(self, file_path: str) -> dict:
        """Parse the resume at *file_path* into a summary dict.

        Returns:
            ``{"name", "skills", "education", "experience"}``.  Lists are
            capped at ``MAX_SKILLS`` / ``MAX_EDUCATION`` / ``MAX_EXPERIENCE``
            entries.  This method does not raise: if the extracted text is
            shorter than 10 characters the name is ``"Error: Empty file"``,
            and any exception is reported as ``"Error: <message>"`` with
            empty lists.
        """
        try:
            text = self.extract_text(file_path)

            if not text or len(text.strip()) < 10:
                return self._result("Error: Empty file")

            sections = self.extract_sections(text)
            return self._result(
                self.extract_name(text),
                skills=sections["skills"][:self.MAX_SKILLS],
                education=sections["education"][:self.MAX_EDUCATION],
                experience=sections["experience"][:self.MAX_EXPERIENCE],
            )
        except Exception as e:
            # Deliberate catch-all: callers rely on always getting a dict.
            return self._result(f"Error: {e}")

# Shared module-level ResumeParser instance, created once at import time and
# used by the parse_resume() convenience function below.
resume_parser = ResumeParser()

def parse_resume(file_path: str) -> dict:
    """Parse the resume at *file_path* using the shared module-level parser.

    Thin convenience wrapper: delegates to ``resume_parser.parse_resume`` and
    returns its result dict unchanged.
    """
    parsed = resume_parser.parse_resume(file_path)
    return parsed