Spaces:
Sleeping
Sleeping
from fastapi import FastAPI, UploadFile, File | |
import json, re, io | |
from llama_cpp import Llama | |
from PyPDF2 import PdfReader | |
from docx import Document | |
import os | |
# Locate the model file relative to this script, so the path does not depend
# on the process's working directory inside the Hugging Face Space.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
_MODEL_FILENAME = "capybarahermes-2.5-mistral-7b.Q5_K_M.gguf"
MODEL_PATH = os.path.join(BASE_DIR, "models", _MODEL_FILENAME)

# The model is loaded once, at import time, into the module-level ``llm``.
print(f"🔹 Loading Mistral 7B from: {MODEL_PATH} (This may take a while)")
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=4096,
    n_gpu_layers=0,  # 0 keeps inference on the CPU (use when no GPU is available)
)
print("✅ Model loaded successfully!")

app = FastAPI(title="Resume Parsing API")
def extract_text_from_resume(uploaded_file):
    """Return the plain text of an uploaded PDF or DOCX resume.

    Args:
        uploaded_file: Upload object exposing ``filename`` and a binary
            file-like ``file`` attribute.

    Returns:
        The extracted text joined with newlines (possibly empty when the
        document contains no extractable text), or ``None`` when the
        filename is missing or does not end in ``.pdf`` / ``.docx``.
    """
    # The filename may be missing, and extensions are frequently upper-cased
    # ("Resume.PDF"), so normalise before dispatching on the suffix.
    filename = (uploaded_file.filename or "").lower()
    if not filename.endswith((".pdf", ".docx")):
        # Bail out before reading so unsupported uploads are not buffered.
        return None

    file_stream = io.BytesIO(uploaded_file.file.read())

    if filename.endswith(".pdf"):
        reader = PdfReader(file_stream)
        # Extract each page exactly once; falsy results (pages that yield no
        # text) are skipped.
        page_texts = (page.extract_text() for page in reader.pages)
        return "\n".join(page_text for page_text in page_texts if page_text)

    doc = Document(file_stream)
    return "\n".join(para.text for para in doc.paragraphs)
def extract_email_phone(text):
    """Find the first email address and phone number in *text*.

    Args:
        text: Resume text to search.

    Returns:
        A dict with keys ``"email"`` and ``"phone"``. Each value is the first
        match found, or the placeholder ``"Email not found"`` /
        ``"Phone not found"`` when nothing matches.
    """
    email_pattern = r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"
    # The country-code prefix must be an optional *group*. Writing it as
    # ``\d{1,3}?`` only makes the quantifier lazy, which still demands at
    # least one digit and therefore rejects plain 10-digit numbers such as
    # "555-123-4567" or "(555) 123-4567".
    phone_pattern = (
        r"(?:\+?\d{1,3}[-.\s]?)?"  # optional country code, e.g. "+1 "
        r"\(?\d{3}\)?[-.\s]?"      # area code, optionally parenthesised
        r"\d{3}[-.\s]?\d{4}"       # local number
    )

    email_match = re.search(email_pattern, text)
    phone_match = re.search(phone_pattern, text)

    return {
        "email": email_match.group() if email_match else "Email not found",
        "phone": phone_match.group() if phone_match else "Phone not found",
    }
def _parse_json_object(raw):
    """Return the JSON object contained in *raw*, or ``None`` if there is none."""
    try:
        parsed = json.loads(raw)
    except json.JSONDecodeError:
        # Completions are often wrapped in code fences or followed by extra
        # commentary; decode just the first object, ignoring what surrounds it.
        start = raw.find("{")
        if start == -1:
            return None
        try:
            parsed, _ = json.JSONDecoder().raw_decode(raw, start)
        except json.JSONDecodeError:
            return None
    # Callers merge the result into a dict, so lists/strings/numbers are
    # treated as a parse failure.
    return parsed if isinstance(parsed, dict) else None


def analyze_resume(text):
    """Ask the module-level ``llm`` to extract structured data from a resume.

    Args:
        text: Resume text. Only the first 3500 characters are sent to the
            model.

    Returns:
        The dict decoded from the model's JSON reply. When no JSON object can
        be decoded, returns ``{"error": "Failed to parse JSON",
        "raw_output": <model text>}`` instead.
    """
    truncated_text = text[:3500]  # Keep within context limit
    prompt = f"""
Extract these details from the resume:
1. Full Name
2. Work Experience
3. Qualifications
4. List of Skills
Resume Text: {truncated_text}
Format response as JSON:
{{
"name": "Candidate Name",
"experience": [
{{
"company": "Company Name",
"role": "Job Title",
"duration": "Start Date - End Date",
"responsibilities": "Brief work responsibilities"
}}
],
"qualifications": "Degree, Certifications",
"skills": ["List of skills"]
}}
"""
    response = llm(prompt, max_tokens=700)
    output = response["choices"][0]["text"].strip()
    print("🔹 Raw LLaMA Output:\n", output)

    parsed = _parse_json_object(output)
    if parsed is None:
        return {"error": "Failed to parse JSON", "raw_output": output}
    return parsed
# NOTE(review): no route decorator (e.g. ``@app.post(...)``) is visible on this
# handler in this part of the file — confirm it is registered with ``app``
# somewhere, otherwise the endpoint is unreachable.
async def parse_resume(file: UploadFile = File(...)):
    """Parse an uploaded resume into contact details and model-extracted fields.

    Args:
        file: The uploaded resume (``.pdf`` or ``.docx``).

    Returns:
        ``{"success": True, "data": {...}}`` where ``data`` holds the
        ``email`` / ``phone`` values merged with whatever ``analyze_resume``
        returned, or ``{"error": <message>}`` when the file type is
        unsupported or no text could be extracted from it.
    """
    text = extract_text_from_resume(file)
    if text is None:
        return {"error": "Unsupported file format"}
    if not text.strip():
        # Supported type but nothing to analyse (e.g. an image-only PDF);
        # reporting this as "unsupported format" would be misleading.
        return {"error": "No extractable text found in file"}

    extracted_info = extract_email_phone(text)
    llm_data = analyze_resume(text)
    if not isinstance(llm_data, dict):
        # dict.update() would raise on a non-mapping value such as a JSON
        # string or number; surface it as a parse failure instead.
        llm_data = {"error": "Failed to parse JSON", "raw_output": llm_data}
    extracted_info.update(llm_data)
    return {"success": True, "data": extracted_info}