Spaces:
Paused
Paused
File size: 1,960 Bytes
682910e 33fa314 efffc2e b336194 682910e 33fa314 682910e 33fa314 682910e 864c2ae 6d286f1 a511250 682910e 33fa314 682910e af02e64 33fa314 864c2ae 682910e 6d286f1 682910e 33fa314 864c2ae efffc2e d4b2339 33fa314 efffc2e 33fa314 682910e 864c2ae 33fa314 864c2ae 33fa314 864c2ae 33fa314 efffc2e 33fa314 efffc2e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
import json
from pathlib import Path
from typing import Dict
from pdfminer.high_level import extract_text as pdf_extract_text
from docx import Document
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
# --------------------
# Load Resume NER Model
# --------------------
# Identifier of the token-classification checkpoint used for resume NER.
MODEL_NAME = "Ioana23/bert-finetuned-resumes-ner"

# These run at import time, so the tokenizer and model are loaded once and
# shared by every call that goes through ``ner_pipeline``.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForTokenClassification.from_pretrained(MODEL_NAME)

# ``parse_resume`` reads the "entity_group" and "word" keys of each result,
# which is the grouped output shape requested by aggregation_strategy="simple".
ner_pipeline = pipeline(
    "ner",
    model=model,
    tokenizer=tokenizer,
    aggregation_strategy="simple",
)
# --------------------
# Extract Text from PDF/DOCX
# --------------------
def extract_text(file_path: str) -> str:
    """Return the plain text of a PDF or DOCX file.

    The reader is chosen from the file extension, compared
    case-insensitively.

    Args:
        file_path: Path to the document to read.

    Returns:
        For ``.pdf`` files, the result of pdfminer's ``extract_text``.
        For ``.docx`` files, the text of every paragraph in
        ``Document(file_path).paragraphs`` joined with newlines.

    Raises:
        ValueError: If the extension is neither ``.pdf`` nor ``.docx``.
    """
    # Normalise the extension once so ".PDF" and ".pdf" take the same branch.
    extension = Path(file_path).suffix.lower()

    if extension == ".pdf":
        return pdf_extract_text(file_path)

    if extension == ".docx":
        document = Document(file_path)
        return "\n".join(paragraph.text for paragraph in document.paragraphs)

    raise ValueError("Unsupported file format")
# --------------------
# Parse Resume
# --------------------
def parse_resume(file_path: str) -> Dict[str, str]:
    """Extract name, skills, education and experience from a resume file.

    The document text comes from ``extract_text`` and is tagged by the
    module-level ``ner_pipeline``. Each entity's ``entity_group`` label,
    compared case-insensitively, decides which output field receives the
    entity's ``word``.

    Args:
        file_path: Path to the resume; passed through to ``extract_text``.

    Returns:
        A dict with the keys ``"name"``, ``"skills"``, ``"education"`` and
        ``"experience"``. Values are de-duplicated in first-seen order.
        Name parts are joined with a space, the other fields with ``", "``.
        A field for which nothing was found is ``"Not Found"``.

    Raises:
        ValueError: Propagated from ``extract_text`` when the file type is
            not supported.
    """
    # Every label that is reported, mapped to the output field it feeds.
    field_for_label = {
        "NAME": "name",
        "SKILL": "skills",
        "EDUCATION": "education",
        "DEGREE": "education",
        "EXPERIENCE": "experience",
        "JOB": "experience",
        "ROLE": "experience",
        "POSITION": "experience",
    }
    # Dicts preserve insertion order, so each inner dict serves as an
    # ordered set: duplicates collapse while first-seen order is kept.
    collected = {"name": {}, "skills": {}, "education": {}, "experience": {}}

    text = extract_text(file_path)
    # With no text to tag (e.g. an empty document) there is nothing for the
    # model to find, so skip the model call entirely.
    entities = ner_pipeline(text) if text and text.strip() else []

    for ent in entities:
        field = field_for_label.get(ent["entity_group"].upper())
        value = ent["word"].strip()
        # Ignore labels that are not reported, and entities whose text is
        # blank: a blank value would show up as an empty item in the joined
        # output (for example ", Python").
        if field is None or not value:
            continue
        collected[field][value] = None

    return {
        "name": " ".join(collected["name"]) or "Not Found",
        "skills": ", ".join(collected["skills"]) or "Not Found",
        "education": ", ".join(collected["education"]) or "Not Found",
        "experience": ", ".join(collected["experience"]) or "Not Found",
    }
|