Spaces:

husseinelsaadi
/

Codingo

Paused

husseinelsaadi committed on 17 days ago

Commit

4f1e97d

1 Parent(s): 8d99522

updated

Files changed (1) hide show

backend/services/resume_parser.py CHANGED Viewed

@@ -77,33 +77,35 @@ def extract_name(text: str, filename: str) -> str:
 # Zephyr Parsing
 # ===============================
 def parse_with_zephyr(text: str) -> dict:
     prompt = f"""
-Extract the following information from the resume text provided below.
-Return ONLY a valid JSON object (no extra commentary).
-Information to extract:
-- Full Name
-- Email
-- Phone
-- Skills (list)
-- Education (list of degrees + institutions)
-- Experience (list of jobs with company, title, and dates)
 Resume:
 {text}
-JSON format:
 {{
-  "name": "Full Name",
-  "email": "[email protected]",
-  "phone": "+961-xxx-xxx",
-  "skills": ["Skill1", "Skill2", "Skill3"],
-  "education": ["Degree1 - Institution1", "Degree2 - Institution2"],
-  "experience": ["Job1 - Company1 (Dates)", "Job2 - Company2 (Dates)"]
 }}
 """
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-    outputs = model.generate(**inputs, max_new_tokens=256, temperature=0.0)
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
     match = re.search(r"\{.*\}", response, re.S)
@@ -112,8 +114,8 @@ JSON format:
             return json.loads(match.group())
         except:
             pass
-    return {"name": "", "email": "", "phone": "", "skills": [], "education": [], "experience": []}
 # ===============================
 # Main Parse Function
 # ===============================

 # Zephyr Parsing
 # ===============================
 def parse_with_zephyr(text: str) -> dict:
+    """Use Zephyr-7B to extract resume details in JSON format."""
     prompt = f"""
+You are an information extraction system.
+Extract the following fields from the resume text.
+⚠️ DO NOT return placeholders like "Full Name" or "Skill1".
+Return ONLY actual values from the resume. If a field is missing, leave it as an empty string or empty list.
+Fields to extract:
+- name
+- skills (list of skills)
+- education (list of degrees + institutions)
+- experience (list of jobs with company, title, dates)
 Resume:
 {text}
+Return ONLY a valid JSON in this format:
 {{
+  "name": "<actual name or empty string>",
+  "skills": ["<actual skill>", "<actual skill>"],
+  "education": ["<Degree - Institution>", "<Degree - Institution>"],
+  "experience": ["<Job - Company (Dates)>", "<Job - Company (Dates)>"]
 }}
 """
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+    outputs = model.generate(**inputs, max_new_tokens=512, do_sample=False, temperature=0)
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
     match = re.search(r"\{.*\}", response, re.S)
             return json.loads(match.group())
         except:
             pass
+    return {"name": "", "skills": [], "education": [], "experience": []}
 # ===============================
 # Main Parse Function
 # ===============================