Spaces:

husseinelsaadi
/

Codingo

Paused

husseinelsaadi committed 17 days ago

Commit

1ead253

1 Parent(s): 0e43f07

updated

Files changed (1) hide show

backend/services/resume_parser.py CHANGED Viewed

@@ -90,16 +90,42 @@ def extract_name(text: str, filename: str) -> str:
 # ===============================
 def parse_with_deepseek(text: str) -> dict:
     """Use Deepseek-Coder-V2-Lite-Instruct to extract resume details in JSON format."""
-    # --- UPDATED: Refined prompt for better JSON extraction ---
     prompt = f"""
 Extract the following information from the resume text provided below. Your response should be a valid JSON object.
-**Information to extract:**
-- **Full Name:** The candidate's full name.
-- **Email:** The candidate's email address.
-- **Phone:** The candidate's phone number.
-- **Skills:** A list of technical and soft skills.
-- **Education:** A list of academic degrees and institutions.
-- **Experience:** A list of previous jobs, including company, title, and dates.
-**Resume Text:**

 # ===============================
 def parse_with_deepseek(text: str) -> dict:
     """Use Deepseek-Coder-V2-Lite-Instruct to extract resume details as a dict.

     Builds an instruction prompt around ``text``, runs it through the
     module-level ``tokenizer`` and ``model`` (both defined outside this
     function), and parses the first valid JSON object found in the model's
     reply.

     Args:
         text: Raw resume text to analyse.

     Returns:
         A dict that always contains the keys ``name``, ``email``, ``phone``,
         ``skills``, ``education`` and ``experience``. Keys the model did not
         return keep their empty default (``""`` or ``[]``); any extra keys the
         model returned are passed through. If no JSON object can be parsed
         from the reply, all six keys hold their empty defaults.
     """
     import json

     # Built fresh on every call so callers can mutate the lists safely.
     empty = {
         "name": "",
         "email": "",
         "phone": "",
         "skills": [],
         "education": [],
         "experience": [],
     }

     prompt = f"""
 Extract the following information from the resume text provided below. Your response should be a valid JSON object.
 Information to extract:
 - Full Name: The candidate's full name.
 - Email: The candidate's email address.
 - Phone: The candidate's phone number.
 - Skills: A list of technical and soft skills.
 - Education: A list of academic degrees and institutions.
 - Experience: A list of previous jobs, including company, title, and dates.
 Resume Text:
 {text}
 Return only valid JSON in the following format:
 {{
   "name": "Full Name",
   "email": "name@example.com",
   "phone": "+961-xxx-xxx",
   "skills": ["Skill1", "Skill2", "Skill3"],
   "education": ["Degree1 - Institution1", "Degree2 - Institution2"],
   "experience": ["Job1 - Company1 (Dates)", "Job2 - Company2 (Dates)"]
 }}
 """
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
     outputs = model.generate(**inputs, max_new_tokens=512)

     # NOTE(review): assumes `model` is a causal (decoder-only) LM, for which
     # generate() returns the prompt tokens followed by the new tokens --
     # confirm against where the model is loaded. Decoding the whole sequence
     # would put the prompt's own JSON template (and any braces in the resume)
     # in front of the answer and defeat the JSON extraction below.
     prompt_len = inputs["input_ids"].shape[1]
     response = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)

     # Try every "{" as a possible start of the answer. raw_decode() stops at
     # the end of the first complete JSON value, so trailing chatter or code
     # fences after the object do not break parsing.
     decoder = json.JSONDecoder()
     start = response.find("{")
     while start != -1:
         try:
             parsed, _ = decoder.raw_decode(response[start:])
         except json.JSONDecodeError:
             start = response.find("{", start + 1)
         else:
             # A value that starts with "{" always decodes to a dict.
             return {**empty, **parsed}

     # Best effort: the model produced no parseable JSON object.
     return empty