husseinelsaadi committed on
Commit
4f1e97d
·
1 Parent(s): 8d99522
Files changed (1) hide show
  1. backend/services/resume_parser.py +21 -19
backend/services/resume_parser.py CHANGED
@@ -77,33 +77,35 @@ def extract_name(text: str, filename: str) -> str:
77
  # Zephyr Parsing
78
  # ===============================
79
  def parse_with_zephyr(text: str) -> dict:
 
 
80
  prompt = f"""
81
- Extract the following information from the resume text provided below.
82
- Return ONLY a valid JSON object (no extra commentary).
83
 
84
- Information to extract:
85
- - Full Name
86
- - Email
87
- - Phone
88
- - Skills (list)
89
- - Education (list of degrees + institutions)
90
- - Experience (list of jobs with company, title, and dates)
 
 
91
 
92
  Resume:
93
  {text}
94
 
95
- JSON format:
96
  {{
97
- "name": "Full Name",
98
- "email": "[email protected]",
99
- "phone": "+961-xxx-xxx",
100
- "skills": ["Skill1", "Skill2", "Skill3"],
101
- "education": ["Degree1 - Institution1", "Degree2 - Institution2"],
102
- "experience": ["Job1 - Company1 (Dates)", "Job2 - Company2 (Dates)"]
103
  }}
104
  """
 
105
  inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
106
- outputs = model.generate(**inputs, max_new_tokens=256, temperature=0.0)
107
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
108
 
109
  match = re.search(r"\{.*\}", response, re.S)
@@ -112,8 +114,8 @@ JSON format:
112
  return json.loads(match.group())
113
  except:
114
  pass
115
- return {"name": "", "email": "", "phone": "", "skills": [], "education": [], "experience": []}
116
-
117
  # ===============================
118
  # Main Parse Function
119
  # ===============================
 
77
  # Zephyr Parsing
78
  # ===============================
79
  def parse_with_zephyr(text: str) -> dict:
80
+ """Use Zephyr-7B to extract resume details in JSON format."""
81
+
82
  prompt = f"""
83
+ You are an information extraction system.
 
84
 
85
+ Extract the following fields from the resume text.
86
+ ⚠️ DO NOT return placeholders like "Full Name" or "Skill1".
87
+ Return ONLY actual values from the resume. If a field is missing, leave it as an empty string or empty list.
88
+
89
+ Fields to extract:
90
+ - name
91
+ - skills (list of skills)
92
+ - education (list of degrees + institutions)
93
+ - experience (list of jobs with company, title, dates)
94
 
95
  Resume:
96
  {text}
97
 
98
+ Return ONLY a valid JSON in this format:
99
  {{
100
+ "name": "<actual name or empty string>",
101
+ "skills": ["<actual skill>", "<actual skill>"],
102
+ "education": ["<Degree - Institution>", "<Degree - Institution>"],
103
+ "experience": ["<Job - Company (Dates)>", "<Job - Company (Dates)>"]
 
 
104
  }}
105
  """
106
+
107
  inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
108
+ outputs = model.generate(**inputs, max_new_tokens=512, do_sample=False, temperature=0)
109
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
110
 
111
  match = re.search(r"\{.*\}", response, re.S)
 
114
  return json.loads(match.group())
115
  except:
116
  pass
117
+
118
+ return {"name": "", "skills": [], "education": [], "experience": []}
119
  # ===============================
120
  # Main Parse Function
121
  # ===============================