husseinelsaadi committed on
Commit
2445440
·
1 Parent(s): 329b59a

resume parser removed

Browse files
README DELETED
File without changes
app.py CHANGED
@@ -26,10 +26,8 @@ sys.path.append(current_dir)
26
  # Import and initialize DB
27
  from backend.models.database import db, Job, Application, init_db
28
  from backend.models.user import User
29
- from backend.routes.auth import auth_bp
30
  from backend.routes.interview_api import interview_api
31
- from backend.models.resume_parser.resume_to_features import extract_resume_features
32
-
33
  # Initialize Flask app
34
  app = Flask(
35
  __name__,
@@ -88,31 +86,6 @@ def load_user(user_id):
88
  app.register_blueprint(auth_bp)
89
  app.register_blueprint(interview_api, url_prefix="/api")
90
 
91
- def handle_resume_upload(file):
92
- """Save uploaded file temporarily, extract features, then clean up."""
93
- if not file or file.filename == '':
94
- return None, "No file uploaded", None
95
-
96
- try:
97
- filename = secure_filename(file.filename)
98
- temp_dir = '/tmp/temp' # Use /tmp for temporary files
99
- os.makedirs(temp_dir, exist_ok=True)
100
- filepath = os.path.join(temp_dir, filename)
101
-
102
- file.save(filepath)
103
- features = extract_resume_features(filepath)
104
-
105
- # Clean up
106
- try:
107
- os.remove(filepath)
108
- except:
109
- pass
110
-
111
- return features, None, filename
112
- except Exception as e:
113
- print(f"Error in handle_resume_upload: {e}")
114
- return None, str(e), None
115
-
116
  # Routes (keep your existing routes)
117
  @app.route('/')
118
  def index():
@@ -132,29 +105,43 @@ def job_detail(job_id):
132
  @login_required
133
  def apply(job_id):
134
  job = Job.query.get_or_404(job_id)
135
-
136
  if request.method == 'POST':
 
 
137
  file = request.files.get('resume')
138
- features, error, _ = handle_resume_upload(file)
139
-
140
- if error or not features:
141
- flash("Resume parsing failed. Please try again.", "danger")
 
 
 
 
 
 
142
  return render_template('apply.html', job=job)
143
-
 
 
 
 
 
 
144
  application = Application(
145
  job_id=job_id,
146
  user_id=current_user.id,
147
  name=current_user.username,
148
  email=current_user.email,
 
149
  extracted_features=json.dumps(features)
150
  )
151
-
152
  db.session.add(application)
153
  db.session.commit()
154
-
155
  flash('Your application has been submitted successfully!', 'success')
156
  return redirect(url_for('jobs'))
157
-
158
  return render_template('apply.html', job=job)
159
 
160
  @app.route('/my_applications')
@@ -168,14 +155,30 @@ def my_applications():
168
  @app.route('/parse_resume', methods=['POST'])
169
  def parse_resume():
170
  file = request.files.get('resume')
171
- features, error, _ = handle_resume_upload(file)
172
-
 
 
173
  if error:
174
- return {"error": "Error parsing resume. Please try again."}, 400
175
-
 
 
 
 
176
  if not features:
177
- return {"error": "Failed to extract resume details."}, 400
178
-
 
 
 
 
 
 
 
 
 
 
179
  response = {
180
  "name": features.get('name', ''),
181
  "email": features.get('email', ''),
 
26
  # Import and initialize DB
27
  from backend.models.database import db, Job, Application, init_db
28
  from backend.models.user import User
29
+ from backend.routes.auth import auth_bp, handle_resume_upload
30
  from backend.routes.interview_api import interview_api
 
 
31
  # Initialize Flask app
32
  app = Flask(
33
  __name__,
 
86
  app.register_blueprint(auth_bp)
87
  app.register_blueprint(interview_api, url_prefix="/api")
88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  # Routes (keep your existing routes)
90
  @app.route('/')
91
  def index():
 
105
  @login_required
106
  def apply(job_id):
107
  job = Job.query.get_or_404(job_id)
 
108
  if request.method == 'POST':
109
+ # Retrieve the uploaded resume file from the request. The ``name``
110
+ # attribute in the HTML form is ``resume``.
111
  file = request.files.get('resume')
112
+ # Use our safe upload helper to store the resume and obtain an empty
113
+ # features dictionary. ``filepath`` contains the location where the
114
+ # file was saved, allowing us to persist a reference in the database.
115
+ features, error, filepath = handle_resume_upload(file)
116
+
117
+ # If there was an error saving the resume, notify the user. We no
118
+ # longer attempt to parse the resume contents, so an empty
119
+ # features dictionary is considered valid.
120
+ if error:
121
+ flash("Resume upload failed. Please try again.", "danger")
122
  return render_template('apply.html', job=job)
123
+
124
+ # Ensure features is a dictionary for JSON serialization. An empty
125
+ # dictionary results in a non-empty JSON string ("{}"), which is
126
+ # truthy and enables the interview feature on the applications page.
127
+ if not features:
128
+ features = {}
129
+
130
  application = Application(
131
  job_id=job_id,
132
  user_id=current_user.id,
133
  name=current_user.username,
134
  email=current_user.email,
135
+ resume_path=filepath,
136
  extracted_features=json.dumps(features)
137
  )
138
+
139
  db.session.add(application)
140
  db.session.commit()
141
+
142
  flash('Your application has been submitted successfully!', 'success')
143
  return redirect(url_for('jobs'))
144
+
145
  return render_template('apply.html', job=job)
146
 
147
  @app.route('/my_applications')
 
155
  @app.route('/parse_resume', methods=['POST'])
156
  def parse_resume():
157
  file = request.files.get('resume')
158
+ features, error, filepath = handle_resume_upload(file)
159
+
160
+ # If the upload failed, return an error. Parsing is no longer
161
+ # supported, so we do not attempt to inspect the resume contents.
162
  if error:
163
+ return {"error": "Error processing resume. Please try again."}, 400
164
+
165
+ # If no features were extracted (the normal case now), respond with
166
+ # empty fields rather than an error. This preserves the API
167
+ # contract expected by any front-end code that might call this
168
+ # endpoint.
169
  if not features:
170
+ return {
171
+ "name": "",
172
+ "email": "",
173
+ "mobile_number": "",
174
+ "skills": [],
175
+ "experience": [],
176
+ "education": [],
177
+ "summary": ""
178
+ }, 200
179
+
180
+ # Should features contain values (unlikely in the new implementation),
181
+ # pass them through to the client.
182
  response = {
183
  "name": features.get('name', ''),
184
  "email": features.get('email', ''),
backend/models/resume_parser/resume_to_features.py DELETED
@@ -1,251 +0,0 @@
1
- import os
2
- import re
3
- import json
4
- from pathlib import Path
5
- import PyPDF2
6
- from docx import Document
7
- import textract
8
-
9
- class SimpleResumeParser:
10
- def __init__(self):
11
- # Common skills keywords
12
- self.skills_keywords = [
13
- 'python', 'javascript', 'java', 'c++', 'c#', 'php', 'ruby', 'go', 'rust',
14
- 'html', 'css', 'react', 'angular', 'vue', 'node.js', 'express', 'django',
15
- 'flask', 'spring', 'laravel', 'rails', 'asp.net', 'jquery', 'bootstrap',
16
- 'sql', 'mysql', 'postgresql', 'mongodb', 'redis', 'elasticsearch',
17
- 'aws', 'azure', 'gcp', 'docker', 'kubernetes', 'jenkins', 'git', 'github',
18
- 'machine learning', 'deep learning', 'tensorflow', 'pytorch', 'scikit-learn',
19
- 'data analysis', 'pandas', 'numpy', 'matplotlib', 'tableau', 'power bi',
20
- 'agile', 'scrum', 'devops', 'ci/cd', 'microservices', 'api', 'rest', 'graphql'
21
- ]
22
-
23
- # Education keywords
24
- self.education_keywords = [
25
- 'bachelor', 'master', 'phd', 'degree', 'university', 'college', 'institute',
26
- 'computer science', 'engineering', 'mathematics', 'physics', 'chemistry',
27
- 'business', 'mba', 'certification', 'diploma'
28
- ]
29
-
30
- # Experience keywords
31
- self.experience_keywords = [
32
- 'experience', 'worked', 'developed', 'managed', 'led', 'created', 'built',
33
- 'designed', 'implemented', 'maintained', 'optimized', 'improved', 'years'
34
- ]
35
-
36
- def extract_text_from_pdf(self, file_path):
37
- """Extract text from PDF file"""
38
- try:
39
- with open(file_path, 'rb') as file:
40
- reader = PyPDF2.PdfReader(file)
41
- text = ""
42
- for page in reader.pages:
43
- text += page.extract_text() + "\n"
44
- return text
45
- except Exception as e:
46
- print(f"Error reading PDF: {e}")
47
- return ""
48
-
49
- def extract_text_from_docx(self, file_path):
50
- """Extract text from DOCX file"""
51
- try:
52
- doc = Document(file_path)
53
- text = ""
54
- for paragraph in doc.paragraphs:
55
- text += paragraph.text + "\n"
56
- return text
57
- except Exception as e:
58
- print(f"Error reading DOCX: {e}")
59
- return ""
60
-
61
- def extract_text_from_doc(self, file_path):
62
- """Extract text from DOC file using textract"""
63
- try:
64
- text = textract.process(file_path).decode('utf-8')
65
- return text
66
- except Exception as e:
67
- print(f"Error reading DOC: {e}")
68
- return ""
69
-
70
- def extract_text(self, file_path):
71
- """Extract text based on file extension"""
72
- file_extension = Path(file_path).suffix.lower()
73
-
74
- if file_extension == '.pdf':
75
- return self.extract_text_from_pdf(file_path)
76
- elif file_extension == '.docx':
77
- return self.extract_text_from_docx(file_path)
78
- elif file_extension == '.doc':
79
- return self.extract_text_from_doc(file_path)
80
- else:
81
- return ""
82
-
83
- def extract_email(self, text):
84
- """Extract email addresses from text"""
85
- email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b'
86
- emails = re.findall(email_pattern, text)
87
- return emails[0] if emails else ""
88
-
89
- def extract_phone(self, text):
90
- """Extract phone numbers from text"""
91
- phone_patterns = [
92
- r'\+?1?[-.\s]?$$?([0-9]{3})$$?[-.\s]?([0-9]{3})[-.\s]?([0-9]{4})',
93
- r'\+?([0-9]{1,3})[-.\s]?([0-9]{3,4})[-.\s]?([0-9]{3,4})[-.\s]?([0-9]{3,4})',
94
- r'(\d{3}[-.\s]?\d{3}[-.\s]?\d{4})',
95
- r'($$\d{3}$$\s?\d{3}[-.\s]?\d{4})'
96
- ]
97
-
98
- for pattern in phone_patterns:
99
- matches = re.findall(pattern, text)
100
- if matches:
101
- if isinstance(matches[0], tuple):
102
- return ''.join(matches[0])
103
- return matches[0]
104
- return ""
105
-
106
- def extract_name(self, text):
107
- """Extract name from text (simple heuristic)"""
108
- lines = text.split('\n')
109
- for line in lines[:5]: # Check first 5 lines
110
- line = line.strip()
111
- if len(line.split()) == 2 and line.replace(' ', '').isalpha():
112
- # Simple check: two words, all alphabetic
113
- if not any(keyword in line.lower() for keyword in ['resume', 'cv', 'curriculum']):
114
- return line.title()
115
- return ""
116
-
117
- def extract_skills(self, text):
118
- """Extract skills from text"""
119
- text_lower = text.lower()
120
- found_skills = []
121
-
122
- for skill in self.skills_keywords:
123
- if skill.lower() in text_lower:
124
- found_skills.append(skill.title())
125
-
126
- # Remove duplicates and return
127
- return list(set(found_skills))
128
-
129
- def extract_education(self, text):
130
- """Extract education information"""
131
- text_lower = text.lower()
132
- education = []
133
-
134
- # Look for education section
135
- education_section = ""
136
- lines = text.split('\n')
137
- in_education_section = False
138
-
139
- for line in lines:
140
- line_lower = line.lower()
141
- if any(keyword in line_lower for keyword in ['education', 'academic', 'qualification']):
142
- in_education_section = True
143
- continue
144
- elif in_education_section and any(keyword in line_lower for keyword in ['experience', 'work', 'employment', 'project']):
145
- break
146
- elif in_education_section:
147
- education_section += line + " "
148
-
149
- # Extract degrees and institutions
150
- for keyword in self.education_keywords:
151
- if keyword in text_lower:
152
- # Find context around the keyword
153
- pattern = rf'.{{0,50}}{re.escape(keyword)}.{{0,50}}'
154
- matches = re.findall(pattern, text, re.IGNORECASE)
155
- education.extend(matches)
156
-
157
- return education[:3] # Return top 3 education entries
158
-
159
- def extract_experience(self, text):
160
- """Extract work experience"""
161
- experience = []
162
- lines = text.split('\n')
163
-
164
- # Look for experience section
165
- in_experience_section = False
166
- current_experience = ""
167
-
168
- for line in lines:
169
- line_lower = line.lower()
170
- if any(keyword in line_lower for keyword in ['experience', 'work', 'employment', 'career']):
171
- in_experience_section = True
172
- continue
173
- elif in_experience_section and any(keyword in line_lower for keyword in ['education', 'skill', 'project']):
174
- if current_experience:
175
- experience.append(current_experience.strip())
176
- break
177
- elif in_experience_section:
178
- if line.strip():
179
- current_experience += line + " "
180
- elif current_experience:
181
- experience.append(current_experience.strip())
182
- current_experience = ""
183
-
184
- if current_experience:
185
- experience.append(current_experience.strip())
186
-
187
- return experience[:3] # Return top 3 experience entries
188
-
189
- def extract_summary(self, text):
190
- """Extract summary/objective"""
191
- lines = text.split('\n')
192
- summary = ""
193
-
194
- for i, line in enumerate(lines):
195
- line_lower = line.lower()
196
- if any(keyword in line_lower for keyword in ['summary', 'objective', 'profile', 'about']):
197
- # Get next few lines as summary
198
- summary_lines = lines[i+1:i+4]
199
- summary = ' '.join([l.strip() for l in summary_lines if l.strip()])
200
- break
201
-
202
- return summary[:200] # Limit to 200 characters
203
-
204
- def extract_resume_features(file_path):
205
- """
206
- Main function to extract features from resume
207
- Returns a dictionary with extracted information
208
- """
209
- try:
210
- parser = SimpleResumeParser()
211
- text = parser.extract_text(file_path)
212
-
213
- if not text:
214
- return {
215
- 'name': '',
216
- 'email': '',
217
- 'mobile_number': '',
218
- 'skills': [],
219
- 'experience': [],
220
- 'education': [],
221
- 'summary': ''
222
- }
223
-
224
- # Extract all features
225
- features = {
226
- 'name': parser.extract_name(text),
227
- 'email': parser.extract_email(text),
228
- 'mobile_number': parser.extract_phone(text),
229
- 'skills': parser.extract_skills(text),
230
- 'experience': parser.extract_experience(text),
231
- 'education': parser.extract_education(text),
232
- 'summary': parser.extract_summary(text)
233
- }
234
-
235
- return features
236
-
237
- except Exception as e:
238
- print(f"Error extracting resume features: {e}")
239
- return {
240
- 'name': '',
241
- 'email': '',
242
- 'mobile_number': '',
243
- 'skills': [],
244
- 'experience': [],
245
- 'education': [],
246
- 'summary': ''
247
- }
248
-
249
- # For backward compatibility
250
- def parse_resume(file_path):
251
- return extract_resume_features(file_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
backend/routes/auth.py CHANGED
@@ -14,25 +14,42 @@ auth_bp = Blueprint('auth', __name__)
14
 
15
  def handle_resume_upload(file):
16
  """
17
- Save uploaded file temporarily, extract features, then clean up.
18
- Returns (features_dict, error_message, filename)
 
 
 
 
 
19
  """
 
20
  if not file or file.filename == '':
21
  return None, "No file uploaded", None
22
 
23
  try:
24
- # Get current directory for temp folder
25
- current_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 
 
26
  filename = secure_filename(file.filename)
27
- filepath = os.path.join(current_dir, 'temp', filename)
28
- os.makedirs(os.path.dirname(filepath), exist_ok=True)
29
- file.save(filepath)
30
 
31
- features = extract_resume_features(filepath)
32
- os.remove(filepath) # Clean up after parsing
 
33
 
34
- return features, None, filename
 
 
 
 
 
 
35
  except Exception as e:
 
 
36
  print(f"Error in handle_resume_upload: {e}")
37
  return None, str(e), None
38
 
 
14
 
15
  def handle_resume_upload(file):
16
  """
17
+ Handle a resume upload by saving the file to a temporary location and
18
+ returning an empty feature dictionary. This function no longer attempts
19
+ to parse the resume contents; it simply stores the file so that it can
20
+ be referenced later (e.g. for downloading or inspection) and returns
21
+ a placeholder features object. A tuple of (features_dict, error_message,
22
+ filepath) is returned. If an error occurs, ``features_dict`` will be
23
+ ``None`` and ``error_message`` will contain a description of the error.
24
  """
25
+ # Validate that a file was provided
26
  if not file or file.filename == '':
27
  return None, "No file uploaded", None
28
 
29
  try:
30
+ # Generate a safe filename and determine the target directory. Use
31
+ # ``/tmp/resumes`` so that the directory is writable within Hugging
32
+ # Face Spaces. Creating the directory with ``exist_ok=True`` ensures
33
+ # that it is available without raising an error if it already exists.
34
  filename = secure_filename(file.filename)
35
+ temp_dir = os.path.join('/tmp', 'resumes')
36
+ os.makedirs(temp_dir, exist_ok=True)
37
+ filepath = os.path.join(temp_dir, filename)
38
 
39
+ # Save the uploaded file to the temporary location. If this fails
40
+ # (e.g. due to permissions issues) the exception will be caught below.
41
+ file.save(filepath)
42
 
43
+ # Resume parsing has been removed from this project. To maintain
44
+ # compatibility with the rest of the application, return an empty
45
+ # dictionary of features. Downstream code will interpret an empty
46
+ # dictionary as "no extracted features", which still allows the
47
+ # interview flow to proceed.
48
+ features = {}
49
+ return features, None, filepath
50
  except Exception as e:
51
+ # Log the error and propagate it to the caller. Returning None for
52
+ # ``features`` signals that the upload failed.
53
  print(f"Error in handle_resume_upload: {e}")
54
  return None, str(e), None
55
 
backend/services/interview_engine.py CHANGED
@@ -11,13 +11,43 @@ import torch
11
 
12
  # Initialize models
13
  chat_groq_api = os.getenv("GROQ_API_KEY")
14
- if not chat_groq_api:
15
- raise ValueError("GROQ_API_KEY is not set in environment variables.")
16
- groq_llm = ChatGroq(
17
- temperature=0.7,
18
- model_name="llama-3.3-70b-versatile",
19
- api_key=chat_groq_api
20
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
  # Initialize Whisper model
23
  #
 
11
 
12
  # Initialize models
13
  chat_groq_api = os.getenv("GROQ_API_KEY")
14
+
15
+ # Attempt to initialize the Groq LLM only if an API key is provided. When
16
+ # running in environments where the key is unavailable (such as local
17
+ # development or automated testing), fall back to a simple stub that
18
+ # generates generic responses. This avoids raising an exception at import
19
+ # time and allows the rest of the application to run without external
20
+ # dependencies. See the DummyGroq class defined below.
21
+ if chat_groq_api:
22
+ try:
23
+ groq_llm = ChatGroq(
24
+ temperature=0.7,
25
+ model_name="llama-3.3-70b-versatile",
26
+ api_key=chat_groq_api
27
+ )
28
+ except Exception as e:
29
+ logging.error(f"Error initializing ChatGroq LLM: {e}. Falling back to dummy model.")
30
+ groq_llm = None
31
+ else:
32
+ groq_llm = None
33
+
34
+ if groq_llm is None:
35
+ class DummyGroq:
36
+ """A fallback language model used when no Groq API key is set.
37
+
38
+ The ``invoke`` method of this class returns a simple canned response
39
+ rather than calling an external API. This ensures that the
40
+ interview functionality still produces a sensible prompt, albeit
41
+ without advanced LLM behaviour.
42
+ """
43
+ def invoke(self, prompt: str):
44
+ # Provide a very generic question regardless of the prompt. This
45
+ # implementation ignores the prompt contents entirely; in a more
46
+ # sophisticated fallback you could parse ``prompt`` to tailor
47
+ # responses.
48
+ return "Tell me about yourself and why you're interested in this position."
49
+
50
+ groq_llm = DummyGroq()
51
 
52
  # Initialize Whisper model
53
  #
backend/templates/apply.html CHANGED
@@ -25,7 +25,7 @@
25
  <div class="card">
26
  <div class="card-header">
27
  <h2>Submit Your Application</h2>
28
- <p>Please upload your resume (PDF, DOCX). Our system will extract your info automatically.</p>
29
  </div>
30
 
31
  <div class="card-body">
 
25
  <div class="card">
26
  <div class="card-header">
27
  <h2>Submit Your Application</h2>
28
+ <p>Please upload your resume (PDF, DOCX). Your file will be saved securely for recruiters to review.</p>
29
  </div>
30
 
31
  <div class="card-body">
backend/utils/luna_phase1.py DELETED
File without changes
requirements.txt CHANGED
@@ -1,12 +1,5 @@
 
1
  flask
2
- scikit-learn
3
- pandas
4
- joblib
5
- PyMuPDF
6
- python-docx
7
- spacy>=3.0.0
8
- nltk
9
- pyresparser
10
  flask_login
11
  flask_sqlalchemy
12
  flask_wtf
@@ -36,9 +29,7 @@ cohere==5.16.1
36
  # Vector DB
37
  qdrant-client==1.14.3
38
 
39
- # PDF & DOCX parsing
40
- PyPDF2==3.0.1
41
- python-docx==1.2.0
42
 
43
  # Audio processing
44
  ffmpeg-python==0.2.0
@@ -46,7 +37,7 @@ inputimeout==1.0.4
46
  evaluate==0.4.5
47
  accelerate==0.29.3
48
  huggingface_hub==0.20.3
49
- textract==1.6.3
50
  bitsandbytes
51
  faster-whisper==0.10.0
52
  edge-tts==6.1.2
 
1
+
2
  flask
 
 
 
 
 
 
 
 
3
  flask_login
4
  flask_sqlalchemy
5
  flask_wtf
 
29
  # Vector DB
30
  qdrant-client==1.14.3
31
 
32
+ # PDF & DOCX parsing (removed; resume parsing is no longer supported)
 
 
33
 
34
  # Audio processing
35
  ffmpeg-python==0.2.0
 
37
  evaluate==0.4.5
38
  accelerate==0.29.3
39
  huggingface_hub==0.20.3
40
+ # textract removed; no resume parsing
41
  bitsandbytes
42
  faster-whisper==0.10.0
43
  edge-tts==6.1.2