Spaces:

husseinelsaadi
/

Codingo

Paused

App Files Files Community

husseinelsaadi committed on Jul 27

Commit

2445440

1 Parent(s): 329b59a

resume parser removed

Browse files

Files changed (8) hide show

README +0 -0
app.py +46 -43
backend/models/resume_parser/resume_to_features.py +0 -251
backend/routes/auth.py +27 -10
backend/services/interview_engine.py +37 -7
backend/templates/apply.html +1 -1
backend/utils/luna_phase1.py +0 -0
requirements.txt +3 -12

README DELETED Viewed

File without changes

app.py CHANGED Viewed

@@ -26,10 +26,8 @@ sys.path.append(current_dir)
 # Import and initialize DB
 from backend.models.database import db, Job, Application, init_db
 from backend.models.user import User
-from backend.routes.auth import auth_bp
 from backend.routes.interview_api import interview_api
-from backend.models.resume_parser.resume_to_features import extract_resume_features
 # Initialize Flask app
 app = Flask(
     __name__,
@@ -88,31 +86,6 @@ def load_user(user_id):
 app.register_blueprint(auth_bp)
 app.register_blueprint(interview_api, url_prefix="/api")
-def handle_resume_upload(file):
-    """Save uploaded file temporarily, extract features, then clean up."""
-    if not file or file.filename == '':
-        return None, "No file uploaded", None
-    try:
-        filename = secure_filename(file.filename)
-        temp_dir = '/tmp/temp'  # Use /tmp for temporary files
-        os.makedirs(temp_dir, exist_ok=True)
-        filepath = os.path.join(temp_dir, filename)
-        file.save(filepath)
-        features = extract_resume_features(filepath)
-        # Clean up
-        try:
-            os.remove(filepath)
-        except:
-            pass
-        return features, None, filename
-    except Exception as e:
-        print(f"Error in handle_resume_upload: {e}")
-        return None, str(e), None
 # Routes (keep your existing routes)
 @app.route('/')
 def index():
@@ -132,29 +105,43 @@ def job_detail(job_id):
 @login_required
 def apply(job_id):
     job = Job.query.get_or_404(job_id)
     if request.method == 'POST':
         file = request.files.get('resume')
-        features, error, _ = handle_resume_upload(file)
-        if error or not features:
-            flash("Resume parsing failed. Please try again.", "danger")
             return render_template('apply.html', job=job)
         application = Application(
             job_id=job_id,
             user_id=current_user.id,
             name=current_user.username,
             email=current_user.email,
             extracted_features=json.dumps(features)
         )
         db.session.add(application)
         db.session.commit()
         flash('Your application has been submitted successfully!', 'success')
         return redirect(url_for('jobs'))
     return render_template('apply.html', job=job)
 @app.route('/my_applications')
@@ -168,14 +155,30 @@ def my_applications():
 @app.route('/parse_resume', methods=['POST'])
 def parse_resume():
     file = request.files.get('resume')
-    features, error, _ = handle_resume_upload(file)
     if error:
-        return {"error": "Error parsing resume. Please try again."}, 400
     if not features:
-        return {"error": "Failed to extract resume details."}, 400
     response = {
         "name": features.get('name', ''),
         "email": features.get('email', ''),

 # Import and initialize DB
 from backend.models.database import db, Job, Application, init_db
 from backend.models.user import User
+from backend.routes.auth import auth_bp, handle_resume_upload
 from backend.routes.interview_api import interview_api
 # Initialize Flask app
 app = Flask(
     __name__,
 app.register_blueprint(auth_bp)
 app.register_blueprint(interview_api, url_prefix="/api")
 # Routes (keep your existing routes)
 @app.route('/')
 def index():
 @login_required
 def apply(job_id):
     job = Job.query.get_or_404(job_id)
     if request.method == 'POST':
+        # Retrieve the uploaded resume file from the request.  The ``name``
+        # attribute in the HTML form is ``resume``.
         file = request.files.get('resume')
+        # Use our safe upload helper to store the resume and obtain an empty
+        # features dictionary.  ``filepath`` contains the location where the
+        # file was saved, allowing us to persist a reference in the database.
+        features, error, filepath = handle_resume_upload(file)
+        # If there was an error saving the resume, notify the user.  We no
+        # longer attempt to parse the resume contents, so an empty
+        # features dictionary is considered valid.
+        if error:
+            flash("Resume upload failed. Please try again.", "danger")
             return render_template('apply.html', job=job)
+        # Ensure features is a dictionary for JSON serialization.  An empty
+        # dictionary results in a non-empty JSON string ("{}"), which is
+        # truthy and enables the interview feature on the applications page.
+        if not features:
+            features = {}
         application = Application(
             job_id=job_id,
             user_id=current_user.id,
             name=current_user.username,
             email=current_user.email,
+            resume_path=filepath,
             extracted_features=json.dumps(features)
         )
         db.session.add(application)
         db.session.commit()
         flash('Your application has been submitted successfully!', 'success')
         return redirect(url_for('jobs'))
     return render_template('apply.html', job=job)
 @app.route('/my_applications')
 @app.route('/parse_resume', methods=['POST'])
 def parse_resume():
     file = request.files.get('resume')
+    features, error, filepath = handle_resume_upload(file)
+    # If the upload failed, return an error.  Parsing is no longer
+    # supported, so we do not attempt to inspect the resume contents.
     if error:
+        return {"error": "Error processing resume. Please try again."}, 400
+    # If no features were extracted (the normal case now), respond with
+    # empty fields rather than an error.  This preserves the API
+    # contract expected by any front-end code that might call this
+    # endpoint.
     if not features:
+        return {
+            "name": "",
+            "email": "",
+            "mobile_number": "",
+            "skills": [],
+            "experience": [],
+            "education": [],
+            "summary": ""
+        }, 200
+    # Should features contain values (unlikely in the new implementation),
+    # pass them through to the client.
     response = {
         "name": features.get('name', ''),
         "email": features.get('email', ''),

backend/models/resume_parser/resume_to_features.py DELETED Viewed

@@ -1,251 +0,0 @@
-import os
-import re
-import json
-from pathlib import Path
-import PyPDF2
-from docx import Document
-import textract
-class SimpleResumeParser:
-    def __init__(self):
-        # Common skills keywords
-        self.skills_keywords = [
-            'python', 'javascript', 'java', 'c++', 'c#', 'php', 'ruby', 'go', 'rust',
-            'html', 'css', 'react', 'angular', 'vue', 'node.js', 'express', 'django',
-            'flask', 'spring', 'laravel', 'rails', 'asp.net', 'jquery', 'bootstrap',
-            'sql', 'mysql', 'postgresql', 'mongodb', 'redis', 'elasticsearch',
-            'aws', 'azure', 'gcp', 'docker', 'kubernetes', 'jenkins', 'git', 'github',
-            'machine learning', 'deep learning', 'tensorflow', 'pytorch', 'scikit-learn',
-            'data analysis', 'pandas', 'numpy', 'matplotlib', 'tableau', 'power bi',
-            'agile', 'scrum', 'devops', 'ci/cd', 'microservices', 'api', 'rest', 'graphql'
-        ]
-        # Education keywords
-        self.education_keywords = [
-            'bachelor', 'master', 'phd', 'degree', 'university', 'college', 'institute',
-            'computer science', 'engineering', 'mathematics', 'physics', 'chemistry',
-            'business', 'mba', 'certification', 'diploma'
-        ]
-        # Experience keywords
-        self.experience_keywords = [
-            'experience', 'worked', 'developed', 'managed', 'led', 'created', 'built',
-            'designed', 'implemented', 'maintained', 'optimized', 'improved', 'years'
-        ]
-    def extract_text_from_pdf(self, file_path):
-        """Extract text from PDF file"""
-        try:
-            with open(file_path, 'rb') as file:
-                reader = PyPDF2.PdfReader(file)
-                text = ""
-                for page in reader.pages:
-                    text += page.extract_text() + "\n"
-                return text
-        except Exception as e:
-            print(f"Error reading PDF: {e}")
-            return ""
-    def extract_text_from_docx(self, file_path):
-        """Extract text from DOCX file"""
-        try:
-            doc = Document(file_path)
-            text = ""
-            for paragraph in doc.paragraphs:
-                text += paragraph.text + "\n"
-            return text
-        except Exception as e:
-            print(f"Error reading DOCX: {e}")
-            return ""
-    def extract_text_from_doc(self, file_path):
-        """Extract text from DOC file using textract"""
-        try:
-            text = textract.process(file_path).decode('utf-8')
-            return text
-        except Exception as e:
-            print(f"Error reading DOC: {e}")
-            return ""
-    def extract_text(self, file_path):
-        """Extract text based on file extension"""
-        file_extension = Path(file_path).suffix.lower()
-        if file_extension == '.pdf':
-            return self.extract_text_from_pdf(file_path)
-        elif file_extension == '.docx':
-            return self.extract_text_from_docx(file_path)
-        elif file_extension == '.doc':
-            return self.extract_text_from_doc(file_path)
-        else:
-            return ""
-    def extract_email(self, text):
-        """Extract email addresses from text"""
-        email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b'
-        emails = re.findall(email_pattern, text)
-        return emails[0] if emails else ""
-    def extract_phone(self, text):
-        """Extract phone numbers from text"""
-        phone_patterns = [
-            r'\+?1?[-.\s]?$$?([0-9]{3})$$?[-.\s]?([0-9]{3})[-.\s]?([0-9]{4})',
-            r'\+?([0-9]{1,3})[-.\s]?([0-9]{3,4})[-.\s]?([0-9]{3,4})[-.\s]?([0-9]{3,4})',
-            r'(\d{3}[-.\s]?\d{3}[-.\s]?\d{4})',
-            r'($$\d{3}$$\s?\d{3}[-.\s]?\d{4})'
-        ]
-        for pattern in phone_patterns:
-            matches = re.findall(pattern, text)
-            if matches:
-                if isinstance(matches[0], tuple):
-                    return ''.join(matches[0])
-                return matches[0]
-        return ""
-    def extract_name(self, text):
-        """Extract name from text (simple heuristic)"""
-        lines = text.split('\n')
-        for line in lines[:5]:  # Check first 5 lines
-            line = line.strip()
-            if len(line.split()) == 2 and line.replace(' ', '').isalpha():
-                # Simple check: two words, all alphabetic
-                if not any(keyword in line.lower() for keyword in ['resume', 'cv', 'curriculum']):
-                    return line.title()
-        return ""
-    def extract_skills(self, text):
-        """Extract skills from text"""
-        text_lower = text.lower()
-        found_skills = []
-        for skill in self.skills_keywords:
-            if skill.lower() in text_lower:
-                found_skills.append(skill.title())
-        # Remove duplicates and return
-        return list(set(found_skills))
-    def extract_education(self, text):
-        """Extract education information"""
-        text_lower = text.lower()
-        education = []
-        # Look for education section
-        education_section = ""
-        lines = text.split('\n')
-        in_education_section = False
-        for line in lines:
-            line_lower = line.lower()
-            if any(keyword in line_lower for keyword in ['education', 'academic', 'qualification']):
-                in_education_section = True
-                continue
-            elif in_education_section and any(keyword in line_lower for keyword in ['experience', 'work', 'employment', 'project']):
-                break
-            elif in_education_section:
-                education_section += line + " "
-        # Extract degrees and institutions
-        for keyword in self.education_keywords:
-            if keyword in text_lower:
-                # Find context around the keyword
-                pattern = rf'.{{0,50}}{re.escape(keyword)}.{{0,50}}'
-                matches = re.findall(pattern, text, re.IGNORECASE)
-                education.extend(matches)
-        return education[:3]  # Return top 3 education entries
-    def extract_experience(self, text):
-        """Extract work experience"""
-        experience = []
-        lines = text.split('\n')
-        # Look for experience section
-        in_experience_section = False
-        current_experience = ""
-        for line in lines:
-            line_lower = line.lower()
-            if any(keyword in line_lower for keyword in ['experience', 'work', 'employment', 'career']):
-                in_experience_section = True
-                continue
-            elif in_experience_section and any(keyword in line_lower for keyword in ['education', 'skill', 'project']):
-                if current_experience:
-                    experience.append(current_experience.strip())
-                break
-            elif in_experience_section:
-                if line.strip():
-                    current_experience += line + " "
-                elif current_experience:
-                    experience.append(current_experience.strip())
-                    current_experience = ""
-        if current_experience:
-            experience.append(current_experience.strip())
-        return experience[:3]  # Return top 3 experience entries
-    def extract_summary(self, text):
-        """Extract summary/objective"""
-        lines = text.split('\n')
-        summary = ""
-        for i, line in enumerate(lines):
-            line_lower = line.lower()
-            if any(keyword in line_lower for keyword in ['summary', 'objective', 'profile', 'about']):
-                # Get next few lines as summary
-                summary_lines = lines[i+1:i+4]
-                summary = ' '.join([l.strip() for l in summary_lines if l.strip()])
-                break
-        return summary[:200]  # Limit to 200 characters
-def extract_resume_features(file_path):
-    """
-    Main function to extract features from resume
-    Returns a dictionary with extracted information
-    """
-    try:
-        parser = SimpleResumeParser()
-        text = parser.extract_text(file_path)
-        if not text:
-            return {
-                'name': '',
-                'email': '',
-                'mobile_number': '',
-                'skills': [],
-                'experience': [],
-                'education': [],
-                'summary': ''
-            }
-        # Extract all features
-        features = {
-            'name': parser.extract_name(text),
-            'email': parser.extract_email(text),
-            'mobile_number': parser.extract_phone(text),
-            'skills': parser.extract_skills(text),
-            'experience': parser.extract_experience(text),
-            'education': parser.extract_education(text),
-            'summary': parser.extract_summary(text)
-        }
-        return features
-    except Exception as e:
-        print(f"Error extracting resume features: {e}")
-        return {
-            'name': '',
-            'email': '',
-            'mobile_number': '',
-            'skills': [],
-            'experience': [],
-            'education': [],
-            'summary': ''
-        }
-# For backward compatibility
-def parse_resume(file_path):
-    return extract_resume_features(file_path)

backend/routes/auth.py CHANGED Viewed

@@ -14,25 +14,42 @@ auth_bp = Blueprint('auth', __name__)
 def handle_resume_upload(file):
     """
-    Save uploaded file temporarily, extract features, then clean up.
-    Returns (features_dict, error_message, filename)
     """
     if not file or file.filename == '':
         return None, "No file uploaded", None
     try:
-        # Get current directory for temp folder
-        current_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
         filename = secure_filename(file.filename)
-        filepath = os.path.join(current_dir, 'temp', filename)
-        os.makedirs(os.path.dirname(filepath), exist_ok=True)
-        file.save(filepath)
-        features = extract_resume_features(filepath)
-        os.remove(filepath)  # Clean up after parsing
-        return features, None, filename
     except Exception as e:
         print(f"Error in handle_resume_upload: {e}")
         return None, str(e), None

 def handle_resume_upload(file):
     """
+    Handle a resume upload by saving the file to a temporary location and
+    returning an empty feature dictionary.  This function no longer attempts
+    to parse the resume contents; it simply stores the file so that it can
+    be referenced later (e.g. for downloading or inspection) and returns
+    a placeholder features object.  A tuple of (features_dict, error_message,
+    filepath) is returned.  If an error occurs, ``features_dict`` will be
+    ``None`` and ``error_message`` will contain a description of the error.
     """
+    # Validate that a file was provided
     if not file or file.filename == '':
         return None, "No file uploaded", None
     try:
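+        # Generate a safe filename and determine the target directory.  Use
+        # ``/tmp/resumes`` so that the directory is writable within Hugging
+        # Face Spaces.  Creating the directory with ``exist_ok=True`` ensures
+        # that it is available without raising an error if it already exists.
+        # NOTE(review): the file is stored under its sanitized name only, so
+        # two uploads with the same filename will overwrite each other in
+        # this shared directory -- confirm whether a unique prefix is needed.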
         filename = secure_filename(file.filename)
+        temp_dir = os.path.join('/tmp', 'resumes')
+        os.makedirs(temp_dir, exist_ok=True)
+        filepath = os.path.join(temp_dir, filename)
+        # Save the uploaded file to the temporary location.  If this fails
+        # (e.g. due to permissions issues) the exception will be caught below.
+        file.save(filepath)
+        # Resume parsing has been removed from this project.  To maintain
+        # compatibility with the rest of the application, return an empty
+        # dictionary of features.  Downstream code will interpret an empty
+        # dictionary as "no extracted features", which still allows the
+        # interview flow to proceed.
+        features = {}
+        return features, None, filepath
     except Exception as e:
+        # Log the error and propagate it to the caller.  Returning None for
+        # ``features`` signals that the upload failed.
         print(f"Error in handle_resume_upload: {e}")
         return None, str(e), None

backend/services/interview_engine.py CHANGED Viewed

@@ -11,13 +11,43 @@ import torch
 # Initialize models
 chat_groq_api = os.getenv("GROQ_API_KEY")
-if not chat_groq_api:
-    raise ValueError("GROQ_API_KEY is not set in environment variables.")
-groq_llm = ChatGroq(
-    temperature=0.7,
-    model_name="llama-3.3-70b-versatile",
-    api_key=chat_groq_api
-)
 # Initialize Whisper model
 #

 # Initialize models
 chat_groq_api = os.getenv("GROQ_API_KEY")
+# Attempt to initialize the Groq LLM only if an API key is provided.  When
+# running in environments where the key is unavailable (such as local
+# development or automated testing), fall back to a simple stub that
+# generates generic responses.  This avoids raising an exception at import
+# time and allows the rest of the application to run without external
+# dependencies.  See the DummyGroq class defined below.
+if chat_groq_api:
+    try:
+        groq_llm = ChatGroq(
+            temperature=0.7,
+            model_name="llama-3.3-70b-versatile",
+            api_key=chat_groq_api
+        )
+    except Exception as e:
+        logging.error(f"Error initializing ChatGroq LLM: {e}. Falling back to dummy model.")
+        groq_llm = None
+else:
+    groq_llm = None
+if groq_llm is None:
+    class DummyGroq:
+        """A fallback language model used when no Groq API key is set.
+        The ``invoke`` method of this class returns a simple canned response
+        rather than calling an external API.  This ensures that the
+        interview functionality still produces a sensible prompt, albeit
+        without advanced LLM behaviour.
+        """
+        def invoke(self, prompt: str):
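+            # Return a fixed, generic opening question.  This implementation
+            # ignores the prompt contents entirely; a more sophisticated
+            # fallback could parse ``prompt`` to tailor its responses.
+            # NOTE(review): this returns a plain ``str``; confirm that callers
+            # do not expect a message object with a ``.content`` attribute.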
+            return "Tell me about yourself and why you're interested in this position."
+    groq_llm = DummyGroq()
 # Initialize Whisper model
 #

backend/templates/apply.html CHANGED Viewed

@@ -25,7 +25,7 @@
     <div class="card">
         <div class="card-header">
             <h2>Submit Your Application</h2>
-            <p>Please upload your resume (PDF, DOCX). Our system will extract your info automatically.</p>
         </div>
         <div class="card-body">

     <div class="card">
         <div class="card-header">
             <h2>Submit Your Application</h2>
+            <p>Please upload your resume (PDF, DOCX). Your file will be saved securely for recruiters to review.</p>
         </div>
         <div class="card-body">

backend/utils/luna_phase1.py DELETED Viewed

File without changes

requirements.txt CHANGED Viewed

@@ -1,12 +1,5 @@
 flask
-scikit-learn
-pandas
-joblib
-PyMuPDF
-python-docx
-spacy>=3.0.0
-nltk
-pyresparser
 flask_login
 flask_sqlalchemy
 flask_wtf
@@ -36,9 +29,7 @@ cohere==5.16.1
 # Vector DB
 qdrant-client==1.14.3
-# PDF & DOCX parsing
-PyPDF2==3.0.1
-python-docx==1.2.0
 # Audio processing
 ffmpeg-python==0.2.0
@@ -46,7 +37,7 @@ inputimeout==1.0.4
 evaluate==0.4.5
 accelerate==0.29.3
 huggingface_hub==0.20.3
-textract==1.6.3
 bitsandbytes
 faster-whisper==0.10.0
 edge-tts==6.1.2

 flask
 flask_login
 flask_sqlalchemy
 flask_wtf
 # Vector DB
 qdrant-client==1.14.3
+# PDF & DOCX parsing (removed; resume parsing is no longer supported)
 # Audio processing
 ffmpeg-python==0.2.0
 evaluate==0.4.5
 accelerate==0.29.3
 huggingface_hub==0.20.3
+# textract removed; no resume parsing
 bitsandbytes
 faster-whisper==0.10.0
 edge-tts==6.1.2