Spaces:

Dhahlan2000
/

AppyJob

Sleeping

App Files Files Community

Dhahlan2000 committed on Jan 6

Commit

2d88e43

1 Parent(s): 7b2b80a

Refactor app.py to simplify CV text extraction and enhance email generation. Remove the parse_cv_sections function so that extract_cv_text returns the full CV text instead of structured sections. Update the related functions (create_email_prompt, conversation_predict, update_ui) to accept this text, improving the overall flow and user experience in the Streamlit interface. This refactor streamlines the handling of CV uploads and job descriptions, ensuring a more efficient email generation workflow.

Browse files

Files changed (1) hide show

app.py +21 -120

app.py CHANGED Viewed

@@ -8,84 +8,6 @@ import docx
 import re
 from typing import Dict
-def parse_cv_sections(text: str) -> Dict[str, str]:
-    """Parse CV text into structured sections."""
-    sections = {
-        'contact': '',
-        'education': '',
-        'experience': '',
-        'skills': '',
-        'projects': '',
-    }
-    # Common section headers in CVs with more variations
-    section_patterns = {
-        'contact': r'(?i)(contact|personal\s+information|profile|contact\s+details|about\s+me)',
-        'education': r'(?i)(education|academic|qualification|academic\s+background|educational\s+background)',
-        'experience': r'(?i)(experience|work|employment|professional|work\s+history|career|professional\s+experience)',
-        'skills': r'(?i)(skills|technical\s+skills|competencies|expertise|technologies|tools|programming|languages)',
-        'projects': r'(?i)(projects|personal\s+projects|portfolio|work\s+samples)',
-    }
-    # Split text into lines and clean
-    lines = [line.strip() for line in text.split('\n') if line.strip()]
-    current_section = None
-    section_content = []
-    # First pass: identify sections
-    for i, line in enumerate(lines):
-        # Check if line is a section header
-        for section, pattern in section_patterns.items():
-            if re.search(pattern, line, re.IGNORECASE):
-                # If we found a section header
-                if current_section:
-                    # Save previous section content
-                    sections[current_section] = '\n'.join(section_content)
-                current_section = section
-                section_content = []
-                break
-        else:
-            # If line is not a header and we have a current section, add to content
-            if current_section:
-                section_content.append(line)
-    # Save the last section
-    if current_section and section_content:
-        sections[current_section] = '\n'.join(section_content)
-    # If no sections were found, try to categorize the content
-    if all(not content for content in sections.values()):
-        lines_text = '\n'.join(lines)
-        # Look for email addresses and phone numbers for contact
-        email_pattern = r'[\w\.-]+@[\w\.-]+'
-        phone_pattern = r'[\+\d]?(\d{2,3}[-\.\s]?){2}\d{4}'
-        emails = re.findall(email_pattern, lines_text)
-        phones = re.findall(phone_pattern, lines_text)
-        if emails or phones:
-            sections['contact'] = '\n'.join(emails + phones)
-        # Look for education keywords
-        edu_keywords = r'(?i)(university|college|school|degree|bachelor|master|phd|diploma)'
-        edu_lines = [l for l in lines if re.search(edu_keywords, l)]
-        if edu_lines:
-            sections['education'] = '\n'.join(edu_lines)
-        # Look for experience keywords
-        exp_keywords = r'(?i)(worked|developer|engineer|manager|consultant|analyst)'
-        exp_lines = [l for l in lines if re.search(exp_keywords, l)]
-        if exp_lines:
-            sections['experience'] = '\n'.join(exp_lines)
-        # Look for skills
-        skill_keywords = r'(?i)(python|java|javascript|react|node|sql|aws|docker|kubernetes|git)'
-        skill_lines = [l for l in lines if re.search(skill_keywords, l)]
-        if skill_lines:
-            sections['skills'] = '\n'.join(skill_lines)
-    return sections
 def extract_cv_text(file):
     """Extract text from PDF or DOCX CV files."""
     if file is None:
@@ -98,28 +20,16 @@ def extract_cv_text(file):
         if file_ext == '.pdf':
             reader = PdfReader(file)
             for page in reader.pages:
-                text += page.extract_text() + "\n"
         elif file_ext == '.docx':
             doc = docx.Document(file)
             for paragraph in doc.paragraphs:
-                text += paragraph.text + "\n"
-            # Also check tables in docx
-            for table in doc.tables:
-                for row in table.rows:
-                    for cell in row.cells:
-                        text += cell.text + "\n"
         else:
             return "Unsupported file format. Please upload PDF or DOCX files."
-        # Parse the CV into sections
-        sections = parse_cv_sections(text)
-        # Verify that we have content
-        if all(not content.strip() for content in sections.values()):
-            return f"Could not parse CV sections. Raw text:\n{text}"
-        return sections
     except Exception as e:
         return f"Error processing file: {str(e)}"
@@ -130,20 +40,13 @@ access_token = os.getenv('API_KEY')
 # Initialize the inference client (if needed for other API-based tasks)
 client = InferenceClient(token=access_token)
-def create_email_prompt(job_description: str, cv_sections: Dict[str, str]) -> str:
     """Create a detailed prompt for email generation."""
     return f"""Job Description:
 {job_description}
 Your CV Details:
-Experience:
-{cv_sections['experience']}
-Skills:
-{cv_sections['skills']}
-Education:
-{cv_sections['education']}
 Instructions: Write a professional job application email following these guidelines:
 1. Start with a proper greeting
@@ -157,9 +60,9 @@ Keep the tone professional, confident, and enthusiastic. Be concise but impactfu
 Email:"""
-def conversation_predict(input_text: str, cv_sections: Dict[str, str]):
     """Generate a response using the model with streaming output."""
-    prompt = create_email_prompt(input_text, cv_sections)
     # Use the streaming API
     try:
@@ -223,18 +126,18 @@ CV Summary:
 # Streamlit UI section
 st.title("AI Job Application Email Generator")
-def update_ui(message, cv_file, cv_sections):
     """Handle the UI updates for email generation."""
     # Create placeholder for the generated email
     email_placeholder = st.empty()
     # Generate button
     if st.button("Generate Email", key="generate_button"):
-        if message and cv_file and isinstance(cv_sections, dict):
             email_text = ""
             # Stream the response
             try:
-                for chunk in conversation_predict(message, cv_sections):
                     if chunk:
                         email_text += chunk
                         # Update the text area with each chunk, using timestamp in key
@@ -256,27 +159,25 @@ with tab1:
     cv_file = st.file_uploader("Upload CV (PDF or DOCX)", type=["pdf", "docx"])
     if cv_file:
-        cv_sections = extract_cv_text(cv_file)
-        if isinstance(cv_sections, dict):
-            st.success("CV uploaded and parsed successfully!")
         else:
-            st.error(cv_sections)
-            cv_sections = None
     else:
-        cv_sections = None
     # Job description input
     st.markdown("### Job Description")
     message = st.text_area("Paste the job description here:", height=200)
     # Call the updated UI function with parameters
-    update_ui(message, cv_file, cv_sections)
 with tab2:
-    if cv_file and isinstance(cv_sections, dict):
-        st.markdown("### Parsed CV Details")
-        for section, content in cv_sections.items():
-            with st.expander(f"{section.title()}"):
-                st.text(content)
     else:
-        st.info("Upload a CV to view parsed details")

 import re
 from typing import Dict
 def extract_cv_text(file):
     """Extract text from PDF or DOCX CV files."""
     if file is None:
         if file_ext == '.pdf':
             reader = PdfReader(file)
             for page in reader.pages:
+                text += page.extract_text()
         elif file_ext == '.docx':
             doc = docx.Document(file)
             for paragraph in doc.paragraphs:
+                text += paragraph.text + '\n'
         else:
             return "Unsupported file format. Please upload PDF or DOCX files."
+        return text  # Return the full text instead of parsed sections
     except Exception as e:
         return f"Error processing file: {str(e)}"
 # Initialize the inference client (if needed for other API-based tasks)
 client = InferenceClient(token=access_token)
+def create_email_prompt(job_description: str, cv_text: str) -> str:
     """Create a detailed prompt for email generation."""
     return f"""Job Description:
 {job_description}
 Your CV Details:
+{cv_text}
 Instructions: Write a professional job application email following these guidelines:
 1. Start with a proper greeting
 Email:"""
+def conversation_predict(input_text: str, cv_text: str):
     """Generate a response using the model with streaming output."""
+    prompt = create_email_prompt(input_text, cv_text)
     # Use the streaming API
     try:
 # Streamlit UI section
 st.title("AI Job Application Email Generator")
+def update_ui(message, cv_file, cv_text):
     """Handle the UI updates for email generation."""
     # Create placeholder for the generated email
     email_placeholder = st.empty()
     # Generate button
     if st.button("Generate Email", key="generate_button"):
+        if message and cv_file and isinstance(cv_text, str) and not cv_text.startswith("Error"):
             email_text = ""
             # Stream the response
             try:
+                for chunk in conversation_predict(message, cv_text):
                     if chunk:
                         email_text += chunk
                         # Update the text area with each chunk, using timestamp in key
     cv_file = st.file_uploader("Upload CV (PDF or DOCX)", type=["pdf", "docx"])
     if cv_file:
+        cv_text = extract_cv_text(cv_file)
+        if isinstance(cv_text, str) and not cv_text.startswith("Error"):
+            st.success("CV uploaded successfully!")
         else:
+            st.error(cv_text)
+            cv_text = None
     else:
+        cv_text = None
     # Job description input
     st.markdown("### Job Description")
     message = st.text_area("Paste the job description here:", height=200)
     # Call the updated UI function with parameters
+    update_ui(message, cv_file, cv_text)
 with tab2:
+    if cv_file and isinstance(cv_text, str) and not cv_text.startswith("Error"):
+        st.markdown("### CV Content")
+        st.text_area("Full CV Text", value=cv_text, height=400)
     else:
+        st.info("Upload a CV to view content")