Spaces:

Dhahlan2000
/

AppyJob

Sleeping

App Files Files Community

Dhahlan2000 committed on Jan 6

Commit

23c19e0

1 Parent(s): 3baf333

Enhance app.py with CV parsing functionality and improved email generation. Introduced a new function to parse CV sections from uploaded PDF or DOCX files, allowing structured extraction of contact, education, experience, skills, and projects. Updated the email generation prompt to include detailed guidelines for crafting job application emails based on parsed CV data. Streamlined the Streamlit UI to support CV uploads and display parsed details, improving user interaction and experience.

Browse files

Files changed (1) hide show

app.py +134 -50

app.py CHANGED Viewed

@@ -5,6 +5,47 @@ import torch
 import os
 from PyPDF2 import PdfReader
 import docx
 def extract_cv_text(file):
     """Extract text from PDF or DOCX CV files."""
@@ -12,23 +53,27 @@ def extract_cv_text(file):
         return "No CV uploaded"
     file_ext = os.path.splitext(file.name)[1].lower()
-    if file_ext == '.pdf':
-        reader = PdfReader(file)
-        text = ""
-        for page in reader.pages:
-            text += page.extract_text()
-        return text
-    elif file_ext == '.docx':
-        doc = docx.Document(file)
-        text = ""
-        for paragraph in doc.paragraphs:
-            text += paragraph.text + "\n"
-        return text
-    else:
-        return "Unsupported file format. Please upload PDF or DOCX files."
 # Replace 'your_huggingface_token' with your actual Hugging Face access token
 access_token = os.getenv('API_KEY')
@@ -45,20 +90,48 @@ model.eval()  # Set the model to evaluation mode
 # Initialize the inference client (if needed for other API-based tasks)
 client = InferenceClient(token=access_token)
-def conversation_predict(input_text):
-    """Generate a response for single-turn input using the model."""
-    # Tokenize the input text
-    input_ids = tokenizer(f"""Job Description:
-{input_text}
-Instructions: Write a concise and professional email expressing interest in the position.
-Highlight relevant experience and skills from the CV that match the job requirements.
-Keep the tone professional and enthusiastic.
-Email:""", return_tensors="pt").input_ids
     # Generate a response with the model
-    outputs = model.generate(input_ids, max_new_tokens=2048)
     # Decode and return the generated response
     return tokenizer.decode(outputs[0], skip_special_tokens=True)
@@ -106,30 +179,41 @@ CV Summary:
         response += token
         yield response
-# Streamlit UI
-st.title("Job Application Email Generator")
-# Instructions text area
-system_message = st.text_area("System message",
-                              "Instructions: Write a concise and professional email expressing interest in the position.",
-                              height=150)
-# CV file upload
-cv_file = st.file_uploader("Upload CV (PDF or DOCX)", type=["pdf", "docx"])
-# Sliders for max tokens, temperature, and top-p
-max_tokens = st.slider("Max new tokens", min_value=1, max_value=2048, value=512, step=1)
-temperature = st.slider("Temperature", min_value=0.1, max_value=4.0, value=0.7, step=0.1)
-top_p = st.slider("Top-p (nucleus sampling)", min_value=0.1, max_value=1.0, value=0.95, step=0.05)
-# Input message field
-message = st.text_input("Job Description", "")
-# Button to generate response
-if st.button("Generate Email"):
-    if message:
-        response = conversation_predict(message)
-        st.write("Generated Email:")
-        st.write(response)
     else:
-        st.warning("Please enter a job description.")

 import os
 from PyPDF2 import PdfReader
 import docx
+import re
+from typing import Dict
+def parse_cv_sections(text: str) -> Dict[str, str]:
+    """Split raw CV text into contact/education/experience/skills/projects.
+
+    The text is scanned line by line. A line is treated as a section header
+    only when it looks like one: the part before the first colon is at most
+    ``max_header_words`` words long and contains a known header keyword at
+    the start of a word. Body lines that merely mention a keyword inside a
+    longer sentence, or inside another word (for example "framework" or
+    "network", which contain "work"), do not switch the current section.
+
+    Every non-blank line, including the header line itself, is appended to
+    the section that is current at that point. Lines that appear before the
+    first recognised header are discarded.
+
+    Args:
+        text: Plain text extracted from a CV.
+
+    Returns:
+        A dict with the keys 'contact', 'education', 'experience', 'skills'
+        and 'projects'. Each value holds the newline-terminated lines
+        collected for that section, or '' when the section was not found.
+    """
+    # Longest heading (in words) that is still accepted as a section header.
+    max_header_words = 4
+    # Checked in this order; the first matching pattern wins. The leading \b
+    # keeps plural/suffixed forms ("Qualifications") while rejecting keywords
+    # buried inside other words ("framework").
+    header_patterns = {
+        'contact': re.compile(r'\b(?:contact|personal\s+information|profile)', re.IGNORECASE),
+        'education': re.compile(r'\b(?:education|academic|qualification)', re.IGNORECASE),
+        'experience': re.compile(r'\b(?:experience|work|employment|professional)', re.IGNORECASE),
+        'skills': re.compile(r'\b(?:skills|technical skills|competencies)', re.IGNORECASE),
+        'projects': re.compile(r'\b(?:projects|personal projects)', re.IGNORECASE),
+    }
+    collected = {name: [] for name in header_patterns}
+    current_section = None
+    for raw_line in text.split('\n'):
+        line = raw_line.strip()
+        if not line:
+            continue
+        # "Skills: Python, SQL" style headings carry content after the colon,
+        # so only the part before the colon is judged as a heading.
+        heading = line.split(':', 1)[0]
+        if len(heading.split()) <= max_header_words:
+            for name, pattern in header_patterns.items():
+                if pattern.search(heading):
+                    current_section = name
+                    break
+        if current_section is not None:
+            collected[current_section].append(line + '\n')
+    return {name: ''.join(lines) for name, lines in collected.items()}
 def extract_cv_text(file):
     """Extract text from PDF or DOCX CV files."""
         return "No CV uploaded"
     file_ext = os.path.splitext(file.name)[1].lower()
+    text = ""
+    try:
+        if file_ext == '.pdf':
+            reader = PdfReader(file)
+            for page in reader.pages:
+                text += page.extract_text()
+        elif file_ext == '.docx':
+            doc = docx.Document(file)
+            for paragraph in doc.paragraphs:
+                text += paragraph.text + '\n'
+        else:
+            return "Unsupported file format. Please upload PDF or DOCX files."
+        # Parse the CV into sections
+        sections = parse_cv_sections(text)
+        return sections
+    except Exception as e:
+        return f"Error processing file: {str(e)}"
 # Replace 'your_huggingface_token' with your actual Hugging Face access token
 access_token = os.getenv('API_KEY')
 # Initialize the inference client (if needed for other API-based tasks)
 client = InferenceClient(token=access_token)
+def create_email_prompt(job_description: str, cv_sections: Dict[str, str]) -> str:
+    """Build the text prompt used to generate a job application email.
+
+    Args:
+        job_description: The job description text to answer.
+        cv_sections: Parsed CV sections keyed by section name. The keys
+            'contact', 'experience', 'skills' and 'education' are read;
+            a missing key is treated as an empty section.
+
+    Returns:
+        The prompt string: the job description, the CV sections, the writing
+        guidelines, and a trailing "Email:" cue.
+    """
+    # .get() so a partially filled dict does not raise KeyError.
+    contact = cv_sections.get('contact', '')
+    experience = cv_sections.get('experience', '')
+    skills = cv_sections.get('skills', '')
+    education = cv_sections.get('education', '')
+    # The contact section is included because guideline 5 asks the email to
+    # provide contact information.
+    return f"""Job Description:
+{job_description}
+Your CV Details:
+Contact:
+{contact}
+Experience:
+{experience}
+Skills:
+{skills}
+Education:
+{education}
+Instructions: Write a professional job application email following these guidelines:
+1. Start with a proper greeting
+2. First paragraph: Express interest in the position and mention how you found it
+3. Second paragraph: Highlight 2-3 most relevant experiences from your CV that match the job requirements
+4. Third paragraph: Mention specific skills that align with the role
+5. Closing paragraph: Express enthusiasm for an interview and provide contact information
+6. End with a professional closing
+Keep the tone professional, confident, and enthusiastic. Be concise but impactful.
+Email:"""
+def conversation_predict(input_text: str, cv_sections: Dict[str, str]):
+    """Generate a job application email from a job description and a parsed CV.
+
+    Args:
+        input_text: Job description text, passed to create_email_prompt.
+        cv_sections: Parsed CV sections, passed to create_email_prompt.
+
+    Returns:
+        The decoded text of the first generated sequence, with special
+        tokens skipped.
+    """
+    prompt = create_email_prompt(input_text, cv_sections)
+    # Tokenize the input text
+    input_ids = tokenizer(prompt, return_tensors="pt").input_ids
     # Generate a response with the model
+    # do_sample=True: output is sampled, so repeated calls may differ.
+    outputs = model.generate(
+        input_ids,
+        max_new_tokens=2048,
+        temperature=0.7,
+        top_p=0.95,
+        do_sample=True
+    )
     # Decode and return the generated response
+    # NOTE(review): if `model` is a decoder-only model, outputs[0] presumably
+    # starts with the prompt tokens, so the returned text would echo the
+    # prompt before the email - confirm against the model loaded above.
     return tokenizer.decode(outputs[0], skip_special_tokens=True)
         response += token
         yield response
+# Streamlit UI section
+st.title("AI Job Application Email Generator")
+# Add tabs for different sections
+tab1, tab2 = st.tabs(["Generate Email", "View CV Details"])
+with tab1:
+    # CV file upload
+    cv_file = st.file_uploader("Upload CV (PDF or DOCX)", type=["pdf", "docx"])
+    # cv_sections is only assigned when a file is present; every later use
+    # checks cv_file first, so the name is never read while unassigned.
+    if cv_file:
+        cv_sections = extract_cv_text(cv_file)
+        # extract_cv_text returns a dict on success and a message string
+        # otherwise, so the type check distinguishes the two outcomes.
+        if isinstance(cv_sections, dict):
+            st.success("CV uploaded and parsed successfully!")
+        else:
+            st.error(cv_sections)  # Show error message if parsing failed
+    # Job description input
+    st.markdown("### Job Description")
+    message = st.text_area("Paste the job description here:", height=200)
+    # Generate button
+    if st.button("Generate Email"):
+        # Requires a job description, an uploaded file, and a parsed CV dict.
+        if message and cv_file and isinstance(cv_sections, dict):
+            response = conversation_predict(message, cv_sections)
+            st.markdown("### Generated Email:")
+            st.markdown(response)
+        else:
+            st.warning("Please upload a CV and enter a job description.")
+with tab2:
+    # Shows one expander per parsed section, titled with the section name.
+    if cv_file and isinstance(cv_sections, dict):
+        st.markdown("### Parsed CV Details")
+        for section, content in cv_sections.items():
+            with st.expander(f"{section.title()}"):
+                st.text(content)
     else:
+        st.info("Upload a CV to view parsed details")