Dhahlan2000 committed
Commit 7c0b46d · 1 Parent(s): 3ce6b08

Refactor app.py to move from Streamlit to Gradio for a more interactive user interface. Add CV text extraction from both PDF and DOCX uploads. Rework email generation to call the Hugging Face model directly, streaming responses via InferenceClient. Add 'gradio', 'huggingface_hub', and 'python-docx' to requirements.txt as new dependencies.

Files changed (2)
  1. app.py +123 -131
  2. requirements.txt +3 -0
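
The headline change is the move from Streamlit's rerun-per-interaction model to a Gradio ChatInterface whose handler is a generator, so partial output streams into the chat as it is produced. A minimal sketch of that pattern, using a hypothetical echo_stream handler that is not part of this commit:

import gradio as gr

def echo_stream(message, history):
    """Toy handler: Gradio re-renders the assistant reply on each yield,
    so the response appears to stream word by word."""
    response = ""
    for word in message.split():
        response += word + " "
        yield response

demo = gr.ChatInterface(fn=echo_stream)

if __name__ == "__main__":
    demo.launch()

The respond function in the diff below follows the same contract; Gradio passes each additional_inputs widget's value as an extra positional argument after message and history.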
app.py CHANGED
@@ -1,137 +1,129 @@
- import streamlit as st
- from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
- import PyPDF2
- from dotenv import load_dotenv
  import os
-
- # Load environment variables from .env
- load_dotenv()
-
- # API Key
- access_token = os.getenv("API_KEY")
-
- # Streamlit App Title
- st.title("Job Description and CV-Based Email Generator")
- st.write("""
- This app uses Hugging Face's Gemma model to generate a professional email based on a pre-parsed CV and a job description.
- Upload your CV once in the sidebar, and the system will reuse the parsed details for generating emails.
- """)
-
- # Sidebar for Settings and CV Upload
- st.sidebar.title("Settings and CV Upload")
-
- # File Upload for CV in Sidebar
- uploaded_file = st.sidebar.file_uploader("Upload your CV (PDF format):", type=["pdf"])
-
- if "parsed_cv" not in st.session_state:
-     st.session_state.parsed_cv = None
-
- if "email_history" not in st.session_state:
-     st.session_state.email_history = []
-
- if uploaded_file is not None:
-     try:
-         # Extract text from PDF
-         pdf_reader = PyPDF2.PdfReader(uploaded_file)
-         cv_text = "".join([page.extract_text() for page in pdf_reader.pages])
-         st.sidebar.success("CV uploaded and text extracted successfully!")
-
-         # Parse CV details and save to session state
-         def parse_cv(cv_text):
-             return f"""
- Name: [Extracted Name]
- Contact Information: [Extracted Contact Info]
- Skills: [Extracted Skills]
- Experience: [Extracted Experience]
- Education: [Extracted Education]
- Summary: {cv_text[:500]}... # Truncated summary of the CV
- """
-
-         st.session_state.parsed_cv = parse_cv(cv_text)
-         st.sidebar.success("CV parsed successfully!")
-     except Exception as e:
-         st.sidebar.error(f"Failed to extract text from CV: {e}")
-
- if st.session_state.parsed_cv:
-     st.sidebar.write("### Parsed CV Details:")
-     st.sidebar.text(st.session_state.parsed_cv)
-
- # Ensure Access Token is Provided
- if access_token:
-     @st.cache_resource
-     def initialize_pipeline(access_token):
-         try:
-             tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it", token=access_token)
-             model = AutoModelForCausalLM.from_pretrained(
-                 "google/gemma-2b-it",
-                 token=access_token,
-             )
-             return pipeline(
-                 "text-generation",
-                 model=model,
-                 tokenizer=tokenizer,
-                 max_new_tokens=2048,
-                 temperature=0.7,
-                 top_p=0.95
-             )
-         except Exception as e:
-             st.error(f"Failed to initialize the model: {str(e)}")
-             return None
-
-     text_gen_pipeline = initialize_pipeline(access_token)
-
-     # Input job description
-     job_description = st.text_area("Enter the job description:", "")
-
-     # Display generated email
-     if st.button("Generate Email"):
-         if st.session_state.parsed_cv and job_description.strip():
-             try:
-                 # Improved prompt template
-                 prompt = f"""Task: Write a professional job application email.
-
- CV Summary:
- {st.session_state.parsed_cv}
-
- Job Description:
- {job_description}

  Instructions: Write a concise and professional email expressing interest in the position.
  Highlight relevant experience and skills from the CV that match the job requirements.
  Keep the tone professional and enthusiastic.

- Email:
- """
-                 # Generate email using the pipeline
-                 if text_gen_pipeline:
-                     response = text_gen_pipeline(
-                         prompt,
-                         clean_up_tokenization_spaces=True,
-                         return_full_text=False
-                     )[0]['generated_text']
-
-                     # Save response in history
-                     st.session_state.email_history.append({
-                         "job_description": job_description,
-                         "email": response
-                     })
-
-                     # Display response
-                     st.subheader("Generated Email:")
-                     st.write(response)
-
-                     # Display conversation history
-                     if st.session_state.email_history:
-                         st.subheader("Previous Generations:")
-                         for idx, entry in enumerate(st.session_state.email_history, 1):
-                             st.write(f"### Email {idx}")
-                             st.write(f"**Job Description:** {entry['job_description']}")
-                             st.write(f"**Generated Email:** {entry['email']}")
-                 else:
-                     st.error("Text generation pipeline not properly initialized.")
-             except Exception as e:
-                 st.error(f"Error generating email: {str(e)}")
-         else:
-             st.warning("Please upload your CV in the sidebar and enter a job description.")
- else:
-     st.warning("Please enter your Hugging Face access token in the sidebar to use the app.")
+ import gradio as gr
+ from huggingface_hub import InferenceClient
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ import torch
  import os
+ from PyPDF2 import PdfReader
+ import docx
+
+ def extract_cv_text(file):
+     """Extract text from PDF or DOCX CV files."""
+     if file is None:
+         return "No CV uploaded"
+
+     file_ext = os.path.splitext(file.name)[1].lower()
+
+     if file_ext == '.pdf':
+         reader = PdfReader(file)
+         text = ""
+         for page in reader.pages:
+             text += page.extract_text()
+         return text
+
+     elif file_ext == '.docx':
+         doc = docx.Document(file)
+         text = ""
+         for paragraph in doc.paragraphs:
+             text += paragraph.text + "\n"
+         return text
+
+     else:
+         return "Unsupported file format. Please upload PDF or DOCX files."
+
+ # Read the Hugging Face access token from the 'token' environment variable
+ access_token = os.getenv('token')
+
+ # Initialize the tokenizer and model with the Hugging Face access token
+ tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it", use_auth_token=access_token)
+ model = AutoModelForCausalLM.from_pretrained(
+     "google/gemma-2b-it",
+     torch_dtype=torch.bfloat16,
+     use_auth_token=access_token
+ )
+ model.eval()  # Set the model to evaluation mode
+
+ # Initialize the inference client (if needed for other API-based tasks)
+ client = InferenceClient(token=access_token)
+
+ def conversation_predict(input_text):
+     """Generate a response for single-turn input using the model."""
+     # Tokenize the input text (f-string so the job description is interpolated)
+     input_ids = tokenizer(f"""Job Description:
+ {input_text}

  Instructions: Write a concise and professional email expressing interest in the position.
  Highlight relevant experience and skills from the CV that match the job requirements.
  Keep the tone professional and enthusiastic.

+ Email:""", return_tensors="pt").input_ids
+
+     # Generate a response with the model
+     outputs = model.generate(input_ids, max_new_tokens=2048)
+
+     # Decode and return the generated response
+     return tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+ def respond(
+     message: str,
+     history: list[tuple[str, str]],
+     system_message: str,
+     cv_file,
+     max_tokens: int,
+     temperature: float,
+     top_p: float,
+ ):
+     """Generate a response for a multi-turn chat conversation."""
+     # Extract CV text and update system message
+     cv_text = extract_cv_text(cv_file) if cv_file else "No CV provided"
+
+     updated_system_message = f"""Task: Write a professional job application email.
+
+ CV Summary:
+ {cv_text}
+
+ {system_message}"""
+
+     messages = [{"role": "system", "content": updated_system_message}]
+
+     for user_input, assistant_reply in history:
+         if user_input:
+             messages.append({"role": "user", "content": user_input})
+         if assistant_reply:
+             messages.append({"role": "assistant", "content": assistant_reply})
+
+     messages.append({"role": "user", "content": message})
+
+     response = ""
+
+     for message_chunk in client.chat_completion(
+         messages=messages,
+         max_tokens=max_tokens,
+         stream=True,
+         temperature=temperature,
+         top_p=top_p,
+     ):
+         token = message_chunk["choices"][0]["delta"].get("content", "")
+         response += token
+         yield response
+
+ # Create a Gradio ChatInterface demo
+ demo = gr.ChatInterface(
+     fn=respond,
+     additional_inputs=[
+         gr.Textbox(value="Instructions: Write a concise and professional email expressing interest in the position.",
+                    label="System message"),
+         gr.File(label="Upload CV (PDF or DOCX)", file_types=[".pdf", ".docx"]),
+         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+         gr.Slider(
+             minimum=0.1,
+             maximum=1.0,
+             value=0.95,
+             step=0.05,
+             label="Top-p (nucleus sampling)",
+         ),
+     ],
+ )
+
+ if __name__ == "__main__":
+     demo.launch()
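
The InferenceClient path can also be exercised on its own, outside the UI. A sketch (not part of the commit), assuming the same 'token' environment variable; note that respond never passes model= to chat_completion, so the client falls back to its default model resolution:

import os

from huggingface_hub import InferenceClient

client = InferenceClient(token=os.getenv("token"))

messages = [
    {"role": "system", "content": "Write a concise, professional job application email."},
    {"role": "user", "content": "Job description: backend engineer, Python and Django."},
]

# Non-streaming call: the full completion comes back in one object.
result = client.chat_completion(messages=messages, max_tokens=256, temperature=0.7)
print(result.choices[0].message.content)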
requirements.txt CHANGED
@@ -5,4 +5,7 @@ langchain
  transformers
  torch
  PyPDF2
+ gradio
+ huggingface_hub
+ python-docx
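
A quick way to confirm the three new dependencies resolve after installation (a sketch; note that python-docx is imported as docx):

import gradio
import huggingface_hub
import docx  # provided by the python-docx package

print("gradio", gradio.__version__)
print("huggingface_hub", huggingface_hub.__version__)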