Dhahlan2000 committed
Commit d4b8475 · Parent: 58aa7f1

Refactor app.py to implement CV upload and email generation using Hugging Face's Gemma model. Remove the unused job-search agent and its tools, integrate PDF parsing for CVs, and update requirements.txt to include PyPDF2 for PDF handling.

Files changed (3):
  1. .gitignore +2 -0
  2. app.py +97 -187
  3. requirements.txt +2 -2
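
The PDF parsing this commit introduces follows the stock PyPDF2 read-and-extract pattern. As a minimal standalone sketch (the cv.pdf path is hypothetical, and the or-empty-string guard is an addition for pages with no extractable text):

    import PyPDF2

    # Read the CV and concatenate the text of every page.
    # extract_text() can return None for image-only pages, hence the guard.
    with open("cv.pdf", "rb") as f:
        reader = PyPDF2.PdfReader(f)
        cv_text = "".join(page.extract_text() or "" for page in reader.pages)

    print(cv_text[:500])  # preview the first 500 characters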
.gitignore ADDED
@@ -0,0 +1,2 @@
+ .env
+ .venv
app.py CHANGED
@@ -1,200 +1,110 @@
  import streamlit as st
- import requests
- import os
+ import os  # needed for os.getenv() below; missing from the original commit
+ from langchain.chains import ConversationChain
+ from langchain.memory import ConversationBufferMemory
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+ import PyPDF2
  from dotenv import load_dotenv
- from langchain.agents import Tool, AgentExecutor, LLMSingleActionAgent, AgentOutputParser
- from langchain.schema import AgentAction, AgentFinish, HumanMessage
- from langchain.prompts import BaseChatPromptTemplate
- from langchain.tools import Tool
- from langchain.memory import ConversationBufferWindowMemory
- from transformers import pipeline
- from typing import List, Union
- import re

  # Load environment variables from .env
  load_dotenv()

  # Job API keys and endpoints
- JOB_API_KEY = os.getenv("JOB_API_KEY")  # Add your job API key here if required
- JOBS_API_URL = "https://jobs.github.com/positions.json"  # Example API endpoint (replace with an actual one)
+ access_token = os.getenv("API_KEY")

- # Function to find global job openings
- def find_global_jobs():
-     try:
-         response = requests.get(JOBS_API_URL)
-         if response.status_code == 200:
-             jobs = response.json()
-             return [
-                 {
-                     "title": job["title"],
-                     "company": job["company"],
-                     "location": job["location"],
-                     "url": job["url"]
-                 } for job in jobs
-             ]
-         else:
-             return {"error": "Unable to fetch job data."}
-     except Exception as e:
-         return {"error": str(e)}
-
- # Function to find remote jobs
- def find_remote_jobs():
-     try:
-         response = requests.get(f"{JOBS_API_URL}?location=remote")
-         if response.status_code == 200:
-             jobs = response.json()
-             return [
-                 {
-                     "title": job["title"],
-                     "company": job["company"],
-                     "url": job["url"]
-                 } for job in jobs
-             ]
-         else:
-             return {"error": "Unable to fetch remote job data."}
-     except Exception as e:
-         return {"error": str(e)}
-
- # Function to find jobs near a location
- def find_jobs_near_location(location):
+ # Streamlit App Title
+ st.title("Job Description and CV-Based Email Generator")
+ st.write("""
+ This app uses Hugging Face's Gemma model to generate a professional email based on a pre-parsed CV and a job description.
+ Upload your CV once in the sidebar, and the system will reuse the parsed details for generating emails.
+ """)
+
+ # Sidebar for Settings and CV Upload
+ st.sidebar.title("Settings and CV Upload")
+ access_token = st.sidebar.text_input("Enter your Hugging Face Access Token", type="password")
+
+ # File Upload for CV in Sidebar
+ uploaded_file = st.sidebar.file_uploader("Upload your CV (PDF format):", type=["pdf"])
+
+ if "parsed_cv" not in st.session_state:
+     st.session_state.parsed_cv = None
+
+ if uploaded_file is not None:
      try:
-         response = requests.get(f"{JOBS_API_URL}?location={location}")
-         if response.status_code == 200:
-             jobs = response.json()
-             return [
-                 {
-                     "title": job["title"],
-                     "company": job["company"],
-                     "location": job["location"],
-                     "url": job["url"]
-                 } for job in jobs
-             ]
-         else:
-             return {"error": "Unable to fetch job data for location."}
+         # Extract text from PDF
+         pdf_reader = PyPDF2.PdfReader(uploaded_file)
+         cv_text = "".join([page.extract_text() or "" for page in pdf_reader.pages])  # guard: extract_text() can return None
+         st.sidebar.success("CV uploaded and text extracted successfully!")
+
+         # Parse CV details and save to session state
+         def parse_cv(cv_text):
+             # Basic parsing logic (can be extended for specific details)
+             return f"""
+             Name: [Extracted Name]
+             Contact Information: [Extracted Contact Info]
+             Skills: [Extracted Skills]
+             Experience: [Extracted Experience]
+             Education: [Extracted Education]
+             Summary: {cv_text[:500]}... # Truncated summary of the CV
+             """
+
+         st.session_state.parsed_cv = parse_cv(cv_text)
+         st.sidebar.success("CV parsed successfully!")
      except Exception as e:
-         return {"error": str(e)}
-
- # Define LangChain tools
- global_jobs_tool = Tool(
-     name="Global Job Finder",
-     func=find_global_jobs,
-     description="Find all job openings around the world."
- )
-
- remote_jobs_tool = Tool(
-     name="Remote Job Finder",
-     func=find_remote_jobs,
-     description="Find remote job openings."
- )
-
- local_jobs_tool = Tool(
-     name="Local Job Finder",
-     func=find_jobs_near_location,
-     description="Find job openings near a specified location. Input should be a city or region name."
- )
-
- # Set up the tools
- tools = [
-     global_jobs_tool,
-     remote_jobs_tool,
-     local_jobs_tool
- ]
-
- # Set up a prompt template with history
- template_with_history = """You are JobSearchGPT, an AI assistant specialized in finding job openings. Answer the following questions as best you can. You have access to the following tools:
-
- {tools}
-
- Use the following format:
-
- Question: the input question you must answer
- Thought: you should always think about what to do
- Action: the action to take, should be one of [{tool_names}]
- Action Input: the input to the action
- Observation: the result of the action
- ... (this Thought/Action/Action Input/Observation can repeat N times)
- Thought: I now know the final answer
- Final Answer: the final answer to the original input question
-
- Begin! Remember to give detailed, informative answers
-
- Previous conversation history:
- {history}
-
- New question: {input}
- {agent_scratchpad}"""
-
- # Set up the prompt template
- class CustomPromptTemplate(BaseChatPromptTemplate):
-     template: str
-     tools: List[Tool]
-
-     def format_messages(self, **kwargs) -> str:
-         intermediate_steps = kwargs.pop("intermediate_steps")
-         thoughts = ""
-         for action, observation in intermediate_steps:
-             thoughts += action.log
-             thoughts += f"\nObservation: {observation}\nThought: "
-
-         kwargs["agent_scratchpad"] = thoughts
-         kwargs["tools"] = "\n".join([f"{tool.name}: {tool.description}" for tool in self.tools])
-         kwargs["tool_names"] = ", ".join([tool.name for tool in self.tools])
-         formatted = self.template.format(**kwargs)
-         return [HumanMessage(content=formatted)]
-
- prompt_with_history = CustomPromptTemplate(
-     template=template_with_history,
-     tools=tools,
-     input_variables=["input", "intermediate_steps", "history"]
- )
-
- # Custom output parser
- class CustomOutputParser(AgentOutputParser):
-     def parse(self, llm_output: str) -> Union[AgentAction, AgentFinish]:
-         if "Final Answer:" in llm_output:
-             return AgentFinish(
-                 return_values={"output": llm_output.split("Final Answer:")[-1].strip()},
-                 log=llm_output,
+         st.sidebar.error(f"Failed to extract text from CV: {e}")
+
+ if st.session_state.parsed_cv:
+     st.sidebar.write("### Parsed CV Details:")
+     st.sidebar.text(st.session_state.parsed_cv)
+
+ # Ensure Access Token is Provided
+ if access_token:
+     @st.cache_resource
+     def initialize_pipeline(access_token):
+         tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it", token=access_token)
+         model = AutoModelForCausalLM.from_pretrained(
+             "google/gemma-2b-it",
+             torch_dtype="bfloat16",
+             token=access_token
+         )
+         return pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=512)
+
+     text_gen_pipeline = initialize_pipeline(access_token)
+
+     @st.cache_resource
+     def initialize_chain():
+         memory = ConversationBufferMemory()
+         return ConversationChain(llm=None, memory=memory)  # No LLM; handled by pipeline
+
+     conversation_chain = initialize_chain()
+
+     # Input job description
+     job_description = st.text_area("Enter the job description:", "")
+
+     # Display generated email
+     if st.button("Generate Email"):
+         if st.session_state.parsed_cv and job_description.strip():
+             # Prompt for email generation
+             prompt = (
+                 f"Based on the following CV details:\n\n{st.session_state.parsed_cv}\n\n"
+                 f"And the following job description:\n\n{job_description}\n\n"
+                 f"Write a professional email expressing interest in the job. "
+                 f"Make it concise, polite, and tailored to the job."
              )
-         regex = r"Action: (.*?)[\n]*Action Input:[\s]*(.*)"
-         match = re.search(regex, llm_output, re.DOTALL)
-         if not match:
-             raise ValueError(f"Could not parse LLM output: `{llm_output}`")
-         action = match.group(1).strip()
-         action_input = match.group(2)
-         return AgentAction(tool=action, tool_input=action_input.strip(" ").strip('"'), log=llm_output)
-
- output_parser = CustomOutputParser()
-
- # Initialize HuggingFace pipeline
- pipe = pipeline("text-generation", model="gpt-neo-2.7B")  # Replace with a suitable model
-
- # LLM chain
- llm_chain = LLMChain(llm=pipe, prompt=prompt_with_history)
- tool_names = [tool.name for tool in tools]
- agent = LLMSingleActionAgent(
-     llm_chain=llm_chain,
-     output_parser=output_parser,
-     stop=["\nObservation:"],
-     allowed_tools=tool_names
- )
-
- memory = ConversationBufferWindowMemory(k=2)
- agent_executor = AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True, memory=memory)
-
- # Streamlit app
- st.title("Job Search Helper Agent")
-
- query = st.text_input("Enter your query:")
-
- if st.button("Submit"):
-     if query:
-         st.write("Debug: User Query ->", query)
-         with st.spinner("Processing..."):
-             try:
-                 # Run the agent and get the response
-                 response = agent_executor.run(query)  # Correct method is `run()`
-                 st.success("Response:")
-                 st.write(response)
-             except Exception as e:
-                 st.error(f"An error occurred: {e}")
+
+             # Generate email using Hugging Face pipeline
+             response = text_gen_pipeline(prompt)[0]['generated_text']
+
+             # Update memory with job description and response
+             conversation_chain.memory.save_context({"job_description": job_description}, {"email": response})
+
+             # Display response
+             st.subheader("Generated Email:")
+             st.write(response)
+
+             # Display conversation history
+             st.subheader("History:")
+             st.write(conversation_chain.memory.buffer)
+         else:
+             st.warning("Please upload your CV in the sidebar and enter a job description.")
+ else:
+     st.warning("Please enter your Hugging Face access token in the sidebar to use the app.")
requirements.txt CHANGED
@@ -1,8 +1,8 @@
  # add requirements
  streamlit
- requests
  python-dotenv
  langchain
  transformers
  torch
- typing
+ PyPDF2
+
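
A note for future maintenance: the PyPDF2 project has since been merged back into pypdf, so this new dependency could later be swapped with only an import change. A sketch, assuming pypdf replaces PyPDF2 in requirements.txt and a hypothetical cv.pdf path:

    from pypdf import PdfReader  # drop-in successor to PyPDF2's PdfReader

    reader = PdfReader("cv.pdf")
    cv_text = "".join(page.extract_text() or "" for page in reader.pages)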