Dhahlan2000 committed
Commit d4b8475 · Parent: 58aa7f1

Refactor app.py to implement CV upload and email generation using Hugging Face's Gemma model. Remove the unused job-search agent and its tools, integrate PDF parsing for CVs, and update requirements.txt to include PyPDF2 for PDF handling.

Files changed (3):
  1. .gitignore +2 -0
  2. app.py +97 -187
  3. requirements.txt +2 -2
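
The PDF parsing this commit introduces follows the stock PyPDF2 read-and-extract pattern. As a minimal standalone sketch (the cv.pdf path is hypothetical, and the or-empty-string guard is an addition for pages with no extractable text):

    import PyPDF2

    # Read the CV and concatenate the text of every page.
    # extract_text() can return None for image-only pages, hence the guard.
    with open("cv.pdf", "rb") as f:
        reader = PyPDF2.PdfReader(f)
        cv_text = "".join(page.extract_text() or "" for page in reader.pages)

    print(cv_text[:500])  # preview the first 500 characters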
.gitignore ADDED
@@ -0,0 +1,2 @@
+ .env
+ .venv
app.py CHANGED
@@ -1,200 +1,110 @@
  import streamlit as st
- import requests
- import os
+ import os  # needed for os.getenv() below; missing from the original commit
+ from langchain.chains import ConversationChain
+ from langchain.memory import ConversationBufferMemory
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+ import PyPDF2
  from dotenv import load_dotenv
- from langchain.agents import Tool, AgentExecutor, LLMSingleActionAgent, AgentOutputParser
- from langchain.schema import AgentAction, AgentFinish, HumanMessage
- from langchain.prompts import BaseChatPromptTemplate
- from langchain.tools import Tool
- from langchain.memory import ConversationBufferWindowMemory
- from transformers import pipeline
- from typing import List, Union
- import re

  # Load environment variables from .env
  load_dotenv()

  # Job API keys and endpoints
- JOB_API_KEY = os.getenv("JOB_API_KEY")  # Add your job API key here if required
- JOBS_API_URL = "https://jobs.github.com/positions.json"  # Example API endpoint (replace with an actual one)
+ access_token = os.getenv("API_KEY")

- # Function to find global job openings
- def find_global_jobs():
-     try:
-         response = requests.get(JOBS_API_URL)
-         if response.status_code == 200:
-             jobs = response.json()
-             return [
-                 {
-                     "title": job["title"],
-                     "company": job["company"],
-                     "location": job["location"],
-                     "url": job["url"]
-                 } for job in jobs
-             ]
-         else:
-             return {"error": "Unable to fetch job data."}
-     except Exception as e:
-         return {"error": str(e)}
-
- # Function to find remote jobs
- def find_remote_jobs():
-     try:
-         response = requests.get(f"{JOBS_API_URL}?location=remote")
-         if response.status_code == 200:
-             jobs = response.json()
-             return [
-                 {
-                     "title": job["title"],
-                     "company": job["company"],
-                     "url": job["url"]
-                 } for job in jobs
-             ]
-         else:
-             return {"error": "Unable to fetch remote job data."}
-     except Exception as e:
-         return {"error": str(e)}
-
- # Function to find jobs near a location
- def find_jobs_near_location(location):
+ # Streamlit App Title
+ st.title("Job Description and CV-Based Email Generator")
+ st.write("""
+ This app uses Hugging Face's Gemma model to generate a professional email based on a pre-parsed CV and a job description.
+ Upload your CV once in the sidebar, and the system will reuse the parsed details for generating emails.
+ """)
+
+ # Sidebar for Settings and CV Upload
+ st.sidebar.title("Settings and CV Upload")
+ access_token = st.sidebar.text_input("Enter your Hugging Face Access Token", type="password")
+
+ # File Upload for CV in Sidebar
+ uploaded_file = st.sidebar.file_uploader("Upload your CV (PDF format):", type=["pdf"])
+
+ if "parsed_cv" not in st.session_state:
+     st.session_state.parsed_cv = None
+
+ if uploaded_file is not None:
      try:
-         response = requests.get(f"{JOBS_API_URL}?location={location}")
-         if response.status_code == 200:
-             jobs = response.json()
-             return [
-                 {
-                     "title": job["title"],
-                     "company": job["company"],
-                     "location": job["location"],
-                     "url": job["url"]
-                 } for job in jobs
-             ]
-         else:
-             return {"error": "Unable to fetch job data for location."}
+         # Extract text from PDF
+         pdf_reader = PyPDF2.PdfReader(uploaded_file)
+         cv_text = "".join([page.extract_text() or "" for page in pdf_reader.pages])  # guard: extract_text() can return None
+         st.sidebar.success("CV uploaded and text extracted successfully!")
+
+         # Parse CV details and save to session state
+         def parse_cv(cv_text):
+             # Basic parsing logic (can be extended for specific details)
+             return f"""
+             Name: [Extracted Name]
+             Contact Information: [Extracted Contact Info]
+             Skills: [Extracted Skills]
+             Experience: [Extracted Experience]
+             Education: [Extracted Education]
+             Summary: {cv_text[:500]}... # Truncated summary of the CV
+             """
+
+         st.session_state.parsed_cv = parse_cv(cv_text)
+         st.sidebar.success("CV parsed successfully!")
      except Exception as e:
-         return {"error": str(e)}
-
- # Define LangChain tools
- global_jobs_tool = Tool(
-     name="Global Job Finder",
-     func=find_global_jobs,
-     description="Find all job openings around the world."
- )
-
- remote_jobs_tool = Tool(
-     name="Remote Job Finder",
-     func=find_remote_jobs,
-     description="Find remote job openings."
- )
-
- local_jobs_tool = Tool(
-     name="Local Job Finder",
-     func=find_jobs_near_location,
-     description="Find job openings near a specified location. Input should be a city or region name."
- )
-
- # Set up the tools
- tools = [
-     global_jobs_tool,
-     remote_jobs_tool,
-     local_jobs_tool
- ]
-
- # Set up a prompt template with history
- template_with_history = """You are JobSearchGPT, an AI assistant specialized in finding job openings. Answer the following questions as best you can. You have access to the following tools:
-
- {tools}
-
- Use the following format:
-
- Question: the input question you must answer
- Thought: you should always think about what to do
- Action: the action to take, should be one of [{tool_names}]
- Action Input: the input to the action
- Observation: the result of the action
- ... (this Thought/Action/Action Input/Observation can repeat N times)
- Thought: I now know the final answer
- Final Answer: the final answer to the original input question
-
- Begin! Remember to give detailed, informative answers
-
- Previous conversation history:
- {history}
-
- New question: {input}
- {agent_scratchpad}"""
-
- # Set up the prompt template
- class CustomPromptTemplate(BaseChatPromptTemplate):
-     template: str
-     tools: List[Tool]
-
-     def format_messages(self, **kwargs) -> str:
-         intermediate_steps = kwargs.pop("intermediate_steps")
-         thoughts = ""
-         for action, observation in intermediate_steps:
-             thoughts += action.log
-             thoughts += f"\nObservation: {observation}\nThought: "
-
-         kwargs["agent_scratchpad"] = thoughts
-         kwargs["tools"] = "\n".join([f"{tool.name}: {tool.description}" for tool in self.tools])
-         kwargs["tool_names"] = ", ".join([tool.name for tool in self.tools])
-         formatted = self.template.format(**kwargs)
-         return [HumanMessage(content=formatted)]
-
- prompt_with_history = CustomPromptTemplate(
-     template=template_with_history,
-     tools=tools,
-     input_variables=["input", "intermediate_steps", "history"]
- )
-
- # Custom output parser
- class CustomOutputParser(AgentOutputParser):
-     def parse(self, llm_output: str) -> Union[AgentAction, AgentFinish]:
-         if "Final Answer:" in llm_output:
-             return AgentFinish(
-                 return_values={"output": llm_output.split("Final Answer:")[-1].strip()},
-                 log=llm_output,
+         st.sidebar.error(f"Failed to extract text from CV: {e}")
+
+ if st.session_state.parsed_cv:
+     st.sidebar.write("### Parsed CV Details:")
+     st.sidebar.text(st.session_state.parsed_cv)
+
+ # Ensure Access Token is Provided
+ if access_token:
+     @st.cache_resource
+     def initialize_pipeline(access_token):
+         tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it", token=access_token)
+         model = AutoModelForCausalLM.from_pretrained(
+             "google/gemma-2b-it",
+             torch_dtype="bfloat16",
+             token=access_token
+         )
+         return pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=512)
+
+     text_gen_pipeline = initialize_pipeline(access_token)
+
+     @st.cache_resource
+     def initialize_chain():
+         memory = ConversationBufferMemory()
+         return ConversationChain(llm=None, memory=memory)  # No LLM; handled by pipeline
+
+     conversation_chain = initialize_chain()
+
+     # Input job description
+     job_description = st.text_area("Enter the job description:", "")
+
+     # Display generated email
+     if st.button("Generate Email"):
+         if st.session_state.parsed_cv and job_description.strip():
+             # Prompt for email generation
+             prompt = (
+                 f"Based on the following CV details:\n\n{st.session_state.parsed_cv}\n\n"
+                 f"And the following job description:\n\n{job_description}\n\n"
+                 f"Write a professional email expressing interest in the job. "
+                 f"Make it concise, polite, and tailored to the job."
              )
-         regex = r"Action: (.*?)[\n]*Action Input:[\s]*(.*)"
-         match = re.search(regex, llm_output, re.DOTALL)
-         if not match:
-             raise ValueError(f"Could not parse LLM output: `{llm_output}`")
-         action = match.group(1).strip()
-         action_input = match.group(2)
-         return AgentAction(tool=action, tool_input=action_input.strip(" ").strip('"'), log=llm_output)
-
- output_parser = CustomOutputParser()
-
- # Initialize HuggingFace pipeline
- pipe = pipeline("text-generation", model="gpt-neo-2.7B")  # Replace with a suitable model
-
- # LLM chain
- llm_chain = LLMChain(llm=pipe, prompt=prompt_with_history)
- tool_names = [tool.name for tool in tools]
- agent = LLMSingleActionAgent(
-     llm_chain=llm_chain,
-     output_parser=output_parser,
-     stop=["\nObservation:"],
-     allowed_tools=tool_names
- )
-
- memory = ConversationBufferWindowMemory(k=2)
- agent_executor = AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True, memory=memory)
-
- # Streamlit app
- st.title("Job Search Helper Agent")
-
- query = st.text_input("Enter your query:")
-
- if st.button("Submit"):
-     if query:
-         st.write("Debug: User Query ->", query)
-         with st.spinner("Processing..."):
-             try:
-                 # Run the agent and get the response
-                 response = agent_executor.run(query)  # Correct method is `run()`
-                 st.success("Response:")
-                 st.write(response)
-             except Exception as e:
-                 st.error(f"An error occurred: {e}")
+
+             # Generate email using Hugging Face pipeline
+             response = text_gen_pipeline(prompt)[0]['generated_text']
+
+             # Update memory with job description and response
+             conversation_chain.memory.save_context({"job_description": job_description}, {"email": response})
+
+             # Display response
+             st.subheader("Generated Email:")
+             st.write(response)
+
+             # Display conversation history
+             st.subheader("History:")
+             st.write(conversation_chain.memory.buffer)
+         else:
+             st.warning("Please upload your CV in the sidebar and enter a job description.")
+ else:
+     st.warning("Please enter your Hugging Face access token in the sidebar to use the app.")
requirements.txt CHANGED
@@ -1,8 +1,8 @@
  # add requirements
  streamlit
- requests
  python-dotenv
  langchain
  transformers
  torch
- typing
+ PyPDF2
+
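
A note for future maintenance: the PyPDF2 project has since been merged back into pypdf, so this new dependency could later be swapped with only an import change. A sketch, assuming pypdf replaces PyPDF2 in requirements.txt and a hypothetical cv.pdf path:

    from pypdf import PdfReader  # drop-in successor to PyPDF2's PdfReader

    reader = PdfReader("cv.pdf")
    cv_text = "".join(page.extract_text() or "" for page in reader.pages)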