Spaces:
Sleeping
Sleeping
Commit
·
d4b8475
1
Parent(s):
58aa7f1
Refactor app.py to implement CV upload and email generation features using Hugging Face's Gemma model. Removed unused job search functions and integrated PDF parsing for CVs. Updated requirements.txt to include PyPDF2 for PDF handling.
Browse files- .gitignore +2 -0
- app.py +97 -187
- requirements.txt +2 -2
.gitignore
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
.env
|
2 |
+
.venv
|
app.py
CHANGED
@@ -1,200 +1,110 @@
|
|
1 |
import streamlit as st
|
2 |
-
import
|
3 |
-
import
|
|
|
|
|
4 |
from dotenv import load_dotenv
|
5 |
-
from langchain.agents import Tool, AgentExecutor, LLMSingleActionAgent, AgentOutputParser
|
6 |
-
from langchain.schema import AgentAction, AgentFinish, HumanMessage
|
7 |
-
from langchain.prompts import BaseChatPromptTemplate
|
8 |
-
from langchain.tools import Tool
|
9 |
-
from langchain.memory import ConversationBufferWindowMemory
|
10 |
-
from transformers import pipeline
|
11 |
-
from typing import List, Union
|
12 |
-
import re
|
13 |
|
14 |
# Load environment variables from .env
|
15 |
load_dotenv()
|
16 |
|
17 |
# Job API keys and endpoints
|
18 |
-
|
19 |
-
JOBS_API_URL = "https://jobs.github.com/positions.json" # Example API endpoint (replace with an actual one)
|
20 |
|
21 |
-
#
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
return [
|
28 |
-
{
|
29 |
-
"title": job["title"],
|
30 |
-
"company": job["company"],
|
31 |
-
"location": job["location"],
|
32 |
-
"url": job["url"]
|
33 |
-
} for job in jobs
|
34 |
-
]
|
35 |
-
else:
|
36 |
-
return {"error": "Unable to fetch job data."}
|
37 |
-
except Exception as e:
|
38 |
-
return {"error": str(e)}
|
39 |
|
40 |
-
#
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
return [
|
47 |
-
{
|
48 |
-
"title": job["title"],
|
49 |
-
"company": job["company"],
|
50 |
-
"url": job["url"]
|
51 |
-
} for job in jobs
|
52 |
-
]
|
53 |
-
else:
|
54 |
-
return {"error": "Unable to fetch remote job data."}
|
55 |
-
except Exception as e:
|
56 |
-
return {"error": str(e)}
|
57 |
|
58 |
-
|
59 |
-
|
|
|
|
|
60 |
try:
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
]
|
72 |
-
|
73 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
74 |
except Exception as e:
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
Observation: the result of the action
|
115 |
-
... (this Thought/Action/Action Input/Observation can repeat N times)
|
116 |
-
Thought: I now know the final answer
|
117 |
-
Final Answer: the final answer to the original input question
|
118 |
-
|
119 |
-
Begin! Remember to give detailed, informative answers
|
120 |
-
|
121 |
-
Previous conversation history:
|
122 |
-
{history}
|
123 |
-
|
124 |
-
New question: {input}
|
125 |
-
{agent_scratchpad}"""
|
126 |
-
|
127 |
-
# Set up the prompt template
|
128 |
-
class CustomPromptTemplate(BaseChatPromptTemplate):
|
129 |
-
template: str
|
130 |
-
tools: List[Tool]
|
131 |
-
|
132 |
-
def format_messages(self, **kwargs) -> str:
|
133 |
-
intermediate_steps = kwargs.pop("intermediate_steps")
|
134 |
-
thoughts = ""
|
135 |
-
for action, observation in intermediate_steps:
|
136 |
-
thoughts += action.log
|
137 |
-
thoughts += f"\nObservation: {observation}\nThought: "
|
138 |
-
|
139 |
-
kwargs["agent_scratchpad"] = thoughts
|
140 |
-
kwargs["tools"] = "\n".join([f"{tool.name}: {tool.description}" for tool in self.tools])
|
141 |
-
kwargs["tool_names"] = ", ".join([tool.name for tool in self.tools])
|
142 |
-
formatted = self.template.format(**kwargs)
|
143 |
-
return [HumanMessage(content=formatted)]
|
144 |
-
|
145 |
-
prompt_with_history = CustomPromptTemplate(
|
146 |
-
template=template_with_history,
|
147 |
-
tools=tools,
|
148 |
-
input_variables=["input", "intermediate_steps", "history"]
|
149 |
-
)
|
150 |
-
|
151 |
-
# Custom output parser
|
152 |
-
class CustomOutputParser(AgentOutputParser):
|
153 |
-
def parse(self, llm_output: str) -> Union[AgentAction, AgentFinish]:
|
154 |
-
if "Final Answer:" in llm_output:
|
155 |
-
return AgentFinish(
|
156 |
-
return_values={"output": llm_output.split("Final Answer:")[-1].strip()},
|
157 |
-
log=llm_output,
|
158 |
)
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
output_parser=output_parser,
|
178 |
-
stop=["\nObservation:"],
|
179 |
-
allowed_tools=tool_names
|
180 |
-
)
|
181 |
-
|
182 |
-
memory = ConversationBufferWindowMemory(k=2)
|
183 |
-
agent_executor = AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True, memory=memory)
|
184 |
-
|
185 |
-
# Streamlit app
|
186 |
-
st.title("Job Search Helper Agent")
|
187 |
-
|
188 |
-
query = st.text_input("Enter your query:")
|
189 |
-
|
190 |
-
if st.button("Submit"):
|
191 |
-
if query:
|
192 |
-
st.write("Debug: User Query ->", query)
|
193 |
-
with st.spinner("Processing..."):
|
194 |
-
try:
|
195 |
-
# Run the agent and get the response
|
196 |
-
response = agent_executor.run(query) # Correct method is `run()`
|
197 |
-
st.success("Response:")
|
198 |
-
st.write(response)
|
199 |
-
except Exception as e:
|
200 |
-
st.error(f"An error occurred: {e}")
|
|
|
1 |
import streamlit as st
|
2 |
+
from langchain.chains import ConversationChain
|
3 |
+
from langchain.memory import ConversationBufferMemory
|
4 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
|
5 |
+
import PyPDF2
|
6 |
from dotenv import load_dotenv
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
|
8 |
# Load environment variables from .env
|
9 |
load_dotenv()
|
10 |
|
11 |
# Default access token taken from the API_KEY environment variable.
# `os` is not among this revision's top-level imports, so it is imported here;
# without it the lookup below raises NameError as soon as the script runs.
import os

access_token = os.getenv("API_KEY")
|
|
|
13 |
|
14 |
+
# Streamlit App Title
|
15 |
+
st.title("Job Description and CV-Based Email Generator")
|
16 |
+
st.write("""
|
17 |
+
This app uses Hugging Face's Gemma model to generate a professional email based on a pre-parsed CV and a job description.
|
18 |
+
Upload your CV once in the sidebar, and the system will reuse the parsed details for generating emails.
|
19 |
+
""")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
|
21 |
+
# Sidebar for Settings and CV Upload
|
22 |
+
st.sidebar.title("Settings and CV Upload")
|
23 |
+
access_token = st.sidebar.text_input("Enter your Hugging Face Access Token", type="password")
|
24 |
+
|
25 |
+
# File Upload for CV in Sidebar
|
26 |
+
uploaded_file = st.sidebar.file_uploader("Upload your CV (PDF format):", type=["pdf"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
|
28 |
+
if "parsed_cv" not in st.session_state:
|
29 |
+
st.session_state.parsed_cv = None
|
30 |
+
|
31 |
+
if uploaded_file is not None:
|
32 |
try:
|
33 |
+
# Extract text from PDF
|
34 |
+
pdf_reader = PyPDF2.PdfReader(uploaded_file)
|
35 |
+
cv_text = "".join([page.extract_text() for page in pdf_reader.pages])
|
36 |
+
st.sidebar.success("CV uploaded and text extracted successfully!")
|
37 |
+
|
38 |
+
# Parse CV details and save to session state
|
39 |
+
def parse_cv(cv_text, max_summary_chars=500):
    """Build the text block used as the "parsed CV".

    Only the summary is derived from the CV; the other fields are literal
    placeholders (no real field extraction happens here).

    Args:
        cv_text: Raw text extracted from the uploaded CV.
        max_summary_chars: Maximum number of characters of ``cv_text`` copied
            into the summary line. Defaults to 500.

    Returns:
        A multi-line string that starts and ends with a newline, with one
        ``Field: value`` pair per line.
    """
    summary = cv_text[:max_summary_chars]
    # Only mark the summary as truncated when something was actually cut off.
    if len(cv_text) > max_summary_chars:
        summary += "..."

    # The explanatory note about truncation used to live inside the string
    # literal, so it was emitted into the output; it is kept out of the
    # returned text on purpose.
    return (
        "\n"
        "Name: [Extracted Name]\n"
        "Contact Information: [Extracted Contact Info]\n"
        "Skills: [Extracted Skills]\n"
        "Experience: [Extracted Experience]\n"
        "Education: [Extracted Education]\n"
        f"Summary: {summary}\n"
    )
|
49 |
+
|
50 |
+
st.session_state.parsed_cv = parse_cv(cv_text)
|
51 |
+
st.sidebar.success("CV parsed successfully!")
|
52 |
except Exception as e:
|
53 |
+
st.sidebar.error(f"Failed to extract text from CV: {e}")
|
54 |
+
|
55 |
+
if st.session_state.parsed_cv:
|
56 |
+
st.sidebar.write("### Parsed CV Details:")
|
57 |
+
st.sidebar.text(st.session_state.parsed_cv)
|
58 |
+
|
59 |
+
# Ensure Access Token is Provided
|
60 |
+
if access_token:
|
61 |
+
@st.cache_resource
def initialize_pipeline(access_token):
    """Build the text-generation pipeline for ``google/gemma-2b-it``.

    Args:
        access_token: Passed as ``token=`` to both ``from_pretrained`` calls.

    Returns:
        The object returned by ``pipeline("text-generation", ...)``, configured
        with ``max_new_tokens=512``.
    """
    # Tokenizer and model are loaded from the same checkpoint name.
    tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it", token=access_token)
    model = AutoModelForCausalLM.from_pretrained(
        "google/gemma-2b-it",
        torch_dtype="bfloat16",
        token=access_token
    )
    return pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=512)
|
70 |
+
|
71 |
+
text_gen_pipeline = initialize_pipeline(access_token)
|
72 |
+
|
73 |
+
@st.cache_resource
def initialize_chain():
    """Create the holder for the conversation memory.

    The previous version built ``ConversationChain(llm=None, memory=memory)``;
    ``llm`` is a required field of that chain, so construction fails
    validation before the app can be used. Generation is done by the
    Hugging Face pipeline and the rest of the script only reads the
    ``.memory`` attribute, so a plain namespace exposing ``.memory`` is
    returned instead.

    Returns:
        An object whose ``memory`` attribute is a ``ConversationBufferMemory``.
    """
    # Local import: the top-of-file import block is left untouched.
    from types import SimpleNamespace

    memory = ConversationBufferMemory()
    return SimpleNamespace(memory=memory)
|
77 |
+
|
78 |
+
conversation_chain = initialize_chain()
|
79 |
+
|
80 |
+
# Input job description
|
81 |
+
job_description = st.text_area("Enter the job description:", "")
|
82 |
+
|
83 |
+
# Generate the email when the button is clicked, then display it
|
84 |
+
if st.button("Generate Email"):
|
85 |
+
if st.session_state.parsed_cv and job_description.strip():
|
86 |
+
# Prompt for email generation
|
87 |
+
prompt = (
|
88 |
+
f"Based on the following CV details:\n\n{st.session_state.parsed_cv}\n\n"
|
89 |
+
f"And the following job description:\n\n{job_description}\n\n"
|
90 |
+
f"Write a professional email expressing interest in the job. "
|
91 |
+
f"Make it concise, polite, and tailored to the job."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
92 |
)
|
93 |
+
|
94 |
+
# Generate email using Hugging Face pipeline
|
95 |
+
# return_full_text=False keeps only the newly generated text; by default the
# pipeline output starts with the prompt itself (CV details and job description).
response = text_gen_pipeline(prompt, return_full_text=False)[0]['generated_text']
|
96 |
+
|
97 |
+
# Update memory with job description and response
|
98 |
+
conversation_chain.memory.save_context({"job_description": job_description}, {"email": response})
|
99 |
+
|
100 |
+
# Display response
|
101 |
+
st.subheader("Generated Email:")
|
102 |
+
st.write(response)
|
103 |
+
|
104 |
+
# Display conversation history
|
105 |
+
st.subheader("History:")
|
106 |
+
st.write(conversation_chain.memory.buffer)
|
107 |
+
else:
|
108 |
+
st.warning("Please upload your CV in the sidebar and enter a job description.")
|
109 |
+
else:
|
110 |
+
st.warning("Please enter your Hugging Face access token in the sidebar to use the app.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
# add requirements
|
2 |
streamlit
|
3 |
-
requests
|
4 |
python-dotenv
|
5 |
langchain
|
6 |
transformers
|
7 |
torch
|
8 |
-
|
|
|
|
1 |
# add requirements
|
2 |
streamlit
|
|
|
3 |
python-dotenv
|
4 |
langchain
|
5 |
transformers
|
6 |
torch
|
7 |
+
PyPDF2
|
8 |
+
|