Spaces:
Sleeping
Sleeping
Commit
·
7c0b46d
1
Parent(s):
3ce6b08
Refactor app.py to transition from Streamlit to Gradio for a more interactive user interface. Implement CV text extraction from both PDF and DOCX formats, enhancing file upload capabilities. Update email generation process to utilize Hugging Face's model directly, improving response generation. Modify requirements.txt to include 'gradio', 'huggingface_hub', and 'python-docx' for new dependencies.
Browse files- app.py +123 -131
- requirements.txt +3 -0
app.py
CHANGED
@@ -1,137 +1,129 @@
|
|
1 |
-
import
|
2 |
-
from
|
3 |
-
import
|
4 |
-
|
5 |
import os
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
""
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
st.sidebar.error(f"Failed to extract text from CV: {e}")
|
54 |
-
|
55 |
-
if st.session_state.parsed_cv:
|
56 |
-
st.sidebar.write("### Parsed CV Details:")
|
57 |
-
st.sidebar.text(st.session_state.parsed_cv)
|
58 |
-
|
59 |
-
# Ensure Access Token is Provided
|
60 |
-
if access_token:
|
61 |
-
@st.cache_resource
|
62 |
-
def initialize_pipeline(access_token):
|
63 |
-
try:
|
64 |
-
tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it", token=access_token)
|
65 |
-
model = AutoModelForCausalLM.from_pretrained(
|
66 |
-
"google/gemma-2b-it",
|
67 |
-
token=access_token,
|
68 |
-
)
|
69 |
-
return pipeline(
|
70 |
-
"text-generation",
|
71 |
-
model=model,
|
72 |
-
tokenizer=tokenizer,
|
73 |
-
max_new_tokens=2048,
|
74 |
-
temperature=0.7,
|
75 |
-
top_p=0.95
|
76 |
-
)
|
77 |
-
except Exception as e:
|
78 |
-
st.error(f"Failed to initialize the model: {str(e)}")
|
79 |
-
return None
|
80 |
-
|
81 |
-
text_gen_pipeline = initialize_pipeline(access_token)
|
82 |
-
|
83 |
-
# Input job description
|
84 |
-
job_description = st.text_area("Enter the job description:", "")
|
85 |
-
|
86 |
-
# Display generated email
|
87 |
-
if st.button("Generate Email"):
|
88 |
-
if st.session_state.parsed_cv and job_description.strip():
|
89 |
-
try:
|
90 |
-
# Improved prompt template
|
91 |
-
prompt = f"""Task: Write a professional job application email.
|
92 |
-
|
93 |
-
CV Summary:
|
94 |
-
{st.session_state.parsed_cv}
|
95 |
-
|
96 |
-
Job Description:
|
97 |
-
{job_description}
|
98 |
|
99 |
Instructions: Write a concise and professional email expressing interest in the position.
|
100 |
Highlight relevant experience and skills from the CV that match the job requirements.
|
101 |
Keep the tone professional and enthusiastic.
|
102 |
|
103 |
-
Email:
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from huggingface_hub import InferenceClient
|
3 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM
|
4 |
+
import torch
|
5 |
import os
|
6 |
+
from PyPDF2 import PdfReader
|
7 |
+
import docx
|
8 |
+
|
9 |
+
def extract_cv_text(file):
    """Extract plain text from an uploaded CV file.

    Args:
        file: Uploaded file object exposing a ``name`` attribute (as Gradio's
            ``gr.File`` provides), or ``None`` when no CV was uploaded.

    Returns:
        The extracted text for ``.pdf`` / ``.docx`` files, or a short
        status/error message string for missing or unsupported files.
    """
    if file is None:
        return "No CV uploaded"

    file_ext = os.path.splitext(file.name)[1].lower()

    if file_ext == '.pdf':
        reader = PdfReader(file)
        # extract_text() may return None for pages without a text layer;
        # the `or ""` guard prevents `str + None` raising TypeError.
        return "".join(page.extract_text() or "" for page in reader.pages)

    if file_ext == '.docx':
        doc = docx.Document(file)
        # One paragraph per line, matching the original trailing-newline shape.
        return "".join(paragraph.text + "\n" for paragraph in doc.paragraphs)

    return "Unsupported file format. Please upload PDF or DOCX files."
|
32 |
+
|
33 |
+
# Hugging Face access token read from the environment (e.g. a Space secret).
# NOTE(review): `access_token` is None when the `token` env var is unset —
# gated-model downloads below will then fail; verify the secret is configured.
access_token = os.getenv('token')

# Initialize the tokenizer and model with the Hugging Face access token.
# `token=` replaces the deprecated `use_auth_token=` keyword argument.
tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it", token=access_token)
model = AutoModelForCausalLM.from_pretrained(
    "google/gemma-2b-it",
    torch_dtype=torch.bfloat16,  # half the memory of float32, keeps float range
    token=access_token,
)
model.eval()  # Set the model to evaluation mode (disables dropout etc.)

# Inference client used for API-based streaming chat completions in respond().
client = InferenceClient(token=access_token)
|
47 |
+
|
48 |
+
def conversation_predict(input_text):
    """Generate a job-application email for a single job description.

    Args:
        input_text: The job description text to respond to.

    Returns:
        The decoded model output (the prompt followed by the generated email).
    """
    # Bug fix: this prompt must be an f-string — without the `f` prefix the
    # literal text "{input_text}" was sent to the model instead of the actual
    # job description.
    prompt = f"""Job Description:
{input_text}

Instructions: Write a concise and professional email expressing interest in the position.
Highlight relevant experience and skills from the CV that match the job requirements.
Keep the tone professional and enthusiastic.

Email:"""

    # Tokenize the prompt into model input ids.
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids

    # Generate a response with the model.
    outputs = model.generate(input_ids, max_new_tokens=2048)

    # Decode and return the generated response.
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
|
65 |
+
|
66 |
+
def respond(
    message: str,
    history: list[tuple[str, str]],
    system_message: str,
    cv_file,
    max_tokens: int,
    temperature: float,
    top_p: float,
):
    """Stream a chat reply, folding the parsed CV into the system prompt.

    Yields the accumulated response text after each streamed chunk so the
    Gradio ChatInterface can render the reply incrementally.
    """
    # Pull text out of the uploaded CV, if one was provided.
    if cv_file:
        cv_text = extract_cv_text(cv_file)
    else:
        cv_text = "No CV provided"

    updated_system_message = f"""Task: Write a professional job application email.

CV Summary:
{cv_text}

{system_message}"""

    # Rebuild the full conversation transcript expected by the chat API.
    messages = [{"role": "system", "content": updated_system_message}]
    for past_user, past_assistant in history:
        if past_user:
            messages.append({"role": "user", "content": past_user})
        if past_assistant:
            messages.append({"role": "assistant", "content": past_assistant})
    messages.append({"role": "user", "content": message})

    # Stream partial completions, yielding the running text each time.
    stream = client.chat_completion(
        messages=messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    )
    partial = ""
    for chunk in stream:
        partial += chunk["choices"][0]["delta"].get("content", "")
        yield partial
|
108 |
+
|
109 |
+
# Create a Gradio ChatInterface demo
# The additional_inputs below are passed positionally to respond() after
# (message, history): system_message, cv_file, max_tokens, temperature, top_p.
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        # Editable system prompt appended after the CV summary in respond().
        gr.Textbox(value="Instructions: Write a concise and professional email expressing interest in the position.",
                   label="System message"),
        # CV upload; extract_cv_text() handles .pdf and .docx.
        gr.File(label="Upload CV (PDF or DOCX)", file_types=[".pdf", ".docx"]),
        # Sampling controls forwarded to client.chat_completion().
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)
|
127 |
+
|
128 |
+
# Launch the Gradio app only when run as a script (not on import).
if __name__ == "__main__":
    demo.launch()
|
requirements.txt
CHANGED
@@ -5,4 +5,7 @@ langchain
|
|
5 |
transformers
|
6 |
torch
|
7 |
PyPDF2
|
|
|
|
|
|
|
8 |
|
|
|
5 |
transformers
|
6 |
torch
|
7 |
PyPDF2
|
8 |
+
gradio
|
9 |
+
huggingface_hub
|
10 |
+
python-docx
|
11 |
|