ruslanmv committed
Commit d70b0b7 · verified · 1 Parent(s): 76dab41

Update app.py
Files changed (1)
  1. app.py +181 -60
app.py CHANGED
@@ -4,32 +4,44 @@ import PyPDF2
 import io
 from docx import Document
 import os
-import fitz # PyMuPDF for better PDF handling
 
 # For PDF generation
 from reportlab.pdfgen import canvas
 from reportlab.lib.pagesizes import letter
+from reportlab.lib import utils
 from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
 from reportlab.lib.styles import getSampleStyleSheet
 
-# Initialize Hugging Face Inference Client with Meta-Llama-3.1-8B-Instruct
-client = InferenceClient(
-    model="meta-llama/Meta-Llama-3.1-8B-Instruct",
-    token=os.getenv("HF_TOKEN")
-)
+# Import for CV Optimizer
+import fitz # PyMuPDF for PDF handling
+
+# Initialize the inference client from Hugging Face.
+# Updated model to Meta-Llama-3.1-8B-Instruct
+try:
+    client = InferenceClient(
+        model="meta-llama/Meta-Llama-3-8B-Instruct",
+        token=os.getenv("HF_TOKEN")
+    )
+except Exception as e:
+    print(f"Error initializing InferenceClient: {e}")
 
-# Function to extract text from PDF
 
 def extract_text_from_pdf(pdf_file):
+    """Extract text from PDF file."""
     try:
         pdf_reader = PyPDF2.PdfReader(pdf_file)
-        text = "".join(page.extract_text() or "" for page in pdf_reader.pages)
+        text = ""
+        for page in pdf_reader.pages:
+            page_text = page.extract_text()
+            if page_text:
+                text += page_text + "\n"
         return text.strip() or "No text could be extracted from the PDF."
     except Exception as e:
         return f"Error reading PDF: {e}"
 
-# Function to extract text from DOCX
+
 def extract_text_from_docx(docx_file):
+    """Extract text from DOCX file."""
     try:
         doc = Document(docx_file)
         text = "\n".join(para.text for para in doc.paragraphs)
@@ -37,13 +49,14 @@ def extract_text_from_docx(docx_file):
     except Exception as e:
         return f"Error reading DOCX: {e}"
 
-# Function to analyze CV
+
 def parse_cv(file, job_description):
+    """Analyze the CV, show the prompt (debug) and return LLM analysis."""
     if file is None:
         return "Please upload a CV file.", ""
 
     try:
-        file_path = file.name
+        file_path = file.name  # Get the file path
         file_ext = os.path.splitext(file_path)[1].lower()
 
         if file_ext == ".pdf":
@@ -51,13 +64,20 @@ def parse_cv(file, job_description):
         elif file_ext == ".docx":
             extracted_text = extract_text_from_docx(file_path)
         else:
-            return "Unsupported file format. Please upload a PDF or DOCX file.", ""
+            return (
+                "Unsupported file format. Please upload a PDF or DOCX file.",
+                "Unsupported file format.",
+            )
+
     except Exception as e:
-        return f"Error reading file: {e}", ""
+        error_msg = f"Error reading file: {e}"
+        return error_msg, error_msg
 
+    # Check for extraction errors
     if extracted_text.startswith("Error"):
         return extracted_text, "Error during text extraction. Please check the file."
 
+    # Prepare debug prompt
     prompt = (
         f"Analyze the CV against the job description. Provide a summary, assessment, "
         f"and a score 0-10.\n\n"
@@ -65,100 +85,201 @@ def parse_cv(file, job_description):
         f"Candidate CV:\n{extracted_text}\n"
     )
 
+    # Call LLM
     try:
         analysis = client.text_generation(prompt, max_new_tokens=512)
-        return extracted_text, f"--- Analysis Report ---\n{analysis}"
+        # Show both the debug prompt and the LLM analysis in the "Analysis Report"
+        analysis_report = (
+            f"--- DEBUG PROMPT ---\n{prompt}\n"
+            f"--- LLM ANALYSIS ---\n{analysis}"
+        )
+        return extracted_text, analysis_report
     except Exception as e:
         return extracted_text, f"Analysis Error: {e}"
 
-# Function to optimize resume
+
+def respond(
+    message,
+    history: list[tuple[str, str]],
+    system_message,
+    max_tokens,
+    temperature,
+    top_p,
+):
+    """Generate chatbot response."""
+    messages = [{"role": "system", "content": system_message}]
+    for user_msg, bot_msg in history:
+        if user_msg:
+            messages.append({"role": "user", "content": user_msg})
+        if bot_msg:
+            messages.append({"role": "assistant", "content": bot_msg})
+    messages.append({"role": "user", "content": message})
+
+    response = ""
+    try:
+        for message_chunk in client.text_generation(
+            messages,
+            max_new_tokens=max_tokens,
+            stream=True,
+            temperature=temperature,
+            top_p=top_p,
+        ):
+            response += message_chunk
+            yield response
+    except Exception as e:
+        yield f"Error during chat generation: {e}"
+
+
+def create_pdf_report(report_text):
+    """Creates a PDF report using SimpleDocTemplate for better formatting."""
+    if not report_text.strip():
+        report_text = "No analysis report to convert."
+
+    buffer = io.BytesIO()
+    doc = SimpleDocTemplate(buffer, pagesize=letter)
+    styles = getSampleStyleSheet()
+    Story = []
+
+    # Title
+    Story.append(Paragraph("<b>Analysis Report</b>", styles["Title"]))
+    Story.append(Spacer(1, 12))
+
+    # Report Content
+    for line in report_text.split("\n"):
+        Story.append(Paragraph(line, styles["Normal"]))
+        Story.append(Spacer(1, 6))  # Add a small space between lines
+
+    doc.build(Story)
+    buffer.seek(0)
+    return buffer
+
+
+def toggle_download_button(analysis_report):
+    """Toggle the download button."""
+    return gr.update(
+        interactive=bool(analysis_report.strip()),
+        visible=bool(analysis_report.strip()),
+    )
+
+# Function to optimize resume based on job title
 def optimize_resume(resume_text, job_title):
     prompt = f"Optimize the following resume for the job title '{job_title}':\n\n{resume_text}"
     responses = []
     try:
-        for message in client.chat_completion(
-            messages=[{"role": "user", "content": prompt}],
-            max_tokens=1000,
+        for message in client.text_generation(
+            prompt,
+            max_new_tokens=1000,
             stream=True,
         ):
-            responses.append(message.choices[0].delta.content)
+            responses.append(message)
    except Exception as e:
         return f"Error during model inference: {e}"
 
     return ''.join(responses)
 
-# Function to process resume and job title inputs
+# Function to extract text from a PDF file (using PyMuPDF)
+def extract_text_from_pdf_fitz(pdf_file_path):
+    text = ""
+    try:
+        pdf_document = fitz.open(pdf_file_path)
+        for page_num in range(len(pdf_document)):
+            page = pdf_document.load_page(page_num)
+            text += page.get_text()
+    except Exception as e:
+        return f"Error extracting text from PDF: {e}"
+    return text
+
+# Function to process the resume and job title inputs for optimization
 def process_resume(file, job_title):
     try:
         file_name = file.name
         if file_name.endswith(".pdf"):
-            resume_text = extract_text_from_pdf(file.name)
+            # Extract text if the file is a PDF
+            resume_text = extract_text_from_pdf_fitz(file.name)
         elif file_name.endswith(".docx"):
+            # Extract text if the file is a Word document
             resume_text = extract_text_from_docx(file.name)
         else:
-            return "Unsupported file format. Please upload a PDF or DOCX file."
+            # Assume the file is a text file and read it directly
+            with open(file.name, 'r', encoding='utf-8') as f:
+                resume_text = f.read()
 
+        # Optimize the resume
         optimized_resume = optimize_resume(resume_text, job_title)
+
         return optimized_resume
     except Exception as e:
         return f"Error processing resume: {e}"
 
-# Function to generate a PDF report
-def create_pdf_report(report_text):
-    buffer = io.BytesIO()
-    doc = SimpleDocTemplate(buffer, pagesize=letter)
-    styles = getSampleStyleSheet()
-    Story = [Paragraph("<b>Analysis Report</b>", styles["Title"]), Spacer(1, 12)]
-
-    for line in report_text.split("\n"):
-        Story.append(Paragraph(line, styles["Normal"]))
-        Story.append(Spacer(1, 6))
-
-    doc.build(Story)
-    buffer.seek(0)
-    return buffer
-
-# Function to toggle the download button
-def toggle_download_button(analysis_report):
-    return gr.update(interactive=bool(analysis_report.strip()), visible=bool(analysis_report.strip()))
-
 # Build the Gradio UI
 demo = gr.Blocks()
 with demo:
-    gr.Markdown("## AI-powered CV Analyzer, Optimizer, and Chatbot")
+    gr.Markdown("## AI-powered CV Analyzer and Chatbot")
 
     with gr.Tab("Chatbot"):
         chat_interface = gr.ChatInterface(
-            lambda message, history: client.chat_completion(
-                messages=[{"role": "user", "content": message}],
-                max_tokens=512,
-            ),
-            chatbot=gr.Chatbot(label="Chatbot"),
+            respond,
+            chatbot=gr.Chatbot(value=[], label="Chatbot"),
             textbox=gr.Textbox(placeholder="Enter your message here...", label="Message"),
+            additional_inputs=[
+                gr.Textbox(
+                    value="You are a friendly Chatbot.", label="System message"
+                ),
+                gr.Slider(
+                    minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"
+                ),
+                gr.Slider(
+                    minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"
+                ),
+                gr.Slider(
+                    minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"
+                ),
+            ],
         )
 
     with gr.Tab("CV Analyzer"):
         gr.Markdown("### Upload your CV and provide the job description")
         file_input = gr.File(label="Upload CV", file_types=[".pdf", ".docx"])
         job_desc_input = gr.Textbox(label="Job Description", lines=5)
-        extracted_text = gr.Textbox(label="Extracted CV Content", lines=10, interactive=False)
-        analysis_output = gr.Textbox(label="Analysis Report", lines=10, interactive=False)
-        download_pdf_button = gr.Button("Download Analysis as PDF", visible=False, interactive=False)
+        extracted_text = gr.Textbox(
+            label="Extracted CV Content", lines=10, interactive=False
+        )
+        analysis_output = gr.Textbox(
+            label="Analysis Report", lines=10, interactive=False
+        )
+        download_pdf_button = gr.Button(
+            "Download Analysis as PDF", visible=False, interactive=False
+        )
         pdf_file = gr.File(label="Download PDF", interactive=False)
+
         analyze_button = gr.Button("Analyze CV")
-
-        analyze_button.click(parse_cv, [file_input, job_desc_input], [extracted_text, analysis_output])
-        analyze_button.then(toggle_download_button, [analysis_output], [download_pdf_button])
-        download_pdf_button.click(create_pdf_report, [analysis_output], [pdf_file])
 
+        analyze_button.click(
+            parse_cv,
+            inputs=[file_input, job_desc_input],
+            outputs=[extracted_text, analysis_output],
+        ).then(
+            toggle_download_button,
+            inputs=[analysis_output],
+            outputs=[download_pdf_button],
+        )
+
+        download_pdf_button.click(
+            create_pdf_report, inputs=[analysis_output], outputs=[pdf_file]
+        )
+
     with gr.Tab("CV Optimizer"):
-        gr.Markdown("### Upload your Resume and Enter Job Title")
-        resume_file = gr.File(label="Upload Resume (PDF or Word)")
-        job_title_input = gr.Textbox(label="Job Title", lines=1)
+        gr.Markdown("### Upload your CV and enter the job title to optimize your resume")
+        cv_file_input = gr.File(label="Upload CV (PDF or DOCX)", file_types=[".pdf", ".docx"])
+        job_title_input = gr.Textbox(label="Job Title", placeholder="Enter the job title...")
         optimized_resume_output = gr.Textbox(label="Optimized Resume", lines=20)
-        optimize_button = gr.Button("Optimize Resume")
-
-        optimize_button.click(process_resume, [resume_file, job_title_input], [optimized_resume_output])
+        optimize_button = gr.Button("Optimize CV")
+
+        optimize_button.click(
+            process_resume,
+            inputs=[cv_file_input, job_title_input],
+            outputs=[optimized_resume_output]
+        )
 
 if __name__ == "__main__":
     demo.queue().launch()
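
Two follow-up notes on the committed code, each with a hedged sketch (suggestions only, not part of this commit).

First, the new respond() helper builds a chat-style messages list but passes it to client.text_generation(), which expects a plain string prompt, so the Chatbot tab will likely fail at generation time. A minimal sketch of a streaming variant built on client.chat_completion() (which does accept a messages list), reusing the same parameter names as the committed function:

# Sketch only: chat_completion-based alternative to the committed respond().
# Assumes the same `client` initialized above; not part of the commit.
def respond(message, history, system_message, max_tokens, temperature, top_p):
    messages = [{"role": "system", "content": system_message}]
    for user_msg, bot_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if bot_msg:
            messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})

    response = ""
    try:
        # chat_completion accepts a messages list and can stream token deltas.
        for chunk in client.chat_completion(
            messages=messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            token = chunk.choices[0].delta.content or ""
            response += token
            yield response
    except Exception as e:
        yield f"Error during chat generation: {e}"

Second, create_pdf_report() returns an io.BytesIO buffer, but it is wired to a gr.File output, which expects a file path. A sketch that wraps the existing builder and writes the PDF to a temporary file (the wrapper name and the tempfile usage are assumptions, not in the commit):

# Sketch only: write the generated PDF to disk so gr.File can serve it.
import tempfile

def create_pdf_report_file(report_text):
    buffer = create_pdf_report(report_text)  # existing BytesIO builder from the commit
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
    tmp.write(buffer.getvalue())
    tmp.close()
    return tmp.name  # gr.File accepts a file path

With such a wrapper, the download handler would point at it instead: download_pdf_button.click(create_pdf_report_file, inputs=[analysis_output], outputs=[pdf_file]).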