ruslanmv committed on
Commit
ec1a8b9
·
verified ·
1 Parent(s): 7105896

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +85 -56
app.py CHANGED
@@ -3,18 +3,23 @@ from huggingface_hub import InferenceClient
3
  import PyPDF2
4
  import io
5
  from docx import Document
 
6
 
7
  # For PDF generation
8
  from reportlab.pdfgen import canvas
9
  from reportlab.lib.pagesizes import letter
 
 
 
10
 
11
  # Initialize the inference client from Hugging Face.
12
  client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
13
 
14
- def extract_text_from_pdf(pdf_file_bytes):
15
- """Extract text from PDF bytes."""
 
16
  try:
17
- pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_file_bytes))
18
  text = ""
19
  for page in pdf_reader.pages:
20
  page_text = page.extract_text()
@@ -24,38 +29,40 @@ def extract_text_from_pdf(pdf_file_bytes):
24
  except Exception as e:
25
  return f"Error reading PDF: {e}"
26
 
27
- def extract_text_from_docx(docx_file_bytes):
28
- """Extract text from DOCX bytes."""
 
29
  try:
30
- doc = Document(io.BytesIO(docx_file_bytes))
31
  text = "\n".join(para.text for para in doc.paragraphs)
32
  return text.strip() or "No text could be extracted from the DOCX file."
33
  except Exception as e:
34
  return f"Error reading DOCX: {e}"
35
 
36
- def parse_cv(file_bytes, job_description):
 
37
  """Analyze the CV, show the prompt (debug) and return LLM analysis."""
38
- if file_bytes is None:
39
  return "Please upload a CV file.", ""
40
 
41
- # Determine file extension by header bytes
42
  try:
43
- if file_bytes.startswith(b'%PDF'):
44
- file_ext = "pdf"
45
- elif file_bytes.startswith(b'PK\x03\x04'):
46
- file_ext = "docx"
 
 
 
47
  else:
48
- return "Unsupported file format.", "Cannot determine file type from content"
 
 
 
 
49
  except Exception as e:
50
  error_msg = f"Error reading file: {e}"
51
  return error_msg, error_msg
52
 
53
- # Extract text
54
- if file_ext == "pdf":
55
- extracted_text = extract_text_from_pdf(file_bytes)
56
- else: # docx
57
- extracted_text = extract_text_from_docx(file_bytes)
58
-
59
  # Check for extraction errors
60
  if extracted_text.startswith("Error"):
61
  return extracted_text, "Error during text extraction. Please check the file."
@@ -80,7 +87,15 @@ def parse_cv(file_bytes, job_description):
80
  except Exception as e:
81
  return extracted_text, f"Analysis Error: {e}"
82
 
83
- def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p):
 
 
 
 
 
 
 
 
84
  """Generate chatbot response."""
85
  messages = [{"role": "system", "content": system_message}]
86
  for user_msg, bot_msg in history:
@@ -105,32 +120,38 @@ def respond(message, history: list[tuple[str, str]], system_message, max_tokens,
105
  except Exception as e:
106
  yield f"Error during chat generation: {e}"
107
 
 
108
  def create_pdf_report(report_text):
109
- """Creates a PDF report and returns it as (bytes, filename)."""
110
  if not report_text.strip():
111
  report_text = "No analysis report to convert."
112
 
113
- pdf_buffer = io.BytesIO()
114
- c = canvas.Canvas(pdf_buffer, pagesize=letter)
115
- c.setFont("Helvetica-Bold", 14)
116
- c.drawString(72, 750, "Analysis Report")
 
 
 
 
117
 
118
- text_obj = c.beginText(72, 730)
119
- text_obj.setFont("Helvetica", 11)
120
  for line in report_text.split("\n"):
121
- text_obj.textLine(line)
122
- c.drawText(text_obj)
123
 
124
- c.showPage()
125
- c.save()
126
- pdf_buffer.seek(0)
127
 
128
- # Return as a bytes-filename tuple that Gradio will treat as a file download
129
- return (pdf_buffer.getvalue(), "analysis_report.pdf")
130
 
131
  def toggle_download_button(analysis_report):
132
- """Toggle the download button if we have an analysis report."""
133
- return gr.update(interactive=bool(analysis_report.strip()), visible=bool(analysis_report.strip()))
 
 
 
 
134
 
135
  # Build the Gradio UI
136
  demo = gr.Blocks()
@@ -138,35 +159,45 @@ with demo:
138
  gr.Markdown("## AI-powered CV Analyzer and Chatbot")
139
 
140
  with gr.Tab("Chatbot"):
141
- # Simple chat interface
142
  chat_interface = gr.ChatInterface(
143
  respond,
144
- chatbot=gr.Chatbot(value=[], label="Chatbot", type="messages"),
145
- type="messages",
146
  additional_inputs=[
147
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
148
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
149
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
150
- gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
 
 
 
 
 
 
 
 
151
  ],
152
  )
153
 
154
  with gr.Tab("CV Analyzer"):
155
  gr.Markdown("### Upload your CV and provide the job description")
156
- # IMPORTANT: set type="file" and file_types to handle multiple file types
157
- file_input = gr.File(label="Upload CV", type="file", file_types=['.pdf', '.docx'])
158
  job_desc_input = gr.Textbox(label="Job Description", lines=5)
159
-
160
- extracted_text = gr.Textbox(label="Extracted CV Content", lines=10, interactive=False)
161
- analysis_output = gr.Textbox(label="Analysis Report", lines=10, interactive=False)
162
-
163
- download_pdf_button = gr.Button("Download Analysis as PDF", visible=False, interactive=False)
164
- pdf_file = gr.File(label="Download PDF", file_count="single", interactive=False)
 
 
 
 
165
 
166
  analyze_button = gr.Button("Analyze CV")
167
 
168
  analyze_button.click(
169
- lambda file, job_desc: parse_cv(file.read(), job_desc) if file else ("Please upload a CV", ""),
170
  inputs=[file_input, job_desc_input],
171
  outputs=[extracted_text, analysis_output],
172
  ).then(
@@ -176,9 +207,7 @@ with demo:
176
  )
177
 
178
  download_pdf_button.click(
179
- create_pdf_report,
180
- inputs=[analysis_output],
181
- outputs=[pdf_file],
182
  )
183
 
184
  if __name__ == "__main__":
 
3
  import PyPDF2
4
  import io
5
  from docx import Document
6
+ import os
7
 
8
  # For PDF generation
9
  from reportlab.pdfgen import canvas
10
  from reportlab.lib.pagesizes import letter
11
+ from reportlab.lib import utils
12
+ from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
13
+ from reportlab.lib.styles import getSampleStyleSheet
14
 
15
  # Initialize the inference client from Hugging Face.
16
  client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
17
 
18
+
19
+ def extract_text_from_pdf(pdf_file):
20
+ """Extract text from PDF file."""
21
  try:
22
+ pdf_reader = PyPDF2.PdfReader(pdf_file)
23
  text = ""
24
  for page in pdf_reader.pages:
25
  page_text = page.extract_text()
 
29
  except Exception as e:
30
  return f"Error reading PDF: {e}"
31
 
32
+
33
+ def extract_text_from_docx(docx_file):
34
+ """Extract text from DOCX file."""
35
  try:
36
+ doc = Document(docx_file)
37
  text = "\n".join(para.text for para in doc.paragraphs)
38
  return text.strip() or "No text could be extracted from the DOCX file."
39
  except Exception as e:
40
  return f"Error reading DOCX: {e}"
41
 
42
+
43
+ def parse_cv(file, job_description):
44
  """Analyze the CV, show the prompt (debug) and return LLM analysis."""
45
+ if file is None:
46
  return "Please upload a CV file.", ""
47
 
 
48
  try:
49
+ file_path = file.name # Get the file path
50
+ file_ext = os.path.splitext(file_path)[1].lower()
51
+
52
+ if file_ext == ".pdf":
53
+ extracted_text = extract_text_from_pdf(file_path)
54
+ elif file_ext == ".docx":
55
+ extracted_text = extract_text_from_docx(file_path)
56
  else:
57
+ return (
58
+ "Unsupported file format. Please upload a PDF or DOCX file.",
59
+ "Unsupported file format.",
60
+ )
61
+
62
  except Exception as e:
63
  error_msg = f"Error reading file: {e}"
64
  return error_msg, error_msg
65
 
 
 
 
 
 
 
66
  # Check for extraction errors
67
  if extracted_text.startswith("Error"):
68
  return extracted_text, "Error during text extraction. Please check the file."
 
87
  except Exception as e:
88
  return extracted_text, f"Analysis Error: {e}"
89
 
90
+
91
+ def respond(
92
+ message,
93
+ history: list[tuple[str, str]],
94
+ system_message,
95
+ max_tokens,
96
+ temperature,
97
+ top_p,
98
+ ):
99
  """Generate chatbot response."""
100
  messages = [{"role": "system", "content": system_message}]
101
  for user_msg, bot_msg in history:
 
120
  except Exception as e:
121
  yield f"Error during chat generation: {e}"
122
 
123
+
124
  def create_pdf_report(report_text):
125
+ """Creates a PDF report using SimpleDocTemplate for better formatting."""
126
  if not report_text.strip():
127
  report_text = "No analysis report to convert."
128
 
129
+ buffer = io.BytesIO()
130
+ doc = SimpleDocTemplate(buffer, pagesize=letter)
131
+ styles = getSampleStyleSheet()
132
+ Story = []
133
+
134
+ # Title
135
+ Story.append(Paragraph("<b>Analysis Report</b>", styles["Title"]))
136
+ Story.append(Spacer(1, 12))
137
 
138
+ # Report Content
 
139
  for line in report_text.split("\n"):
140
+ Story.append(Paragraph(line, styles["Normal"]))
141
+ Story.append(Spacer(1, 6)) # Add a small space between lines
142
 
143
+ doc.build(Story)
144
+ buffer.seek(0)
145
+ return buffer
146
 
 
 
147
 
148
  def toggle_download_button(analysis_report):
149
+ """Toggle the download button."""
150
+ return gr.update(
151
+ interactive=bool(analysis_report.strip()),
152
+ visible=bool(analysis_report.strip()),
153
+ )
154
+
155
 
156
  # Build the Gradio UI
157
  demo = gr.Blocks()
 
159
  gr.Markdown("## AI-powered CV Analyzer and Chatbot")
160
 
161
  with gr.Tab("Chatbot"):
 
162
  chat_interface = gr.ChatInterface(
163
  respond,
164
+ chatbot=gr.Chatbot(value=[], label="Chatbot"),
165
+ textbox=gr.Textbox(placeholder="Enter your message here...", label="Message"),
166
  additional_inputs=[
167
+ gr.Textbox(
168
+ value="You are a friendly Chatbot.", label="System message"
169
+ ),
170
+ gr.Slider(
171
+ minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"
172
+ ),
173
+ gr.Slider(
174
+ minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"
175
+ ),
176
+ gr.Slider(
177
+ minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"
178
+ ),
179
  ],
180
  )
181
 
182
  with gr.Tab("CV Analyzer"):
183
  gr.Markdown("### Upload your CV and provide the job description")
184
+ file_input = gr.File(label="Upload CV", file_types=[".pdf", ".docx"])
 
185
  job_desc_input = gr.Textbox(label="Job Description", lines=5)
186
+ extracted_text = gr.Textbox(
187
+ label="Extracted CV Content", lines=10, interactive=False
188
+ )
189
+ analysis_output = gr.Textbox(
190
+ label="Analysis Report", lines=10, interactive=False
191
+ )
192
+ download_pdf_button = gr.Button(
193
+ "Download Analysis as PDF", visible=False, interactive=False
194
+ )
195
+ pdf_file = gr.File(label="Download PDF", interactive=False)
196
 
197
  analyze_button = gr.Button("Analyze CV")
198
 
199
  analyze_button.click(
200
+ parse_cv,
201
  inputs=[file_input, job_desc_input],
202
  outputs=[extracted_text, analysis_output],
203
  ).then(
 
207
  )
208
 
209
  download_pdf_button.click(
210
+ create_pdf_report, inputs=[analysis_output], outputs=[pdf_file]
 
 
211
  )
212
 
213
  if __name__ == "__main__":