ruslanmv committed on
Commit
0b10650
·
verified ·
1 Parent(s): baffc49

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +82 -54
app.py CHANGED
@@ -3,18 +3,23 @@ from huggingface_hub import InferenceClient
3
  import PyPDF2
4
  import io
5
  from docx import Document
 
6
 
7
  # For PDF generation
8
  from reportlab.pdfgen import canvas
9
  from reportlab.lib.pagesizes import letter
 
 
 
10
 
11
  # Initialize the inference client from Hugging Face.
12
  client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
13
 
14
- def extract_text_from_pdf(pdf_file_bytes):
15
- """Extract text from PDF bytes."""
 
16
  try:
17
- pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_file_bytes))
18
  text = ""
19
  for page in pdf_reader.pages:
20
  page_text = page.extract_text()
@@ -24,39 +29,40 @@ def extract_text_from_pdf(pdf_file_bytes):
24
  except Exception as e:
25
  return f"Error reading PDF: {e}"
26
 
27
- def extract_text_from_docx(docx_file_bytes):
28
- """Extract text from DOCX bytes."""
 
29
  try:
30
- doc = Document(io.BytesIO(docx_file_bytes))
31
  text = "\n".join(para.text for para in doc.paragraphs)
32
  return text.strip() or "No text could be extracted from the DOCX file."
33
  except Exception as e:
34
  return f"Error reading DOCX: {e}"
35
 
 
36
  def parse_cv(file, job_description):
37
  """Analyze the CV, show the prompt (debug) and return LLM analysis."""
38
  if file is None:
39
  return "Please upload a CV file.", ""
40
 
41
  try:
42
- file_bytes = file
43
- file_ext = "pdf"
44
- if file_bytes.startswith(b'%PDF'):
45
- file_ext = "pdf"
46
- elif file_bytes.startswith(b'PK\x03\x04'):
47
- file_ext = "docx"
 
48
  else:
49
- return "Unsupported file format.", "Cannot determine file type from content"
 
 
 
 
50
  except Exception as e:
51
  error_msg = f"Error reading file: {e}"
52
  return error_msg, error_msg
53
 
54
- # Extract text
55
- if file_ext == "pdf":
56
- extracted_text = extract_text_from_pdf(file_bytes)
57
- else: # docx
58
- extracted_text = extract_text_from_docx(file_bytes)
59
-
60
  # Check for extraction errors
61
  if extracted_text.startswith("Error"):
62
  return extracted_text, "Error during text extraction. Please check the file."
@@ -81,7 +87,15 @@ def parse_cv(file, job_description):
81
  except Exception as e:
82
  return extracted_text, f"Analysis Error: {e}"
83
 
84
- def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p):
 
 
 
 
 
 
 
 
85
  """Generate chatbot response."""
86
  messages = [{"role": "system", "content": system_message}]
87
  for user_msg, bot_msg in history:
@@ -106,36 +120,38 @@ def respond(message, history: list[tuple[str, str]], system_message, max_tokens,
106
  except Exception as e:
107
  yield f"Error during chat generation: {e}"
108
 
 
109
  def create_pdf_report(report_text):
110
- """Creates a PDF report."""
111
  if not report_text.strip():
112
  report_text = "No analysis report to convert."
113
 
114
- pdf_buffer = io.BytesIO()
115
- c = canvas.Canvas(pdf_buffer, pagesize=letter)
 
 
116
 
117
- c.setFont("Helvetica-Bold", 14)
118
- c.drawString(72, 750, "Analysis Report")
 
119
 
120
- text_obj = c.beginText(72, 730)
121
- text_obj.setFont("Helvetica", 11)
122
  for line in report_text.split("\n"):
123
- text_obj.textLine(line)
124
- c.drawText(text_obj)
125
 
126
- c.showPage()
127
- c.save()
128
- pdf_buffer.seek(0)
129
 
130
- return {
131
- "name": "analysis_report.pdf",
132
- "data": pdf_buffer.getvalue(),
133
- "mime_type": "application/pdf"
134
- }
135
 
136
  def toggle_download_button(analysis_report):
137
  """Toggle the download button."""
138
- return gr.update(interactive=bool(analysis_report.strip()), visible=bool(analysis_report.strip()))
 
 
 
 
139
 
140
  # Build the Gradio UI
141
  demo = gr.Blocks()
@@ -145,41 +161,53 @@ with demo:
145
  with gr.Tab("Chatbot"):
146
  chat_interface = gr.ChatInterface(
147
  respond,
148
- chatbot=gr.Chatbot(value=[], label="Chatbot", type="messages"),
149
- type="messages",
150
  additional_inputs=[
151
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
152
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
153
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
154
- gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
 
 
 
 
 
 
 
 
155
  ],
156
  )
157
 
158
  with gr.Tab("CV Analyzer"):
159
  gr.Markdown("### Upload your CV and provide the job description")
160
- file_input = gr.File(label="Upload CV", type="binary")
161
  job_desc_input = gr.Textbox(label="Job Description", lines=5)
162
- extracted_text = gr.Textbox(label="Extracted CV Content", lines=10, interactive=False)
163
- analysis_output = gr.Textbox(label="Analysis Report", lines=10, interactive=False)
164
- download_pdf_button = gr.Button("Download Analysis as PDF", visible=False, interactive=False)
165
- pdf_file = gr.File(label="Download PDF", file_count="single", interactive=False)
 
 
 
 
 
 
166
 
167
  analyze_button = gr.Button("Analyze CV")
168
 
169
  analyze_button.click(
170
  parse_cv,
171
  inputs=[file_input, job_desc_input],
172
- outputs=[extracted_text, analysis_output]
173
  ).then(
174
  toggle_download_button,
175
  inputs=[analysis_output],
176
- outputs=[download_pdf_button]
177
  )
178
 
179
  download_pdf_button.click(
180
- create_pdf_report,
181
- inputs=[analysis_output],
182
- outputs=[pdf_file]
183
  )
184
 
185
  if __name__ == "__main__":
 
3
  import PyPDF2
4
  import io
5
  from docx import Document
6
+ import os
7
 
8
  # For PDF generation
9
  from reportlab.pdfgen import canvas
10
  from reportlab.lib.pagesizes import letter
11
+ from reportlab.lib import utils
12
+ from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
13
+ from reportlab.lib.styles import getSampleStyleSheet
14
 
15
  # Initialize the inference client from Hugging Face.
16
  client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
17
 
18
+
19
+ def extract_text_from_pdf(pdf_file):
20
+ """Extract text from PDF file."""
21
  try:
22
+ pdf_reader = PyPDF2.PdfReader(pdf_file)
23
  text = ""
24
  for page in pdf_reader.pages:
25
  page_text = page.extract_text()
 
29
  except Exception as e:
30
  return f"Error reading PDF: {e}"
31
 
32
+
33
+ def extract_text_from_docx(docx_file):
34
+ """Extract text from DOCX file."""
35
  try:
36
+ doc = Document(docx_file)
37
  text = "\n".join(para.text for para in doc.paragraphs)
38
  return text.strip() or "No text could be extracted from the DOCX file."
39
  except Exception as e:
40
  return f"Error reading DOCX: {e}"
41
 
42
+
43
  def parse_cv(file, job_description):
44
  """Analyze the CV, show the prompt (debug) and return LLM analysis."""
45
  if file is None:
46
  return "Please upload a CV file.", ""
47
 
48
  try:
49
+ file_path = file.name # Get the file path
50
+ file_ext = os.path.splitext(file_path)[1].lower()
51
+
52
+ if file_ext == ".pdf":
53
+ extracted_text = extract_text_from_pdf(file_path)
54
+ elif file_ext == ".docx":
55
+ extracted_text = extract_text_from_docx(file_path)
56
  else:
57
+ return (
58
+ "Unsupported file format. Please upload a PDF or DOCX file.",
59
+ "Unsupported file format.",
60
+ )
61
+
62
  except Exception as e:
63
  error_msg = f"Error reading file: {e}"
64
  return error_msg, error_msg
65
 
 
 
 
 
 
 
66
  # Check for extraction errors
67
  if extracted_text.startswith("Error"):
68
  return extracted_text, "Error during text extraction. Please check the file."
 
87
  except Exception as e:
88
  return extracted_text, f"Analysis Error: {e}"
89
 
90
+
91
+ def respond(
92
+ message,
93
+ history: list[tuple[str, str]],
94
+ system_message,
95
+ max_tokens,
96
+ temperature,
97
+ top_p,
98
+ ):
99
  """Generate chatbot response."""
100
  messages = [{"role": "system", "content": system_message}]
101
  for user_msg, bot_msg in history:
 
120
  except Exception as e:
121
  yield f"Error during chat generation: {e}"
122
 
123
+
124
  def create_pdf_report(report_text):
125
+ """Creates a PDF report using SimpleDocTemplate for better formatting."""
126
  if not report_text.strip():
127
  report_text = "No analysis report to convert."
128
 
129
+ buffer = io.BytesIO()
130
+ doc = SimpleDocTemplate(buffer, pagesize=letter)
131
+ styles = getSampleStyleSheet()
132
+ Story = []
133
 
134
+ # Title
135
+ Story.append(Paragraph("<b>Analysis Report</b>", styles["Title"]))
136
+ Story.append(Spacer(1, 12))
137
 
138
+ # Report Content
 
139
  for line in report_text.split("\n"):
140
+ Story.append(Paragraph(line, styles["Normal"]))
141
+ Story.append(Spacer(1, 6)) # Add a small space between lines
142
 
143
+ doc.build(Story)
144
+ buffer.seek(0)
145
+ return buffer
146
 
 
 
 
 
 
147
 
148
  def toggle_download_button(analysis_report):
149
  """Toggle the download button."""
150
+ return gr.update(
151
+ interactive=bool(analysis_report.strip()),
152
+ visible=bool(analysis_report.strip()),
153
+ )
154
+
155
 
156
  # Build the Gradio UI
157
  demo = gr.Blocks()
 
161
  with gr.Tab("Chatbot"):
162
  chat_interface = gr.ChatInterface(
163
  respond,
164
+ chatbot=gr.Chatbot(value=[], label="Chatbot"),
165
+ textbox=gr.Textbox(placeholder="Enter your message here...", label="Message"),
166
  additional_inputs=[
167
+ gr.Textbox(
168
+ value="You are a friendly Chatbot.", label="System message"
169
+ ),
170
+ gr.Slider(
171
+ minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"
172
+ ),
173
+ gr.Slider(
174
+ minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"
175
+ ),
176
+ gr.Slider(
177
+ minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"
178
+ ),
179
  ],
180
  )
181
 
182
  with gr.Tab("CV Analyzer"):
183
  gr.Markdown("### Upload your CV and provide the job description")
184
+ file_input = gr.File(label="Upload CV", file_types=[".pdf", ".docx"])
185
  job_desc_input = gr.Textbox(label="Job Description", lines=5)
186
+ extracted_text = gr.Textbox(
187
+ label="Extracted CV Content", lines=10, interactive=False
188
+ )
189
+ analysis_output = gr.Textbox(
190
+ label="Analysis Report", lines=10, interactive=False
191
+ )
192
+ download_pdf_button = gr.Button(
193
+ "Download Analysis as PDF", visible=False, interactive=False
194
+ )
195
+ pdf_file = gr.File(label="Download PDF", interactive=False)
196
 
197
  analyze_button = gr.Button("Analyze CV")
198
 
199
  analyze_button.click(
200
  parse_cv,
201
  inputs=[file_input, job_desc_input],
202
+ outputs=[extracted_text, analysis_output],
203
  ).then(
204
  toggle_download_button,
205
  inputs=[analysis_output],
206
+ outputs=[download_pdf_button],
207
  )
208
 
209
  download_pdf_button.click(
210
+ create_pdf_report, inputs=[analysis_output], outputs=[pdf_file]
 
 
211
  )
212
 
213
  if __name__ == "__main__":