ruslanmv committed on
Commit
ea32e64
·
verified ·
1 Parent(s): 0b10650

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -85
app.py CHANGED
@@ -3,23 +3,18 @@ from huggingface_hub import InferenceClient
3
  import PyPDF2
4
  import io
5
  from docx import Document
6
- import os
7
 
8
  # For PDF generation
9
  from reportlab.pdfgen import canvas
10
  from reportlab.lib.pagesizes import letter
11
- from reportlab.lib import utils
12
- from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
13
- from reportlab.lib.styles import getSampleStyleSheet
14
 
15
  # Initialize the inference client from Hugging Face.
16
  client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
17
 
18
-
19
- def extract_text_from_pdf(pdf_file):
20
- """Extract text from PDF file."""
21
  try:
22
- pdf_reader = PyPDF2.PdfReader(pdf_file)
23
  text = ""
24
  for page in pdf_reader.pages:
25
  page_text = page.extract_text()
@@ -29,40 +24,38 @@ def extract_text_from_pdf(pdf_file):
29
  except Exception as e:
30
  return f"Error reading PDF: {e}"
31
 
32
-
33
- def extract_text_from_docx(docx_file):
34
- """Extract text from DOCX file."""
35
  try:
36
- doc = Document(docx_file)
37
  text = "\n".join(para.text for para in doc.paragraphs)
38
  return text.strip() or "No text could be extracted from the DOCX file."
39
  except Exception as e:
40
  return f"Error reading DOCX: {e}"
41
 
42
-
43
- def parse_cv(file, job_description):
44
  """Analyze the CV, show the prompt (debug) and return LLM analysis."""
45
- if file is None:
46
  return "Please upload a CV file.", ""
47
 
 
48
  try:
49
- file_path = file.name # Get the file path
50
- file_ext = os.path.splitext(file_path)[1].lower()
51
-
52
- if file_ext == ".pdf":
53
- extracted_text = extract_text_from_pdf(file_path)
54
- elif file_ext == ".docx":
55
- extracted_text = extract_text_from_docx(file_path)
56
  else:
57
- return (
58
- "Unsupported file format. Please upload a PDF or DOCX file.",
59
- "Unsupported file format.",
60
- )
61
-
62
  except Exception as e:
63
  error_msg = f"Error reading file: {e}"
64
  return error_msg, error_msg
65
 
 
 
 
 
 
 
66
  # Check for extraction errors
67
  if extracted_text.startswith("Error"):
68
  return extracted_text, "Error during text extraction. Please check the file."
@@ -87,15 +80,7 @@ def parse_cv(file, job_description):
87
  except Exception as e:
88
  return extracted_text, f"Analysis Error: {e}"
89
 
90
-
91
- def respond(
92
- message,
93
- history: list[tuple[str, str]],
94
- system_message,
95
- max_tokens,
96
- temperature,
97
- top_p,
98
- ):
99
  """Generate chatbot response."""
100
  messages = [{"role": "system", "content": system_message}]
101
  for user_msg, bot_msg in history:
@@ -120,38 +105,32 @@ def respond(
120
  except Exception as e:
121
  yield f"Error during chat generation: {e}"
122
 
123
-
124
  def create_pdf_report(report_text):
125
- """Creates a PDF report using SimpleDocTemplate for better formatting."""
126
  if not report_text.strip():
127
  report_text = "No analysis report to convert."
128
 
129
- buffer = io.BytesIO()
130
- doc = SimpleDocTemplate(buffer, pagesize=letter)
131
- styles = getSampleStyleSheet()
132
- Story = []
133
-
134
- # Title
135
- Story.append(Paragraph("<b>Analysis Report</b>", styles["Title"]))
136
- Story.append(Spacer(1, 12))
137
 
138
- # Report Content
 
139
  for line in report_text.split("\n"):
140
- Story.append(Paragraph(line, styles["Normal"]))
141
- Story.append(Spacer(1, 6)) # Add a small space between lines
142
 
143
- doc.build(Story)
144
- buffer.seek(0)
145
- return buffer
146
 
 
 
147
 
148
  def toggle_download_button(analysis_report):
149
- """Toggle the download button."""
150
- return gr.update(
151
- interactive=bool(analysis_report.strip()),
152
- visible=bool(analysis_report.strip()),
153
- )
154
-
155
 
156
  # Build the Gradio UI
157
  demo = gr.Blocks()
@@ -159,40 +138,30 @@ with demo:
159
  gr.Markdown("## AI-powered CV Analyzer and Chatbot")
160
 
161
  with gr.Tab("Chatbot"):
 
162
  chat_interface = gr.ChatInterface(
163
  respond,
164
- chatbot=gr.Chatbot(value=[], label="Chatbot"),
165
- textbox=gr.Textbox(placeholder="Enter your message here...", label="Message"),
166
  additional_inputs=[
167
- gr.Textbox(
168
- value="You are a friendly Chatbot.", label="System message"
169
- ),
170
- gr.Slider(
171
- minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"
172
- ),
173
- gr.Slider(
174
- minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"
175
- ),
176
- gr.Slider(
177
- minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"
178
- ),
179
  ],
180
  )
181
 
182
  with gr.Tab("CV Analyzer"):
183
  gr.Markdown("### Upload your CV and provide the job description")
184
- file_input = gr.File(label="Upload CV", file_types=[".pdf", ".docx"])
 
185
  job_desc_input = gr.Textbox(label="Job Description", lines=5)
186
- extracted_text = gr.Textbox(
187
- label="Extracted CV Content", lines=10, interactive=False
188
- )
189
- analysis_output = gr.Textbox(
190
- label="Analysis Report", lines=10, interactive=False
191
- )
192
- download_pdf_button = gr.Button(
193
- "Download Analysis as PDF", visible=False, interactive=False
194
- )
195
- pdf_file = gr.File(label="Download PDF", interactive=False)
196
 
197
  analyze_button = gr.Button("Analyze CV")
198
 
@@ -207,8 +176,10 @@ with demo:
207
  )
208
 
209
  download_pdf_button.click(
210
- create_pdf_report, inputs=[analysis_output], outputs=[pdf_file]
 
 
211
  )
212
 
213
  if __name__ == "__main__":
214
- demo.queue().launch()
 
3
  import PyPDF2
4
  import io
5
  from docx import Document
 
6
 
7
  # For PDF generation
8
  from reportlab.pdfgen import canvas
9
  from reportlab.lib.pagesizes import letter
 
 
 
10
 
11
  # Initialize the inference client from Hugging Face.
12
  client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
13
 
14
+ def extract_text_from_pdf(pdf_file_bytes):
15
+ """Extract text from PDF bytes."""
 
16
  try:
17
+ pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_file_bytes))
18
  text = ""
19
  for page in pdf_reader.pages:
20
  page_text = page.extract_text()
 
24
  except Exception as e:
25
  return f"Error reading PDF: {e}"
26
 
27
def extract_text_from_docx(docx_file_bytes):
    """Return the paragraph text of a DOCX document supplied as raw bytes.

    The bytes are wrapped in an in-memory stream and handed to ``Document``.
    The text of every paragraph is joined with newlines and stripped.

    Returns:
        The extracted text, a fixed "no text" message when the result is
        empty, or a string beginning with "Error reading DOCX:" when any
        exception is raised while reading the document.
    """
    try:
        document = Document(io.BytesIO(docx_file_bytes))
        paragraphs = [paragraph.text for paragraph in document.paragraphs]
        body = "\n".join(paragraphs).strip()
        if body:
            return body
        return "No text could be extracted from the DOCX file."
    except Exception as exc:
        # Failures are reported as text rather than raised; the message
        # prefix is part of the returned string.
        return f"Error reading DOCX: {exc}"
35
 
36
+ def parse_cv(file_bytes, job_description):
 
37
  """Analyze the CV, show the prompt (debug) and return LLM analysis."""
38
+ if file_bytes is None:
39
  return "Please upload a CV file.", ""
40
 
41
+ # Determine file extension by header bytes
42
  try:
43
+ if file_bytes.startswith(b'%PDF'):
44
+ file_ext = "pdf"
45
+ elif file_bytes.startswith(b'PK\x03\x04'):
46
+ file_ext = "docx"
 
 
 
47
  else:
48
+ return "Unsupported file format.", "Cannot determine file type from content"
 
 
 
 
49
  except Exception as e:
50
  error_msg = f"Error reading file: {e}"
51
  return error_msg, error_msg
52
 
53
+ # Extract text
54
+ if file_ext == "pdf":
55
+ extracted_text = extract_text_from_pdf(file_bytes)
56
+ else: # docx
57
+ extracted_text = extract_text_from_docx(file_bytes)
58
+
59
  # Check for extraction errors
60
  if extracted_text.startswith("Error"):
61
  return extracted_text, "Error during text extraction. Please check the file."
 
80
  except Exception as e:
81
  return extracted_text, f"Analysis Error: {e}"
82
 
83
+ def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p):
 
 
 
 
 
 
 
 
84
  """Generate chatbot response."""
85
  messages = [{"role": "system", "content": system_message}]
86
  for user_msg, bot_msg in history:
 
105
  except Exception as e:
106
  yield f"Error during chat generation: {e}"
107
 
 
108
def create_pdf_report(report_text):
    """Render the analysis report as a PDF and return ``(bytes, filename)``.

    A bold "Analysis Report" title is drawn at the top of the first page,
    followed by the report body in 11pt Helvetica. Long lines are wrapped
    and the body flows onto additional pages when it reaches the bottom
    margin, so no text is drawn outside the page.

    Args:
        report_text: The report as plain text. ``None``, empty, or
            whitespace-only input is replaced by a placeholder sentence.

    Returns:
        A tuple of the PDF file content as ``bytes`` and the suggested
        file name ``"analysis_report.pdf"``.
    """
    # Local import keeps this block self-contained; textwrap is stdlib.
    import textwrap

    if not report_text or not report_text.strip():
        report_text = "No analysis report to convert."

    left_margin = 72      # 1 inch, in points
    bottom_margin = 72    # stop drawing body text below this y coordinate
    first_page_top = 730  # body starts below the title on page one
    next_page_top = 750   # continuation pages have no title
    # Approximate character budget for 11pt Helvetica between 1-inch
    # margins on a letter page; textwrap counts characters, not points.
    max_chars = 85

    pdf_buffer = io.BytesIO()
    c = canvas.Canvas(pdf_buffer, pagesize=letter)
    c.setFont("Helvetica-Bold", 14)
    c.drawString(left_margin, 750, "Analysis Report")

    def _begin_body(top):
        """Start a body text object at *top* with the body font selected."""
        block = c.beginText(left_margin, top)
        block.setFont("Helvetica", 11)
        return block

    text_obj = _begin_body(first_page_top)
    for line in report_text.split("\n"):
        # wrap() returns [] for a blank line; keep it as an empty row so
        # paragraph spacing in the report is preserved.
        for piece in textwrap.wrap(line, width=max_chars) or [""]:
            if text_obj.getY() < bottom_margin:
                # Page is full: flush it and continue on a fresh page.
                c.drawText(text_obj)
                c.showPage()
                text_obj = _begin_body(next_page_top)
            text_obj.textLine(piece)
    c.drawText(text_obj)

    c.showPage()
    c.save()

    # NOTE(review): this return shape is unchanged from the original. Gradio
    # File outputs are documented as taking a file path; confirm that the
    # Gradio version in use accepts a (bytes, filename) tuple.
    return (pdf_buffer.getvalue(), "analysis_report.pdf")
130
 
131
def toggle_download_button(analysis_report):
    """Show and enable the download button only when a report exists.

    Args:
        analysis_report: Current text of the analysis output. ``None`` is
            treated the same as an empty string.

    Returns:
        The result of ``gr.update`` with ``interactive`` and ``visible``
        both set to whether the report contains non-whitespace text.
    """
    # Evaluate once; guard against None so an unset textbox cannot raise.
    has_report = bool(analysis_report and analysis_report.strip())
    return gr.update(interactive=has_report, visible=has_report)
 
 
 
 
134
 
135
  # Build the Gradio UI
136
  demo = gr.Blocks()
 
138
  gr.Markdown("## AI-powered CV Analyzer and Chatbot")
139
 
140
  with gr.Tab("Chatbot"):
141
+ # Simple chat interface
142
  chat_interface = gr.ChatInterface(
143
  respond,
144
+ chatbot=gr.Chatbot(value=[], label="Chatbot", type="messages"),
145
+ type="messages",
146
  additional_inputs=[
147
+ gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
148
+ gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
149
+ gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
150
+ gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
 
 
 
 
 
 
 
 
151
  ],
152
  )
153
 
154
  with gr.Tab("CV Analyzer"):
155
  gr.Markdown("### Upload your CV and provide the job description")
156
+ # IMPORTANT: set type="bytes" so we get raw bytes in parse_cv
157
+ file_input = gr.File(label="Upload CV", type="bytes")
158
  job_desc_input = gr.Textbox(label="Job Description", lines=5)
159
+
160
+ extracted_text = gr.Textbox(label="Extracted CV Content", lines=10, interactive=False)
161
+ analysis_output = gr.Textbox(label="Analysis Report", lines=10, interactive=False)
162
+
163
+ download_pdf_button = gr.Button("Download Analysis as PDF", visible=False, interactive=False)
164
+ pdf_file = gr.File(label="Download PDF", file_count="single", interactive=False)
 
 
 
 
165
 
166
  analyze_button = gr.Button("Analyze CV")
167
 
 
176
  )
177
 
178
  download_pdf_button.click(
179
+ create_pdf_report,
180
+ inputs=[analysis_output],
181
+ outputs=[pdf_file],
182
  )
183
 
184
  if __name__ == "__main__":
185
+ demo.queue().launch()