Update app.py
app.py
CHANGED
@@ -3,18 +3,23 @@ from huggingface_hub import InferenceClient
 import PyPDF2
 import io
 from docx import Document
+import os
 
 # For PDF generation
 from reportlab.pdfgen import canvas
 from reportlab.lib.pagesizes import letter
+from reportlab.lib import utils
+from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
+from reportlab.lib.styles import getSampleStyleSheet
 
 # Initialize the inference client from Hugging Face.
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 
-def extract_text_from_pdf(pdf_file_bytes):
-
+
+def extract_text_from_pdf(pdf_file):
+    """Extract text from PDF file."""
     try:
-        pdf_reader = PyPDF2.PdfReader(
+        pdf_reader = PyPDF2.PdfReader(pdf_file)
         text = ""
         for page in pdf_reader.pages:
             page_text = page.extract_text()
@@ -24,38 +29,40 @@ def extract_text_from_pdf(pdf_file_bytes):
     except Exception as e:
         return f"Error reading PDF: {e}"
 
-
-
+
+def extract_text_from_docx(docx_file):
+    """Extract text from DOCX file."""
     try:
-        doc = Document(
+        doc = Document(docx_file)
         text = "\n".join(para.text for para in doc.paragraphs)
         return text.strip() or "No text could be extracted from the DOCX file."
     except Exception as e:
         return f"Error reading DOCX: {e}"
 
-def parse_cv(file_bytes, job_description):
+
+def parse_cv(file, job_description):
     """Analyze the CV, show the prompt (debug) and return LLM analysis."""
-    if
+    if file is None:
         return "Please upload a CV file.", ""
 
-    # Determine file extension by header bytes
     try:
-
-
-
-
+        file_path = file.name  # Get the file path
+        file_ext = os.path.splitext(file_path)[1].lower()
+
+        if file_ext == ".pdf":
+            extracted_text = extract_text_from_pdf(file_path)
+        elif file_ext == ".docx":
+            extracted_text = extract_text_from_docx(file_path)
         else:
-            return
+            return (
+                "Unsupported file format. Please upload a PDF or DOCX file.",
+                "Unsupported file format.",
+            )
+
     except Exception as e:
         error_msg = f"Error reading file: {e}"
         return error_msg, error_msg
 
-    # Extract text
-    if file_ext == "pdf":
-        extracted_text = extract_text_from_pdf(file_bytes)
-    else: # docx
-        extracted_text = extract_text_from_docx(file_bytes)
-
     # Check for extraction errors
     if extracted_text.startswith("Error"):
         return extracted_text, "Error during text extraction. Please check the file."
@@ -80,7 +87,15 @@ def parse_cv(file_bytes, job_description):
     except Exception as e:
         return extracted_text, f"Analysis Error: {e}"
 
-def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p):
+
+def respond(
+    message,
+    history: list[tuple[str, str]],
+    system_message,
+    max_tokens,
+    temperature,
+    top_p,
+):
     """Generate chatbot response."""
     messages = [{"role": "system", "content": system_message}]
     for user_msg, bot_msg in history:
@@ -105,32 +120,38 @@ def respond(message, history: list[tuple[str, str]], system_message, max_tokens,
     except Exception as e:
         yield f"Error during chat generation: {e}"
 
+
 def create_pdf_report(report_text):
-    """Creates a PDF report
+    """Creates a PDF report using SimpleDocTemplate for better formatting."""
     if not report_text.strip():
         report_text = "No analysis report to convert."
 
-
-
-
-
+    buffer = io.BytesIO()
+    doc = SimpleDocTemplate(buffer, pagesize=letter)
+    styles = getSampleStyleSheet()
+    Story = []
+
+    # Title
+    Story.append(Paragraph("<b>Analysis Report</b>", styles["Title"]))
+    Story.append(Spacer(1, 12))
 
-
-    text_obj.setFont("Helvetica", 11)
+    # Report Content
     for line in report_text.split("\n"):
-
-
+        Story.append(Paragraph(line, styles["Normal"]))
+        Story.append(Spacer(1, 6))  # Add a small space between lines
 
-
-
-
+    doc.build(Story)
+    buffer.seek(0)
+    return buffer
 
-    # Return as a bytes-filename tuple that Gradio will treat as a file download
-    return (pdf_buffer.getvalue(), "analysis_report.pdf")
 
 def toggle_download_button(analysis_report):
-    """Toggle the download button
-    return gr.update(
+    """Toggle the download button."""
+    return gr.update(
+        interactive=bool(analysis_report.strip()),
+        visible=bool(analysis_report.strip()),
+    )
+
 
 # Build the Gradio UI
 demo = gr.Blocks()
@@ -138,35 +159,45 @@ with demo:
     gr.Markdown("## AI-powered CV Analyzer and Chatbot")
 
     with gr.Tab("Chatbot"):
-        # Simple chat interface
         chat_interface = gr.ChatInterface(
             respond,
-            chatbot=gr.Chatbot(value=[], label="Chatbot"
-
+            chatbot=gr.Chatbot(value=[], label="Chatbot"),
+            textbox=gr.Textbox(placeholder="Enter your message here...", label="Message"),
             additional_inputs=[
-                gr.Textbox(
-
-
-                gr.Slider(
+                gr.Textbox(
+                    value="You are a friendly Chatbot.", label="System message"
+                ),
+                gr.Slider(
+                    minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"
+                ),
+                gr.Slider(
+                    minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"
+                ),
+                gr.Slider(
+                    minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"
+                ),
             ],
         )
 
     with gr.Tab("CV Analyzer"):
         gr.Markdown("### Upload your CV and provide the job description")
-
-        file_input = gr.File(label="Upload CV", type="file", file_types=['.pdf', '.docx'])
+        file_input = gr.File(label="Upload CV", file_types=[".pdf", ".docx"])
         job_desc_input = gr.Textbox(label="Job Description", lines=5)
-
-
-
-
-
-
+        extracted_text = gr.Textbox(
+            label="Extracted CV Content", lines=10, interactive=False
+        )
+        analysis_output = gr.Textbox(
+            label="Analysis Report", lines=10, interactive=False
+        )
+        download_pdf_button = gr.Button(
+            "Download Analysis as PDF", visible=False, interactive=False
+        )
+        pdf_file = gr.File(label="Download PDF", interactive=False)
 
         analyze_button = gr.Button("Analyze CV")
 
         analyze_button.click(
-
+            parse_cv,
            inputs=[file_input, job_desc_input],
            outputs=[extracted_text, analysis_output],
        ).then(
@@ -176,9 +207,7 @@ with demo:
        )
 
        download_pdf_button.click(
-            create_pdf_report,
-            inputs=[analysis_output],
-            outputs=[pdf_file],
+            create_pdf_report, inputs=[analysis_output], outputs=[pdf_file]
        )
 
 if __name__ == "__main__":
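
For reference, below is a minimal standalone sketch of the reportlab SimpleDocTemplate flow that the updated create_pdf_report builds on. The sample report text and the output filename are illustrative assumptions, not part of the Space's code.

import io

from reportlab.lib.pagesizes import letter
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.platypus import Paragraph, SimpleDocTemplate, Spacer

# Hypothetical report text standing in for the LLM analysis output.
sample_report = "Strong Python and NLP background.\nLimited cloud deployment experience."

buffer = io.BytesIO()
doc = SimpleDocTemplate(buffer, pagesize=letter)
styles = getSampleStyleSheet()

story = [Paragraph("<b>Analysis Report</b>", styles["Title"]), Spacer(1, 12)]
for line in sample_report.split("\n"):
    story.append(Paragraph(line, styles["Normal"]))
    story.append(Spacer(1, 6))  # small gap between lines

doc.build(story)  # render the flowables into the in-memory buffer
buffer.seek(0)

# One way to hand the result to a file component is to write it to disk first.
with open("analysis_report.pdf", "wb") as fh:
    fh.write(buffer.read())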
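
As a quick sanity check of the extension-based dispatch the new parse_cv performs, here is a small sketch that mimics the uploaded-file object outside Gradio. The DummyUpload class and the example path are assumptions for illustration only; in the app, the gr.File component supplies the object whose .name attribute parse_cv reads.

import os

class DummyUpload:
    """Stand-in for the uploaded-file object whose .name the app reads."""
    def __init__(self, name):
        self.name = name

upload = DummyUpload("example_cv.docx")  # hypothetical path
file_ext = os.path.splitext(upload.name)[1].lower()

if file_ext == ".pdf":
    print("dispatch to extract_text_from_pdf:", upload.name)
elif file_ext == ".docx":
    print("dispatch to extract_text_from_docx:", upload.name)
else:
    print("Unsupported file format. Please upload a PDF or DOCX file.")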