Spaces:
Sleeping
Sleeping
File size: 6,907 Bytes
3935f98 71c7cb5 87475e8 0b10650 3935f98 83eba02 0b10650 83eba02 93e353c 3935f98 0b10650 39890ac 0b10650 39890ac 93e353c 71c7cb5 0b10650 39890ac 0b10650 39890ac 93e353c 71c7cb5 0b10650 3751b1c 83eba02 71c7cb5 8af823f 83eba02 39890ac 0b10650 8af823f 0b10650 39890ac 8af823f 87475e8 8af823f c1c0b76 83eba02 39890ac 83eba02 39890ac 83eba02 39890ac 87475e8 83eba02 39890ac 8af823f 01fb377 83eba02 39890ac 8af823f 87475e8 0b10650 8af823f 5a48c62 8af823f 5a48c62 0b10650 83eba02 0b10650 83eba02 0b10650 01fb377 0b10650 01fb377 0b10650 83eba02 0b10650 01fb377 0b10650 83eba02 01fb377 baffc49 0b10650 83eba02 71c7cb5 87475e8 5a48c62 0b10650 5a48c62 0b10650 5a48c62 71c7cb5 8af823f 0b10650 3751b1c 0b10650 87475e8 83eba02 baffc49 8af823f 0b10650 01fb377 0b10650 01fb377 83eba02 0b10650 83eba02 3935f98 baffc49 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 |
import gradio as gr
from huggingface_hub import InferenceClient
import PyPDF2
import io
from docx import Document
import os
# For PDF generation
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter
from reportlab.lib import utils
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
from reportlab.lib.styles import getSampleStyleSheet
# Module-wide Hugging Face inference client; parse_cv uses it for text
# generation and respond uses it for streamed chat completion.
client = InferenceClient(model="HuggingFaceH4/zephyr-7b-beta")
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF, one page per line group.

    Pages that yield no text are skipped. Failures are not raised: the
    function returns a string beginning with "Error reading PDF:" instead,
    and a fixed placeholder message when no text was found at all.
    """
    try:
        reader = PyPDF2.PdfReader(pdf_file)
        chunks = []
        for page in reader.pages:
            content = page.extract_text()
            if content:
                chunks.append(content + "\n")
        combined = "".join(chunks).strip()
    except Exception as exc:
        return f"Error reading PDF: {exc}"
    if combined:
        return combined
    return "No text could be extracted from the PDF."
def extract_text_from_docx(docx_file):
    """Return the paragraphs of a DOCX document joined by newlines.

    Failures are not raised: the function returns a string beginning with
    "Error reading DOCX:" instead, and a fixed placeholder message when the
    document contains no text.
    """
    try:
        document = Document(docx_file)
        lines = [paragraph.text for paragraph in document.paragraphs]
        content = "\n".join(lines).strip()
    except Exception as exc:
        return f"Error reading DOCX: {exc}"
    if not content:
        return "No text could be extracted from the DOCX file."
    return content
def parse_cv(file, job_description):
    """Extract the text of an uploaded CV and ask the LLM to assess it.

    Args:
        file: The uploaded CV, either a path (``str`` / ``os.PathLike``) or
            an object exposing the path as ``.name``. ``None`` means nothing
            was uploaded.
        job_description: Text of the job posting; inserted verbatim into the
            prompt.

    Returns:
        A 2-tuple of strings ``(extracted_text, analysis_report)``. Failures
        (no upload, unsupported extension, unreadable file, LLM error) are
        reported through these strings. The report also contains the prompt
        that was sent, for debugging.
    """
    if file is None:
        return "Please upload a CV file.", ""

    try:
        # Accept a bare path as well as an upload object carrying ``.name``.
        # Paths are checked first because pathlib objects also have a
        # ``.name``, but it holds only the final path component.
        if isinstance(file, (str, os.PathLike)):
            file_path = os.fspath(file)
        else:
            file_path = file.name
        file_ext = os.path.splitext(file_path)[1].lower()
        if file_ext == ".pdf":
            extracted_text = extract_text_from_pdf(file_path)
        elif file_ext == ".docx":
            extracted_text = extract_text_from_docx(file_path)
        else:
            return (
                "Unsupported file format. Please upload a PDF or DOCX file.",
                "Unsupported file format.",
            )
    except Exception as e:
        error_msg = f"Error reading file: {e}"
        return error_msg, error_msg

    # The extractors signal failure by returning a message with one of these
    # prefixes. Matching the full prefix (not just "Error") keeps a CV whose
    # own text happens to start with "Error" from being rejected.
    if extracted_text.startswith(("Error reading PDF:", "Error reading DOCX:")):
        return extracted_text, "Error during text extraction. Please check the file."

    # Prepare debug prompt
    prompt = (
        "Analyze the CV against the job description. Provide a summary, assessment, "
        "and a score 0-10.\n\n"
        f"Job Description:\n{job_description}\n\n"
        f"Candidate CV:\n{extracted_text}\n"
    )

    # Call LLM
    try:
        analysis = client.text_generation(prompt, max_new_tokens=512)
    except Exception as e:
        return extracted_text, f"Analysis Error: {e}"

    # Show both the debug prompt and the LLM analysis in the "Analysis Report"
    analysis_report = (
        f"--- DEBUG PROMPT ---\n{prompt}\n"
        f"--- LLM ANALYSIS ---\n{analysis}"
    )
    return extracted_text, analysis_report
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chatbot reply, yielding the accumulated text after each chunk.

    Args:
        message: The user's latest message.
        history: Earlier turns. Each entry is either a ``(user, assistant)``
            pair or a dict with ``role`` and ``content`` keys; empty parts
            are skipped.
        system_message: Text used as the system prompt.
        max_tokens: Passed to the client as ``max_tokens``.
        temperature: Sampling temperature passed to the client.
        top_p: Nucleus-sampling value passed to the client.

    Yields:
        The reply accumulated so far. If the request fails, a single
        "Error during chat generation: ..." string is yielded instead.
    """
    messages = [{"role": "system", "content": system_message}]
    for turn in history:
        if isinstance(turn, dict):
            # Dict-style history entry: forward only role and content.
            role, content = turn.get("role"), turn.get("content")
            if role and content:
                messages.append({"role": role, "content": content})
            continue
        user_msg, bot_msg = turn
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if bot_msg:
            messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})

    response = ""
    try:
        for message_chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            if not message_chunk.choices:
                continue
            token = message_chunk.choices[0].delta.content
            # A chunk may carry no text (content is None); appending it
            # would raise TypeError and discard the reply built so far.
            if token:
                response += token
            yield response
    except Exception as e:
        yield f"Error during chat generation: {e}"
def create_pdf_report(report_text):
    """Render the analysis report as a PDF held in memory.

    Args:
        report_text: Report text; every line becomes its own paragraph.
            ``None`` or blank text is replaced by a placeholder sentence.

    Returns:
        An ``io.BytesIO`` containing the PDF, rewound to offset 0.
    """
    from xml.sax.saxutils import escape

    if not report_text or not report_text.strip():
        report_text = "No analysis report to convert."

    buffer = io.BytesIO()
    doc = SimpleDocTemplate(buffer, pagesize=letter)
    styles = getSampleStyleSheet()

    # Title
    story = [
        Paragraph("<b>Analysis Report</b>", styles["Title"]),
        Spacer(1, 12),
    ]

    # Report Content
    for line in report_text.split("\n"):
        # Paragraph interprets its text as XML-like markup, so characters
        # such as "&" and "<" in the report must be escaped to be printed
        # literally instead of being parsed as tags.
        story.append(Paragraph(escape(line), styles["Normal"]))
        story.append(Spacer(1, 6))  # Add a small space between lines

    doc.build(story)
    buffer.seek(0)
    return buffer
def toggle_download_button(analysis_report):
    """Return an update that shows and enables the button only for a non-blank report."""
    has_report = bool(analysis_report.strip())
    return gr.update(interactive=has_report, visible=has_report)
def _export_report_as_pdf(report_text):
    """Write the PDF built by create_pdf_report to a temp file and return its path.

    create_pdf_report returns an in-memory buffer, while the gr.File output
    below is populated from a file path, so the bytes are saved to disk first.
    """
    import tempfile

    pdf_buffer = create_pdf_report(report_text)
    # delete=False: the file has to outlive this handler so it can be downloaded.
    with tempfile.NamedTemporaryFile(
        prefix="analysis_report_", suffix=".pdf", delete=False
    ) as pdf_handle:
        pdf_handle.write(pdf_buffer.getvalue())
    return pdf_handle.name


# Build the Gradio UI
demo = gr.Blocks()
with demo:
    gr.Markdown("## AI-powered CV Analyzer and Chatbot")

    with gr.Tab("Chatbot"):
        chat_interface = gr.ChatInterface(
            respond,
            chatbot=gr.Chatbot(value=[], label="Chatbot"),
            textbox=gr.Textbox(placeholder="Enter your message here...", label="Message"),
            # Passed to respond() after message and history, in this order.
            additional_inputs=[
                gr.Textbox(
                    value="You are a friendly Chatbot.", label="System message"
                ),
                gr.Slider(
                    minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"
                ),
                gr.Slider(
                    minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"
                ),
                gr.Slider(
                    minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"
                ),
            ],
        )

    with gr.Tab("CV Analyzer"):
        gr.Markdown("### Upload your CV and provide the job description")
        file_input = gr.File(label="Upload CV", file_types=[".pdf", ".docx"])
        job_desc_input = gr.Textbox(label="Job Description", lines=5)
        extracted_text = gr.Textbox(
            label="Extracted CV Content", lines=10, interactive=False
        )
        analysis_output = gr.Textbox(
            label="Analysis Report", lines=10, interactive=False
        )
        # Hidden until an analysis report exists (see toggle_download_button).
        download_pdf_button = gr.Button(
            "Download Analysis as PDF", visible=False, interactive=False
        )
        pdf_file = gr.File(label="Download PDF", interactive=False)
        analyze_button = gr.Button("Analyze CV")

        # Run the analysis first, then reveal the download button based on
        # the report text that was just produced.
        analyze_button.click(
            parse_cv,
            inputs=[file_input, job_desc_input],
            outputs=[extracted_text, analysis_output],
        ).then(
            toggle_download_button,
            inputs=[analysis_output],
            outputs=[download_pdf_button],
        )
        # Hand the File output a path on disk rather than the raw buffer.
        download_pdf_button.click(
            _export_report_as_pdf, inputs=[analysis_output], outputs=[pdf_file]
        )
if __name__ == "__main__":
    # Start the app with request queuing enabled when run as a script.
    demo.queue().launch()