Spaces:

ruslanmv
/

CV_Analizer

Sleeping

App Files Files Community

CV_Analizer / app.py

ruslanmv

Update app.py

0b10650 verified 7 months ago

raw

history blame

6.91 kB

	import gradio as gr
	from huggingface_hub import InferenceClient
	import PyPDF2
	import io
	from docx import Document
	import os

	# For PDF generation
	from reportlab.pdfgen import canvas
	from reportlab.lib.pagesizes import letter
	from reportlab.lib import utils
	from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
	from reportlab.lib.styles import getSampleStyleSheet

	# Shared Hugging Face `InferenceClient`, bound to the
	# "HuggingFaceH4/zephyr-7b-beta" model. Both the CV analysis
	# (`text_generation`) and the chatbot (`chat_completion`) in this file send
	# their requests through this single module-level instance.
	client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")


	def extract_text_from_pdf(pdf_file):
	    """Return the text of a PDF, or a human-readable message on failure.

	    Pages that yield no text are skipped; the remaining page texts are
	    joined with newlines and the result is stripped. If nothing is left, a
	    placeholder sentence is returned. Any exception is reported in-band as
	    an ``"Error reading PDF: ..."`` string instead of being raised.
	    """
	    try:
	        reader = PyPDF2.PdfReader(pdf_file)
	        per_page = (page.extract_text() for page in reader.pages)
	        combined = "\n".join(chunk for chunk in per_page if chunk)
	    except Exception as e:
	        return f"Error reading PDF: {e}"

	    cleaned = combined.strip()
	    if cleaned:
	        return cleaned
	    return "No text could be extracted from the PDF."


	def extract_text_from_docx(docx_file):
	    """Return the paragraph text of a DOCX file, or a message on failure.

	    The text of every paragraph is joined with newlines and stripped. If
	    nothing is left, a placeholder sentence is returned. Any exception is
	    reported in-band as an ``"Error reading DOCX: ..."`` string instead of
	    being raised.
	    """
	    try:
	        document = Document(docx_file)
	        lines = [paragraph.text for paragraph in document.paragraphs]
	        cleaned = "\n".join(lines).strip()
	    except Exception as e:
	        return f"Error reading DOCX: {e}"

	    if not cleaned:
	        return "No text could be extracted from the DOCX file."
	    return cleaned


	def parse_cv(file, job_description):
	    """Extract the text of an uploaded CV and ask the LLM to assess it.

	    Args:
	        file: The uploaded CV, or ``None`` when nothing was uploaded. May be
	            an upload object exposing its on-disk path as ``.name``, or a
	            plain path (``str`` / ``os.PathLike``).
	        job_description: Job description text embedded in the prompt.

	    Returns:
	        A 2-tuple of strings ``(extracted_text, analysis_report)``. When
	        something goes wrong the first element carries the user-facing
	        message instead of CV text. A successful report contains the debug
	        prompt followed by the LLM output.
	    """
	    if file is None:
	        return "Please upload a CV file.", ""

	    try:
	        # Accept both a plain path and an upload object carrying ``.name``.
	        # Paths are checked first: ``Path.name`` is only the final component,
	        # which would lose the directory.
	        if isinstance(file, (str, os.PathLike)):
	            file_path = os.fspath(file)
	        else:
	            file_path = file.name
	        file_ext = os.path.splitext(file_path)[1].lower()

	        if file_ext == ".pdf":
	            extracted_text = extract_text_from_pdf(file_path)
	        elif file_ext == ".docx":
	            extracted_text = extract_text_from_docx(file_path)
	        else:
	            return (
	                "Unsupported file format. Please upload a PDF or DOCX file.",
	                "Unsupported file format.",
	            )
	    except Exception as e:
	        error_msg = f"Error reading file: {e}"
	        return error_msg, error_msg

	    # The extractors report failures in-band. Match their exact prefixes so
	    # that a CV whose own text merely starts with "Error" is still analysed.
	    if extracted_text.startswith(("Error reading PDF:", "Error reading DOCX:")):
	        return extracted_text, "Error during text extraction. Please check the file."

	    # The extractors return a placeholder sentence when the document has no
	    # text; sending that placeholder to the LLM would yield a bogus analysis.
	    if extracted_text.startswith("No text could be extracted"):
	        return (
	            extracted_text,
	            "No text could be extracted from the file, so no analysis was performed.",
	        )

	    # Prepare debug prompt
	    prompt = (
	        f"Analyze the CV against the job description. Provide a summary, assessment, "
	        f"and a score 0-10.\n\n"
	        f"Job Description:\n{job_description}\n\n"
	        f"Candidate CV:\n{extracted_text}\n"
	    )

	    # Call LLM
	    try:
	        analysis = client.text_generation(prompt, max_new_tokens=512)
	    except Exception as e:
	        return extracted_text, f"Analysis Error: {e}"

	    # Show both the debug prompt and the LLM analysis in the "Analysis Report"
	    analysis_report = (
	        f"--- DEBUG PROMPT ---\n{prompt}\n"
	        f"--- LLM ANALYSIS ---\n{analysis}"
	    )
	    return extracted_text, analysis_report


	def respond(
	    message,
	    history: list[tuple[str, str]],
	    system_message,
	    max_tokens,
	    temperature,
	    top_p,
	):
	    """Stream a chatbot reply for ``message`` given the prior conversation.

	    Args:
	        message: The new user message.
	        history: Earlier turns as ``(user_message, bot_message)`` pairs;
	            empty entries within a pair are skipped.
	        system_message: Content of the leading system message.
	        max_tokens: Forwarded as ``max_tokens`` to ``chat_completion``.
	        temperature: Forwarded as ``temperature``.
	        top_p: Forwarded as ``top_p``.

	    Yields:
	        The reply accumulated so far, once per received non-empty token. If
	        the request fails, a final ``"Error during chat generation: ..."``
	        message is yielded, preceded by any text already received.
	    """
	    messages = [{"role": "system", "content": system_message}]
	    for user_msg, bot_msg in history:
	        if user_msg:
	            messages.append({"role": "user", "content": user_msg})
	        if bot_msg:
	            messages.append({"role": "assistant", "content": bot_msg})
	    messages.append({"role": "user", "content": message})

	    response = ""
	    try:
	        for message_chunk in client.chat_completion(
	            messages,
	            max_tokens=max_tokens,
	            stream=True,
	            temperature=temperature,
	            top_p=top_p,
	        ):
	            # A streamed chunk may have no choices, or a delta whose content
	            # is None. Concatenating None would raise and replace the whole
	            # reply with an error, so such chunks are skipped.
	            choices = message_chunk.choices
	            token = choices[0].delta.content if choices else None
	            if not token:
	                continue
	            response += token
	            yield response
	    except Exception as e:
	        # Keep whatever was already streamed so the user does not lose it.
	        partial = f"{response}\n\n" if response else ""
	        yield f"{partial}Error during chat generation: {e}"


	def create_pdf_report(report_text):
	    """Render the analysis report as a PDF held in memory.

	    Args:
	        report_text: Plain-text report; each line becomes one paragraph.
	            ``None`` or blank text is replaced by a placeholder sentence.

	    Returns:
	        An ``io.BytesIO`` containing the built PDF, rewound to position 0.
	    """
	    # Function-scope import so the block does not depend on a new
	    # file-level import.
	    from xml.sax.saxutils import escape

	    if not report_text or not report_text.strip():
	        report_text = "No analysis report to convert."

	    buffer = io.BytesIO()
	    doc = SimpleDocTemplate(buffer, pagesize=letter)
	    styles = getSampleStyleSheet()

	    # Title followed by a little vertical space.
	    story = [
	        Paragraph("<b>Analysis Report</b>", styles["Title"]),
	        Spacer(1, 12),
	    ]

	    # Report content: one paragraph per input line.
	    for line in report_text.split("\n"):
	        # Paragraph treats its text as XML-like markup, so literal "&", "<"
	        # and ">" coming from the CV or the LLM output must be escaped.
	        story.append(Paragraph(escape(line), styles["Normal"]))
	        story.append(Spacer(1, 6))  # Small gap between lines

	    doc.build(story)
	    buffer.seek(0)
	    return buffer


	def toggle_download_button(analysis_report):
	    """Show and enable the download button only for a non-blank report."""
	    has_report = bool(analysis_report.strip())
	    return gr.update(interactive=has_report, visible=has_report)


	def _save_report_as_pdf_file(report_text):
	    """Render the report to a PDF on disk and return the file path.

	    ``create_pdf_report`` returns an in-memory buffer, whereas a ``gr.File``
	    output is populated from a file path, so the bytes are written to a
	    named temporary file first.
	    """
	    import tempfile  # function-scope import: no new file-level dependency

	    pdf_buffer = create_pdf_report(report_text)
	    # delete=False: the file must still exist after this function returns so
	    # that the File component can read it.
	    with tempfile.NamedTemporaryFile(
	        prefix="cv_analysis_", suffix=".pdf", delete=False
	    ) as tmp:
	        tmp.write(pdf_buffer.getvalue())
	    return tmp.name


	# Build the Gradio UI: one tab for the chatbot, one for the CV analyzer.
	with gr.Blocks() as demo:
	    gr.Markdown("## AI-powered CV Analyzer and Chatbot")

	    with gr.Tab("Chatbot"):
	        chat_interface = gr.ChatInterface(
	            respond,
	            chatbot=gr.Chatbot(value=[], label="Chatbot"),
	            textbox=gr.Textbox(
	                placeholder="Enter your message here...", label="Message"
	            ),
	            # Passed to `respond` after (message, history), in this order.
	            additional_inputs=[
	                gr.Textbox(
	                    value="You are a friendly Chatbot.", label="System message"
	                ),
	                gr.Slider(
	                    minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"
	                ),
	                gr.Slider(
	                    minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"
	                ),
	                gr.Slider(
	                    minimum=0.1,
	                    maximum=1.0,
	                    value=0.95,
	                    step=0.05,
	                    label="Top-p (nucleus sampling)",
	                ),
	            ],
	        )

	    with gr.Tab("CV Analyzer"):
	        gr.Markdown("### Upload your CV and provide the job description")
	        file_input = gr.File(label="Upload CV", file_types=[".pdf", ".docx"])
	        job_desc_input = gr.Textbox(label="Job Description", lines=5)
	        extracted_text = gr.Textbox(
	            label="Extracted CV Content", lines=10, interactive=False
	        )
	        analysis_output = gr.Textbox(
	            label="Analysis Report", lines=10, interactive=False
	        )
	        # Hidden until an analysis report exists (see toggle_download_button).
	        download_pdf_button = gr.Button(
	            "Download Analysis as PDF", visible=False, interactive=False
	        )
	        pdf_file = gr.File(label="Download PDF", interactive=False)

	        analyze_button = gr.Button("Analyze CV")

	        # Run the analysis, then reveal the download button if it produced
	        # a non-blank report.
	        analyze_button.click(
	            parse_cv,
	            inputs=[file_input, job_desc_input],
	            outputs=[extracted_text, analysis_output],
	        ).then(
	            toggle_download_button,
	            inputs=[analysis_output],
	            outputs=[download_pdf_button],
	        )

	        # The File output needs a path, so go through the helper that
	        # persists the in-memory PDF rather than returning the raw buffer.
	        download_pdf_button.click(
	            _save_report_as_pdf_file,
	            inputs=[analysis_output],
	            outputs=[pdf_file],
	        )

	# Launch only when executed as a script, with request queuing enabled.
	if __name__ == "__main__":
	    demo.queue().launch()