ruslanmv committed on
Commit
628f061
·
verified ·
1 Parent(s): 4aeb0f2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -46
app.py CHANGED
@@ -1,20 +1,20 @@
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
- import PyPDF2
4
  import io
5
  from docx import Document
6
  import os
7
- import pymupdf # Corrected import for PyMuPDF
8
- from reportlab.pdfgen import canvas
9
  from reportlab.lib.pagesizes import letter
10
  from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
11
  from reportlab.lib.styles import getSampleStyleSheet
12
- from reportlab.lib import colors
13
 
 
14
  client = InferenceClient(
15
  model="meta-llama/Meta-Llama-3-8B-Instruct",
16
  token=os.getenv("HF_TOKEN"))
17
 
 
18
  def extract_text_from_pdf(pdf_file):
19
  try:
20
  pdf_document = pymupdf.open(pdf_file)
@@ -23,6 +23,7 @@ def extract_text_from_pdf(pdf_file):
23
  except Exception as e:
24
  return f"Error reading PDF: {e}"
25
 
 
26
  def extract_text_from_docx(docx_file):
27
  try:
28
  doc = Document(docx_file)
@@ -31,70 +32,71 @@ def extract_text_from_docx(docx_file):
31
  except Exception as e:
32
  return f"Error reading DOCX: {e}"
33
 
 
34
  def parse_cv(file, job_description):
35
  if file is None:
36
- return "Please upload a CV file.", ""
37
  try:
38
  file_path = file.name
39
  file_ext = os.path.splitext(file_path)[1].lower()
40
- extracted_text = extract_text_from_pdf(file_path) if file_ext == ".pdf" else extract_text_from_docx(file_path)
 
 
 
 
 
41
  except Exception as e:
42
- return f"Error reading file: {e}", ""
43
  if extracted_text.startswith("Error"):
44
- return extracted_text, "Error during text extraction."
45
- prompt = f"Analyze this CV for job relevance.\nJob Description:\n{job_description}\n\nCV:\n{extracted_text}\n"
 
 
 
 
 
46
  try:
47
  analysis = client.text_generation(prompt, max_new_tokens=512)
48
- return extracted_text, f"**Analysis Report:**\n{analysis}"
 
 
49
  except Exception as e:
50
- return extracted_text, f"Analysis Error: {e}"
51
 
 
52
  def create_pdf_report(report_text):
53
- pdf_buffer = io.BytesIO()
54
- doc = SimpleDocTemplate(pdf_buffer, pagesize=letter)
 
 
 
55
  styles = getSampleStyleSheet()
56
- Story = [Paragraph("<b>Analysis Report</b>", styles['Title']), Spacer(1, 12)]
57
- Story.append(Paragraph(report_text.replace("\n", "<br/>"), styles['BodyText']))
58
- doc.build(Story)
59
- pdf_buffer.seek(0)
60
- return pdf_buffer.getvalue(), "analysis_report.pdf"
61
 
62
- def process_resume(resume_file, job_title):
63
- if resume_file is None:
64
- return "Please upload a resume file."
65
- try:
66
- file_path = resume_file.name
67
- file_ext = os.path.splitext(file_path)[1].lower()
68
- resume_text = extract_text_from_pdf(file_path) if file_ext == ".pdf" else extract_text_from_docx(file_path)
69
- if resume_text.startswith("Error"):
70
- return resume_text
71
- prompt = f"Optimize this resume for {job_title}:\n{resume_text}\n"
72
- optimized_resume = client.text_generation(prompt, max_new_tokens=1024)
73
- return optimized_resume.replace("\n", " \n") # Ensure Markdown formatting
74
- except Exception as e:
75
- return f"Error processing resume: {e}"
76
 
 
 
 
 
 
 
 
77
  demo = gr.Blocks()
78
  with demo:
79
  gr.Markdown("## AI-powered CV Analyzer and Optimizer")
80
-
81
  with gr.Tab("CV Analyzer"):
 
82
  file_input = gr.File(label="Upload CV", file_types=[".pdf", ".docx"])
83
  job_desc_input = gr.Textbox(label="Job Description", lines=5)
84
  extracted_text = gr.Textbox(label="Extracted CV Content", lines=10, interactive=False)
85
- analysis_output = gr.Markdown(label="Analysis Report")
86
- download_pdf_button = gr.Button("Download Analysis as PDF", visible=False)
87
- pdf_file = gr.File(label="Download PDF", interactive=False)
88
  analyze_button = gr.Button("Analyze CV")
89
- analyze_button.click(parse_cv, [file_input, job_desc_input], [extracted_text, analysis_output])
90
- download_pdf_button.click(create_pdf_report, [analysis_output], [pdf_file])
91
-
92
- with gr.Tab("CV Optimizer"):
93
- resume_file = gr.File(label="Upload Resume (PDF or Word)", file_types=[".pdf", ".docx"])
94
- job_title_input = gr.Textbox(label="Job Title")
95
- optimized_resume_output = gr.Markdown(label="Optimized Resume")
96
- optimize_button = gr.Button("Optimize Resume")
97
- optimize_button.click(process_resume, [resume_file, job_title_input], [optimized_resume_output])
98
 
99
  if __name__ == "__main__":
100
- demo.queue().launch()
 
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
 
3
  import io
4
  from docx import Document
5
  import os
6
+ import pymupdf
7
+ # For PDF generation
8
  from reportlab.lib.pagesizes import letter
9
  from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
10
  from reportlab.lib.styles import getSampleStyleSheet
 
11
 
12
# Shared Hugging Face inference client used by the analysis function.
# The access token is read from the HF_TOKEN environment variable; the
# lookup yields None when the variable is not set.
client = InferenceClient(
    model="meta-llama/Meta-Llama-3-8B-Instruct",
    token=os.environ.get("HF_TOKEN"),
)
16
 
17
+ # Function to extract text from PDF
18
  def extract_text_from_pdf(pdf_file):
19
  try:
20
  pdf_document = pymupdf.open(pdf_file)
 
23
  except Exception as e:
24
  return f"Error reading PDF: {e}"
25
 
26
+ # Function to extract text from DOCX
27
  def extract_text_from_docx(docx_file):
28
  try:
29
  doc = Document(docx_file)
 
32
  except Exception as e:
33
  return f"Error reading DOCX: {e}"
34
 
35
# Function to analyze CV and generate report
def parse_cv(file, job_description):
    """Extract the text of an uploaded CV and ask the model to assess it.

    Args:
        file: The uploaded CV. Either an object exposing its path as
            ``.name`` or a plain path string; ``None`` when nothing was
            uploaded.
        job_description: Job description text inserted into the prompt.

    Returns:
        A 3-tuple ``(extracted_text_or_message, report_or_message,
        pdf_path_or_None)`` matching the three UI outputs. The third item
        is ``None`` whenever no PDF report could be produced.
    """
    if file is None:
        return "Please upload a CV file.", "", None

    # Accept both file-like objects (path in ``.name``) and bare path strings.
    file_path = getattr(file, "name", file)
    file_ext = os.path.splitext(str(file_path))[1].lower()
    if file_ext not in (".pdf", ".docx"):
        return "Unsupported file format. Please upload a PDF or DOCX file.", "", None

    try:
        if file_ext == ".pdf":
            extracted_text = extract_text_from_pdf(file_path)
        else:
            extracted_text = extract_text_from_docx(file_path)
    except Exception as e:
        return f"Error reading file: {e}", "", None

    # The extractors report failures as strings with these exact prefixes;
    # matching the full prefix avoids rejecting a CV whose own text merely
    # begins with the word "Error".
    if extracted_text.startswith(("Error reading PDF:", "Error reading DOCX:")):
        return extracted_text, "Error during text extraction. Please check the file.", None

    # Nothing to analyze (e.g. a document without a text layer): skip the
    # model call instead of asking it to assess an empty CV.
    if not extracted_text.strip():
        return extracted_text, "No text could be extracted from the file. Please check the file.", None

    prompt = (
        "Analyze the CV against the job description. Provide a summary, assessment, "
        "and a score 0-10.\n\n"
        f"Job Description:\n{job_description}\n\n"
        f"Candidate CV:\n{extracted_text}\n"
    )

    try:
        analysis = client.text_generation(prompt, max_new_tokens=512)
    except Exception as e:
        return extracted_text, f"Analysis Error: {e}", None

    report_text = f"--- Analysis Report ---\n{analysis}"

    # PDF rendering is handled separately from the model call so that a
    # rendering failure does not discard a successful analysis.
    try:
        pdf_path = create_pdf_report(report_text)
    except Exception as e:
        return extracted_text, f"{report_text}\n\n(PDF generation failed: {e})", None

    return extracted_text, report_text, pdf_path
65
 
66
# Function to create PDF report
def create_pdf_report(report_text):
    """Render the report text into a PDF file and return the file's path.

    Args:
        report_text: Plain-text report. ``None`` or a blank string is
            replaced by a placeholder sentence.

    Returns:
        Path (str) of the generated ``analysis_report.pdf``.
    """
    import tempfile
    from xml.sax.saxutils import escape

    if not report_text or not report_text.strip():
        report_text = "No analysis report to convert."

    # Each call writes into its own fresh directory. A single fixed path in
    # the working directory would be overwritten when two requests generate
    # reports at the same time. The directory is not removed here.
    pdf_path = os.path.join(tempfile.mkdtemp(prefix="cv_report_"), "analysis_report.pdf")
    doc = SimpleDocTemplate(pdf_path, pagesize=letter)
    styles = getSampleStyleSheet()

    # Paragraph interprets its text as markup, so literal '&', '<' and '>'
    # in the report must be escaped before line breaks are turned into tags.
    body_markup = escape(report_text).replace("\n", "<br/>")

    story = [
        Paragraph("<b>Analysis Report</b>", styles["Title"]),
        Spacer(1, 12),
        Paragraph(body_markup, styles["BodyText"]),
    ]
    doc.build(story)
    return pdf_path
85
+
86
# Build the Gradio UI: one tab that takes a CV file and a job description
# and shows the extracted text, the analysis report and a PDF of the report.
demo = gr.Blocks()
with demo:
    gr.Markdown("## AI-powered CV Analyzer and Optimizer")
    with gr.Tab("CV Analyzer"):
        gr.Markdown("### Upload your CV and provide the job description")
        file_input = gr.File(label="Upload CV", file_types=[".pdf", ".docx"])
        job_desc_input = gr.Textbox(label="Job Description", lines=5)
        extracted_text = gr.Textbox(label="Extracted CV Content", lines=10, interactive=False)
        analysis_output = gr.Textbox(label="Analysis Report", lines=10, interactive=False)
        # This component only displays the generated report for download, so
        # it is non-interactive; an interactive File is an upload widget.
        pdf_file = gr.File(label="Download Analysis Report PDF", interactive=False)
        analyze_button = gr.Button("Analyze CV")

        # parse_cv returns three values, one per output component, in order.
        analyze_button.click(
            parse_cv,
            [file_input, job_desc_input],
            [extracted_text, analysis_output, pdf_file],
        )

if __name__ == "__main__":
    demo.queue().launch()