Spaces:

ruslanmv
/

CV_Analizer

Sleeping

App Files Files Community

ruslanmv committed on Feb 4

Commit

ec1a8b9

verified ·

1 Parent(s): 7105896

Update app.py

Browse files

Files changed (1) hide show

app.py +85 -56

app.py CHANGED Viewed

@@ -3,18 +3,23 @@ from huggingface_hub import InferenceClient
 import PyPDF2
 import io
 from docx import Document
 # For PDF generation
 from reportlab.pdfgen import canvas
 from reportlab.lib.pagesizes import letter
 # Initialize the inference client from Hugging Face.
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-def extract_text_from_pdf(pdf_file_bytes):
-    """Extract text from PDF bytes."""
     try:
-        pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_file_bytes))
         text = ""
         for page in pdf_reader.pages:
             page_text = page.extract_text()
@@ -24,38 +29,40 @@ def extract_text_from_pdf(pdf_file_bytes):
     except Exception as e:
         return f"Error reading PDF: {e}"
def extract_text_from_docx(docx_file_bytes):
    """Return the paragraph text of a DOCX given as raw bytes.

    The bytes are wrapped in an in-memory stream and opened with ``Document``.
    Paragraph texts are joined with newlines and stripped; if nothing remains,
    a fixed "no text" message is returned. Any exception is reported as a
    string starting with "Error reading DOCX:" rather than being raised.
    """
    try:
        stream = io.BytesIO(docx_file_bytes)
        paragraphs = [paragraph.text for paragraph in Document(stream).paragraphs]
        content = "\n".join(paragraphs).strip()
    except Exception as exc:
        return f"Error reading DOCX: {exc}"
    if content:
        return content
    return "No text could be extracted from the DOCX file."
-def parse_cv(file_bytes, job_description):
     """Analyze the CV, show the prompt (debug) and return LLM analysis."""
-    if file_bytes is None:
         return "Please upload a CV file.", ""
-    # Determine file extension by header bytes
     try:
-        if file_bytes.startswith(b'%PDF'):
-            file_ext = "pdf"
-        elif file_bytes.startswith(b'PK\x03\x04'):
-            file_ext = "docx"
         else:
-            return "Unsupported file format.", "Cannot determine file type from content"
     except Exception as e:
         error_msg = f"Error reading file: {e}"
         return error_msg, error_msg
-    # Extract text
-    if file_ext == "pdf":
-        extracted_text = extract_text_from_pdf(file_bytes)
-    else:  # docx
-        extracted_text = extract_text_from_docx(file_bytes)
     # Check for extraction errors
     if extracted_text.startswith("Error"):
         return extracted_text, "Error during text extraction. Please check the file."
@@ -80,7 +87,15 @@ def parse_cv(file_bytes, job_description):
     except Exception as e:
         return extracted_text, f"Analysis Error: {e}"
-def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p):
     """Generate chatbot response."""
     messages = [{"role": "system", "content": system_message}]
     for user_msg, bot_msg in history:
@@ -105,32 +120,38 @@ def respond(message, history: list[tuple[str, str]], system_message, max_tokens,
     except Exception as e:
         yield f"Error during chat generation: {e}"
 def create_pdf_report(report_text):
     """Build a plain-text PDF of the analysis and return ``(bytes, filename)``.

     The title is drawn at the top of the first page and the report follows,
     one input line per PDF line. When the text cursor drops below the bottom
     margin a new page is started, so a long report is not drawn off the end
     of a single page.

     Args:
         report_text: Analysis text; a placeholder sentence is used when it is
             empty or whitespace only.

     Returns:
         A tuple of the PDF bytes and the fixed name "analysis_report.pdf".
     """
     if not report_text.strip():
         report_text = "No analysis report to convert."
     left_margin, bottom_margin = 72, 72
     pdf_buffer = io.BytesIO()
     c = canvas.Canvas(pdf_buffer, pagesize=letter)
     c.setFont("Helvetica-Bold", 14)
     c.drawString(left_margin, 750, "Analysis Report")
     text_obj = c.beginText(left_margin, 730)
     text_obj.setFont("Helvetica", 11)
     for line in report_text.split("\n"):
         if text_obj.getY() < bottom_margin:
             # Page is full: flush what has been written and continue on a new page.
             c.drawText(text_obj)
             c.showPage()
             text_obj = c.beginText(left_margin, 750)
             text_obj.setFont("Helvetica", 11)
         text_obj.textLine(line)
     c.drawText(text_obj)
     c.showPage()
     c.save()
     # getvalue() returns the whole buffer regardless of the stream position.
     return (pdf_buffer.getvalue(), "analysis_report.pdf")
 def toggle_download_button(analysis_report):
     """Return a Gradio update that shows and enables the download button
     only when the analysis report contains non-whitespace text."""
     has_report = bool(analysis_report.strip())
     return gr.update(interactive=has_report, visible=has_report)
 # Build the Gradio UI
 demo = gr.Blocks()
@@ -138,35 +159,45 @@ with demo:
     gr.Markdown("## AI-powered CV Analyzer and Chatbot")
     with gr.Tab("Chatbot"):
-        # Simple chat interface
         chat_interface = gr.ChatInterface(
             respond,
-            chatbot=gr.Chatbot(value=[], label="Chatbot", type="messages"),
-            type="messages",
             additional_inputs=[
-                gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-                gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-                gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-                gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
             ],
         )
     with gr.Tab("CV Analyzer"):
         gr.Markdown("### Upload your CV and provide the job description")
-        # IMPORTANT: set type="file" and file_types to handle multiple file types
-        file_input = gr.File(label="Upload CV", type="file", file_types=['.pdf', '.docx'])
         job_desc_input = gr.Textbox(label="Job Description", lines=5)
-        extracted_text = gr.Textbox(label="Extracted CV Content", lines=10, interactive=False)
-        analysis_output = gr.Textbox(label="Analysis Report", lines=10, interactive=False)
-        download_pdf_button = gr.Button("Download Analysis as PDF", visible=False, interactive=False)
-        pdf_file = gr.File(label="Download PDF", file_count="single", interactive=False)
         analyze_button = gr.Button("Analyze CV")
         analyze_button.click(
-            lambda file, job_desc: parse_cv(file.read(), job_desc) if file else ("Please upload a CV", ""),
             inputs=[file_input, job_desc_input],
             outputs=[extracted_text, analysis_output],
         ).then(
@@ -176,9 +207,7 @@ with demo:
         )
         download_pdf_button.click(
-            create_pdf_report,
-            inputs=[analysis_output],
-            outputs=[pdf_file],
         )
 if __name__ == "__main__":

 import PyPDF2
 import io
 from docx import Document
+import os
 # For PDF generation
 from reportlab.pdfgen import canvas
 from reportlab.lib.pagesizes import letter
+from reportlab.lib import utils
+from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
+from reportlab.lib.styles import getSampleStyleSheet
 # Initialize the inference client from Hugging Face.
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
+def extract_text_from_pdf(pdf_file):
+    """Extract text from PDF file."""
     try:
+        pdf_reader = PyPDF2.PdfReader(pdf_file)
         text = ""
         for page in pdf_reader.pages:
             page_text = page.extract_text()
     except Exception as e:
         return f"Error reading PDF: {e}"
def extract_text_from_docx(docx_file):
    """Return the paragraph text of a DOCX file, or a message string on failure.

    The argument is handed straight to ``Document``. Paragraph texts are
    joined with newlines and stripped; if nothing remains, a fixed "no text"
    message is returned. Any exception is reported as a string starting with
    "Error reading DOCX:" rather than being raised.
    """
    try:
        paragraphs = [paragraph.text for paragraph in Document(docx_file).paragraphs]
        content = "\n".join(paragraphs).strip()
    except Exception as exc:
        return f"Error reading DOCX: {exc}"
    if content:
        return content
    return "No text could be extracted from the DOCX file."
+def parse_cv(file, job_description):
     """Analyze the CV, show the prompt (debug) and return LLM analysis."""
+    if file is None:
         return "Please upload a CV file.", ""
     try:
+        file_path = file.name  # Get the file path
+        file_ext = os.path.splitext(file_path)[1].lower()
+        if file_ext == ".pdf":
+            extracted_text = extract_text_from_pdf(file_path)
+        elif file_ext == ".docx":
+            extracted_text = extract_text_from_docx(file_path)
         else:
+            return (
+                "Unsupported file format. Please upload a PDF or DOCX file.",
+                "Unsupported file format.",
+            )
     except Exception as e:
         error_msg = f"Error reading file: {e}"
         return error_msg, error_msg
     # Check for extraction errors
     if extracted_text.startswith("Error"):
         return extracted_text, "Error during text extraction. Please check the file."
     except Exception as e:
         return extracted_text, f"Analysis Error: {e}"
+def respond(
+    message,
+    history: list[tuple[str, str]],
+    system_message,
+    max_tokens,
+    temperature,
+    top_p,
+):
     """Generate chatbot response."""
     messages = [{"role": "system", "content": system_message}]
     for user_msg, bot_msg in history:
     except Exception as e:
         yield f"Error during chat generation: {e}"
 def create_pdf_report(report_text):
     """Render the analysis text as a PDF held in an in-memory buffer.

     The document is built with ``SimpleDocTemplate``: a title paragraph,
     then one ``Paragraph`` per input line, each followed by a small spacer.

     Args:
         report_text: Analysis text; a placeholder sentence is used when it is
             ``None``, empty, or whitespace only.

     Returns:
         The ``io.BytesIO`` buffer containing the PDF, rewound to position 0.
     """
     # Imported locally so this block does not rely on a file-level import.
     from xml.sax.saxutils import escape

     if not report_text or not report_text.strip():
         report_text = "No analysis report to convert."
     buffer = io.BytesIO()
     doc = SimpleDocTemplate(buffer, pagesize=letter)
     styles = getSampleStyleSheet()
     # Title, then a gap before the body.
     story = [Paragraph("<b>Analysis Report</b>", styles["Title"]), Spacer(1, 12)]
     for line in report_text.split("\n"):
         # Paragraph parses its text as inline markup, so characters such as
         # "<" and "&" in the report must be escaped to render literally.
         story.append(Paragraph(escape(line), styles["Normal"]))
         story.append(Spacer(1, 6))  # small space between lines
     doc.build(story)
     buffer.seek(0)
     # NOTE(review): this value is wired to a gr.File output; confirm that the
     # Gradio version in use accepts a BytesIO there rather than a file path.
     return buffer
 def toggle_download_button(analysis_report):
     """Return a Gradio update making the download button visible and
     interactive exactly when the report has non-whitespace content."""
     has_report = bool(analysis_report.strip())
     return gr.update(interactive=has_report, visible=has_report)
 # Build the Gradio UI
 demo = gr.Blocks()
     gr.Markdown("## AI-powered CV Analyzer and Chatbot")
     with gr.Tab("Chatbot"):
         chat_interface = gr.ChatInterface(
             respond,
+            chatbot=gr.Chatbot(value=[], label="Chatbot"),
+            textbox=gr.Textbox(placeholder="Enter your message here...", label="Message"),
             additional_inputs=[
+                gr.Textbox(
+                    value="You are a friendly Chatbot.", label="System message"
+                ),
+                gr.Slider(
+                    minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"
+                ),
+                gr.Slider(
+                    minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"
+                ),
+                gr.Slider(
+                    minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"
+                ),
             ],
         )
     with gr.Tab("CV Analyzer"):
         gr.Markdown("### Upload your CV and provide the job description")
+        file_input = gr.File(label="Upload CV", file_types=[".pdf", ".docx"])
         job_desc_input = gr.Textbox(label="Job Description", lines=5)
+        extracted_text = gr.Textbox(
+            label="Extracted CV Content", lines=10, interactive=False
+        )
+        analysis_output = gr.Textbox(
+            label="Analysis Report", lines=10, interactive=False
+        )
+        download_pdf_button = gr.Button(
+            "Download Analysis as PDF", visible=False, interactive=False
+        )
+        pdf_file = gr.File(label="Download PDF", interactive=False)
         analyze_button = gr.Button("Analyze CV")
         analyze_button.click(
+            parse_cv,
             inputs=[file_input, job_desc_input],
             outputs=[extracted_text, analysis_output],
         ).then(
         )
         download_pdf_button.click(
+            create_pdf_report, inputs=[analysis_output], outputs=[pdf_file]
         )
 if __name__ == "__main__":