import gradio as gr
from pdf2docx import Converter
from docx import Document
from fpdf import FPDF
import os
import tempfile
import pytesseract
from pdf2image import convert_from_path
from reportlab.lib.pagesizes import A4
from reportlab.pdfgen import canvas
from io import BytesIO

# Location of the Tesseract executable that pytesseract invokes.
# Defaults to the usual Linux path; set the TESSERACT_CMD environment variable
# to point at a different installation without editing this file.
pytesseract.pytesseract.tesseract_cmd = os.environ.get("TESSERACT_CMD", r'/usr/bin/tesseract')

# Markdown shown at the top of the Gradio page (rendered via gr.Markdown below).
title_and_description = """
# PDF to Word and Word to PDF Converter

This tool allows you to convert PDF files to Word documents and Word documents to PDF files.
Note: Scanned PDFs (image-based PDFs) are supported using OCR.
"""

def pdf_to_word(pdf_file):
    """
    Converts a PDF file to a Word document by running OCR on every page.

    Every page is rasterised with pdf2image and the image is passed to
    Tesseract; the recognised text of each page becomes one paragraph of the
    output document. Layout, fonts and images of the source PDF are not
    reproduced.

    Args:
        pdf_file: The upload received from the Gradio File component, either
            a path string or an object exposing the path as ``.name``.

    Returns:
        Path of the generated ``.docx`` file.

    Raises:
        gr.Error: If nothing was uploaded, or if rasterising, OCR or saving
            the document fails.
    """
    if pdf_file is None:
        raise gr.Error("Please upload a PDF file first.")

    # Accept both a plain path and a file wrapper carrying the path in .name.
    pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name

    try:
        pages = convert_from_path(pdf_path, dpi=500)  # Convert PDF to images
    except Exception as e:
        raise gr.Error(
            f"Error: Unable to process PDF. Is Poppler installed and in PATH? ({e})"
        ) from e

    # OCR output can contain control characters (e.g. a trailing form feed)
    # that are not legal in the XML a .docx is made of; drop everything below
    # U+0020 except tab, line feed and carriage return.
    illegal_chars = {code: None for code in range(32) if code not in (9, 10, 13)}

    try:
        doc = Document()
        for page in pages:
            text = pytesseract.image_to_string(page)  # Extract text from the image
            doc.add_paragraph(text.translate(illegal_chars))

        # The output must outlive this function so Gradio can serve it, hence
        # mkdtemp() rather than a self-deleting TemporaryDirectory.
        out_dir = tempfile.mkdtemp()
        # Swap only the real extension (any case) instead of replacing every
        # ".pdf" substring in the name.
        stem = os.path.splitext(os.path.basename(pdf_path))[0]
        docx_filename = os.path.join(out_dir, stem + ".docx")
        doc.save(docx_filename)
    except Exception as e:
        raise gr.Error(f"Error: {e}") from e

    return docx_filename

def word_to_pdf(docx_file):
    """
    Converts the text of a Word document to a PDF file using reportlab.

    Only paragraph text is transferred: each non-empty paragraph is written
    in 12pt Helvetica on A4 pages, wrapped to the page width. Images, tables
    and character formatting in the document are not carried over.

    Args:
        docx_file: The upload received from the Gradio File component, either
            a path string or an object exposing the path as ``.name``.

    Returns:
        Path of the generated ``.pdf`` file.

    Raises:
        gr.Error: If nothing was uploaded, or if reading the document or
            writing the PDF fails.
    """
    if docx_file is None:
        raise gr.Error("Please upload a Word document first.")

    # Function-scope import keeps this block self-contained; reportlab is
    # already a dependency of this file.
    from reportlab.lib.utils import simpleSplit

    # Accept both a plain path and a file wrapper carrying the path in .name.
    docx_path = docx_file if isinstance(docx_file, str) else docx_file.name

    font_name, font_size = "Helvetica", 12
    left, top, bottom, line_height = 100, 800, 50, 15
    right_margin = 50
    max_width = A4[0] - left - right_margin  # usable text width in points

    try:
        # Read the Word document
        doc = Document(docx_path)

        # The output must outlive this function so Gradio can serve it, hence
        # mkdtemp() rather than a self-deleting TemporaryDirectory.
        out_dir = tempfile.mkdtemp()
        stem = os.path.splitext(os.path.basename(docx_path))[0]
        pdf_filename = os.path.join(out_dir, stem + ".pdf")

        # Write straight to the target file; no in-memory buffer is needed.
        can = canvas.Canvas(pdf_filename, pagesize=A4)
        can.setFont(font_name, font_size)
        y = top  # Current baseline; decreases as lines are written

        for para in doc.paragraphs:
            text = para.text.strip()
            if not text:
                continue

            # drawString does not wrap, so split the paragraph into lines
            # that fit between the margins.
            for line in simpleSplit(text, font_name, font_size, max_width):
                # Break the page before drawing, so a document that ends
                # exactly at the bottom margin gets no trailing blank page.
                if y < bottom:
                    can.showPage()
                    # A new page starts with a reset graphics state, so the
                    # font is set again.
                    can.setFont(font_name, font_size)
                    y = top
                can.drawString(left, y, line)
                y -= line_height

        # Save the PDF
        can.save()
    except Exception as e:
        raise gr.Error(f"Error: {e}") from e

    return pdf_filename

# Build the two-panel interface: one accordion per conversion direction,
# each with an upload box, a trigger button and a download box.
with gr.Blocks() as app:
    gr.Markdown(title_and_description)

    with gr.Row():
        # Left panel: PDF -> Word
        with gr.Column(), gr.Accordion("PDF to Word"):
            pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
            convert_pdf_to_word = gr.Button("Convert to Word")
            word_output = gr.File(
                label="Download Word file",
                type="filepath",
                file_types=[".docx"],
            )

        # Right panel: Word -> PDF
        with gr.Column(), gr.Accordion("Word to PDF"):
            word_input = gr.File(label="Upload Word", file_types=[".docx"])
            convert_word_to_pdf = gr.Button("Convert to PDF")
            pdf_output = gr.File(
                label="Download PDF file",
                type="filepath",
                file_types=[".pdf"],
            )

    # Wire each button to its converter once all components exist.
    convert_pdf_to_word.click(
        fn=pdf_to_word,
        inputs=[pdf_input],
        outputs=[word_output],
    )
    convert_word_to_pdf.click(
        fn=word_to_pdf,
        inputs=[word_input],
        outputs=[pdf_output],
    )

# Start the web server for the interface.
app.launch()