"""Gradio app that converts text-based PDFs to Word documents and back."""

import os
import tempfile
from io import BytesIO

import gradio as gr
from docx import Document
from fpdf import FPDF
from pdf2docx import Converter
from pdfminer.high_level import extract_text
from reportlab.lib.pagesizes import A4
from reportlab.lib.utils import simpleSplit
from reportlab.pdfgen import canvas

title_and_description = """
# PDF to Word and Word to PDF Converter

This tool allows you to convert PDF files to Word documents and Word documents to PDF files.

Note: Scanned PDFs (image-based PDFs) are not supported.
"""

# Layout used when rendering a Word document onto A4 pages (units: points).
PDF_FONT_NAME = "Helvetica"
PDF_FONT_SIZE = 12
PDF_LEFT_MARGIN = 100
PDF_TOP_Y = 800
PDF_BOTTOM_Y = 50
PDF_LINE_HEIGHT = 15


def _uploaded_path(uploaded):
    """Return the filesystem path of a file received from a ``gr.File`` input.

    Accepts either a plain path (``str`` / ``os.PathLike``) or an object that
    exposes the path as ``.name``, so the callbacks work whichever form the
    installed Gradio version hands over.

    Raises:
        gr.Error: If nothing was uploaded.
    """
    if uploaded is None:
        raise gr.Error("Error: No file was uploaded.")
    if isinstance(uploaded, (str, os.PathLike)):
        return os.fspath(uploaded)
    return uploaded.name


def pdf_to_word(pdf_file):
    """Convert a text-based PDF file to a Word document.

    Only the extracted text is carried over, one paragraph per line of text.
    Scanned (image-based) PDFs are not supported.

    Args:
        pdf_file: The upload received from the ``gr.File`` input.

    Returns:
        Path to the generated ``.docx`` file.

    Raises:
        gr.Error: If no file was uploaded, the PDF contains no extractable
            text, or the conversion fails.
    """
    pdf_path = _uploaded_path(pdf_file)
    try:
        # Extract text from the PDF using pdfminer.
        text = extract_text(pdf_path)
        if not text.strip():
            raise gr.Error(
                "Error: The PDF appears to be image-based (scanned). "
                "Scanned PDFs are not supported."
            )

        # splitext swaps only the real extension, whatever its letter case.
        stem = os.path.splitext(os.path.basename(pdf_path))[0]

        # mkdtemp (not TemporaryDirectory): the file must still exist after
        # this function returns so that Gradio can serve it for download.
        out_dir = tempfile.mkdtemp()
        docx_filename = os.path.join(out_dir, f"{stem}.docx")

        doc = Document()
        for line in text.splitlines():
            doc.add_paragraph(line)
        doc.save(docx_filename)
    except gr.Error:
        raise
    except Exception as exc:
        # Surface the failure in the UI instead of returning a string that
        # the File output would try to interpret as a path.
        raise gr.Error(f"Error: {exc}") from exc
    return docx_filename


def word_to_pdf(docx_file):
    """Convert a Word document to a PDF file.

    Renders the text of every non-empty paragraph in a single font on A4
    pages, wrapping long paragraphs at the right margin and starting a new
    page when the bottom margin is reached.

    Args:
        docx_file: The upload received from the ``gr.File`` input.

    Returns:
        Path to the generated PDF file.

    Raises:
        gr.Error: If no file was uploaded or the conversion fails.
    """
    docx_path = _uploaded_path(docx_file)
    try:
        # Read the document first so that an unreadable upload fails before
        # any output directory is created.
        doc = Document(docx_path)

        # mkdtemp (not TemporaryDirectory): the file must still exist after
        # this function returns so that Gradio can serve it for download.
        out_dir = tempfile.mkdtemp()
        pdf_filename = os.path.join(out_dir, "output.pdf")

        can = canvas.Canvas(pdf_filename, pagesize=A4)
        can.setFont(PDF_FONT_NAME, PDF_FONT_SIZE)

        # Keep the right margin as wide as the left one.
        max_width = A4[0] - 2 * PDF_LEFT_MARGIN
        y = PDF_TOP_Y
        for para in doc.paragraphs:
            text = para.text.strip()
            if not text:
                continue
            # Wrap the paragraph so long text is not drawn past the page edge.
            for line in simpleSplit(text, PDF_FONT_NAME, PDF_FONT_SIZE, max_width):
                can.drawString(PDF_LEFT_MARGIN, y, line)
                y -= PDF_LINE_HEIGHT
                if y < PDF_BOTTOM_Y:
                    can.showPage()
                    # Re-apply the font explicitly for the new page.
                    can.setFont(PDF_FONT_NAME, PDF_FONT_SIZE)
                    y = PDF_TOP_Y
        can.save()
    except Exception as exc:
        raise gr.Error(f"Error: {exc}") from exc
    return pdf_filename


with gr.Blocks() as app:
    gr.Markdown(title_and_description)
    with gr.Row():
        with gr.Column():
            with gr.Accordion("PDF to Word"):
                pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
                convert_pdf_to_word = gr.Button("Convert to Word")
                word_output = gr.File(
                    label="Download Word file",
                    type="filepath",
                    file_types=[".docx"],
                )
                convert_pdf_to_word.click(
                    pdf_to_word, inputs=[pdf_input], outputs=[word_output]
                )
        with gr.Column():
            with gr.Accordion("Word to PDF"):
                word_input = gr.File(label="Upload Word", file_types=[".docx"])
                convert_word_to_pdf = gr.Button("Convert to PDF")
                pdf_output = gr.File(
                    label="Download PDF file",
                    type="filepath",
                    file_types=[".pdf"],
                )
                convert_word_to_pdf.click(
                    word_to_pdf, inputs=[word_input], outputs=[pdf_output]
                )

# NOTE(review): share=True requests a publicly reachable link; confirm that
# exposing this converter outside the local machine is intended.
app.launch(share=True)