Spaces:

Nymbo
/

data-boards

Sleeping

App Files Files Community

prithivMLmods committed on Dec 20, 2024

Commit

4194e87

verified ·

1 Parent(s): 59a53f8

Update app.py

Browse files

Files changed (1) hide show

app.py +56 -99

app.py CHANGED Viewed

@@ -1,109 +1,66 @@
 import gradio as gr
-from pdf2docx import Converter
 from docx import Document
-from fpdf import FPDF
-import os
-import tempfile
-from pdfminer.high_level import extract_text
-from reportlab.lib.pagesizes import A4
 from reportlab.pdfgen import canvas
-from io import BytesIO
-title_and_description = """
-# PDF to Word and Word to PDF Converter
-This tool allows you to convert PDF files to Word documents and Word documents to PDF files.
-Note: Scanned PDFs (image-based PDFs) are not supported.
-"""
 def pdf_to_word(pdf_file):
-    """
-    Converts a text-based PDF file to a Word document.
-    Scanned PDFs (image-based PDFs) are not supported.
-    """
-    try:
-        # Extract text from the PDF using pdfminer
-        text = extract_text(pdf_file.name)
-        if not text.strip():
-            return "Error: The PDF appears to be image-based (scanned). Scanned PDFs are not supported."
-        # Create a temporary directory to store the output file
-        with tempfile.TemporaryDirectory() as temp_dir:
-            docx_filename = os.path.join(temp_dir, os.path.basename(pdf_file.name).replace('.pdf', '.docx'))
-            # Create a Word document and add the extracted text
-            doc = Document()
-            for line in text.splitlines():
-                doc.add_paragraph(line)
-            doc.save(docx_filename)
-            # Return the path to the converted file
-            return docx_filename
-    except Exception as e:
-        return f"Error: {e}"
 def word_to_pdf(docx_file):
-    """
-    Converts a Word document to a PDF file.
-    Handles text and basic formatting.
-    """
-    try:
-        # Create a temporary directory to store the output file
-        with tempfile.TemporaryDirectory() as temp_dir:
-            pdf_filename = os.path.join(temp_dir, "output.pdf")
-            # Create a PDF using reportlab
-            packet = BytesIO()
-            can = canvas.Canvas(packet, pagesize=A4)
-            can.setFont("Helvetica", 12)
-            # Read the Word document
-            doc = Document(docx_file.name)
-            y = 800  # Starting y-coordinate for text
-            for para in doc.paragraphs:
-                text = para.text.strip()
-                if not text:
-                    continue
-                # Add text to the PDF
-                can.drawString(100, y, text)
-                y -= 15  # Move down for the next line
-                # Handle page breaks
-                if y < 50:
-                    can.showPage()
-                    y = 800
-            # Save the PDF
-            can.save()
-            packet.seek(0)
-            with open(pdf_filename, "wb") as f:
-                f.write(packet.read())
-            # Return the path to the converted file
-            return pdf_filename
-    except Exception as e:
-        return f"Error: {e}"
-with gr.Blocks() as app:
-    gr.Markdown(title_and_description)
-    with gr.Row():
-        with gr.Column():
-            with gr.Accordion("PDF to Word"):
-                pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
-                convert_pdf_to_word = gr.Button("Convert to Word")
-                word_output = gr.File(label="Download Word file", type="filepath", file_types=[".docx"])
-                convert_pdf_to_word.click(pdf_to_word, inputs=[pdf_input], outputs=[word_output])
-        with gr.Column():
-            with gr.Accordion("Word to PDF"):
-                word_input = gr.File(label="Upload Word", file_types=[".docx"])
-                convert_word_to_pdf = gr.Button("Convert to PDF")
-                pdf_output = gr.File(label="Download PDF file", type="filepath", file_types=[".pdf"])
-                convert_word_to_pdf.click(word_to_pdf, inputs=[word_input], outputs=[pdf_output])
-app.launch(share=True)

 import gradio as gr
+import fitz  # PyMuPDF
 from docx import Document
 from reportlab.pdfgen import canvas
+from reportlab.lib.pagesizes import letter
+import io
+# PDF to Word conversion function
 def pdf_to_word(pdf_file):
+    # Read PDF file using PyMuPDF
+    doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
+    text = ""
+    for page in doc:
+        text += page.get_text()
+    # Create a Word document using python-docx
+    docx = Document()
+    docx.add_paragraph(text)
+    # Save the Word document to a bytes buffer
+    buffer = io.BytesIO()
+    docx.save(buffer)
+    buffer.seek(0)
+    return buffer, "converted.docx"
+# Word to PDF conversion function
 def word_to_pdf(docx_file):
+    # Read Word document using python-docx
+    doc = Document(docx_file)
+    text = ""
+    for para in doc.paragraphs:
+        text += para.text + "\n"
+    # Create a PDF using reportlab
+    buffer = io.BytesIO()
+    c = canvas.Canvas(buffer, pagesize=letter)
+    textobject = c.beginText()
+    textobject.setTextOrigin(50, 750)
+    lines = text.split('\n')
+    for line in lines:
+        textobject.textLine(line)
+    c.drawText(textobject)
+    c.showPage()
+    c.save()
+    buffer.seek(0)
+    return buffer, "converted.pdf"
+# Gradio interface
+with gr.Blocks() as demo:
+    gr.Markdown("<h1>PDF to Word and Word to PDF Converter</h1>")
+    with gr.Tab("PDF to Word"):
+        pdf_input = gr.File(label="Upload PDF File", type="file")
+        pdf_convert_btn = gr.Button("Convert to Word")
+        word_output = gr.File(label="Download Word File")
+        pdf_convert_btn.click(pdf_to_word, inputs=pdf_input, outputs=word_output)
+    with gr.Tab("Word to PDF"):
+        word_input = gr.File(label="Upload Word File", type="file")
+        word_convert_btn = gr.Button("Convert to PDF")
+        pdf_output = gr.File(label="Download PDF File")
+        word_convert_btn.click(word_to_pdf, inputs=word_input, outputs=pdf_output)
+demo.launch()