data-boards / app.py
prithivMLmods's picture
Update app.py
4194e87 verified
raw
history blame
1.97 kB
import gradio as gr
import fitz # PyMuPDF
from docx import Document
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter
import io
# PDF to Word conversion function
def pdf_to_word(pdf_file):
    """Extract the plain text of a PDF and wrap it in a Word document.

    Args:
        pdf_file: Binary file-like object; its ``read()`` must return the
            raw bytes of a PDF.

    Returns:
        tuple: ``(buffer, "converted.docx")`` where ``buffer`` is an
        ``io.BytesIO`` rewound to position 0 that holds the .docx bytes.

    Only the text layer is carried over: the whole extracted text is placed
    in a single paragraph, so layout, images and fonts are not preserved.
    """
    # NOTE(review): the caller wires this return value into a single
    # gr.File output; confirm that component accepts an in-memory
    # (buffer, name) tuple rather than a file path.

    # Open the PDF from memory with PyMuPDF. The context manager closes the
    # document once the text has been pulled out (the original never
    # closed it).
    with fitz.open(stream=pdf_file.read(), filetype="pdf") as pdf_doc:
        # join() avoids re-copying the accumulated string on every page.
        text = "".join(page.get_text() for page in pdf_doc)

    # Create a Word document using python-docx
    word_doc = Document()
    word_doc.add_paragraph(text)

    # Save the Word document to a bytes buffer and rewind it for the reader
    buffer = io.BytesIO()
    word_doc.save(buffer)
    buffer.seek(0)
    return buffer, "converted.docx"
# Word to PDF conversion function
def word_to_pdf(docx_file):
    """Render the paragraph text of a Word document into a simple PDF.

    Args:
        docx_file: Anything ``docx.Document`` accepts (a path or a binary
            file-like object containing a .docx).

    Returns:
        tuple: ``(buffer, "converted.pdf")`` where ``buffer`` is an
        ``io.BytesIO`` rewound to position 0 that holds the PDF bytes.

    Only ``doc.paragraphs`` text is rendered, in the canvas default font.
    Lines are not wrapped, so a line wider than the page still runs past
    the right edge.
    """
    # NOTE(review): the caller wires this return value into a single
    # gr.File output; confirm that component accepts an in-memory
    # (buffer, name) tuple rather than a file path.

    # Read Word document using python-docx; a paragraph may itself contain
    # line breaks, so split each one into individual output lines.
    doc = Document(docx_file)
    lines = [
        line
        for para in doc.paragraphs
        for line in para.text.split("\n")
    ]

    # Create a PDF using reportlab
    buffer = io.BytesIO()
    c = canvas.Canvas(buffer, pagesize=letter)

    left_x = 50      # left margin, in points
    top_y = 750      # baseline of the first line on every page
    bottom_y = 50    # do not start a line below this height

    textobject = c.beginText()
    textobject.setTextOrigin(left_x, top_y)
    for line in lines:
        if textobject.getY() < bottom_y:
            # The cursor has run off the bottom of the page. The original
            # kept writing here, so the remaining text was silently lost.
            # Flush this page and continue on a fresh one.
            c.drawText(textobject)
            c.showPage()
            textobject = c.beginText()
            textobject.setTextOrigin(left_x, top_y)
        textobject.textLine(line)

    # Flush the last (possibly empty) page and finalise the document.
    c.drawText(textobject)
    c.showPage()
    c.save()
    buffer.seek(0)
    return buffer, "converted.pdf"
# Gradio interface: one tab per conversion direction. Components must be
# created inside the Blocks/Tab contexts, and their creation order is the
# on-screen order.
# NOTE(review): type="file" on gr.File appears to be accepted only by older
# Gradio releases -- confirm against the pinned Gradio version.
with gr.Blocks() as demo:
    gr.Markdown(value="<h1>PDF to Word and Word to PDF Converter</h1>")

    # Tab 1: upload a PDF, get a Word document back.
    with gr.Tab("PDF to Word"):
        pdf_input = gr.File(type="file", label="Upload PDF File")
        pdf_convert_btn = gr.Button(value="Convert to Word")
        word_output = gr.File(label="Download Word File")
        pdf_convert_btn.click(
            fn=pdf_to_word,
            inputs=pdf_input,
            outputs=word_output,
        )

    # Tab 2: upload a Word document, get a PDF back.
    with gr.Tab("Word to PDF"):
        word_input = gr.File(type="file", label="Upload Word File")
        word_convert_btn = gr.Button(value="Convert to PDF")
        pdf_output = gr.File(label="Download PDF File")
        word_convert_btn.click(
            fn=word_to_pdf,
            inputs=word_input,
            outputs=pdf_output,
        )

# Start the web server for the app.
demo.launch()