Spaces:
Sleeping
Sleeping
import gradio as gr | |
from pdf2docx import Converter | |
from docx import Document | |
from fpdf import FPDF | |
import os | |
import tempfile | |
from pdfminer.high_level import extract_text | |
from reportlab.lib.pagesizes import A4 | |
from reportlab.pdfgen import canvas | |
from io import BytesIO | |
# Markdown text for the page header; it is rendered with gr.Markdown when the
# interface is built further down in this file.
title_and_description = """
# PDF to Word and Word to PDF Converter
This tool allows you to convert PDF files to Word documents and Word documents to PDF files.
Note: Scanned PDFs (image-based PDFs) are not supported.
"""
def pdf_to_word(pdf_file):
    """Convert a text-based PDF into a Word document.

    The text is extracted with pdfminer and written to a new ``.docx`` file,
    one paragraph per extracted line. Only the text is carried over; scanned
    (image-only) PDFs are rejected because no text can be extracted from them.

    Args:
        pdf_file: The uploaded PDF, either a path (``str`` / ``os.PathLike``)
            or an object exposing the path on its ``name`` attribute.

    Returns:
        The path of the generated ``.docx`` file on success, otherwise a
        string starting with ``"Error: "`` that describes the failure.
    """
    if pdf_file is None:
        return "Error: No PDF file was uploaded."
    try:
        # Accept both plain paths and upload wrappers that carry the path
        # on ``.name``.
        if isinstance(pdf_file, (str, os.PathLike)):
            pdf_path = os.fspath(pdf_file)
        else:
            pdf_path = pdf_file.name

        text = extract_text(pdf_path)
        if not text.strip():
            return "Error: The PDF appears to be image-based (scanned). Scanned PDFs are not supported."

        # The returned file must still exist after this function returns so
        # the caller can serve it. A TemporaryDirectory context would delete
        # it on exit, so use mkdtemp(), which leaves cleanup to the
        # caller / operating system.
        output_dir = tempfile.mkdtemp()

        # splitext swaps only the real extension, whatever its case, instead
        # of replacing every ".pdf" substring in the name.
        stem = os.path.splitext(os.path.basename(pdf_path))[0]
        docx_filename = os.path.join(output_dir, stem + ".docx")

        doc = Document()
        for line in text.splitlines():
            doc.add_paragraph(line)
        doc.save(docx_filename)
        return docx_filename
    except Exception as e:
        # NOTE(review): the click handler routes this value to a gr.File
        # output, so the message may not be shown as text; raising gr.Error
        # would be the alternative if changing the return contract is OK.
        return f"Error: {e}"
def word_to_pdf(docx_file):
    """Convert the paragraph text of a Word document into a PDF file.

    Each non-empty paragraph is drawn as plain 12pt Helvetica text on A4
    pages. Paragraphs wider than the printable area are wrapped onto several
    lines, and a new page is started when the bottom margin is reached.
    Character formatting, tables and images from the document are not
    reproduced.

    Args:
        docx_file: The uploaded document, either a path (``str`` /
            ``os.PathLike``) or an object exposing the path on its ``name``
            attribute.

    Returns:
        The path of the generated ``.pdf`` file on success, otherwise a
        string starting with ``"Error: "`` that describes the failure.
    """
    if docx_file is None:
        return "Error: No Word document was uploaded."
    try:
        # Imported locally so this function does not depend on a change to
        # the file-level import block.
        from reportlab.lib.utils import simpleSplit

        if isinstance(docx_file, (str, os.PathLike)):
            docx_path = os.fspath(docx_file)
        else:
            docx_path = docx_file.name

        # Page layout, in PDF points.
        font_name, font_size = "Helvetica", 12
        left_margin, right_margin = 100, 50
        top_y, bottom_y = 800, 50
        line_height = 15
        max_line_width = A4[0] - left_margin - right_margin

        doc = Document(docx_path)

        # The returned file must outlive this call so the caller can serve
        # it; a TemporaryDirectory context would delete it on exit.
        output_dir = tempfile.mkdtemp()
        stem = os.path.splitext(os.path.basename(docx_path))[0]
        pdf_filename = os.path.join(output_dir, (stem or "output") + ".pdf")

        can = canvas.Canvas(pdf_filename, pagesize=A4)
        can.setFont(font_name, font_size)

        y = top_y
        for para in doc.paragraphs:
            text = para.text.strip()
            if not text:
                continue
            # Break the paragraph into lines that fit the printable width
            # rather than letting one long line run off the page.
            for line in simpleSplit(text, font_name, font_size, max_line_width):
                if y < bottom_y:
                    can.showPage()
                    # Re-apply the font explicitly for the new page.
                    can.setFont(font_name, font_size)
                    y = top_y
                can.drawString(left_margin, y, line)
                y -= line_height

        can.save()
        return pdf_filename
    except Exception as e:
        # NOTE(review): the click handler routes this value to a gr.File
        # output, so the message may not be shown as text; raising gr.Error
        # would be the alternative if changing the return contract is OK.
        return f"Error: {e}"
# Build the page: a header followed by one row with two columns, one per
# conversion direction. Each column holds an upload field, a trigger button
# and a download field, and the button's click event calls the matching
# converter with the upload as input and the download field as output.
with gr.Blocks() as app:
    gr.Markdown(title_and_description)
    with gr.Row():
        # Left column: PDF -> Word.
        with gr.Column():
            with gr.Accordion("PDF to Word"):
                pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
                convert_pdf_to_word = gr.Button("Convert to Word")
                word_output = gr.File(label="Download Word file", type="filepath", file_types=[".docx"])
                # pdf_to_word's return value is delivered to word_output.
                convert_pdf_to_word.click(pdf_to_word, inputs=[pdf_input], outputs=[word_output])
        # Right column: Word -> PDF.
        with gr.Column():
            with gr.Accordion("Word to PDF"):
                word_input = gr.File(label="Upload Word", file_types=[".docx"])
                convert_word_to_pdf = gr.Button("Convert to PDF")
                pdf_output = gr.File(label="Download PDF file", type="filepath", file_types=[".pdf"])
                # word_to_pdf's return value is delivered to pdf_output.
                convert_word_to_pdf.click(word_to_pdf, inputs=[word_input], outputs=[pdf_output])
# Starts the app as soon as this module is executed (there is no __main__ guard).
# NOTE(review): share=True presumably requests a public share link; confirm it
# is wanted in the deployment environment.
app.launch(share=True)