Spaces:

Nymbo
/

data-boards

Sleeping

App Files Files Community

data-boards / app.py

prithivMLmods

Update app.py

5556171 verified 8 months ago

raw

history blame

3.84 kB

	import gradio as gr
	from pdf2docx import Converter
	from docx import Document
	from fpdf import FPDF
	import os
	import tempfile
	from pdfminer.high_level import extract_text
	from reportlab.lib.pagesizes import A4
	from reportlab.pdfgen import canvas
	from io import BytesIO

	# Markdown source for the page header: a title line followed by a short
	# description of the tool and its limitation regarding scanned PDFs.
	title_and_description = """
	# PDF to Word and Word to PDF Converter

	This tool allows you to convert PDF files to Word documents and Word documents to PDF files.
	Note: Scanned PDFs (image-based PDFs) are not supported.
	"""

	def pdf_to_word(pdf_file):
	    """
	    Convert a text-based PDF file to a Word document.

	    The text is extracted with pdfminer and written to a new .docx file,
	    one paragraph per extracted line. Scanned (image-based) PDFs yield no
	    extractable text and are rejected with an error message.

	    Args:
	        pdf_file: The uploaded PDF, either an object exposing the file path
	            as ``.name`` or a plain path string.

	    Returns:
	        The path of the generated .docx file on success, otherwise a string
	        starting with "Error:" that describes what went wrong.
	    """
	    # Nothing was uploaded: report it clearly instead of failing on `.name`.
	    if pdf_file is None:
	        return "Error: No file was uploaded."

	    # NOTE(review): error strings are returned to a gr.File output, which
	    # expects a path; consider raising gr.Error so the UI shows the message.
	    try:
	        pdf_path = getattr(pdf_file, "name", pdf_file)

	        # Extract text from the PDF using pdfminer
	        text = extract_text(pdf_path)
	        if not text.strip():
	            return "Error: The PDF appears to be image-based (scanned). Scanned PDFs are not supported."

	        # mkdtemp() instead of TemporaryDirectory(): the latter deletes the
	        # directory (and the converted file in it) as soon as the `with`
	        # block exits, i.e. before the caller can read the returned path.
	        output_dir = tempfile.mkdtemp()

	        # Swap only the real extension, so "a.pdf.v2.PDF" -> "a.pdf.v2.docx".
	        stem, _ = os.path.splitext(os.path.basename(pdf_path))
	        docx_filename = os.path.join(output_dir, stem + ".docx")

	        # Create a Word document and add the extracted text
	        doc = Document()
	        for line in text.splitlines():
	            doc.add_paragraph(line)
	        doc.save(docx_filename)

	        # Return the path to the converted file
	        return docx_filename
	    except Exception as e:
	        return f"Error: {e}"

	def word_to_pdf(docx_file):
	    """
	    Convert a Word document to a PDF file.

	    Only paragraph text is carried over: each non-empty paragraph is drawn
	    in 12 pt Helvetica on A4 pages, wrapped to the page width, with a new
	    page started whenever the bottom margin is reached. Empty paragraphs
	    are skipped.

	    Args:
	        docx_file: The uploaded .docx, either an object exposing the file
	            path as ``.name`` or a plain path string.

	    Returns:
	        The path of the generated PDF on success, otherwise a string
	        starting with "Error:" that describes what went wrong.
	    """
	    # Nothing was uploaded: report it clearly instead of failing on `.name`.
	    if docx_file is None:
	        return "Error: No file was uploaded."

	    # NOTE(review): error strings are returned to a gr.File output, which
	    # expects a path; consider raising gr.Error so the UI shows the message.
	    try:
	        # Local import: keeps the new dependency on this helper inside the
	        # function; reportlab itself is already imported by the file.
	        from reportlab.lib.utils import simpleSplit

	        font_name, font_size = "Helvetica", 12
	        left_margin, right_margin = 100, 50
	        top_y, bottom_y, line_height = 800, 50, 15
	        page_width, _ = A4
	        max_line_width = page_width - left_margin - right_margin

	        # Read the Word document first so a bad upload fails before any
	        # output directory is created.
	        docx_path = getattr(docx_file, "name", docx_file)
	        doc = Document(docx_path)

	        # mkdtemp() instead of TemporaryDirectory(): the latter deletes the
	        # directory (and the PDF in it) as soon as the `with` block exits,
	        # i.e. before the caller can read the returned path.
	        output_dir = tempfile.mkdtemp()
	        pdf_filename = os.path.join(output_dir, "output.pdf")

	        # Create the PDF with reportlab, writing straight to the output path
	        can = canvas.Canvas(pdf_filename, pagesize=A4)
	        can.setFont(font_name, font_size)
	        y = top_y  # Starting y-coordinate for text

	        for para in doc.paragraphs:
	            text = para.text.strip()
	            if not text:
	                continue

	            # Wrap the paragraph; a single drawString would run off the
	            # right edge of the page for long paragraphs.
	            for line in simpleSplit(text, font_name, font_size, max_line_width):
	                can.drawString(left_margin, y, line)
	                y -= line_height  # Move down for the next line

	                # Handle page breaks
	                if y < bottom_y:
	                    can.showPage()
	                    # A new page starts with fresh graphics state, so set
	                    # the font again explicitly.
	                    can.setFont(font_name, font_size)
	                    y = top_y

	        # Save the PDF
	        can.save()

	        # Return the path to the converted file
	        return pdf_filename
	    except Exception as e:
	        return f"Error: {e}"

	# Page layout: a Markdown header above one row with two columns, one
	# collapsible converter panel per direction.
	with gr.Blocks() as app:
	    gr.Markdown(title_and_description)

	    with gr.Row():
	        # Left column: PDF -> Word
	        with gr.Column(), gr.Accordion("PDF to Word"):
	            pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
	            convert_pdf_to_word = gr.Button("Convert to Word")
	            word_output = gr.File(
	                label="Download Word file",
	                type="filepath",
	                file_types=[".docx"],
	            )

	            convert_pdf_to_word.click(
	                fn=pdf_to_word,
	                inputs=[pdf_input],
	                outputs=[word_output],
	            )

	        # Right column: Word -> PDF
	        with gr.Column(), gr.Accordion("Word to PDF"):
	            word_input = gr.File(label="Upload Word", file_types=[".docx"])
	            convert_word_to_pdf = gr.Button("Convert to PDF")
	            pdf_output = gr.File(
	                label="Download PDF file",
	                type="filepath",
	                file_types=[".pdf"],
	            )

	            convert_word_to_pdf.click(
	                fn=word_to_pdf,
	                inputs=[word_input],
	                outputs=[pdf_output],
	            )

	# Start the server; share=True additionally requests a public share link.
	app.launch(share=True)