pdf2tmx / app.py
nelsonjq's picture
Upload 2 files
a9d9c73 verified
raw
history blame
1.41 kB
import os
import tempfile

import pandas as pd
from gradio import Interface, File, Dropdown, Button, HTML

from utils import pdf_to_text, align_text
def process_files(source_file, target_file, lang1, lang2):
    """Align the text of two uploaded PDFs and export the result.

    Args:
        source_file: Uploaded source PDF, given either as a path string or
            as an object exposing the path through ``.name``; ``None`` when
            nothing was uploaded.
        target_file: Uploaded target PDF, in the same forms as
            ``source_file``.
        lang1: Language code forwarded to ``align_text`` as its third
            argument.
        lang2: Language code forwarded to ``align_text`` as its fourth
            argument.

    Returns:
        A 2-tuple ``(html, excel_path)``: the aligned table rendered as an
        HTML string and the path of the Excel file written for it. When
        either upload is missing, returns
        ``("Please upload both PDF files.", None)`` instead.
    """
    if source_file is None or target_file is None:
        # Two values are always returned so the result lines up with the
        # two-element tuple returned on the success path.
        return "Please upload both PDF files.", None

    # Accept both a plain path string and a file wrapper carrying ``.name``.
    source_path = getattr(source_file, "name", source_file)
    target_path = getattr(target_file, "name", target_file)

    # Convert PDFs to text
    text_content1 = pdf_to_text(source_path)
    text_content2 = pdf_to_text(target_path)

    # Align the texts (result is presumably a pandas DataFrame; it is only
    # used through ``to_html`` and ``to_excel`` here).
    aligned_df = align_text(text_content1, text_content2, lang1, lang2)

    # Convert DataFrame to HTML
    aligned_html = aligned_df.to_html(index=False)

    # Write the Excel file into a fresh directory per call so simultaneous
    # requests cannot overwrite each other's output; the base name is kept.
    excel_path = os.path.join(tempfile.mkdtemp(), "aligned_data.xlsx")
    aligned_df.to_excel(excel_path, index=False)

    return aligned_html, excel_path
# Language codes offered in both language dropdowns.
# NOTE(review): if "ch" is meant to be Chinese, the ISO 639-1 code is "zh";
# confirm what align_text expects before changing it.
LANGUAGE_CHOICES = ["en", "es", "fr", "ch", "ar", "ru", "pt", "sw"]

# Define the Gradio interface: two PDF uploads and two language selectors in,
# an HTML preview of the aligned table and a downloadable file out.
interface = Interface(
    fn=process_files,
    inputs=[
        File(label="Upload Source PDF"),
        File(label="Upload Target PDF"),
        Dropdown(choices=LANGUAGE_CHOICES, label="Select Language 1"),
        Dropdown(choices=LANGUAGE_CHOICES, label="Select Language 2"),
    ],
    outputs=[
        HTML(label="Aligned DataFrame"),
        # A File output turns the path returned by process_files into a
        # download link; a Button cannot serve a file.
        File(label="Download Aligned DataFrame"),
    ],
    title="PDF Text Aligner",
    description="Upload two PDF files and select languages to align the text.",
)

if __name__ == "__main__":
    interface.launch()