|
from gradio import Interface, File, Dropdown, Button, HTML |
|
import pandas as pd |
|
import os |
|
from utils import pdf_to_text, align_text |
|
|
|
def process_files(source_file, target_file, lang1, lang2):
    """Align the text of two uploaded PDFs and export the alignment.

    Args:
        source_file: Uploaded source PDF as handed over by the UI: either an
            object exposing the on-disk path as ``.name`` or a plain path
            string. ``None`` when nothing was uploaded.
        target_file: Uploaded target PDF, same form as ``source_file``.
        lang1: Language code chosen for the source document.
        lang2: Language code chosen for the target document.

    Returns:
        A ``(html, excel_path)`` pair: the aligned table rendered as an HTML
        string and the path of an ``.xlsx`` export of the same table. When
        either upload is missing, ``(message, None)`` is returned instead so
        that both declared outputs still receive a value.
    """
    if source_file is None or target_file is None:
        # Two outputs are declared for this callback; returning a lone string
        # would leave the second output without a value.
        return "Please upload both PDF files.", None

    import tempfile  # only needed on the export path

    # Accept both upload objects (path in ``.name``) and plain path strings.
    source_path = getattr(source_file, "name", source_file)
    target_path = getattr(target_file, "name", target_file)

    source_text = pdf_to_text(source_path)
    target_text = pdf_to_text(target_path)

    # align_text is expected to return a pandas DataFrame (it is used below
    # via ``to_html`` / ``to_excel``).
    aligned_df = align_text(source_text, target_text, lang1, lang2)
    aligned_html = aligned_df.to_html(index=False)

    # Write into a fresh per-request directory: a fixed path in the working
    # directory would let concurrent requests overwrite each other's export.
    # The file name itself is kept so the download is still "aligned_data.xlsx".
    export_dir = tempfile.mkdtemp(prefix="aligned_")
    excel_path = os.path.join(export_dir, "aligned_data.xlsx")
    aligned_df.to_excel(excel_path, index=False)

    return aligned_html, excel_path
|
|
|
|
|
# Language codes offered in both dropdowns; kept in one place so the two
# selectors cannot drift apart.
# NOTE(review): "ch" is not the ISO 639-1 code for Chinese ("zh") — confirm
# which code align_text actually expects before changing it.
LANGUAGE_CODES = ["en", "es", "fr", "ch", "ar", "ru", "pt", "sw"]

# UI wiring: two PDF uploads plus a language code for each go into
# process_files; its (html, excel_path) result feeds the two outputs below.
interface = Interface(
    fn=process_files,
    inputs=[
        File(label="Upload Source PDF"),
        File(label="Upload Target PDF"),
        Dropdown(choices=list(LANGUAGE_CODES), label="Select Language 1"),
        Dropdown(choices=list(LANGUAGE_CODES), label="Select Language 2"),
    ],
    outputs=[
        HTML(label="Aligned DataFrame"),
        # A File output turns the returned path into a downloadable file;
        # a Button cannot serve a file path as a download.
        File(label="Download Aligned DataFrame"),
    ],
    title="PDF Text Aligner",
    description="Upload two PDF files and select languages to align the text.",
)
|
|
|
# Launch the interface only when this file is run as a script, so importing
# the module (e.g. to reuse process_files) does not start the app.
if __name__ == "__main__":
    interface.launch()