File size: 1,413 Bytes
a9d9c73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
from gradio import Interface, File, Dropdown, Button, HTML
import pandas as pd
import os
from utils import pdf_to_text, align_text

def process_files(source_file, target_file, lang1, lang2):
    """Align the text of two uploaded PDFs and export the result.

    Args:
        source_file: Uploaded source PDF, or ``None`` when nothing was
            uploaded. Its ``name`` attribute is handed to ``pdf_to_text``.
        target_file: Uploaded target PDF, handled like ``source_file``.
        lang1: First language selection, passed through to ``align_text``.
        lang2: Second language selection, passed through to ``align_text``.

    Returns:
        A 2-tuple matching the interface's two outputs:
        ``(html, excel_path)``. On success ``html`` is the aligned table
        rendered as HTML and ``excel_path`` is the path of the written
        Excel file. When either upload is missing, ``html`` is a
        user-facing message and ``excel_path`` is ``None``.
    """
    if source_file is None or target_file is None:
        # Always return one value per declared output; a bare string here
        # would not match the two-output interface.
        return "Please upload both PDF files.", None

    # Convert PDFs to text
    text_content1 = pdf_to_text(source_file.name)
    text_content2 = pdf_to_text(target_file.name)

    # Align the texts
    aligned_df = align_text(text_content1, text_content2, lang1, lang2)

    # Convert DataFrame to HTML
    aligned_html = aligned_df.to_html(index=False)

    # Save DataFrame as Excel file
    # NOTE(review): this is a fixed path in the working directory, so each
    # call overwrites the previous export -- confirm that is acceptable if
    # several users can run the app at once.
    excel_path = "aligned_data.xlsx"
    aligned_df.to_excel(excel_path, index=False)

    return aligned_html, excel_path

# Define the Gradio interface
# Inputs map positionally onto process_files(source_file, target_file,
# lang1, lang2); outputs map onto its (html, excel_path) return pair.
interface = Interface(
    fn=process_files,
    inputs=[
        File(label="Upload Source PDF"),
        File(label="Upload Target PDF"),
        Dropdown(choices=["en", "es", "fr", "ch", "ar", "ru", "pt", "sw"], label="Select Language 1"),
        Dropdown(choices=["en", "es", "fr", "ch", "ar", "ru", "pt", "sw"], label="Select Language 2"),
    ],
    outputs=[
        HTML(label="Aligned DataFrame"),
        # The second return value is a path to the Excel export. A File
        # output exposes it for download; a Button output cannot serve a file.
        File(label="Download Aligned DataFrame"),
    ],
    title="PDF Text Aligner",
    description="Upload two PDF files and select languages to align the text."
)

# Start the Gradio app only when this file is executed as a script, so that
# importing the module (e.g. to reuse `interface`) does not launch it.
if __name__ == "__main__":
    interface.launch()