File size: 1,670 Bytes
46048a7
a9d9c73
 
 
 
 
 
46048a7
 
 
 
a9d9c73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46048a7
 
 
 
5323684
 
46048a7
 
 
 
 
 
 
 
 
a9d9c73
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import gradio as gr
import pandas as pd
import os
from utils import pdf_to_text, align_text

def process_files(source_file, target_file, lang1, lang2):
    """Align the text of two uploaded PDFs and export the result to Excel.

    Args:
        source_file: Uploaded source PDF. Either an object exposing its
            on-disk path as ``.name`` or a plain path string; ``None`` when
            nothing was uploaded.
        target_file: Uploaded target PDF, in the same form as ``source_file``.
        lang1: Language value forwarded to ``align_text`` for the source text.
        lang2: Language value forwarded to ``align_text`` for the target text.

    Returns:
        A ``(html, excel_path)`` tuple. On success ``html`` is the aligned
        table rendered with ``to_html`` and ``excel_path`` is the path of the
        exported workbook. When an upload is missing or both languages are
        equal, ``html`` is a message for the user and ``excel_path`` is
        ``None``.
    """
    import tempfile

    if source_file is None or target_file is None:
        return "Please upload both PDF files.", None

    if lang1 == lang2:
        return "Please select different languages.", None

    # Uploads may arrive as file-like objects (path in ``.name``) or as plain
    # path strings; fall back to the value itself when there is no ``.name``.
    source_path = getattr(source_file, "name", source_file)
    target_path = getattr(target_file, "name", target_file)

    # Convert PDFs to text
    text_content1 = pdf_to_text(source_path)
    text_content2 = pdf_to_text(target_path)

    # Align the texts
    aligned_df = align_text(text_content1, text_content2, lang1, lang2)

    # Convert DataFrame to HTML
    aligned_html = aligned_df.to_html(index=False)

    # Save DataFrame as Excel file. Each call writes into its own temporary
    # directory so simultaneous requests cannot overwrite one another's
    # export; the file name itself is unchanged.
    excel_path = os.path.join(tempfile.mkdtemp(), "aligned_data.xlsx")
    aligned_df.to_excel(excel_path, index=False)

    return aligned_html, excel_path

# Language codes offered by both dropdowns; each dropdown gets its own list copy.
LANGUAGE_CODES = ("en", "es", "fr", "zh", "ar", "ru", "pt")

# Build the UI: two PDF uploads, two language pickers, a start button,
# an HTML area for the result and a file slot for the Excel download.
with gr.Blocks() as interface:
    gr.Markdown(
        "# PDF Text Aligner\n"
        "Upload two PDF files and select languages to align the text."
    )

    # Inputs
    source_file = gr.File(label="Upload Source PDF")
    target_file = gr.File(label="Upload Target PDF")
    lang1 = gr.Dropdown(
        choices=list(LANGUAGE_CODES),
        label="Select Language 1",
    )
    lang2 = gr.Dropdown(
        choices=list(LANGUAGE_CODES),
        label="Select Language 2",
        value="es",
    )
    start_button = gr.Button(value="Start")

    # Outputs
    aligned_html = gr.HTML(label="Aligned DataFrame")
    download_button = gr.File(label="Download Aligned Data as Excel")

    # Clicking the button runs process_files on the four inputs and routes
    # its two return values to the HTML view and the download slot.
    start_button.click(
        process_files,
        [source_file, target_file, lang1, lang2],
        [aligned_html, download_button],
    )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    interface.launch()