Delete src
Browse files- src/app.py +0 -44
- src/utils.py +0 -12
src/app.py
DELETED
|
@@ -1,44 +0,0 @@
|
|
| 1 |
-
from gradio import Interface, File, Dropdown, Button, HTML
|
| 2 |
-
import pandas as pd
|
| 3 |
-
import os
|
| 4 |
-
from utils import pdf_to_text, align_text
|
| 5 |
-
|
| 6 |
-
def process_files(source_file, target_file, lang1, lang2):
    """Align the text of two uploaded PDFs and export the result.

    Both uploads are converted to text with ``pdf_to_text``, the two texts
    are handed to ``align_text`` together with the language codes, and the
    object it returns is rendered to HTML and written to
    ``aligned_data.xlsx`` in the current working directory.

    Args:
        source_file: Uploaded source PDF, or ``None`` if nothing was
            uploaded. Its ``.name`` attribute is used as the path to read.
        target_file: Uploaded target PDF, or ``None`` if nothing was
            uploaded. Its ``.name`` attribute is used as the path to read.
        lang1: Language code forwarded to ``align_text`` as its third
            argument.
        lang2: Language code forwarded to ``align_text`` as its fourth
            argument.

    Returns:
        A 2-tuple ``(html, excel_path)`` matching the two outputs declared
        on the interface. When either upload is missing, the tuple is
        ``("Please upload both PDF files.", None)``.
    """
    if source_file is None or target_file is None:
        # Always return one value per declared output; a lone string would
        # not match the two-output interface.
        return "Please upload both PDF files.", None

    # Convert PDFs to text
    source_text = pdf_to_text(source_file.name)
    target_text = pdf_to_text(target_file.name)

    # Align the texts
    # NOTE(review): the utils.py shown alongside this file defines only
    # pdf_to_text and save_to_excel -- confirm where align_text lives.
    # Presumably it returns a pandas DataFrame (to_html / to_excel are
    # called on the result below) -- TODO confirm.
    aligned_df = align_text(source_text, target_text, lang1, lang2)

    # Convert DataFrame to HTML
    aligned_html = aligned_df.to_html(index=False)

    # Save DataFrame as Excel file
    # NOTE(review): fixed file name in the working directory; concurrent
    # requests would overwrite each other's output -- confirm acceptable.
    excel_path = "aligned_data.xlsx"
    aligned_df.to_excel(excel_path, index=False)

    return aligned_html, excel_path
|
| 25 |
-
|
| 26 |
-
# Define the Gradio interface

# Language codes offered in both dropdowns, kept in one place so the two
# lists cannot drift apart.
# NOTE(review): "ch" is not the ISO 639-1 code for Chinese ("zh") -- confirm
# what align_text expects before changing it.
_LANGUAGE_CHOICES = ("en", "es", "fr", "ch", "ar", "ru", "pt", "sw")

interface = Interface(
    fn=process_files,
    inputs=[
        File(label="Upload Source PDF"),
        File(label="Upload Target PDF"),
        Dropdown(choices=list(_LANGUAGE_CHOICES), label="Select Language 1"),
        Dropdown(choices=list(_LANGUAGE_CHOICES), label="Select Language 2"),
    ],
    outputs=[
        HTML(label="Aligned DataFrame"),
        # process_files returns the path of the generated workbook as its
        # second value; a File output exposes that path as a download,
        # which a Button cannot do.
        File(label="Download Aligned DataFrame"),
    ],
    title="PDF Text Aligner",
    description="Upload two PDF files and select languages to align the text.",
)

# Start the app only when this file is run as a script, not on import.
if __name__ == "__main__":
    interface.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/utils.py
DELETED
|
@@ -1,12 +0,0 @@
|
|
| 1 |
-
def pdf_to_text(pdf_path: str) -> str:
    """Return the extracted text of every page of the PDF at *pdf_path*.

    Pages are processed in order and each page's text is followed by a
    newline. A page for which extraction yields nothing contributes only
    that newline.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The concatenated page texts; an empty string when the document has
        no pages.
    """
    # Imported inside the function, as in the original, so the module can
    # be imported without PyPDF2 being loaded up front.
    from PyPDF2 import PdfReader

    with open(pdf_path, "rb") as file:
        reader = PdfReader(file)
        # Join while the file is still open: pages are read from the
        # underlying stream. ``or ''`` guards against extract_text()
        # returning None, which would make string concatenation fail.
        return "".join(
            f"{page.extract_text() or ''}\n" for page in reader.pages
        )
|
| 10 |
-
|
| 11 |
-
def save_to_excel(df, file_name: str):
    """Write *df* to *file_name* through its ``to_excel`` method.

    The index is left out of the output (``index=False``).

    Args:
        df: Object exposing ``to_excel``; presumably a pandas DataFrame --
            TODO confirm against callers.
        file_name: Destination path handed to ``to_excel``.
    """
    export_options = {"index": False}
    df.to_excel(file_name, **export_options)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|