nelsonjq committed on
Commit
42da2b2
·
verified ·
1 Parent(s): bbdbe4c

Delete src

Browse files
Files changed (2) hide show
  1. src/app.py +0 -44
  2. src/utils.py +0 -12
src/app.py DELETED
@@ -1,44 +0,0 @@
1
- from gradio import Interface, File, Dropdown, Button, HTML
2
- import pandas as pd
3
- import os
4
- from utils import pdf_to_text, align_text
5
-
6
- def process_files(source_file, target_file, lang1, lang2):
7
- if source_file is None or target_file is None:
8
- return "Please upload both PDF files."
9
-
10
- # Convert PDFs to text
11
- text_content1 = pdf_to_text(source_file.name)
12
- text_content2 = pdf_to_text(target_file.name)
13
-
14
- # Align the texts
15
- aligned_df = align_text(text_content1, text_content2, lang1, lang2)
16
-
17
- # Convert DataFrame to HTML
18
- aligned_html = aligned_df.to_html(index=False)
19
-
20
- # Save DataFrame as Excel file
21
- excel_path = "aligned_data.xlsx"
22
- aligned_df.to_excel(excel_path, index=False)
23
-
24
- return aligned_html, excel_path
25
-
26
- # Define the Gradio interface
27
- interface = Interface(
28
- fn=process_files,
29
- inputs=[
30
- File(label="Upload Source PDF"),
31
- File(label="Upload Target PDF"),
32
- Dropdown(choices=["en", "es", "fr", "ch", "ar", "ru", "pt", "sw"], label="Select Language 1"),
33
- Dropdown(choices=["en", "es", "fr", "ch", "ar", "ru", "pt", "sw"], label="Select Language 2"),
34
- ],
35
- outputs=[
36
- HTML(label="Aligned DataFrame"),
37
- Button(label="Download Aligned DataFrame")
38
- ],
39
- title="PDF Text Aligner",
40
- description="Upload two PDF files and select languages to align the text."
41
- )
42
-
43
- if __name__ == "__main__":
44
- interface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/utils.py DELETED
@@ -1,12 +0,0 @@
1
- def pdf_to_text(pdf_path: str) -> str:
2
- from PyPDF2 import PdfReader
3
-
4
- text = ""
5
- with open(pdf_path, "rb") as file:
6
- reader = PdfReader(file)
7
- for page in reader.pages:
8
- text += page.extract_text() + "\n"
9
- return text
10
-
11
- def save_to_excel(df, file_name: str):
12
- df.to_excel(file_name, index=False)