Spaces:
Sleeping
Sleeping
added language dropdown menus to document translation tab
Browse files
- gradio_app.py +23 -12
- src/translate_any_doc.py +1 -1
gradio_app.py
CHANGED
@@ -1,37 +1,48 @@
|
|
1 |
import gradio as gr
|
2 |
-
from src.translate_any_doc import translate_document
|
|
|
|
|
3 |
from src.aligner import Aligner
|
4 |
from nltk.tokenize.treebank import TreebankWordDetokenizer
|
5 |
|
6 |
-
|
7 |
-
# Pre-change module configuration (old side of the diff).
# NOTE(review): the statements that consumed `ip` are not visible in this view;
# presumably the address of a translation server - confirm.
ip='10.192.31.127'
|
8 |
# Folder name for the fast_align configuration.
config_folder = 'fast_align_config'
|
9 |
-
# Hard-coded source language code.
source_lang = 'en'
|
10 |
-
# Hard-coded target language code.
target_lang = 'ca'
|
11 |
# Folder name for temporary files.
temp_folder = 'tmp'
|
12 |
-
|
|
|
|
|
13 |
# Shared NLTK Treebank detokenizer instance.
detokenizer = TreebankWordDetokenizer()
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
|
15 |
|
16 |
-
def upload_file(filepath):
|
17 |
-
|
18 |
-
|
|
|
|
|
|
|
19 |
|
20 |
def download_file():
    """Swap the document-tab buttons back to their initial state.

    Wired to the download button's click event.

    Returns:
        A two-item list of component updates, in (upload button, download
        button) order: the upload button made visible and the download
        button hidden.
    """
    upload_update = gr.UploadButton(visible=True)
    download_update = gr.DownloadButton(visible=False)
    return [upload_update, download_update]
|
22 |
|
23 |
|
24 |
# Pre-change UI definition (old side of the diff): a Blocks app with a "Text"
# tab and a "Docx documents" tab.
with gr.Blocks() as demo:
|
25 |
-
|
26 |
with gr.Tab("Text"):
|
27 |
-
# NOTE(review): `translate` is not defined in the visible part of this view.
gr.Interface(fn=translate, inputs=["text","text","text"], outputs="text")
|
28 |
with gr.Tab("Docx documents"):
|
|
|
|
|
|
|
|
|
|
|
|
29 |
gr.Markdown("First upload a file and and then you'll be able download it (but only once!)")
|
30 |
with gr.Row():
|
31 |
u = gr.UploadButton("Upload a file", file_count="single")
|
32 |
# The download button starts hidden.
d = gr.DownloadButton("Download the file", visible=False)
|
33 |
|
34 |
-
# Both handlers return updates for the upload and download buttons.
u.upload(upload_file, u, [u, d])
|
35 |
d.click(download_file, None, [u, d])
|
36 |
# Launch the app only when this file is run as a script.
if __name__ == "__main__":
|
37 |
demo.launch()
|
|
|
1 |
import os

import gradio as gr
from nltk.tokenize.treebank import TreebankWordDetokenizer

from src.aligner import Aligner
from src.mtuoc_aina_translator import MTUOCAinaTranslator
from src.salamandraTA7b_translator import SalamandraTA7bTranslator
from src.translate_any_doc import translate_document
|
7 |
|
|
|
|
|
8 |
# Folder with the fast_align configuration; passed to Aligner on every upload.
config_folder = 'fast_align_config'
# Working folder passed to Aligner.
temp_folder = 'tmp'
# Token handed to the SalamandraTA7b translator. Read from the HF_TOKEN
# environment variable so no secret has to be committed to the source; falls
# back to the previous empty-string default when the variable is unset.
hf_token = os.environ.get("HF_TOKEN", "")

# Shared NLTK Treebank detokenizer, passed to translate_document.
detokenizer = TreebankWordDetokenizer()
# Translator used by both the "Text" tab and the document tab.
translator = SalamandraTA7bTranslator(hf_token)

# Alternative backend: uncomment and fill in to use the MTUOC/Aina translator
# instead of SalamandraTA7b.
# ip = ''
# port = ''
# translator = MTUOCAinaTranslator(ip, port)
|
19 |
|
20 |
|
21 |
+
def upload_file(filepath, source_lang, target_lang):
    """Translate an uploaded document and offer the result for download.

    Args:
        filepath: Value supplied by the upload button for the uploaded file;
            forwarded unchanged to ``translate_document``.
        source_lang: Language code selected in the "Source language" dropdown.
        target_lang: Language code selected in the "Target language" dropdown.

    Returns:
        A two-item list of component updates, in (upload button, download
        button) order: the upload button hidden, and the download button
        made visible, labelled with and pointing at the translated file.

    Raises:
        gr.Error: If either language has not been selected.
    """
    # Both dropdowns are created with value=None, so a file can be uploaded
    # before any language is chosen. Fail with a readable UI message instead
    # of passing None into the aligner and translator.
    if not source_lang or not target_lang:
        raise gr.Error("Please select both a source and a target language before uploading a file.")

    # The aligner is built per request because it depends on the language pair.
    aligner = Aligner(config_folder, source_lang, target_lang, temp_folder)
    translated_file_name = translate_document(
        filepath, source_lang, target_lang, translator, aligner, detokenizer
    )
    return [
        gr.UploadButton(visible=False),
        gr.DownloadButton(
            label=f"Download {translated_file_name}",
            value=translated_file_name,
            visible=True,
        ),
    ]
|
26 |
+
|
27 |
|
28 |
def download_file():
    """Swap the document-tab buttons back to their initial state.

    Wired to the download button's click event.

    Returns:
        A two-item list of component updates, in (upload button, download
        button) order: the upload button made visible and the download
        button hidden.
    """
    upload_update = gr.UploadButton(visible=True)
    download_update = gr.DownloadButton(visible=False)
    return [upload_update, download_update]
|
30 |
|
31 |
|
32 |
# UI definition: a Blocks app with one tab for plain text and one for documents.
with gr.Blocks() as demo:
    # Plain-text translation: three text inputs forwarded to the translator.
    with gr.Tab("Text"):
        gr.Interface(
            fn=translator.translate,
            inputs=["text", "text", "text"],
            outputs="text",
        )

    # Document translation: pick the language pair, upload, then download.
    with gr.Tab("Docx documents"):
        with gr.Row():
            dropdown1 = gr.Dropdown(
                label="Source language",
                choices=["en", "ca"],
                value=None,
                interactive=True,
            )
            dropdown2 = gr.Dropdown(
                label="Target language",
                choices=["en", "ca"],
                value=None,
                interactive=True,
            )
        gr.Markdown("First upload a file and and then you'll be able download it (but only once!)")
        with gr.Row():
            u = gr.UploadButton("Upload a file", file_count="single")
            # Hidden until a translated file is available.
            d = gr.DownloadButton("Download the file", visible=False)

        # Uploading sends the file plus both dropdown values to upload_file;
        # both handlers return updates for the upload and download buttons.
        u.upload(upload_file, [u, dropdown1, dropdown2], [u, d])
        d.click(download_file, None, [u, d])

# Launch the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()
|
src/translate_any_doc.py
CHANGED
@@ -44,7 +44,7 @@ def doc_to_plain_text(input_file: str, source_lang: str, target_lang: str, tikal
|
|
44 |
return os.path.join(original_xliff_file_path + f".{source_lang}")
|
45 |
|
46 |
|
47 |
-
def get_runs_from_paragraph(text: str, paragraph_index: int) ->
|
48 |
"""
|
49 |
Given some text that may or may not contain some chunks tagged with something like <g id=1> </g>, extract each
|
50 |
of the runs of text and convert them into dictionaries to keep this information
|
|
|
44 |
return os.path.join(original_xliff_file_path + f".{source_lang}")
|
45 |
|
46 |
|
47 |
+
def get_runs_from_paragraph(text: str, paragraph_index: int) -> list[dict[str, str | tuple[str, ...]]]:
|
48 |
"""
|
49 |
Given some text that may or may not contain some chunks tagged with something like <g id=1> </g>, extract each
|
50 |
of the runs of text and convert them into dictionaries to keep this information
|