mjuvilla committed on
Commit
0b349b6
·
1 Parent(s): 164a644

added language dropdown menus to document translation tab

Browse files
Files changed (2) hide show
  1. gradio_app.py +23 -12
  2. src/translate_any_doc.py +1 -1
gradio_app.py CHANGED
@@ -1,37 +1,48 @@
1
  import gradio as gr
2
- from src.translate_any_doc import translate_document, translate
 
 
3
  from src.aligner import Aligner
4
  from nltk.tokenize.treebank import TreebankWordDetokenizer
5
 
6
-
7
- ip='10.192.31.127'
8
  config_folder = 'fast_align_config'
9
- source_lang = 'en'
10
- target_lang = 'ca'
11
  temp_folder = 'tmp'
12
- aligner = Aligner(config_folder, source_lang, target_lang, temp_folder)
 
13
  detokenizer = TreebankWordDetokenizer()
 
 
 
 
 
 
14
 
15
 
16
- def upload_file(filepath):
17
- translated_file_name = translate_document(filepath, source_lang, target_lang, aligner, detokenizer, ip)
18
- return [gr.UploadButton(visible=False), gr.DownloadButton(label=f"Download {translated_file_name}", value=translated_file_name, visible=True)]
 
 
 
19
 
20
  def download_file():
21
  return [gr.UploadButton(visible=True), gr.DownloadButton(visible=False)]
22
 
23
 
24
  with gr.Blocks() as demo:
25
-
26
  with gr.Tab("Text"):
27
- gr.Interface(fn=translate, inputs=["text","text","text"], outputs="text")
28
  with gr.Tab("Docx documents"):
 
 
 
 
29
  gr.Markdown("First upload a file and and then you'll be able download it (but only once!)")
30
  with gr.Row():
31
  u = gr.UploadButton("Upload a file", file_count="single")
32
  d = gr.DownloadButton("Download the file", visible=False)
33
 
34
- u.upload(upload_file, u, [u, d])
35
  d.click(download_file, None, [u, d])
36
  if __name__ == "__main__":
37
  demo.launch()
 
1
  import gradio as gr
2
+ from src.translate_any_doc import translate_document
3
+ from src.salamandraTA7b_translator import SalamandraTA7bTranslator
4
+ from src.mtuoc_aina_translator import MTUOCAinaTranslator
5
  from src.aligner import Aligner
6
  from nltk.tokenize.treebank import TreebankWordDetokenizer
7
 
 
 
8
  config_folder = 'fast_align_config'
 
 
9
  temp_folder = 'tmp'
10
+ hf_token = ""
11
+
12
  detokenizer = TreebankWordDetokenizer()
13
+ translator = SalamandraTA7bTranslator(hf_token)
14
+
15
+
16
+ # ip = ''
17
+ # port = ''
18
+ # translator = MTUOCAinaTranslator(ip, port)
19
 
20
 
21
+ def upload_file(filepath, source_lang, target_lang):
22
+ aligner = Aligner(config_folder, source_lang, target_lang, temp_folder)
23
+ translated_file_name = translate_document(filepath, source_lang, target_lang, translator, aligner, detokenizer)
24
+ return [gr.UploadButton(visible=False),
25
+ gr.DownloadButton(label=f"Download {translated_file_name}", value=translated_file_name, visible=True)]
26
+
27
 
28
  def download_file():
29
  return [gr.UploadButton(visible=True), gr.DownloadButton(visible=False)]
30
 
31
 
32
  with gr.Blocks() as demo:
 
33
  with gr.Tab("Text"):
34
+ gr.Interface(fn=translator.translate, inputs=["text", "text", "text"], outputs="text")
35
  with gr.Tab("Docx documents"):
36
+ with gr.Row():
37
+ dropdown1 = gr.Dropdown(label="Source language", choices=["en", "ca"], value=None,
38
+ interactive=True)
39
+ dropdown2 = gr.Dropdown(label="Target language", choices=["en", "ca"], value=None, interactive=True)
40
  gr.Markdown("First upload a file and and then you'll be able download it (but only once!)")
41
  with gr.Row():
42
  u = gr.UploadButton("Upload a file", file_count="single")
43
  d = gr.DownloadButton("Download the file", visible=False)
44
 
45
+ u.upload(upload_file, [u, dropdown1, dropdown2], [u, d])
46
  d.click(download_file, None, [u, d])
47
  if __name__ == "__main__":
48
  demo.launch()
src/translate_any_doc.py CHANGED
@@ -44,7 +44,7 @@ def doc_to_plain_text(input_file: str, source_lang: str, target_lang: str, tikal
44
  return os.path.join(original_xliff_file_path + f".{source_lang}")
45
 
46
 
47
- def get_runs_from_paragraph(text: str, paragraph_index: int) -> list[dict[str, str]]:
48
  """
49
  Given some text that may or may not contain some chunks tagged with something like <g id=1> </g>, extract each
50
  of the runs of text and convert them into dictionaries to keep this information
 
44
  return os.path.join(original_xliff_file_path + f".{source_lang}")
45
 
46
 
47
+ def get_runs_from_paragraph(text: str, paragraph_index: int) -> list[dict[str, str | tuple[str, ...]]]:
48
  """
49
  Given some text that may or may not contain some chunks tagged with something like <g id=1> </g>, extract each
50
  of the runs of text and convert them into dictionaries to keep this information