mjuvilla committed
Commit 36f2ac1 · 1 Parent(s): 0348f21

added execution time computation for alignments and cleanup old imports

Files changed (1)
  1. translate_docx.py +2 -5
translate_docx.py CHANGED
@@ -3,8 +3,6 @@ import json
 import requests
 import tqdm
 import os
-import string
-from collections import defaultdict
 
 from docx import Document
 from docx.text.hyperlink import Hyperlink
@@ -16,7 +14,6 @@ nltk.download('punkt')
 nltk.download('punkt_tab')
 
 from nltk.tokenize import sent_tokenize, word_tokenize
-from nltk.tokenize.treebank import TreebankWordDetokenizer
 
 from subprocess import Popen, PIPE
 
@@ -318,16 +315,16 @@ def translate_document(input_file,
     processed_original_paragraphs_with_runs = [preprocess_runs(runs) for runs in paragraphs_with_runs]
 
     print("Generating alignments...")
+    start_time = time.time()
     translated_sentences_with_style = generate_alignments(processed_original_paragraphs_with_runs,
                                                            translated_paragraphs, aligner,
                                                            temp_folder, detokenizer)
-    print("Finished alignments")
+    print(f"Finished alignments in {time.time() - start_time} seconds")
 
     # flatten the sentences into a list of tokens
     translated_tokens_with_style = [item for sublist in translated_sentences_with_style for item in sublist]
     # group the tokens by style/run
     translated_runs_with_style = group_by_style(translated_tokens_with_style, detokenizer)
-    print("Grouped by style")
 
     # group the runs by original paragraph
     translated_paragraphs_with_style = dict()
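
The change times the alignment step with the simple start/stop pattern around a single call. The sketch below shows that pattern in isolation; it is not code from translate_docx.py. slow_alignment_step is a hypothetical stand-in for generate_alignments, and the real module is assumed to already have "import time" available, since this diff does not add that import.

import time

def slow_alignment_step():
    # hypothetical stand-in for generate_alignments(); sleep simulates work
    time.sleep(0.5)

print("Generating alignments...")
start_time = time.time()              # timestamp before the call
slow_alignment_step()
elapsed = time.time() - start_time    # elapsed wall-clock seconds
print(f"Finished alignments in {elapsed:.2f} seconds")

For interval measurements, time.perf_counter() is often preferred over time.time() because it is monotonic and unaffected by system clock adjustments; time.time(), as used in the commit, is adequate for coarse progress logging like this.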