Spaces:
Sleeping
Sleeping
Added execution time measurement for alignments and cleaned up old imports
Browse files- translate_docx.py +2 -5
translate_docx.py
CHANGED
@@ -3,8 +3,6 @@ import json
|
|
3 |
import requests
|
4 |
import tqdm
|
5 |
import os
|
6 |
-
import string
|
7 |
-
from collections import defaultdict
|
8 |
|
9 |
from docx import Document
|
10 |
from docx.text.hyperlink import Hyperlink
|
@@ -16,7 +14,6 @@ nltk.download('punkt')
|
|
16 |
nltk.download('punkt_tab')
|
17 |
|
18 |
from nltk.tokenize import sent_tokenize, word_tokenize
|
19 |
-
from nltk.tokenize.treebank import TreebankWordDetokenizer
|
20 |
|
21 |
from subprocess import Popen, PIPE
|
22 |
|
@@ -318,16 +315,16 @@ def translate_document(input_file,
|
|
318 |
processed_original_paragraphs_with_runs = [preprocess_runs(runs) for runs in paragraphs_with_runs]
|
319 |
|
320 |
print("Generating alignments...")
|
|
|
321 |
translated_sentences_with_style = generate_alignments(processed_original_paragraphs_with_runs,
|
322 |
translated_paragraphs, aligner,
|
323 |
temp_folder, detokenizer)
|
324 |
-
print("Finished alignments")
|
325 |
|
326 |
# flatten the sentences into a list of tokens
|
327 |
translated_tokens_with_style = [item for sublist in translated_sentences_with_style for item in sublist]
|
328 |
# group the tokens by style/run
|
329 |
translated_runs_with_style = group_by_style(translated_tokens_with_style, detokenizer)
|
330 |
-
print("Grouped by style")
|
331 |
|
332 |
# group the runs by original paragraph
|
333 |
translated_paragraphs_with_style = dict()
|
|
|
3 |
import requests
|
4 |
import tqdm
|
5 |
import os
|
|
|
|
|
6 |
|
7 |
from docx import Document
|
8 |
from docx.text.hyperlink import Hyperlink
|
|
|
14 |
nltk.download('punkt_tab')
|
15 |
|
16 |
from nltk.tokenize import sent_tokenize, word_tokenize
|
|
|
17 |
|
18 |
from subprocess import Popen, PIPE
|
19 |
|
|
|
315 |
processed_original_paragraphs_with_runs = [preprocess_runs(runs) for runs in paragraphs_with_runs]
|
316 |
|
317 |
print("Generating alignments...")
|
318 |
+
start_time = time.time()
|
319 |
translated_sentences_with_style = generate_alignments(processed_original_paragraphs_with_runs,
|
320 |
translated_paragraphs, aligner,
|
321 |
temp_folder, detokenizer)
|
322 |
+
print(f"Finished alignments in {time.time() - start_time} seconds")
|
323 |
|
324 |
# flatten the sentences into a list of tokens
|
325 |
translated_tokens_with_style = [item for sublist in translated_sentences_with_style for item in sublist]
|
326 |
# group the tokens by style/run
|
327 |
translated_runs_with_style = group_by_style(translated_tokens_with_style, detokenizer)
|
|
|
328 |
|
329 |
# group the runs by original paragraph
|
330 |
translated_paragraphs_with_style = dict()
|