Spaces:
Sleeping
Sleeping
fixed some formatting issues
Browse files - translate_docx.py +11 -3
translate_docx.py
CHANGED
@@ -247,7 +247,7 @@ def group_by_style(values, detokenizer):
|
|
247 |
x['paragraph_index'])):
|
248 |
text = detokenizer.detokenize([item['text'] for item in group])
|
249 |
|
250 |
-
if groups and not text.startswith((",", ";", ":", ".", ")")):
|
251 |
text = " " + text
|
252 |
|
253 |
groups.append({"text": text,
|
@@ -330,9 +330,17 @@ def translate_document(input_file,
|
|
330 |
print("Grouped by style")
|
331 |
|
332 |
# group the runs by original paragraph
|
333 |
-
translated_paragraphs_with_style =
|
334 |
for item in translated_runs_with_style:
|
335 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
336 |
|
337 |
for paragraph_index, original_paragraph in enumerate(doc.paragraphs):
|
338 |
# in case there are empty paragraphs
|
|
|
247 |
x['paragraph_index'])):
|
248 |
text = detokenizer.detokenize([item['text'] for item in group])
|
249 |
|
250 |
+
if groups and not text.startswith((",", ";", ":", ".", ")", "!", "?")):
|
251 |
text = " " + text
|
252 |
|
253 |
groups.append({"text": text,
|
|
|
330 |
print("Grouped by style")
|
331 |
|
332 |
# group the runs by original paragraph
|
333 |
+
translated_paragraphs_with_style = dict()
|
334 |
for item in translated_runs_with_style:
|
335 |
+
if item['paragraph_index'] in translated_paragraphs_with_style:
|
336 |
+
translated_paragraphs_with_style[item['paragraph_index']].append(item)
|
337 |
+
else:
|
338 |
+
# first item in the paragraph: strip the leading space that group_by_style() prepended, because at that
|
339 |
+
# point we did not yet know where paragraphs started and ended
|
340 |
+
first_item_in_paragraph = item.copy()
|
341 |
+
first_item_in_paragraph["text"] = first_item_in_paragraph["text"].lstrip(" ")
|
342 |
+
translated_paragraphs_with_style[item['paragraph_index']] = []
|
343 |
+
translated_paragraphs_with_style[item['paragraph_index']].append(first_item_in_paragraph)
|
344 |
|
345 |
for paragraph_index, original_paragraph in enumerate(doc.paragraphs):
|
346 |
# in case there are empty paragraphs
|