|
import os
|
|
import threading
|
|
from . import to_translate
|
|
from . import common
|
|
import datetime
|
|
import time
|
|
import re
|
|
|
|
def start(trans):
    """Translate the text file described by ``trans`` and write the result.

    The file is read as UTF-8, split on newlines into work items, each item
    is handed to ``to_translate.get`` on its own thread (bounded by the
    configured thread budget), and once every item is flagged complete the
    result is written to the target file.

    Args:
        trans: Mapping read for the keys ``'threads'`` (thread budget, may be
            ``None``), ``'file_path'`` (source file), ``'type'`` (translation
            mode) and ``'target_file'`` (output file). It is also passed
            through unchanged to ``to_translate.get`` and
            ``to_translate.complete``.

    Returns:
        ``True`` when the output file was written; ``False`` when the source
        could not be read, the output could not be written, or the shared
        event was set while work was still being dispatched.
    """
    threads = trans['threads']
    # A budget of 0 would make the dispatch loop below wait forever because
    # no worker could ever be started, so treat it like an unset value.
    if threads is None or int(threads) <= 0:
        max_threads = 10
    else:
        max_threads = int(threads)

    start_time = datetime.datetime.now()

    try:
        with open(trans['file_path'], 'r', encoding='utf-8') as file:
            content = file.read()
    except Exception as e:
        print(f"无法读取文件 {trans['file_path']}: {e}")
        return False

    # These modes emit the translation only; every other mode writes the
    # original text followed by its translation.
    keepBoth = trans['type'] not in (
        "trans_text_only_inherit",
        "trans_text_only_new",
        "trans_all_only_new",
        "trans_all_only_inherit",
    )

    # Upper bound on the size of a single work item.
    # NOTE(review): the name says "word" but the limit is applied to
    # character counts (len of the string).
    max_word = 1000
    texts = _collect_segments(content.split('\n'), keepBoth, max_word)

    # Never allow more concurrent workers than there are items.
    max_run = min(max_threads, len(texts))
    # Threads that already existed before dispatch do not count against
    # the budget.
    before_active_count = threading.active_count()
    event = threading.Event()
    run_index = 0
    while run_index < len(texts):
        # The event is shared with the workers; once it is set, dispatch
        # is abandoned. NOTE(review): presumably workers set it on failure
        # or cancellation -- confirm against to_translate.get.
        if event.is_set():
            return False
        if threading.active_count() < max_run + before_active_count:
            worker = threading.Thread(
                target=to_translate.get,
                args=(trans, event, texts, run_index),
            )
            worker.start()
            run_index += 1
        else:
            # All slots are busy: yield instead of spinning at full CPU.
            time.sleep(0.01)

    # Poll until every item has been flagged complete.
    # NOTE(review): assumes the workers set item['complete']; nothing in
    # this function does.
    while not all(text['complete'] for text in texts):
        time.sleep(1)

    # NOTE(review): never incremented here, so 0 is always reported.
    text_count = 0

    try:
        _write_output(trans['target_file'], texts, keepBoth)
    except Exception as e:
        # .get() keeps the error report itself from raising if the key is
        # what was missing.
        print(f"无法写入文件 {trans.get('target_file')}: {e}")
        return False

    end_time = datetime.datetime.now()
    spend_time = common.display_spend(start_time, end_time)
    to_translate.complete(trans, text_count, spend_time)
    return True


def _collect_segments(paragraphs, keep_both, max_word):
    """Turn the source lines into the list of translation work items."""
    texts = []
    # Accumulator for merging short lines; only used when keep_both is False.
    current_text = ""

    for paragraph in paragraphs:
        # Non-blank lines rejected by check_text are dropped entirely.
        if not (check_text(paragraph) or paragraph.strip() == ""):
            continue

        if len(paragraph) > max_word:
            # Flush merged text first so the output order is preserved.
            if not keep_both and current_text:
                append_text(current_text, texts, False)
                current_text = ""
            # Over-long lines become several items flagged as sub-parts.
            for sub_paragraph in split_paragraph(paragraph, max_word):
                append_text(sub_paragraph, texts, True)
        elif keep_both:
            # One item per line so each original can be paired with its
            # translation in the output.
            append_text(paragraph, texts, False)
        else:
            # Merge short lines until the next one would exceed the limit.
            if len(current_text) + len(paragraph) > max_word:
                append_text(current_text, texts, False)
                current_text = ""
            current_text += paragraph + "\n"

    # Flush whatever is left. This runs unconditionally, so the returned
    # list is never empty.
    append_text(current_text, texts, False)
    return texts


def _write_output(target_path, texts, keep_both):
    """Write the processed items to ``target_path``, re-joining sub-parts."""
    with open(target_path, 'w', encoding='utf-8') as file:
        translated_paragraph = ""
        origin_paragraph = ""
        for item in texts:
            if item["sub"]:
                # Pieces of a split line are glued back together and
                # written once the run of sub-parts ends.
                translated_paragraph += item["text"]
                origin_paragraph += item["origin"]
                continue

            if translated_paragraph != "":
                if keep_both:
                    file.write(origin_paragraph + '\n')
                file.write(translated_paragraph + '\n')
                translated_paragraph = ""
                origin_paragraph = ""

            if keep_both and item["origin"].strip() != "":
                file.write(item["origin"] + '\n')
            file.write(item["text"] + '\n')

        # The file may end with a run of sub-parts that is still pending.
        if translated_paragraph != "":
            if keep_both and origin_paragraph.strip() != "":
                file.write(origin_paragraph + '\n')
            file.write(translated_paragraph + '\n')
|
|
|
|
def split_paragraph(paragraph, max_length):
    """Split a paragraph into parts of at most ``max_length`` characters.

    The paragraph is first cut at sentence boundaries (after sentence-ending
    punctuation). Consecutive sentences are packed into one part, joined by
    a single space, as long as the part stays within ``max_length``. A
    single sentence longer than ``max_length`` is cut into fixed-size chunks
    so that the length limit always holds.

    Args:
        paragraph: The text to split.
        max_length: Maximum number of characters per part. Values below 1
            disable the fixed-size chunking of over-long sentences.

    Returns:
        A list of non-empty strings; empty when ``paragraph`` contains no
        text.
    """
    sentences = re.split(r'(?<=[.!?。!?]) +|(?<=[。!?])\s*', paragraph)

    current_length = 0
    current_part = []
    parts = []

    for sentence in sentences:
        # Zero-width split points produce empty fragments; joining them
        # would only add stray spaces.
        if not sentence:
            continue

        if max_length > 0 and len(sentence) > max_length:
            # No sentence boundary can help here: flush what is pending
            # and fall back to fixed-size chunks.
            if current_part:
                parts.append(' '.join(current_part))
            chunks = [
                sentence[i:i + max_length]
                for i in range(0, len(sentence), max_length)
            ]
            parts.extend(chunks[:-1])
            # The tail chunk may still have room for following sentences.
            current_part = [chunks[-1]]
            current_length = len(chunks[-1])
            continue

        # Account for the joining space so the part really fits.
        needed = len(sentence) + (1 if current_part else 0)
        if current_part and current_length + needed > max_length:
            parts.append(' '.join(current_part))
            current_part = [sentence]
            current_length = len(sentence)
        else:
            current_part.append(sentence)
            current_length += needed

    if current_part:
        parts.append(' '.join(current_part))

    return parts
|
|
|
|
def append_text(text, texts, sub=False):
    """Append one work item for ``text`` to ``texts``.

    Text accepted by ``check_text`` is stored as both the pending text and
    its original, and is flagged as not yet complete. Anything else is
    stored as an empty item that is already flagged complete.

    Args:
        text: Candidate text for the item.
        texts: List of work-item dicts; modified in place.
        sub: Whether the item is one piece of a split paragraph.
    """
    needs_translation = check_text(text)
    content = text if needs_translation else ""
    entry = {
        "text": content,
        "origin": content,
        "complete": not needs_translation,
        "sub": sub,
        "ext": "md",
    }
    texts.append(entry)
|
|
|
|
def check_text(text):
    """Return whether ``text`` should be handled as real text.

    ``None``, the empty string and a lone newline are rejected outright;
    any other value is accepted unless ``common.is_all_punc`` reports a
    truthy result for it.

    Args:
        text: The candidate string, or ``None``.

    Returns:
        ``True`` if the text passes all checks, ``False`` otherwise.
    """
    # Cheap structural checks first; they also keep None and "" away from
    # the project helper.
    if text is None or text == "\n" or len(text) == 0:
        return False
    return not common.is_all_punc(text)
|
|
|