Spaces:

gitdeem
/

dt

Running

App Files Files Community

dt / app /translate /csv_handle.py

gitdeem

Upload 96 files

4e9efe9 verified 4 months ago

raw

history blame contribute delete

4.63 kB

	import os
	import threading
	from . import to_translate
	from . import common
	import datetime
	import time
	import csv
	import io

	def start(trans):
	    """Translate the text cells of a CSV file and write the result to a new CSV file.

	    Reads ``trans['file_path']``, queues every translatable cell (cells longer
	    than 1000 characters are split into several parts), hands each queued entry
	    to a worker thread running ``to_translate.get``, waits until every entry is
	    flagged ``complete``, then writes ``trans['target_file']`` as UTF-8.

	    Args:
	        trans: Mapping describing the job. Keys read here: ``threads``
	            (optional worker limit; unset or non-positive falls back to 10),
	            ``file_path``, ``target_file`` and ``type``.

	    Returns:
	        True when the output file was written and the job was reported through
	        ``to_translate.complete``; False when the input could not be read, the
	        output could not be written, or the shared event was set while work was
	        still being dispatched.
	    """
	    threads = trans.get('threads')
	    # A limit of 0 would make the dispatch loop below wait forever for a free
	    # slot that can never exist, so treat it like "unset".
	    max_threads = 10 if threads is None or int(threads) <= 0 else int(threads)

	    start_time = datetime.datetime.now()

	    content = _read_csv_rows(trans['file_path'])
	    if content is None:
	        return False

	    # Maximum number of characters sent to the translator in one entry.
	    max_word = 1000
	    texts, parts_per_cell = _collect_texts(content, max_word)

	    max_run = min(max_threads, len(texts))
	    before_active_count = threading.active_count()
	    event = threading.Event()

	    # Index of the next entry of ``texts`` to hand to a worker.
	    run_index = 0
	    while run_index < len(texts):
	        if event.is_set():
	            return False
	        if threading.active_count() < max_run + before_active_count:
	            # NOTE(review): the original referenced an undefined name
	            # ``translate``; ``to_translate`` is the imported module - confirm
	            # that ``to_translate.get`` is the intended worker entry point.
	            worker = threading.Thread(
	                target=to_translate.get,
	                args=(trans, event, texts, run_index),
	            )
	            worker.start()
	            run_index += 1
	        else:
	            # All slots are busy: yield briefly instead of spinning at 100% CPU.
	            time.sleep(0.01)

	    # Workers are expected to flag their entry as complete when done.
	    while not all(text['complete'] for text in texts):
	        time.sleep(1)

	    text_count = len(texts)
	    trans_type = trans['type']
	    only_trans_text = trans_type in ["trans_text_only_inherit", "trans_text_only_new", "trans_all_only_new", "trans_all_only_inherit"]

	    # Write the translation result to the new CSV file.
	    try:
	        with open(trans['target_file'], 'w', encoding='utf-8', newline='') as file:
	            writer = csv.writer(file)
	            text_index = 0

	            for row, row_counts in zip(content, parts_per_cell):
	                translated_row = []
	                for cell, part_count in zip(row, row_counts):
	                    if not part_count:
	                        # Non-text cell, or a cell that produced no parts:
	                        # keep the source value.
	                        translated_row.append(cell)
	                        continue
	                    # Consume exactly the entries queued for this cell, so
	                    # split cells and repeated values stay aligned.
	                    pieces = texts[text_index:text_index + part_count]
	                    text_index += part_count
	                    translated_row.append("".join(piece['text'] for piece in pieces))

	                if not only_trans_text:
	                    # Bilingual output: source row followed by its translation.
	                    writer.writerow(row)
	                writer.writerow(translated_row)

	    except Exception as e:
	        print(f"无法写入CSV文件 {trans['target_file']}: {e}")
	        return False

	    end_time = datetime.datetime.now()
	    spend_time = common.display_spend(start_time, end_time)
	    to_translate.complete(trans, text_count, spend_time)
	    return True


	def _read_csv_rows(file_path):
	    """Return the CSV rows of file_path as a list of lists, or None on failure.

	    Encodings are tried in order; iso-8859-1 accepts any byte sequence, so it
	    acts as the last-resort fallback. Failures are reported with print().
	    """
	    for encoding in ('utf-8', 'gbk', 'gb2312', 'iso-8859-1'):
	        try:
	            with open(file_path, 'r', encoding=encoding, newline='') as file:
	                return list(csv.reader(file))
	        except UnicodeDecodeError:
	            # Wrong guess: try the next encoding.
	            continue
	        except Exception as e:
	            print(f"无法读取CSV文件 {file_path}: {e}")
	            return None

	    print(f"无法以任何支持的编码格式读取CSV文件 {file_path}")
	    return None


	def _collect_texts(content, max_word):
	    """Build the translation queue and the per-cell part counts for content.

	    Returns a pair ``(texts, parts_per_cell)``. ``texts`` holds one dict per
	    queued piece of text. ``parts_per_cell`` mirrors the shape of ``content``;
	    each entry is the number of queue entries created for that cell, or None
	    when the cell is not translatable.
	    """
	    texts = []
	    parts_per_cell = []
	    for row in content:
	        row_counts = []
	        for cell in row:
	            if not check_text(cell):
	                row_counts.append(None)
	            elif len(cell) > max_word:
	                sub_cells = split_cell(cell, max_word)
	                for sub_cell in sub_cells:
	                    texts.append({"text": sub_cell, "origin": sub_cell, "complete": False, "sub": True})
	                row_counts.append(len(sub_cells))
	            else:
	                texts.append({"text": cell, "origin": cell, "complete": False, "sub": False})
	                row_counts.append(1)
	        parts_per_cell.append(row_counts)
	    return texts, parts_per_cell

	def split_cell(cell, max_length):
	    """Split cell text into parts of at most max_length characters each.

	    The text is split on whitespace and words are packed greedily, joined by a
	    single space. A single word longer than max_length is cut into
	    max_length-sized chunks so the length limit always holds. No empty parts
	    are ever produced.

	    Args:
	        cell: The text to split.
	        max_length: Maximum number of characters per part; must be >= 1.

	    Returns:
	        A list of non-empty strings, in original order. Empty or
	        whitespace-only input yields an empty list.

	    Raises:
	        ValueError: If max_length is smaller than 1.
	    """
	    if max_length < 1:
	        raise ValueError("max_length must be a positive integer")

	    parts = []
	    current_part = ""

	    for word in cell.split():
	        if len(word) > max_length:
	            # The word can never fit in one part: flush what we have and
	            # hard-split it. The last chunk stays open so following words can
	            # still be packed next to it.
	            if current_part:
	                parts.append(current_part)
	            chunks = [word[i:i + max_length] for i in range(0, len(word), max_length)]
	            parts.extend(chunks[:-1])
	            current_part = chunks[-1]
	            continue

	        # Only count the separating space when there is something to separate.
	        separator = 1 if current_part else 0
	        if len(current_part) + separator + len(word) > max_length:
	            parts.append(current_part)
	            current_part = word
	        else:
	            current_part = f"{current_part} {word}" if current_part else word

	    if current_part:
	        parts.append(current_part)

	    return parts

	def check_text(text):
	    """Return True when text is non-empty and common.is_all_punc(text) is falsy."""
	    # None and zero-length values are rejected before consulting the helper.
	    if text is None or len(text) == 0:
	        return False
	    return not common.is_all_punc(text)