dt / app /translate /test.py
gitdeem's picture
Upload 96 files
4e9efe9 verified
import zipfile
import xml.etree.ElementTree as ET
import os
from docx import Document
def read_comments_from_docx(docx_path):
comments = []
with zipfile.ZipFile(docx_path, 'r') as docx:
# 尝试读取批注文件
with docx.open('word/comments.xml') as comments_file:
# 解析 XML
tree = ET.parse(comments_file)
root = tree.getroot()
# 定义命名空间
namespace = {'ns0': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'}
# 查找所有批注
for comment in root.findall('ns0:comment', namespace):
comment_id = comment.get('{http://schemas.openxmlformats.org/wordprocessingml/2006/main}id')
author = comment.get('{http://schemas.openxmlformats.org/wordprocessingml/2006/main}author')
date = comment.get('{http://schemas.openxmlformats.org/wordprocessingml/2006/main}date')
text = ''.join(t.text for p in comment.findall('.//ns0:p', namespace) for r in p.findall('.//ns0:r', namespace) for t in r.findall('.//ns0:t', namespace))
comments.append({
'id': comment_id,
'author': author,
'date': date,
'text': text,
})
modified_xml = ET.tostring(root, encoding='utf-8', xml_declaration=True).decode('utf-8')
print("XML 内容:")
print(modified_xml)
return comments
def modify_comment_in_docx(docx_path, comment_id, new_text):
# 创建一个临时文件名,保留原始路径
temp_docx_path = os.path.join(os.path.dirname(docx_path), 'temp_' + os.path.basename(docx_path))
# 打开原始 docx 文件
with zipfile.ZipFile(docx_path, 'r') as docx:
# 创建一个新的 docx 文件
with zipfile.ZipFile(temp_docx_path, 'w') as new_docx:
for item in docx.infolist():
# 读取每个文件
with docx.open(item) as file:
if item.filename == 'word/comments.xml':
# 解析批注 XML
tree = ET.parse(file)
root = tree.getroot()
# 打印原始 XML 内容
print("原始 XML 内容:")
print(ET.tostring(root, encoding='utf-8', xml_declaration=True).decode('utf-8'))
# 定义命名空间
namespace = {'ns0': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'}
# 查找并修改批注
for comment in root.findall('ns0:comment', namespace):
if comment.get('{http://schemas.openxmlformats.org/wordprocessingml/2006/main}id') == comment_id:
# 清除现有段落
for p in list(comment.findall('.//ns0:p', namespace)):
comment.remove(p) # 从批注中移除段落元素
# 创建新的段落
new_paragraph = ET.Element('{http://schemas.openxmlformats.org/wordprocessingml/2006/main}p')
# 创建新的 run 元素
new_run = ET.Element('{http://schemas.openxmlformats.org/wordprocessingml/2006/main}r')
# 创建新的 text 元素
new_text_elem = ET.Element('{http://schemas.openxmlformats.org/wordprocessingml/2006/main}t')
new_text_elem.text = new_text # 设置文本内容
# 将 text 元素添加到 run 元素中
new_run.append(new_text_elem)
# 将 run 添加到段落中
new_paragraph.append(new_run)
# 将新段落添加到批注中
comment.append(new_paragraph)
# 打印修改后的 XML 内容
modified_xml = ET.tostring(root, encoding='utf-8', xml_declaration=True).decode('utf-8')
print("修改后的 XML 内容:")
print(modified_xml)
# 将修改后的 XML 写入新的 docx 文件
new_docx.writestr(item.filename, modified_xml)
else:
# 其他文件直接写入新的 docx 文件
new_docx.writestr(item.filename, file.read())
# 替换原始文件
os.replace(temp_docx_path, docx_path)
# 示例用法
docx_path = '/Volumes/data/erui/ezwork-api/storage/app/public/uploads/240928/jZtoN0Ak8P1A5Eojw9KndxoV7OkpPJv1J3NVtsBS.docx' # 替换为您的文档路径
# docx_path = '/Volumes/data/erui/ezwork-api/storage/app/public//translate/jZtoN0Ak8P1A5Eojw9KndxoV7OkpPJv1J3NVtsBS/comments-英语.docx' # 替换为您的文档路径
comment_id = '3' # 替换为您要修改的批注 ID
new_text = 'test test' # 替换为新的批注文本
# document = Document("/Volumes/data/erui/ezwork-api/storage/app/public/uploads/240928/jZtoN0Ak8P1A5Eojw9KndxoV7OkpPJv1J3NVtsBS.docx")
# document.save(docx_path)
# 读取批注
comments = read_comments_from_docx(docx_path)
print("读取的批注:")
for comment in comments:
print(comment)
# 修改批注
# modify_comment_in_docx(docx_path, comment_id, new_text)