|
import zipfile
|
|
import xml.etree.ElementTree as ET
|
|
import os
|
|
from docx import Document
|
|
|
|
def read_comments_from_docx(docx_path):
    """Return the review comments stored in a .docx file.

    A .docx file is a ZIP archive; review comments live in the
    ``word/comments.xml`` part as ``<w:comment>`` elements.

    Args:
        docx_path: Path to the .docx file.

    Returns:
        A list with one dict per comment, in document order, with the keys
        ``'id'``, ``'author'``, ``'date'`` (attribute values as strings, or
        ``None`` when the attribute is absent) and ``'text'`` (all text runs
        of the comment concatenated). The list is empty when the archive has
        no ``word/comments.xml`` part.

    Raises:
        FileNotFoundError: If ``docx_path`` does not exist.
        zipfile.BadZipFile: If the file is not a ZIP archive.
        xml.etree.ElementTree.ParseError: If the comments part is not
            well-formed XML.

    Side effects:
        Prints the serialized comments XML to stdout (debugging aid kept
        from the original implementation).
    """
    w_ns = 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'
    namespace = {'ns0': w_ns}
    qualified = '{' + w_ns + '}'

    with zipfile.ZipFile(docx_path, 'r') as docx:
        try:
            comments_file = docx.open('word/comments.xml')
        except KeyError:
            # ZipFile.open raises KeyError for a missing member; a document
            # without any comment simply has no comments part.
            return []
        with comments_file:
            root = ET.parse(comments_file).getroot()

    comments = []
    for comment in root.findall('ns0:comment', namespace):
        # Element.iter visits every <w:t> descendant exactly once, so nested
        # paragraphs cannot be counted twice. An empty <w:t/> has text None,
        # which str.join would reject, hence the "or ''".
        text = ''.join(t.text or '' for t in comment.iter(qualified + 't'))
        comments.append({
            'id': comment.get(qualified + 'id'),
            'author': comment.get(qualified + 'author'),
            'date': comment.get(qualified + 'date'),
            'text': text,
        })

    xml_dump = ET.tostring(root, encoding='utf-8', xml_declaration=True).decode('utf-8')
    print("XML 内容:")
    print(xml_dump)

    return comments
|
|
|
|
_COMMENTS_PART = 'word/comments.xml'
_W_NS = 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'
_XML_NS = 'http://www.w3.org/XML/1998/namespace'


def _parse_comments_part(stream):
    """Parse *stream* and return ``(root, declared)``.

    ``declared`` is the list of ``(prefix, uri)`` namespace declarations seen
    while parsing, in document order.
    """
    parser = ET.iterparse(stream, events=('start-ns',))
    declared = [binding for _event, binding in parser]
    # iterparse exposes the tree root once the source is fully consumed.
    return parser.root, declared


def _replace_comment_text(root, comment_id, new_text):
    """Replace the content of every comment whose id equals *comment_id*.

    Returns True when at least one comment was changed.
    """
    qualified = '{' + _W_NS + '}'
    changed = False
    for comment in root.findall('ns0:comment', {'ns0': _W_NS}):
        if comment.get(qualified + 'id') != comment_id:
            continue
        # Drop every direct child. Element.remove only accepts direct
        # children, so removing paragraphs found by a descendant search
        # fails for paragraphs nested in e.g. a table.
        for child in list(comment):
            comment.remove(child)
        paragraph = ET.SubElement(comment, qualified + 'p')
        run = ET.SubElement(paragraph, qualified + 'r')
        text_elem = ET.SubElement(run, qualified + 't')
        # Keep leading/trailing whitespace of the new text significant.
        text_elem.set('{' + _XML_NS + '}space', 'preserve')
        text_elem.text = new_text
        changed = True
    return changed


def _serialize_comments_part(root, declared):
    """Serialize *root* to UTF-8 bytes, keeping the original namespaces.

    ElementTree invents ``ns0:``-style prefixes and only declares namespaces
    that appear in a tag or attribute name. Attributes such as
    ``mc:Ignorable="w14 w15"`` refer to prefixes by name inside their value,
    so every originally declared prefix is registered, and the ones that
    would otherwise be dropped are re-declared on the root element.
    """
    used = set()
    for elem in root.iter():
        for name in (elem.tag, *elem.keys()):
            if isinstance(name, str) and name.startswith('{'):
                used.add(name[1:].partition('}')[0])

    bindings = {}
    for prefix, uri in declared:
        # The default namespace and the built-in "xml" prefix need no help.
        if prefix and prefix != 'xml':
            bindings[prefix] = uri

    for prefix, uri in bindings.items():
        try:
            ET.register_namespace(prefix, uri)
        except ValueError:
            # "ns<digits>" prefixes are reserved by ElementTree; it will
            # generate its own prefix for that namespace.
            continue
        if uri not in used:
            root.set('xmlns:' + prefix, uri)

    return ET.tostring(root, encoding='utf-8', xml_declaration=True)


def modify_comment_in_docx(docx_path, comment_id, new_text):
    """Replace the text of one review comment inside a .docx file, in place.

    The archive is copied entry by entry into a sibling ``temp_<name>`` file
    with ``word/comments.xml`` swapped for the modified XML, and the
    temporary file then atomically replaces the original.

    Args:
        docx_path: Path to the .docx file to modify.
        comment_id: Value of the ``w:id`` attribute of the comment to change;
            converted with ``str`` so an int id matches as well.
        new_text: Replacement text. The comment's previous content is
            replaced by a single paragraph holding a single run.

    Returns:
        True if a matching comment was rewritten. False if the document has
        no comments part or no comment with that id; the file is left
        untouched in that case.

    Raises:
        FileNotFoundError: If ``docx_path`` does not exist.
        zipfile.BadZipFile: If the file is not a ZIP archive.
        xml.etree.ElementTree.ParseError: If the comments part is not
            well-formed XML.

    Side effects:
        Prints the comments XML before and after the change to stdout
        (debugging aid kept from the original implementation).
    """
    temp_docx_path = os.path.join(
        os.path.dirname(docx_path), 'temp_' + os.path.basename(docx_path)
    )

    with zipfile.ZipFile(docx_path, 'r') as docx:
        try:
            comments_file = docx.open(_COMMENTS_PART)
        except KeyError:
            # No comments part means there is nothing to modify.
            return False
        with comments_file:
            root, declared = _parse_comments_part(comments_file)

    print("原始 XML 内容:")
    print(ET.tostring(root, encoding='utf-8', xml_declaration=True).decode('utf-8'))

    if not _replace_comment_text(root, str(comment_id), new_text):
        return False

    modified_xml = _serialize_comments_part(root, declared)
    print("修改后的 XML 内容:")
    print(modified_xml.decode('utf-8'))

    try:
        with zipfile.ZipFile(docx_path, 'r') as docx, \
                zipfile.ZipFile(temp_docx_path, 'w') as new_docx:
            for item in docx.infolist():
                if item.filename == _COMMENTS_PART:
                    data = modified_xml
                else:
                    data = docx.read(item)
                # Passing the ZipInfo (not just the name) keeps each entry's
                # compression method, timestamp and attributes.
                new_docx.writestr(item, data)
        # Both archives are closed here, so the replace also works on
        # platforms that refuse to replace an open file.
        os.replace(temp_docx_path, docx_path)
    finally:
        # Only still present when something above failed.
        if os.path.exists(temp_docx_path):
            os.remove(temp_docx_path)

    return True
|
|
|
|
|
|
# Ad-hoc driver: dump every review comment of one specific document.
# NOTE(review): these statements run at import time and the path below is
# machine-specific; consider a `__main__` guard and a command-line argument.
docx_path = '/Volumes/data/erui/ezwork-api/storage/app/public/uploads/240928/jZtoN0Ak8P1A5Eojw9KndxoV7OkpPJv1J3NVtsBS.docx'

# Inputs for modify_comment_in_docx; not referenced by the statements below.
comment_id = '3'
new_text = 'test test'

comments = read_comments_from_docx(docx_path)

print("读取的批注:")
for comment in comments:
    print(comment)
|
|
|
|
|
|
|