Using python-docx, it is impossible to directly create a comment in a Word Document, that is why this function was created :
from datetime import datetime
from typing import List
from xml.etree.ElementTree import Element, tostring
import xml.etree.ElementTree as ET
from docx import Document
from docx.opc.constants import CONTENT_TYPE, RELATIONSHIP_TYPE
from docx.text.paragraph import Paragraph
from docx.opc.oxml import parse_xml
from docx.opc.packuri import PackURI
from docx.opc.part import Part
from docx.oxml import OxmlElement
from docx.oxml.ns import qn
def add_comment_to_elements_in_place(
docx_doc: Document, elements: List[Element], author: str, comment_text: str
) -> None:
if not elements:
return
try:
comments_part = docx_doc.part.part_related_by(
RELATIONSHIP_TYPE.COMMENTS
)
except KeyError:
comments_part = Part(
partname=PackURI("/word/comments.xml"),
content_type=CONTENT_TYPE.WML_COMMENTS,
blob=_COMMENTS_PART_DEFAULT_XML_BYTES,
package=docx_doc.part.package,
)
docx_doc.part.relate_to(comments_part, RELATIONSHIP_TYPE.COMMENTS)
ET.register_namespace("w", "http://schemas.openxmlformats.org/wordprocessingml/2006/main")
comments_xml = parse_xml(comments_part.blob)
# Create the comment
comment_id = str(len(comments_xml.findall(qn("w:comment"))))
comment_element = OxmlElement("w:comment")
comment_element.set(qn("w:id"), comment_id)
comment_element.set(qn("w:author"), author)
comment_element.set(qn("w:date"), datetime.now().isoformat())
comment_element.set(qn("w:initials"), "AP")
# Create the text element for the comment
comment_paragraph = OxmlElement("w:p")
comment_run = OxmlElement("w:r")
comment_text_element = OxmlElement("w:t")
comment_text_element.text = comment_text
comment_run.append(comment_text_element)
comment_paragraph.append(comment_run)
comment_element.append(comment_paragraph)
comments_xml.append(comment_element)
comments_part._blob = tostring(comments_xml)
# Create the commentRangeStart and commentRangeEnd elements
comment_range_start = OxmlElement("w:commentRangeStart")
comment_range_start.set(qn("w:id"), comment_id)
comment_range_end = OxmlElement("w:commentRangeEnd")
comment_range_end.set(qn("w:id"), comment_id)
# Add the commentRangeStart to the first element and commentRangeEnd to
# the last element
elements[0].insert(0, comment_range_start)
elements[-1].append(comment_range_end)
# Add the comment reference to each element in the range
# for element in elements:
comment_reference = OxmlElement("w:r")
comment_reference_run = OxmlElement("w:r")
comment_reference_run_properties = OxmlElement("w:rPr")
comment_reference_run_properties.append(
OxmlElement("w:rStyle", {qn("w:val"): "CommentReference"})
)
comment_reference_run.append(comment_reference_run_properties)
comment_reference_element = OxmlElement("w:commentReference")
comment_reference_element.set(qn("w:id"), comment_id)
comment_reference_run.append(comment_reference_element)
comment_reference.append(comment_reference_run)
elements[0].append(comment_reference)
You can refer to this s.o thread to get some insights : Inserting a comment in docx file using python 3
This function is currently almost working as intended with another function : find_section_paragraphs
def find_section_paragraphs(docx_doc: Document, section_titles: set, section_style_begin: str, section_style_end: str, add_paragraph=False) -> list:
"""
Returns a list of all the paragraphs in a specified section of a Word document.
To do this, the function iterates through all the paragraphs of the document and looks for
the paragraph with the section title and the appropriate style.
If any paragraph in the section has the style specified in the 'section_style_end' argument,
it means that the paragraph no longer belongs to the section, and the loop stops.
"""
section_paragraphs = list()
in_section = False
for paragraph in docx_doc.paragraphs:
if not in_section:
if any(title in paragraph.text.upper() for title in section_titles) and paragraph.style.name == section_style_begin:
in_section = True
if add_paragraph:
insert_paragraph_after(paragraph, "\n")
else:
if paragraph.style.name == section_style_end:
break
section_paragraphs.append(paragraph)
return section_paragraphs
Here is an example :
word_doc = Document("test.docx")
test = find_section_paragraphs(word_doc, {"TEST"}, "Heading 2", "Heading 2")
if test:
add_comment_to_elements_in_place(
test,
[para._element for para in test],
"Autogenerated by Python",
"Comment to the test section of the document",
)
word_doc.save("auto_commented.docx")
Now, the auto-commented docx display the right comment at the right position.
HOWEVER, if I transform the docx into zip and check the comments.xml part of the document, something weird happens
<w:comments xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
<w:comment w:id="0" w:author="Autogenerated by Python" w:date="2024-11-22T15:37:54.735270">
<w:p>Comment to the test section of the document<w:r>Comment to the test section of the document<w:t>Comment to the test section of the document</w:t>
</w:r>
</w:p>
</w:comment>
</w:comments>
As you can see, the text is generated 3 times. It should only be contained inside <w:t> instead.
Ideally, the end result should look like this (very similar to a comment generated by a real user manually in Word), which requires some tweaks to the add_comment_to_element_in_place :
<w:comments xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
<w:comment w:id="0" w:author="Autogenerated by Python" w:date="2024-11-22T15:37:54.735270">
<w:p>
<w:pPr>
<w:pStyle w:val="Commentaire"/>
</w:pPr>
<w:r>
<w:rPr>
<w:rStyle w:val="Marquedecommentaire"/>
</w:rPr>
<w:annotationRef/>
</w:r>
<w:r>
<w:t>Comment to the test section of the document</w:t>
</w:r>
</w:p>
</w:comment>
</w:comments>
Any suggestion ?
[INFO] : python 3.8.5