i try to split a docx-document using at all "Heading 1" using the following code:
from docx import Document
import sys
import os
path = os.path.abspath(os.path.dirname(sys.argv[0]))
fn = os.path.join(path, "inp.docx")
document = Document(fn)
newDoc = False
counter = 1
for p in document.paragraphs:
wStyleName = p.style.name
wText = p.text
if wStyleName == "Heading 1":
if newDoc:
fnOut = os.path.join(path, f"out{counter}.docx")
newDoc.save(fnOut)
print(f"Saving {fnOut}")
counter += 1
if counter > 5:
break
newDoc = Document()
if "Heading 1" in wStyleName:
newDoc.add_heading(wText, level=1)
elif "Heading 2" in wStyleName:
newDoc.add_heading(wText, level=2)
else:
newP = newDoc.add_paragraph()
newP.paragraph_format.alignment = p.paragraph_format.alignment
for row in p.runs:
output_row = newP.add_run(row.text)
output_row.style.name = row.style.name
output_row.font.size = row.font.size
output_row.bold = row.bold
output_row.italic = row.italic
output_row.underline = row.underline
output_row.font.color.rgb = row.font.color.rgb
Generally this works fine but i am not able to keep especially the list numbering in the splitted files as you can see attached - and also the formating is different.

