I want the top pipeline to be closer to the image when the photo is placed at the top of the page. The pipeline appears on the previous page but is at the same height as the other bottom pipeline.
I have errors in the code. I don't know what to do.
def procesar_imagenes_html(doc):
script_dir = os.path.dirname(os.path.abspath(__file__))
attachments_dir = os.path.join(script_dir, ".attachments")
if not os.path.isdir(attachments_dir):
os.makedirs(attachments_dir)
print(f"{attachments_dir}")
else:
print(f"Directory .attachments found: {attachments_dir}")
img_pattern = re.compile(r'\(a href="[^"]+"\)\(img src="([^"]+)"[^>]*\/\)\(\/a\)')
paragraphs = list(doc.Paragraphs)
for paragraph in paragraphs:
match = img_pattern.search(paragraph.Range.Text)
if match:
img_url = match.group(1)
print(f"{img_url}")
try:
response = requests.get(img_url)
if response.status_code == 200:
img_name = unquote(os.path.basename(img_url))
img_name = img_name.replace(' ', '_')
img_path = os.path.join(attachments_dir, img_name)
with open(img_path, 'wb') as img_file:
img_file.write(response.content)
print(f"{img_path}")
match_range = paragraph.Range.Duplicate
match_range.Start = paragraph.Range.Start + match.start()
match_range.End = paragraph.Range.Start + match.end()
match_range.Delete()
paragraph.Range.InsertParagraphBefore()
table = doc.Tables.Add(paragraph.Range, 3, 1)
table.Borders.Enable = False # No borders
cell_sup = table.Cell(1, 1)
cell_sup.Range.Text = "|"
cell_sup.Range.ParagraphFormat.Alignment = win32.constants.wdAlignParagraphLeft
cell_sup.Range.ParagraphFormat.SpaceBefore = 0
cell_sup.Range.ParagraphFormat.SpaceAfter = 0
cell_img = table.Cell(2, 1)
image = cell_img.Range.InlineShapes.AddPicture(
FileName=img_path,
LinkToFile=False,
SaveWithDocument=True
)
if image.Height > max_height:
image.Height = max_height
cell_img.Range.ParagraphFormat.Alignment = win32.constants.wdAlignParagraphCenter
cell_inf = table.Cell(3, 1)
cell_inf.Range.Text = "|"
cell_inf.Range.ParagraphFormat.Alignment = win32.constants.wdAlignParagraphLeft
cell_inf.Range.ParagraphFormat.SpaceBefore = 0
cell_inf.Range.ParagraphFormat.SpaceAfter = 0
table.Columns.AutoFit()
print(f"Image inserted: {img_name}")
else:
print(f"Error downloading image: {img_url}")
except Exception as e:
print(f"Error processing image {img_url}: {e}")
procesar_imagenes_html(doc)
Description:
I am working on a Python script that processes a Word document (.docx) and replaces certain text patterns with images downloaded from URLs. The issue is that when I insert an image at the beginning of the document, the pipeline (a table cell with a "|" character) located at the top left corner of the image appears on the previous page and has a different size.
import os
import re
import requests
from urllib.parse import unquote
import win32com.client as win32
def procesar_imagenes_html(doc):
script_dir = os.path.dirname(os.path.abspath(__file__))
attachments_dir = os.path.join(script_dir, ".attachments")
if not os.path.isdir(attachments_dir):
os.makedirs(attachments_dir)
print(f"Directory created: {attachments_dir}")
else:
print(f"Directory .attachments found: {attachments_dir}")
img_pattern = re.compile(r'\(a href="[^"]+"\)\(img src="([^"]+)"[^>]*\/\)\(\/a\)')
paragraphs = list(doc.Paragraphs)
for paragraph in paragraphs:
match = img_pattern.search(paragraph.Range.Text)
if match:
img_url = match.group(1)
print(f"Image URL: {img_url}")
try:
response = requests.get(img_url)
if response.status_code == 200:
img_name = unquote(os.path.basename(img_url))
img_name = img_name.replace(' ', '_')
img_path = os.path.join(attachments_dir, img_name)
with open(img_path, 'wb') as img_file:
img_file.write(response.content)
print(f"Image saved to: {img_path}")
# Delete the matched text
match_range = paragraph.Range.Duplicate
match_range.Start = paragraph.Range.Start + match.start()
match_range.End = paragraph.Range.Start + match.end()
match_range.Delete()
# Insert a new table with the image
paragraph.Range.InsertParagraphBefore()
table = doc.Tables.Add(paragraph.Range, 3, 1)
table.Borders.Enable = False # No borders
# Configure the table cells
cell_sup = table.Cell(1, 1)
cell_sup.Range.Text = "|"
cell_sup.Range.ParagraphFormat.Alignment = win32.constants.wdAlignParagraphLeft
cell_sup.Range.ParagraphFormat.SpaceBefore = 0
cell_sup.Range.ParagraphFormat.SpaceAfter = 0
cell_img = table.Cell(2, 1)
image = cell_img.Range.InlineShapes.AddPicture(
FileName=img_path,
LinkToFile=False,
SaveWithDocument=True
)
max_height = 200 # Adjust this value as needed
if image.Height > max_height:
image.Height = max_height
cell_img.Range.ParagraphFormat.Alignment = win32.constants.wdAlignParagraphCenter
cell_inf = table.Cell(3, 1)
cell_inf.Range.Text = "|"
cell_inf.Range.ParagraphFormat.Alignment = win32.constants.wdAlignParagraphLeft
cell_inf.Range.ParagraphFormat.SpaceBefore = 0
cell_inf.Range.ParagraphFormat.SpaceAfter = 0
table.Columns.AutoFit()
print(f"Image inserted: {img_name}")
else:
print(f"Error downloading image: {img_url}")
except Exception as e:
print(f"Error processing image {img_url}: {e}")
# Example usage
# doc = win32.Dispatch("Word.Application").Documents.Open("path/to/document.docx")
# procesar_imagenes_html(doc)
Problem:
When I insert an image at the beginning of the document, I want only the top pipeline (the one above the image in the top left corner) to have the same size as the bottom pipeline. Currently, the top pipeline appears on the previous page and has a different size.
Question:
Is there any specific property or method in the win32com library that allows me to control the layout of pipelines and images in Word documents when using HTML image links with the img src tag?
Note: I cannot provide screenshots due to company security restrictions. You can create your own Word template to test the code.
I tried adjusting the SpaceBefore and SpaceAfter properties of the paragraphs in the table cells to ensure that the pipelines are aligned correctly. I expected the top pipeline to appear on the same page as the image and have the same size as the bottom pipeline. However, the top pipeline still appears on the previous page and has a different size.