0

I want the top pipeline to sit closer to the image when the image is placed at the top of a page. Currently the top pipeline appears on the previous page, although it is at the same height as the bottom pipeline.

I have errors in the code. I don't know what to do.

def procesar_imagenes_html(doc, max_height=200):
    """Replace HTML-style image links in a Word document with the linked pictures.

    Every paragraph of ``doc`` is searched for text shaped like
    ``(a href="...")(img src="URL" .../)(/a)``. For each hit the image is
    downloaded into a ``.attachments`` directory next to this script, the
    matched text is deleted, and a borderless 3x1 table is inserted whose rows
    hold a "|" marker, the picture, and a second "|" marker.

    Args:
        doc: Word ``Document`` COM object (only ``Paragraphs`` and ``Tables``
            are used).
        max_height: Upper bound applied to the inserted picture's ``Height``
            (Word measures it in points). Taller pictures are reduced to it.

    Returns:
        None. Progress and errors are reported with ``print``; a failure on
        one image does not stop the remaining paragraphs from being processed.

    Note:
        ``win32.constants`` is only populated when the Word type library has
        been generated for pywin32 (for example by opening Word through
        ``win32.gencache.EnsureDispatch``) -- confirm how ``doc`` is obtained.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    attachments_dir = os.path.join(script_dir, ".attachments")

    if not os.path.isdir(attachments_dir):
        os.makedirs(attachments_dir)
        print(f"{attachments_dir}")
    else:
        print(f"Directory .attachments found: {attachments_dir}")
    img_pattern = re.compile(r'\(a href="[^"]+"\)\(img src="([^"]+)"[^>]*\/\)\(\/a\)')
    # Snapshot the collection first: tables and paragraphs are added to the
    # document while we iterate.
    paragraphs = list(doc.Paragraphs)

    for paragraph in paragraphs:
        match = img_pattern.search(paragraph.Range.Text)
        if not match:
            continue

        img_url = match.group(1)
        print(f"{img_url}")

        try:
            # Without a timeout a stalled server would block the script forever;
            # a timeout error is reported by the handler below like any other.
            response = requests.get(img_url, timeout=30)
            if response.status_code != 200:
                print(f"Error downloading image: {img_url}")
                continue

            img_name = unquote(os.path.basename(img_url))
            img_name = img_name.replace(' ', '_')
            img_path = os.path.join(attachments_dir, img_name)

            with open(img_path, 'wb') as img_file:
                img_file.write(response.content)
            print(f"{img_path}")

            # Remove the matched link text from the paragraph.
            match_range = paragraph.Range.Duplicate
            match_range.Start = paragraph.Range.Start + match.start()
            match_range.End = paragraph.Range.Start + match.end()
            match_range.Delete()

            paragraph.Range.InsertParagraphBefore()
            table = doc.Tables.Add(paragraph.Range, 3, 1)
            table.Borders.Enable = False  # No borders
            # A single row must never be split by a page break.
            table.Rows.AllowBreakAcrossPages = False

            cell_sup = table.Cell(1, 1)
            cell_sup.Range.Text = "|"
            cell_sup.Range.ParagraphFormat.Alignment = win32.constants.wdAlignParagraphLeft
            cell_sup.Range.ParagraphFormat.SpaceBefore = 0
            cell_sup.Range.ParagraphFormat.SpaceAfter = 0
            # Keep the top marker on the same page as the picture row below it.
            cell_sup.Range.ParagraphFormat.KeepWithNext = True

            cell_img = table.Cell(2, 1)
            image = cell_img.Range.InlineShapes.AddPicture(
                FileName=img_path,
                LinkToFile=False,
                SaveWithDocument=True
            )
            if image.Height > max_height:
                image.Height = max_height
            cell_img.Range.ParagraphFormat.Alignment = win32.constants.wdAlignParagraphCenter
            # Keep the picture on the same page as the bottom marker row.
            cell_img.Range.ParagraphFormat.KeepWithNext = True

            cell_inf = table.Cell(3, 1)
            cell_inf.Range.Text = "|"
            cell_inf.Range.ParagraphFormat.Alignment = win32.constants.wdAlignParagraphLeft
            cell_inf.Range.ParagraphFormat.SpaceBefore = 0
            cell_inf.Range.ParagraphFormat.SpaceAfter = 0

            table.Columns.AutoFit()

            print(f"Image inserted: {img_name}")
        except Exception as e:
            # Best-effort per image: report the failure and move on.
            print(f"Error processing image {img_url}: {e}")

# Run the replacement on the Word document; `doc` is not defined in this
# snippet and is presumably an already-opened Word Document object -- confirm.
procesar_imagenes_html(doc)

Description:

I am working on a Python script that processes a Word document (.docx) and replaces certain text patterns with images downloaded from URLs. The issue is that when I insert an image at the beginning of the document, the pipeline (a table cell with a "|" character) located at the top left corner of the image appears on the previous page and has a different size.

import os
import re
import requests
from urllib.parse import unquote
import win32com.client as win32

def procesar_imagenes_html(doc, max_height=200):
    """Replace HTML-style image links in a Word document with the linked pictures.

    Every paragraph of ``doc`` is searched for text shaped like
    ``(a href="...")(img src="URL" .../)(/a)``. For each hit the image is
    downloaded into a ``.attachments`` directory next to this script, the
    matched text is deleted, and a borderless 3x1 table is inserted whose rows
    hold a "|" marker, the picture, and a second "|" marker. The three rows
    are flagged to stay together so the top marker is not left behind on the
    previous page.

    Args:
        doc: Word ``Document`` COM object (only ``Paragraphs`` and ``Tables``
            are used).
        max_height: Upper bound applied to the inserted picture's ``Height``
            (Word measures it in points). Taller pictures are reduced to it.

    Returns:
        None. Progress and errors are reported with ``print``; a failure on
        one image does not stop the remaining paragraphs from being processed.

    Note:
        ``win32.constants`` is only populated when the Word type library has
        been generated for pywin32 (for example by opening Word through
        ``win32.gencache.EnsureDispatch``) -- confirm how ``doc`` is obtained.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    attachments_dir = os.path.join(script_dir, ".attachments")

    if not os.path.isdir(attachments_dir):
        os.makedirs(attachments_dir)
        print(f"Directory created: {attachments_dir}")
    else:
        print(f"Directory .attachments found: {attachments_dir}")

    img_pattern = re.compile(r'\(a href="[^"]+"\)\(img src="([^"]+)"[^>]*\/\)\(\/a\)')
    # Snapshot the collection first: tables and paragraphs are added to the
    # document while we iterate.
    paragraphs = list(doc.Paragraphs)

    for paragraph in paragraphs:
        match = img_pattern.search(paragraph.Range.Text)
        if not match:
            continue

        img_url = match.group(1)
        print(f"Image URL: {img_url}")

        try:
            # Without a timeout a stalled server would block the script forever;
            # a timeout error is reported by the handler below like any other.
            response = requests.get(img_url, timeout=30)
            if response.status_code != 200:
                print(f"Error downloading image: {img_url}")
                continue

            img_name = unquote(os.path.basename(img_url))
            img_name = img_name.replace(' ', '_')
            img_path = os.path.join(attachments_dir, img_name)

            with open(img_path, 'wb') as img_file:
                img_file.write(response.content)
            print(f"Image saved to: {img_path}")

            # Delete the matched text
            match_range = paragraph.Range.Duplicate
            match_range.Start = paragraph.Range.Start + match.start()
            match_range.End = paragraph.Range.Start + match.end()
            match_range.Delete()

            # Insert a new table with the image
            paragraph.Range.InsertParagraphBefore()
            table = doc.Tables.Add(paragraph.Range, 3, 1)
            table.Borders.Enable = False  # No borders
            # A single row must never be split by a page break.
            table.Rows.AllowBreakAcrossPages = False

            # Top marker row
            cell_sup = table.Cell(1, 1)
            cell_sup.Range.Text = "|"
            cell_sup.Range.ParagraphFormat.Alignment = win32.constants.wdAlignParagraphLeft
            cell_sup.Range.ParagraphFormat.SpaceBefore = 0
            cell_sup.Range.ParagraphFormat.SpaceAfter = 0
            # Keep the top marker on the same page as the picture row below it.
            cell_sup.Range.ParagraphFormat.KeepWithNext = True

            # Picture row
            cell_img = table.Cell(2, 1)
            image = cell_img.Range.InlineShapes.AddPicture(
                FileName=img_path,
                LinkToFile=False,
                SaveWithDocument=True
            )
            if image.Height > max_height:
                image.Height = max_height
            cell_img.Range.ParagraphFormat.Alignment = win32.constants.wdAlignParagraphCenter
            # Keep the picture on the same page as the bottom marker row.
            cell_img.Range.ParagraphFormat.KeepWithNext = True

            # Bottom marker row
            cell_inf = table.Cell(3, 1)
            cell_inf.Range.Text = "|"
            cell_inf.Range.ParagraphFormat.Alignment = win32.constants.wdAlignParagraphLeft
            cell_inf.Range.ParagraphFormat.SpaceBefore = 0
            cell_inf.Range.ParagraphFormat.SpaceAfter = 0

            table.Columns.AutoFit()

            print(f"Image inserted: {img_name}")
        except Exception as e:
            # Best-effort per image: report the failure and move on.
            print(f"Error processing image {img_url}: {e}")

# Example usage
# doc = win32.Dispatch("Word.Application").Documents.Open("path/to/document.docx")
# procesar_imagenes_html(doc)

Problem:

When I insert an image at the beginning of the document, I want only the top pipeline (the one above the image in the top left corner) to have the same size as the bottom pipeline. Currently, the top pipeline appears on the previous page and has a different size.

Question:

Is there any specific property or method in the win32com library that allows me to control the layout of pipelines and images in Word documents when using HTML image links with the img src tag?

Note: I cannot provide screenshots due to company security restrictions. You can create your own Word template to test the code.

I tried adjusting the SpaceBefore and SpaceAfter properties of the paragraphs in the table cells to ensure that the pipelines are aligned correctly. I expected the top pipeline to appear on the same page as the image and have the same size as the bottom pipeline. However, the top pipeline still appears on the previous page and has a different size.

0

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.