Parallelize for loops in Python to speed up the algorithm

Question

This algorithm reads every image in a folder whose file name ends with clipped.tiff. The for loop, which changes the gamma value of each image that was read, is the part that should be accelerated.

How is it possible to parallelize and speed up this simple algorithm?

# Sequential version: adjust the gamma of every "*clipped.tiff" image found
# in the input folder and write the result below /gamma/.
SinglestreetsFolder = "/dop_shapefile/"
shp_list = [x for x in os.listdir(SinglestreetsFolder) if x.endswith("clipped.tiff")]

total = len(shp_list)

for i, image_name in enumerate(shp_list):
    # Build the path from the folder constant so the location is defined once.
    originalPath = SinglestreetsFolder + image_name
    original = cv2.imread(originalPath)

    adjusted = adjust_gamma(original, gamma=0.3)

    cv2.imwrite("/gamma/" + image_name + "_gamma.tiff", adjusted)
    # One pre-formatted string prints the same text under Python 2 and 3.
    print("status_Gamma:  %d / %d" % (i + 1, total))

Another algorithm

This algorithm reads every image in a folder whose file name ends with .tiff. The for loop, which runs a customized connected-component-labeling (CCL) algorithm on each image, is the part that should be sped up.

# Sequential version: run connected component labeling on every ".tiff" image
# in the input folder, discard components smaller than a pixel threshold and
# paint the surviving pixels white into one combined image.
SinglestreetsFolder = "/gabor/"
shp_list = [x for x in os.listdir(SinglestreetsFolder) if x.endswith(".tiff")]


# The combined image takes its size from the first file.
# NOTE(review): assumes every image has the same size - confirm.
img = cv2.imread(SinglestreetsFolder + shp_list[0], 0)
wholeimage = np.zeros(shape=(len(img), len(img[0]), 3))
erosion = np.ones((2, 2), np.uint8)     # kernel: erosion
connectivity = 8    # 4- OR 8-connectivity connected component labeling

for j, image_name in enumerate(shp_list):
    # pixel threshold (city: ca. 10, motorway: ca. 40)
    N = 40 if "motorway" in image_name else 10

    img = cv2.imread(SinglestreetsFolder + image_name, 0)
    img = cv2.threshold(img, 140, 255, cv2.THRESH_BINARY)[1]  # ensure binary

    # "reverse" images get an extra morphological opening; everything else
    # in the pipeline is shared by both kinds of image.
    if "reverse" in image_name:
        img = cv2.morphologyEx(img, cv2.MORPH_OPEN, erosion)

    # Keyword argument: the documented second positional parameter of
    # connectedComponents is the optional `labels` output, not connectivity.
    _, labels = cv2.connectedComponents(img, connectivity=connectivity)

    # Count the pixels of every label in a single pass instead of scanning
    # the whole image once per label.
    counts = np.bincount(labels.ravel())
    keep = counts >= N
    keep[0] = False  # label 0 is the background
    foreground = keep[labels]

    # Test the labels directly. Scaling them to a hue first truncates small
    # labels to 0 when there are many components (dropping them) and divides
    # by zero when no component survives the size filter.
    wholeimage[foreground] = 255

    # One pre-formatted string prints the same text under Python 2 and 3.
    print("status_CCL:  %d / %d" % (j + 1, len(shp_list)))

cv2.imwrite("/ccl/streets_gabor_ccl.tiff", wholeimage)

My solution, but "wholeimage" is black at the end

SinglestreetsFolder = "/gabor/"
shp_list = [x for x in os.listdir(SinglestreetsFolder) if x.endswith(".tiff")]


def gabor(params):
    """Label one image and return the mask of its large-enough components.

    The result is returned rather than written into a shared image: every
    call works on its own local variables (and, with a process pool, in a
    separate process), so anything that is not returned never reaches the
    caller.

    Args:
        params: ``(j, image_name)`` pair as produced by
            ``enumerate(shp_list)``; only ``image_name`` is used.

    Returns:
        2-D boolean array that is True where a pixel belongs to a connected
        component of at least N pixels.
    """
    j, image_name = params

    # pixel threshold (city: ca. 10, motorway: ca. 40)
    N = 40 if "motorway" in image_name else 10
    connectivity = 8    # 4- OR 8-connectivity connected component labeling

    img = cv2.imread(SinglestreetsFolder + image_name, 0)
    img = cv2.threshold(img, 140, 255, cv2.THRESH_BINARY)[1]  # ensure binary

    # "reverse" images get an extra morphological opening; the rest of the
    # pipeline is identical for both kinds of image.
    if "reverse" in image_name:
        erosion = np.ones((2, 2), np.uint8)  # kernel: erosion
        img = cv2.morphologyEx(img, cv2.MORPH_OPEN, erosion)

    # Keyword argument: the documented second positional parameter of
    # connectedComponents is the optional `labels` output, not connectivity.
    _, labels = cv2.connectedComponents(img, connectivity=connectivity)

    # Count the pixels of every label in a single pass instead of scanning
    # the whole image once per label.
    counts = np.bincount(labels.ravel())
    keep = counts >= N
    keep[0] = False  # label 0 is the background
    return keep[labels]


if __name__ == '__main__':
    p = Pool()
    wholeimage = None
    try:
        # imap yields the masks in input order; merge them in the parent
        # process, which is the only place that sees all results.
        for mask in p.imap(gabor, enumerate(shp_list)):
            if wholeimage is None:
                # Size the combined image from the first result.
                # NOTE(review): assumes every image has the same size.
                wholeimage = np.zeros(mask.shape + (3,))
            wholeimage[mask] = 255
    finally:
        # Always release the worker processes, even after an error.
        p.close()
        p.join()

    # Nothing to write when the folder contained no matching images.
    if wholeimage is not None:
        cv2.imwrite("/ccl/streets_gabor_ccl.tiff", wholeimage)

docs.python.org/2/library/multiprocessing.html Pool might be useful for your needs, if the actions you want to perform are independent. — Alex Dubrovsky
– Alex Dubrovsky, Commented Feb 21, 2018 at 10:10

Ilija · Accepted Answer · 2018-02-21 10:46:05Z

3

Assuming that adjust_gamma does not already fork child processes or use C-level threads, image processing is a CPU-bound task, so in Python you should rely on processes rather than threads.

Here is how it should look:

from multiprocessing.pool import Pool

SinglestreetsFolder = "/dop_shapefile/"
shp_list = [x for x in os.listdir(SinglestreetsFolder) if x.endswith("clipped.tiff")]


def process_image(params):
    """Gamma-adjust one image and write the result below /gamma/.

    Args:
        params: ``(i, image_name)`` pair as produced by
            ``enumerate(shp_list)``. A single tuple is used because the
            pool's imap passes exactly one argument to the worker function.

    Returns:
        Tuple with the parts of the status message for this image.
    """
    i, image_name = params
    # Build the path from the folder constant so the location is defined once.
    originalPath = SinglestreetsFolder + image_name
    original = cv2.imread(originalPath)

    adjusted = adjust_gamma(original, gamma=0.3)

    cv2.imwrite("/gamma/" + image_name + "_gamma.tiff", adjusted)
    return "status_Gamma: ", i + 1, "/", len(shp_list)


if __name__ == '__main__':
    p = Pool()
    try:
        # Print each status line as soon as its result arrives instead of
        # dumping one list of tuples after all images are done.
        for status in p.imap(process_image, enumerate(shp_list)):
            print(" ".join(str(part) for part in status))
    finally:
        # Always release the worker processes, even after an error.
        p.close()
        p.join()

Certain parts could still be improved; I only adapted your example with as few changes as possible.

edited Feb 21, 2018 at 10:46

answered Feb 21, 2018 at 10:18

Ilija

1,614 reputation · 1 gold badge · 10 silver badges · 12 bronze badges

Sign up to request clarification or add additional context in comments.

11 Comments

user9333067 Over a year ago

Could you explain why using processes is more appropriate than using threads in this example?

Bernhard Over a year ago

Thumbs up for using multiprocessing. Only this will benefit from multicore systems in this situation.

freddykrueger Over a year ago

When I run your algorithm, "TypeError: process_image() takes exactly 2 arguments (1 given)" appears

Ilija Over a year ago

Threads in Python are constrained by Python's GIL. That way you end up with multiple threads being executed on one CPU core. Multiprocessing gives you option to utilize multiple cores.

freakish Over a year ago

@ikac That's under the assumption that the image processing lib is actually fully implemented in Python and runs in the process. If the lib creates subprocesses or is written as a C add-on, then threads may perform better than processes.

|

Ilija · Accepted Answer · 2018-02-21 12:02:08Z

0

You could use the threading-lib:

import threading

SinglestreetsFolder = "/dop_shapefile/"
shp_list = [x for x in os.listdir(SinglestreetsFolder) if x.endswith("clipped.tiff")]

# Upper bound on the number of threads; one thread per image would start
# hundreds of threads for a folder with hundreds of images.
NUM_THREADS = 8


def wrapper(i):
    """Gamma-adjust the image at index ``i`` of shp_list and write it out."""
    originalPath = SinglestreetsFolder + shp_list[i]
    original = cv2.imread(originalPath)
    adjusted = adjust_gamma(original, gamma=0.3)
    cv2.imwrite("/gamma/" + shp_list[i] + "_gamma.tiff", adjusted)
    # One pre-formatted string prints the same text under Python 2 and 3.
    print("status_Gamma:  %d / %d" % (i + 1, len(shp_list)))


def worker(first):
    """Process every NUM_THREADS-th image, starting at index ``first``."""
    for i in range(first, len(shp_list), NUM_THREADS):
        wrapper(i)


threads = []

# Start at most NUM_THREADS threads; each one handles its own disjoint
# slice of the image indices.
for first in range(min(NUM_THREADS, len(shp_list))):
    t = threading.Thread(target=worker, args=(first,))
    threads.append(t)
    t.start()

for t in threads:
    t.join()  # wait for all of them to finish
# continue with your code

edited Feb 21, 2018 at 12:02

Ilija

1,614 reputation · 1 gold badge · 10 silver badges · 12 bronze badges

answered Feb 21, 2018 at 10:17

user9333067

1 Comment

Ilija Over a year ago

This code has to start from the assumption that adjust_gamma is not offloaded to a child process or to a C-threaded implementation that can already use multiple CPU cores. Even then, if there are hundreds of images, it creates hundreds of threads, which can still be improved by using a thread pool.

Collectives™ on Stack Overflow

Parallelize for loops in Python to speed up the algorithm

2 Answers 2

11 Comments

1 Comment

Your Answer

Hot Network Questions

Collectives™ on Stack Overflow

2 Answers 2

11 Comments

1 Comment

Your Answer

Sign up or log in

Post as a guest

Related