2

I have code that reads the files in a directory and uploads them to Azure Blob Storage. It works well and the uploads succeed. However, I need help modifying this code to use async operations so that the files are uploaded concurrently.

import os
import asyncio
import yaml
from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient, __version__


def load_config():
    """Load ``config.yaml`` from the directory that contains this module.

    Returns:
        The parsed YAML document, as produced by ``yaml.load``.

    Raises:
        OSError: If ``config.yaml`` cannot be opened.
    """
    path_root = os.path.dirname(os.path.abspath(__file__))
    config_path = os.path.join(path_root, "config.yaml")
    # Decode explicitly as UTF-8 (the YAML default) instead of relying on the
    # platform's locale encoding, which differs between machines.
    with open(config_path, "r", encoding="utf-8") as configfile:
        # NOTE(review): FullLoader can build more than plain data types. If
        # this file could ever come from an untrusted source, switch to
        # yaml.safe_load.
        return yaml.load(configfile, Loader=yaml.FullLoader)


def read_files(dir):
    """Lazily yield the visible files located directly inside *dir*.

    Each yielded item is an ``os.DirEntry`` for which ``is_file()`` is true
    and whose name does not start with a dot. Sub-directories are not
    descended into. The directory scan stays open only while the generator
    is being consumed.
    """
    with os.scandir(dir) as entries:
        yield from (
            entry
            for entry in entries
            if entry.is_file() and not entry.name.startswith('.')
        )


def upload(files, connection_string, container_name):
    """Upload each entry in *files* to the given blob container, one by one.

    Args:
        files: Iterable of objects exposing ``name`` (used as the blob name)
            and ``path`` (the local file to read).
        connection_string: Storage connection string passed to
            ``ContainerClient.from_connection_string``.
        container_name: Name of the destination container.
    """
    client = ContainerClient.from_connection_string(connection_string, container_name)
    print("Uploading images to remote blob storage")

    for entry in files:
        target_blob = client.get_blob_client(entry.name)
        # Hand the open binary stream to the client; the file is closed as
        # soon as the upload call returns.
        with open(entry.path, "rb") as stream:
            target_blob.upload_blob(stream)
            print(f"{entry.name} upload to remote blob storage")


# Script entry point: load the settings, enumerate the image files and
# upload them to the configured container.
config = load_config()
# read_files is a generator, so the directory is only scanned once upload()
# starts iterating over it.
images = read_files(config['source_folder'] + '/images')
upload(images, config['azure_storage_connectionstring'], config['images_container_name'])
1
  • Could you please tell me what error you face? Commented Mar 22, 2021 at 1:17

1 Answer 1

9

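You can use the package azure.storage.blob.aio to upload blobs asynchronously. Note that ContainerClient must be imported from azure.storage.blob.aio (not from azure.storage.blob) so that it supports `async with` and `await`.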

For example

async def load_config():
    """Load ``config.yaml`` from the directory that contains this module.

    The body contains no ``await`` and reads the file synchronously; it is a
    coroutine only so that callers can ``await`` it.

    Returns:
        The parsed YAML document, as produced by ``yaml.load``.

    Raises:
        OSError: If ``config.yaml`` cannot be opened.
    """
    path_root = os.path.dirname(os.path.abspath(__file__))
    config_path = os.path.join(path_root, "config.yaml")
    # Decode explicitly as UTF-8 (the YAML default) instead of relying on the
    # platform's locale encoding, which differs between machines.
    with open(config_path, "r", encoding="utf-8") as configfile:
        # NOTE(review): FullLoader can build more than plain data types. If
        # this file could ever come from an untrusted source, switch to
        # yaml.safe_load.
        return yaml.load(configfile, Loader=yaml.FullLoader)


async def read_files(dir):
    """Asynchronously iterate over the visible files directly inside *dir*.

    Yields ``os.DirEntry`` objects for which ``is_file()`` is true and whose
    name does not start with a dot. The directory scan itself is an ordinary
    synchronous ``os.scandir`` call; nothing is awaited inside.
    """
    scanner = os.scandir(dir)
    with scanner:
        for entry in scanner:
            if not entry.is_file():
                continue
            if entry.name.startswith('.'):
                continue
            yield entry


async def upload_blob():
    """Upload every visible file in ``<source_folder>/images`` concurrently.

    Settings are read via ``load_config()``; the keys used are
    ``azure_storage_connectionstring``, ``images_container_name`` and
    ``source_folder``. Each file is uploaded under its own file name.

    Raises:
        Exception: The first error raised by any individual upload is
            propagated by ``asyncio.gather``.
    """
    # The awaitable client lives in the ``aio`` sub-package; the synchronous
    # ContainerClient supports neither ``async with`` nor ``await``.
    from azure.storage.blob.aio import ContainerClient

    config = await load_config()
    container_client = ContainerClient.from_connection_string(
        config['azure_storage_connectionstring'], config['images_container_name'])

    # Cap the number of in-flight uploads so that a large folder cannot keep
    # an unbounded number of files open at once.
    max_concurrent_uploads = 10
    limiter = asyncio.Semaphore(max_concurrent_uploads)

    async def upload_one(file):
        """Upload a single directory entry and report it once finished."""
        async with limiter:
            # The file must stay open until the upload has completed, so the
            # await happens inside the ``with`` block.
            with open(file.path, "rb") as data:
                await container_client.upload_blob(name=file.name, data=data)
        print(f"{file.name} upload to remote blob storage")

    async with container_client:
        # Schedule every upload first and wait for all of them afterwards.
        # Awaiting inside the loop would serialise the uploads.
        tasks = [
            asyncio.create_task(upload_one(file))
            async for file in read_files(config['source_folder'] + '/images')
        ]
        await asyncio.gather(*tasks)
    print("Finished")
if __name__ == '__main__':
    # asyncio.run creates a fresh event loop, runs the coroutine to
    # completion and closes the loop afterwards. Calling
    # asyncio.get_event_loop() with no running loop is deprecated.
    asyncio.run(upload_blob())

enter image description here enter image description here

Sign up to request clarification or add additional context in comments.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.