From e3f111a095cdbf6871e924e43f9758dc331b0413 Mon Sep 17 00:00:00 2001 From: DN6 Date: Mon, 15 Sep 2025 23:16:43 +0530 Subject: [PATCH 01/19] update --- .../en/modular_diffusers/custom_blocks.md | 310 ++++++++++++++++++ 1 file changed, 310 insertions(+) create mode 100644 docs/source/en/modular_diffusers/custom_blocks.md diff --git a/docs/source/en/modular_diffusers/custom_blocks.md b/docs/source/en/modular_diffusers/custom_blocks.md new file mode 100644 index 000000000000..8c7682ba6bc0 --- /dev/null +++ b/docs/source/en/modular_diffusers/custom_blocks.md @@ -0,0 +1,310 @@ + + + +# Building Custom Blocks + +Modular Diffusers allows you to create custom blocks that can be used in a pipeline. This guide will show you how to create a custom block, define its inputs and outputs, and implement the computation logic. + +Let's create a custom block that uses the Florence2 model to process an input image and generate a mask for inpainting + +First let's define a custom block in a file called `block.py`: + +```py +from typing import List, Union +from PIL import Image, ImageDraw +import torch +import numpy as np + +from diffusers.modular_pipelines import ( + PipelineState, + ModularPipelineBlocks, + InputParam, + ComponentSpec, + OutputParam, +) +from transformers import AutoProcessor, AutoModelForCausalLM + + +class Florence2ImageAnnotatorBlock(ModularPipelineBlocks): + @property + def expected_components(self): + return [ + ComponentSpec( + name="image_annotator", + type_hint=AutoModelForCausalLM, + repo="mrhendrey/Florence-2-large-ft-safetensors", + ), + ComponentSpec( + name="image_annotator_processor", + type_hint=AutoProcessor, + repo="mrhendrey/Florence-2-large-ft-safetensors", + ), + ] + + @property + def inputs(self) -> List[InputParam]: + return [ + InputParam( + "image", + type_hint=Union[Image.Image, List[Image.Image]], + required=True, + description="Image(s) to annotate", + ), + InputParam( + "annotation_task", + type_hint=Union[str, List[str]], + 
required=True, + default="", + description="""Annotation Task to perform on the image. + Supported Tasks: + + + + + + + + + + + """, + ), + InputParam( + "annotation_prompt", + type_hint=Union[str, List[str]], + required=True, + description="""Annotation Prompt to provide more context to the task. + Can be used to detect or segment out specific elements in the image + """, + ), + InputParam( + "annotation_output_type", + type_hint=str, + required=True, + default="mask_image", + description="""Output type from annotation predictions. Availabe options are + annotation: + - raw annotation predictions from the model based on task type. + mask_image: + -black and white mask image for the given image based on the task type + mask_overlay: + - white mask overlayed on the original image + bounding_box: + - bounding boxes drawn on the original image + """, + ), + InputParam( + "annotation_overlay", + type_hint=bool, + required=True, + default=False, + description="", + ), + ] + + @property + def intermediate_outputs(self) -> List[OutputParam]: + return [ + OutputParam( + "mask_image", + type_hint=Image, + description="Inpainting Mask for input Image(s)", + ), + OutputParam( + "annotations", + type_hint=dict, + description="Annotations Predictions for input Image(s)", + ), + OutputParam( + "image", + type_hint=Image, + description="Annotated input Image(s)", + ), + ] + + def get_annotations(self, components, images, prompts, task): + task_prompts = [task + prompt for prompt in prompts] + + inputs = components.image_annotator_processor( + text=task_prompts, images=images, return_tensors="pt" + ).to(components.image_annotator.device, components.image_annotator.dtype) + + generated_ids = components.image_annotator.generate( + input_ids=inputs["input_ids"], + pixel_values=inputs["pixel_values"], + max_new_tokens=1024, + early_stopping=False, + do_sample=False, + num_beams=3, + ) + annotations = components.image_annotator_processor.batch_decode( + generated_ids, 
skip_special_tokens=False + ) + outputs = [] + for image, annotation in zip(images, annotations): + outputs.append( + components.image_annotator_processor.post_process_generation( + annotation, task=task, image_size=(image.width, image.height) + ) + ) + return outputs + + def prepare_mask(self, images, annotations, overlay=False): + masks = [] + for image, annotation in zip(images, annotations): + mask_image = image.copy() if overlay else Image.new("L", image.size, 0) + draw = ImageDraw.Draw(mask_image) + + for _, _annotation in annotation.items(): + if "polygons" in _annotation: + for polygon in _annotation["polygons"]: + polygon = np.array(polygon).reshape(-1, 2) + if len(polygon) < 3: + continue + polygon = polygon.reshape(-1).tolist() + draw.polygon(polygon, fill="white") + + elif "bbox" in _annotation: + bbox = _annotation["bbox"] + draw.rectangle(bbox, fill="white") + + masks.append(mask_image) + + return masks + + def prepare_bounding_boxes(self, images, annotations): + outputs = [] + for image, annotation in zip(images, annotations): + image_copy = image.copy() + draw = ImageDraw.Draw(image_copy) + for _, _annotation in annotation.items(): + bbox = _annotation["bbox"] + label = _annotation["label"] + + draw.rectangle(bbox, outline="red", width=3) + draw.text((bbox[0], bbox[1] - 20), label, fill="red") + + outputs.append(image_copy) + + return outputs + + def prepare_inputs(self, images, prompts): + prompts = prompts or "" + + if isinstance(images, Image.Image): + images = [images] + if isinstance(prompts, str): + prompts = [prompts] + + if len(images) != len(prompts): + raise ValueError("Number of images and annotation prompts must match.") + + return images, prompts + + @torch.no_grad() + def __call__(self, components, state: PipelineState) -> PipelineState: + block_state = self.get_block_state(state) + images, annotation_task_prompt = self.prepare_inputs( + block_state.image, block_state.annotation_prompt + ) + task = block_state.annotation_task + + 
annotations = self.get_annotations( + components, images, annotation_task_prompt, task + ) + block_state.annotations = annotations + if block_state.annotation_output_type == "mask_image": + block_state.mask_image = self.prepare_mask(images, annotations) + else: + block_state.mask_image = None + + if block_state.annotation_output_type == "mask_overlay": + block_state.image = self.prepare_mask(images, annotations, overlay=True) + + elif block_state.annotation_output_type == "bounding_box": + block_state.image = self.prepare_bounding_boxes(images, annotations) + + self.set_block_state(state, block_state) + + return components, state +``` + +Once we have defined our custom block, we can save it as a model repo so that we can easily reuse it. + +There are two ways to save the block: + +1. From the CLI + +```shell +# In the folder with the `block.py` file, run: +diffusers-cli custom_block +``` + +Then upload the block to the Hub: + +```shell +hf upload . . +``` + +2. From Python + +```py +from block import Florence2ImageAnnotatorBlock +block = Florence2ImageAnnotatorBlock() +block.push_to_hub("") +``` + +## Using the Custom Block + +Let's use this custom block in an inpainting workflow. 
+ +```py +import torch +from diffusers.modular_pipelines import ModularPipelineBlocks, SequentialPipelineBlocks +from diffusers.modular_pipelines.stable_diffusion_xl import INPAINT_BLOCKS +from diffusers.utils import load_image + +# Fetch the Florence2 image annotator block that will create our mask +image_annotator_block = ModularPipelineBlocks.from_pretrained("diffusers/florence2-image-annotator", trust_remote_code=True) + +my_blocks = INPAINT_BLOCKS.copy() +# insert the annotation block before the image encoding step +my_blocks.insert("image_annotator", image_annotator_block, 1) + +# Create our initial set of inpainting blocks +blocks = SequentialPipelineBlocks.from_blocks_dict(my_blocks) + +repo_id = "diffusers-internal-dev/modular-sdxl-inpainting" +pipe = blocks.init_pipeline(repo_id) +pipe.load_components(torch_dtype=torch.float16, device_map="cuda", trust_remote_code=True) + +image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/car.jpg?download=true") +image = image.resize((1024, 1024)) + +prompt = ["A red car"] +annotation_task = "" +annotation_prompt = ["the car"] + +output = pipe( + prompt=prompt, + image=image, + annotation_task=annotation_task, + annotation_prompt=annotation_prompt, + annotation_output_type="mask_image", + num_inference_steps=35, + guidance_scale=7.5, + strength=0.95, + output="images" +) +output[0].save("florence-inpainting.png") +``` From c67dda4bddb6136d3cba79f91eedc91730969702 Mon Sep 17 00:00:00 2001 From: DN6 Date: Tue, 16 Sep 2025 18:11:20 +0530 Subject: [PATCH 02/19] update --- docs/source/en/_toctree.yml | 2 + .../en/modular_diffusers/custom_blocks.md | 39 +++++++++++-------- 2 files changed, 25 insertions(+), 16 deletions(-) diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml index 14dbfe3ea1d3..188b21f7f168 100644 --- a/docs/source/en/_toctree.yml +++ b/docs/source/en/_toctree.yml @@ -126,6 +126,8 @@ title: ComponentsManager - local: 
modular_diffusers/guiders title: Guiders + - local: modular_diffusers/custom_blocks + title: Building Custom Blocks - title: Training isExpanded: false diff --git a/docs/source/en/modular_diffusers/custom_blocks.md b/docs/source/en/modular_diffusers/custom_blocks.md index 8c7682ba6bc0..0b8815886bb9 100644 --- a/docs/source/en/modular_diffusers/custom_blocks.md +++ b/docs/source/en/modular_diffusers/custom_blocks.md @@ -13,11 +13,19 @@ specific language governing permissions and limitations under the License. # Building Custom Blocks -Modular Diffusers allows you to create custom blocks that can be used in a pipeline. This guide will show you how to create a custom block, define its inputs and outputs, and implement the computation logic. +Modular Diffusers allows you to create custom blocks that can be plugged into Modular Pipelines. This guide will show you how to create and use a custom block. -Let's create a custom block that uses the Florence2 model to process an input image and generate a mask for inpainting +First let's take a look at the structure of our custom block project: -First let's define a custom block in a file called `block.py`: +```shell +. +├── block.py +└── modular_config.json +``` + +The code to define the custom block lives in a file called `block.py`. The `modular_config.json` file contains metadata for loading the block with Modular Diffusers. 
+ +In this example, we will create a custom block that uses the Florence 2 model to process an input image and generate a mask for inpainting ```py from typing import List, Union @@ -32,7 +40,7 @@ from diffusers.modular_pipelines import ( ComponentSpec, OutputParam, ) -from transformers import AutoProcessor, AutoModelForCausalLM +from transformers import AutoProcessor, Florence2ForConditionalGeneration class Florence2ImageAnnotatorBlock(ModularPipelineBlocks): @@ -41,13 +49,13 @@ class Florence2ImageAnnotatorBlock(ModularPipelineBlocks): return [ ComponentSpec( name="image_annotator", - type_hint=AutoModelForCausalLM, - repo="mrhendrey/Florence-2-large-ft-safetensors", + type_hint=Florence2ForConditionalGeneration, + repo="florence-community/Florence-2-base-ft", ), ComponentSpec( name="image_annotator_processor", type_hint=AutoProcessor, - repo="mrhendrey/Florence-2-large-ft-safetensors", + repo="florence-community/Florence-2-base-ft", ), ] @@ -93,12 +101,10 @@ class Florence2ImageAnnotatorBlock(ModularPipelineBlocks): required=True, default="mask_image", description="""Output type from annotation predictions. Availabe options are - annotation: - - raw annotation predictions from the model based on task type. 
mask_image: -black and white mask image for the given image based on the task type mask_overlay: - - white mask overlayed on the original image + - mask overlayed on the original image bounding_box: - bounding boxes drawn on the original image """, @@ -159,7 +165,7 @@ class Florence2ImageAnnotatorBlock(ModularPipelineBlocks): ) return outputs - def prepare_mask(self, images, annotations, overlay=False): + def prepare_mask(self, images, annotations, overlay=False, fill="white"): masks = [] for image, annotation in zip(images, annotations): mask_image = image.copy() if overlay else Image.new("L", image.size, 0) @@ -172,7 +178,7 @@ class Florence2ImageAnnotatorBlock(ModularPipelineBlocks): if len(polygon) < 3: continue polygon = polygon.reshape(-1).tolist() - draw.polygon(polygon, fill="white") + draw.polygon(polygon, fill=fill) elif "bbox" in _annotation: bbox = _annotation["bbox"] @@ -218,6 +224,7 @@ class Florence2ImageAnnotatorBlock(ModularPipelineBlocks): block_state.image, block_state.annotation_prompt ) task = block_state.annotation_task + fill = block_state.fill annotations = self.get_annotations( components, images, annotation_task_prompt, task @@ -229,7 +236,7 @@ class Florence2ImageAnnotatorBlock(ModularPipelineBlocks): block_state.mask_image = None if block_state.annotation_output_type == "mask_overlay": - block_state.image = self.prepare_mask(images, annotations, overlay=True) + block_state.image = self.prepare_mask(images, annotations, overlay=True, fill=fill) elif block_state.annotation_output_type == "bounding_box": block_state.image = self.prepare_bounding_boxes(images, annotations) @@ -239,7 +246,7 @@ class Florence2ImageAnnotatorBlock(ModularPipelineBlocks): return components, state ``` -Once we have defined our custom block, we can save it as a model repo so that we can easily reuse it. +Now that we have defined our custom block, we can save it as a model repository on the Huggingface Hub so that it is easy to share and reuse. 
There are two ways to save the block: @@ -275,7 +282,7 @@ from diffusers.modular_pipelines.stable_diffusion_xl import INPAINT_BLOCKS from diffusers.utils import load_image # Fetch the Florence2 image annotator block that will create our mask -image_annotator_block = ModularPipelineBlocks.from_pretrained("diffusers/florence2-image-annotator", trust_remote_code=True) +image_annotator_block = ModularPipelineBlocks.from_pretrained("diffusers/florence-2-custom-block", trust_remote_code=True) my_blocks = INPAINT_BLOCKS.copy() # insert the annotation block before the image encoding step @@ -284,7 +291,7 @@ my_blocks.insert("image_annotator", image_annotator_block, 1) # Create our initial set of inpainting blocks blocks = SequentialPipelineBlocks.from_blocks_dict(my_blocks) -repo_id = "diffusers-internal-dev/modular-sdxl-inpainting" +repo_id = "diffusers/modular-stable-diffusion-xl-base-1.0" pipe = blocks.init_pipeline(repo_id) pipe.load_components(torch_dtype=torch.float16, device_map="cuda", trust_remote_code=True) From ed3f88528a9acdaedf2e334e3a24238a48ff922a Mon Sep 17 00:00:00 2001 From: Dhruv Nair Date: Wed, 17 Sep 2025 08:41:02 +0530 Subject: [PATCH 03/19] Update docs/source/en/modular_diffusers/custom_blocks.md Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com> --- docs/source/en/modular_diffusers/custom_blocks.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/source/en/modular_diffusers/custom_blocks.md b/docs/source/en/modular_diffusers/custom_blocks.md index 0b8815886bb9..6f0adaf7c9c1 100644 --- a/docs/source/en/modular_diffusers/custom_blocks.md +++ b/docs/source/en/modular_diffusers/custom_blocks.md @@ -271,7 +271,6 @@ block = Florence2ImageAnnotatorBlock() block.push_to_hub("") ``` -## Using the Custom Block Let's use this custom block in an inpainting workflow. 
From 830603e323e55b8b463d0f4ab9125cd226687c10 Mon Sep 17 00:00:00 2001 From: Dhruv Nair Date: Wed, 17 Sep 2025 08:41:10 +0530 Subject: [PATCH 04/19] Update docs/source/en/modular_diffusers/custom_blocks.md Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com> --- docs/source/en/modular_diffusers/custom_blocks.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/en/modular_diffusers/custom_blocks.md b/docs/source/en/modular_diffusers/custom_blocks.md index 6f0adaf7c9c1..ce9cb94e5ec2 100644 --- a/docs/source/en/modular_diffusers/custom_blocks.md +++ b/docs/source/en/modular_diffusers/custom_blocks.md @@ -272,7 +272,7 @@ block.push_to_hub("") ``` -Let's use this custom block in an inpainting workflow. +Load the custom block with [`~ModularPipelineBlocks.from_pretrained`] and set `trust_remote_code=True`. ```py import torch From 180c9eaed1ee1f2d1276fd655b49bae7139da036 Mon Sep 17 00:00:00 2001 From: Dhruv Nair Date: Wed, 17 Sep 2025 08:41:19 +0530 Subject: [PATCH 05/19] Update docs/source/en/_toctree.yml Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com> --- docs/source/en/_toctree.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml index 188b21f7f168..e06a59cd97e2 100644 --- a/docs/source/en/_toctree.yml +++ b/docs/source/en/_toctree.yml @@ -127,7 +127,7 @@ - local: modular_diffusers/guiders title: Guiders - local: modular_diffusers/custom_blocks - title: Building Custom Blocks + title: Building custom blocks - title: Training isExpanded: false From e78aa54b82013635a3b25a7de9e3bf2263fe2c0b Mon Sep 17 00:00:00 2001 From: Dhruv Nair Date: Wed, 17 Sep 2025 08:41:51 +0530 Subject: [PATCH 06/19] Update docs/source/en/modular_diffusers/custom_blocks.md Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com> --- docs/source/en/modular_diffusers/custom_blocks.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) 
diff --git a/docs/source/en/modular_diffusers/custom_blocks.md b/docs/source/en/modular_diffusers/custom_blocks.md index ce9cb94e5ec2..935d3e03656d 100644 --- a/docs/source/en/modular_diffusers/custom_blocks.md +++ b/docs/source/en/modular_diffusers/custom_blocks.md @@ -13,7 +13,9 @@ specific language governing permissions and limitations under the License. # Building Custom Blocks -Modular Diffusers allows you to create custom blocks that can be plugged into Modular Pipelines. This guide will show you how to create and use a custom block. +[ModularPipelineBlocks](./pipeline_block) are the fundamental building blocks for a [`ModularPipeline`]. As long as they contain the appropriate inputs, outputs, and computation logic, you can customize these blocks to create custom blocks. + +This guide will show you how to create and use a custom block. First let's take a look at the structure of our custom block project: From ddaf986eb41579d186592c53b0853f3e9cf25d0a Mon Sep 17 00:00:00 2001 From: Dhruv Nair Date: Wed, 8 Oct 2025 13:49:38 +0530 Subject: [PATCH 07/19] Apply suggestion from @stevhliu Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com> --- docs/source/en/modular_diffusers/custom_blocks.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/en/modular_diffusers/custom_blocks.md b/docs/source/en/modular_diffusers/custom_blocks.md index 935d3e03656d..511a59bfd4e4 100644 --- a/docs/source/en/modular_diffusers/custom_blocks.md +++ b/docs/source/en/modular_diffusers/custom_blocks.md @@ -11,7 +11,7 @@ specific language governing permissions and limitations under the License. --> -# Building Custom Blocks +# Building custom blocks [ModularPipelineBlocks](./pipeline_block) are the fundamental building blocks for a [`ModularPipeline`]. As long as they contain the appropriate inputs, outputs, and computation logic, you can customize these blocks to create custom blocks. 
From df67d521ee4adbcf1a523c136263f6e162be79f4 Mon Sep 17 00:00:00 2001 From: Dhruv Nair Date: Wed, 8 Oct 2025 13:51:07 +0530 Subject: [PATCH 08/19] Apply suggestion from @stevhliu Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com> --- docs/source/en/modular_diffusers/custom_blocks.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/docs/source/en/modular_diffusers/custom_blocks.md b/docs/source/en/modular_diffusers/custom_blocks.md index 511a59bfd4e4..1f01e86e19d0 100644 --- a/docs/source/en/modular_diffusers/custom_blocks.md +++ b/docs/source/en/modular_diffusers/custom_blocks.md @@ -248,9 +248,7 @@ class Florence2ImageAnnotatorBlock(ModularPipelineBlocks): return components, state ``` -Now that we have defined our custom block, we can save it as a model repository on the Huggingface Hub so that it is easy to share and reuse. - -There are two ways to save the block: +Save the custom block to the Hub, from either the CLI or with the [`push_to_hub`] method, so you can easily share and reuse it. 1. From the CLI From c194bf11a04204fe1fb5fd4ea2b5cd24bbd67dd3 Mon Sep 17 00:00:00 2001 From: DN6 Date: Wed, 8 Oct 2025 13:48:49 +0530 Subject: [PATCH 09/19] update --- .../source/en/modular_diffusers/custom_blocks.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/docs/source/en/modular_diffusers/custom_blocks.md b/docs/source/en/modular_diffusers/custom_blocks.md index 1f01e86e19d0..fb41c1d53447 100644 --- a/docs/source/en/modular_diffusers/custom_blocks.md +++ b/docs/source/en/modular_diffusers/custom_blocks.md @@ -314,3 +314,19 @@ output = pipe( ) output[0].save("florence-inpainting.png") ``` + +## Editing Custom Blocks + +By default, custom blocks are saved in your cache directory. To download and edit a custom block you can use the `local_dir` argument to save the block to a specific folder. 
+ +```py +import torch +from diffusers.modular_pipelines import ModularPipelineBlocks, SequentialPipelineBlocks +from diffusers.modular_pipelines.stable_diffusion_xl import INPAINT_BLOCKS +from diffusers.utils import load_image + +# Fetch the Florence2 image annotator block that will create our mask +image_annotator_block = ModularPipelineBlocks.from_pretrained("diffusers/florence-2-custom-block", trust_remote_code=True, local_dir="/my-local-folder") +``` + +Any changes made to the block files to the blocks in this file will be reflected when you load the block again. From a4815ab1c8fdb6025d2bb706e80b76f472a635cc Mon Sep 17 00:00:00 2001 From: DN6 Date: Wed, 8 Oct 2025 13:53:54 +0530 Subject: [PATCH 10/19] update --- docs/source/en/modular_diffusers/custom_blocks.md | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/docs/source/en/modular_diffusers/custom_blocks.md b/docs/source/en/modular_diffusers/custom_blocks.md index fb41c1d53447..f94b9a1df1df 100644 --- a/docs/source/en/modular_diffusers/custom_blocks.md +++ b/docs/source/en/modular_diffusers/custom_blocks.md @@ -250,7 +250,8 @@ class Florence2ImageAnnotatorBlock(ModularPipelineBlocks): Save the custom block to the Hub, from either the CLI or with the [`push_to_hub`] method, so you can easily share and reuse it. -1. From the CLI + + ```shell # In the folder with the `block.py` file, run: @@ -262,8 +263,8 @@ Then upload the block to the Hub: ```shell hf upload . . ``` - -2. From Python + + ```py from block import Florence2ImageAnnotatorBlock @@ -271,6 +272,8 @@ block = Florence2ImageAnnotatorBlock() block.push_to_hub("") ``` + + Load the custom block with [`~ModularPipelineBlocks.from_pretrained`] and set `trust_remote_code=True`. 
From c918079f8b65ce9a19de6f9333aecd3a4e36818f Mon Sep 17 00:00:00 2001 From: DN6 Date: Wed, 8 Oct 2025 14:21:37 +0530 Subject: [PATCH 11/19] update --- docs/source/en/modular_diffusers/custom_blocks.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/en/modular_diffusers/custom_blocks.md b/docs/source/en/modular_diffusers/custom_blocks.md index f94b9a1df1df..2d57f1f58eb1 100644 --- a/docs/source/en/modular_diffusers/custom_blocks.md +++ b/docs/source/en/modular_diffusers/custom_blocks.md @@ -284,7 +284,7 @@ from diffusers.modular_pipelines.stable_diffusion_xl import INPAINT_BLOCKS from diffusers.utils import load_image # Fetch the Florence2 image annotator block that will create our mask -image_annotator_block = ModularPipelineBlocks.from_pretrained("diffusers/florence-2-custom-block", trust_remote_code=True) +image_annotator_block = ModularPipelineBlocks.from_pretrained("diffusers/florence-2-custom-block", trust_remote_code=True) my_blocks = INPAINT_BLOCKS.copy() # insert the annotation block before the image encoding step From 28d3856a0e88fee658c0039157c78915ee2bda69 Mon Sep 17 00:00:00 2001 From: Dhruv Nair Date: Wed, 8 Oct 2025 14:26:40 +0530 Subject: [PATCH 12/19] Apply suggestion from @stevhliu Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com> --- docs/source/en/modular_diffusers/custom_blocks.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/en/modular_diffusers/custom_blocks.md b/docs/source/en/modular_diffusers/custom_blocks.md index 2d57f1f58eb1..39a6359fb028 100644 --- a/docs/source/en/modular_diffusers/custom_blocks.md +++ b/docs/source/en/modular_diffusers/custom_blocks.md @@ -17,7 +17,7 @@ specific language governing permissions and limitations under the License. This guide will show you how to create and use a custom block. -First let's take a look at the structure of our custom block project: +The project should be structured as shown below. 
The custom block code is contained in `block.py` and the `modular_config.json` file contains metadata for loading the block. ```shell . From 331a7a13562a36cc0d7e25127de9216ce0d8be44 Mon Sep 17 00:00:00 2001 From: Dhruv Nair Date: Wed, 8 Oct 2025 14:26:46 +0530 Subject: [PATCH 13/19] Apply suggestion from @stevhliu Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com> --- docs/source/en/modular_diffusers/custom_blocks.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/docs/source/en/modular_diffusers/custom_blocks.md b/docs/source/en/modular_diffusers/custom_blocks.md index 39a6359fb028..641286562221 100644 --- a/docs/source/en/modular_diffusers/custom_blocks.md +++ b/docs/source/en/modular_diffusers/custom_blocks.md @@ -25,9 +25,7 @@ The project should be structured as shown below. The custom block code is contai └── modular_config.json ``` -The code to define the custom block lives in a file called `block.py`. The `modular_config.json` file contains metadata for loading the block with Modular Diffusers. - -In this example, we will create a custom block that uses the Florence 2 model to process an input image and generate a mask for inpainting +This example creates a custom block that uses the [Florence 2](https://huggingface.co/docs/transformers/model_doc/florence2) model to process an input image and generate a mask for inpainting. 
```py from typing import List, Union From a65e0a60dff5009ab58160cfd200954757701c1f Mon Sep 17 00:00:00 2001 From: Dhruv Nair Date: Fri, 7 Nov 2025 07:34:14 +0100 Subject: [PATCH 14/19] update --- .../en/modular_diffusers/custom_blocks.md | 183 +++++++++++++++++- 1 file changed, 174 insertions(+), 9 deletions(-) diff --git a/docs/source/en/modular_diffusers/custom_blocks.md b/docs/source/en/modular_diffusers/custom_blocks.md index 641286562221..f77180502176 100644 --- a/docs/source/en/modular_diffusers/custom_blocks.md +++ b/docs/source/en/modular_diffusers/custom_blocks.md @@ -13,19 +13,182 @@ specific language governing permissions and limitations under the License. # Building custom blocks -[ModularPipelineBlocks](./pipeline_block) are the fundamental building blocks for a [`ModularPipeline`]. As long as they contain the appropriate inputs, outputs, and computation logic, you can customize these blocks to create custom blocks. +[ModularPipelineBlocks](./pipeline_block) are the fundamental building blocks of a [`ModularPipeline`]. You can create custom blocks by defining their inputs, outputs, and computation logic. -This guide will show you how to create and use a custom block. +This guide demonstrates how to create and use a custom block. -The project should be structured as shown below. The custom block code is contained in `block.py` and the `modular_config.json` file contains metadata for loading the block. +## Project Structure +Your custom block project should follow this structure: ```shell . ├── block.py └── modular_config.json ``` -This example creates a custom block that uses the [Florence 2](https://huggingface.co/docs/transformers/model_doc/florence2) model to process an input image and generate a mask for inpainting. 
+- `block.py` contains the custom block implementation +- `modular_config.json` contains the metadata needed to load the block + +## Example: Florence 2 Inpainting Block + +In this example, we will create a custom block that uses the [Florence 2](https://huggingface.co/docs/transformers/model_doc/florence2) model to process an input image and generate a mask for inpainting. + + +The first step is to define the components that the block will use. In this case, we will use the `Florence2ForConditionalGeneration` model and its corresponding processor `AutoProcessor`. When defining components, we specify the name of the component within our pipeline, its model class, and provide a `pretrained_model_name_or_path` for the component if we intend to load the model weights from a specific repository on the Hub. + +```py +from typing import List, Union +from PIL import Image, ImageDraw +import torch +import numpy as np + +from diffusers.modular_pipelines import ( + PipelineState, + ModularPipelineBlocks, + InputParam, + ComponentSpec, + OutputParam, +) +from transformers import AutoProcessor, Florence2ForConditionalGeneration + + +class Florence2ImageAnnotatorBlock(ModularPipelineBlocks): + + # Define the expected components (models and processors) for this block + @property + def expected_components(self): + return [ + ComponentSpec( + name="image_annotator", + type_hint=Florence2ForConditionalGeneration, + pretrained_model_name_or_path="florence-community/Florence-2-base-ft", + ), + ComponentSpec( + name="image_annotator_processor", + type_hint=AutoProcessor, + pretrained_model_name_or_path="florence-community/Florence-2-base-ft", + ), + ] +``` + +Next, we define the inputs and outputs of the block. The inputs include the image to be annotated, the annotation task, and the annotation prompt. The outputs include the generated mask image and annotations. 
+ +```py +from typing import List, Union +from PIL import Image, ImageDraw +import torch +import numpy as np + +from diffusers.modular_pipelines import ( + PipelineState, + ModularPipelineBlocks, + InputParam, + ComponentSpec, + OutputParam, +) +from transformers import AutoProcessor, Florence2ForConditionalGeneration + + +class Florence2ImageAnnotatorBlock(ModularPipelineBlocks): + + # Define the expected components (models and processors) for this block + @property + def expected_components(self): + return [ + ComponentSpec( + name="image_annotator", + type_hint=Florence2ForConditionalGeneration, + pretrained_model_name_or_path="florence-community/Florence-2-base-ft", + ), + ComponentSpec( + name="image_annotator_processor", + type_hint=AutoProcessor, + pretrained_model_name_or_path="florence-community/Florence-2-base-ft", + ), + ] + + @property + def inputs(self) -> List[InputParam]: + return [ + InputParam( + "image", + type_hint=Union[Image.Image, List[Image.Image]], + required=True, + description="Image(s) to annotate", + ), + InputParam( + "annotation_task", + type_hint=Union[str, List[str]], + required=True, + default="", + description="""Annotation Task to perform on the image. + Supported Tasks: + + + + + + + + + + + """, + ), + InputParam( + "annotation_prompt", + type_hint=Union[str, List[str]], + required=True, + description="""Annotation Prompt to provide more context to the task. + Can be used to detect or segment out specific elements in the image + """, + ), + InputParam( + "annotation_output_type", + type_hint=str, + required=True, + default="mask_image", + description="""Output type from annotation predictions. 
Availabe options are + mask_image: + -black and white mask image for the given image based on the task type + mask_overlay: + - mask overlayed on the original image + bounding_box: + - bounding boxes drawn on the original image + """, + ), + InputParam( + "annotation_overlay", + type_hint=bool, + required=True, + default=False, + description="", + ), + ] + + @property + def intermediate_outputs(self) -> List[OutputParam]: + return [ + OutputParam( + "mask_image", + type_hint=Image, + description="Inpainting Mask for input Image(s)", + ), + OutputParam( + "annotations", + type_hint=dict, + description="Annotations Predictions for input Image(s)", + ), + OutputParam( + "image", + type_hint=Image, + description="Annotated input Image(s)", + ), + ] + +``` + +Now we implement the `__call__` method, which contains the logic for processing the input image and generating the mask. ```py from typing import List, Union @@ -44,18 +207,19 @@ from transformers import AutoProcessor, Florence2ForConditionalGeneration class Florence2ImageAnnotatorBlock(ModularPipelineBlocks): + @property def expected_components(self): return [ ComponentSpec( name="image_annotator", type_hint=Florence2ForConditionalGeneration, - repo="florence-community/Florence-2-base-ft", + pretrained_model_name_or_path="florence-community/Florence-2-base-ft", ), ComponentSpec( name="image_annotator_processor", type_hint=AutoProcessor, - repo="florence-community/Florence-2-base-ft", + pretrained_model_name_or_path="florence-community/Florence-2-base-ft", ), ] @@ -244,9 +408,10 @@ class Florence2ImageAnnotatorBlock(ModularPipelineBlocks): self.set_block_state(state, block_state) return components, state + ``` -Save the custom block to the Hub, from either the CLI or with the [`push_to_hub`] method, so you can easily share and reuse it. +Once we have defined our custom block we can save it to the Hub, using either the CLI or the [`push_to_hub`] method, so it is easy to share and reuse. 
@@ -318,7 +483,7 @@ output[0].save("florence-inpainting.png") ## Editing Custom Blocks -By default, custom blocks are saved in your cache directory. To download and edit a custom block you can use the `local_dir` argument to save the block to a specific folder. +By default, custom blocks are saved in your cache directory. Use the `local_dir` argument to download and edit a custom block in a specific folder. ```py import torch @@ -330,4 +495,4 @@ from diffusers.utils import load_image image_annotator_block = ModularPipelineBlocks.from_pretrained("diffusers/florence-2-custom-block", trust_remote_code=True, local_dir="/my-local-folder") ``` -Any changes made to the block files to the blocks in this file will be reflected when you load the block again. +Any changes made to the block files in this folder will be reflected when you load the block again. From 218cb72a5d4da0979f5075933d3c515de19cfb9c Mon Sep 17 00:00:00 2001 From: Dhruv Nair Date: Fri, 7 Nov 2025 08:42:18 +0100 Subject: [PATCH 15/19] update --- docs/source/en/_toctree.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml index 75a5bea78014..f6de895ca87e 100644 --- a/docs/source/en/_toctree.yml +++ b/docs/source/en/_toctree.yml @@ -121,8 +121,7 @@ title: Guiders - local: modular_diffusers/custom_blocks title: Building custom blocks - -- title: Training + title: Modular Diffusers isExpanded: false sections: - local: training/overview From 9b7065d1bbc634b77b7452e33b9493496af540ff Mon Sep 17 00:00:00 2001 From: Dhruv Nair Date: Fri, 7 Nov 2025 08:49:41 +0100 Subject: [PATCH 16/19] update --- docs/source/en/_toctree.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml index f6de895ca87e..7f461c3a089e 100644 --- a/docs/source/en/_toctree.yml +++ b/docs/source/en/_toctree.yml @@ -122,7 +122,7 @@ - local: modular_diffusers/custom_blocks title: Building custom blocks 
title: Modular Diffusers - isExpanded: false +- isExpanded: false sections: - local: training/overview title: Overview From 2b3558adab15ec360ad0a1e9a9ce34df7067bc24 Mon Sep 17 00:00:00 2001 From: Dhruv Nair Date: Fri, 7 Nov 2025 15:58:13 +0100 Subject: [PATCH 17/19] update --- docs/source/en/_toctree.yml | 2 +- .../en/modular_diffusers/custom_blocks.md | 31 +++++++------------ 2 files changed, 12 insertions(+), 21 deletions(-) diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml index 7f461c3a089e..ce52baeaadd7 100644 --- a/docs/source/en/_toctree.yml +++ b/docs/source/en/_toctree.yml @@ -120,7 +120,7 @@ - local: modular_diffusers/guiders title: Guiders - local: modular_diffusers/custom_blocks - title: Building custom blocks + title: Building Custom Blocks title: Modular Diffusers - isExpanded: false sections: diff --git a/docs/source/en/modular_diffusers/custom_blocks.md b/docs/source/en/modular_diffusers/custom_blocks.md index f77180502176..e0e6825273df 100644 --- a/docs/source/en/modular_diffusers/custom_blocks.md +++ b/docs/source/en/modular_diffusers/custom_blocks.md @@ -11,15 +11,14 @@ specific language governing permissions and limitations under the License. --> -# Building custom blocks +# Building Custom Blocks -[ModularPipelineBlocks](./pipeline_block) are the fundamental building blocks of a [`ModularPipeline`]. You can create custom blocks by defining their inputs, outputs, and computation logic. - -This guide demonstrates how to create and use a custom block. +[ModularPipelineBlocks](./pipeline_block) are the fundamental building blocks of a [`ModularPipeline`]. You can create custom blocks by defining their inputs, outputs, and computation logic. This guide demonstrates how to create and use a custom block. ## Project Structure -Your custom block project should follow this structure: +Your custom block project should use the following structure: + ```shell . 
├── block.py @@ -31,30 +30,21 @@ Your custom block project should follow this structure: ## Example: Florence 2 Inpainting Block -In This example we will create a custom block that uses the [Florence 2](https://huggingface.co/docs/transformers/model_doc/florence2) model to process an input image and generate a mask for inpainting. - +In this example we will create a custom block that uses the [Florence 2](https://huggingface.co/docs/transformers/model_doc/florence2) model to process an input image and generate a mask for inpainting. -The first step is to define the components that the block will use. In this case, we will use the `Florence2ForConditionalGeneration` model and its corresponding processor `AutoProcessor`. When defining components, we specify the name of the component within our pipeline, model class, and provided a `pretrained_model_name_or_path` for the component if we intend to load the model weights from a specific repository on the Hub. +The first step is to define the components that the block will use. In this case, we will need to use the `Florence2ForConditionalGeneration` model and its corresponding processor `AutoProcessor`. When defining components, we must specify the name of the component within our pipeline, the model class via `type_hint`, and provide a `pretrained_model_name_or_path` for the component if we intend to load the model weights from a specific repository on the Hub.
```py -from typing import List, Union -from PIL import Image, ImageDraw -import torch -import numpy as np - +# Inside block.py from diffusers.modular_pipelines import ( - PipelineState, ModularPipelineBlocks, - InputParam, ComponentSpec, - OutputParam, ) from transformers import AutoProcessor, Florence2ForConditionalGeneration class Florence2ImageAnnotatorBlock(ModularPipelineBlocks): - # Define the expected components (models and processors) for this block @property def expected_components(self): return [ @@ -91,7 +81,6 @@ from transformers import AutoProcessor, Florence2ForConditionalGeneration class Florence2ImageAnnotatorBlock(ModularPipelineBlocks): - # Define the expected components (models and processors) for this block @property def expected_components(self): return [ @@ -411,7 +400,7 @@ class Florence2ImageAnnotatorBlock(ModularPipelineBlocks): ``` -Once we have defined our custom block we can save it to the Hub, using either the CLI or the [`push_to_hub`] method, so it is easy to share and reuse. +Once we have defined our custom block, we can save it to the Hub, using either the CLI or the [`push_to_hub`] method. This will make it easy to share and reuse our custom block with other pipelines. @@ -438,6 +427,8 @@ block.push_to_hub("") +## Using Custom Blocks + Load the custom block with [`~ModularPipelineBlocks.from_pretrained`] and set `trust_remote_code=True`. 
```py @@ -447,7 +438,7 @@ from diffusers.modular_pipelines.stable_diffusion_xl import INPAINT_BLOCKS from diffusers.utils import load_image # Fetch the Florence2 image annotator block that will create our mask -image_annotator_block = ModularPipelineBlocks.from_pretrained("diffusers/florence-2-custom-block", }}}trust_remote_code=True) +image_annotator_block = ModularPipelineBlocks.from_pretrained("diffusers/florence-2-custom-block", trust_remote_code=True) my_blocks = INPAINT_BLOCKS.copy() # insert the annotation block before the image encoding step From cfb83e54f33f4e07055dada026f3c58c876a44b0 Mon Sep 17 00:00:00 2001 From: Dhruv Nair Date: Fri, 7 Nov 2025 16:02:16 +0100 Subject: [PATCH 18/19] update --- docs/source/en/modular_diffusers/custom_blocks.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/source/en/modular_diffusers/custom_blocks.md b/docs/source/en/modular_diffusers/custom_blocks.md index e0e6825273df..ae91aa427eb8 100644 --- a/docs/source/en/modular_diffusers/custom_blocks.md +++ b/docs/source/en/modular_diffusers/custom_blocks.md @@ -15,6 +15,10 @@ specific language governing permissions and limitations under the License. [ModularPipelineBlocks](./pipeline_block) are the fundamental building blocks of a [`ModularPipeline`]. You can create custom blocks by defining their inputs, outputs, and computation logic. This guide demonstrates how to create and use a custom block. 
+ +You can find examples of different types of custom blocks in the [Modular Diffusers Custom Blocks collection](https://huggingface.co/collections/diffusers/modular-diffusers-custom-blocks). + + ## Project Structure Your custom block project should use the following structure: From 7516680d8c9e4cd1d92a0c278383c98069aa3449 Mon Sep 17 00:00:00 2001 From: Dhruv Nair Date: Mon, 10 Nov 2025 04:05:18 +0100 Subject: [PATCH 19/19] update --- .github/workflows/push_tests.yml | 45 +++----------------------------- 1 file changed, 3 insertions(+), 42 deletions(-) diff --git a/.github/workflows/push_tests.yml b/.github/workflows/push_tests.yml index a10dfcaf4e4d..1685216d64bb 100644 --- a/.github/workflows/push_tests.yml +++ b/.github/workflows/push_tests.yml @@ -76,6 +76,7 @@ jobs: run: | uv pip install -e ".[quality]" uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git + uv pip uninstall transformers -y && pip uninstall huggingface_hub -y && python -m uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git - name: Environment run: | python utils/print_env.py @@ -127,7 +128,7 @@ jobs: uv pip install -e ".[quality]" uv pip install peft@git+https://github.com/huggingface/peft.git uv pip uninstall accelerate && uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git - + uv pip uninstall transformers -y && pip uninstall huggingface_hub -y && python -m uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git - name: Environment run: | python utils/print_env.py @@ -178,6 +179,7 @@ jobs: - name: Install dependencies run: | uv pip install -e ".[quality,training]" + uv pip uninstall transformers -y && pip uninstall huggingface_hub -y && python -m uv pip install --prerelease allow -U transformers@git+https://github.com/huggingface/transformers.git - name: Environment run: | python utils/print_env.py @@ -198,47 +200,6
@@ jobs: name: torch_compile_test_reports path: reports - run_xformers_tests: - name: PyTorch xformers CUDA tests - - runs-on: - group: aws-g4dn-2xlarge - - container: - image: diffusers/diffusers-pytorch-xformers-cuda - options: --gpus all --shm-size "16gb" --ipc host - - steps: - - name: Checkout diffusers - uses: actions/checkout@v3 - with: - fetch-depth: 2 - - - name: NVIDIA-SMI - run: | - nvidia-smi - - name: Install dependencies - run: | - uv pip install -e ".[quality,training]" - - name: Environment - run: | - python utils/print_env.py - - name: Run example tests on GPU - env: - HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }} - run: | - pytest -n 1 --max-worker-restart=0 --dist=loadfile -s -v -k "xformers" --make-reports=tests_torch_xformers_cuda tests/ - - name: Failure short reports - if: ${{ failure() }} - run: cat reports/tests_torch_xformers_cuda_failures_short.txt - - - name: Test suite reports artifacts - if: ${{ always() }} - uses: actions/upload-artifact@v4 - with: - name: torch_xformers_test_reports - path: reports - run_examples_tests: name: Examples PyTorch CUDA tests on Ubuntu