50
50
from diffusers .optimization import get_scheduler
51
51
from diffusers .utils import check_min_version , is_wandb_available
52
52
from diffusers .utils .import_utils import is_xformers_available
53
+ from diffusers .utils .torch_utils import is_compiled_module
53
54
54
55
55
56
if is_wandb_available ():
@@ -787,6 +788,12 @@ def main(args):
787
788
logger .info ("Initializing controlnet weights from unet" )
788
789
controlnet = ControlNetModel .from_unet (unet )
789
790
791
+ # Taken from [Sayak Paul's Diffusers PR #6511](https://github.com/huggingface/diffusers/pull/6511/files)
792
def unwrap_model(model):
    """Return the underlying module for ``model``.

    The model is first passed through ``accelerator.unwrap_model``. If
    ``is_compiled_module`` then reports the result as a compiled module,
    its ``_orig_mod`` attribute is returned instead of the wrapper.

    Args:
        model: The (possibly wrapped and/or compiled) model.

    Returns:
        The result of ``accelerator.unwrap_model(model)``, or that object's
        ``_orig_mod`` when it is a compiled module.
    """
    unwrapped = accelerator.unwrap_model(model)
    if is_compiled_module(unwrapped):
        # NOTE(review): presumably a `torch.compile` wrapper whose `_orig_mod`
        # holds the original module -- confirm against is_compiled_module.
        return unwrapped._orig_mod
    return unwrapped
796
+
790
797
# `accelerate` 0.16.0 will have better support for customized saving
791
798
if version .parse (accelerate .__version__ ) >= version .parse ("0.16.0" ):
792
799
# create custom saving & loading hooks so that `accelerator.save_state(...)` serializes in a nice format
@@ -846,9 +853,9 @@ def load_model_hook(models, input_dir):
846
853
" doing mixed precision training, copy of the weights should still be float32."
847
854
)
848
855
849
- if accelerator . unwrap_model (controlnet ).dtype != torch .float32 :
856
+ if unwrap_model (controlnet ).dtype != torch .float32 :
850
857
raise ValueError (
851
- f"Controlnet loaded as datatype { accelerator . unwrap_model (controlnet ).dtype } . { low_precision_error_string } "
858
+ f"Controlnet loaded as datatype { unwrap_model (controlnet ).dtype } . { low_precision_error_string } "
852
859
)
853
860
854
861
# Enable TF32 for faster training on Ampere GPUs,
@@ -1015,7 +1022,7 @@ def load_model_hook(models, input_dir):
1015
1022
noisy_latents = noise_scheduler .add_noise (latents , noise , timesteps )
1016
1023
1017
1024
# Get the text embedding for conditioning
1018
- encoder_hidden_states = text_encoder (batch ["input_ids" ])[0 ]
1025
+ encoder_hidden_states = text_encoder (batch ["input_ids" ], return_dict = False )[0 ]
1019
1026
1020
1027
controlnet_image = batch ["conditioning_pixel_values" ].to (dtype = weight_dtype )
1021
1028
@@ -1036,7 +1043,8 @@ def load_model_hook(models, input_dir):
1036
1043
sample .to (dtype = weight_dtype ) for sample in down_block_res_samples
1037
1044
],
1038
1045
mid_block_additional_residual = mid_block_res_sample .to (dtype = weight_dtype ),
1039
- ).sample
1046
+ return_dict = False ,
1047
+ )[0 ]
1040
1048
1041
1049
# Get the target for loss depending on the prediction type
1042
1050
if noise_scheduler .config .prediction_type == "epsilon" :
@@ -1109,7 +1117,7 @@ def load_model_hook(models, input_dir):
1109
1117
# Create the pipeline using the trained modules and save it.
1110
1118
accelerator .wait_for_everyone ()
1111
1119
if accelerator .is_main_process :
1112
- controlnet = accelerator . unwrap_model (controlnet )
1120
+ controlnet = unwrap_model (controlnet )
1113
1121
controlnet .save_pretrained (args .output_dir )
1114
1122
1115
1123
if args .push_to_hub :
0 commit comments