Add option for float32 sampling with float16 UNet

This also handles type casting so that ROCm and MPS torch devices work correctly without --no-half. One cast is required for deepbooru in deepbooru_model.py, and some explicit casting is required for img2img and inpainting. depth_model can't be converted to float16, or it won't work correctly on some systems (it's known to have issues on MPS), so in sd_models.py, when --upcast-sampling is set, model.depth_model is temporarily detached from the model while model.half() runs and is restored afterwards.
2025-08-04 03:10:21 +00:00 · 2023-01-24 23:51:45 -05:00
parent 48a15821de
commit 84d9ce30cb
8 changed files with 82 additions and 8 deletions
--- a/modules/sd_models.py
+++ b/modules/sd_models.py
@@ -257,16 +257,24 @@ def load_model_weights(model, checkpoint_info: CheckpointInfo):

        if not shared.cmd_opts.no_half:
            vae = model.first_stage_model
+            depth_model = getattr(model, 'depth_model', None)

            # with --no-half-vae, remove VAE from model when doing half() to prevent its weights from being converted to float16
            if shared.cmd_opts.no_half_vae:
                model.first_stage_model = None
+            # with --upcast-sampling, don't convert the depth model weights to float16
+            if shared.cmd_opts.upcast_sampling and depth_model:
+                model.depth_model = None

            model.half()
            model.first_stage_model = vae
+            if depth_model:
+                model.depth_model = depth_model

        devices.dtype = torch.float32 if shared.cmd_opts.no_half else torch.float16
        devices.dtype_vae = torch.float32 if shared.cmd_opts.no_half or shared.cmd_opts.no_half_vae else torch.float16
+        devices.dtype_unet = model.model.diffusion_model.dtype
+        devices.unet_needs_upcast = shared.cmd_opts.upcast_sampling and devices.dtype == torch.float16 and devices.dtype_unet == torch.float16

        model.first_stage_model.to(devices.dtype_vae)

@@ -372,6 +380,8 @@ def load_model(checkpoint_info=None):

    if shared.cmd_opts.no_half:
        sd_config.model.params.unet_config.params.use_fp16 = False
+    elif shared.cmd_opts.upcast_sampling:
+        sd_config.model.params.unet_config.params.use_fp16 = True

    timer = Timer()