mirror of
https://github.com/AUTOMATIC1111/stable-diffusion-webui.git
synced 2025-08-08 13:19:54 +00:00
update ESRGAN architecture and model to support all ESRGAN models in the DB, BSRGAN and real-ESRGAN models
This commit is contained in:
@@ -5,68 +5,115 @@ import torch
|
||||
from PIL import Image
|
||||
from basicsr.utils.download_util import load_file_from_url
|
||||
|
||||
import modules.esrgam_model_arch as arch
|
||||
import modules.esrgan_model_arch as arch
|
||||
from modules import shared, modelloader, images, devices
|
||||
from modules.upscaler import Upscaler, UpscalerData
|
||||
from modules.shared import opts
|
||||
|
||||
|
||||
def fix_model_layers(crt_model, pretrained_net):
|
||||
# this code is adapted from https://github.com/xinntao/ESRGAN
|
||||
if 'conv_first.weight' in pretrained_net:
|
||||
return pretrained_net
|
||||
|
||||
if 'model.0.weight' not in pretrained_net:
|
||||
is_realesrgan = "params_ema" in pretrained_net and 'body.0.rdb1.conv1.weight' in pretrained_net["params_ema"]
|
||||
if is_realesrgan:
|
||||
raise Exception("The file is a RealESRGAN model, it can't be used as a ESRGAN model.")
|
||||
else:
|
||||
raise Exception("The file is not a ESRGAN model.")
|
||||
def mod2normal(state_dict, nb=23):
    """Rename ``conv_first``/``RRDB_trunk``-style keys to ``model.N``-style keys.

    When ``state_dict`` contains ``'conv_first.weight'`` a new dict is built:

    * ``conv_first`` becomes ``model.0``;
    * every key containing ``'RDB'`` has ``'RRDB_trunk.'`` replaced by
      ``'model.1.sub.'`` and ``'.weight'`` / ``'.bias'`` replaced by
      ``'.0.weight'`` / ``'.0.bias'``;
    * ``trunk_conv`` becomes ``model.1.sub.{nb}``, ``upconv1`` becomes
      ``model.3``, ``upconv2`` becomes ``model.6``, ``HRconv`` becomes
      ``model.8`` and ``conv_last`` becomes ``model.10``.

    Keys that match none of the rules above are not copied to the result.
    The input dict is never modified.

    Args:
        state_dict: Mapping of parameter names to values.
        nb: Index used for the ``trunk_conv`` entry under ``model.1.sub``.
            Defaults to 23, the value that was previously hard-coded.
            NOTE(review): presumably the number of RRDB blocks in the
            network, mirroring ``resrgan2normal`` -- confirm with callers.

    Returns:
        The converted dict, or ``state_dict`` itself (same object) when it
        has no ``'conv_first.weight'`` key.

    Raises:
        KeyError: If ``'conv_first.weight'`` is present but one of the other
            expected source keys is missing.
    """
    # this code is copied from https://github.com/victorca25/iNNfer
    if 'conv_first.weight' not in state_dict:
        return state_dict

    # Insertion order is kept identical to the original implementation
    # (first conv, RDB entries, then the tail layers).
    crt_net = {
        'model.0.weight': state_dict['conv_first.weight'],
        'model.0.bias': state_dict['conv_first.bias'],
    }

    for k, v in state_dict.items():
        if 'RDB' not in k:
            continue
        ori_k = k.replace('RRDB_trunk.', 'model.1.sub.')
        if '.weight' in k:
            ori_k = ori_k.replace('.weight', '.0.weight')
        elif '.bias' in k:
            ori_k = ori_k.replace('.bias', '.0.bias')
        crt_net[ori_k] = v

    crt_net[f'model.1.sub.{nb}.weight'] = state_dict['trunk_conv.weight']
    crt_net[f'model.1.sub.{nb}.bias'] = state_dict['trunk_conv.bias']
    crt_net['model.3.weight'] = state_dict['upconv1.weight']
    crt_net['model.3.bias'] = state_dict['upconv1.bias']
    crt_net['model.6.weight'] = state_dict['upconv2.weight']
    crt_net['model.6.bias'] = state_dict['upconv2.bias']
    crt_net['model.8.weight'] = state_dict['HRconv.weight']
    crt_net['model.8.bias'] = state_dict['HRconv.bias']
    crt_net['model.10.weight'] = state_dict['conv_last.weight']
    crt_net['model.10.bias'] = state_dict['conv_last.bias']
    return crt_net
|
||||
|
||||
crt_net['conv_first.weight'] = pretrained_net['model.0.weight']
|
||||
crt_net['conv_first.bias'] = pretrained_net['model.0.bias']
|
||||
|
||||
for k in tbd.copy():
|
||||
if 'RDB' in k:
|
||||
ori_k = k.replace('RRDB_trunk.', 'model.1.sub.')
|
||||
if '.weight' in k:
|
||||
ori_k = ori_k.replace('.weight', '.0.weight')
|
||||
elif '.bias' in k:
|
||||
ori_k = ori_k.replace('.bias', '.0.bias')
|
||||
crt_net[k] = pretrained_net[ori_k]
|
||||
tbd.remove(k)
|
||||
def resrgan2normal(state_dict, nb=23):
    """Rename ``conv_first``/``body.N.rdbM``-style keys to ``model.N``-style keys.

    The conversion only happens when ``state_dict`` contains both
    ``'conv_first.weight'`` and ``'body.0.rdb1.conv1.weight'``. In that case
    a new dict is built:

    * ``conv_first`` becomes ``model.0``;
    * every key containing ``'rdb'`` has ``'body.'`` replaced by
      ``'model.1.sub.'``, ``'.rdb'`` replaced by ``'.RDB'`` and
      ``'.weight'`` / ``'.bias'`` replaced by ``'.0.weight'`` / ``'.0.bias'``;
    * ``conv_body`` becomes ``model.1.sub.{nb}``, ``conv_up1`` becomes
      ``model.3``, ``conv_up2`` becomes ``model.6``, ``conv_hr`` becomes
      ``model.8`` and ``conv_last`` becomes ``model.10``.

    Keys that match none of the rules above are not copied to the result.
    The input dict is never modified.

    Args:
        state_dict: Mapping of parameter names to values.
        nb: Index used for the ``conv_body`` entry under ``model.1.sub``.
            NOTE(review): presumably the number of RRDB blocks in the
            network -- confirm with callers.

    Returns:
        The converted dict, or ``state_dict`` itself (same object) when the
        two marker keys are not both present.

    Raises:
        KeyError: If the marker keys are present but one of the other
            expected source keys is missing.
    """
    # this code is copied from https://github.com/victorca25/iNNfer
    if "conv_first.weight" not in state_dict or "body.0.rdb1.conv1.weight" not in state_dict:
        return state_dict

    # Insertion order is kept identical to the original implementation
    # (first conv, rdb entries, then the tail layers).
    crt_net = {
        'model.0.weight': state_dict['conv_first.weight'],
        'model.0.bias': state_dict['conv_first.bias'],
    }

    for k, v in state_dict.items():
        if "rdb" not in k:
            continue
        ori_k = k.replace('body.', 'model.1.sub.')
        ori_k = ori_k.replace('.rdb', '.RDB')
        if '.weight' in k:
            ori_k = ori_k.replace('.weight', '.0.weight')
        elif '.bias' in k:
            ori_k = ori_k.replace('.bias', '.0.bias')
        crt_net[ori_k] = v

    crt_net[f'model.1.sub.{nb}.weight'] = state_dict['conv_body.weight']
    crt_net[f'model.1.sub.{nb}.bias'] = state_dict['conv_body.bias']
    crt_net['model.3.weight'] = state_dict['conv_up1.weight']
    crt_net['model.3.bias'] = state_dict['conv_up1.bias']
    crt_net['model.6.weight'] = state_dict['conv_up2.weight']
    crt_net['model.6.bias'] = state_dict['conv_up2.bias']
    crt_net['model.8.weight'] = state_dict['conv_hr.weight']
    crt_net['model.8.bias'] = state_dict['conv_hr.bias']
    crt_net['model.10.weight'] = state_dict['conv_last.weight']
    crt_net['model.10.bias'] = state_dict['conv_last.bias']
    return crt_net
|
||||
|
||||
|
||||
def infer_params(state_dict):
    """Derive network constructor parameters from ``model.N``-style keys.

    Every key is split on ``"."`` and inspected:

    * a five-part key whose third part is ``"sub"`` sets ``nb`` to the
      integer in its fourth part (the last such key wins);
    * a three-part key contributes its integer middle part: each
      ``model.N.weight`` key with ``N > 6`` doubles the scale, and the first
      key reaching a new highest ``N`` supplies ``out_nc`` from
      ``shape[0]`` of its value;
    * any key containing ``"conv1x1"`` sets ``plus`` to True.

    ``nf`` and ``in_nc`` are ``shape[0]`` and ``shape[1]`` of
    ``state_dict["model.0.weight"]``.

    Args:
        state_dict: Mapping of parameter names to objects exposing a
            ``shape`` attribute.

    Returns:
        Tuple ``(in_nc, out_nc, nf, nb, plus, scale)``.

    Raises:
        KeyError: If ``"model.0.weight"`` is missing.
        ValueError: If a three-part key has a non-integer middle part, or if
            the keys do not allow ``nb`` or ``out_nc`` to be determined
            (previously this surfaced as an ``UnboundLocalError``).
    """
    # this code is copied from https://github.com/victorca25/iNNfer
    scale2x = 0
    scalemin = 6
    n_uplayer = 0
    plus = False
    # Sentinels so that a state dict without the expected keys is reported
    # clearly instead of failing on an unbound local.
    nb = None
    out_nc = None

    for block in state_dict:
        parts = block.split(".")
        n_parts = len(parts)
        if n_parts == 5 and parts[2] == "sub":
            nb = int(parts[3])
        elif n_parts == 3:
            part_num = int(parts[1])
            if part_num > scalemin and parts[0] == "model" and parts[2] == "weight":
                scale2x += 1
            if part_num > n_uplayer:
                n_uplayer = part_num
                out_nc = state_dict[block].shape[0]
        if not plus and "conv1x1" in block:
            plus = True

    first_conv = state_dict["model.0.weight"]
    nf = first_conv.shape[0]
    in_nc = first_conv.shape[1]

    if nb is None or out_nc is None:
        raise ValueError(
            "Unable to infer model parameters: the state dict has no "
            "'<a>.<b>.sub.<n>.<c>' key or no numbered '<a>.<n>.<c>' key above 0."
        )

    scale = 2 ** scale2x

    return in_nc, out_nc, nf, nb, plus, scale
|
||||
|
||||
return crt_net
|
||||
|
||||
class UpscalerESRGAN(Upscaler):
|
||||
def __init__(self, dirname):
|
||||
@@ -109,20 +156,39 @@ class UpscalerESRGAN(Upscaler):
|
||||
print("Unable to load %s from %s" % (self.model_path, filename))
|
||||
return None
|
||||
|
||||
pretrained_net = torch.load(filename, map_location='cpu' if devices.device_esrgan.type == 'mps' else None)
|
||||
crt_model = arch.RRDBNet(3, 3, 64, 23, gc=32)
|
||||
state_dict = torch.load(filename, map_location='cpu' if devices.device_esrgan.type == 'mps' else None)
|
||||
|
||||
pretrained_net = fix_model_layers(crt_model, pretrained_net)
|
||||
crt_model.load_state_dict(pretrained_net)
|
||||
crt_model.eval()
|
||||
if "params_ema" in state_dict:
|
||||
state_dict = state_dict["params_ema"]
|
||||
elif "params" in state_dict:
|
||||
state_dict = state_dict["params"]
|
||||
num_conv = 16 if "realesr-animevideov3" in filename else 32
|
||||
model = arch.SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=num_conv, upscale=4, act_type='prelu')
|
||||
model.load_state_dict(state_dict)
|
||||
model.eval()
|
||||
return model
|
||||
|
||||
return crt_model
|
||||
if "body.0.rdb1.conv1.weight" in state_dict and "conv_first.weight" in state_dict:
|
||||
nb = 6 if "RealESRGAN_x4plus_anime_6B" in filename else 23
|
||||
state_dict = resrgan2normal(state_dict, nb)
|
||||
elif "conv_first.weight" in state_dict:
|
||||
state_dict = mod2normal(state_dict)
|
||||
elif "model.0.weight" not in state_dict:
|
||||
raise Exception("The file is not a recognized ESRGAN model.")
|
||||
|
||||
in_nc, out_nc, nf, nb, plus, mscale = infer_params(state_dict)
|
||||
|
||||
model = arch.RRDBNet(in_nc=in_nc, out_nc=out_nc, nf=nf, nb=nb, upscale=mscale, plus=plus)
|
||||
model.load_state_dict(state_dict)
|
||||
model.eval()
|
||||
|
||||
return model
|
||||
|
||||
|
||||
def upscale_without_tiling(model, img):
|
||||
img = np.array(img)
|
||||
img = img[:, :, ::-1]
|
||||
img = np.moveaxis(img, 2, 0) / 255
|
||||
img = np.ascontiguousarray(np.transpose(img, (2, 0, 1))) / 255
|
||||
img = torch.from_numpy(img).float()
|
||||
img = img.unsqueeze(0).to(devices.device_esrgan)
|
||||
with torch.no_grad():
|
||||
|
Reference in New Issue
Block a user