mirror of
https://github.com/AUTOMATIC1111/stable-diffusion-webui.git
synced 2025-08-04 11:12:35 +00:00
Some changes to the tensorboard code and hypernetwork support
This commit is contained in:
@@ -4,6 +4,7 @@ import html
|
||||
import os
|
||||
import sys
|
||||
import traceback
|
||||
import tensorboard
|
||||
import tqdm
|
||||
import csv
|
||||
|
||||
@@ -18,7 +19,6 @@ import modules.textual_inversion.dataset
|
||||
from modules.textual_inversion import textual_inversion
|
||||
from modules.textual_inversion.learn_schedule import LearnRateScheduler
|
||||
|
||||
|
||||
class HypernetworkModule(torch.nn.Module):
|
||||
multiplier = 1.0
|
||||
|
||||
@@ -291,6 +291,9 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log
|
||||
scheduler = LearnRateScheduler(learn_rate, steps, ititial_step)
|
||||
optimizer = torch.optim.AdamW(weights, lr=scheduler.learn_rate)
|
||||
|
||||
if shared.opts.training_enable_tensorboard:
|
||||
tensorboard_writer = textual_inversion.tensorboard_setup(log_directory)
|
||||
|
||||
pbar = tqdm.tqdm(enumerate(ds), total=steps - ititial_step)
|
||||
for i, entries in pbar:
|
||||
hypernetwork.step = i + ititial_step
|
||||
@@ -315,6 +318,7 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log
|
||||
optimizer.zero_grad()
|
||||
loss.backward()
|
||||
optimizer.step()
|
||||
|
||||
mean_loss = losses.mean()
|
||||
if torch.isnan(mean_loss):
|
||||
raise RuntimeError("Loss diverged.")
|
||||
@@ -323,6 +327,14 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log
|
||||
if hypernetwork.step > 0 and hypernetwork_dir is not None and hypernetwork.step % save_hypernetwork_every == 0:
|
||||
last_saved_file = os.path.join(hypernetwork_dir, f'{hypernetwork_name}-{hypernetwork.step}.pt')
|
||||
hypernetwork.save(last_saved_file)
|
||||
|
||||
if shared.opts.training_enable_tensorboard:
|
||||
epoch_num = hypernetwork.step // len(ds)
|
||||
epoch_step = hypernetwork.step - (epoch_num * len(ds)) + 1
|
||||
|
||||
textual_inversion.tensorboard_add(tensorboard_writer, loss=mean_loss,
|
||||
global_step=hypernetwork.step, step=epoch_step,
|
||||
learn_rate=scheduler.learn_rate, epoch_num=epoch_num)
|
||||
|
||||
textual_inversion.write_loss(log_directory, "hypernetwork_loss.csv", hypernetwork.step, len(ds), {
|
||||
"loss": f"{mean_loss:.7f}",
|
||||
@@ -360,6 +372,10 @@ def train_hypernetwork(hypernetwork_name, learn_rate, batch_size, data_root, log
|
||||
processed = processing.process_images(p)
|
||||
image = processed.images[0] if len(processed.images)>0 else None
|
||||
|
||||
if shared.opts.training_enable_tensorboard and shared.opts.training_tensorboard_save_images:
|
||||
textual_inversion.tensorboard_add_image(tensorboard_writer, f"Validation at epoch {epoch_num}",
|
||||
image, hypernetwork.step)
|
||||
|
||||
if unload:
|
||||
shared.sd_model.cond_stage_model.to(devices.cpu)
|
||||
shared.sd_model.first_stage_model.to(devices.cpu)
|
||||
|
Reference in New Issue
Block a user