StyleGAN2-PyTorch Code Walkthrough

An annotated read-through of the training script (train.py) of the PyTorch StyleGAN2 implementation.

import argparse
import math
import random
import os

import numpy as np
import torch
from torch import nn, autograd, optim
from torch.nn import functional as F
from torch.utils import data
import torch.distributed as dist
from torchvision import transforms, utils
from tqdm import tqdm

try:
    import wandb
except ImportError:
    wandb = None

from dataset import MultiResolutionDataset
from distributed import (
    get_rank,
    synchronize,
    reduce_loss_dict,
    reduce_sum,
    get_world_size,
)
from op import conv2d_gradfix
from non_leaking import augment, AdaptiveAugment


def data_sampler(dataset, shuffle, distributed):
    if distributed:
        return data.distributed.DistributedSampler(dataset, shuffle=shuffle)

    if shuffle:
        return data.RandomSampler(dataset)

    else:
        return data.SequentialSampler(dataset)


def requires_grad(model, flag=True):
    for p in model.parameters():
        p.requires_grad = flag


def accumulate(model1, model2, decay=0.999):
    # model1 <- decay * model1 + (1 - decay) * model2, parameter by parameter
    # (used to keep g_ema as an exponential moving average of the generator)
    par1 = dict(model1.named_parameters())
    par2 = dict(model2.named_parameters())

    for k in par1.keys():
        par1[k].data.mul_(decay).add_(par2[k].data, alpha=1 - decay)


def sample_data(loader):
    while True:
        for batch in loader:
            yield batch


def d_logistic_loss(real_pred, fake_pred):
    real_loss = F.softplus(-real_pred)
    fake_loss = F.softplus(fake_pred)

    return real_loss.mean() + fake_loss.mean()


def d_r1_loss(real_pred, real_img):
    with conv2d_gradfix.no_weight_gradients():
        grad_real, = autograd.grad(
            outputs=real_pred.sum(), inputs=real_img, create_graph=True
        )
    grad_penalty = grad_real.pow(2).reshape(grad_real.shape[0], -1).sum(1).mean()

    return grad_penalty


def g_nonsaturating_loss(fake_pred):
    loss = F.softplus(-fake_pred).mean()

    return loss


def g_path_regularize(fake_img, latents, mean_path_length, decay=0.01):
    noise = torch.randn_like(fake_img) / math.sqrt(
        fake_img.shape[2] * fake_img.shape[3]
    )
    grad, = autograd.grad(
        outputs=(fake_img * noise).sum(), inputs=latents, create_graph=True
    )
    path_lengths = torch.sqrt(grad.pow(2).sum(2).mean(1))

    path_mean = mean_path_length + decay * (path_lengths.mean() - mean_path_length)

    path_penalty = (path_lengths - path_mean).pow(2).mean()

    return path_penalty, path_mean.detach(), path_lengths


def make_noise(batch, latent_dim, n_noise, device):
    if n_noise == 1:
        return torch.randn(batch, latent_dim, device=device)

    noises = torch.randn(n_noise, batch, latent_dim, device=device).unbind(0)

    return noises


def mixing_noise(batch, latent_dim, prob, device):
    if prob > 0 and random.random() < prob:
        return make_noise(batch, latent_dim, 2, device)

    else:
        return [make_noise(batch, latent_dim, 1, device)]


def set_grad_none(model, targets):
    for n, p in model.named_parameters():
        if n in targets:
            p.grad = None


def train(args, loader, generator, discriminator, g_optim, d_optim, g_ema, device):
    loader = sample_data(loader)

    pbar = range(args.iter)

    if get_rank() == 0:
        pbar = tqdm(pbar, initial=args.start_iter, dynamic_ncols=True, smoothing=0.01)

    mean_path_length = 0

    d_loss_val = 0
    r1_loss = torch.tensor(0.0, device=device)
    g_loss_val = 0
    path_loss = torch.tensor(0.0, device=device)
    path_lengths = torch.tensor(0.0, device=device)
    mean_path_length_avg = 0
    loss_dict = {}

    if args.distributed:
        g_module = generator.module
        d_module = discriminator.module

    else:
        g_module = generator
        d_module = discriminator

    accum = 0.5 ** (32 / (10 * 1000))
    ada_aug_p = args.augment_p if args.augment_p > 0 else 0.0
    r_t_stat = 0

    if args.augment and args.augment_p == 0:
        ada_augment = AdaptiveAugment(args.ada_target, args.ada_length, 8, device)

    sample_z = torch.randn(args.n_sample, args.latent, device=device)

    for idx in pbar:
        i = idx + args.start_iter

        if i > args.iter:
            print("Done!")

            break

        real_img = next(loader)
        real_img = real_img.to(device)

        # Set requires_grad to False for the generator and True for the discriminator:
        # the generator is frozen while the discriminator is trained. (Why the discriminator
        # first? D and G are simply updated alternately within each iteration; the order is
        # a convention and does not change the algorithm.)
        requires_grad(generator, False)
        requires_grad(discriminator, True)

        # Sample the latent noise (with optional style mixing)
        noise = mixing_noise(args.batch, args.latent, args.mixing, device)
        # The generator produces the fake images from the noise
        fake_img, _ = generator(noise)

        # If ADA augmentation is enabled, augment both the real and the fake images
        if args.augment:
            real_img_aug, _ = augment(real_img, ada_aug_p)
            fake_img, _ = augment(fake_img, ada_aug_p)

        else:
            real_img_aug = real_img

        # Run the discriminator on the fake and the real images
        fake_pred = discriminator(fake_img)
        real_pred = discriminator(real_img_aug)

        # Logistic (softplus) discriminator loss
        d_loss = d_logistic_loss(real_pred, fake_pred)

        loss_dict["d"] = d_loss
        loss_dict["real_score"] = real_pred.mean()
        loss_dict["fake_score"] = fake_pred.mean()

        discriminator.zero_grad()
        d_loss.backward()
        d_optim.step()

        if args.augment and args.augment_p == 0:
            ada_aug_p = ada_augment.tune(real_pred)
            r_t_stat = ada_augment.r_t_stat

        d_regularize = i % args.d_reg_every == 0

        # Lazy regularization: R1 is not computed at every step but only every
        # d_reg_every minibatches (16 by default); see Section 3.1 of the StyleGAN2 paper.
        if d_regularize:
            real_img.requires_grad = True

            if args.augment:
                real_img_aug, _ = augment(real_img, ada_aug_p)

            else:
                real_img_aug = real_img

            # Run the discriminator on the (possibly augmented) real images
            real_pred = discriminator(real_img_aug)
            # Compute the R1 gradient penalty (see the standalone R1 sketch after the listing)
            r1_loss = d_r1_loss(real_pred, real_img)

            discriminator.zero_grad()
            # Why not simply r1_loss.backward()? The penalty is rescaled by d_reg_every to
            # compensate for being applied lazily, and the 0 * real_pred[0] term keeps the
            # discriminator output in the graph so that every parameter receives a gradient
            # (see the sketch of this trick after the listing).
            (args.r1 / 2 * r1_loss * args.d_reg_every + 0 * real_pred[0]).backward()

            d_optim.step()

        loss_dict["r1"] = r1_loss

        # Now unfreeze the generator and freeze the discriminator
        requires_grad(generator, True)
        requires_grad(discriminator, False)

        noise = mixing_noise(args.batch, args.latent, args.mixing, device)
        fake_img, _ = generator(noise)

        if args.augment:
            fake_img, _ = augment(fake_img, ada_aug_p)

        fake_pred = discriminator(fake_img)
        # Non-saturating generator loss
        g_loss = g_nonsaturating_loss(fake_pred)

        loss_dict["g"] = g_loss

        generator.zero_grad()
        g_loss.backward()
        g_optim.step()

        g_regularize = i % args.g_reg_every == 0

        # Lazy regularization again: path length regularization is only applied every
        # g_reg_every minibatches (4 by default); see Section 3.1 of the paper.
        if g_regularize:
            path_batch_size = max(1, args.batch // args.path_batch_shrink)
            noise = mixing_noise(path_batch_size, args.latent, args.mixing, device)
            fake_img, latents = generator(noise, return_latents=True)

            path_loss, mean_path_length, path_lengths = g_path_regularize(
                fake_img, latents, mean_path_length
            )

            weighted_path_loss = args.path_regularize * args.g_reg_every * path_loss

            generator.zero_grad()
            if args.path_batch_shrink:
                weighted_path_loss += 0 * fake_img[0, 0, 0, 0]

            weighted_path_loss.backward()

            g_optim.step()

            mean_path_length_avg = (
                reduce_sum(mean_path_length).item() / get_world_size()
            )

        loss_dict["path"] = path_loss
        loss_dict["path_length"] = path_lengths.mean()

        # Update the EMA generator (see the EMA sketch after the listing)
        accumulate(g_ema, g_module, accum)

        loss_reduced = reduce_loss_dict(loss_dict)

        d_loss_val = loss_reduced["d"].mean().item()
        g_loss_val = loss_reduced["g"].mean().item()
        r1_val = loss_reduced["r1"].mean().item()
        path_loss_val = loss_reduced["path"].mean().item()
        real_score_val = loss_reduced["real_score"].mean().item()
        fake_score_val = loss_reduced["fake_score"].mean().item()
        path_length_val = loss_reduced["path_length"].mean().item()

        if get_rank() == 0:
            pbar.set_description(
                (
                    f"d: {d_loss_val:.4f}; g: {g_loss_val:.4f}; r1: {r1_val:.4f}; "
                    f"path: {path_loss_val:.4f}; mean path: {mean_path_length_avg:.4f}; "
                    f"augment: {ada_aug_p:.4f}"
                )
            )

            if wandb and args.wandb:
                wandb.log(
                    {
                        "Generator": g_loss_val,
                        "Discriminator": d_loss_val,
                        "Augment": ada_aug_p,
                        "Rt": r_t_stat,
                        "R1": r1_val,
                        "Path Length Regularization": path_loss_val,
                        "Mean Path Length": mean_path_length,
                        "Real Score": real_score_val,
                        "Fake Score": fake_score_val,
                        "Path Length": path_length_val,
                    }
                )

            if i % 100 == 0:
                with torch.no_grad():
                    g_ema.eval()
                    sample, _ = g_ema([sample_z])
                    utils.save_image(
                        sample,
                        f"sample/{str(i).zfill(6)}.png",
                        nrow=int(args.n_sample ** 0.5),
                        normalize=True,
                        range=(-1, 1),
                    )

            if i % 10000 == 0:
                torch.save(
                    {
                        "g": g_module.state_dict(),
                        "d": d_module.state_dict(),
                        "g_ema": g_ema.state_dict(),
                        "g_optim": g_optim.state_dict(),
                        "d_optim": d_optim.state_dict(),
                        "args": args,
                        "ada_aug_p": ada_aug_p,
                    },
                    f"checkpoint/{str(i).zfill(6)}.pt",
                )


if __name__ == "__main__":
    device = "cuda"

    parser = argparse.ArgumentParser(description="StyleGAN2 trainer")

    parser.add_argument("path", type=str, help="path to the lmdb dataset")
    parser.add_argument(
        "--arch", type=str, default="stylegan2", help="model architectures (stylegan2 | swagan)"
    )
    parser.add_argument(
        "--iter", type=int, default=800000, help="total training iterations"
    )
    parser.add_argument(
        "--batch", type=int, default=16, help="batch size for each GPU"
    )
    parser.add_argument(
        "--n_sample",
        type=int,
        default=64,
        help="number of the samples generated during training",
    )
    parser.add_argument(
        "--size", type=int, default=256, help="image sizes for the model"
    )
    parser.add_argument(
        "--r1", type=float, default=10, help="weight of the r1 regularization"
    )
    parser.add_argument(
        "--path_regularize",
        type=float,
        default=2,
        help="weight of the path length regularization",
    )
    parser.add_argument(
        "--path_batch_shrink",
        type=int,
        default=2,
        help="batch size reducing factor for the path length regularization (reduce memory consumption)",
    )
    parser.add_argument(
        "--d_reg_every",
        type=int,
        default=16,
        help="interval of applying r1 regularization",
    )
    parser.add_argument(
        "--g_reg_every",
        type=int,
        default=4,
        help="interval of applying path length regularization",
    )
    parser.add_argument(
        "--mixing", type=float, default=0.9, help="probability of latent code mixing"
    )
    parser.add_argument(
        "--ckpt",
        type=str,
        default=None,
        help="path to the checkpoints to resume training",
    )
    parser.add_argument("--lr", type=float, default=0.002, help="learning rate")
    parser.add_argument(
        "--channel_multiplier",
        type=int,
        default=2,
        help="channel multiplier factor for the model. config-f = 2, else = 1",
    )
    parser.add_argument(
        "--wandb", action="store_true", help="use weights and biases logging"
    )
    parser.add_argument(
        "--local_rank", type=int, default=0, help="local rank for distributed training"
    )
    parser.add_argument(
        "--augment", action="store_true", help="apply non leaking augmentation"
    )
    parser.add_argument(
        "--augment_p",
        type=float,
        default=0,
        help="probability of applying augmentation. 0 = use adaptive augmentation",
    )
    parser.add_argument(
        "--ada_target",
        type=float,
        default=0.6,
        help="target augmentation probability for adaptive augmentation",
    )
    parser.add_argument(
        "--ada_length",
        type=int,
        default=500 * 1000,
        help="target duration to reach augmentation probability for adaptive augmentation",
    )
    parser.add_argument(
        "--ada_every",
        type=int,
        default=256,
        help="probability update interval of the adaptive augmentation",
    )

    args = parser.parse_args()

    n_gpu = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = n_gpu > 1

    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl", init_method="env://")
        synchronize()

    args.latent = 512
    args.n_mlp = 8

    args.start_iter = 0

    if args.arch == "stylegan2":
        from model import Generator, Discriminator

    elif args.arch == "swagan":
        from swagan import Generator, Discriminator

    # Three networks are built: the trained generator, the discriminator, and g_ema,
    # a second generator with the same architecture that only holds an exponential
    # moving average of the trained generator's weights and is used for sampling.
    generator = Generator(
        args.size, args.latent, args.n_mlp, channel_multiplier=args.channel_multiplier
    ).to(device)
    discriminator = Discriminator(
        args.size, channel_multiplier=args.channel_multiplier
    ).to(device)
    g_ema = Generator(
        args.size, args.latent, args.n_mlp, channel_multiplier=args.channel_multiplier
    ).to(device)
    g_ema.eval()
    # With decay=0 this simply copies the generator's weights into g_ema
    # (see the EMA sketch after the listing)
    accumulate(g_ema, generator, 0)

    # Lazy regularization changes how often the regularizers contribute to the update,
    # so the learning rate and Adam betas are rescaled by reg_every / (reg_every + 1)
    # (see the numeric sketch after the listing)
    g_reg_ratio = args.g_reg_every / (args.g_reg_every + 1)
    d_reg_ratio = args.d_reg_every / (args.d_reg_every + 1)

    g_optim = optim.Adam(
        generator.parameters(),
        lr=args.lr * g_reg_ratio,
        betas=(0 ** g_reg_ratio, 0.99 ** g_reg_ratio),
    )
    d_optim = optim.Adam(
        discriminator.parameters(),
        lr=args.lr * d_reg_ratio,
        betas=(0 ** d_reg_ratio, 0.99 ** d_reg_ratio),
    )

    if args.ckpt is not None:
        print("load model:", args.ckpt)

        ckpt = torch.load(args.ckpt, map_location=lambda storage, loc: storage)

        try:
            ckpt_name = os.path.basename(args.ckpt)
            args.start_iter = int(os.path.splitext(ckpt_name)[0])

        except ValueError:
            pass

        generator.load_state_dict(ckpt["g"])
        discriminator.load_state_dict(ckpt["d"])
        g_ema.load_state_dict(ckpt["g_ema"])

        g_optim.load_state_dict(ckpt["g_optim"])
        d_optim.load_state_dict(ckpt["d_optim"])

    if args.distributed:
        generator = nn.parallel.DistributedDataParallel(
            generator,
            device_ids=[args.local_rank],
            output_device=args.local_rank,
            broadcast_buffers=False,
        )

        discriminator = nn.parallel.DistributedDataParallel(
            discriminator,
            device_ids=[args.local_rank],
            output_device=args.local_rank,
            broadcast_buffers=False,
        )

    transform = transforms.Compose(
        [
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=True),
        ]
    )

    dataset = MultiResolutionDataset(args.path, transform, args.size)
    loader = data.DataLoader(
        dataset,
        batch_size=args.batch,
        sampler=data_sampler(dataset, shuffle=True, distributed=args.distributed),
        drop_last=True,
    )

    if get_rank() == 0 and wandb is not None and args.wandb:
        wandb.init(project="stylegan 2")

    train(args, loader, generator, discriminator, g_optim, d_optim, g_ema, device)
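A few short, self-contained sketches for the questions raised in the comments above. First, the role of accumulate and g_ema: g_ema is never trained directly, it only tracks an exponential moving average of the trained generator's weights, and the decay=0 call at startup is a plain copy. A minimal sketch of the same idea, with two nn.Linear modules as hypothetical stand-ins for the real generators:

import torch
from torch import nn

def accumulate(model1, model2, decay=0.999):
    # model1 <- decay * model1 + (1 - decay) * model2, parameter by parameter
    par1 = dict(model1.named_parameters())
    par2 = dict(model2.named_parameters())
    for k in par1.keys():
        par1[k].data.mul_(decay).add_(par2[k].data, alpha=1 - decay)

g = nn.Linear(4, 4)      # stands in for the trained generator
g_ema = nn.Linear(4, 4)  # stands in for g_ema

accumulate(g_ema, g, 0)  # decay = 0: g_ema becomes an exact copy of g

# During training, after every generator update:
accum = 0.5 ** (32 / (10 * 1000))  # ~0.9977, i.e. a half-life of ~10k images at batch 32
accumulate(g_ema, g, accum)        # g_ema drifts slowly toward the current weights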
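Second, the optimizer setup. Because the regularizers are applied lazily (every d_reg_every / g_reg_every steps instead of every step), the learning rate and Adam betas are rescaled by reg_every / (reg_every + 1), following the adjustment the StyleGAN2 paper describes for lazy regularization. A small numeric sketch with the default hyperparameters; the values are computed exactly as in the listing:

lr = 0.002

g_reg_every = 4    # path length regularization every 4 minibatches
d_reg_every = 16   # R1 regularization every 16 minibatches

g_reg_ratio = g_reg_every / (g_reg_every + 1)  # 0.8
d_reg_ratio = d_reg_every / (d_reg_every + 1)  # ~0.941

g_lr, g_betas = lr * g_reg_ratio, (0 ** g_reg_ratio, 0.99 ** g_reg_ratio)
d_lr, d_betas = lr * d_reg_ratio, (0 ** d_reg_ratio, 0.99 ** d_reg_ratio)

print(g_lr, g_betas)  # 0.0016   (0.0, ~0.9920)
print(d_lr, d_betas)  # ~0.00188 (0.0, ~0.9906)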
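Third, the R1 penalty computed in d_r1_loss: it is the batch mean of the squared gradient norm of the discriminator output with respect to the real images. The same computation in isolation, without the repo's conv2d_gradfix context manager and with a toy linear discriminator as a hypothetical stand-in:

import torch
from torch import nn, autograd

disc = nn.Sequential(nn.Flatten(), nn.Linear(3 * 8 * 8, 1))  # toy stand-in discriminator

real_img = torch.randn(4, 3, 8, 8, requires_grad=True)
real_pred = disc(real_img)

grad_real, = autograd.grad(
    outputs=real_pred.sum(), inputs=real_img, create_graph=True
)
# Mean over the batch of the squared L2 norm of each per-image gradient
grad_penalty = grad_real.pow(2).reshape(grad_real.shape[0], -1).sum(1).mean()

# In the training loop this is rescaled for lazy regularization:
r1, d_reg_every = 10.0, 16
d_r1_term = r1 / 2 * grad_penalty * d_reg_every
d_r1_term.backward()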
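Finally, the question about (args.r1 / 2 * r1_loss * args.d_reg_every + 0 * real_pred[0]).backward(). The scaling factor compensates for the penalty only being applied every d_reg_every steps; the 0 * real_pred[0] term adds nothing to the loss value but keeps the discriminator output in the autograd graph, so every discriminator parameter receives a (zero) gradient. My reading is that this avoids unused-parameter problems under DistributedDataParallel, though the listing itself does not say so. A tiny sketch of the trick in isolation, with a hypothetical one-layer model:

import torch
from torch import nn

layer = nn.Linear(2, 1)
x = torch.randn(3, 2)
pred = layer(x)

independent_loss = torch.tensor(1.0, requires_grad=True)

# Without the dummy term, layer's parameters would get no gradient at all.
(independent_loss + 0 * pred.sum()).backward()

print(layer.weight.grad)  # all zeros, but the gradient exists (not None)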