From 064f9dd58956a883dda7f88b3d0c06ef84d1774c Mon Sep 17 00:00:00 2001 From: "Alex J. Champandard" Date: Mon, 31 Oct 2016 06:21:52 +0100 Subject: [PATCH 01/15] Add three image pre-processing options, improve loading code. --- enhance.py | 77 ++++++++++++++++++++++++++++++++---------------------- 1 file changed, 46 insertions(+), 31 deletions(-) diff --git a/enhance.py b/enhance.py index 23ca9d2..9da77d5 100755 --- a/enhance.py +++ b/enhance.py @@ -16,6 +16,7 @@ __version__ = '0.1' +import io import os import sys import bz2 @@ -38,6 +39,9 @@ add_arg('files', nargs='*', default=[]) add_arg('--scales', default=2, type=int, help='How many times to perform 2x upsampling.') add_arg('--model', default='small', type=str, help='Name of the neural network to load/save.') add_arg('--train', default=False, type=str, help='File pattern to load for training.') +add_arg('--train-blur', default=None, type=float, help='Sigma value for gaussian blur preprocess.') +add_arg('--train-noise', default=None, type=float, help='Sigma of normal distribution in preproc.') +add_arg('--train-jpeg', default=None, type=int, help='JPEG compression level in preprocessing.') add_arg('--epochs', default=10, type=int, help='Total number of iterations in training.') add_arg('--epoch-size', default=72, type=int, help='Number of batches trained in an epoch.') add_arg('--save-every', default=10, type=int, help='Save generator after every training epoch.') @@ -100,11 +104,10 @@ os.environ.setdefault('THEANO_FLAGS', 'floatX=float32,device={},force_device=Tru # Scientific & Imaging Libraries import numpy as np -import scipy.optimize, scipy.ndimage, scipy.misc +import scipy.ndimage, scipy.misc, PIL.Image # Numeric Computing (GPU) -import theano -import theano.tensor as T +import theano, theano.tensor as T T.nnet.softminus = lambda x: x - T.nnet.softplus(x) # Support ansi colors in Windows too. @@ -147,35 +150,47 @@ class DataLoader(threading.Thread): def run(self): while True: random.shuffle(self.files) - for f in self.files: - filename = os.path.join(self.cwd, f) - try: - img = scipy.ndimage.imread(filename, mode='RGB') - except Exception as e: - warn('Could not load `{}` as image.'.format(filename), - ' - Try fixing or removing the file before next run.') - files.remove(f) - continue - - for _ in range(args.buffer_similar): - copy = img[:,::-1] if random.choice([True, False]) else img - h = random.randint(0, copy.shape[0] - self.orig_shape) - w = random.randint(0, copy.shape[1] - self.orig_shape) - copy = copy[h:h+self.orig_shape, w:w+self.orig_shape] - - while len(self.available) == 0: - self.data_copied.wait() - self.data_copied.clear() - - i = self.available.pop() - self.orig_buffer[i] = np.transpose(copy / 255.0 - 0.5, (2, 0, 1)) - seed = scipy.misc.imresize(copy, size=(self.seed_shape, self.seed_shape), interp='bilinear') - self.seed_buffer[i] = np.transpose(seed / 255.0 - 0.5, (2, 0, 1)) - self.ready.add(i) - - if len(self.ready) >= args.batch_size: - self.data_ready.set() + self.add_to_buffer(f) + + def add_to_buffer(self, f): + filename = os.path.join(self.cwd, f) + try: + img = scipy.ndimage.imread(filename, mode='RGB').astype(np.float32) + if img.shape[0] < args.batch_shape or img.shape[1] < args.batch_shape: + raise ValueError('Image is too small for training with size {}'.format(img.shape)) + except Exception as e: + warn('Could not load `{}` as image.'.format(filename), + ' - Try fixing or removing the file before next run.') + self.files.remove(f) + return + + img = scipy.ndimage.gaussian_blur(img, sigma=args.train_blur) if args.train_blur else img + img += scipy.random.normal(scale=args.train_noise) if args.train_noise else 0.0 + if args.train_jpeg: + buffer = io.BytesIO() + scipy.misc.toimage(img, cmin=0, cmax=255).save(buffer, format='jpeg', quality=args.train_jpeg) + with PIL.Image.open(buffer) as compressed: + img = scipy.misc.fromimage(compressed, mode='RGB') + + for _ in range(args.buffer_similar): + copy = img[:,::-1] if random.choice([True, False]) else img + h = random.randint(0, copy.shape[0] - self.orig_shape) + w = random.randint(0, copy.shape[1] - self.orig_shape) + copy = copy[h:h+self.orig_shape, w:w+self.orig_shape] + + while len(self.available) == 0: + self.data_copied.wait() + self.data_copied.clear() + + i = self.available.pop() + self.orig_buffer[i] = np.transpose(copy / 255.0 - 0.5, (2, 0, 1)) + seed = scipy.misc.imresize(copy, size=(self.seed_shape, self.seed_shape), interp='bilinear') + self.seed_buffer[i] = np.transpose(seed / 255.0 - 0.5, (2, 0, 1)) + self.ready.add(i) + + if len(self.ready) >= args.batch_size: + self.data_ready.set() def copy(self, origs_out, seeds_out): self.data_ready.wait() From f2494f80781ee6d8bcc89f7cb2fb3476a20e652a Mon Sep 17 00:00:00 2001 From: "Alex J. Champandard" Date: Mon, 31 Oct 2016 13:49:17 +0100 Subject: [PATCH 02/15] Add new downscale layers, separate from upscale steps. Renamed --scales to --zoom for inference. --- enhance.py | 48 ++++++++++++++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/enhance.py b/enhance.py index 9da77d5..0e325f5 100755 --- a/enhance.py +++ b/enhance.py @@ -36,7 +36,7 @@ parser = argparse.ArgumentParser(description='Generate a new image by applying s formatter_class=argparse.ArgumentDefaultsHelpFormatter) add_arg = parser.add_argument add_arg('files', nargs='*', default=[]) -add_arg('--scales', default=2, type=int, help='How many times to perform 2x upsampling.') +add_arg('--zoom', default=4, type=int, help='Resolution increase factor for inference.') add_arg('--model', default='small', type=str, help='Name of the neural network to load/save.') add_arg('--train', default=False, type=str, help='File pattern to load for training.') add_arg('--train-blur', default=None, type=float, help='Sigma value for gaussian blur preprocess.') @@ -50,8 +50,10 @@ add_arg('--batch-size', default=15, type=int, help='Number add_arg('--buffer-size', default=1500, type=int, help='Total image fragments kept in cache.') add_arg('--buffer-similar', default=5, type=int, help='Fragments cached for each image loaded.') add_arg('--learning-rate', default=1E-4, type=float, help='Parameter for the ADAM optimizer.') -add_arg('--learning-period', default=50, type=int, help='How often to decay the learning rate.') +add_arg('--learning-period', default=100, type=int, help='How often to decay the learning rate.') add_arg('--learning-decay', default=0.5, type=float, help='How much to decay the learning rate.') +add_arg('--generator-upscale', default=2, type=int, help='Steps of 2x up-sampling as post-process.') +add_arg('--generator-downscale',default=0, type=int, help='Steps of 2x down-sampling as preprocess.') add_arg('--generator-filters', default=[64], nargs='+', type=int, help='Number of convolution units in network.') add_arg('--generator-blocks', default=4, type=int, help='Number of residual blocks per iteration.') add_arg('--generator-residual', default=2, type=int, help='Number of layers in a residual block.') @@ -59,7 +61,7 @@ add_arg('--perceptual-layer', default='conv2_2', type=str, help='Which add_arg('--perceptual-weight', default=1e0, type=float, help='Weight for VGG-layer perceptual loss.') add_arg('--discriminator-size', default=32, type=int, help='Multiplier for number of filters in D.') add_arg('--smoothness-weight', default=2e5, type=float, help='Weight of the total-variation loss.') -add_arg('--adversary-weight', default=1e2, type=float, help='Weight of adversarial loss compoment.') +add_arg('--adversary-weight', default=5e2, type=float, help='Weight of adversarial loss compoment.') add_arg('--generator-start', default=0, type=int, help='Epoch count to start training generator.') add_arg('--discriminator-start',default=1, type=int, help='Epoch count to update the discriminator.') add_arg('--adversarial-start', default=2, type=int, help='Epoch for generator to use discriminator.') @@ -132,7 +134,7 @@ class DataLoader(threading.Thread): self.data_ready = threading.Event() self.data_copied = threading.Event() - self.orig_shape, self.seed_shape = args.batch_shape, int(args.batch_shape / 2**args.scales) + self.orig_shape, self.seed_shape = args.batch_shape, int(args.batch_shape / args.zoom) self.orig_buffer = np.zeros((args.buffer_size, 3, self.orig_shape, self.orig_shape), dtype=np.float32) self.seed_buffer = np.zeros((args.buffer_size, 3, self.seed_shape, self.seed_shape), dtype=np.float32) @@ -199,9 +201,7 @@ class DataLoader(threading.Thread): for i, j in enumerate(random.sample(self.ready, args.batch_size)): origs_out[i] = self.orig_buffer[j] seeds_out[i] = self.seed_buffer[j] - self.available.add(j) - self.data_copied.set() @@ -269,20 +269,26 @@ class Model(object): def setup_generator(self, input, config): for k, v in config.items(): setattr(args, k, v) + args.zoom = 2**(args.generator_upscale - args.generator_downscale) + units_iter = extend(args.generator_filters) units = next(units_iter) self.make_layer('iter.0-A', input, units, filter_size=(5,5), pad=(2,2)) self.make_layer('iter.0-B', self.last_layer(), units, filter_size=(5,5), pad=(2,2)) self.network['iter.0'] = self.last_layer() + for i in range(0, args.generator_downscale): + self.make_layer('downscale%i'%i, self.last_layer(), next(units_iter), filter_size=(4,4), stride=(2,2)) + + units = next(units_iter) for i in range(0, args.generator_blocks): self.make_block('iter.%i'%(i+1), self.last_layer(), units) - for i in range(0, args.scales): + for i in range(0, args.generator_upscale): u = next(units_iter) - self.make_layer('scale%i.3'%i, self.last_layer(), u*4) - self.network['scale%i.2'%i] = SubpixelReshuffleLayer(self.last_layer(), u, 2) - self.make_layer('scale%i.1'%i, self.last_layer(), u) + self.make_layer('upscale%i.3'%i, self.last_layer(), u*4) + self.network['upscale%i.2'%i] = SubpixelReshuffleLayer(self.last_layer(), u, 2) + self.make_layer('upscale%i.1'%i, self.last_layer(), u) self.network['out'] = ConvLayer(self.last_layer(), 3, filter_size=(5,5), stride=(1,1), pad=(2,2), nonlinearity=lasagne.nonlinearities.tanh) @@ -355,13 +361,14 @@ class Model(object): def save_generator(self): def cast(p): return p.get_value().astype(np.float16) params = {k: [cast(p) for p in l.get_params()] for (k, l) in self.list_generator_layers()} - config = {k: getattr(args, k) for k in ['generator_blocks', 'generator_residual', 'generator_filters']} - filename = 'ne%ix-%s-%s.pkl.bz2' % (2**args.scales, args.model, __version__) + config = {k: getattr(args, k) for k in ['generator_blocks', 'generator_residual', 'generator_filters'] + \ + ['generator_upscale', 'generator_downscale']} + filename = 'ne%ix-%s-%s.pkl.bz2' % (args.zoom, args.model, __version__) pickle.dump((config, params), bz2.open(filename, 'wb')) print(' - Saved model as `{}` after training.'.format(filename)) def load_model(self): - filename = 'ne%ix-%s-%s.pkl.bz2' % (2**args.scales, args.model, __version__) + filename = 'ne%ix-%s-%s.pkl.bz2' % (args.zoom, args.model, __version__) if not os.path.exists(filename): if args.train: return {}, {} error("Model file with pre-trained convolution layers not found. Download it here...", @@ -431,7 +438,7 @@ class Model(object): class NeuralEnhancer(object): - def __init__(self): + def __init__(self, loader): if args.train: print('{}Training {} epochs on random image sections with batch size {}.{}'\ .format(ansi.BLUE_B, args.epochs, args.batch_size, ansi.BLUE)) @@ -440,8 +447,8 @@ class NeuralEnhancer(object): print('{}Enhancing {} image(s) specified on the command-line.{}'\ .format(ansi.BLUE_B, len(args.files), ansi.BLUE)) - self.thread = DataLoader() if args.train else None self.model = Model() + self.thread = DataLoader() if loader else None print('{}'.format(ansi.ENDC)) @@ -466,7 +473,7 @@ class NeuralEnhancer(object): if t_cur % args.learning_period == 0: l_r *= args.learning_decay def train(self): - seed_size = int(args.batch_shape / 2**args.scales) + seed_size = int(args.batch_shape / args.zoom) images = np.zeros((args.batch_size, 3, args.batch_shape, args.batch_shape), dtype=np.float32) seeds = np.zeros((args.batch_size, 3, seed_size, seed_size), dtype=np.float32) learning_rate = self.decay_learning_rate() @@ -512,7 +519,7 @@ class NeuralEnhancer(object): pass print('\n{}Trained {}x super-resolution for {} epochs.{}'\ - .format(ansi.CYAN_B, 2**args.scales, epoch+1, ansi.CYAN)) + .format(ansi.CYAN_B, args.zoom, epoch+1, ansi.CYAN)) self.model.save_generator() print(ansi.ENDC) @@ -524,11 +531,12 @@ class NeuralEnhancer(object): if __name__ == "__main__": - enhancer = NeuralEnhancer() - if args.train: + args.zoom = 2**(args.generator_upscale - args.generator_downscale) + enhancer = NeuralEnhancer(loader=True) enhancer.train() else: + enhancer = NeuralEnhancer(loader=False) for filename in args.files: print(filename) img = scipy.ndimage.imread(filename, mode='RGB') @@ -538,5 +546,5 @@ if __name__ == "__main__": continue out = enhancer.process(img) - out.save(os.path.splitext(filename)[0]+'_ne%ix.png'%(2**args.scales)) + out.save(os.path.splitext(filename)[0]+'_ne%ix.png'%args.zoom) print(ansi.ENDC) From c610623b11bcb3b8b8a07d4ba4bc4ad53a13ed3a Mon Sep 17 00:00:00 2001 From: "Alex J. Champandard" Date: Mon, 31 Oct 2016 21:38:23 +0100 Subject: [PATCH 03/15] Add gradient clipping, helpful for preventing problems with extreme parameters/architectures. --- enhance.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/enhance.py b/enhance.py index 0e325f5..ea907e8 100755 --- a/enhance.py +++ b/enhance.py @@ -428,7 +428,8 @@ class Model(object): disc_losses = [self.loss_discriminator(disc_out)] disc_params = list(itertools.chain(*[l.get_params() for k, l in self.network.items() if 'disc' in k])) print(' - {} tensors learned for discriminator.'.format(len(disc_params))) - disc_updates = lasagne.updates.adam(sum(disc_losses, 0.0), disc_params, learning_rate=self.disc_lr) + grads = T.grad(sum(disc_losses, 0.0), disc_params).clip(-1.0, 1.0) + disc_updates = lasagne.updates.adam(grads, disc_params, learning_rate=self.disc_lr) # Combined Theano function for updating both generator and discriminator at the same time. updates = collections.OrderedDict(list(gen_updates.items()) + list(disc_updates.items())) From cf65207a2e99c847009c66a8e82f718b70fd3715 Mon Sep 17 00:00:00 2001 From: "Alex J. Champandard" Date: Tue, 1 Nov 2016 16:02:18 +0100 Subject: [PATCH 04/15] Use full range of tanh output rather than [-0.5, +0.5], avoids clipping. --- enhance.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/enhance.py b/enhance.py index ea907e8..781efd4 100755 --- a/enhance.py +++ b/enhance.py @@ -186,9 +186,9 @@ class DataLoader(threading.Thread): self.data_copied.clear() i = self.available.pop() - self.orig_buffer[i] = np.transpose(copy / 255.0 - 0.5, (2, 0, 1)) + self.orig_buffer[i] = np.transpose(copy / 127.5 - 1.0, (2, 0, 1)) seed = scipy.misc.imresize(copy, size=(self.seed_shape, self.seed_shape), interp='bilinear') - self.seed_buffer[i] = np.transpose(seed / 255.0 - 0.5, (2, 0, 1)) + self.seed_buffer[i] = np.transpose(seed / 127.5 - 1.0, (2, 0, 1)) self.ready.add(i) if len(self.ready) >= args.batch_size: @@ -298,7 +298,7 @@ class Model(object): """ offset = np.array([103.939, 116.779, 123.680], dtype=np.float32).reshape((1,3,1,1)) - self.network['percept'] = lasagne.layers.NonlinearityLayer(input, lambda x: ((x+0.5).clip(0.0, 1.0)*255.0) - offset) + self.network['percept'] = lasagne.layers.NonlinearityLayer(input, lambda x: ((x+1.0)*127.5) - offset) self.network['mse'] = self.network['percept'] self.network['conv1_1'] = ConvLayer(self.network['percept'], 64, 3, pad=1) @@ -399,7 +399,7 @@ class Model(object): return T.mean(1.0 - T.nnet.softplus(d[args.batch_size:])) def loss_discriminator(self, d): - return T.mean(T.nnet.softminus(d[args.batch_size:]) - T.nnet.softplus(d[:args.batch_size])) + return T.mean(T.nnet.softplus(d[:args.batch_size]) - T.nnet.softminus(d[args.batch_size:])) def compile(self): # Helper function for rendering test images during training, or standalone non-training mode. @@ -454,8 +454,8 @@ class NeuralEnhancer(object): print('{}'.format(ansi.ENDC)) def imsave(self, fn, img): - img = np.transpose(img + 0.5, (1, 2, 0)).clip(0.0, 1.0) - image = scipy.misc.toimage(img * 255.0, cmin=0, cmax=255) + img = np.transpose(img + 1.0, (1, 2, 0)).clip(0.0, 1.0) + image = scipy.misc.toimage(img * 127.5, cmin=0, cmax=255) image.save(fn) def show_progress(self, orign, scald, repro): @@ -525,10 +525,10 @@ class NeuralEnhancer(object): print(ansi.ENDC) def process(self, image): - img = np.transpose(image / 255.0 - 0.5, (2, 0, 1))[np.newaxis].astype(np.float32) + img = np.transpose(image / 127.5 - 1.0, (2, 0, 1))[np.newaxis].astype(np.float32) *_, repro = self.model.predict(img) - repro = np.transpose(repro[0] + 0.5, (1, 2, 0)).clip(0.0, 1.0) - return scipy.misc.toimage(repro * 255.0, cmin=0, cmax=255) + repro = np.transpose(repro[0] + 1.0, (1, 2, 0)) + return scipy.misc.toimage(repro * 127.5, cmin=0, cmax=255) if __name__ == "__main__": From 11ba505252b41bfee8273391a5aed0fadba84f29 Mon Sep 17 00:00:00 2001 From: "Alex J. Champandard" Date: Tue, 1 Nov 2016 16:04:33 +0100 Subject: [PATCH 05/15] Fix for gradient clipping code. --- enhance.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/enhance.py b/enhance.py index 781efd4..9998f89 100755 --- a/enhance.py +++ b/enhance.py @@ -428,7 +428,7 @@ class Model(object): disc_losses = [self.loss_discriminator(disc_out)] disc_params = list(itertools.chain(*[l.get_params() for k, l in self.network.items() if 'disc' in k])) print(' - {} tensors learned for discriminator.'.format(len(disc_params))) - grads = T.grad(sum(disc_losses, 0.0), disc_params).clip(-1.0, 1.0) + grads = [g.clip(-1.0, +1.0) for g in T.grad(sum(disc_losses, 0.0), disc_params)] disc_updates = lasagne.updates.adam(grads, disc_params, learning_rate=self.disc_lr) # Combined Theano function for updating both generator and discriminator at the same time. From 93e5a41d9a2088d6ffa474a0741ff0b8f19e6f02 Mon Sep 17 00:00:00 2001 From: "Alex J. Champandard" Date: Tue, 1 Nov 2016 20:40:10 +0100 Subject: [PATCH 06/15] Fix and optimize pre-processing of images. --- enhance.py | 47 +++++++++++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/enhance.py b/enhance.py index 9998f89..2662ebb 100755 --- a/enhance.py +++ b/enhance.py @@ -39,8 +39,8 @@ add_arg('files', nargs='*', default=[]) add_arg('--zoom', default=4, type=int, help='Resolution increase factor for inference.') add_arg('--model', default='small', type=str, help='Name of the neural network to load/save.') add_arg('--train', default=False, type=str, help='File pattern to load for training.') -add_arg('--train-blur', default=None, type=float, help='Sigma value for gaussian blur preprocess.') -add_arg('--train-noise', default=None, type=float, help='Sigma of normal distribution in preproc.') +add_arg('--train-blur', default=None, type=int, help='Sigma value for gaussian blur preprocess.') +add_arg('--train-noise', default=None, type=float, help='Radius for preprocessing gaussian blur.') add_arg('--train-jpeg', default=None, type=int, help='JPEG compression level in preprocessing.') add_arg('--epochs', default=10, type=int, help='Total number of iterations in training.') add_arg('--epoch-size', default=72, type=int, help='Number of batches trained in an epoch.') @@ -158,8 +158,10 @@ class DataLoader(threading.Thread): def add_to_buffer(self, f): filename = os.path.join(self.cwd, f) try: - img = scipy.ndimage.imread(filename, mode='RGB').astype(np.float32) - if img.shape[0] < args.batch_shape or img.shape[1] < args.batch_shape: + orig = PIL.Image.open(filename).convert('RGB') + if all(s > args.batch_shape * 2 for s in orig.size): + orig = orig.resize((orig.size[0]//2, orig.size[1]//2), resample=PIL.Image.LANCZOS) + if any(s < args.batch_shape * 2 for s in orig.size): raise ValueError('Image is too small for training with size {}'.format(img.shape)) except Exception as e: warn('Could not load `{}` as image.'.format(filename), @@ -167,28 +169,35 @@ class DataLoader(threading.Thread): self.files.remove(f) return - img = scipy.ndimage.gaussian_blur(img, sigma=args.train_blur) if args.train_blur else img - img += scipy.random.normal(scale=args.train_noise) if args.train_noise else 0.0 + seed = orig.filter(PIL.ImageFilter.GaussianBlur(radius=args.train_blur)) if args.train_blur else orig + seed = seed.resize((orig.size[0]//args.zoom, orig.size[1]//args.zoom), resample=PIL.Image.LANCZOS) + seed = scipy.misc.fromimage(seed).astype(np.float32) + seed += scipy.random.normal(scale=args.train_noise, size=(seed.shape[0], seed.shape[1], 1)) if args.train_noise else 0.0 + + """ if args.train_jpeg: buffer = io.BytesIO() - scipy.misc.toimage(img, cmin=0, cmax=255).save(buffer, format='jpeg', quality=args.train_jpeg) + scipy.misc.toimage(seed, cmin=0, cmax=255).save(buffer, format='jpeg', quality=args.train_jpeg) with PIL.Image.open(buffer) as compressed: img = scipy.misc.fromimage(compressed, mode='RGB') + """ + + orig = scipy.misc.fromimage(orig).astype(np.float32) for _ in range(args.buffer_similar): - copy = img[:,::-1] if random.choice([True, False]) else img - h = random.randint(0, copy.shape[0] - self.orig_shape) - w = random.randint(0, copy.shape[1] - self.orig_shape) - copy = copy[h:h+self.orig_shape, w:w+self.orig_shape] + h = random.randint(0, seed.shape[0] - self.seed_shape) + w = random.randint(0, seed.shape[1] - self.seed_shape) + seed_chunk = seed[h:h+self.seed_shape, w:w+self.seed_shape] + h, w = h * args.zoom, w * args.zoom + orig_chunk = orig[h:h+self.orig_shape, w:w+self.orig_shape] while len(self.available) == 0: self.data_copied.wait() self.data_copied.clear() i = self.available.pop() - self.orig_buffer[i] = np.transpose(copy / 127.5 - 1.0, (2, 0, 1)) - seed = scipy.misc.imresize(copy, size=(self.seed_shape, self.seed_shape), interp='bilinear') - self.seed_buffer[i] = np.transpose(seed / 127.5 - 1.0, (2, 0, 1)) + self.orig_buffer[i] = np.transpose(orig_chunk.astype(np.float32) / 127.5 - 1.0, (2, 0, 1)) + self.seed_buffer[i] = np.transpose(seed_chunk.astype(np.float32) / 127.5 - 1.0, (2, 0, 1)) self.ready.add(i) if len(self.ready) >= args.batch_size: @@ -399,7 +408,7 @@ class Model(object): return T.mean(1.0 - T.nnet.softplus(d[args.batch_size:])) def loss_discriminator(self, d): - return T.mean(T.nnet.softplus(d[:args.batch_size]) - T.nnet.softminus(d[args.batch_size:])) + return T.mean(T.nnet.softminus(d[args.batch_size:]) - T.nnet.softplus(d[:args.batch_size])) def compile(self): # Helper function for rendering test images during training, or standalone non-training mode. @@ -454,9 +463,7 @@ class NeuralEnhancer(object): print('{}'.format(ansi.ENDC)) def imsave(self, fn, img): - img = np.transpose(img + 1.0, (1, 2, 0)).clip(0.0, 1.0) - image = scipy.misc.toimage(img * 127.5, cmin=0, cmax=255) - image.save(fn) + scipy.misc.toimage(np.transpose(img + 1.0, (1, 2, 0)) * 127.5, cmin=0, cmax=255).save(fn) def show_progress(self, orign, scald, repro): os.makedirs('valid', exist_ok=True) @@ -503,7 +510,7 @@ class NeuralEnhancer(object): stats /= args.epoch_size totals, labels = [sum(total)] + list(total), ['total', 'prcpt', 'smthn', 'advrs'] gen_info = ['{}{}{}={:4.2e}'.format(ansi.WHITE_B, k, ansi.ENDC, v) for k, v in zip(labels, totals)] - print('\rEpoch #{} at {:4.1f}s, lr={:4.2e} {}'.format(epoch+1, time.time()-start, l_r, ' '*args.epoch_size)) + print('\rEpoch #{} at {:4.1f}s, lr={:4.2e}{}'.format(epoch+1, time.time()-start, l_r, ' '*(args.epoch_size-60))) print(' - generator {}'.format(' '.join(gen_info))) real, fake = stats[:args.batch_size], stats[args.batch_size:] @@ -547,5 +554,5 @@ if __name__ == "__main__": continue out = enhancer.process(img) - out.save(os.path.splitext(filename)[0]+'_ne%ix.png'%args.zoom) + out.save(os.path.splitext(filename)[0]+'_ne%ix.png' % args.zoom) print(ansi.ENDC) From 7924cc4a856206c17529b7236ea4588c44341a64 Mon Sep 17 00:00:00 2001 From: "Alex J. Champandard" Date: Wed, 2 Nov 2016 10:16:31 +0100 Subject: [PATCH 07/15] Improve display and filenames for saving output. --- enhance.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/enhance.py b/enhance.py index 2662ebb..882fa68 100755 --- a/enhance.py +++ b/enhance.py @@ -14,7 +14,7 @@ # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # -__version__ = '0.1' +__version__ = '0.2' import io import os @@ -159,10 +159,10 @@ class DataLoader(threading.Thread): filename = os.path.join(self.cwd, f) try: orig = PIL.Image.open(filename).convert('RGB') - if all(s > args.batch_shape * 2 for s in orig.size): - orig = orig.resize((orig.size[0]//2, orig.size[1]//2), resample=PIL.Image.LANCZOS) - if any(s < args.batch_shape * 2 for s in orig.size): - raise ValueError('Image is too small for training with size {}'.format(img.shape)) + # if all(s > args.batch_shape * 2 for s in orig.size): + # orig = orig.resize((orig.size[0]//2, orig.size[1]//2), resample=PIL.Image.LANCZOS) + if any(s < args.batch_shape for s in orig.size): + raise ValueError('Image is too small for training with size {}'.format(orig.size)) except Exception as e: warn('Could not load `{}` as image.'.format(filename), ' - Try fixing or removing the file before next run.') @@ -468,9 +468,9 @@ class NeuralEnhancer(object): def show_progress(self, orign, scald, repro): os.makedirs('valid', exist_ok=True) for i in range(args.batch_size): - self.imsave('valid/%03i_origin.png' % i, orign[i]) - self.imsave('valid/%03i_pixels.png' % i, scald[i]) - self.imsave('valid/%03i_reprod.png' % i, repro[i]) + self.imsave('valid/%s_%03i_origin.png' % (args.model, i), orign[i]) + self.imsave('valid/%s_%03i_pixels.png' % (args.model, i), scald[i]) + self.imsave('valid/%s_%03i_reprod.png' % (args.model, i), repro[i]) def decay_learning_rate(self): l_r, t_cur = args.learning_rate, 0 @@ -510,7 +510,7 @@ class NeuralEnhancer(object): stats /= args.epoch_size totals, labels = [sum(total)] + list(total), ['total', 'prcpt', 'smthn', 'advrs'] gen_info = ['{}{}{}={:4.2e}'.format(ansi.WHITE_B, k, ansi.ENDC, v) for k, v in zip(labels, totals)] - print('\rEpoch #{} at {:4.1f}s, lr={:4.2e}{}'.format(epoch+1, time.time()-start, l_r, ' '*(args.epoch_size-60))) + print('\rEpoch #{} at {:4.1f}s, lr={:4.2e}{}'.format(epoch+1, time.time()-start, l_r, ' '*(args.epoch_size-35))) print(' - generator {}'.format(' '.join(gen_info))) real, fake = stats[:args.batch_size], stats[args.batch_size:] From 3b2a6b9d8de12a47aec82c1e873443383f7bba47 Mon Sep 17 00:00:00 2001 From: "Alex J. Champandard" Date: Wed, 2 Nov 2016 12:08:07 +0100 Subject: [PATCH 08/15] Add extra padding on input to avoid zero-padding. Experiment with training values from ENet (segmentation). --- enhance.py | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/enhance.py b/enhance.py index 882fa68..60128cf 100755 --- a/enhance.py +++ b/enhance.py @@ -46,10 +46,10 @@ add_arg('--epochs', default=10, type=int, help='Total add_arg('--epoch-size', default=72, type=int, help='Number of batches trained in an epoch.') add_arg('--save-every', default=10, type=int, help='Save generator after every training epoch.') add_arg('--batch-shape', default=192, type=int, help='Resolution of images in training batch.') -add_arg('--batch-size', default=15, type=int, help='Number of images per training batch.') +add_arg('--batch-size', default=10, type=int, help='Number of images per training batch.') add_arg('--buffer-size', default=1500, type=int, help='Total image fragments kept in cache.') add_arg('--buffer-similar', default=5, type=int, help='Fragments cached for each image loaded.') -add_arg('--learning-rate', default=1E-4, type=float, help='Parameter for the ADAM optimizer.') +add_arg('--learning-rate', default=5E-4, type=float, help='Parameter for the ADAM optimizer.') add_arg('--learning-period', default=100, type=int, help='How often to decay the learning rate.') add_arg('--learning-decay', default=0.5, type=float, help='How much to decay the learning rate.') add_arg('--generator-upscale', default=2, type=int, help='Steps of 2x up-sampling as post-process.') @@ -248,14 +248,25 @@ class Model(object): config, params = self.load_model() self.setup_generator(self.last_layer(), config) + # Compute batch-size to take into account there's no zero-padding of generator convolution layers. + s = args.batch_shape // args.zoom + current = lasagne.layers.helper.get_output_shape(self.network['out'], {self.network['seed']: (1, 3, s, s)}) + args.batch_shape = args.batch_shape * 2 - current[2] + + self.network['img'].shape = (args.batch_size, 3, args.batch_shape, args.batch_shape) + self.network['seed'].shape = (args.batch_size, 3, args.batch_shape // args.zoom, args.batch_shape // args.zoom) + # How to re-force this to compute more elegantly using Lasagne? + self.network['out'].input_shape = lasagne.layers.get_output_shape(self.network['out'].input_layer, + {self.network['seed']: self.network['seed'].shape}) + if args.train: - concatenated = lasagne.layers.ConcatLayer([self.network['img'], self.network['out']], axis=0) + concatenated = lasagne.layers.ConcatLayer([self.network['img'], self.network['out']], + axis=0, cropping=(None, None, 'center', 'center')) self.setup_perceptual(concatenated) self.load_perceptual() self.setup_discriminator() self.load_generator(params) - self.compile() #------------------------------------------------------------------------------------------------------------------ # Network Configuration @@ -265,7 +276,7 @@ class Model(object): return list(self.network.values())[-1] def make_layer(self, name, input, units, filter_size=(3,3), stride=(1,1), pad=(1,1), alpha=0.25): - conv = ConvLayer(input, units, filter_size=filter_size, stride=stride, pad=pad, nonlinearity=None) + conv = ConvLayer(input, units, filter_size, stride=stride, pad=self.pad_override or pad, nonlinearity=None) prelu = lasagne.layers.ParametricRectifierLayer(conv, alpha=lasagne.init.Constant(alpha)) self.network[name+'x'] = conv self.network[name+'>'] = prelu @@ -277,6 +288,7 @@ class Model(object): return ElemwiseSumLayer([input, self.last_layer()]) if args.generator_residual else self.last_layer() def setup_generator(self, input, config): + self.pad_override = (0, 0) for k, v in config.items(): setattr(args, k, v) args.zoom = 2**(args.generator_upscale - args.generator_downscale) @@ -301,6 +313,7 @@ class Model(object): self.network['out'] = ConvLayer(self.last_layer(), 3, filter_size=(5,5), stride=(1,1), pad=(2,2), nonlinearity=lasagne.nonlinearities.tanh) + self.pad_override = None def setup_perceptual(self, input): """Use lasagne to create a network of convolution layers using pre-trained VGG19 weights. @@ -405,13 +418,13 @@ class Model(object): return T.mean(((x[:,:,:-1,:-1] - x[:,:,1:,:-1])**2 + (x[:,:,:-1,:-1] - x[:,:,:-1,1:])**2)**1.25) def loss_adversarial(self, d): - return T.mean(1.0 - T.nnet.softplus(d[args.batch_size:])) + return T.mean(1.0 - T.nnet.softminus(d[args.batch_size:])) def loss_discriminator(self, d): return T.mean(T.nnet.softminus(d[args.batch_size:]) - T.nnet.softplus(d[:args.batch_size])) def compile(self): - # Helper function for rendering test images during training, or standalone non-training mode. + # Helper function for rendering test images during training, or standalone inference mode. input_tensor, seed_tensor = T.tensor4(), T.tensor4() input_layers = {self.network['img']: input_tensor, self.network['seed']: seed_tensor} output = lasagne.layers.get_output([self.network[k] for k in ['seed', 'out']], input_layers, deterministic=True) @@ -437,7 +450,7 @@ class Model(object): disc_losses = [self.loss_discriminator(disc_out)] disc_params = list(itertools.chain(*[l.get_params() for k, l in self.network.items() if 'disc' in k])) print(' - {} tensors learned for discriminator.'.format(len(disc_params))) - grads = [g.clip(-1.0, +1.0) for g in T.grad(sum(disc_losses, 0.0), disc_params)] + grads = [g.clip(-5.0, +5.0) for g in T.grad(sum(disc_losses, 0.0), disc_params)] disc_updates = lasagne.updates.adam(grads, disc_params, learning_rate=self.disc_lr) # Combined Theano function for updating both generator and discriminator at the same time. @@ -459,6 +472,7 @@ class NeuralEnhancer(object): self.model = Model() self.thread = DataLoader() if loader else None + self.model.compile() print('{}'.format(ansi.ENDC)) @@ -476,7 +490,7 @@ class NeuralEnhancer(object): l_r, t_cur = args.learning_rate, 0 while True: - yield l_r if t_cur > 0 else l_r * 0.1 + yield l_r t_cur += 1 if t_cur % args.learning_period == 0: l_r *= args.learning_decay @@ -510,7 +524,7 @@ class NeuralEnhancer(object): stats /= args.epoch_size totals, labels = [sum(total)] + list(total), ['total', 'prcpt', 'smthn', 'advrs'] gen_info = ['{}{}{}={:4.2e}'.format(ansi.WHITE_B, k, ansi.ENDC, v) for k, v in zip(labels, totals)] - print('\rEpoch #{} at {:4.1f}s, lr={:4.2e}{}'.format(epoch+1, time.time()-start, l_r, ' '*(args.epoch_size-35))) + print('\rEpoch #{} at {:4.1f}s, lr={:4.2e}{}'.format(epoch+1, time.time()-start, l_r, ' '*(args.epoch_size-30))) print(' - generator {}'.format(' '.join(gen_info))) real, fake = stats[:args.batch_size], stats[args.batch_size:] From 90c0b7ea43a5a84f33daa1bf7720e4755e4842e8 Mon Sep 17 00:00:00 2001 From: "Alex J. Champandard" Date: Wed, 2 Nov 2016 13:03:00 +0100 Subject: [PATCH 09/15] Fix padding code, more reliable for specific upscale/downscale combinations. --- enhance.py | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/enhance.py b/enhance.py index 60128cf..8c702df 100755 --- a/enhance.py +++ b/enhance.py @@ -45,7 +45,7 @@ add_arg('--train-jpeg', default=None, type=int, help='JPEG c add_arg('--epochs', default=10, type=int, help='Total number of iterations in training.') add_arg('--epoch-size', default=72, type=int, help='Number of batches trained in an epoch.') add_arg('--save-every', default=10, type=int, help='Save generator after every training epoch.') -add_arg('--batch-shape', default=192, type=int, help='Resolution of images in training batch.') +add_arg('--batch-shape', default=256, type=int, help='Resolution of images in training batch.') add_arg('--batch-size', default=10, type=int, help='Number of images per training batch.') add_arg('--buffer-size', default=1500, type=int, help='Total image fragments kept in cache.') add_arg('--buffer-similar', default=5, type=int, help='Fragments cached for each image loaded.') @@ -171,16 +171,14 @@ class DataLoader(threading.Thread): seed = orig.filter(PIL.ImageFilter.GaussianBlur(radius=args.train_blur)) if args.train_blur else orig seed = seed.resize((orig.size[0]//args.zoom, orig.size[1]//args.zoom), resample=PIL.Image.LANCZOS) - seed = scipy.misc.fromimage(seed).astype(np.float32) - seed += scipy.random.normal(scale=args.train_noise, size=(seed.shape[0], seed.shape[1], 1)) if args.train_noise else 0.0 - """ if args.train_jpeg: buffer = io.BytesIO() - scipy.misc.toimage(seed, cmin=0, cmax=255).save(buffer, format='jpeg', quality=args.train_jpeg) - with PIL.Image.open(buffer) as compressed: - img = scipy.misc.fromimage(compressed, mode='RGB') - """ + seed.save(buffer, format='jpeg', quality=args.train_jpeg+random.randrange(-15,+15)) + seed = PIL.Image.open(buffer) + + seed = scipy.misc.fromimage(seed, mode='RGB').astype(np.float32) + seed += scipy.random.normal(scale=args.train_noise, size=(seed.shape[0], seed.shape[1], 1)) if args.train_noise else 0.0 orig = scipy.misc.fromimage(orig).astype(np.float32) @@ -251,13 +249,13 @@ class Model(object): # Compute batch-size to take into account there's no zero-padding of generator convolution layers. s = args.batch_shape // args.zoom current = lasagne.layers.helper.get_output_shape(self.network['out'], {self.network['seed']: (1, 3, s, s)}) - args.batch_shape = args.batch_shape * 2 - current[2] + args.batch_shape += int(args.batch_shape - current[2]) self.network['img'].shape = (args.batch_size, 3, args.batch_shape, args.batch_shape) self.network['seed'].shape = (args.batch_size, 3, args.batch_shape // args.zoom, args.batch_shape // args.zoom) # How to re-force this to compute more elegantly using Lasagne? self.network['out'].input_shape = lasagne.layers.get_output_shape(self.network['out'].input_layer, - {self.network['seed']: self.network['seed'].shape}) + {self.network['seed']: self.network['seed'].shape}) if args.train: concatenated = lasagne.layers.ConcatLayer([self.network['img'], self.network['out']], @@ -311,8 +309,8 @@ class Model(object): self.network['upscale%i.2'%i] = SubpixelReshuffleLayer(self.last_layer(), u, 2) self.make_layer('upscale%i.1'%i, self.last_layer(), u) - self.network['out'] = ConvLayer(self.last_layer(), 3, filter_size=(5,5), stride=(1,1), pad=(2,2), - nonlinearity=lasagne.nonlinearities.tanh) + self.network['out'] = ConvLayer(self.last_layer(), 3, filter_size=(3,3), stride=(1,1), + pad=self.pad_override or (1,1), nonlinearity=lasagne.nonlinearities.tanh) self.pad_override = None def setup_perceptual(self, input): @@ -495,7 +493,7 @@ class NeuralEnhancer(object): if t_cur % args.learning_period == 0: l_r *= args.learning_decay def train(self): - seed_size = int(args.batch_shape / args.zoom) + seed_size = args.batch_shape // args.zoom images = np.zeros((args.batch_size, 3, args.batch_shape, args.batch_shape), dtype=np.float32) seeds = np.zeros((args.batch_size, 3, seed_size, seed_size), dtype=np.float32) learning_rate = self.decay_learning_rate() From d18c08f1b59c95d15db03ba01141c2a7bb30d903 Mon Sep 17 00:00:00 2001 From: "Alex J. Champandard" Date: Wed, 2 Nov 2016 21:39:34 +0100 Subject: [PATCH 10/15] Integrated reflection padding instead of zero padding for extra quality during training and inference. --- enhance.py | 57 +++++++++++++++++++++++++++++++++--------------------- 1 file changed, 35 insertions(+), 22 deletions(-) diff --git a/enhance.py b/enhance.py index 8c702df..f54e062 100755 --- a/enhance.py +++ b/enhance.py @@ -236,6 +236,34 @@ class SubpixelReshuffleLayer(lasagne.layers.Layer): return out +class ReflectLayer(lasagne.layers.Layer): + """Based on more code by ajbrock: https://gist.github.com/ajbrock/a3858c26282d9731191901b397b3ce9f + """ + + def __init__(self, incoming, pad, batch_ndim=2, **kwargs): + super(ReflectLayer, self).__init__(incoming, **kwargs) + self.pad = (pad, pad) + self.batch_ndim = batch_ndim + + def get_output_shape_for(self, input_shape): + output_shape = list(input_shape) + for k, p in enumerate(self.pad): + if output_shape[k + self.batch_ndim] is None: continue + l, r = p, p + output_shape[k + self.batch_ndim] += l + r + return tuple(output_shape) + + def get_output_for(self, x, **kwargs): + out = T.zeros(self.get_output_shape_for(x.shape)) + p0, p1 = self.pad + out = T.set_subtensor(out[:,:,:p0,p1:-p1], x[:,:,p0:0:-1,:]) + out = T.set_subtensor(out[:,:,-p0:,p1:-p1], x[:,:,-2:-(2+p0):-1,:]) + out = T.set_subtensor(out[:,:,p0:-p0,p1:-p1], x) + out = T.set_subtensor(out[:,:,:,:p1], out[:,:,:,(2*p1):p1:-1]) + out = T.set_subtensor(out[:,:,:,-p1:], out[:,:,:,-(p1+2):-(2*p1+2):-1]) + return out + + class Model(object): def __init__(self): @@ -246,25 +274,13 @@ class Model(object): config, params = self.load_model() self.setup_generator(self.last_layer(), config) - # Compute batch-size to take into account there's no zero-padding of generator convolution layers. - s = args.batch_shape // args.zoom - current = lasagne.layers.helper.get_output_shape(self.network['out'], {self.network['seed']: (1, 3, s, s)}) - args.batch_shape += int(args.batch_shape - current[2]) - - self.network['img'].shape = (args.batch_size, 3, args.batch_shape, args.batch_shape) - self.network['seed'].shape = (args.batch_size, 3, args.batch_shape // args.zoom, args.batch_shape // args.zoom) - # How to re-force this to compute more elegantly using Lasagne? - self.network['out'].input_shape = lasagne.layers.get_output_shape(self.network['out'].input_layer, - {self.network['seed']: self.network['seed'].shape}) - if args.train: - concatenated = lasagne.layers.ConcatLayer([self.network['img'], self.network['out']], - axis=0, cropping=(None, None, 'center', 'center')) + concatenated = lasagne.layers.ConcatLayer([self.network['img'], self.network['out']], axis=0) self.setup_perceptual(concatenated) self.load_perceptual() self.setup_discriminator() self.load_generator(params) - + self.compile() #------------------------------------------------------------------------------------------------------------------ # Network Configuration @@ -274,7 +290,8 @@ class Model(object): return list(self.network.values())[-1] def make_layer(self, name, input, units, filter_size=(3,3), stride=(1,1), pad=(1,1), alpha=0.25): - conv = ConvLayer(input, units, filter_size, stride=stride, pad=self.pad_override or pad, nonlinearity=None) + reflected = ReflectLayer(input, pad=pad[0]) if pad[0] > 0 else input + conv = ConvLayer(reflected, units, filter_size, stride=stride, pad=(0,0), nonlinearity=None) prelu = lasagne.layers.ParametricRectifierLayer(conv, alpha=lasagne.init.Constant(alpha)) self.network[name+'x'] = conv self.network[name+'>'] = prelu @@ -286,7 +303,6 @@ class Model(object): return ElemwiseSumLayer([input, self.last_layer()]) if args.generator_residual else self.last_layer() def setup_generator(self, input, config): - self.pad_override = (0, 0) for k, v in config.items(): setattr(args, k, v) args.zoom = 2**(args.generator_upscale - args.generator_downscale) @@ -309,14 +325,12 @@ class Model(object): self.network['upscale%i.2'%i] = SubpixelReshuffleLayer(self.last_layer(), u, 2) self.make_layer('upscale%i.1'%i, self.last_layer(), u) - self.network['out'] = ConvLayer(self.last_layer(), 3, filter_size=(3,3), stride=(1,1), - pad=self.pad_override or (1,1), nonlinearity=lasagne.nonlinearities.tanh) - self.pad_override = None + self.network['out'] = ConvLayer(self.last_layer(), 3, filter_size=(3,3), stride=(1,1), pad=(1,1), + nonlinearity=lasagne.nonlinearities.tanh) def setup_perceptual(self, input): """Use lasagne to create a network of convolution layers using pre-trained VGG19 weights. """ - offset = np.array([103.939, 116.779, 123.680], dtype=np.float32).reshape((1,3,1,1)) self.network['percept'] = lasagne.layers.NonlinearityLayer(input, lambda x: ((x+1.0)*127.5) - offset) @@ -468,9 +482,8 @@ class NeuralEnhancer(object): print('{}Enhancing {} image(s) specified on the command-line.{}'\ .format(ansi.BLUE_B, len(args.files), ansi.BLUE)) - self.model = Model() self.thread = DataLoader() if loader else None - self.model.compile() + self.model = Model() print('{}'.format(ansi.ENDC)) From 095fe42dc3e9b02b37a78cfce702783b25d1b957 Mon Sep 17 00:00:00 2001 From: "Alex J. Champandard" Date: Thu, 3 Nov 2016 10:35:43 +0100 Subject: [PATCH 11/15] Add tiled rendering, currently with no padding for each tile. --- enhance.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/enhance.py b/enhance.py index f54e062..869a2c7 100755 --- a/enhance.py +++ b/enhance.py @@ -557,10 +557,14 @@ class NeuralEnhancer(object): print(ansi.ENDC) def process(self, image): - img = np.transpose(image / 127.5 - 1.0, (2, 0, 1))[np.newaxis].astype(np.float32) - *_, repro = self.model.predict(img) - repro = np.transpose(repro[0] + 1.0, (1, 2, 0)) - return scipy.misc.toimage(repro * 127.5, cmin=0, cmax=255) + s, z = args.batch_shape, args.zoom + output = np.zeros((image.shape[0] * z, image.shape[1] * z, 3), dtype=np.float32) + for y, x in itertools.product(range(0, image.shape[0], s), range(0, image.shape[1], s)): + img = np.transpose(image[y:y+s,x:x+s,:] / 127.5 - 1.0, (2, 0, 1))[np.newaxis].astype(np.float32) + *_, repro = self.model.predict(img) + output[y*z:(y+s)*z,x*z:(x+s)*z,:] = np.transpose(repro[0] + 1.0, (1, 2, 0)) + print('.', end='') + return scipy.misc.toimage(output * 127.5, cmin=0, cmax=255) if __name__ == "__main__": @@ -571,13 +575,9 @@ if __name__ == "__main__": else: enhancer = NeuralEnhancer(loader=False) for filename in args.files: - print(filename) + print(filename, end=' ') img = scipy.ndimage.imread(filename, mode='RGB') - if img.shape[0] * img.shape[1] > 256 ** 2 and args.scales >= 2: - error('This file is (probably) too large to process in one shot and was ignored.', - ' - Until tiled rendering is added, edit this code at your own peril!') - continue - out = enhancer.process(img) out.save(os.path.splitext(filename)[0]+'_ne%ix.png' % args.zoom) + print(flush=True) print(ansi.ENDC) From ac4967641505d40067e4dbab387da906b5b8b590 Mon Sep 17 00:00:00 2001 From: "Alex J. Champandard" Date: Thu, 3 Nov 2016 19:26:55 +0100 Subject: [PATCH 12/15] Add tiled rendering with padding, no feather-blending but looks good enough. --- enhance.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/enhance.py b/enhance.py index 869a2c7..3307f06 100755 --- a/enhance.py +++ b/enhance.py @@ -37,6 +37,8 @@ parser = argparse.ArgumentParser(description='Generate a new image by applying s add_arg = parser.add_argument add_arg('files', nargs='*', default=[]) add_arg('--zoom', default=4, type=int, help='Resolution increase factor for inference.') +add_arg('--rendering-tile', default=128, type=int, help='Size of tiles used for rendering images.') +add_arg('--rendering-overlap', default=32, type=int, help='Number of pixels padding around each tile.') add_arg('--model', default='small', type=str, help='Name of the neural network to load/save.') add_arg('--train', default=False, type=str, help='File pattern to load for training.') add_arg('--train-blur', default=None, type=int, help='Sigma value for gaussian blur preprocess.') @@ -45,7 +47,7 @@ add_arg('--train-jpeg', default=None, type=int, help='JPEG c add_arg('--epochs', default=10, type=int, help='Total number of iterations in training.') add_arg('--epoch-size', default=72, type=int, help='Number of batches trained in an epoch.') add_arg('--save-every', default=10, type=int, help='Save generator after every training epoch.') -add_arg('--batch-shape', default=256, type=int, help='Resolution of images in training batch.') +add_arg('--batch-shape', default=192, type=int, help='Resolution of images in training batch.') add_arg('--batch-size', default=10, type=int, help='Number of images per training batch.') add_arg('--buffer-size', default=1500, type=int, help='Total image fragments kept in cache.') add_arg('--buffer-similar', default=5, type=int, help='Fragments cached for each image loaded.') @@ -556,13 +558,14 @@ class NeuralEnhancer(object): self.model.save_generator() print(ansi.ENDC) - def process(self, image): - s, z = args.batch_shape, args.zoom - output = np.zeros((image.shape[0] * z, image.shape[1] * z, 3), dtype=np.float32) - for y, x in itertools.product(range(0, image.shape[0], s), range(0, image.shape[1], s)): - img = np.transpose(image[y:y+s,x:x+s,:] / 127.5 - 1.0, (2, 0, 1))[np.newaxis].astype(np.float32) + def process(self, original): + s, p, z = args.rendering_tile, args.rendering_overlap, args.zoom + image = np.pad(original, ((p*z, p*z), (p*z, p*z), (0, 0)), mode='reflect') + output = np.zeros((original.shape[0] * z, original.shape[1] * z, 3), dtype=np.float32) + for y, x in itertools.product(range(0, original.shape[0], s), range(0, original.shape[1], s)): + img = np.transpose(image[y:y+p*2+s,x:x+p*2+s,:] / 127.5 - 1.0, (2, 0, 1))[np.newaxis].astype(np.float32) *_, repro = self.model.predict(img) - output[y*z:(y+s)*z,x*z:(x+s)*z,:] = np.transpose(repro[0] + 1.0, (1, 2, 0)) + output[y*z:(y+s)*z,x*z:(x+s)*z,:] = np.transpose(repro[0] + 1.0, (1, 2, 0))[p*z:-p*z,p*z:-p*z,:] print('.', end='') return scipy.misc.toimage(output * 127.5, cmin=0, cmax=255) From cabaaeeefe310e1cfcdb7160cbf89eaf7a6bee6e Mon Sep 17 00:00:00 2001 From: "Alex J. Champandard" Date: Thu, 3 Nov 2016 20:12:58 +0100 Subject: [PATCH 13/15] Add training scripts for networks currently being trained, for release v0.2. --- scripts/small-1x.sh | 18 ++++++++++++++++++ scripts/small-2x.sh | 18 ++++++++++++++++++ scripts/small-4x.sh | 18 ++++++++++++++++++ 3 files changed, 54 insertions(+) create mode 100644 scripts/small-1x.sh create mode 100644 scripts/small-2x.sh create mode 100644 scripts/small-4x.sh diff --git a/scripts/small-1x.sh b/scripts/small-1x.sh new file mode 100644 index 0000000..1e60c92 --- /dev/null +++ b/scripts/small-1x.sh @@ -0,0 +1,18 @@ +#!/bin/sh + +python3.4 enhance.py \ + --train "$OPEN_IMAGES_PATH/*/*.jpg" --model small \ + --epochs=50 --batch-shape=192 --device=gpu0 \ + --generator-downscale=2 --generator-upscale=2 \ + --generator-blocks=8 --generator-filters=64 \ + --perceptual-layer=conv2_2 --smoothness-weight=1e7 --adversary-weight=0.0 \ + --train-blur=3 --train-noise=5.0 + +python3.4 enhance.py \ + --train "$OPEN_IMAGES_PATH/*/*.jpg" --model small \ + --epochs=500 --batch-shape=192 --device=gpu0 \ + --generator-downscale=2 --generator-upscale=2 \ + --perceptual-layer=conv5_2 --smoothness-weight=2e4 --adversary-weight=2e2 \ + --generator-start=5 --discriminator-start=0 --adversarial-start=5 \ + --discriminator-size=32 \ + --train-blur=3 --train-noise=5.0 diff --git a/scripts/small-2x.sh b/scripts/small-2x.sh new file mode 100644 index 0000000..2e8bdc2 --- /dev/null +++ b/scripts/small-2x.sh @@ -0,0 +1,18 @@ +#!/bin/sh + +python3.4 enhance.py \ + --train "$OPEN_IMAGES_PATH/*/*.jpg" --model small \ + --epochs=50 --batch-shape=192 --device=gpu0 \ + --generator-downscale=1 --generator-upscale=2 \ + --generator-blocks=8 --generator-filters=64 \ + --perceptual-layer=conv2_2 --smoothness-weight=1e7 --adversary-weight=0.0 \ + --train-blur=2 --train-noise=4.0 + +python3.4 enhance.py \ + --train "$OPEN_IMAGES_PATH/*/*.jpg" --model small \ + --epochs=500 --batch-shape=192 --device=gpu0 \ + --generator-downscale=1 --generator-upscale=2 \ + --perceptual-layer=conv5_2 --smoothness-weight=2e4 --adversary-weight=2e2 \ + --generator-start=5 --discriminator-start=0 --adversarial-start=5 \ + --discriminator-size=32 \ + --train-blur=2 --train-noise=4.0 diff --git a/scripts/small-4x.sh b/scripts/small-4x.sh new file mode 100644 index 0000000..17d0493 --- /dev/null +++ b/scripts/small-4x.sh @@ -0,0 +1,18 @@ +#!/bin/sh + +python3.4 enhance.py \ + --train "$OPEN_IMAGES_PATH/*/*.jpg" --model small \ + --epochs=50 --batch-shape=192 --device=gpu0 \ + --generator-downscale=0 --generator-upscale=2 \ + --generator-blocks=8 --generator-filters=64 \ + --perceptual-layer=conv2_2 --smoothness-weight=1e7 --adversary-weight=0.0 \ + --train-blur=1 --train-noise=3.0 + +python3.4 enhance.py \ + --train "$OPEN_IMAGES_PATH/*/*.jpg" --model small \ + --epochs=500 --batch-shape=192 --device=gpu0 \ + --generator-downscale=0 --generator-upscale=2 \ + --perceptual-layer=conv5_2 --smoothness-weight=2e4 --adversary-weight=2e2 \ + --generator-start=5 --discriminator-start=0 --adversarial-start=5 \ + --discriminator-size=32 \ + --train-blur=1 --train-noise=3.0 From 448e7b93dcf19a82104f2d6b1553a8a9fa7528df Mon Sep 17 00:00:00 2001 From: "Alex J. Champandard" Date: Thu, 3 Nov 2016 22:01:08 +0100 Subject: [PATCH 14/15] Fix progress output when tiled rendering, changed default zoom level. --- enhance.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/enhance.py b/enhance.py index d837576..f9472f0 100755 --- a/enhance.py +++ b/enhance.py @@ -36,7 +36,7 @@ parser = argparse.ArgumentParser(description='Generate a new image by applying s formatter_class=argparse.ArgumentDefaultsHelpFormatter) add_arg = parser.add_argument add_arg('files', nargs='*', default=[]) -add_arg('--zoom', default=4, type=int, help='Resolution increase factor for inference.') +add_arg('--zoom', default=1, type=int, help='Resolution increase factor for inference.') add_arg('--rendering-tile', default=128, type=int, help='Size of tiles used for rendering images.') add_arg('--rendering-overlap', default=32, type=int, help='Number of pixels padding around each tile.') add_arg('--model', default='small', type=str, help='Name of the neural network to load/save.') @@ -566,7 +566,7 @@ class NeuralEnhancer(object): img = np.transpose(image[y:y+p*2+s,x:x+p*2+s,:] / 127.5 - 1.0, (2, 0, 1))[np.newaxis].astype(np.float32) *_, repro = self.model.predict(img) output[y*z:(y+s)*z,x*z:(x+s)*z,:] = np.transpose(repro[0] + 1.0, (1, 2, 0))[p*z:-p*z,p*z:-p*z,:] - print('.', end='') + print('.', end='', flush=True) return scipy.misc.toimage(output * 127.5, cmin=0, cmax=255) From 03914db3646875981ccc8523d144bc66595ef327 Mon Sep 17 00:00:00 2001 From: "Alex J. Champandard" Date: Thu, 3 Nov 2016 22:27:28 +0100 Subject: [PATCH 15/15] Fix formatting, minor tweaks to Docker build files for release. --- docker-cpu.df | 5 ++--- docker-gpu.df | 5 ++--- enhance.py | 8 +++++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/docker-cpu.df b/docker-cpu.df index 2762f34..2e8dcb4 100644 --- a/docker-cpu.df +++ b/docker-cpu.df @@ -26,9 +26,8 @@ RUN /opt/conda/bin/python3.5 -m pip install -q -r "requirements.txt" COPY enhance.py . # Get a pre-trained neural networks, non-commercial & attribution. -RUN wget -q "https://github.com/alexjc/neural-enhance/releases/download/v0.1/ne4x-small-0.1.pkl.bz2" -RUN wget -q "https://github.com/alexjc/neural-enhance/releases/download/v0.1/ne4x-medium-0.1.pkl.bz2" -RUN wget -q "https://github.com/alexjc/neural-enhance/releases/download/v0.1/ne4x-large-0.1.pkl.bz2" +RUN wget -q "https://github.com/alexjc/neural-enhance/releases/download/v0.2/ne1x-small-0.2.pkl.bz2" +RUN wget -q "https://github.com/alexjc/neural-enhance/releases/download/v0.2/ne2x-small-0.2.pkl.bz2" # Set an entrypoint to the main enhance.py script ENTRYPOINT ["/opt/conda/bin/python3.5", "enhance.py", "--device=cpu"] diff --git a/docker-gpu.df b/docker-gpu.df index 2fc6c6a..90f33fd 100644 --- a/docker-gpu.df +++ b/docker-gpu.df @@ -24,9 +24,8 @@ RUN /opt/conda/bin/python3.5 -m pip install -q -r "requirements.txt" COPY enhance.py . # Get a pre-trained neural networks, non-commercial & attribution. -RUN wget -q "https://github.com/alexjc/neural-enhance/releases/download/v0.1/ne4x-small-0.1.pkl.bz2" -RUN wget -q "https://github.com/alexjc/neural-enhance/releases/download/v0.1/ne4x-medium-0.1.pkl.bz2" -RUN wget -q "https://github.com/alexjc/neural-enhance/releases/download/v0.1/ne4x-large-0.1.pkl.bz2" +RUN wget -q "https://github.com/alexjc/neural-enhance/releases/download/v0.2/ne1x-small-0.2.pkl.bz2" +RUN wget -q "https://github.com/alexjc/neural-enhance/releases/download/v0.2/ne2x-small-0.2.pkl.bz2" # Set an entrypoint to the main enhance.py script ENTRYPOINT ["/opt/conda/bin/python3.5", "enhance.py", "--device=gpu"] diff --git a/enhance.py b/enhance.py index f9472f0..d7d64eb 100755 --- a/enhance.py +++ b/enhance.py @@ -180,7 +180,8 @@ class DataLoader(threading.Thread): seed = PIL.Image.open(buffer) seed = scipy.misc.fromimage(seed, mode='RGB').astype(np.float32) - seed += scipy.random.normal(scale=args.train_noise, size=(seed.shape[0], seed.shape[1], 1)) if args.train_noise else 0.0 + seed += scipy.random.normal(scale=args.train_noise, size=(seed.shape[0], seed.shape[1], 1))\ + if args.train_noise else 0.0 orig = scipy.misc.fromimage(orig).astype(np.float32) @@ -441,7 +442,7 @@ class Model(object): # Helper function for rendering test images during training, or standalone inference mode. input_tensor, seed_tensor = T.tensor4(), T.tensor4() input_layers = {self.network['img']: input_tensor, self.network['seed']: seed_tensor} - output = lasagne.layers.get_output([self.network[k] for k in ['seed', 'out']], input_layers, deterministic=True) + output = lasagne.layers.get_output([self.network[k] for k in ['seed','out']], input_layers, deterministic=True) self.predict = theano.function([seed_tensor], output) if not args.train: return @@ -541,7 +542,8 @@ class NeuralEnhancer(object): print(' - generator {}'.format(' '.join(gen_info))) real, fake = stats[:args.batch_size], stats[args.batch_size:] - print(' - discriminator', real.mean(), len(np.where(real > 0.5)[0]), fake.mean(), len(np.where(fake < -0.5)[0])) + print(' - discriminator', real.mean(), len(np.where(real > 0.5)[0]), + fake.mean(), len(np.where(fake < -0.5)[0])) if epoch == args.adversarial_start-1: print(' - generator now optimizing against discriminator.') self.model.adversary_weight.set_value(args.adversary_weight)