From 0d4621df0a7f29beba7dbaf1f5edfbb4aeaf132c Mon Sep 17 00:00:00 2001 From: "Alex J. Champandard" Date: Tue, 8 Nov 2016 17:41:28 +0100 Subject: [PATCH] Simplify architecture outside of convolution blocks for speed, improve training code. --- enhance.py | 58 ++++++++++++++++++++++++++---------------------------- 1 file changed, 28 insertions(+), 30 deletions(-) diff --git a/enhance.py b/enhance.py index 1cf570f..9db5c96 100755 --- a/enhance.py +++ b/enhance.py @@ -14,7 +14,7 @@ # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. # -__version__ = '0.2' +__version__ = '0.3' import io import os @@ -51,7 +51,7 @@ add_arg('--save-every', default=10, type=int, help='Save g add_arg('--batch-shape', default=192, type=int, help='Resolution of images in training batch.') add_arg('--batch-size', default=15, type=int, help='Number of images per training batch.') add_arg('--buffer-size', default=1500, type=int, help='Total image fragments kept in cache.') -add_arg('--buffer-similar', default=5, type=int, help='Fragments cached for each image loaded.') +add_arg('--buffer-fraction', default=5, type=int, help='Fragments cached for each image loaded.') add_arg('--learning-rate', default=1E-4, type=float, help='Parameter for the ADAM optimizer.') add_arg('--learning-period', default=75, type=int, help='How often to decay the learning rate.') add_arg('--learning-decay', default=0.5, type=float, help='How much to decay the learning rate.') @@ -137,7 +137,7 @@ class DataLoader(threading.Thread): self.data_ready = threading.Event() self.data_copied = threading.Event() - self.orig_shape, self.seed_shape = args.batch_shape, int(args.batch_shape / args.zoom) + self.orig_shape, self.seed_shape = args.batch_shape, args.batch_shape // args.zoom self.orig_buffer = np.zeros((args.buffer_size, 3, self.orig_shape, self.orig_shape), dtype=np.float32) self.seed_buffer = np.zeros((args.buffer_size, 3, self.seed_shape, self.seed_shape), dtype=np.float32) @@ -163,7 +163,7 @@ class DataLoader(threading.Thread): try: orig = PIL.Image.open(filename).convert('RGB') scale = 2 ** random.randint(0, args.train_scales) - if scale > 1 and all(s > args.batch_shape * scale for s in orig.size): + if scale > 1 and all(s//scale >= args.batch_shape for s in orig.size): orig = orig.resize((orig.size[0]//scale, orig.size[1]//scale), resample=PIL.Image.LANCZOS) if any(s < args.batch_shape for s in orig.size): raise ValueError('Image is too small for training with size {}'.format(orig.size)) @@ -173,22 +173,24 @@ class DataLoader(threading.Thread): self.files.remove(f) return - if args.train_blur: - seed = orig.filter(PIL.ImageFilter.GaussianBlur(radius=random.randint(0, args.train_blur*2))) + seed = orig + if args.train_blur is not None: + seed = seed.filter(PIL.ImageFilter.GaussianBlur(radius=random.randint(0, args.train_blur*2))) if args.zoom > 1: seed = seed.resize((orig.size[0]//args.zoom, orig.size[1]//args.zoom), resample=PIL.Image.LANCZOS) - if args.train_jpeg: + if args.train_jpeg is not None: buffer = io.BytesIO() seed.save(buffer, format='jpeg', quality=args.train_jpeg+random.randrange(-15,+15)) seed = PIL.Image.open(buffer) - orig = scipy.misc.fromimage(orig, mode='RGB').astype(np.float32) - seed = scipy.misc.fromimage(seed, mode='RGB').astype(np.float32) + orig = scipy.misc.fromimage(orig).astype(np.float32) + seed = scipy.misc.fromimage(seed).astype(np.float32) - if args.train_noise: - seed += scipy.random.normal(scale=args.train_noise, size=(seed.shape[0], seed.shape[1], 1)) ** 4.0 + if args.train_noise is not None: + noise = scipy.random.normal(scale=args.train_noise/10.0, size=(seed.shape[0], seed.shape[1], 1)) + seed += (noise ** 7.0).clip(-args.train_noise, args.train_noise) - for _ in range(seed.shape[0] * seed.shape[1] // self.seed_shape * 2): + for _ in range(seed.shape[0] * seed.shape[1] // (args.buffer_fraction * self.seed_shape ** 2)): h = random.randint(0, seed.shape[0] - self.seed_shape) w = random.randint(0, seed.shape[1] - self.seed_shape) seed_chunk = seed[h:h+self.seed_shape, w:w+self.seed_shape] @@ -200,8 +202,8 @@ class DataLoader(threading.Thread): self.data_copied.clear() i = self.available.pop() - self.orig_buffer[i] = np.transpose(orig_chunk.astype(np.float32) / 127.5 - 1.0, (2, 0, 1)) - self.seed_buffer[i] = np.transpose(seed_chunk.astype(np.float32) / 127.5 - 1.0, (2, 0, 1)) + self.orig_buffer[i] = np.transpose(orig_chunk.astype(np.float32) / 255.0 - 0.5, (2, 0, 1)) + self.seed_buffer[i] = np.transpose(seed_chunk.astype(np.float32) / 255.0 - 0.5, (2, 0, 1)) self.ready.add(i) if len(self.ready) >= args.batch_size: @@ -275,8 +277,8 @@ class Model(object): return prelu def make_block(self, name, input, units): - self.make_layer(name+'-A', input, units, alpha=0.25) - self.make_layer(name+'-B', self.last_layer(), units, alpha=1.0) + self.make_layer(name+'-A', input, units, alpha=0.1) + # self.make_layer(name+'-B', self.last_layer(), units, alpha=1.0) return ElemwiseSumLayer([input, self.last_layer()]) if args.generator_residual else self.last_layer() def setup_generator(self, input, config): @@ -285,9 +287,7 @@ class Model(object): units_iter = extend(args.generator_filters) units = next(units_iter) - self.make_layer('iter.0-A', input, units, filter_size=(5,5), pad=(2,2)) - self.make_layer('iter.0-B', self.last_layer(), units, filter_size=(5,5), pad=(2,2)) - self.network['iter.0'] = self.last_layer() + self.make_layer('iter.0', input, units, filter_size=(7,7), pad=(3,3)) for i in range(0, args.generator_downscale): self.make_layer('downscale%i'%i, self.last_layer(), next(units_iter), filter_size=(4,4), stride=(2,2)) @@ -298,18 +298,16 @@ class Model(object): for i in range(0, args.generator_upscale): u = next(units_iter) - self.make_layer('upscale%i.3'%i, self.last_layer(), u*4) - self.network['upscale%i.2'%i] = SubpixelReshuffleLayer(self.last_layer(), u, 2) - self.make_layer('upscale%i.1'%i, self.last_layer(), u) + self.make_layer('upscale%i.2'%i, self.last_layer(), u*4) + self.network['upscale%i.1'%i] = SubpixelReshuffleLayer(self.last_layer(), u, 2) - self.network['out'] = ConvLayer(self.last_layer(), 3, filter_size=(3,3), stride=(1,1), pad=(1,1), - nonlinearity=lasagne.nonlinearities.tanh) + self.network['out'] = ConvLayer(self.last_layer(), 3, filter_size=(7,7), pad=(3,3), nonlinearity=None) def setup_perceptual(self, input): """Use lasagne to create a network of convolution layers using pre-trained VGG19 weights. """ offset = np.array([103.939, 116.779, 123.680], dtype=np.float32).reshape((1,3,1,1)) - self.network['percept'] = lasagne.layers.NonlinearityLayer(input, lambda x: ((x+1.0)*127.5) - offset) + self.network['percept'] = lasagne.layers.NonlinearityLayer(input, lambda x: ((x+0.5)*255.0) - offset) self.network['mse'] = self.network['percept'] self.network['conv1_1'] = ConvLayer(self.network['percept'], 64, 3, pad=1) @@ -393,7 +391,7 @@ class Model(object): assert k in params, "Couldn't find layer `%s` in loaded model.'" % k assert len(l.get_params()) == len(params[k]), "Mismatch in types of layers." for p, v in zip(l.get_params(), params[k]): - assert v.shape == p.get_value().shape, "Mismatch in number of parameters." + assert v.shape == p.get_value().shape, "Mismatch in number of parameters for layer {}.".format(k) p.set_value(v.astype(np.float32)) #------------------------------------------------------------------------------------------------------------------ @@ -465,7 +463,7 @@ class NeuralEnhancer(object): print('{}'.format(ansi.ENDC)) def imsave(self, fn, img): - scipy.misc.toimage(np.transpose(img + 1.0, (1, 2, 0)) * 127.5, cmin=0, cmax=255).save(fn) + scipy.misc.toimage(np.transpose(img + 0.5, (1, 2, 0)).clip(0.0, 1.0) * 255.0, cmin=0, cmax=255).save(fn) def show_progress(self, orign, scald, repro): os.makedirs('valid', exist_ok=True) @@ -547,11 +545,11 @@ class NeuralEnhancer(object): # Iterate through the tile coordinates and pass them through the network. for y, x in itertools.product(range(0, original.shape[0], s), range(0, original.shape[1], s)): - img = np.transpose(image[y:y+p*2+s,x:x+p*2+s,:] / 127.5 - 1.0, (2, 0, 1))[np.newaxis].astype(np.float32) + img = np.transpose(image[y:y+p*2+s,x:x+p*2+s,:] / 255.0 - 0.5, (2, 0, 1))[np.newaxis].astype(np.float32) *_, repro = self.model.predict(img) - output[y*z:(y+s)*z,x*z:(x+s)*z,:] = np.transpose(repro[0] + 1.0, (1, 2, 0))[p*z:-p*z,p*z:-p*z,:] + output[y*z:(y+s)*z,x*z:(x+s)*z,:] = np.transpose(repro[0] + 0.5, (1, 2, 0))[p*z:-p*z,p*z:-p*z,:] print('.', end='', flush=True) - return scipy.misc.toimage(output * 127.5, cmin=0, cmax=255) + return scipy.misc.toimage(output.clip(0.0, 1.0) * 255.0, cmin=0, cmax=255) if __name__ == "__main__":