Deep Learning Memory Error
I want to use a deep architecture with 15 layers. After changing some parameters and running again, the run fails with the memory error below. The error does not resurface when I reduce the network to 10 layers.
But afterwards even 10 layers no longer work, and I have to reduce the layer count further.
I'm wondering if there is a way to clear 'junk' files so that I can run at 15 layers again?
MemoryError: Error allocating 419430400 bytes of device memory (CNMEM_STATUS_OUT_OF_MEMORY).
Apply node that caused the error: GpuElemwise{Sub,no_inplace}(GpuCorrMM{half, (1, 1)}.0, GpuElemwise{Composite{(((i0 / i1) / i2) / i3)},no_inplace}.0)
Toposort index: 163
Inputs types: [CudaNdarrayType(float32, 4D), CudaNdarrayType(float32, (True, False, True, True))]
Inputs shapes: [(1024, 64, 40, 40), (1, 64, 1, 1)]
Inputs strides: [(102400, 1600, 40, 1), (0, 1, 0, 0)]
Inputs values: ['not shown', 'not shown']
Outputs clients: [[GpuCAReduce{pre=sqr,red=add}{1,0,1,1}(GpuElemwise{Sub,no_inplace}.0), GpuElemwise{Composite{(((i0 + i1) + ((i2 * i3 * i4) / i5)) + i6)},no_inplace}(GpuElemwise{Composite{(((i0 * i1 * i2) + (i0 * i1 * i2 * i3)) * i4)},no_inplace}.0, GpuElemwise{Composite{((((-i0) / i1) / i2) / i3)},no_inplace}.0, CudaNdarrayConstant{[[[[-1.]]]]}, GpuElemwise{Mul,no_inplace}.0, GpuElemwise{Sub,no_inplace}.0, GpuElemwise{Mul,no_inplace}.0, GpuElemwise{Composite{(((i0 / i1) / i2) / i3)},no_inplace}.0), GpuElemwise{Composite{((i0 * i1) / i2)},no_inplace}(GpuElemwise{Mul,no_inplace}.0, GpuElemwise{Sub,no_inplace}.0, GpuElemwise{Mul,no_inplace}.0)]]
HINT: Re-running with most Theano optimization disabled could give you a back-trace of when this node was created. This can be done by setting the Theano flag 'optimizer=fast_compile'. If that does not work, Theano optimizations can be disabled with 'optimizer=None'.
HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node.
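For reference, the flags mentioned in the hints can be set through the THEANO_FLAGS environment variable. A minimal sketch of how I understand this works, assuming the flags must be in place before Theano is first imported (the cnmem fraction below is just an example value, not something tuned):

import os

# THEANO_FLAGS is read when theano is first imported, so set it beforehand:
#   optimizer=fast_compile   -> back-trace of where the failing node was created
#   exception_verbosity=high -> debugprint and storage-map footprint on errors
#   lib.cnmem=0.45           -> have CNMeM preallocate only 45% of GPU memory
os.environ['THEANO_FLAGS'] = 'optimizer=fast_compile,exception_verbosity=high,lib.cnmem=0.45'
import theano

As far as I know, the only on-disk 'junk' Theano keeps is its compilation cache, which the theano-cache clear command (or theano-cache purge) removes, but that frees disk space rather than device memory.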
My code is as follows:

# imports inferred from usage; psnr, data_augmentation, load_simulation_data
# and report are my own helpers, defined elsewhere
import glob
import time
import datetime as dt
import numpy as np
import theano
import theano.tensor as T
import lasagne as nn
from scipy.misc import imread, imsave

def functions(network, eta, m, n):
    ## symbolics
    x = T.tensor4(); y = T.tensor4()  # both input and output are images of size (number of images, channels, m, n)

    ## non-deterministic training
    parameters = nn.layers.get_all_params(layer=network, trainable=True)
    output = nn.layers.get_output(layer_or_layers=network, inputs=x, deterministic=False)
    prediction = output
    all_layers = nn.layers.get_all_layers(network)
    loss = T.mean(nn.objectives.squared_error(output, y))
    psnr_train = psnr(prediction, y, m, n)
    gradient = T.grad(cost=loss, wrt=parameters)
    update = nn.updates.sgd(loss_or_grads=gradient, params=parameters, learning_rate=eta)

    ## deterministic tests
    det_output = nn.layers.get_output(layer_or_layers=network, inputs=x, deterministic=True)
    det_prediction = det_output
    det_loss = T.mean(nn.objectives.squared_error(det_output, y))
    det_psnr = psnr(det_prediction, y, m, n)

    ## functions
    train = theano.function(inputs=[x, y], outputs=[loss, psnr_train], updates=update)
    validate = theano.function(inputs=[x, y], outputs=[det_loss, det_psnr])
    predict = theano.function(inputs=[x], outputs=det_prediction)
    return train, validate, predict

def pickle_load_data(train_folder, pickle_data=2):
    if pickle_data == 2:
        print('reading images from npy...')
        x_train = np.load(train_folder + '/x_train.npy').astype('float32')
        x_train = np.reshape(x_train, newshape=(len(x_train), 1, 40, 40))
        y_train = np.load(train_folder + '/y_train.npy').astype('float32')
        y_train = np.reshape(y_train, newshape=(len(y_train), 1, 40, 40))
    return x_train, y_train  # ,x_test,y_test

def generate_batches(data, target, batch_size=100, stochastic=True):
    idx = np.arange(len(data))  # len(data) = number of images
    if stochastic:
        np.random.shuffle(idx)
    for k in range(0, len(data), batch_size):
        sample = idx[slice(k, k + batch_size)]
        yield data[sample], target[sample]

def dncnn_network(input_m=40, input_n=40, c=1, n_blocks=15):
    ## input
    l = nn.layers.InputLayer(shape=(None, c, input_m, input_n))
    print('InputLayer', nn.layers.get_output_shape(l))
    ## conv + relu
    l = nn.layers.Conv2DLayer(incoming=l, num_filters=64, filter_size=(3, 3), stride=1,
                              pad='same', nonlinearity=nn.nonlinearities.rectify)
    ## conv + batch norm + relu blocks
    for _ in range(n_blocks):
        l = nn.layers.Conv2DLayer(incoming=l, num_filters=64, filter_size=(3, 3), stride=1,
                                  pad='same', nonlinearity=nn.nonlinearities.rectify)
        l = nn.layers.batch_norm(l)
    ## output layer (residual of the image)
    l = nn.layers.Conv2DLayer(incoming=l, num_filters=c, filter_size=(3, 3), stride=1,
                              pad='same', nonlinearity=nn.nonlinearities.identity)
    return l

def patch_generation(sigma, save_image, folder_in, folder_out_noisy, folder_out_residual,
                     max_numpatches, count, mode_aug):
    y, v = [], []    # append data here according to the paper:
                     # y = noisy image, v = residual, x = y - v = ground truth
    stride = 20      # stride to crop the next patch
    M = 40           # size of the output patches
    m, n = 180, 180  # size of the input images
    count_itr = 0
    print('start generating data from ' + folder_in + '...')
    for img in glob.glob(folder_in + "/*.png"):
        image = imread(img)
        for aug in range(1, mode_aug):
            image_aug = data_augmentation(image, aug) / np.float32(255)
            ## crop MxM patches
            for i in range(0, m - stride - 1, stride):
                for j in range(0, n - stride - 1, stride):
                    subim_input = image_aug[i:i + M, j:j + M]
                    subim_label = subim_input
                    ## add noise (already in single precision)
                    resi = np.random.normal(loc=0.0, scale=sigma, size=(M, M)) / np.float32(255) * sigma
                    noisy_im = resi + subim_input
                    count += 1
                    count_itr += 1
                    ## concatenated below, for use in the rest of the code
                    y.append([noisy_im])
                    v.append([resi])
    ys = np.concatenate(y[0:count_itr], axis=0)
    vs = np.concatenate(v[0:count_itr], axis=0)
    print('done generating data ' + folder_out_noisy)
    return ys, vs, count

############################# initialisation ######################################
## pre-training
first_time = 0
batchsize = 256
max_numpatches = batchsize * 100  # limit the iterations of each epoch to 100
sigma = np.array([4.9, 5.05, 5, 4.95, 5.1])
save_image = 0
mode_aug = 9

## training network
n_blocks = 15
eta = 0.01
epochs = 500
e = 1
batchsize_train = 256 * 4
input_m = 40
input_n = 40

## folders
folder_in = 'combined'
folder_out_noisy = 'combined/augmented'
folder_out_residual = 'combined/residual'

############################# building network ######################################
strt_time = time.time()
folder_train = 'combined'
folder_out = 'combined/denoised'
x_train, y_train = pickle_load_data(folder_train, pickle_data=2)

## network
net = dncnn_network(n_blocks=n_blocks)

#################### load parameters ################################################
train, validate, predict = functions(net, eta, input_m, input_n)
output_file = 'training_log.txt'
report('start: {}'.format(dt.datetime.now()), output_file)

###################### validation ######################################
## building the validation network
input_var = T.tensor4()
net_val = dncnn_network(input_m=180, input_n=180, c=1, n_blocks=n_blocks)
train_val, validate_val, predict_val = functions(net_val, eta, 180, 180)
folder_truth = 'combined'; folder_noisy = 'combined/noisy'; folder_result = 'combined/denoised'
x_noisy, y_truth = load_simulation_data(folder_truth, folder_noisy, sigma=sigma, pickle_data=1, stop=5)

#################################################################################
############################# training network ######################################
tl, ta = [], []
# for e in range(epochs):
for e in range(500):
    e += 1
    start_time = time.time()
    tl, ta, total = 0., 0., 0.
    np.savez('dncnn_model.npz', nn.layers.get_all_param_values(net))

    ## training
    for batch in generate_batches(x_train, y_train, batchsize_train):
        data, targets = batch
        l, a = train(data, targets)
        tl += l
        ta += a
        total += 1
    tl /= total
    ta /= total
    row = [e, tl, ta, time.time() - start_time]
    report('{:<10}{:<20.5f}{:<20.5f}{:<20.5f}'.format(*row), output_file)

    ## validation
    if e % 10 == 0:
        j = 0
        with np.load('dncnn_model.npz') as f:
            param_values = [f['arr_%d' % i] for i in range(len(f.files))]
        para_x = np.array(param_values)
        print('reloaded network parameters shape: ', para_x.shape)
        nn.layers.set_all_param_values(net_val, para_x[0])
        for img in x_noisy:
            y = np.reshape(img, newshape=(1, 1, 180, 180))  # fit the input layer dimensions
            # note: this recompiles all three Theano functions for every validation image
            train_val, validate_val, predict_val = functions(net_val, eta, 180, 180)
            y = y.astype(np.float32)
            v = predict_val(y)
            x = y - v
            j += 1
            imsave(folder_result + '/denoised_' + str(j) + '.png', np.uint32(255 * abs(x[0][0])))
            imsave(folder_noisy + '/noisy_' + str(j) + '.png', np.uint32(255 * abs(y[0][0])))

report('finished training.', output_file)
duration = time.time() - strt_time
print('total time taken ', duration, ' seconds, ', duration / 60, ' minutes')
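One thing I noticed while writing this up: the 419430400 bytes that fail to allocate are exactly one batch of 64 intermediate float32 feature maps at my training batch size, so the activations stored per layer grow with both depth and batch size. A quick sanity check of that arithmetic:

# the failing allocation matches one batch of float32 feature maps exactly
batch = 256 * 4            # batchsize_train above
feature_maps = 64          # num_filters in each Conv2DLayer
h = w = 40                 # patch size
bytes_per_float32 = 4
print(batch * feature_maps * h * w * bytes_per_float32)  # 419430400, as in the error

So if clearing junk files turns out not to be possible, I assume reducing batchsize_train would shrink these per-layer allocations and might let the 15-layer network fit again.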