Deep learning - MemoryError when training a 15-layer network -


I want to use a deep architecture with 15 layers. After changing some parameters and running it again, I got the following memory error. The memory error does not resurface when I reduce the network to 10 layers.

But afterwards, 10 layers would not work anymore either, and I had to reduce the number of layers again.

I'm wondering if there is a way to clear the 'junk' files so that I can run it at 15 layers again?

MemoryError: Error allocating 419430400 bytes of device memory (CNMEM_STATUS_OUT_OF_MEMORY).
Apply node that caused the error: GpuElemwise{Sub,no_inplace}(GpuCorrMM{half, (1, 1)}.0, GpuElemwise{Composite{(((i0 / i1) / i2) / i3)},no_inplace}.0)
Toposort index: 163
Inputs types: [CudaNdarrayType(float32, 4D), CudaNdarrayType(float32, (True, False, True, True))]
Inputs shapes: [(1024, 64, 40, 40), (1, 64, 1, 1)]
Inputs strides: [(102400, 1600, 40, 1), (0, 1, 0, 0)]
Inputs values: ['not shown', 'not shown']
Outputs clients: [[GpuCAReduce{pre=sqr,red=add}{1,0,1,1}(GpuElemwise{Sub,no_inplace}.0), GpuElemwise{Composite{(((i0 + i1) + ((i2 * i3 * i4) / i5)) + i6)},no_inplace}(GpuElemwise{Composite{(((i0 * i1 * i2) + (i0 * i1 * i2 * i3)) * i4)},no_inplace}.0, GpuElemwise{Composite{((((-i0) / i1) / i2) / i3)},no_inplace}.0, CudaNdarrayConstant{[[[[-1.]]]]}, GpuElemwise{Mul,no_inplace}.0, GpuElemwise{Sub,no_inplace}.0, GpuElemwise{Mul,no_inplace}.0, GpuElemwise{Composite{(((i0 / i1) / i2) / i3)},no_inplace}.0), GpuElemwise{Composite{((i0 * i1) / i2)},no_inplace}(GpuElemwise{Mul,no_inplace}.0, GpuElemwise{Sub,no_inplace}.0, GpuElemwise{Mul,no_inplace}.0)]]

HINT: Re-running with most Theano optimization disabled could give you a back-trace of when this node was created. This can be done by setting the Theano flag 'optimizer=fast_compile'. If that does not work, Theano optimizations can be disabled with 'optimizer=None'.
HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node.
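As a sanity check (my arithmetic, not part of the original post): the 419430400 bytes being allocated is exactly one float32 tensor of the shape shown in the traceback, (1024, 64, 40, 40), where 1024 is batchsize_train = 256*4 from the code below:

# one float32 tensor of shape (1024, 64, 40, 40)
elements = 1024 * 64 * 40 * 40   # 104,857,600 values
print(elements * 4)              # 419430400 bytes, the figure in the error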

My code is as follows (psnr, data_augmentation, load_simulation_data and report are helper functions of mine that are not shown here):

import glob
import time
import datetime as dt

import numpy as np
import theano
import theano.tensor as T
import lasagne as nn
from scipy.misc import imread, imsave  # assuming scipy.misc; the post does not show its imports

def functions(network, eta, m, n):
    ## symbolic variables: input and output are both batches of images
    ## of shape (number of images, channels, m, n)
    x = T.tensor4()
    y = T.tensor4()

    ## non-deterministic training
    parameters = nn.layers.get_all_params(layer=network, trainable=True)
    output = nn.layers.get_output(layer_or_layers=network, inputs=x,
                                  deterministic=False)
    prediction = output
    all_layers = nn.layers.get_all_layers(network)
    loss = T.mean(nn.objectives.squared_error(output, y))
    psnr_train = psnr(prediction, y, m, n)
    gradient = T.grad(cost=loss, wrt=parameters)
    update = nn.updates.sgd(loss_or_grads=gradient,
                            params=parameters, learning_rate=eta)

    ## deterministic tests
    det_output = nn.layers.get_output(layer_or_layers=network, inputs=x,
                                      deterministic=True)
    det_prediction = det_output
    det_loss = T.mean(nn.objectives.squared_error(det_output, y))
    det_psnr = psnr(det_prediction, y, m, n)

    ## compiled functions
    train = theano.function(
        inputs=[x, y], outputs=[loss, psnr_train], updates=update)
    validate = theano.function(
        inputs=[x, y], outputs=[det_loss, det_psnr])
    predict = theano.function(
        inputs=[x], outputs=det_prediction)

    return train, validate, predict
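A note that is mine, not the poster's: each call to functions() compiles three new theano.function objects on the GPU, so the usual pattern is to call it once per network and reuse the returned functions:

# usage sketch (annotation, not from the post): compile once, reuse in loops
train, validate, predict = functions(net, eta, 40, 40)   # net is built further below
# inside training/validation loops, call only train(...)/validate(...)/predict(...);
# every extra functions(...) call compiles and allocates fresh GPU buffers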

def pickle_load_data(train_folder, pickle_data=2):
    if pickle_data == 2:
        print('reading images from npy...')
        x_train = np.load(train_folder + '/x_train.npy').astype('float32')
        x_train = np.reshape(x_train, newshape=(len(x_train), 1, 40, 40))
        y_train = np.load(train_folder + '/y_train.npy').astype('float32')
        y_train = np.reshape(y_train, newshape=(len(y_train), 1, 40, 40))
    return x_train, y_train  # , x_test, y_test

def generate_batches(data, target, batch_size=100, stochastic=True):
    idx = np.arange(len(data))  # len(data) = number of images
    if stochastic:
        np.random.shuffle(idx)
    for k in range(0, len(data), batch_size):
        sample = idx[slice(k, k + batch_size)]
        yield data[sample], target[sample]

def dncnn_network(input_m=40, input_n=40, c=1, n_blocks=15):
    ## input
    l = nn.layers.InputLayer(shape=(None, c, input_m, input_n))
    print('InputLayer', nn.layers.get_output_shape(l))

    ## conv + relu
    l = nn.layers.Conv2DLayer(incoming=l, num_filters=64,
        filter_size=(3, 3), stride=1, pad='same',
        nonlinearity=nn.nonlinearities.rectify)

    ## conv + batchnorm + relu
    for _ in range(n_blocks):
        l = nn.layers.Conv2DLayer(incoming=l, num_filters=64,
            filter_size=(3, 3), stride=1, pad='same',
            nonlinearity=nn.nonlinearities.rectify)
        l = nn.layers.batch_norm(l)

    ## output layer (residual of the image)
    l = nn.layers.Conv2DLayer(incoming=l, num_filters=c,
        filter_size=(3, 3), stride=1, pad='same',
        nonlinearity=nn.nonlinearities.identity)

    return l

def patch_generation(sigma, save_image, folder_in, folder_out_noisy,
                     folder_out_residual, max_numpatches, count, mode_aug):
    ## y = noisy image, v = residual; x = y - v = ground truth
    y, v = [], []   # append data here according to the paper
    stride = 20     # stride to crop the next patch
    m = 40          # size of the output patches
    M, N = 180, 180 # size of the input images
    count_itr = 0
    print('start generating data ' + folder_in + '...')
    for img in glob.glob(folder_in + "/*.png"):
        image = imread(img)
        for aug in range(1, mode_aug):
            image_aug = data_augmentation(image, aug) / np.float32(255)
            ## crop m by m patches
            for i in range(0, M - stride - 1, stride):
                for j in range(0, N - stride - 1, stride):
                    subim_input = image_aug[i:i + m, j:j + m]
                    subim_label = subim_input
                    ## add noise (already single precision)
                    resi = np.random.normal(loc=0.0, scale=sigma,
                                            size=(m, m)) / np.float32(255) * sigma
                    noisy_im = resi + subim_input
                    count += 1
                    count_itr += 1
                    y.append([noisy_im])
                    v.append([resi])
    ys = np.concatenate(y[0:count_itr], axis=0)
    vs = np.concatenate(v[0:count_itr], axis=0)
    print('done generating data ' + folder_out_noisy)
    return ys, vs, count
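As a side check (my sketch, not the poster's code): the parameters of this network are tiny compared with its activations, which is why the traceback shows a ~400 MB activation tensor rather than weight memory. Lasagne can count the parameters directly:

# sketch: weight memory vs. activation memory for the 15-block network
net15 = dncnn_network(n_blocks=15)
n_params = nn.layers.count_params(net15, trainable=True)
print(n_params, 'parameters ->', n_params * 4 / 1e6, 'MB as float32')
# roughly 0.6M parameters (~2-3 MB), versus the 419 MB activation in the traceback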
############################# initialisation ######################################
## pre-training
first_time = 0
batchsize = 256
max_numpatches = batchsize * 100  # limit the iterations of each epoch to 100
sigma = np.array([4.9, 5.05, 5, 4.95, 5.1])
save_image = 0
mode_aug = 9

## training network
n_blocks = 15
eta = 0.01
epochs = 500
e = 1
batchsize_train = 256 * 4
input_m = 40
input_n = 40

## folders
folder_in = 'combined'
folder_out_noisy = 'combined/augmented'
folder_out_residual = 'combined/residual'

############################# building network ######################################
strt_time = time.time()
folder_train = 'combined'
folder_out = 'combined/denoised'
x_train, y_train = pickle_load_data(folder_train, pickle_data=2)

## network
net = dncnn_network(n_blocks=n_blocks)

#################### load parameters ################################################
train, validate, predict = functions(net, eta, input_m, input_n)
output_file = 'training_log.txt'
report('start: {}'.format(dt.datetime.now()), output_file)

###################### validation ######################################
## build a second network for full-size (180x180) validation images
input_var = T.tensor4()
net_val = dncnn_network(input_m=180, input_n=180, c=1, n_blocks=n_blocks)
train_val, validate_val, predict_val = functions(net_val, eta, 180, 180)
folder_truth = 'combined'
folder_noisy = 'combined/noisy'
folder_result = 'combined/denoised'
x_noisy, y_truth = load_simulation_data(folder_truth, folder_noisy,
                                        sigma=sigma, pickle_data=1, stop=5)
############################# training network ######################################
tl, ta = [], []
for e in range(500):  # for e in range(epochs):
    e += 1
    start_time = time.time()
    tl, ta, total = 0., 0., 0.
    np.savez('dncnn_model.npz', nn.layers.get_all_param_values(net))

    ## training
    for batch in generate_batches(x_train, y_train, batchsize_train):
        data, targets = batch
        l, a = train(data, targets)
        tl += l
        ta += a
        total += 1
    tl /= total
    ta /= total
    row = [e, tl, ta, time.time() - start_time]
    report('{:<10}{:<20.5f}{:<20.5f}{:<20.5f}'.format(*row), output_file)

    ## validation
    if e % 10 == 0:
        j = 0
        with np.load('dncnn_model.npz') as f:
            param_values = [f['arr_%d' % i] for i in range(len(f.files))]
            para_x = np.array(param_values)
            print('reloaded network parameters shape: ', para_x.shape)
        nn.layers.set_all_param_values(net_val, para_x[0])
        for img in x_noisy:
            y = np.reshape(img, newshape=(1, 1, 180, 180))  # fit the input layer dimensions
            # note: this recompiles three Theano functions for every validation image
            train_val, validate_val, predict_val = functions(net_val, eta, 180, 180)
            y = y.astype(np.float32)
            v = predict_val(y)
            x = y - v
            j += 1
            imsave(folder_result + '/denoised_' + str(j) + '.png', np.uint32(255 * abs(x[0][0])))
            imsave(folder_noisy + '/noisy_' + str(j) + '.png', np.uint32(255 * abs(y[0][0])))

report('finished training.', output_file)
duration = time.time() - strt_time
print('total time taken ', duration, ' seconds ', duration / 60, ' minutes')
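On the actual question of clearing 'junk' memory: not from the post, but a hedged sketch of the usual levers for a CNMEM_STATUS_OUT_OF_MEMORY error under Theano's CNMeM allocator. The flag values are examples only:

import os
# must be set before theano is first imported; values are illustrative
os.environ['THEANO_FLAGS'] = ('device=gpu,floatX=float32,'
                              'lib.cnmem=0.8,'    # cap CNMeM's initial pool
                              'allow_gc=True')    # let Theano free intermediate buffers
import theano

# the failing tensor's leading dimension (1024) is batchsize_train,
# so halving the batch size roughly halves that 419 MB allocation:
batchsize_train = 256 * 2   # hypothetical value; the post uses 256 * 4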

