zeronet/__init__.py

__version__ = '0.1.0'


zeronet/core/__init__.py


zeronet/core/function.py

# Author: ddlee, me@ddlee.cn
# License: MIT
# Docs:

import numpy as np

__all__ = ['linear_forward', 'linear_backward',
           'conv_forward', 'conv_backward',
           'max_pool_forward', 'max_pool_backward',
           'relu_forward', 'relu_backward',
           'sigmoid_forward', 'sigmoid_backward',
           'svm_loss', 'softmax_loss']


def linear_forward(x, weights):
    """
    Computes the forward pass for an affine (fully-connected) layer.

    The input x has shape (N, d_1, ..., d_k) and contains a minibatch of N
    examples, where each example x[i] has shape (d_1, ..., d_k). We reshape
    each input into a vector of dimension D = d_1 * ... * d_k, and then
    transform it to an output vector of dimension M.

    Inputs:
    - x: A numpy array containing input data, of shape (N, d_1, ..., d_k)
    - weights: A tuple (w, b) of
      - w: A numpy array of weights, of shape (D, M)
      - b: A numpy array of biases, of shape (M,)

    Returns:
    - out: output, of shape (N, M)
    """
    w, b = weights
    batch_size = x.shape[0]
    input_size = np.prod(x.shape[1:])
    x_ = x.reshape(batch_size, input_size)
    out = np.dot(x_, w) + b
    return out


def linear_backward(x, weights, dout):
    """
    Computes the backward pass for an affine layer.

    Inputs:
    - x: Input data, of shape (N, d_1, ..., d_k)
    - weights: A tuple (w, b) of
      - w: Weights, of shape (D, M)
      - b: Biases, of shape (M,)
    - dout: Upstream derivative, of shape (N, M)

    Returns a dict of gradients:
    - x: Gradient with respect to x, of shape (N, d_1, ..., d_k)
    - w: Gradient with respect to w, of shape (D, M)
    - b: Gradient with respect to b, of shape (M,)
    """
    w, b = weights
    dx = np.dot(dout, w.T)
    dx = dx.reshape(x.shape)
    batch_size = x.shape[0]
    input_size = np.prod(x.shape[1:])
    x = x.reshape(batch_size, input_size)
    dw = np.dot(x.T, dout)
    db = np.dot(dout.T, np.ones(batch_size))
    grads = {'x': dx, 'w': dw, 'b': db}
    return grads


def conv_forward(x, weights, conv_params):
    """
    A naive implementation of the forward pass for a convolutional layer.

    The input consists of N data points, each with C channels, height H and
    width W. We convolve each input with F different filters, where each
    filter spans all C channels and has height HH and width WW.

    Inputs:
    - x: Input data of shape (N, C, H, W)
    - weights: A tuple (w, b) of
      - w: Filter weights of shape (F, C, HH, WW)
      - b: Biases, of shape (F,)
    - conv_params: A dictionary with the following keys:
      - 'stride': The number of pixels between adjacent receptive fields in
        the horizontal and vertical directions.
      - 'pad': The number of pixels that will be used to zero-pad the input.

    Returns:
    - out: Output data, of shape (N, F, H', W') where H' and W' are given by
      H' = 1 + (H + 2 * pad - HH) / stride
      W' = 1 + (W + 2 * pad - WW) / stride
    """
    w, b = weights
    N, C, H, W = x.shape
    F, _, HH, WW = w.shape
    stride, pad = conv_params['stride'], conv_params['pad']
    H_out = int(1 + (H + 2 * pad - HH) / stride)
    W_out = int(1 + (W + 2 * pad - WW) / stride)
    out = np.zeros((N, F, H_out, W_out))
    x_pad = np.pad(x, ((0,), (0,), (pad,), (pad,)),
                   mode='constant', constant_values=0)
    for i in range(H_out):
        for j in range(W_out):
            # slide the receptive-field window over the padded input
            x_pad_masked = x_pad[:, :, i * stride:i * stride + HH,
                                 j * stride:j * stride + WW]
            for k in range(F):  # loop over filters
                out[:, k, i, j] = np.sum(
                    x_pad_masked * w[k, :, :, :], axis=(1, 2, 3))
    out = out + b[None, :, None, None]  # add bias
    return out
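
# Illustrative sketch (added for documentation, not part of the original
# zeronet source): a quick shape check for conv_forward on a tiny random
# batch, using the H' = 1 + (H + 2 * pad - HH) / stride formula above.
def _demo_conv_forward_shape():
    x = np.random.randn(2, 3, 8, 8)      # (N, C, H, W)
    w = np.random.randn(4, 3, 3, 3)      # (F, C, HH, WW)
    b = np.zeros(4)
    out = conv_forward(x, (w, b), {'stride': 1, 'pad': 1})
    assert out.shape == (2, 4, 8, 8)     # H' = 1 + (8 + 2 - 3) / 1 = 8
    return out.shape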

def conv_backward(x, weights, conv_params, dout):
    """
    A naive implementation of the backward pass for a convolutional layer.

    Inputs:
    - x, weights, conv_params: Same as in conv_forward.
    - dout: Upstream derivatives.

    Returns a dict of gradients:
    - x: Gradient with respect to x
    - w: Gradient with respect to w
    - b: Gradient with respect to b
    """
    w, b = weights
    N, C, H, W = x.shape
    F, _, HH, WW = w.shape
    stride, pad = conv_params['stride'], conv_params['pad']
    H_out = int(1 + (H + 2 * pad - HH) / stride)
    W_out = int(1 + (W + 2 * pad - WW) / stride)
    x_pad = np.pad(x, ((0,), (0,), (pad,), (pad,)),
                   mode='constant', constant_values=0)
    dx_pad = np.zeros_like(x_pad)
    dw = np.zeros_like(w)
    db = np.sum(dout, axis=(0, 2, 3))
    for i in range(H_out):
        for j in range(W_out):
            x_pad_masked = x_pad[:, :, i * stride:i * stride + HH,
                                 j * stride:j * stride + WW]
            for k in range(F):
                dw[k, :, :, :] += np.sum(
                    x_pad_masked * (dout[:, k, i, j])[:, None, None, None],
                    axis=0)
            for n in range(N):
                dx_pad[n, :, i * stride:i * stride + HH,
                       j * stride:j * stride + WW] += np.sum(
                    w[:, :, :, :] * (dout[n, :, i, j])[:, None, None, None],
                    axis=0)
    # strip the zero padding to recover the gradient w.r.t. the original input
    dx = dx_pad[:, :, pad:pad + H, pad:pad + W]
    grads = {'x': dx, 'w': dw, 'b': db}
    return grads


def max_pool_forward(x, pool_param):
    """
    A naive implementation of the forward pass for a max pooling layer.

    Inputs:
    - x: Input data, of shape (N, C, H, W)
    - pool_param: dictionary with the following keys:
      - 'pool_height': The height of each pooling region
      - 'pool_width': The width of each pooling region
      - 'stride': The distance between adjacent pooling regions

    Returns:
    - out: Output data
    """
    N, C, H, W = x.shape
    HH, WW = pool_param['pool_height'], pool_param['pool_width']
    stride = pool_param['stride']
    H_out = int((H - HH) / stride + 1)
    W_out = int((W - WW) / stride + 1)
    out = np.zeros((N, C, H_out, W_out))
    for i in range(H_out):
        for j in range(W_out):
            x_masked = x[:, :, i * stride:i * stride + HH,
                         j * stride:j * stride + WW]
            out[:, :, i, j] = np.max(x_masked, axis=(2, 3))
    return out


def max_pool_backward(x, pool_param, dout):
    """
    A naive implementation of the backward pass for a max pooling layer.

    Inputs:
    - x: Input data, of shape (N, C, H, W)
    - pool_param: Same dictionary as in the forward pass.
    - dout: Upstream derivatives

    Returns a dict of gradients:
    - x: Gradient with respect to x
    """
    N, C, H, W = x.shape
    HH, WW = pool_param['pool_height'], pool_param['pool_width']
    stride = pool_param['stride']
    H_out = int((H - HH) / stride + 1)
    W_out = int((W - WW) / stride + 1)
    dx = np.zeros_like(x)
    for i in range(H_out):
        for j in range(W_out):
            x_masked = x[:, :, i * stride:i * stride + HH,
                         j * stride:j * stride + WW]
            max_x_masked = np.max(x_masked, axis=(2, 3))
            # route the gradient only to the positions that achieved the max
            binary_mask = (x_masked == max_x_masked[:, :, None, None])
            dx[:, :, i * stride:i * stride + HH,
               j * stride:j * stride + WW] += \
                binary_mask * (dout[:, :, i, j])[:, :, None, None]
    grads = {'x': dx}
    return grads


def relu_forward(x):
    """
    Computes the ReLU activation for an input tensor.

    Inputs:
    - x: Input data, of arbitrary shape

    Returns:
    - out: ReLU activation of x
    """
    out = x * (x > 0)
    return out


def relu_backward(x, dout):
    """
    Computes the gradient of the ReLU activation for an input tensor.

    Inputs:
    - x: Input data, of arbitrary shape
    - dout: Upstream derivatives

    Returns a dict of gradients:
    - x: Gradient with respect to x
    """
    dx = dout * (x > 0)
    grads = {'x': dx}
    return grads


def sigmoid_forward(x):
    """
    Computes the sigmoid activation for an input tensor.

    Inputs:
    - x: Input data, of arbitrary shape

    Returns:
    - out: Sigmoid activation of x
    """
    out = 1 / (1 + np.exp(-x))
    return out


def sigmoid_backward(x, dout):
    """
    Computes the gradient of the sigmoid activation for an input tensor.

    Inputs:
    - x: Input data, of arbitrary shape
    - dout: Upstream derivatives

    Returns a dict of gradients:
    - x: Gradient with respect to x
    """
    s = sigmoid_forward(x)
    dx = dout * s * (1.0 - s)
    grads = {'x': dx}
    return grads
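
# Illustrative sketch (added for documentation, not part of the original
# zeronet source): compare sigmoid_backward against a centered numeric
# gradient of sum(sigmoid_forward(x) * dout).
def _demo_sigmoid_grad_check():
    x = np.random.randn(3, 4)
    dout = np.random.randn(3, 4)
    dx = sigmoid_backward(x, dout)['x']
    h = 1e-5
    num_dx = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'])
    while not it.finished:
        idx = it.multi_index
        orig = x[idx]
        x[idx] = orig + h
        pos = np.sum(sigmoid_forward(x) * dout)
        x[idx] = orig - h
        neg = np.sum(sigmoid_forward(x) * dout)
        x[idx] = orig
        num_dx[idx] = (pos - neg) / (2 * h)
        it.iternext()
    assert np.allclose(dx, num_dx, atol=1e-6)
    return num_dx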

def svm_loss(x, y):
    """
    Computes the loss and gradient for multiclass SVM classification.

    Inputs:
    - x: Input data, of shape (N, C) where x[i, j] is the score for the jth
      class for the ith input.
    - y: Vector of labels, of shape (N,) where y[i] is the label for x[i]
      and 0 <= y[i] < C

    Returns a tuple of:
    - loss: Scalar giving the loss
    - dx: Gradient of the loss with respect to x
    """
    N = x.shape[0]
    correct_class_scores = x[np.arange(N), y]
    margins = np.maximum(0, x - correct_class_scores[:, np.newaxis] + 1.0)
    margins[np.arange(N), y] = 0
    loss = np.sum(margins) / N
    num_pos = np.sum(margins > 0, axis=1)
    dx = np.zeros_like(x)
    dx[margins > 0] = 1
    dx[np.arange(N), y] -= num_pos
    dx /= N
    return loss, dx


def softmax_loss(x, y):
    """
    Computes the loss and gradient for softmax classification.

    Inputs:
    - x: Input data, of shape (N, C) where x[i, j] is the score for the jth
      class for the ith input.
    - y: Vector of labels, of shape (N,) where y[i] is the label for x[i]
      and 0 <= y[i] < C

    Returns a tuple of:
    - loss: Scalar giving the loss
    - dx: Gradient of the loss with respect to x
    """
    shifted_logits = x - np.max(x, axis=1, keepdims=True)
    Z = np.sum(np.exp(shifted_logits), axis=1, keepdims=True)
    log_probs = shifted_logits - np.log(Z)
    probs = np.exp(log_probs)
    N = x.shape[0]
    loss = -np.sum(log_probs[np.arange(N), y]) / N
    dx = probs.copy()
    dx[np.arange(N), y] -= 1
    dx /= N
    return loss, dx
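
# Illustrative sketch (added for documentation, not part of the original
# zeronet source): for all-zero scores the softmax loss equals log(C) and
# each row of the gradient sums to zero.
def _demo_softmax_loss_sanity():
    N, C = 8, 5
    x = np.zeros((N, C))
    y = np.random.randint(C, size=N)
    loss, dx = softmax_loss(x, y)
    assert np.isclose(loss, np.log(C))
    assert np.allclose(dx.sum(axis=1), 0.0)
    return loss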

zeronet/core/layer.py

# Author: ddlee, me@ddlee.cn
# License: MIT
# Docs:
# There are two special layers, input and loss, which stand for the start
# and the end of a network.

import numpy as np
from collections import defaultdict

from zeronet.core.function import *

__all__ = ['layer', 'Linear', 'Conv', 'Pool', 'ReLU', 'Sigmoid']


class layer(object):
    '''
    The base class for a layer in the network. It holds the layer's
    parameters and implements the forward and backward operations.

    State:
    - config holds the hyper-parameters for this layer
    - shape_dict maps param names to their shapes
    - params maps param names to their values
    '''

    def __init__(self, name, *inputs):
        self.name = name
        self.shape_dict = defaultdict(tuple)
        self.params = defaultdict(np.array)

    def _infer_shape(self, warmup_data):
        '''
        Infer param shapes from warm-up data and the layer configs.
        Fills shape_dict with the name and shape of every param so it can be
        initialised; called in the warm-up phase after the net is defined.
        '''
        raise NotImplementedError

    def _init_params(self):
        '''
        Initialise the weights inside the layer; layer.params is a dict
        mapping param names to values.
        '''
        mu = 0
        std = 0.05
        for name, shape in self.shape_dict.items():
            self.params[name] = np.random.normal(mu, std, size=shape)

    def warmup(self, warmup_data):
        '''Wrapper to be called in the warm-up phase.'''
        self._infer_shape(warmup_data)
        self._init_params()

    def forward(self, input):
        raise NotImplementedError

    def grad(self, input, dout):
        raise NotImplementedError

    def update(self, grads, optimizer, config):
        '''
        Update the params inside this layer; should be called after the
        backward pass. optimizer should be a partial function configured in
        the model class.
        '''
        DEBUG = False
        if len(self.params) == 0:  # some layers have no params
            return config
        for name, param in self.params.items():
            # Keep an optim config dict for every param in the net
            param_grad = grads[name]
            cfg = config[self.name + '_' + name]
            next_param, next_config = optimizer(param, param_grad, cfg)
            self.params[name] = next_param
            config[self.name + '_' + name] = next_config
            if DEBUG:
                print(name)
                print('param_shape', next_param.shape)
        return config


class Linear(layer):
    '''
    A fully-connected layer; output_shape is an int giving the output
    dimension of the layer.
    '''

    def __init__(self, name="linear", output_shape=256):
        super(Linear, self).__init__(name)
        self.output_shape = output_shape

    def _infer_shape(self, warmup_data):
        self.batch_size = warmup_data.shape[0]
        self.input_shape = np.prod(warmup_data.shape[1:])
        self.shape_dict['w'] = (self.input_shape, self.output_shape)
        self.shape_dict['b'] = (self.output_shape,)

    def forward(self, input):
        out = linear_forward(input, (self.params['w'], self.params['b']))
        return out

    def grad(self, input, dout):
        grads = linear_backward(
            input, (self.params['w'], self.params['b']), dout)
        return grads


class Conv(layer):

    def __init__(self, name="conv", filter=1, kernel_size=3, stride=1, pad=0):
        super(Conv, self).__init__(name)
        self.filter = filter
        self.kernel_size = kernel_size
        self.conv_params = defaultdict(int)
        self.conv_params['stride'] = stride
        self.conv_params['pad'] = pad

    def _infer_shape(self, warmup_data):
        '''The input should have shape (N, C, H, W)
        (batch_size, channels, height, width).'''
        self.batch_size = warmup_data.shape[0]
        self.channels = warmup_data.shape[1]
        self.shape_dict['w'] = (
            self.filter, self.channels, self.kernel_size, self.kernel_size)
        self.shape_dict['b'] = (self.filter, )

    def forward(self, input):
        out = conv_forward(
            input, (self.params['w'], self.params['b']), self.conv_params)
        return out

    def grad(self, input, dout):
        grads = conv_backward(
            input, (self.params['w'], self.params['b']),
            self.conv_params, dout)
        return grads


class Pool(layer):

    def __init__(self, name="pool", pool_height=2, pool_width=2, stride=2):
        super(Pool, self).__init__(name)
        self.pool_params = defaultdict(int)
        self.pool_params['pool_height'] = pool_height
        self.pool_params['pool_width'] = pool_width
        self.pool_params['stride'] = stride

    def _infer_shape(self, warmup_data):
        pass

    def forward(self, input):
        out = max_pool_forward(input, self.pool_params)
        return out

    def grad(self, input, dout):
        grads = max_pool_backward(input, self.pool_params, dout)
        return grads


class ReLU(layer):

    def __init__(self, name="relu"):
        super(ReLU, self).__init__(name)

    def _infer_shape(self, warmup_data):
        pass

    def forward(self, input):
        return relu_forward(input)

    def grad(self, input, dout):
        return relu_backward(input, dout)


class Sigmoid(layer):

    def __init__(self, name="sigmoid"):
        super(Sigmoid, self).__init__(name)

    def _infer_shape(self, warmup_data):
        pass

    def forward(self, input):
        return sigmoid_forward(input)

    def grad(self, input, dout):
        return sigmoid_backward(input, dout)
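
# Illustrative sketch (added for documentation, not part of the original
# zeronet source): warm up a Linear layer on a dummy batch, run a forward
# pass, and check the output shape.
def _demo_linear_layer():
    fc = Linear(name='fc1', output_shape=16)
    x = np.random.randn(4, 3, 8, 8)   # any (N, d_1, ..., d_k) input works
    fc.warmup(x)                      # infers a (3 * 8 * 8, 16) weight shape
    out = fc.forward(x)
    assert out.shape == (4, 16)
    return out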

zeronet/core/model.py

# Author: ddlee, me@ddlee.cn
# License: MIT
# Docs:

import pickle

import numpy as np

import zeronet.core.optimizer as optim


class model(object):
    """
    The model class encapsulates all the logic necessary for training
    classification models. It accepts both training and validation data and
    labels, so it can periodically check classification accuracy on both to
    watch out for overfitting.

    To train a model, you first construct a model instance, passing the net,
    the dataset, and various options (learning rate, batch size, etc.) to the
    constructor. You then call the warmup() method to initialise the net's
    parameters, and the train() method to run the optimization procedure.

    After the train() method returns, net.params will hold the parameters
    that performed best on the validation set over the course of training.
    In addition, the instance variable model.loss_history will contain a list
    of all losses encountered during training, and the instance variables
    model.train_acc_history and model.val_acc_history will be lists of the
    accuracies of the model on the training and validation set at each epoch.

    Example usage might look something like this:

    data = {
        'X_train': # training data
        'y_train': # training labels
        'X_val':   # validation data
        'y_val':   # validation labels
    }
    net = MyAwesomeNet(...)
    m = model(net, data,
              update_rule='sgd',
              optim_config={
                  'learning_rate': 1e-3,
              },
              lr_decay=0.95,
              num_epochs=10, batch_size=100,
              print_every=100)
    m.warmup()
    m.train()

    A model works on a net object that must conform to the following API:

    - net.params must be a dictionary mapping layer names to dictionaries of
      numpy arrays containing parameter values.

    - net.warmup(warmup_data, optim_config) initialises the parameters and a
      per-parameter optimizer config from one batch of training data.

    - net.forward(X) runs a test-time forward pass and returns scores of
      shape (N, C), where scores[i, c] gives the score of class c for X[i].

    - net.loss(X, y) runs a training-time forward pass and returns a tuple
      (loss, dout), where loss is a scalar and dout is the gradient of the
      loss with respect to the scores.

    - net.backward(optimizer, dout) backpropagates dout through the layers
      and updates their parameters with the given optimizer.
    """
    def __init__(self, net, data, **kwargs):
        """
        Construct a new model instance.

        Required arguments:
        - net: A net object conforming to the API described above
        - data: A dictionary of training and validation data containing:
          'X_train': Array, shape (N_train, d_1, ..., d_k) of training images
          'X_val': Array, shape (N_val, d_1, ..., d_k) of validation images
          'y_train': Array, shape (N_train,) of labels for training images
          'y_val': Array, shape (N_val,) of labels for validation images

        Optional arguments:
        - update_rule: A string giving the name of an update rule in
          optimizer.py. Default is 'sgd'.
        - optim_config: A dictionary containing hyperparameters that will be
          passed to the chosen update rule. Each update rule requires
          different hyperparameters (see optimizer.py), but all update rules
          require a 'learning_rate' parameter, so that should always be
          present.
        - lr_decay: A scalar for learning rate decay; after each epoch the
          learning rate is multiplied by this value.
        - batch_size: Size of minibatches used to compute loss and gradient
          during training.
        - num_epochs: The number of epochs to run for during training.
        - print_every: Integer; training losses will be printed every
          print_every iterations.
        - verbose: Boolean; if set to False then no output will be printed
          during training.
        - num_train_samples: Number of training samples used to check
          training accuracy; default is 1000; set to None to use the entire
          training set.
        - num_val_samples: Number of validation samples used to check
          validation accuracy; default is None, which uses the entire
          validation set.
        - checkpoint_name: If not None, then save model checkpoints here
          every epoch.
        """
        self.net = net
        self.X_train = data['X_train']
        self.y_train = data['y_train']
        self.X_val = data['X_val']
        self.y_val = data['y_val']

        # Unpack keyword arguments
        self.update_rule = kwargs.pop('update_rule', 'sgd')
        self.optim_config = kwargs.pop('optim_config', {})
        self.lr_decay = kwargs.pop('lr_decay', 1.0)
        self.batch_size = kwargs.pop('batch_size', 100)
        self.num_epochs = kwargs.pop('num_epochs', 10)
        self.num_train_samples = kwargs.pop('num_train_samples', 1000)
        self.num_val_samples = kwargs.pop('num_val_samples', None)
        self.checkpoint_name = kwargs.pop('checkpoint_name', None)
        self.print_every = kwargs.pop('print_every', 10)
        self.verbose = kwargs.pop('verbose', True)

        # Throw an error if there are extra keyword arguments
        if len(kwargs) > 0:
            extra = ', '.join('"%s"' % k for k in list(kwargs.keys()))
            raise ValueError('Unrecognized arguments %s' % extra)

        # Make sure the update rule exists, then replace the string
        # name with the actual function
        if not hasattr(optim, self.update_rule):
            raise ValueError('Invalid update_rule "%s"' % self.update_rule)
        self.optimizer = getattr(optim, self.update_rule)

        self._reset()

    def _reset(self):
        """
        Set up some book-keeping variables for optimization. Don't call this
        manually.
        """
        self.epoch = 0
        self.best_val_acc = 0
        self.best_params = {}
        self.loss_history = []
        self.train_acc_history = []
        self.val_acc_history = []

    def warmup(self):
        num_train = self.X_train.shape[0]
        batch_mask = np.random.choice(num_train, self.batch_size)
        warm_up_data = self.X_train[batch_mask]
        self.net.warmup(warm_up_data, self.optim_config)

    def _step(self):
        """
        Make a single gradient update. This is called by train() and should
        not be called manually.
        """
        # Make a minibatch of training data
        num_train = self.X_train.shape[0]
        batch_mask = np.random.choice(num_train, self.batch_size)
        X_batch = self.X_train[batch_mask]
        y_batch = self.y_train[batch_mask]

        # self.net.input_dict updates on every pass
        # forward pass
        loss, dout = self.net.loss(X_batch, y_batch)
        # backward pass
        self.net.backward(self.optimizer, dout)
        self.loss_history.append(loss)

    def _save_checkpoint(self):
        if self.checkpoint_name is None:
            return
        checkpoint = {
            'model': self.net,
            'update_rule': self.update_rule,
            'lr_decay': self.lr_decay,
            'optim_config': self.net.optim_configs,
            'batch_size': self.batch_size,
            'num_train_samples': self.num_train_samples,
            'num_val_samples': self.num_val_samples,
            'epoch': self.epoch,
            'loss_history': self.loss_history,
            'train_acc_history': self.train_acc_history,
            'val_acc_history': self.val_acc_history,
        }
        filename = '%s_epoch_%d.pkl' % (self.checkpoint_name, self.epoch)
        if self.verbose:
            print('Saving checkpoint to "%s"' % filename)
        with open(filename, 'wb') as f:
            pickle.dump(checkpoint, f)

    def predict(self, X, num_samples=None, batch_size=100):
        """
        Predict labels for the provided data.

        Inputs:
        - X: Array of data, of shape (N, d_1, ..., d_k)
        - num_samples: If not None, subsample the data and only run the model
          on num_samples datapoints.
        - batch_size: Split X into batches of this size to avoid using too
          much memory.

        Returns:
        - y_pred: Predictions for the given data.
        """
        # Maybe subsample the data
        N = X.shape[0]
        if num_samples is not None and N > num_samples:
            mask = np.random.choice(N, num_samples)
            N = num_samples
            X = X[mask]

        # Compute predictions in batches
        num_batches = N // batch_size
        if N % batch_size != 0:
            num_batches += 1
        y_pred = []
        for i in range(num_batches):
            start = i * batch_size
            end = (i + 1) * batch_size
            scores = self.net.forward(X[start:end])
            y_pred.append(np.argmax(scores, axis=1))
        y_pred = np.hstack(y_pred)
        return y_pred

    def check_accuracy(self, y_pred, y):
        return np.mean(y_pred == y)

    def train(self):
        """
        Run optimization to train the model.
        """
        num_train = self.X_train.shape[0]
        iterations_per_epoch = max(num_train // self.batch_size, 1)
        num_iterations = self.num_epochs * iterations_per_epoch

        for t in range(num_iterations):
            self._step()

            # Maybe print training loss
            if self.verbose and t % self.print_every == 0:
                print('(Iteration %d / %d) loss: %f' % (
                    t + 1, num_iterations, self.loss_history[-1]))

            # At the end of every epoch, increment the epoch counter and
            # decay the learning rate.
            epoch_end = (t + 1) % iterations_per_epoch == 0
            if epoch_end:
                self.epoch += 1
                for k in self.net.optim_configs:
                    self.net.optim_configs[k]['learning_rate'] *= self.lr_decay

            # Check train and val accuracy on the first iteration, the last
            # iteration, and at the end of each epoch.
            first_it = (t == 0)
            last_it = (t == num_iterations - 1)
            if first_it or last_it or epoch_end:
                # Subsample data and labels together so that predictions and
                # labels stay aligned.
                def subsample(X, y, num_samples):
                    if num_samples is not None and X.shape[0] > num_samples:
                        mask = np.random.choice(X.shape[0], num_samples)
                        return X[mask], y[mask]
                    return X, y

                train_X, train_y = subsample(
                    self.X_train, self.y_train, self.num_train_samples)
                val_X, val_y = subsample(
                    self.X_val, self.y_val, self.num_val_samples)
                train_acc = self.check_accuracy(
                    self.predict(train_X), train_y)
                val_acc = self.check_accuracy(self.predict(val_X), val_y)
                self.train_acc_history.append(train_acc)
                self.val_acc_history.append(val_acc)
                self._save_checkpoint()

                if self.verbose:
                    print('(Epoch %d / %d) train acc: %f; val acc: %f' % (
                        self.epoch, self.num_epochs, train_acc, val_acc))

                # Keep track of the best model
                if val_acc > self.best_val_acc:
                    self.best_val_acc = val_acc
                    self.best_params = {}
                    for layer, layer_params in self.net.params.items():
                        self.best_params[layer] = {}
                        for param, value in layer_params.items():
                            self.best_params[layer][param] = value.copy()

        # At the end of training swap the best params into the net
        self.net.params = self.best_params
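
# Illustrative sketch (added for documentation, not part of the original
# zeronet source): wire a tiny fully-connected net and random data into the
# model class. The imports, layer names and hyper-parameters below are
# example choices, not fixed parts of the API.
def _demo_model_training():
    from zeronet.core.net import net
    from zeronet.core.layer import Linear, ReLU
    from zeronet.core.function import softmax_loss

    data = {
        'X_train': np.random.randn(200, 3, 8, 8),
        'y_train': np.random.randint(10, size=200),
        'X_val': np.random.randn(50, 3, 8, 8),
        'y_val': np.random.randint(10, size=50),
    }
    toy_net = net([Linear('fc1', output_shape=32),
                   ReLU('relu1'),
                   Linear('fc2', output_shape=10)],
                  loss_func=softmax_loss, reg=1e-4)
    m = model(toy_net, data,
              update_rule='sgd',
              optim_config={'learning_rate': 1e-2},
              num_epochs=1, batch_size=50, print_every=10)
    m.warmup()   # initialise layer parameters from one training batch
    m.train()
    return m.predict(data['X_val'])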

zeronet/core/net.py

# Author: ddlee, me@ddlee.cn
# License: MIT
# Docs:

import numpy as np


class net(object):
    '''
    The net class is the abstraction of a DAG composed of stacked layers.
    It calls the forward and backward methods of its layers recursively.
    '''

    def __init__(self, layer_stack, loss_func, reg):
        '''
        layer_stack is a list of layers in a specific order. It starts with
        an input layer; the loss function is not included.
        '''
        self.layer_stack = layer_stack
        self.layer_mount = len(layer_stack)
        self.loss_func = loss_func
        self.reg = reg
        self.input_dict = None
        self.optim_configs = None
        self.params = None
        self.DEBUG = False

    def warmup(self, warmup_data, config):
        '''
        Initialise params using warmup_data, and build the param dict and an
        optim_config dict for every param.
        '''
        self.optim_configs = {}
        self.params = {}
        self.input_dict = {}
        out = None
        for k, _layer in enumerate(self.layer_stack):
            self.input_dict[_layer.name] = None
            self.params[_layer.name] = {}
            if k != 0:
                _layer.warmup(out)
                out = _layer.forward(out)
            else:
                _layer.warmup(warmup_data)
                out = _layer.forward(warmup_data)
            for p, param in _layer.params.items():
                d = {key: value for key, value in config.items()}
                self.optim_configs[_layer.name + '_' + p] = d
                self.params[_layer.name][p] = param

    def forward(self, data_batch):
        '''
        Perform the computation layer by layer, recursively.
        '''
        out = None
        for k, _layer in enumerate(self.layer_stack):
            if k != 0:
                self.input_dict[_layer.name] = out
                out = _layer.forward(out)
            else:
                self.input_dict[_layer.name] = data_batch
                out = _layer.forward(data_batch)
            if self.DEBUG:
                print(_layer.name)
                print('input_shape', self.input_dict[_layer.name].shape)
                print('output_shape', out.shape)
        if self.DEBUG:
            print(out)
        return out

    def loss(self, X_batch, y_batch):
        '''
        Wrapper around forward that also computes the loss.
        '''
        out = self.forward(X_batch)
        loss, dout = self.loss_func(out, y_batch)

        # add L2 regularization over all params
        reg_loss = 0
        for layer_param in self.params.values():
            for value in layer_param.values():
                reg_loss += np.sum(value ** 2)
        loss += self.reg * 0.5 * reg_loss
        return (loss, dout)

    def backward(self, optimizer, dout):
        '''
        Perform back-propagation and update the params recursively.
        Currently this just reverses the layer stack and calculates from the
        last layer backwards.
        '''
        # wrap dout from the loss function
        dout = dict({'x': dout})
        for _layer in self.layer_stack[::-1]:
            dout = _layer.grad(self.input_dict[_layer.name], dout['x'])
            self.optim_configs = _layer.update(
                dout, optimizer, self.optim_configs)
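
# Illustrative sketch (added for documentation, not part of the original
# zeronet source): stack Conv -> ReLU -> Pool -> Linear, warm the net up on
# a dummy batch and compute one loss. Layer names and sizes are arbitrary.
def _demo_net_loss():
    from zeronet.core.layer import Conv, ReLU, Pool, Linear
    from zeronet.core.function import softmax_loss

    stack = [Conv('conv1', filter=4, kernel_size=3, stride=1, pad=1),
             ReLU('relu1'),
             Pool('pool1'),
             Linear('fc1', output_shape=10)]
    toy = net(stack, loss_func=softmax_loss, reg=0.0)
    x = np.random.randn(2, 3, 8, 8)
    y = np.random.randint(10, size=2)
    toy.warmup(x, {'learning_rate': 1e-2})  # builds params and optim configs
    loss, dout = toy.loss(x, y)
    return loss, dout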

zeronet/core/optimizer.py

# Author: ddlee, me@ddlee.cn
# License: MIT
# Docs:

import numpy as np

__all__ = ['sgd', 'sgd_momentum']

"""
This file implements various first-order update rules that are commonly used
for training neural networks. Each update rule accepts current weights and
the gradient of the loss with respect to those weights, and produces the
next set of weights. Each update rule has the same interface:

def update(w, dw, config=None):

Inputs:
  - w: A numpy array giving the current weights.
  - dw: A numpy array of the same shape as w giving the gradient of the
    loss with respect to w.
  - config: A dictionary containing hyperparameter values such as learning
    rate, momentum, etc. If the update rule requires caching values over
    many iterations, then config will also hold these cached values.

Returns:
  - next_w: The next point after the update.
  - config: The config dictionary to be passed to the next iteration of the
    update rule.

NOTE: For most update rules, the default learning rate will probably not
perform well; however the default values of the other hyperparameters should
work well for a variety of different problems.

For efficiency, update rules may perform in-place updates, mutating w and
setting next_w equal to w.
"""


def sgd(w, dw, config=None):
    """
    Performs vanilla stochastic gradient descent.

    config format:
    - learning_rate: Scalar learning rate.
    """
    if config is None:
        config = {}
    config.setdefault('learning_rate', 1e-2)

    w -= config['learning_rate'] * dw
    return w, config


def sgd_momentum(w, dw, config=None):
    """
    Performs stochastic gradient descent with momentum.

    config format:
    - learning_rate: Scalar learning rate.
    - momentum: Scalar between 0 and 1 giving the momentum value.
      Setting momentum = 0 reduces to sgd.
    - velocity: A numpy array of the same shape as w and dw used to store a
      moving average of the gradients.
    """
    if config is None:
        config = {}
    config.setdefault('learning_rate', 1e-2)
    config.setdefault('momentum', 0.9)
    v = config.get('velocity', np.zeros_like(w))

    # integrate velocity, then integrate position
    v = config['momentum'] * v - config['learning_rate'] * dw
    next_w = w + v
    config['velocity'] = v
    return next_w, config
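
# Illustrative sketch (added for documentation, not part of the original
# zeronet source): one step of each update rule, showing the
# (w, dw, config) -> (next_w, config) contract; the velocity returned by
# sgd_momentum is carried to the next call through config.
def _demo_update_rules():
    w = np.ones(3)
    dw = np.array([0.5, -1.0, 2.0])

    next_w, cfg = sgd(w.copy(), dw, {'learning_rate': 0.1})
    # next_w == [0.95, 1.1, 0.8]; note that sgd also updates w in place

    m_w, m_cfg = sgd_momentum(w.copy(), dw, {'learning_rate': 0.1})
    m_w, m_cfg = sgd_momentum(m_w, dw, m_cfg)  # reuse config to keep velocity
    return next_w, m_w, m_cfg['velocity']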
""" if config is None: config = {} config.setdefault('learning_rate', 1e-2) config.setdefault('momentum', 0.9) v = config.get('velocity', np.zeros_like(w)) next_w = None v = config['momentum'] * v - \ config['learning_rate'] * dw # integrate velocity next_w = w + v config['velocity'] = v return next_w, config PK!zeronet/utils/__init__.pyPK!HnHTUzeronet-0.1.0.dist-info/WHEEL A н#Z;/"d&F[xzw@Zpy3Fv]\fi4WZ^EgM_-]#0(q7PK!H7 zeronet-0.1.0.dist-info/METADATAON0+Rc5-Pa!QѦq^mj#86 #ݝݙY=&wiOYzeronet/core/model.pyPK!x` ` Omzeronet/core/net.pyPK!C  yzeronet/core/optimizer.pyPK!.zeronet/utils/__init__.pyPK!HnHTUezeronet-0.1.0.dist-info/WHEELPK!H7 zeronet-0.1.0.dist-info/METADATAPK!H 1@Mzeronet-0.1.0.dist-info/RECORDPK k