Before trying to understand a deep learning framework, we need to understand, and ideally implement ourselves, the process of online learning and parameter updating; only then can we understand the mechanics of deep learning.
So here I provide an example I wrote myself, and we will walk through the forward and backward propagation process of network learning.
In addition, to support reading data in batches, I also designed a simple DataLoader class to simulate the data-iterator sampling used in deep learning, and added functionality for saving and loading the model.
3. Detailed code description
A network built from several Linear layers is implemented to fit a function. Project address: https://github.com/nickhuang1996/HJLNet. Run it with:
python demo.py
The function to fit is y = 20·sin(2πx), with x ranging from 0 to 2.
The results below, from left to right, were obtained with a learning rate of 0.03 and a batch size of 90:
Epochs: 400, 1000, 2000, and over 10000
Dataset.py
x is the data from 0 to 2 in steps of 0.01, so there are 200 points;
y is the target function, with an amplitude of 20;
length is the data length;
_build_items() builds a dict storing x and y;
_transform() reshapes the x and y data;
import numpy as np


class Dataset:
    def __init__(self):
        self.x = np.arange(0.0, 2.0, 0.01)
        self.y = 20 * np.sin(2 * np.pi * self.x)
        self.length = len(list(self.x))
        self._build_items()
        self._transform()

    def _build_items(self):
        self.items = [{
            'x': list(self.x)[i],
            'y': list(self.y)[i]
        } for i in range(self.length)]

    def _transform(self):
        self.x = self.x.reshape(1, self.__len__())
        self.y = self.y.reshape(1, self.__len__())

    def __len__(self):
        return self.length

    def __getitem__(self, index):
        return self.items[index]
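As a quick, hypothetical sanity check of the class above (not part of the repository), constructing the dataset and inspecting it should give:

dataset = Dataset()
print(len(dataset))       # 200
print(dataset[0])         # {'x': 0.0, 'y': 0.0}
print(dataset.x.shape)    # (1, 200) after _transform()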
DataLoader.py
Similar to the DataLoader in PyTorch, I initialize it with two parameters: dataset and batch_size.
__next__() is called on each iteration; it uses __len__() to get the dataset length and __getitem__() to fetch items from the dataset;
_concate() stitches the items of one batch together;
_transform() converts the batch into (1, batch_size) NumPy arrays;
import numpy as np


class DataLoader:
    def __init__(self, dataset, batch_size):
        self.dataset = dataset
        self.batch_size = batch_size
        self.current = 0

    def __next__(self):
        if self.current < self.dataset.__len__():
            if self.current + self.batch_size <= self.dataset.__len__():
                item = self._concate([self.dataset.__getitem__(index) for index in range(self.current, self.current + self.batch_size)])
                self.current += self.batch_size
            else:
                item = self._concate([self.dataset.__getitem__(index) for index in range(self.current, self.dataset.__len__())])
                self.current = self.dataset.__len__()
            return item
        else:
            self.current = 0
            raise StopIteration

    def _concate(self, dataset_items):
        concated_item = {}
        for item in dataset_items:
            for k, v in item.items():
                if k not in concated_item:
                    concated_item[k] = [v]
                else:
                    concated_item[k].append(v)
        concated_item = self._transform(concated_item)
        return concated_item

    def _transform(self, concated_item):
        for k, v in concated_item.items():
            concated_item[k] = np.array(v).reshape(1, len(v))
        return concated_item

    def __iter__(self):
        return self
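A hypothetical usage sketch combining the Dataset and DataLoader above (assuming both classes are importable in one scope):

dataset = Dataset()
dataloader = DataLoader(dataset=dataset, batch_size=90)
for batch in dataloader:
    # with 200 samples and batch_size 90: (1, 90), (1, 90), then (1, 20) for the last partial batch
    print(batch['x'].shape, batch['y'].shape)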
Linear.py
Similar to the Linear layer in PyTorch, I initialize it with three parameters: in_features, out_features and bias.
_init_parameters() initializes the weight and bias; weight has shape [out_features, in_features] and bias has shape [out_features, 1].
forward() is forward propagation, computing weight.dot(input) + bias:
import numpy as np


class Linear:
    def __init__(self, in_features, out_features, bias=False):
        self.in_features = in_features
        self.out_features = out_features
        self.bias = bias
        self._init_parameters()

    def _init_parameters(self):
        self.weight = np.random.random([self.out_features, self.in_features])
        if self.bias:
            self.bias = np.zeros([self.out_features, 1])
        else:
            self.bias = None

    def forward(self, input):
        return self.weight.dot(input) + self.bias
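A hypothetical shape check for a single layer (the random input here is only for illustration):

layer = Linear(in_features=1, out_features=25, bias=True)
x = np.random.random([1, 90])    # one input feature, a batch of 90 columns
z = layer.forward(x)
print(z.shape)                   # (25, 90): weight (25, 1) · x (1, 90) + bias (25, 1) broadcast

Note that with bias=False the constructor sets self.bias to None, so forward() would fail on the addition; the network below always passes bias=True, so that branch is never used.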
network.py
A simple network of several Linear layers.
_init_parameters() stores the weights and biases of each Linear layer in one dict;
forward() is forward propagation; the last layer is not passed through the Sigmoid;
backward() is back propagation, using gradient descent to propagate the error and update the parameters; for a stack of Linear layers with Sigmoid activations, it follows the equations shown right after this list;
update_grads() updates the weights and biases;
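In the code's notation (m is the batch size, σ the Sigmoid, layer indices i start at 0, and the last layer L has no activation), the updates computed by backward() are:

dz_L = output - y
dz_i = w_{i+1}^T · dz_{i+1} ⊙ σ'(z_i)        for the hidden layers (i < L)
dw_i = (1/m) · dz_i · a_i^T
db_i = (1/m) · Σ_batch dz_i

where a_i is the input to layer i (caches["a"][i] in the code) and z_i is its pre-activation output (caches["z"][i + 1]).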
# -*- coding: UTF-8 -*-
import numpy as np
from ..lib.Activation.Sigmoid import sigmoid_derivative, sigmoid
from ..lib.Module.Linear import Linear


class network:
    def __init__(self, layers_dim):
        self.layers_dim = layers_dim
        self.linear_list = [Linear(layers_dim[i - 1], layers_dim[i], bias=True) for i in range(1, len(layers_dim))]
        self.parameters = {}
        self._init_parameters()

    def _init_parameters(self):
        for i in range(len(self.layers_dim) - 1):
            self.parameters["w" + str(i)] = self.linear_list[i].weight
            self.parameters["b" + str(i)] = self.linear_list[i].bias

    def forward(self, x):
        a = []
        z = []
        caches = {}
        a.append(x)
        z.append(x)
        layers = len(self.parameters) // 2
        for i in range(layers):
            z_temp = self.linear_list[i].forward(a[i])
            self.parameters["w" + str(i)] = self.linear_list[i].weight
            self.parameters["b" + str(i)] = self.linear_list[i].bias
            z.append(z_temp)
            if i == layers - 1:
                a.append(z_temp)
            else:
                a.append(sigmoid(z_temp))
        caches["z"] = z
        caches["a"] = a
        return caches, a[layers]

    def backward(self, caches, output, y):
        layers = len(self.parameters) // 2
        grads = {}
        m = y.shape[1]
        for i in reversed(range(layers)):
            # Assume the last layer has no activation function;
            # this follows the backprop equations given above
            if i == layers - 1:
                grads["dz" + str(i)] = output - y
            else:  # the earlier layers all use the Sigmoid activation
                grads["dz" + str(i)] = self.parameters["w" + str(i + 1)].T.dot(
                    grads["dz" + str(i + 1)]) * sigmoid_derivative(
                    caches["z"][i + 1])
            grads["dw" + str(i)] = grads["dz" + str(i)].dot(caches["a"][i].T) / m
            grads["db" + str(i)] = np.sum(grads["dz" + str(i)], axis=1, keepdims=True) / m
        return grads

    # Update all the weights and biases
    def update_grads(self, grads, learning_rate):
        layers = len(self.parameters) // 2
        for i in range(layers):
            self.parameters["w" + str(i)] -= learning_rate * grads["dw" + str(i)]
            self.parameters["b" + str(i)] -= learning_rate * grads["db" + str(i)]
Sigmoid.py
Formula definition: σ(x) = 1 / (1 + e^(-x));
The derivative can be expressed in terms of the function itself: σ'(x) = σ(x)(1 - σ(x));
import numpy as np


def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))


def sigmoid_derivative(x):
    return sigmoid(x) * (1 - sigmoid(x))
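A quick numerical check of the two functions above:

print(sigmoid(0.0))               # 0.5
print(sigmoid_derivative(0.0))    # 0.25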
demo.py
Entry file for training the model; it covers training, testing and saving the model:
from code.scripts.trainer import Trainer
from code.config.default_config import _C


if __name__ == '__main__':
    trainer = Trainer(cfg=_C)
    trainer.train()
    trainer.test()
    trainer.save_models()
default_config.py
The configuration file:
layers_dim gives the input and output dimensions of each Linear layer;
batch_size is the batch size;
total_epochs is the total number of training epochs; one pass over x counts as one epoch;
resume decides whether to continue training from a saved model;
result_img_path is the path where the result image is saved;
ckpt_path is the path where the model is saved;
from easydict import EasyDict
_C = EasyDict()
_C.layers_dim = [1, 25, 1] # [1, 30, 10, 1]
_C.batch_size = 90
_C.total_epochs = 40000
_C.resume = True  # False means training from scratch
_C.result_img_path = "D:/project/Pycharm/HJLNet/result.png"
_C.ckpt_path = 'D:/project/Pycharm/HJLNet/ckpt.npy'
trainer.py
I won't go into too much detail here; train() performs the training and test() performs the testing:
from ..lib.Data.DataLoader import DataLoader
from ..scripts.Dataset import Dataset
from ..scripts.network import network
import matplotlib.pyplot as plt
import numpy as np


class Trainer:
    def __init__(self, cfg):
        self.ckpt_path = cfg.ckpt_path
        self.result_img_path = cfg.result_img_path
        self.layers_dim = cfg.layers_dim
        self.net = network(self.layers_dim)
        if cfg.resume:
            self.load_models()
        self.dataset = Dataset()
        self.dataloader = DataLoader(dataset=self.dataset, batch_size=cfg.batch_size)
        self.total_epochs = cfg.total_epochs
        self.iterations = 0
        self.x = self.dataset.x
        self.y = self.dataset.y
        self.draw_data(self.x, self.y)

    def train(self):
        for i in range(self.total_epochs):
            for item in self.dataloader:
                caches, output = self.net.forward(item['x'])
                grads = self.net.backward(caches, output, item['y'])
                self.net.update_grads(grads, learning_rate=0.03)
                if i % 100 == 0:
                    print("Epoch: {}/{} Iteration: {} Loss: {}".format(i + 1,
                                                                       self.total_epochs,
                                                                       self.iterations,
                                                                       self.compute_loss(output, item['y'])))
                self.iterations += 1

    def test(self):
        caches, output = self.net.forward(self.x)
        self.draw_data(self.x, output)
        self.save_results()
        self.show()

    def save_models(self):
        ckpt = {
            "layers_dim": self.net.layers_dim,
            "parameters": self.net.linear_list
        }
        np.save(self.ckpt_path, ckpt)
        print('Save models finish!!')

    def load_models(self):
        # allow_pickle=True is needed on newer NumPy versions to load a saved dict
        ckpt = np.load(self.ckpt_path, allow_pickle=True).item()
        self.net.layers_dim = ckpt["layers_dim"]
        self.net.linear_list = ckpt["parameters"]
        print('load models finish!!')

    def draw_data(self, x, y):
        plt.scatter(x, y)

    def show(self):
        plt.show()

    def save_results(self):
        plt.savefig(fname=self.result_img_path)  # note: figsize belongs to plt.figure(), not savefig()

    # Compute the error value (mean squared error)
    def compute_loss(self, output, y):
        return np.mean(np.square(output - y))
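Note that compute_loss() reports the mean squared error, mean((output - y)^2), and is only used for monitoring; the backward pass in network.py starts directly from dz = output - y rather than differentiating this value.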
During training, the epoch, iteration count and loss are printed, and the model and results are saved when training finishes.
1. Start training
2. After training, load the last model and continue training
3. Result display
With this, we know the forward and backward propagation process of a basic network training loop. More detailed code and principles will be covered in later updates, to help you learn the knowledge and concepts of deep learning ~