Простая самописная нейросеть на Питоне

Небольшая самодеятельность: попытка кратко описать нейросеть на Питоне без использования специализированных библиотек. Каждый новый листинг включает код предыдущего, то есть код наращивается по ходу изложения.

Создаём двухслойную нейросеть (входной слой при подсчёте слоёв не учитывается).

двухслойная сеть

1. Создаем класс нейросети в Питоне

class NeuralNetwork:
    """Skeleton of a two-layer network: holds the data and random initial weights."""

    def __init__(self, x, y):
        """Store the training data and allocate weights and the output buffer.

        Args:
            x: Input array; its second dimension gives the number of rows
                of the first weight matrix.
            y: Target array; the output buffer is created with the same shape.
        """
        hidden_units = 4
        self.input = x
        self.y = y
        # First-layer weights are drawn before second-layer weights, so a
        # fixed NumPy seed reproduces the same initial state.
        self.weights1 = np.random.rand(x.shape[1], hidden_units)
        self.weights2 = np.random.rand(hidden_units, 1)
        self.output = np.zeros(y.shape)

class NeuralNetwork:
    """Skeleton of a two-layer network: holds the data and random initial weights."""

    def __init__(self, x, y):
        """Store the training data and allocate weights and the output buffer.

        Args:
            x: Input array; its second dimension gives the number of rows
                of the first weight matrix.
            y: Target array; the output buffer is created with the same shape.
        """
        self.input = x
        # Weights start as uniform random values; the hidden layer has 4 units.
        self.weights1 = np.random.rand(self.input.shape[1], 4)
        self.weights2 = np.random.rand(4, 1)
        self.y = y
        self.output = np.zeros(y.shape)

2. Добавляем функцию прямого прохода (от входного слоя к выходному)

Сигмоида используется в качестве функции активации (её описание будет дано в последнем листинге с примером).

class NeuralNetwork:
    """Two-layer network with sigmoid activations (forward pass only)."""

    def __init__(self, x, y):
        """Store the training data and allocate weights and the output buffer.

        Args:
            x: Input array; its second dimension gives the number of rows
                of the first weight matrix.
            y: Target array; the output buffer is created with the same shape.
        """
        hidden_units = 4
        self.input = x
        self.y = y
        # First-layer weights are drawn before second-layer weights, so a
        # fixed NumPy seed reproduces the same initial state.
        self.weights1 = np.random.rand(x.shape[1], hidden_units)
        self.weights2 = np.random.rand(hidden_units, 1)
        self.output = np.zeros(y.shape)

    def feedforward(self):
        """Run the forward pass, storing the hidden activations and the output."""
        hidden_input = np.dot(self.input, self.weights1)
        self.layer1 = sigmoid(hidden_input)
        output_input = np.dot(self.layer1, self.weights2)
        self.output = sigmoid(output_input)

class NeuralNetwork:
    """Two-layer network with sigmoid activations (forward pass only)."""

    def __init__(self, x, y):
        """Store the training data and allocate weights and the output buffer.

        Args:
            x: Input array; its second dimension gives the number of rows
                of the first weight matrix.
            y: Target array; the output buffer is created with the same shape.
        """
        self.input = x
        # Weights start as uniform random values; the hidden layer has 4 units.
        self.weights1 = np.random.rand(self.input.shape[1], 4)
        self.weights2 = np.random.rand(4, 1)
        self.y = y
        self.output = np.zeros(self.y.shape)

    def feedforward(self):
        """Run the forward pass, storing the hidden activations and the output."""
        self.layer1 = sigmoid(np.dot(self.input, self.weights1))
        self.output = sigmoid(np.dot(self.layer1, self.weights2))

нейросеть

3. Добавляем обратное распространение

Функция потерь — сумма квадратов ошибок: $Loss(y, \hat{y}) = \sum_{i=1}^{n} (y_i - \hat{y}_i)^2$

class NeuralNetwork:
    """Two-layer network with sigmoid activations, a forward pass and backpropagation."""

    def __init__(self, x, y):
        """Store the training data and allocate weights and the output buffer.

        Args:
            x: Input array; its second dimension gives the number of rows
                of the first weight matrix.
            y: Target array; the output buffer is created with the same shape.
        """
        hidden_units = 4
        self.input = x
        self.y = y
        # First-layer weights are drawn before second-layer weights, so a
        # fixed NumPy seed reproduces the same initial state.
        self.weights1 = np.random.rand(x.shape[1], hidden_units)
        self.weights2 = np.random.rand(hidden_units, 1)
        self.output = np.zeros(y.shape)

    def feedforward(self):
        """Run the forward pass, storing the hidden activations and the output."""
        hidden_input = np.dot(self.input, self.weights1)
        self.layer1 = sigmoid(hidden_input)
        output_input = np.dot(self.layer1, self.weights2)
        self.output = sigmoid(output_input)

    def backprop(self):
        """Adjust both weight matrices once, using the chain rule on the loss.

        Relies on ``self.layer1`` and ``self.output`` as set by ``feedforward``.
        """
        # Error term at the output: 2 * (y - output) scaled by the sigmoid slope.
        output_delta = 2 * (self.y - self.output) * sigmoid_derivative(self.output)
        # The same term carried back through the second-layer weights.
        hidden_delta = np.dot(output_delta, self.weights2.T) * sigmoid_derivative(self.layer1)

        d_weights2 = np.dot(self.layer1.T, output_delta)
        d_weights1 = np.dot(self.input.T, hidden_delta)

        # Both adjustments are computed before either matrix is modified.
        self.weights1 += d_weights1
        self.weights2 += d_weights2

class NeuralNetwork:
    """Two-layer network with sigmoid activations, a forward pass and backpropagation."""

    def __init__(self, x, y):
        """Store the training data and allocate weights and the output buffer.

        Args:
            x: Input array; its second dimension gives the number of rows
                of the first weight matrix.
            y: Target array; the output buffer is created with the same shape.
        """
        self.input = x
        # Weights start as uniform random values; the hidden layer has 4 units.
        self.weights1 = np.random.rand(self.input.shape[1], 4)
        self.weights2 = np.random.rand(4, 1)
        self.y = y
        self.output = np.zeros(self.y.shape)

    def feedforward(self):
        """Run the forward pass, storing the hidden activations and the output."""
        self.layer1 = sigmoid(np.dot(self.input, self.weights1))
        self.output = sigmoid(np.dot(self.layer1, self.weights2))

    def backprop(self):
        """Adjust both weight matrices once, using the chain rule on the loss.

        Relies on ``self.layer1`` and ``self.output`` as set by ``feedforward``.
        """
        # Error term at the output: 2 * (y - output) scaled by the sigmoid slope.
        output_delta = 2 * (self.y - self.output) * sigmoid_derivative(self.output)
        # The same term carried back through the second-layer weights.
        hidden_delta = np.dot(output_delta, self.weights2.T) * sigmoid_derivative(self.layer1)

        d_weights2 = np.dot(self.layer1.T, output_delta)
        d_weights1 = np.dot(self.input.T, hidden_delta)

        # Both adjustments are computed before either matrix is modified.
        self.weights1 += d_weights1
        self.weights2 += d_weights2

Производная и градиент, поиск минимума:

минимизация функции потерь

4. Проверяем на примере (обратите внимание, что в начале добавлено несколько важных строк)

import numpy as np
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

def sigmoid(x):
    """Return the logistic function 1 / (1 + e**(-x)) of *x*; element-wise for arrays."""
    denominator = 1 + np.exp(-x)
    return 1.0 / denominator

def sigmoid_derivative(x):
    """Return ``x * (1 - x)``.

    This is the slope of the sigmoid when *x* is already a sigmoid output,
    which is how the network's ``backprop`` calls it.
    """
    complement = 1.0 - x
    return x * complement

def compute_loss(y_hat, y):
    """Return the sum of squared differences between predictions *y_hat* and targets *y*."""
    squared_error = (y_hat - y) ** 2
    return squared_error.sum()

class NeuralNetwork:
    """Two-layer network with sigmoid activations, a forward pass and backpropagation."""

    def __init__(self, x, y):
        """Store the training data and allocate weights and the output buffer.

        Args:
            x: Input array; its second dimension gives the number of rows
                of the first weight matrix.
            y: Target array; the output buffer is created with the same shape.
        """
        hidden_units = 4
        self.input = x
        self.y = y
        # First-layer weights are drawn before second-layer weights, so a
        # fixed NumPy seed reproduces the same initial state.
        self.weights1 = np.random.rand(x.shape[1], hidden_units)
        self.weights2 = np.random.rand(hidden_units, 1)
        self.output = np.zeros(y.shape)

    def feedforward(self):
        """Run the forward pass, storing the hidden activations and the output."""
        hidden_input = np.dot(self.input, self.weights1)
        self.layer1 = sigmoid(hidden_input)
        output_input = np.dot(self.layer1, self.weights2)
        self.output = sigmoid(output_input)

    def backprop(self):
        """Adjust both weight matrices once, using the chain rule on the loss.

        Relies on ``self.layer1`` and ``self.output`` as set by ``feedforward``.
        """
        # Error term at the output: 2 * (y - output) scaled by the sigmoid slope.
        output_delta = 2 * (self.y - self.output) * sigmoid_derivative(self.output)
        # The same term carried back through the second-layer weights.
        hidden_delta = np.dot(output_delta, self.weights2.T) * sigmoid_derivative(self.layer1)

        d_weights2 = np.dot(self.layer1.T, output_delta)
        d_weights1 = np.dot(self.input.T, hidden_delta)

        # Both adjustments are computed before either matrix is modified.
        self.weights1 += d_weights1
        self.weights2 += d_weights2


# Training set: the target is the XOR of the first two columns;
# the third column is constantly 1.
X = np.array([
    [0, 0, 1],
    [0, 1, 1],
    [1, 0, 1],
    [1, 1, 1],
])
y = np.array([[0], [1], [1], [0]])
nn = NeuralNetwork(X, y)

loss_values = []

# 1500 iterations: forward pass, weight update, then record the loss of the
# output produced by that forward pass.
for i in range(1500):
    nn.feedforward()
    nn.backprop()
    loss = compute_loss(nn.output, y)
    loss_values.append(loss)

# Final predictions and the last recorded loss.
print(nn.output)
print(loss)

# Loss per iteration.
plt.plot(loss_values)

import numpy as np

import matplotlib.pyplot as plt

%matplotlib inline

def sigmoid(x):
    """Return the logistic function 1 / (1 + e**(-x)) of *x*; element-wise for arrays."""
    return 1.0 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    """Return ``x * (1 - x)``.

    This is the slope of the sigmoid when *x* is already a sigmoid output,
    which is how the network's ``backprop`` calls it.
    """
    return x * (1.0 - x)

def compute_loss(y_hat, y):
    """Return the sum of squared differences between predictions *y_hat* and targets *y*."""
    return ((y_hat - y) ** 2).sum()

class NeuralNetwork:
    """Two-layer network with sigmoid activations, a forward pass and backpropagation."""

    def __init__(self, x, y):
        """Store the training data and allocate weights and the output buffer.

        Args:
            x: Input array; its second dimension gives the number of rows
                of the first weight matrix.
            y: Target array; the output buffer is created with the same shape.
        """
        self.input = x
        # Weights start as uniform random values; the hidden layer has 4 units.
        self.weights1 = np.random.rand(self.input.shape[1], 4)
        self.weights2 = np.random.rand(4, 1)
        self.y = y
        self.output = np.zeros(self.y.shape)

    def feedforward(self):
        """Run the forward pass, storing the hidden activations and the output."""
        self.layer1 = sigmoid(np.dot(self.input, self.weights1))
        self.output = sigmoid(np.dot(self.layer1, self.weights2))

    def backprop(self):
        """Adjust both weight matrices once, using the chain rule on the loss.

        Relies on ``self.layer1`` and ``self.output`` as set by ``feedforward``.
        """
        # Error term at the output: 2 * (y - output) scaled by the sigmoid slope.
        output_delta = 2 * (self.y - self.output) * sigmoid_derivative(self.output)
        # The same term carried back through the second-layer weights.
        hidden_delta = np.dot(output_delta, self.weights2.T) * sigmoid_derivative(self.layer1)

        d_weights2 = np.dot(self.layer1.T, output_delta)
        d_weights1 = np.dot(self.input.T, hidden_delta)

        # Both adjustments are computed before either matrix is modified.
        self.weights1 += d_weights1
        self.weights2 += d_weights2

# Training set: the target is the XOR of the first two columns;
# the third column is constantly 1.
X = np.array([[0, 0, 1],
              [0, 1, 1],
              [1, 0, 1],
              [1, 1, 1]])
y = np.array([[0], [1], [1], [0]])
nn = NeuralNetwork(X, y)

loss_values = []

# 1500 iterations: forward pass, weight update, then record the loss of the
# output produced by that forward pass.
for i in range(1500):
    nn.feedforward()
    nn.backprop()
    loss = compute_loss(nn.output, y)
    loss_values.append(loss)

# Final predictions and the last recorded loss.
print(nn.output)
print(loss)

# Loss per iteration.
plt.plot(loss_values)

Пояснение к примеру:
x1 принимает значения 0, 0, 1, 1
x2 принимает значения 0, 1, 0, 1
x3 принимает значения 1, 1, 1, 1
y (целевая переменная) принимает значения 0, 1, 1, 0 соответственно

Наша нейросеть пытается подобрать идеальный набор весов, чтобы описать данную функцию. Проведено 1500 прогонов (итераций).

В моём случае результат получился таким:
[[0.02313339]
[0.97440622]
[0.98318163]
[0.02296596]]
loss: 0.0019279632821274807
Сравните с идеалом:
[[0]
[1]
[1]
[0]]

Визуализация функции потерь показывает, как она минимизируется (стремится к нулю).

визуализация функции потерь
Несмотря на то, что полученные значения не совпадают с идеалом в точности, результат получился хорошим. Точное совпадение, напротив, могло бы указывать на переобучение модели — когда модель хорошо работает на тренировочных данных и плохо на новых.

Простая самописная нейросеть на Питоне

Искать

Недавнее

Тематика

Data Scientist # 1

Данные — новый актив!

Войдите в свой аккаунт