역전파 구현 및 시각화 해보기

Machine Learning

역전파 구현 및 시각화 해보기

se0_ing 2024. 2. 11. 12:21

역전파 구현

회귀 문제 구현 예시 코드

#역전파 구현 전체 코드(회귀)

#은닉층의 활성화 함수: 시그모이드 함수
#출력층의 활성화 함수: 항등 함수
#손실 함수: 오차제곱합
#최적화 알고리즘: 확률적 경사 하강법
#배치 사이즈: 1


%matplotlib inline

import numpy as np
import matplotlib.pyplot as plt


# Training data: sin(x) sampled every 0.1 rad over [0, 2*pi).
angles = np.arange(0, np.pi * 2, 0.1)
correct_data = np.sin(angles)  # targets
# Rescale the inputs from [0, 2*pi) to [-1, 1) before feeding the network.
input_data = (angles - np.pi) / np.pi
n_data = len(correct_data)

# Number of neurons in the input, hidden and output layers.
n_in, n_mid, n_out = 1, 3, 1

# Hyperparameters.
wb_width = 0.01  # scale applied to the random initial weights and biases
eta = 0.1  # learning rate
epoch = 2001  # number of passes over the data
interval = 200  # epochs between progress reports

#--은닉층--
class MiddleLayer:
    """Fully connected hidden layer with a sigmoid activation.

    ``forward`` caches its input and output on the instance, ``backward``
    converts the gradient w.r.t. the output into gradients w.r.t. the
    weights, biases and input, and ``update`` applies one gradient-descent
    step to the parameters.
    """

    def __init__(self, n_upper, n):
        """Create the weight matrix (n_upper x n) and the bias vector (n).

        Both are standard-normal samples scaled by the module-level
        ``wb_width``.
        """
        weight_shape = (n_upper, n)
        self.w = wb_width * np.random.randn(*weight_shape)
        self.b = wb_width * np.random.randn(n)

    def forward(self, x):
        """Store ``x`` and compute ``self.y = sigmoid(x . w + b)``."""
        self.x = x  # needed again in backward()
        pre_activation = np.dot(x, self.w) + self.b
        self.y = 1 / (1 + np.exp(-pre_activation))

    def backward(self, grad_y):
        """Back-propagate ``grad_y`` (gradient w.r.t. this layer's output).

        Results are stored in ``grad_w``, ``grad_b`` and ``grad_x``.
        """
        activation = self.y
        # The sigmoid derivative is expressed through its own output: (1 - y) * y.
        delta = grad_y * (1 - activation) * activation

        self.grad_w = self.x.T.dot(delta)
        self.grad_b = delta.sum(axis=0)
        self.grad_x = delta.dot(self.w.T)

    def update(self, eta):
        """Take one gradient-descent step with learning rate ``eta``."""
        # ``-=`` mutates each parameter array in place, so the instance
        # attributes see the change without being rebound.
        for param, grad in ((self.w, self.grad_w), (self.b, self.grad_b)):
            param -= eta * grad

        
#--출력층--
class OutputLayer:
    """Fully connected output layer with an identity activation.

    ``backward`` takes the target directly and uses ``y - t`` as the error
    signal.
    """

    def __init__(self, n_upper, n):
        """Create the weight matrix (n_upper x n) and the bias vector (n).

        Both are standard-normal samples scaled by the module-level
        ``wb_width``.
        """
        weight_shape = (n_upper, n)
        self.w = wb_width * np.random.randn(*weight_shape)
        self.b = wb_width * np.random.randn(n)

    def forward(self, x):
        """Store ``x`` and compute ``self.y = x . w + b`` (no non-linearity)."""
        self.x = x  # needed again in backward()
        self.y = np.dot(x, self.w) + self.b

    def backward(self, t):
        """Back-propagate from the target ``t``.

        Results are stored in ``grad_w``, ``grad_b`` and ``grad_x``.
        """
        delta = self.y - t  # prediction minus target

        self.grad_w = self.x.T.dot(delta)
        self.grad_b = delta.sum(axis=0)
        self.grad_x = delta.dot(self.w.T)

    def update(self, eta):
        """Take one gradient-descent step with learning rate ``eta``."""
        # ``-=`` mutates each parameter array in place, so the instance
        # attributes see the change without being rebound.
        for param, grad in ((self.w, self.grad_w), (self.b, self.grad_b)):
            param -= eta * grad
        
        
# -- Build the layers --
middle_layer = MiddleLayer(n_in, n_mid)
output_layer = OutputLayer(n_mid, n_out)


# -- Training: stochastic gradient descent, one sample per update --
for i in range(epoch):

    # Visit the samples in a fresh random order every epoch.
    index_random = np.arange(n_data)
    np.random.shuffle(index_random)

    # The error and the predictions are only collected on reporting epochs.
    report = i % interval == 0
    total_error = 0
    plot_x = []
    plot_y = []

    for idx in index_random:

        x = input_data[idx:idx+1]  # input, kept as a length-1 array
        t = correct_data[idx:idx+1]  # target, kept as a length-1 array

        # Forward pass (the layers expect 2-D input, hence the reshape).
        middle_layer.forward(x.reshape(1, 1))
        output_layer.forward(middle_layer.y)

        # Backward pass, output layer first.
        output_layer.backward(t.reshape(1, 1))
        middle_layer.backward(output_layer.grad_x)

        # Parameter update.
        middle_layer.update(eta)
        output_layer.update(eta)

        if report:
            # output_layer.y still holds the prediction made before this
            # step's update; flatten it back to a vector.
            y = output_layer.y.reshape(-1)

            # Sum-of-squares error for this sample.
            total_error += 1.0 / 2.0 * np.sum(np.square(y - t))

            # Remember the prediction for the scatter plot.
            plot_x.append(x)
            plot_y.append(y)

    if report:
        # Target curve versus the predictions gathered during this epoch.
        plt.plot(input_data, correct_data)
        plt.scatter(plot_x, plot_y, marker="+", c="red")
        plt.show()

        # Printed only on reporting epochs: total_error is not accumulated on
        # the others, so printing there would always show a bogus 0.0.
        print("Epoch:" + str(i) + "/" + str(epoch), "Error:" + str(total_error / n_data))

출력:

분류 문제 구현 예시 코드

#역전파 구현 전체 코드(분류)

#은닉층의 활성화 함수: 시그모이드 함수
#출력층의 활성화 함수: 소프트맥스 함수
#손실 함수: 교차 엔트로피 오차
#최적화 알고리즘: 확률적 경사 하강법
#배치 사이즈: 1


%matplotlib inline

import numpy as np
import matplotlib.pyplot as plt

# -- Grid coordinates --
X = np.arange(-1.0, 1.1, 0.1)
Y = np.arange(-1.0, 1.1, 0.1)

# -- Inputs and one-hot targets --
# Every (x, y) pair of the grid, with x varying slowest.
input_data = np.array([[x, y] for x in X for y in Y])

# [0, 1] marks a point below the curve y = sin(pi * x), [1, 0] a point on or
# above it.
correct_data = np.array(
    [[0, 1] if y < np.sin(np.pi * x) else [1, 0] for x, y in input_data]
)

n_data = len(correct_data)

# -- Settings --
# Number of neurons in the input, hidden and output layers.
n_in, n_mid, n_out = 2, 6, 2

wb_width = 0.01  # scale applied to the random initial weights and biases
eta = 0.1  # learning rate
epoch = 101  # number of passes over the data
interval = 10  # epochs between progress reports


#--은닉층--
class MiddleLayer:
    """Fully connected hidden layer with a sigmoid activation.

    ``forward`` caches its input and output on the instance, ``backward``
    converts the gradient w.r.t. the output into gradients w.r.t. the
    weights, biases and input, and ``update`` applies one gradient-descent
    step to the parameters.
    """

    def __init__(self, n_upper, n):
        """Create the weight matrix (n_upper x n) and the bias vector (n).

        Both are standard-normal samples scaled by the module-level
        ``wb_width``.
        """
        weight_shape = (n_upper, n)
        self.w = wb_width * np.random.randn(*weight_shape)
        self.b = wb_width * np.random.randn(n)

    def forward(self, x):
        """Store ``x`` and compute ``self.y = sigmoid(x . w + b)``."""
        self.x = x  # needed again in backward()
        pre_activation = np.dot(x, self.w) + self.b
        self.y = 1 / (1 + np.exp(-pre_activation))

    def backward(self, grad_y):
        """Back-propagate ``grad_y`` (gradient w.r.t. this layer's output).

        Results are stored in ``grad_w``, ``grad_b`` and ``grad_x``.
        """
        activation = self.y
        # The sigmoid derivative is expressed through its own output: (1 - y) * y.
        delta = grad_y * (1 - activation) * activation

        self.grad_w = self.x.T.dot(delta)
        self.grad_b = delta.sum(axis=0)
        self.grad_x = delta.dot(self.w.T)

    def update(self, eta):
        """Take one gradient-descent step with learning rate ``eta``."""
        # ``-=`` mutates each parameter array in place, so the instance
        # attributes see the change without being rebound.
        for param, grad in ((self.w, self.grad_w), (self.b, self.grad_b)):
            param -= eta * grad

        
#--출력층--
class OutputLayer:
    """Fully connected output layer with a softmax activation.

    ``backward`` takes the target directly and uses ``y - t`` as the error
    signal.
    """

    def __init__(self, n_upper, n):
        """Create the weight matrix (n_upper x n) and the bias vector (n).

        Both are standard-normal samples scaled by the module-level
        ``wb_width``.
        """
        self.w = wb_width * np.random.randn(n_upper, n)
        self.b = wb_width * np.random.randn(n)

    def forward(self, x):
        """Store ``x`` and compute ``self.y = softmax(x . w + b)`` row by row.

        ``x`` must be 2-D (one sample per row) because the normalisation
        runs along axis 1.
        """
        self.x = x  # needed again in backward()
        u = np.dot(x, self.w) + self.b

        # Softmax is unchanged by subtracting a per-row constant. Shifting by
        # the row maximum keeps every exponent <= 0, so np.exp cannot
        # overflow to inf and produce nan for large logits.
        exp_u = np.exp(u - np.max(u, axis=1, keepdims=True))
        self.y = exp_u / np.sum(exp_u, axis=1, keepdims=True)

    def backward(self, t):
        """Back-propagate from the target ``t``.

        Results are stored in ``grad_w``, ``grad_b`` and ``grad_x``.
        """
        delta = self.y - t  # prediction minus target

        self.grad_w = np.dot(self.x.T, delta)
        self.grad_b = np.sum(delta, axis=0)

        self.grad_x = np.dot(delta, self.w.T)

    def update(self, eta):
        """Take one gradient-descent step with learning rate ``eta``."""
        self.w -= eta * self.grad_w
        self.b -= eta * self.grad_b
        
        
# -- Build the layers --
middle_layer = MiddleLayer(n_in, n_mid)
output_layer = OutputLayer(n_mid, n_out)


# -- Training: stochastic gradient descent, one sample per update --
sin_data = np.sin(np.pi * X)  # reference curve drawn on every report plot

for i in range(epoch):

    # Visit the samples in a fresh random order every epoch.
    index_random = np.arange(n_data)
    np.random.shuffle(index_random)

    # The error and the predicted classes are only collected on reporting
    # epochs.
    report = i % interval == 0
    total_error = 0
    x_1 = []
    y_1 = []
    x_2 = []
    y_2 = []

    for idx in index_random:

        x = input_data[idx]  # input
        t = correct_data[idx]  # one-hot target

        # Forward pass (the layers expect 2-D input, hence the reshape).
        middle_layer.forward(x.reshape(1, 2))
        output_layer.forward(middle_layer.y)

        # Backward pass, output layer first.
        output_layer.backward(t.reshape(1, 2))
        middle_layer.backward(output_layer.grad_x)

        # Parameter update.
        middle_layer.update(eta)
        output_layer.update(eta)

        if report:
            # output_layer.y still holds the prediction made before this
            # step's update; flatten it back to a vector.
            y = output_layer.y.reshape(-1)

            # Cross-entropy error; the 1e-7 keeps log() away from zero.
            total_error += - np.sum(t * np.log(y + 1e-7))

            # Sort the point into the class with the larger probability.
            if y[0] > y[1]:
                x_1.append(x[0])
                y_1.append(x[1])
            else:
                x_2.append(x[0])
                y_2.append(x[1])

    # Report once per reporting epoch, after every sample has been seen.
    # Running this inside the sample loop would draw one figure and print one
    # partial total per sample.
    if report:
        plt.plot(X, sin_data, linestyle="dashed")
        plt.scatter(x_1, y_1, marker="+")
        plt.scatter(x_2, y_2, marker="x")
        plt.show()

        print("Epoch:" + str(i) + "/" + str(epoch), "Error:" + str(total_error / n_data))

출력:

지금까지 역전파와 그에 대한 예시 구현 코드를 알아보았다.