BACK END/R

[R] R 정리 20 - MLP(deep learning)

circle kim 2021. 2. 4. 11:29

20. Neural Network : MLP - 역전파 지원 (deep learning)

 

- 라이브러리 load

# Install neuralnet only when it is not already available, so re-running
# the script does not download and reinstall the package every time.
if (!requireNamespace("neuralnet", quietly = TRUE)) {
  install.packages("neuralnet")
}
library(neuralnet)

 

 - 데이터

# Peek at the raw data and list the distinct species labels.
head(iris, 2)
unique(iris$Species) # setosa     versicolor virginica 
# Encode the species as a numeric code in a new Species2 column
# (setosa = 1, versicolor = 2, virginica = 3), then drop the original column.
species_names <- c('setosa', 'versicolor', 'virginica')
iris$Species2 <- as.numeric(match(as.character(iris$Species), species_names))
iris$Species <- NULL
head(iris, 2)
# Output recorded in the original post:
# Sepal.Length Sepal.Width Petal.Length Petal.Width Species2
# 1          5.1         3.5          1.4         0.2        1
# 2          4.9         3.0          1.4         0.2        1

 

 - train, test

# Reproducible 70/30 split: draw 70% of the row numbers for training and
# keep every remaining row for testing.
set.seed(42)
n_obs <- nrow(iris)
idx <- sample(seq_len(n_obs), n_obs * 0.7)
train <- iris[idx, ]
test <- iris[-idx, ]

 

 - 정규화

# Min-max normalisation: rescale a numeric vector linearly onto [0, 1].
#
# x: numeric vector.
# Returns a numeric vector of the same length as x in which min(x) maps to 0
# and max(x) maps to 1. A constant vector (max == min) maps to all zeros
# instead of producing NaN from 0 / 0.
#
# The previous expression, x - min(x) / max(x) - min(x), lacked parentheses
# and was evaluated as x - (min(x) / max(x)) - min(x), i.e. a plain shift of x.
# NOTE(review): with the corrected scaling every column passed through this
# function (an encoded 1/2/3 label included) lands in [0, 1], so any cut-offs
# or sample outputs derived from the old shifted values must be re-derived.
normal_func <- function(x) {
  lo <- min(x)
  span <- max(x) - lo
  # isTRUE() keeps NA input flowing through as NA rather than erroring in if().
  if (isTRUE(span == 0)) {
    return(rep(0, length(x)))
  }
  (x - lo) / span
}
normal_func(c(1,2,3)) # 0.0 0.5 1.0

# Apply normal_func to every column of the training rows (Species2 included)
# and rebuild a data frame from the rescaled columns.
train_nor <- as.data.frame(Map(normal_func, train))
head(train_nor, 3)
# Output recorded in the original post (depends on normal_func's definition):
# Sepal.Length Sepal.Width Petal.Length Petal.Width   Species2
# 1    0.4556962   1.2454545    0.2358209        0.06 -0.3333333
# 2    0.7556962   0.4454545    2.3358209        1.16  0.6666667
# 3    1.2556962   0.3454545    3.4358209        1.06  0.6666667
# Same treatment for the test rows.
# NOTE(review): the test set is rescaled with its own min/max rather than the
# training set's, so the two sets are not on a shared scale; confirm intent.
test_nor <- as.data.frame(Map(normal_func, test))
head(test_nor, 3)
# Output recorded in the original post (depends on normal_func's definition):
# Sepal.Length Sepal.Width Petal.Length Petal.Width   Species2
# 1   -0.3714286   0.6210526    0.2550725        0.02 -0.3333333
# 2    0.4285714   0.9210526    0.3550725       -0.08 -0.3333333
# 3   -0.1714286   0.6210526    0.4550725       -0.08 -0.3333333

 

- 모델생성

# Open the help page for neuralnet().
help("neuralnet")
# Fit a network with one hidden neuron: Species2 is the response (label),
# the four measurements are the predictors (features).
model <- neuralnet(
  formula = Species2 ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
  data = train_nor,
  hidden = 1
)
# Print the fitted object, then draw the network diagram.
model
plot(model)

 - 모델 성능평가 : predict() 대신 compute() 사용

# Run the four feature columns of the normalised test set through the network.
test_features <- test_nor[1:4]
model_result <- compute(model, test_features)
names(model_result) # neurons    net.result
model_result$neurons
head(model_result$net.result, 3) # predicted values
# Output recorded in the original post:
# [,1]
# [1,] -0.2967633
# [2,] -0.3408647
# [3,] -0.3024315
head(test_nor$Species2, 3) # actual values

 

 -  상관관계 확인 후 분류 정확도 출력

# Keep the raw network outputs, then check how strongly they correlate with
# the actual (normalised) labels of the test set.
pred_weights <- model_result$net.result

cor(pred_weights, test_nor$Species2) # 0.9762009 in the original post

# Map a single network output value to a species name.
#
# x:    one numeric prediction.
# cuts: two increasing cut-offs. x >= cuts[2] gives 'virginica',
#       x >= cuts[1] gives 'versicolor', anything lower gives 'setosa'.
#       The default c(0, 1) reproduces the original fixed thresholds.
# Returns a length-one character vector.
func <- function(x, cuts = c(0, 1)) {
  if (x >= cuts[2]) {
    'virginica'
  } else if (x >= cuts[1]) {
    'versicolor'
  } else {
    'setosa'
  }
}
func(-1)
func(2)
func(0.2)

# The network was trained on the encoded labels in train_nor, so take the
# decision boundaries from those values: each cut-off is the midpoint between
# two neighbouring class codes. This keeps the classification correct
# whatever scale the normalisation step put the labels on.
class_codes <- sort(unique(train_nor$Species2))
stopifnot(length(class_codes) == 3)
cuts <- (class_codes[-3] + class_codes[-1]) / 2
sp <- apply(pred_weights, 1, func, cuts = cuts)
sp

# Cross-tabulate predicted against actual species. Both factors carry all
# three levels in the same order, so the table is always 3 x 3 and its
# diagonal always holds the correct predictions, even when a class is never
# predicted. test$Species2 still holds the raw 1/2/3 codes, row-aligned with
# test_nor.
class_names <- c('setosa', 'versicolor', 'virginica')
actual <- class_names[test$Species2]
t <- table(sp = factor(sp, levels = class_names),
           actual = factor(actual, levels = class_names))
# Confusion matrix recorded in the original post (fixed 0/1 thresholds,
# columns labelled with the encoded label values); counts may shift slightly
# with midpoint cut-offs:
# sp           -0.333333333333333 0.666666666666667 1.66666666666667
# setosa                     12                 0                0
# versicolor                  0                12                0
# virginica                   0                 3               18
# Accuracy = correctly classified rows / all test rows.
sum(diag(t)) / nrow(test_nor)

 

 - 모델 파라미터 변경

# Refit with five hidden neurons, trained with the "backprop" algorithm.
# learningrate: the learning rate.
model2 <- neuralnet(
  formula = Species2 ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
  data = train_nor,
  hidden = 5,
  algorithm = "backprop",
  learningrate = 0.01
)
model2
plot(model2)

 


 - 입력값을 정규화하지 않고 모델 작성

# Reload the pristine iris data set so the Species column is available again.
data(iris)
head(iris, 2)
unique(iris$Species)
# Reproducible 70/30 train/test split.
set.seed(123)
idx <- sample(seq_len(nrow(iris)), nrow(iris) * 0.7)
train <- iris[idx, ]
test <- iris[-idx, ]

# One-hot encode the species: one logical indicator column per class.
# The columns are created directly under their final names instead of being
# appended with cbind() and renamed by position afterwards, which breaks as
# soon as the column count changes.
train$setosa <- train$Species == 'setosa'
train$versicolor <- train$Species == 'versicolor'
train$virginica <- train$Species == 'virginica'
train
head(train, 2)

# Fit on the un-normalised measurements with one output per species
# indicator column and three hidden neurons, then draw the network.
model <- neuralnet(
  formula = setosa + versicolor + virginica ~
    Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
  data = train,
  hidden = 3
)
plot(model)

 

# Feed the test measurements (column 5, Species, dropped) to the network.
pred <- compute(model, test[, -5])
pred_weight <- pred$net.result
# For every row, the position of the largest output (1, 2 or 3).
idx <- apply(pred_weight, 1, which.max)
idx
# Output recorded in the original post (row name above, winning position below):
# 1   2   3   5  11  18  19  28  29  33  36  45  48  49  55  56  57  58  59  61  62  65  66  68  70  77 
# 1   1   1   1   1   1   1   1   1   1   1   1   1   1   2   2   2   2   2   2   2   2   2   2   2   2 
# 83  84  94  95  98 100 101 104 105 111 113 116 125 131 133 135 140 141 145 
# 2   3   2   2   2   2   3   3   3   3   3   3   3   3   3   3   3   3   3 
# Positions translate to names by indexing into the label vector.
species_labels <- c('setosa', 'versicolor', 'virginica')
species_labels[1]

pred <- species_labels[idx]
pred
# Output recorded in the original post:
# [1] "setosa"     "setosa"     "setosa"     "setosa"     "setosa"     "setosa"     "setosa"    
# [8] "setosa"     "setosa"     "setosa"     "setosa"     "setosa"     "setosa"     "setosa"    
# [15] "versicolor" "versicolor" "versicolor" "versicolor" "versicolor" "versicolor" "versicolor"
# [22] "versicolor" "versicolor" "versicolor" "versicolor" "versicolor" "versicolor" "virginica" 
# [29] "versicolor" "versicolor" "versicolor" "versicolor" "virginica"  "virginica"  "virginica" 
# [36] "virginica"  "virginica"  "virginica"  "virginica"  "virginica"  "virginica"  "virginica" 
# [43] "virginica"  "virginica"  "virginica" 
# Confusion matrix: predicted species (rows) against actual species (columns).
table(pred, test$Species)
# Output recorded in the original post:
# pred         setosa versicolor virginica
# setosa         14          0         0
# versicolor      0         17         0
# virginica       0          1        13

 

 - 새로운 값으로 예측

# Take the first three test rows as a template for new observations.
my <- test[1:3, ]
# edit() opens the interactive data editor so the values can be changed by
# hand. It is skipped in batch runs (e.g. Rscript), where no editor is
# available; the unedited rows are then predicted as they are.
if (interactive()) {
  my <- edit(my)
}
my
# Values entered in the original post:
# Sepal.Length Sepal.Width Petal.Length Petal.Width Species
# 1            7           1            1           1  setosa
# 2            7           5            3           1  setosa
# 3            2           3            4           5  setosa

# Predict with the Species column (5) dropped, then pick, for each row, the
# output with the largest value and translate its position to a species name.
mycomp <- compute(model, my[-5])
mypred <- mycomp$net.result
idx2 <- apply(mypred, 1, which.max)
idx2
pred2 <- c('setosa', 'versicolor', 'virginica')[idx2]
pred2 # versicolor setosa virginica (for the edited values above)