BACK END/R
[R] R 정리 20 - MLP(deep learning)
circle kim
2021. 2. 4. 11:29
20. Neural Network : MLP - 역전파 지원 (deep learning)
- 라이브러리 load
# Install neuralnet only when it is not already available, so that re-running
# the script does not download and reinstall the package every time; then
# attach it for the neuralnet() / compute() calls below.
if (!requireNamespace("neuralnet", quietly = TRUE)) {
  install.packages("neuralnet")
}
library(neuralnet)
- 데이터
# Peek at the raw data and list the class labels.
head(iris, 2)
unique(iris$Species) # setosa versicolor virginica

# Encode the species as numeric codes in a new Species2 column
# (setosa = 1, versicolor = 2, virginica = 3) through a named lookup vector,
# then drop the original factor column so only numeric columns remain.
iris$Species2 <- unname(
  c(setosa = 1, versicolor = 2, virginica = 3)[as.character(iris$Species)]
)
iris$Species <- NULL
head(iris, 2)
# Sepal.Length Sepal.Width Petal.Length Petal.Width Species2
# 1 5.1 3.5 1.4 0.2 1
# 2 4.9 3.0 1.4 0.2 1
- train, test
# Reproducible 70/30 split: draw 70% of the row numbers at random for the
# training set; every row that was not drawn goes to the test set.
set.seed(42)
idx <- sample(nrow(iris), nrow(iris) * 0.7)
train <- iris[idx, ]
test <- iris[-idx, ]
- 정규화
# Min-max scale a numeric vector.
#   x  : numeric vector to scale
#   lo : value that is mapped to 0 (defaults to min(x))
#   hi : value that is mapped to 1 (defaults to max(x))
# Returns a numeric vector of the same length as x.
# The earlier version evaluated x - min(x) / max(x) - min(x); because of
# operator precedence that only shifted x by a constant and never scaled it
# into [0, 1]. The parentheses below apply the intended formula.
normal_func <- function(x, lo = min(x), hi = max(x)) {
  span <- hi - lo
  if (span == 0) {
    # Constant input: avoid dividing by zero and map everything to 0.
    return(rep(0, length(x)))
  }
  (x - lo) / span
}
normal_func(c(1,2,3)) # 0.0 0.5 1.0

# Scale every training column (the numeric label included) with its own range.
train_nor <- as.data.frame(lapply(train, normal_func))
head(train_nor, 3)
# Expected first rows (rounded to 3 decimals):
#   Sepal.Length Sepal.Width Petal.Length Petal.Width Species2
# 1        0.278       0.708        0.071       0.042      0.0
# 2        0.361       0.375        0.446       0.500      0.5
# 3        0.500       0.333        0.643       0.458      0.5

# Scale the test set with the TRAINING minimum / maximum of each column, so
# both sets share one mapping; test values may therefore fall slightly
# outside [0, 1].
test_nor <- as.data.frame(
  Map(normal_func, test, lapply(train, min), lapply(train, max))
)
head(test_nor, 3)
# Expected first rows (rounded to 3 decimals):
#   Sepal.Length Sepal.Width Petal.Length Petal.Width Species2
# 1        0.083       0.583        0.054       0.083        0
# 2        0.306       0.708        0.071       0.042        0
# 3        0.139       0.583        0.089       0.042        0
- 모델생성
help("neuralnet")
# Formula is label ~ features; a single hidden neuron.
model <- neuralnet(Species2 ~ Sepal.Length+Sepal.Width+Petal.Length+Petal.Width,
                   data=train_nor, hidden = 1)
model
plot(model)
- 모델 성능평가 : predict() x compute() 사용
# Forward-pass the four feature columns of the scaled test set.
model_result <- compute(model, test_nor[c(1:4)])
names(model_result) # neurons net.result
model_result$neurons
head(model_result$net.result, 3) # predicted values (one column matrix)
head(test_nor$Species2, 3) # actual values: 0 = setosa, 0.5 = versicolor, 1 = virginica
- 상관관계 확인 후 분류 정확도 출력
# Correlation between predicted and actual scaled labels (varies with the
# random weight initialisation of the network).
cor(model_result$net.result, test_nor$Species2)
pred_weights <- model_result$net.result

# Map one network output to a species name.
# The scaled labels are 0, 0.5 and 1, so the cut points sit halfway between
# neighbouring labels (0.25 and 0.75).
#   x : a single numeric prediction
# Returns 'setosa', 'versicolor' or 'virginica'.
func <- function(x) {
  if (x >= 0.75) {
    'virginica'
  } else if (x >= 0.25) {
    'versicolor'
  } else {
    'setosa'
  }
}
func(-1)  # "setosa"
func(2)   # "virginica"
func(0.5) # "versicolor"
sp <- apply(pred_weights, 1, func)
sp

# Confusion table with the same fixed level order on both axes, so the
# diagonal always pairs a predicted species with the same actual species,
# even when some species is never predicted.
species_levels <- c('setosa', 'versicolor', 'virginica')
actual <- species_levels[round(test_nor$Species2 * 2) + 1]
t <- table(sp = factor(sp, levels = species_levels),
           actual = factor(actual, levels = species_levels))
# Classification accuracy: correctly classified rows / all test rows.
sum(diag(t)) / nrow(test_nor)
- 모델 파라미터 변경
# Second model on the same scaled training data: five hidden neurons,
# trained with plain backpropagation.
model2 <- neuralnet(
  Species2 ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
  data = train_nor,
  hidden = 5,
  algorithm = "backprop",
  learningrate = 0.01 # learning rate used by the backprop algorithm
)
model2
plot(model2)
- 입력값을 정규화하지 않고 모델 작성
# Reload the built-in dataset to discard the earlier modifications.
data(iris)
head(iris, 2)
unique(iris$Species)

# Reproducible 70/30 split on the raw (unscaled) data.
set.seed(123)
idx <- sample(nrow(iris), nrow(iris) * 0.7)
train <- iris[idx, ]
test <- iris[-idx, ]

# Append one logical indicator column per species (one-hot targets).
train <- cbind(
  train,
  train$Species == 'setosa',
  train$Species == 'versicolor',
  train$Species == 'virginica'
)
train
# Give the three indicator columns readable names.
names(train)[6:8] <- c('setosa', 'versicolor', 'virginica')
head(train, 2)
# Fit a network with three outputs (one per species indicator column) on the
# unscaled features, using three hidden neurons.
model <- neuralnet(
  setosa + versicolor + virginica ~
    Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
  data = train,
  hidden = 3
)
plot(model)

# Forward-pass the test features; column 5 (Species) is dropped first.
pred <- compute(model, test[, -5])
pred_weight <- pred$net.result
idx <- apply(pred_weight, 1, which.max) # position of the largest output in each row
idx
# 1 2 3 5 11 18 19 28 29 33 36 45 48 49 55 56 57 58 59 61 62 65 66 68 70 77
# 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2
# 83 84 94 95 98 100 101 104 105 111 113 116 125 131 133 135 140 141 145
# 2 3 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3

# Indexing the label vector by position turns 1 / 2 / 3 into species names.
c('setosa', 'versicolor', 'virginica')[1]
pred <- c('setosa', 'versicolor', 'virginica')[idx]
pred
# [1] "setosa" "setosa" "setosa" "setosa" "setosa" "setosa" "setosa"
# [8] "setosa" "setosa" "setosa" "setosa" "setosa" "setosa" "setosa"
# [15] "versicolor" "versicolor" "versicolor" "versicolor" "versicolor" "versicolor" "versicolor"
# [22] "versicolor" "versicolor" "versicolor" "versicolor" "versicolor" "versicolor" "virginica"
# [29] "versicolor" "versicolor" "versicolor" "versicolor" "virginica" "virginica" "virginica"
# [36] "virginica" "virginica" "virginica" "virginica" "virginica" "virginica" "virginica"
# [43] "virginica" "virginica" "virginica"

# Confusion table: predicted species (rows) against actual species (columns).
table(pred, test$Species)
# pred setosa versicolor virginica
# setosa 14 0 0
# versicolor 0 17 0
# virginica 0 1 13
- 새로운 값으로 예측
# Use the first three test rows as a template and overwrite their values by
# hand in the interactive data editor.
my <- test[c(1:3), ]
my <- edit(my)
my
# Sepal.Length Sepal.Width Petal.Length Petal.Width Species
# 1 7 1 1 1 setosa
# 2 7 5 3 1 setosa
# 3 2 3 4 5 setosa

# Run the edited rows through the trained network (Species column dropped)
# and pick the species whose output is largest in each row.
mycomp <- compute(model, my[, -5])
mypred <- mycomp$net.result
idx2 <- apply(mypred, 1, which.max)
idx2
pred2 <- c('setosa', 'versicolor', 'virginica')[idx2]
pred2 # versicolor setosa virginica