### ### Heart Data - Neuwal Network ### ### ver 0.0.4 #################################################### ###------------------------------------------------------- ###--- 0. Preliminary # Using Heart2 from https://nmimoto.github.io/R/heart0.txt library(tidyverse) Heart <- read_csv(file="https://nmimoto.github.io/datasets/heart.csv") # Rename medv column as resp. Heart2 <- Heart %>% select(-"index") %>% # remove col named "index" rename(resp=AHD) %>% # rename the column relocate(resp) %>% # move "resp" to 1st column mutate(resp=as.factor(resp), # Turn these columns to instead of ChestPain=as.factor(ChestPain), Thal=as.factor(Thal), Sex=as.factor(Sex), Fbs=as.factor(Fbs), RestECG=as.factor(RestECG), ExAng=as.factor(ExAng)) print(Heart2, width=1000) table(Heart2$resp) # note that first level is "No" ######################################## ### ### When we are doing SVM or NN, we need to turn all variables into numbers. ### The data will be scaled inside svm() automatically. ### ######################################## print(Heart, width=1000) # Rename medv column as resp. Heart3 <- Heart %>% select(-"index") %>% # remove col named "index" rename(resp=AHD) %>% # rename the column relocate(resp) %>% # move "resp" to 1st column mutate(resp=as.factor(resp)) # Turn these columns to instead of print(Heart3, width=1000) Heart3 <- Heart3 %>% select(-c("ChestPain", "Thal")) # remove columns print(Heart3, width=1000) # Scalse all variables to be used in NN Heart4 <- Heart3 %>% mutate_at(-1, list(~base::scale(.)[,])) # scale all col except 1st print(Heart4, width=1000) ###-------------------------------------------------------------------- # need to remove rows with NA. Orig <- Heart4 # Check for N/A in data. Remove if there's any. summary(Orig) dim(Orig) sum(is.na(Orig)) # If there is na in the data, run below Orig <- Orig %>% na.omit() dim(Orig) ###------------------------------------------------- ###--- 1. Data Separation (Copied from Rtut-CV) ### Divide Dataset to Training and Testing and Set up k fold CV Orig <- Orig # Entire Data set (have to be data.frame) train.size <- 250 # num of rows for training set test.size <- 47 # num of rows for testing set my.seed <- 7211 # give a seed ### ### This line replaces the AAAA to BBBB chunk source('https://nmimoto.github.io/R/ML-00.txt') ### # Output (all data.frame): # Train.set / Train.resp # Test.set / Test.resp # CV.train[[k]] / CV.train.resp[[k]] # CV.valid[[k]] / CV.valid.resp[[k]] ###------------------------------------------------- ###--- 3. Neural Network 5-fold CV library(neuralnet) # install.packages('neuralnet', repos='https://cran.case.edu/') sigmoid <- function(x) 1 / (1 + exp(-x)) AUCs <- MSE.valid <- matrix(0, 5, 2) colnames(AUCs) = c("Train AUC", "Valid AUC") for (k in 1:5) { set.seed(my.seed) Fit00 = neuralnet::neuralnet(resp ~., # <===== Try using different set of columns here CV.train[[k]], hidden=3, # <===== Try different num of nodes here learningrate=1e-2, # <===== For numerical problem try changing number here act.fct=sigmoid, linear.output=FALSE) # Should be FALSE for classification # linear.output FALSE means activation function is applied to output node # summary(Fit00) # plot(Fit00) # Use only if NN is small enough (hidden <10) Train.prob = predict(Fit00, newdata=CV.train[[k]], type="response")[,1] Valid.prob = predict(Fit00, newdata=CV.valid[[k]], type="response")[,1] #- Check the training set accuracy library(caret) library(pROC) AUCs[k,] <- round(c(auc(factor(as.matrix(CV.train.resp[[k]])), Train.prob, levels=c("No", "Yes")), auc(factor(as.matrix(CV.valid.resp[[k]])), Valid.prob, levels=c("No", "Yes"))), 4) } AUCs Av.AUCs = apply(AUCs, 2, mean) names(Av.AUCs) = c("Av.Train AUC", "Av.Valid AUC") Av.AUCs ## ## Try with Heart3 ## Try with linear output=TRUE ## Try with hidden=5, c(5,5). ## Try running above couple of times with same setting. ## You get slightly diffrent number due to random starting point for parameters. ## ## ## Use Av.Valid AUC to choose the best model ## ###------------------------------------------------- ###--- 3. NN final fit with Train set library(neuralnet) # install.packages('neuralnet', repos='https://cran.case.edu/') sigmoid <- function(x) 1 / (1 + exp(-x)) set.seed(my.seed) Fit01 = neuralnet::neuralnet(resp ~., Train.set, hidden=3, learningrate=1e-2, act.fct=sigmoid, linear.output=FALSE) # Should be FALSE for classification # linear.output FALSE means activation function is applied to output node summary(Fit01) # plot(Fit01) # Use only if NN is small enough (hidden <10) #--- Get training / validation fit Train.prob = predict(Fit01, newdata=Train.set)[,1] head(Train.prob) Test.prob = predict(Fit01, newdata=Test.set)[,1] head(Test.prob) ###---------------------------- #- 3a Output result for given threshold value threshold = .1 # pick a threshold #- Check the training set accuracy library(caret) Train.pred = ifelse(Train.prob > threshold, "Yes", "No") # Turn the fitted values to Up/Down using threshold of .5 Test.pred = ifelse(Test.prob > threshold, "Yes", "No") CM.train <- confusionMatrix(factor(Train.pred), factor(as.matrix(Train.resp)), positive="Yes") CM.test <- confusionMatrix(factor(Test.pred), factor(as.matrix(Test.resp)), positive="Yes") CM.train # Training set result CM.train$table # output just the table CM.train[["byClass"]][["Sensitivity"]] CM.train[["byClass"]][["Specificity"]] CM.test # Testing set CM.test$table # output just the table # Test set result # Reference # Prediction No Yes # No 1437 62 [Specificity][ ] = TrueNeg / sum of col # Yes 0 1 [ ][Sensitivity] = TruePos / sum of col colSums(CM.test$table) / sum(colSums(CM.test$table)) # % of Actual Yes/No rowSums(CM.test$table) / sum(rowSums(CM.test$table)) # % of predicted Yes/No ###---------------------------- #- 3b Output ROC curve and AUC for all threshold library(pROC) #- Training Set plot.roc(factor(as.matrix(Train.resp)), Train.prob, levels=c("No", "Yes")) # point corresponding to CM.train abline(h=CM.train[["byClass"]][["Sensitivity"]], v=CM.train[["byClass"]][["Specificity"]], col="red") auc.train = auc(factor(as.matrix(Train.resp)), Train.prob, levels=c("No", "Yes")) text(.2, .2, paste("Train AUC=",round(auc.train, 3))) #- Test Set plot.roc(factor(as.matrix(Test.resp)), Test.prob, levels=c("No", "Yes")) # point corresponding to CM.test abline(h=CM.test[["byClass"]][["Sensitivity"]], v=CM.test[["byClass"]][["Specificity"]], col="red") auc.test = auc(factor(as.matrix(Test.resp)), Test.prob, levels=c("No", "Yes")) text(.2, .2, paste("Test AUC=",round(auc.test, 3))) c(auc.train, auc.test) layout(matrix(1:2, 1, 2)) plot.roc(factor(as.matrix(Train.resp)), Train.prob, levels=c("No", "Yes")) text(.2, .2, paste("Train AUC=",round(auc.train, 3))) plot.roc(factor(as.matrix(Test.resp)), Test.prob, levels=c("No", "Yes")) text(.2, .2, paste("Test AUC=",round(auc.test, 3))) layout(1) plot(AUCs[,2], col="red", ylim=c(.5,1)) lines(AUCs[,1], type="p") abline(h=auc.test) AUCs Av.AUCs c(auc.train, auc.test) ###----------------------------------------------------------- ###--- Threshold Picker cost.list = c(0,0,3,1)/4 # order of (TP, TN, FP, FN) #cost.list = c(0,0,2,1)/3 # order of (TP, TN, FP, FN) #cost.list = c(0,0,1,1)/2 # order of (TP, TN, FP, FN) #cost.list = c(0,0,1,2)/3 # order of (TP, TN, FP, FN) #cost.list = c(0,0,1,3)/4 # order of (TP, TN, FP, FN) threshold.list = seq(.01,.99,.01) # grid for threshold #- Set a model Chosen.Model <- Fit01 Train.prob = predict(Chosen.Model, newdata=Train.set)[,1] Test.prob = predict(Chosen.Model, newdata=Test.set)[,1] library(caret) # for confusionMatrix cost=0 for (i in 1:length(threshold.list)){ threshold = threshold.list[i] #- Check the training set accuracy Test.pred = ifelse(Test.prob > threshold, "Yes", "No") CM.test <- confusionMatrix(factor(Test.pred), factor(as.matrix(Test.resp)), positive="Yes") TP = CM.test$table[2,2] # True Pos TN = CM.test$table[1,1] # True Neg FP = CM.test$table[2,1] # False Pos FN = CM.test$table[1,2] # False Neg cost[i] = sum(c(TP, TN, FP, FN) * cost.list) } plot(threshold.list, cost, xlab="threshold") cost.list which.min(cost) min(cost) threshold.list[which.min(cost)]