### ### ### SVM Demo - Simulated Data ### ###
####################################################
#
# Walk-through of support vector machines with the e1071 package:
#   1. linear kernel on a small simulated two-class problem
#   2. radial kernel on a non-linearly separable problem
#   3. ROC curves for two radial fits (ROCR)
#   4. more than two classes
#   5. the Khan gene-expression data (ISLR)
#
# The script is meant to be run top to bottom; later sections reuse and
# overwrite the globals `x`, `y`, `dat`, `svmfit`, `tune.out` and `fitted`.

library(e1071) # install.packages("e1071")
library(ROCR)  # install.packages("ROCR")
library(ISLR)  # install.packages("ISLR")

###-------------------------------------------------
###--- 1. SVM with linear kernel

##--- Simulate sim.data01
set.seed(1)
size <- 40
x <- matrix(rnorm(size * 2), ncol = 2)
# Labels come in blocks of ten (-1 x 10, +1 x 10, repeated) so that rows 1:20
# and rows 21:40 each hold ten observations per class. The original built a
# length-20 vector and relied on silent recycling against the 40 rows of `x`;
# the pattern is spelled out here and is element-for-element the same.
y <- rep(c(-1, 1), each = 10, length.out = size)
x[y == 1, ] <- x[y == 1, ] + 1
plot(x, col = ifelse(y > 0, "red", "blue"))

sim.data <- data.frame(x = x, y = as.factor(y))
sim.data.01 <- sim.data[1:20, ]  # Training data set
sim.data.TS <- sim.data[21:40, ] # Testing data set

# Two panels side by side; the previous layout is restored afterwards so the
# following plots are not drawn into half of the device.
op <- par(mfrow = c(1, 2))
plot(sim.data.01[, 1:2], col = ifelse(y[1:20] > 0, "red", "blue"),
     main = "Training Set")
plot(sim.data.TS[, 1:2], col = ifelse(y[21:40] > 0, "red", "blue"),
     main = "Testing Set")
par(op)

##--- a. Analyze Training Set with SVM (cost is 10)
svmfit <- svm(y ~ ., data = sim.data.01, kernel = "linear", cost = 10,
              scale = FALSE)
plot(svmfit, sim.data.01) # gives SVM classification plot
svmfit$index              # gives ix of support vectors
summary(svmfit)

##--- b. Analyze Training Set with SVM (cost is 0.1)
svmfit <- svm(y ~ ., data = sim.data.01, kernel = "linear", cost = 0.1,
              scale = FALSE)
plot(svmfit, sim.data.01)
svmfit$index              # support vectors are different now

##--- c. Pick best Cost with 10-fold CV
set.seed(1)
tune.out <- tune(svm, y ~ ., data = sim.data.01, kernel = "linear",
                 ranges = list(cost = c(0.001, 0.01, 0.1, 1, 5, 10, 100)))
summary(tune.out)

#--- Extract the model with best cost
bestmod <- tune.out$best.model
summary(bestmod)
plot(bestmod, sim.data.01)

##--- d. Apply Best model to Test Set
Y.pred <- predict(bestmod, sim.data.TS) # predict using bestmod
table(predict = Y.pred, truth = sim.data.TS$y)

##--- e. Same comparison with a much smaller cost (0.01)
# The original referred to `dat` and `testdat` here without ever defining
# them; they are bound to the training / testing sets built above.
# NOTE(review): confirm this is the intended pairing.
dat <- sim.data.01
testdat <- sim.data.TS
svmfit <- svm(y ~ ., data = dat, kernel = "linear", cost = 0.01,
              scale = FALSE)
ypred <- predict(svmfit, testdat)
table(predict = ypred, truth = testdat$y)

##--- f. Push the two classes further apart and refit
x[y == 1, ] <- x[y == 1, ] + 0.5
plot(x, col = (y + 5) / 2, pch = 19)
dat <- data.frame(x = x, y = as.factor(y))

svmfit <- svm(y ~ ., data = dat, kernel = "linear", cost = 1e5)
summary(svmfit)
plot(svmfit, dat)

svmfit <- svm(y ~ ., data = dat, kernel = "linear", cost = 1)
summary(svmfit)
plot(svmfit, dat)

###-------------------------------------------------
###--- 2. SVM with radial kernel

set.seed(1)
x <- matrix(rnorm(200 * 2), ncol = 2)
x[1:100, ] <- x[1:100, ] + 2
x[101:150, ] <- x[101:150, ] - 2
y <- c(rep(1, 150), rep(2, 50))
dat <- data.frame(x = x, y = as.factor(y))
plot(x, col = y)

# Random half of the 200 rows is used for training, the rest for testing.
train <- sample(200, 100)

svmfit <- svm(y ~ ., data = dat[train, ], kernel = "radial", gamma = 1,
              cost = 1)
plot(svmfit, dat[train, ])
summary(svmfit)

svmfit <- svm(y ~ ., data = dat[train, ], kernel = "radial", gamma = 1,
              cost = 1e5)
plot(svmfit, dat[train, ])

#--- Tune cost and gamma jointly by cross-validation
set.seed(1)
tune.out <- tune(svm, y ~ ., data = dat[train, ], kernel = "radial",
                 ranges = list(cost = c(0.1, 1, 10, 100, 1000),
                               gamma = c(0.5, 1, 2, 3, 4)))
summary(tune.out)

# `newdata` (not `newx`) is the argument predict() for svm objects reads;
# with `newx` the held-out rows were ignored and the fitted values of the
# training rows were tabulated against the test labels.
table(true = dat[-train, "y"],
      pred = predict(tune.out$best.model, newdata = dat[-train, ]))

###-------------------------------------------------
###--- 3. ROC curves

# Draw a ROC curve (true positive rate vs. false positive rate).
#   pred  : numeric scores, one per observation
#   truth : the corresponding class labels
#   ...   : forwarded to plot() (e.g. main, col, add)
rocplot <- function(pred, truth, ...) {
  predob <- prediction(pred, truth)
  perf <- performance(predob, "tpr", "fpr")
  plot(perf, ...)
}

svmfit.opt <- svm(y ~ ., data = dat[train, ], kernel = "radial", gamma = 2,
                  cost = 1, decision.values = TRUE)
svmfit.flex <- svm(y ~ ., data = dat[train, ], kernel = "radial", gamma = 50,
                   cost = 1, decision.values = TRUE)

# NOTE(review): the sign of the decision values depends on how the two class
# labels are ordered inside the fitted model; if a curve ends up below the
# diagonal, pass `-fitted` to rocplot() instead -- confirm on your data.
op <- par(mfrow = c(1, 2))

#--- Training data: gamma = 2 fit, then the more flexible gamma = 50 in red
fitted <- attributes(
  predict(svmfit.opt, dat[train, ], decision.values = TRUE)
)$decision.values
rocplot(fitted, dat[train, "y"], main = " Training Data")

fitted <- attributes(
  predict(svmfit.flex, dat[train, ], decision.values = TRUE)
)$decision.values
rocplot(fitted, dat[train, "y"], add = TRUE, col = "red")

#--- Test data: same two fits evaluated on the held-out rows
fitted <- attributes(
  predict(svmfit.opt, dat[-train, ], decision.values = TRUE)
)$decision.values
rocplot(fitted, dat[-train, "y"], main = "Test Data")

fitted <- attributes(
  predict(svmfit.flex, dat[-train, ], decision.values = TRUE)
)$decision.values
rocplot(fitted, dat[-train, "y"], add = TRUE, col = "red")

par(op)

###-------------------------------------------------
###--- 4. SVM with more than two classes

# Append a third class (label 0) of 50 points, shifted upwards in the second
# coordinate, to the data from section 2.
set.seed(1)
x <- rbind(x, matrix(rnorm(50 * 2), ncol = 2))
y <- c(y, rep(0, 50))
x[y == 0, 2] <- x[y == 0, 2] + 2
dat <- data.frame(x = x, y = as.factor(y))
par(mfrow = c(1, 1))
plot(x, col = (y + 1))

svmfit <- svm(y ~ ., data = dat, kernel = "radial", cost = 10, gamma = 1)
plot(svmfit, dat)

###-------------------------------------------------
###--- 5. Khan gene expression data

# Console output that had been pasted in between the statements is kept as
# comments; fragments that were cut off in the original paste were dropped.
names(Khan)
# [1] "xtrain" "xtest"  "ytrain" "ytest"
dim(Khan$xtrain)
# [1]   63 2308
dim(Khan$xtest)
# [1]   20 2308
length(Khan$ytrain)
# [1] 63
length(Khan$ytest)
table(Khan$ytrain)
#  1  2  3  4
#  8 23 12 20
table(Khan$ytest)

dat <- data.frame(x = Khan$xtrain, y = as.factor(Khan$ytrain))
out <- svm(y ~ ., data = dat, kernel = "linear", cost = 10)
summary(out)
table(out$fitted, dat$y) # training-set confusion matrix

dat.te <- data.frame(x = Khan$xtest, y = as.factor(Khan$ytest))
pred.te <- predict(out, newdata = dat.te)
table(pred.te, dat.te$y) # test-set confusion matrix