diff accuracy.R @ 0:4547b5a5169d draft

Uploaded
author testtool
date Fri, 13 Oct 2017 10:09:29 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/accuracy.R	Fri Oct 13 10:09:29 2017 -0400
@@ -0,0 +1,48 @@
+# Compare five caret classifiers on a CSV data set with a "Species" column.
+# Usage: Rscript accuracy.R <input> <p> <output1> <output2>
+#   input    CSV file with a header row and a "Species" class column
+#   p        fraction of rows, in (0, 1], used to fit the models
+#   output1  CSV path for summary() of the rows used for fitting
+#   output2  CSV path for the summary of the cross-validation resamples
+
+# library() stops at once if caret is missing; require() only returns FALSE.
+library(caret, quietly = TRUE)
+
+args <- commandArgs(trailingOnly = TRUE)
+if (length(args) < 4) {
+  stop("expected 4 arguments: input p output1 output2", call. = FALSE)
+}
+input <- args[1]
+p <- as.numeric(args[2]) # arguments arrive as strings; the split needs a number
+output1 <- args[3]
+output2 <- args[4]
+if (is.na(p) || p <= 0 || p > 1) {
+  stop("p must be a number in (0, 1]", call. = FALSE)
+}
+
+# stringsAsFactors = TRUE keeps text columns as factors on R >= 4.0 as well.
+dataset <- read.csv(input, header = TRUE, stringsAsFactors = TRUE)
+if (!"Species" %in% names(dataset)) {
+  stop("input must contain a 'Species' column", call. = FALSE)
+}
+
+# No seed is set before the split, so the selected rows differ between runs.
+train_index <- createDataPartition(dataset$Species, p = p, list = FALSE)
+train_set <- dataset[train_index, ]
+
+# Print the class distribution (count and percent) of the fitting rows.
+class_counts <- table(train_set$Species)
+print(cbind(freq = class_counts, percentage = prop.table(class_counts) * 100))
+write.csv(summary(train_set), output1)
+
+# 10-fold CV scored on accuracy; every fit starts from the same RNG seed.
+control <- trainControl(method = "cv", number = 10)
+algos <- c(lda = "lda", cart = "rpart", knn = "knn", svm = "svmRadial", rf = "rf")
+fits <- lapply(algos, function(algo) {
+  set.seed(7)
+  train(Species ~ ., data = train_set, method = algo, metric = "Accuracy",
+        trControl = control)
+})
+results <- resamples(fits)
+# NOTE(review): as.matrix() of a summary.resamples is kept as-is; verify the CSV.
+write.csv(as.matrix(summary(results)), output2)