# view accuracy.R @ 3:a5a5716e0317 draft
#
# Uploaded
# author testtool
# date Fri, 13 Oct 2017 10:14:29 -0400
# parents 4547b5a5169d
# children
# line wrap: on
# line source

require(caret, quietly = TRUE)

# Positional command-line arguments:
#   1. input   - path of the CSV file that is read into `dataset`
#   2. p       - fraction of rows handed to createDataPartition()
#   3. output1 - path the dataset summary CSV is written to
#   4. output2 - path the resampling summary CSV is written to
args <- commandArgs(trailingOnly = TRUE)

if (length(args) < 4L) {
  stop(
    "Usage: Rscript accuracy.R <input.csv> <p> <summary.csv> <results.csv>",
    call. = FALSE
  )
}

input <- args[1]

# commandArgs() always yields character strings; the partition fraction has
# to be numeric before createDataPartition() can do arithmetic with it.
# Conversion warnings are silenced because a failed parse is reported below.
p <- suppressWarnings(as.numeric(args[2]))
if (is.na(p) || p <= 0 || p > 1) {
  stop(
    "Argument 2 (p) must be a number in (0, 1], got: ", args[2],
    call. = FALSE
  )
}

output1 <- args[3]
output2 <- args[4]

# Load the input table. stringsAsFactors is set explicitly so that text
# columns (notably Species) are factors regardless of the R version: the
# read.csv() default switched from TRUE to FALSE in R 4.0.0, which would
# otherwise change what summary() reports for those columns.
dataset <- read.csv(input, header = TRUE, stringsAsFactors = TRUE)

# Every step below addresses the class column by this name.
if (!"Species" %in% names(dataset)) {
  stop("Input CSV must contain a 'Species' column", call. = FALSE)
}

# NOTE: despite its name, validation_index holds the row indices returned by
# createDataPartition() for fraction p, i.e. the rows kept in `validdataset`.
validation_index <- createDataPartition(dataset$Species, p = p, list = FALSE)

# Rows NOT selected by the partition. Not referenced again in the visible
# part of this script.
validation <- dataset[-validation_index, ]

# Rows selected by the partition; this is the data summarised below.
validdataset <- dataset[validation_index, ]

# Class distribution of the selected rows, as counts and percentages.
# The cbind() result is a bare top-level expression, so it is only shown
# when the interpreter auto-prints top-level values (as Rscript does).
percentage <- prop.table(table(validdataset$Species)) * 100
cbind(freq = table(validdataset$Species), percentage = percentage)

# Per-column summary of the selected rows, written to the first output file.
output_summary <- summary(validdataset)
write.csv(output_summary, output1)

# Settings shared by every model fit: 10-fold cross-validation, with
# "Accuracy" as the metric passed to train().
metric <- "Accuracy"
control <- trainControl(method = "cv", number = 10)

# The RNG is re-seeded with the same value immediately before each fit.

# Linear model: LDA
set.seed(7)
fit.lda <- train(
  Species ~ .,
  data = validdataset,
  method = "lda",
  metric = metric,
  trControl = control
)

# Nonlinear model: CART
set.seed(7)
fit.cart <- train(
  Species ~ .,
  data = validdataset,
  method = "rpart",
  metric = metric,
  trControl = control
)

# Nonlinear model: k-nearest neighbours
set.seed(7)
fit.knn <- train(
  Species ~ .,
  data = validdataset,
  method = "knn",
  metric = metric,
  trControl = control
)

# Advanced model: SVM with a radial kernel
set.seed(7)
fit.svm <- train(
  Species ~ .,
  data = validdataset,
  method = "svmRadial",
  metric = metric,
  trControl = control
)

# Advanced model: random forest
set.seed(7)
fit.rf <- train(
  Species ~ .,
  data = validdataset,
  method = "rf",
  metric = metric,
  trControl = control
)

# Gather the resampling results of all five fits and summarise them.
results <- resamples(
  list(lda = fit.lda, cart = fit.cart, knn = fit.knn, svm = fit.svm, rf = fit.rf)
)
output_results <- summary(results)

# NOTE(review): output_results is a summary object rather than a plain
# table; confirm the CSV produced via as.matrix() has the layout that
# downstream consumers expect.
write.csv(as.matrix(output_results), output2)