# Compare five caret classifiers on a CSV data set using 10-fold
# cross-validation, and write two CSV reports.
#
# Usage:
#   Rscript <this-script> <input.csv> <p> <summary.csv> <results.csv>
#
#   input.csv    CSV file with a header row and a `Species` column
#   p            fraction of rows (0 < p <= 1) drawn by
#                createDataPartition() into the modelling subset
#   summary.csv  receives summary() of the modelling subset
#   results.csv  receives the summary of the resampling results

# library() (not require()) so that a missing package is a hard error.
library(caret, quietly = TRUE)

args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 4L) {
  stop(
    "expected 4 arguments: <input.csv> <p> <summary.csv> <results.csv>",
    call. = FALSE
  )
}

input <- args[1]
# commandArgs() returns character strings; the partition fraction has to be
# numeric before it is handed to createDataPartition().
p <- suppressWarnings(as.numeric(args[2]))
output1 <- args[3]
output2 <- args[4]

if (is.na(p) || p <= 0 || p > 1) {
  stop(
    "argument <p> must be a number in (0, 1], got: ", args[2],
    call. = FALSE
  )
}

dataset <- read.csv(input, header = TRUE)
if (!"Species" %in% names(dataset)) {
  stop("input file has no 'Species' column", call. = FALSE)
}
# Since R 4.0 read.csv() keeps strings as character; the class label is
# converted explicitly so summary() and table() treat it as categorical.
dataset$Species <- as.factor(dataset$Species)

# NOTE: despite its name, `validation_index` holds the rows that are used for
# model fitting below (a fraction `p` of the data, sampled within each level
# of Species).
validation_index <- createDataPartition(dataset$Species, p = p, list = FALSE)

# Rows left out of the partition; not referenced again in this script.
validation <- dataset[-validation_index, ]

# Rows selected by the partition; every model below is fitted on these.
validdataset <- dataset[validation_index, ]

# Print the class distribution of the modelling subset.
class_counts <- table(validdataset$Species)
percentage <- prop.table(class_counts) * 100
print(cbind(freq = class_counts, percentage = percentage))

output_summary <- summary(validdataset)
write.csv(output_summary, output1)

# 10-fold cross-validation, models compared on accuracy.
control <- trainControl(method = "cv", number = 10)
metric <- "Accuracy"

# The seed is reset before every train() call so that each model starts from
# the same random-number state.

# a) linear algorithms
set.seed(7)
fit.lda <- train(
  Species ~ .,
  data = validdataset, method = "lda",
  metric = metric, trControl = control
)

# b) nonlinear algorithms
# CART
set.seed(7)
fit.cart <- train(
  Species ~ .,
  data = validdataset, method = "rpart",
  metric = metric, trControl = control
)

# kNN
set.seed(7)
fit.knn <- train(
  Species ~ .,
  data = validdataset, method = "knn",
  metric = metric, trControl = control
)

# c) advanced algorithms
# SVM
set.seed(7)
fit.svm <- train(
  Species ~ .,
  data = validdataset, method = "svmRadial",
  metric = metric, trControl = control
)

# Random Forest
set.seed(7)
fit.rf <- train(
  Species ~ .,
  data = validdataset, method = "rf",
  metric = metric, trControl = control
)

# Collect the resampling results of all five models and summarise them.
results <- resamples(
  list(lda = fit.lda, cart = fit.cart, knn = fit.knn, svm = fit.svm, rf = fit.rf)
)
output_results <- summary(results)

# NOTE(review): as.matrix() is applied to the summary object as a whole, not
# to a single statistics table -- confirm the resulting CSV has the layout
# that downstream consumers expect.
write.csv(as.matrix(output_results), output2)