Mercurial > repos > anmoljh > activity_predict
changeset 0:20df9782b07a draft
planemo upload commit a1f4dd8eb560c649391ada1a6bb9505893a35272
author | anmoljh |
---|---|
date | Thu, 31 May 2018 11:33:23 -0400 |
parents | |
children | cea1634edfd8 |
files | predict_activity.R predict_activity.xml tool_dependencies.xml |
diffstat | 3 files changed, 238 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/predict_activity.R Thu May 31 11:33:23 2018 -0400 @@ -0,0 +1,157 @@
##########
# Predict compound activity with a previously trained caret model.
#
# Usage: Rscript predict_activity.R <descriptors.csv> <model.RData> <output>
#   descriptors.csv  descriptor table; the first column is used as row names
#                    and reported in the COMPOUND column of the output
#   model.RData      file providing `Fit` (the trained model) and, optionally,
#                    `ppInfo` (a pre-processing object applied with predict())
#   output           tab-separated table: COMPOUND, one probability column per
#                    class, PREDICTED ACTIVITY
##########
library(caret)

args <- commandArgs(TRUE)
arg1 <- args[1]
arg2 <- args[2]
arg3 <- args[3]
set.seed(1)

# Coerce one descriptor column to numeric and replace every entry that is
# NA, NaN or infinite with the median of the usable entries of that column.
# A column without any usable entry stays NA.
clean_column <- function(x) {
  x <- if (is.numeric(x) || is.logical(x)) {
    as.numeric(x)
  } else {
    # factors / strings: go through character so factor codes are not used
    as.numeric(as.character(x))
  }
  unusable <- !is.finite(x)
  # median over finite values only, so +/-Inf cannot distort the fill value
  x[unusable] <- median(x[!unusable])
  x
}

# Load the model file into its own environment so that objects stored in it
# cannot overwrite local variables, and return the pieces used for prediction:
# `fit` (required) and `pp` (NULL when the file has no `ppInfo`).
read_model <- function(path) {
  stored <- new.env()
  load(path, envir = stored)
  if (!exists("Fit", envir = stored, inherits = FALSE)) {
    stop("Model file does not contain an object named 'Fit'", call. = FALSE)
  }
  pp <- NULL
  if (exists("ppInfo", envir = stored, inherits = FALSE)) {
    pp <- get("ppInfo", envir = stored, inherits = FALSE)
  }
  list(fit = get("Fit", envir = stored, inherits = FALSE), pp = pp)
}

# Names of the descriptor columns the model needs.
# `finalModel$xNames` is used when available. `$` cannot be used on an S4
# final model (the original code special-cased svmRadial/svmLinear for this
# reason), so for those methods, for any other S4 final model, and when
# xNames is absent, the names come from the stored training data instead.
required_columns <- function(fit) {
  svm_method <- fit$method %in% c("svmRadial", "svmLinear")
  if (!svm_method && !isS4(fit$finalModel)) {
    from_model <- fit$finalModel$xNames
    if (!is.null(from_model)) {
      return(from_model)
    }
  }
  training_columns <- colnames(fit$trainingData)
  # drop the last column, presumed to be the outcome appended by caret
  training_columns[seq_len(length(training_columns) - 1L)]
}

# Read the descriptor file `args1`, predict with the model stored in `args2`
# and write the prediction table to `args3`.
pre <- function(args1, args2, args3) {
  # example nTrain.csv file of unknown activity
  descriptors <- read.csv(args1, row.names = 1, header = TRUE)
  model <- read_model(args2) # model generated by the previous program
  fit <- model$fit

  # Columns that are entirely missing cannot be imputed from their median;
  # fill them with tiny random noise (one draw per cell) so they are numeric
  # and not constant.
  all_missing <- vapply(
    descriptors,
    function(column) all(is.na(column)),
    logical(1)
  )
  for (column in which(all_missing)) {
    descriptors[[column]] <- runif(nrow(descriptors), 0, 0.00001)
  }

  # The input must contain exactly the columns used for model building.
  needed <- required_columns(fit)
  if (length(needed) == 0) {
    stop("The model does not list any predictor columns", call. = FALSE)
  }
  absent <- setdiff(needed, colnames(descriptors))
  if (length(absent) > 0) {
    stop(
      "Input file lacks column(s) required by the model: ",
      paste(absent, collapse = ", "),
      call. = FALSE
    )
  }

  # Clean column by column and bind into a numeric matrix. Unlike apply() on
  # a data frame this never routes numbers through text and keeps a matrix
  # even for a single row or a single predictor.
  features <- do.call(
    cbind,
    lapply(descriptors[, needed, drop = FALSE], clean_column)
  )

  # Apply the stored pre-processing when the model file provides it.
  if (!is.null(model$pp)) {
    features <- predict(model$pp, features)
  }

  predicted <- predict(fit, features)
  probabilities <- as.data.frame(predict(fit, features, type = "prob"))
  class_labels <- levels(predicted)
  if (length(class_labels) == ncol(probabilities)) {
    colnames(probabilities) <- class_labels
  }

  compound <- as.data.frame(rownames(descriptors))
  colnames(compound) <- "COMPOUND"
  activity <- as.data.frame(predicted)
  colnames(activity) <- "PREDICTED ACTIVITY"

  report <- format(cbind(compound, probabilities, activity), justify = "centre")
  write.table(report, file = args3, row.names = FALSE, sep = "\t")
}

if (length(args) < 3) {
  stop(
    "Usage: Rscript predict_activity.R <descriptors.csv> <model.RData> <output>",
    call. = FALSE
  )
}
pre(arg1, arg2, arg3)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/predict_activity.xml Thu May 31 11:33:23 2018 -0400 @@ -0,0 +1,72 @@ +<tool id="predict_activity" name="Predict Activity" version="1.0"> + <description> + used to predict activity based on given model + </description> + <requirements> + <requirement type="package" version="3.2.1">R</requirement> + <requirement type="package" version="1.0">carettools</requirement> + </requirements> + <stdio> + <exit_code range="1:" /> + </stdio> + + <command interpreter="Rscript">predict_activity.R $file1 $model $output1 2>/dev/null </command> + + <inputs> + <param name="model" type="data" format="rdata" label="Select Model" help="Select built model obtained from caret tool 'Create script from the template file'." /> + <param name="file1" type="data" format="csv" label="Select file have descriptor data for activity prediction" help="csv format" /> + </inputs> + + <outputs> + <data format="txt" name="output1" label="Prediction on $file1.name" /> + </outputs> + + <tests> + <test> + <param name="model" value="MODEL.rdata" /> + <param name="file1" value="Descriptors_File_of_prediction_set.csv" /> + <output name="output1" file="Prediction_on_Descriptors_File_of_prediction_set.txt" compare="sim_size" delta="90000" /> + </test> + </tests> + +<help> + +.. class:: infomark + +This file **must** contain **all** of the features (or **more features**) of the **input** "csv file" used for **model building**. + +---------- + +**Input "csv file" must be as follows** + +---------- + + +Example file:- + + + +# example.csv + + feature1,feature2,feature3,..,featureN + +ro1 234,2.3,34,7,..,0.9 + +ro2 432,3.4,23.1,12,..,0.12 + +ro3 692,23,12.2,19,..,0.14 + + +----------- + +**MODEL** + +Choose the model file received from the model building step. + +The model file has the "data" file format, which can be seen by + +clicking on the output files shown in the history. + +</help> + +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Thu May 31 11:33:23 2018 -0400 @@ -0,0 +1,9 @@ +<?xml version="1.0"?> +<tool_dependency> + <package name="R" version="3.2.1"> + <repository changeset_revision="d9f7d84125b7" name="package_r_3_2_1" owner="iuc" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu/" /> + </package> + <package name="carettools" version="1.0"> + <repository changeset_revision="d8ebc06d55ca" name="package_carettools_1_0" owner="anmoljh" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu/" /> + </package> +</tool_dependency>