changeset 2:4fa4d5c8036a draft
Uploaded
| author | cafletezbrant |
| --- | --- |
| date | Mon, 20 Aug 2012 21:33:00 -0400 |
| parents | dcf98c713e4a |
| children | cd35ace22905 |
| files | kmersvm/.gitignore kmersvm/kmersvm.tar.gz kmersvm/rocprcurve.xml |
| diffstat | 3 files changed, 1 insertions(+), 147 deletions(-) |
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/kmersvm/.gitignore	Mon Aug 20 21:33:00 2012 -0400
@@ -0,0 +1,1 @@
+scripts/libkmersvm.pyc
--- a/kmersvm/rocprcurve.xml	Mon Aug 20 20:04:38 2012 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,147 +0,0 @@
-<tool id="ROC-PR Curve" name="ROC-PR Curve">
-  <description>calculates AUC for ROC and PR curves</description>
-  <command interpreter="sh">r_wrapper.sh $script_file</command>
-  <inputs>
-    <param format="tabular" name="cvpred_data" type="data" label="CV Predictions"/>
-  </inputs>
-  <outputs>
-    <!--
-    <data format="pdf" name="rocprc.pdf" from_work_dir="rocprc.pdf" label="ROC-PR Curve" />
-    -->
-    <data format="png" name="rocprc.png" from_work_dir="rocprc.png" />
-  </outputs>
-
-  <configfiles>
-    <configfile name="script_file">
-
-      rm(list = objects() )
-
-      ########## calculate auprc #########
-      auPRC <- function (perf) {
-        rec <- perf@x.values
-        prec <- perf@y.values
-        result <- list()
-        for (i in 1:length(perf@x.values)) {
-          result[i] <- list(sum((rec[[i]][2:length(rec[[i]])] - rec[[i]][2:length(rec[[i]])-1])*prec[[i]][-1]))
-        }
-        return(result)
-      }
-
-      ########## plot ROC and PR-Curve #########
-      rocprc <- function(x) {
-        sink(NULL,type="message")
-        options(warn=-1)
-        suppressMessages(suppressWarnings(library('ROCR')))
-        svmresult <- data.frame(x)
-        colnames(svmresult) <- c("Seqid","Pred","Label", "CV")
-
-        linewd <- 1
-        wd <- 4
-        ht <- 4
-        fig.nrows <- 1
-        fig.ncols <- 2
-        pt <- 10
-        cex.general <- 1
-        cex.lab <- 0.9
-        cex.axis <- 0.9
-        cex.main <- 1.2
-        cex.legend <- 0.8
-
-
-        #pdf("rocprc.pdf", width=wd*fig.ncols, height=ht*fig.nrows)
-        png("rocprc.png", width=wd*fig.ncols, height=ht*fig.nrows, unit="in", res=100)
-
-        par(xaxs="i", yaxs="i", mar=c(3.5,3.5,2,2)+0.1, mgp=c(2,0.8,0), mfrow=c(fig.nrows, fig.ncols))
-
-        CVs <- unique(svmresult[["CV"]])
-        preds <- list()
-        labs <- list()
-        auc <- c()
-        for(i in 1:length(CVs)) {
-          preds[i] <- subset(svmresult, CV==(i-1), select=c(Pred))
-          labs[i] <- subset(svmresult, CV==(i-1), select=c(Label))
-        }
-
-        pred <- prediction(preds, labs)
-        perf_roc <- performance(pred, 'tpr', 'fpr')
-        perf_prc <- performance(pred, 'prec', 'rec')
-
-        perf_auc <- performance(pred, 'auc')
-        prcs <- auPRC(perf_prc)
-        avgauc <- 0
-        avgprc <- 0
-
-        for(j in 1:length(CVs)) {
-          avgauc <- avgauc + perf_auc@y.values[[j]]
-          avgprc <- avgprc + prcs[[j]]
-        }
-
-        avgauc <- avgauc/length(CVs)
-        avgprc <- avgprc/length(CVs)
-
-        #preds_merged <- unlist(preds)
-        #labs_merged <- unlist(labs)
-        #pred_merged <- prediction(preds_merged, labs_merged)
-        #perf_merged_auc <- performance(pred_merged, 'auc')
-
-        plot(perf_roc, colorize=T, main="ROC curve", spread.estimate="stderror",
-             xlab="1-Specificity", ylab="Sensitivity", cex.lab=1.2)
-        text(0.2, 0.1, paste("AUC=", format(avgauc, digits=3, nsmall=3)))
-
-        plot(perf_prc, colorize=T, main="P-R curve", spread.estimate="stderror",
-             xlab="Recall", ylab="Precision", cex.lab=1.2, xlim=c(0,1), ylim=c(0,1))
-        text(0.2, 0.1, paste("AUC=", format(avgprc, digits=3, nsmall=3)))
-
-        dev.off()
-      }
-
-      ############## main function #################
-      d <- read.table("${cvpred_data}")
-
-      rocprc(d)
-
-    </configfile>
-  </configfiles>
-
-  <help>
-
-**Note**
-
-This tool is based on the ROCR library. If you use this tool please cite:
-
-Tobias Sing, Oliver Sander, Niko Beerenwinkel, Thomas Lengauer.
-ROCR: visualizing classifier performance in R.
-Bioinformatics 21(20):3940-3941 (2005).
-
-----
-
-**What it does**
-
-Takes as input cross-validation predictions and calculates ROC Curve and its area under curve (AUC) and PR Curve and its AUC.
-
-----
-
-**Results**
-
-ROC Curve: Receiver Operating Characteristic Curve. Compares true positive rate (sensitivity) to false positive rate (1 - specificity).
-
-PR Curve: Precision Recall Curve. Compares number of true positives (recall; same as sensitivity) to the number of true positives relative to the total number sequences classified as positive (precision).
-
-AUC for a given curve: Area Under the Curve: Probability that of a randomly selected positive/negative pair, the positive will be scored more highly by the trained SVM than a negative.
-
-.. class:: infomark
-
-Both curves measure SVM performance, but ROC curves can be inaccurate if there is a large skew in class distribution. For more information see:
-
-Jesse Davis, Mark Goadrich.
-The Relationship Between Precision-Recall and ROC Curves.
-Proceedings of the 23rd Annual Internation Conference on Machine Learning.
-Pittsburgh, PA, 2006.
-
-----
-
-**Example**
-
-.. image:: ./static/images/sample_roc_chen.png
-  </help>
-</tool>
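For readers who want to reproduce the deleted tool's numbers outside Galaxy, the sketch below (not part of this changeset) runs the same ROCR workflow on an invented toy data frame laid out like the tool's CV-prediction input (Seqid, Pred, Label, CV); every name in it is illustrative. The per-fold PR AUC uses the same rectangle sum as the removed auPRC() helper: its `2:length(rec[[i]])-1` index is `1:(length(rec[[i]])-1)` by R's operator precedence, so the sum is simply `diff(rec)` against `prec[-1]`.

```r
# Minimal sketch, assuming a synthetic toy data frame; the real tool instead
# reads the Galaxy dataset ${cvpred_data} with read.table().
suppressMessages(library(ROCR))

set.seed(1)
n <- 200
cvpred <- data.frame(
  Seqid = paste0("seq", 1:n),
  Label = rep(c(1, -1), length.out = n),   # 1 = positive, -1 = negative
  CV    = rep(0:4, length.out = n)         # five cross-validation folds
)
cvpred$Pred <- cvpred$Label + rnorm(n)     # toy SVM scores loosely tracking the labels

# One score/label vector per fold: the list form prediction() accepts for CV runs.
preds <- split(cvpred$Pred,  cvpred$CV)
labs  <- split(cvpred$Label, cvpred$CV)
pred  <- prediction(preds, labs)

# ROC AUC averaged over folds, as the removed script does.
avg_roc_auc <- mean(unlist(performance(pred, "auc")@y.values))

# PR AUC per fold: right-endpoint rectangle sum over (recall, precision) points,
# equivalent to the removed auPRC() helper (prec[-1] drops the undefined first point).
perf_prc <- performance(pred, "prec", "rec")
avg_pr_auc <- mean(mapply(
  function(rec, prec) sum(diff(rec) * prec[-1]),
  perf_prc@x.values, perf_prc@y.values
))

cat(sprintf("mean ROC AUC = %.3f, mean PR AUC = %.3f\n", avg_roc_auc, avg_pr_auc))
```

Averaging per-fold AUCs matches the per-fold curves the script plots with spread.estimate="stderror"; pooling all folds into a single prediction object (the commented-out preds_merged block in the deleted script) would instead give one merged estimate.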