changeset 2:4fa4d5c8036a draft

Uploaded
author cafletezbrant
date Mon, 20 Aug 2012 21:33:00 -0400
parents dcf98c713e4a
children cd35ace22905
files kmersvm/.gitignore kmersvm/kmersvm.tar.gz kmersvm/rocprcurve.xml
diffstat 3 files changed, 1 insertions(+), 147 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/kmersvm/.gitignore	Mon Aug 20 21:33:00 2012 -0400
@@ -0,0 +1,1 @@
+scripts/libkmersvm.pyc
Binary file kmersvm/kmersvm.tar.gz has changed
--- a/kmersvm/rocprcurve.xml	Mon Aug 20 20:04:38 2012 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,147 +0,0 @@
-<tool id="ROC-PR Curve" name="ROC-PR Curve">
-	<description>calculates AUC for ROC and PR curves</description>
-	<command interpreter="sh">r_wrapper.sh $script_file</command>
-	<inputs>
-		<param format="tabular" name="cvpred_data" type="data" label="CV Predictions"/>
-	</inputs>
-	<outputs>
-		<!--
-			<data format="pdf" name="rocprc.pdf" from_work_dir="rocprc.pdf" label="ROC-PR Curve" />
-		-->
-		<data format="png" name="rocprc.png" from_work_dir="rocprc.png" />
-	</outputs>
-
-	<configfiles>
-		<configfile name="script_file">
-
-			# Clear every visible object from the current workspace before the helpers are defined.
-			rm(list = ls())
-
-			########## calculate auprc #########
-			# Area under each precision-recall curve held in a ROCR performance object.
-			#
-			# perf: object whose x.values slot lists the recall vectors and whose
-			#       y.values slot lists the matching precision vectors, one per curve.
-			# Returns a list with one numeric area per curve (an empty list when perf
-			# holds no curves).
-			#
-			# Each area is a step sum: every recall increment is weighted by the
-			# precision at the end of that step.  The first precision value of a curve
-			# is dropped, so it never contributes to the area.
-			auPRC &lt;- function (perf) {
-				rec &lt;- perf@x.values
-				prec &lt;- perf@y.values
-				# seq_along gives an empty index set when there are no curves, whereas
-				# 1:length(...) would count 1, 0 and fail on the first subscript.
-				lapply(seq_along(rec), function(i) {
-					# diff() yields rec[k] - rec[k-1] for k = 2..n.
-					sum(diff(rec[[i]]) * prec[[i]][-1])
-				})
-			}
-
-			########## plot ROC and PR-Curve #########
-			# Draw the ROC and precision-recall curves of a cross-validation run into rocprc.png.
-			#
-			# x: table of predictions with four columns, taken in order as sequence id,
-			#    prediction score, class label and cross-validation fold id.
-			# One curve is computed per fold; the AUC printed on each panel is the mean
-			# of the per-fold areas.
-			rocprc &lt;- function(x) {
-				sink(NULL,type="message")
-				options(warn=-1)
-				suppressMessages(suppressWarnings(library('ROCR')))
-				svmresult &lt;- data.frame(x)
-				# Fail with a readable message instead of a names-length error below.
-				if (ncol(svmresult) != 4) {
-					stop("CV predictions must have 4 columns: Seqid, Pred, Label, CV")
-				}
-				colnames(svmresult) &lt;- c("Seqid","Pred","Label", "CV")
-
-				# Size of one panel in inches, and the panel grid (ROC left, P-R right).
-				wd &lt;- 4
-				ht &lt;- 4
-				fig.nrows &lt;- 1
-				fig.ncols &lt;- 2
-
-				#pdf("rocprc.pdf", width=wd*fig.ncols, height=ht*fig.nrows)
-				png("rocprc.png", width=wd*fig.ncols, height=ht*fig.nrows, units="in", res=100)
-
-				par(xaxs="i", yaxs="i", mar=c(3.5,3.5,2,2)+0.1, mgp=c(2,0.8,0), mfrow=c(fig.nrows, fig.ncols))
-
-				# Split scores and labels by the fold ids that actually occur in the data
-				# rather than assuming they are numbered 0..k-1.  which() also skips rows
-				# whose fold id is missing.
-				CVs &lt;- sort(unique(svmresult[["CV"]]))
-				preds &lt;- lapply(CVs, function(fold) {
-					svmresult[["Pred"]][which(svmresult[["CV"]] == fold)]
-				})
-				labs &lt;- lapply(CVs, function(fold) {
-					svmresult[["Label"]][which(svmresult[["CV"]] == fold)]
-				})
-
-				pred &lt;- prediction(preds, labs)
-				perf_roc &lt;- performance(pred, 'tpr', 'fpr')
-				perf_prc &lt;- performance(pred, 'prec', 'rec')
-
-				# Per-fold areas, averaged over the folds.
-				perf_auc &lt;- performance(pred, 'auc')
-				prcs &lt;- auPRC(perf_prc)
-				avgauc &lt;- mean(unlist(perf_auc@y.values))
-				avgprc &lt;- mean(unlist(prcs))
-
-				#preds_merged &lt;- unlist(preds)
-				#labs_merged &lt;- unlist(labs)
-				#pred_merged &lt;- prediction(preds_merged, labs_merged)
-				#perf_merged_auc &lt;- performance(pred_merged, 'auc')
-
-				plot(perf_roc, colorize=TRUE, main="ROC curve", spread.estimate="stderror",
-				xlab="1-Specificity", ylab="Sensitivity", cex.lab=1.2)
-				text(0.2, 0.1, paste("AUC=", format(avgauc, digits=3, nsmall=3)))
-
-				plot(perf_prc, colorize=TRUE, main="P-R curve", spread.estimate="stderror",
-				xlab="Recall", ylab="Precision", cex.lab=1.2, xlim=c(0,1), ylim=c(0,1))
-				text(0.2, 0.1, paste("AUC=", format(avgprc, digits=3, nsmall=3)))
-
-				dev.off()
-			}
-
-			############## main function #################
-			# Load the table substituted for the cvpred_data parameter and plot it.
-			cv_table &lt;- read.table("${cvpred_data}")
-			rocprc(cv_table)
-
-		</configfile>
-	</configfiles>
-
-	<help>
-	
-**Note**
-
-This tool is based on the ROCR library.  If you use this tool please cite:
-		
-Tobias Sing, Oliver Sander, Niko Beerenwinkel, Thomas Lengauer.
-ROCR: visualizing classifier performance in R.
-Bioinformatics 21(20):3940-3941 (2005).  
-        
-----
-
-**What it does**
-
-Takes as input cross-validation predictions and calculates ROC Curve and its area under curve (AUC) and PR Curve and its AUC.
-
-----
-
-**Results**
-
-ROC Curve: Receiver Operating Characteristic Curve. Compares true positive rate (sensitivity) to false positive rate (1 - specificity).  
-
-PR Curve: Precision Recall Curve. Compares the fraction of positive sequences that are correctly classified as positive (recall; same as sensitivity) to the fraction of sequences classified as positive that are truly positive (precision).
-
-AUC for a given curve: Area Under the Curve. For the ROC curve, this is the probability that, for a randomly selected positive/negative pair, the positive will be scored more highly by the trained SVM than the negative.
-
-.. class:: infomark
-
-Both curves measure SVM performance, but ROC curves can be inaccurate if there is a large skew in class distribution.  For more information see:
-
-Jesse Davis, Mark Goadrich.
-The Relationship Between Precision-Recall and ROC Curves.
-Proceedings of the 23rd Annual International Conference on Machine Learning.
-Pittsburgh, PA, 2006.
-
-----
-
-**Example**
-
-.. image:: ./static/images/sample_roc_chen.png
-	</help>
-</tool>