kmersvm/rocprcurve.xml @ 0:7fe1103032f7 (draft)

commit: Uploaded
author: cafletezbrant
date:   Mon, 20 Aug 2012 18:07:22 -0400
<tool id="ROC-PR Curve" name="ROC-PR Curve">
  <description>calculates AUC for ROC and PR curves</description>
  <command interpreter="sh">r_wrapper.sh $script_file</command>
  <inputs>
    <param format="tabular" name="cvpred_data" type="data" label="CV Predictions"/>
  </inputs>
  <outputs>
    <!--
    <data format="pdf" name="rocprc.pdf" from_work_dir="rocprc.pdf" label="ROC-PR Curve" />
    -->
    <data format="png" name="rocprc.png" from_work_dir="rocprc.png" />
  </outputs>

  <configfiles>
    <configfile name="script_file">

rm(list = objects())

########## calculate auPRC #########
# Area under the precision-recall curve by the rectangle rule: for each CV
# fold, sum the recall step sizes weighted by the precision at the right
# endpoint of each step.
auPRC <- function(perf) {
  rec <- perf@x.values
  prec <- perf@y.values
  result <- list()
  for (i in 1:length(rec)) {
    # diff(rec[[i]]) is rec[2:n] - rec[1:(n-1)], the recall increments
    result[i] <- list(sum(diff(rec[[i]]) * prec[[i]][-1]))
  }
  return(result)
}

########## plot ROC and PR-Curve #########
rocprc <- function(x) {
  # silence messages and warnings so only the plot file is produced
  sink(NULL, type="message")
  options(warn=-1)
  suppressMessages(suppressWarnings(library('ROCR')))
  svmresult <- data.frame(x)
  colnames(svmresult) <- c("Seqid", "Pred", "Label", "CV")

  # figure dimensions and text sizes
  linewd <- 1
  wd <- 4
  ht <- 4
  fig.nrows <- 1
  fig.ncols <- 2
  pt <- 10
  cex.general <- 1
  cex.lab <- 0.9
  cex.axis <- 0.9
  cex.main <- 1.2
  cex.legend <- 0.8

  #pdf("rocprc.pdf", width=wd*fig.ncols, height=ht*fig.nrows)
  png("rocprc.png", width=wd*fig.ncols, height=ht*fig.nrows, unit="in", res=100)

  par(xaxs="i", yaxs="i", mar=c(3.5,3.5,2,2)+0.1, mgp=c(2,0.8,0), mfrow=c(fig.nrows, fig.ncols))

  # split predictions and labels by CV fold (folds are numbered from 0)
  CVs <- unique(svmresult[["CV"]])
  preds <- list()
  labs <- list()
  auc <- c()
  for (i in 1:length(CVs)) {
    preds[i] <- subset(svmresult, CV==(i-1), select=c(Pred))
    labs[i] <- subset(svmresult, CV==(i-1), select=c(Label))
  }

  # one ROCR prediction object holding all folds; performance() then
  # returns per-fold x/y values
  pred <- prediction(preds, labs)
  perf_roc <- performance(pred, 'tpr', 'fpr')
  perf_prc <- performance(pred, 'prec', 'rec')

  perf_auc <- performance(pred, 'auc')
  prcs <- auPRC(perf_prc)

  # average the per-fold AUCs
  avgauc <- 0
  avgprc <- 0
  for (j in 1:length(CVs)) {
    avgauc <- avgauc + perf_auc@y.values[[j]]
    avgprc <- avgprc + prcs[[j]]
  }
  avgauc <- avgauc/length(CVs)
  avgprc <- avgprc/length(CVs)

  #preds_merged <- unlist(preds)
  #labs_merged <- unlist(labs)
  #pred_merged <- prediction(preds_merged, labs_merged)
  #perf_merged_auc <- performance(pred_merged, 'auc')

  # ROC and PR curves, one trace per fold, with the average AUC annotated
  plot(perf_roc, colorize=T, main="ROC curve", spread.estimate="stderror",
       xlab="1-Specificity", ylab="Sensitivity", cex.lab=1.2)
  text(0.2, 0.1, paste("AUC=", format(avgauc, digits=3, nsmall=3)))

  plot(perf_prc, colorize=T, main="P-R curve", spread.estimate="stderror",
       xlab="Recall", ylab="Precision", cex.lab=1.2, xlim=c(0,1), ylim=c(0,1))
  text(0.2, 0.1, paste("AUC=", format(avgprc, digits=3, nsmall=3)))

  dev.off()
}

############## main function #################
d <- read.table("${cvpred_data}")

rocprc(d)

    </configfile>
  </configfiles>

  <help>

**Note**

This tool is based on the ROCR library. If you use this tool, please cite:

Tobias Sing, Oliver Sander, Niko Beerenwinkel, Thomas Lengauer.
ROCR: visualizing classifier performance in R.
Bioinformatics 21(20):3940-3941 (2005).

----

**What it does**

Takes cross-validation predictions as input and computes the ROC curve and the PR curve, along with the area under each curve (AUC). The expected input format is sketched below.

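A minimal sketch of the expected tabular input. The column meanings (sequence id, SVM prediction score, true label, 0-based CV fold) are taken from the script above; the values themselves, and the label coding of 1 for positive and -1 for negative, are invented for illustration::

    chr1_1205_1255    1.25    1   0
    chr1_3407_3457   -0.73   -1   0
    chr2_1805_1855    0.12    1   1
    chr2_4505_4555   -1.10   -1   1
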
----

**Results**

ROC Curve: Receiver Operating Characteristic curve. Plots the true positive rate (sensitivity) against the false positive rate (1 - specificity).

PR Curve: Precision-Recall curve. Plots precision (the fraction of sequences classified as positive that are truly positive) against recall (the fraction of all positives that are recovered; same as sensitivity).

AUC for a given curve: Area Under the Curve. For the ROC curve, this is the probability that, in a randomly selected positive/negative pair, the positive is scored more highly by the trained SVM than the negative.

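As a rough standalone illustration of how these quantities are computed (a minimal sketch using the same ROCR calls as the tool; the scores and labels are invented toy values)::

    library(ROCR)

    # invented toy scores and true labels
    scores <- c(0.9, 0.4, 0.35, 0.6, 0.1, 0.8)
    labels <- c(1,   1,   -1,   1,  -1,  -1)

    pred <- prediction(scores, labels)
    roc  <- performance(pred, 'tpr', 'fpr')   # ROC curve
    prc  <- performance(pred, 'prec', 'rec')  # PR curve
    auc  <- performance(pred, 'auc')
    auc@y.values[[1]]  # 7 of the 9 positive/negative pairs are ranked
                       # correctly, so the ROC AUC is 7/9
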
.. class:: infomark

Both curves measure SVM performance, but ROC curves can paint an overly optimistic picture when the class distribution is heavily skewed; the sketch below illustrates this. For more information see:

Jesse Davis, Mark Goadrich.
The Relationship Between Precision-Recall and ROC Curves.
Proceedings of the 23rd International Conference on Machine Learning.
Pittsburgh, PA, 2006.

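A small hypothetical illustration of the skew effect (simulated scores, not part of the tool). Multiplying the number of negatives by 100 while keeping the same score distributions leaves the ROC AUC essentially unchanged, but false positives then swamp the true positives at any score cutoff, so precision and the PR curve degrade::

    library(ROCR)
    set.seed(1)

    # scores for 50 positives, then 50 vs. 5000 negatives drawn from
    # the same distribution (all values simulated)
    pos       <- rnorm(50,   mean = 1)
    neg_small <- rnorm(50,   mean = 0)
    neg_large <- rnorm(5000, mean = 0)

    roc_auc <- function(scores, labels)
        performance(prediction(scores, labels), 'auc')@y.values[[1]]

    roc_auc(c(pos, neg_small), c(rep(1, 50), rep(-1, 50)))    # ~0.76
    roc_auc(c(pos, neg_large), c(rep(1, 50), rep(-1, 5000)))  # ~0.76
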
----

**Example**

.. image:: ./static/images/sample_roc_chen.png

  </help>
</tool>