<tool id="ROC-PR Curve" name="ROC-PR Curve">
  <description>calculates AUC for ROC and PR curves</description>
  <command interpreter="sh">r_wrapper.sh $script_file</command>
  <inputs>
    <param format="tabular" name="cvpred_data" type="data" label="CV Predictions"/>
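    <!-- Expected input: four columns per row, namely sequence id, prediction
         score, true class label, and cross-validation fold (numbered from 0);
         see the R script below. -->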
  </inputs>
  <outputs>
    <!--
    <data format="pdf" name="rocprc.pdf" from_work_dir="rocprc.pdf" label="ROC-PR Curve" />
    -->
    <data format="png" name="rocprc.png" from_work_dir="rocprc.png" label="ROC-PR Curve" />
  </outputs>

  <configfiles>
    <configfile name="script_file">

rm(list = objects())

########## calculate auPRC #########
# Rectangle-rule approximation of the area under the precision-recall curve,
# one value per cross-validation fold. Dropping the first precision value
# (prec[[i]][-1]) also discards the NaN that ROCR reports at the first
# cutoff, where no sequence has been predicted positive yet.
auPRC <- function(perf) {
  rec <- perf@x.values
  prec <- perf@y.values
  result <- list()
  for (i in 1:length(rec)) {
    result[[i]] <- sum(diff(rec[[i]]) * prec[[i]][-1])
  }
  return(result)
}
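
## Worked example (comment only, not executed by the tool): with
## rec = c(0, 0.5, 1) and prec = c(NaN, 1.0, 0.8), the rectangle rule
## gives sum(diff(rec) * prec[-1]) = 0.5*1.0 + 0.5*0.8 = 0.9.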

########## plot ROC and PR-Curve #########
rocprc <- function(x) {
  # Quiet warnings and package start-up messages
  sink(NULL, type = "message")
  options(warn = -1)
  suppressMessages(suppressWarnings(library('ROCR')))
  svmresult <- data.frame(x)
  colnames(svmresult) <- c("Seqid", "Pred", "Label", "CV")

  # Plot dimensions and text sizes
  linewd <- 1
  wd <- 4
  ht <- 4
  fig.nrows <- 1
  fig.ncols <- 2
  pt <- 10
  cex.general <- 1
  cex.lab <- 0.9
  cex.axis <- 0.9
  cex.main <- 1.2
  cex.legend <- 0.8

  #pdf("rocprc.pdf", width=wd*fig.ncols, height=ht*fig.nrows)
  png("rocprc.png", width = wd * fig.ncols, height = ht * fig.nrows, units = "in", res = 100)

  par(xaxs = "i", yaxs = "i", mar = c(3.5, 3.5, 2, 2) + 0.1, mgp = c(2, 0.8, 0), mfrow = c(fig.nrows, fig.ncols))

  # Split predictions and labels by cross-validation fold (folds are numbered from 0)
  CVs <- unique(svmresult[["CV"]])
  preds <- list()
  labs <- list()
  auc <- c()
  for (i in 1:length(CVs)) {
    preds[[i]] <- subset(svmresult, CV == (i - 1))[["Pred"]]
    labs[[i]] <- subset(svmresult, CV == (i - 1))[["Label"]]
  }
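
  # ROCR's prediction() accepts lists of per-fold score and label vectors
  # and then computes one ROC/PR curve per fold.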
  pred <- prediction(preds, labs)
  perf_roc <- performance(pred, 'tpr', 'fpr')
  perf_prc <- performance(pred, 'prec', 'rec')

  perf_auc <- performance(pred, 'auc')
  prcs <- auPRC(perf_prc)
  avgauc <- 0
  avgprc <- 0

  # Macro-average the per-fold AUCs
  for (j in 1:length(CVs)) {
    avgauc <- avgauc + perf_auc@y.values[[j]]
    avgprc <- avgprc + prcs[[j]]
  }

  avgauc <- avgauc / length(CVs)
  avgprc <- avgprc / length(CVs)

  #preds_merged <- unlist(preds)
  #labs_merged <- unlist(labs)
  #pred_merged <- prediction(preds_merged, labs_merged)
  #perf_merged_auc <- performance(pred_merged, 'auc')

  plot(perf_roc, colorize = TRUE, main = "ROC curve", spread.estimate = "stderror",
       xlab = "1-Specificity", ylab = "Sensitivity", cex.lab = 1.2)
  text(0.2, 0.1, paste("AUC=", format(avgauc, digits = 3, nsmall = 3)))

  plot(perf_prc, colorize = TRUE, main = "P-R curve", spread.estimate = "stderror",
       xlab = "Recall", ylab = "Precision", cex.lab = 1.2, xlim = c(0, 1), ylim = c(0, 1))
  text(0.2, 0.1, paste("AUC=", format(avgprc, digits = 3, nsmall = 3)))

  dev.off()
}

############## main function #################
# Galaxy replaces ${cvpred_data} with the path to the selected tabular dataset
d <- read.table("${cvpred_data}")

rocprc(d)

    </configfile>
  </configfiles>

  <help>

**Note**

This tool is based on the ROCR library. If you use this tool, please cite:

Tobias Sing, Oliver Sander, Niko Beerenwinkel, Thomas Lengauer.
ROCR: visualizing classifier performance in R.
Bioinformatics 21(20):3940-3941 (2005).

----

**What it does**

Takes cross-validation predictions as input, plots the ROC and PR curves, and reports the area under each curve (AUC).
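
The input must be tabular with four columns: sequence id, SVM prediction score, true class label, and cross-validation fold (numbered from 0). A minimal sketch of the format (all values are illustrative only)::

  seq1   1.273   1   0
  seq2  -0.942  -1   0
  seq3   0.518   1   1
  seq4  -1.605  -1   1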

----

**Results**

ROC Curve: Receiver Operating Characteristic curve. Plots the true positive rate (sensitivity) against the false positive rate (1 - specificity).

PR Curve: Precision-Recall curve. Plots precision (the fraction of sequences classified as positive that are truly positive) against recall (the fraction of all positives that are recovered; same as sensitivity).

AUC: Area Under the Curve. For the ROC curve, this equals the probability that a randomly selected positive sequence is scored more highly by the trained SVM than a randomly selected negative one.
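
The ROC AUC shown in the plot is the fold-averaged value from ROCR's ``performance(pred, 'auc')``; the PR AUC is a rectangle-rule sum over the precision-recall points. A minimal sketch of the same computation for a single fold outside Galaxy, where ``scores`` and ``labels`` stand in for your own prediction and label vectors::

  library(ROCR)
  pred <- prediction(scores, labels)
  roc_auc <- performance(pred, "auc")@y.values[[1]]
  perf_prc <- performance(pred, "prec", "rec")
  rec <- perf_prc@x.values[[1]]
  prec <- perf_prc@y.values[[1]]
  pr_auc <- sum(diff(rec) * prec[-1])  # rectangle rule, as in auPRC() above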

.. class:: infomark

Both curves measure SVM performance, but ROC curves can be misleading when the class distribution is strongly skewed: with 10 positives and 1000 negatives, for example, 50 false positives leave the false positive rate at only 0.05 yet drag precision below 0.2. For more information see:

Jesse Davis, Mark Goadrich.
The Relationship Between Precision-Recall and ROC Curves.
Proceedings of the 23rd International Conference on Machine Learning.
Pittsburgh, PA, 2006.

----

**Example**

.. image:: ./static/images/sample_roc_chen.png
  </help>
</tool>