annotate tools/spades_3_5_0/plot_spades_stats.xml @ 14:21734680d921 draft default tip

Uploaded
author takadonet
date Fri, 27 Feb 2015 11:21:41 -0500
parents 85c6121d92a5
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
12
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
1 <tool id="plot_spades_stats" name="SPAdes stats" version="0.1">
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
2 <description>coverage vs. length plot</description>
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
3 <requirements>
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
4 <requirement type="package">R</requirement>
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
5 </requirements>
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
6 <command interpreter="bash">r_wrapper.sh $script_file</command>
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
7
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
8 <inputs>
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
9 <param name="input_scaffolds" type="data" format="tabular" label="Scaffold stats"/>
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
10 <param name="input_contigs" type="data" format="tabular" label="Contig stats"/>
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
11 <param name="length_co" type="integer" value="1000" min="0" label="Length cut-off" help="Contigs with length under that value are shown in red"/>
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
12 <param name="coverage_co" type="integer" value="10" min="0" label="Coverage cut-off" help="Contigs with coverage under that value are shown in red"/>
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
13 </inputs>
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
14 <configfiles>
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
15 <configfile name="script_file">
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
16 ## Send R error messages to stderr, then quit with exit status 1 without saving the workspace
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
17 options( show.error.messages=F,
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
18 error = function () {
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
19 cat( geterrmessage(), file=stderr() ); q( "no", 1, F )
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
20 } )
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
21 files = c("${input_contigs}", "${input_scaffolds}")
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
22 types = c("Contigs", "Scaffolds")
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
23
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
24 ## Start plotting device
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
25 png("${out_file}", w=500, h=1000)
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
26 par(mfrow=c(2,1))
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
27
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
28 ## Loop over the two files
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
29 for (i in 1:length(types)){
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
30 seqs = read.table(files[i], header=FALSE, comment.char="#")
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
31 colnames = c("name", "length", "coverage")
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
32 names(seqs) = colnames
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
33
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
34 ## Stats over all sequences (N50/N90: length of the first sequence, longest first, at which the cumulative length reaches 50 or 90 percent of the total)
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
35 sl_all = sort(seqs\$length, decreasing=TRUE)
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
36 cs_all = cumsum(sl_all)
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
37 s_all = sum(seqs\$length)
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
38 n50_idx_all = which(cs_all >= 0.5*s_all)[1]
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
39 n90_idx_all = which(cs_all >= 0.9*s_all)[1]
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
40 n50_all = sl_all[n50_idx_all]
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
41 n90_all = sl_all[n90_idx_all]
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
42
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
43 ## Filter short or low-coverage seqs, redo stats (N50/N90 are NA when nothing passes the filter)
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
44 seqs_filt = seqs[seqs\$length >= ${length_co} &amp; seqs\$coverage >= ${coverage_co},]
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
45 if (nrow(seqs_filt) > 0){
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
46 sl_filt = sort(seqs_filt\$length, decreasing=TRUE)
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
47 cs_filt = cumsum(sl_filt)
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
48 s_filt = sum(seqs_filt\$length)
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
49 n50_idx_filt = which(cs_filt >= 0.5*s_filt)[1]
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
50 n90_idx_filt = which(cs_filt >= 0.9*s_filt)[1]
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
51 n50_filt = sl_filt[n50_idx_filt]
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
52 n90_filt = sl_filt[n90_idx_filt]
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
53 } else { n50_filt = NA; n90_filt = NA }
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
54 seqs_bad = seqs[seqs\$length &lt; ${length_co} | seqs\$coverage &lt; ${coverage_co},]
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
55
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
56 ## Length vs coverage
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
57 plot(length~coverage, data=seqs, log="xy", type="n", main=paste(types[i], ": coverage vs. length", sep=""), xlab="Coverage", ylab="Length")
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
58 if (nrow(seqs_bad) > 0){
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
59 points(length~coverage, data=seqs_bad, cex=0.5, col="red")
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
60 }
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
61 if (nrow(seqs_filt) > 0){
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
62 points(length~coverage, data=seqs_filt, cex=0.5, col="black")
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
63 }
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
64 abline(v=${coverage_co}, h=${length_co}, lty=2, col=grey(0.3))
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
65 legend(x="topleft", legend=c("Before/after filtering", paste(c("N50: ", "N90: ", "Median cov.: "), c(n50_all, n90_all, round(median(seqs\$coverage))), rep("/", 3), c(n50_filt, n90_filt, round(median(seqs_filt\$coverage))), sep="")), cex=0.8)
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
66 }
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
67 dev.off()
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
68 </configfile>
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
69 </configfiles>
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
70 <outputs>
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
71 <data format="png" name="out_file" />
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
72 </outputs>
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
73 <help>
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
74 **What it does**
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
75
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
76 Using the output of SPAdes (a FASTA file and a stats file for each of the contigs and the scaffolds), it produces a coverage vs. length plot. Each dot represents a contig/scaffold. Given a coverage and a length cutoff, sequences that do not meet those criteria are shown in red. Some statistics are also given (N50, N90, median contig/scaffold coverage) both before and after filtering.
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
77
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
78 Use the "filter SPAdes output" tool to actually filter sequences.
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
79 </help>
85c6121d92a5 Uploaded
takadonet
parents:
diff changeset
80 </tool>