Mercurial > repos > drosofff > msp_sr_readmap_and_size_histograms
comparison readmap.xml @ 8:be0c6b6466cc draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit 97b40d7a593cef6c3303f7baba781a84d242e454
author | mvdbeek |
---|---|
date | Mon, 19 Sep 2016 06:16:21 -0400 |
parents | 68f58363f1c6 |
children | 92898cc3ea19 |
comparison
equal
deleted
inserted
replaced
7:c9e267cb84c0 | 8:be0c6b6466cc |
---|---|
1 <tool id="Readmap" name="Generate readmap and histograms from alignment files" version="1.1.5"> | 1 <tool id="Readmap" name="Generate readmap and histograms from alignment files" version="1.2.0"> |
2 <description>from sRbowtie aligment</description> | 2 <description>from sRbowtie aligment</description> |
3 <requirements> | 3 <requirements> |
4 <requirement type="package" version="0.12.7">bowtie</requirement> | 4 <requirement type="package" version="1.0.0">bowtie</requirement> |
5 <requirement type="package" version="0.7.7">pysam</requirement> | 5 <requirement type="package" version="0.9.0">pysam</requirement> |
6 <requirement type="package" version="3.1.2">R</requirement> | 6 <requirement type="package" version="1.9.3">numpy</requirement> |
7 <requirement type="package" version="2.14">biocbasics</requirement> | 7 <requirement type="package" version="1.3.0">r-optparse</requirement> |
8 <requirement type="package" version="1.9">numpy</requirement> | 8 <requirement type="package" version="0.6_26">r-latticeextra</requirement> |
9 </requirements> | 9 <requirement type="package" version="2.0.0">r-gridextra</requirement> |
10 <command interpreter="python"> | 10 </requirements> |
11 readmap.py | 11 <command><![CDATA[ |
12 #if $refGenomeSource.genomeSource == "history": | 12 python2 $__tool_directory__/readmap.py |
13 --reference_fasta ## sys.argv[2] | 13 #if $refGenomeSource.genomeSource == "history": |
14 $refGenomeSource.ownFile ## index source | 14 --reference_fasta |
15 #else: | 15 $refGenomeSource.ownFile ## index source |
16 #silent reference= filter( lambda x: str( x[0] ) == str( $refGenomeSource.series[0].input.dbkey ), $__app__.tool_data_tables[ 'bowtie_indexes' ].get_fields() )[0][-1] | 16 #else: |
17 --reference_bowtie_index | 17 #silent reference= filter( lambda x: str( x[0] ) == str( $refGenomeSource.series[0].input.dbkey ), $__app__.tool_data_tables[ 'bowtie_indexes' ].get_fields() )[0][-1] |
18 $reference | 18 --reference_bowtie_index |
19 #end if | 19 $reference |
20 --rcode | 20 #end if |
21 $plotCode | 21 --output_readmap |
22 --output_readmap | 22 "$readmap_dataframe" |
23 $readmap_dataframe | 23 --output_size_distribution |
24 --output_size_distribution | 24 "$size_distribution_dataframe" |
25 $size_distribution_dataframe | 25 --minquery $minquery |
26 --minquery | 26 --maxquery $maxquery |
27 $minquery | 27 --input |
28 --maxquery | 28 #for $i in $refGenomeSource.series |
29 $maxquery | 29 $i.input |
30 --input | 30 #end for |
31 #for $i in $refGenomeSource.series | 31 --ext |
32 $i.input | 32 #for $i in $refGenomeSource.series |
33 #end for | 33 $i.input.ext |
34 --ext | 34 #end for |
35 #for $i in $refGenomeSource.series | 35 --label |
36 $i.input.ext | 36 #for $i in $refGenomeSource.series |
37 #end for | 37 "$i.input.name" |
38 --label | 38 #end for |
39 #for $i in $refGenomeSource.series | 39 --normalization_factor |
40 "$i.input.name" | 40 #for $i in $refGenomeSource.series |
41 #end for | 41 $i.norm |
42 --normalization_factor | 42 #end for |
43 #for $i in $refGenomeSource.series | 43 #if $gff: |
44 $i.norm | 44 --gff |
45 #end for | 45 $gff |
46 #if $gff: | 46 #end if |
47 --gff | 47 ; Rscript $__tool_directory__/plot_size_readmap.r |
48 $gff | 48 --readmap_tab "$readmap_dataframe" |
49 #end if | 49 --size_distribution_tab "$size_distribution_dataframe" |
50 | 50 --readmap_pdf "$readmap_PDF" |
51 --size_distribution_pdf "$size_PDF" | |
52 --combi_pdf "$combi_PDF" | |
53 --title "$title" | |
54 --xlabel "$xlabel" | |
55 --ylabel "$ylabel" | |
56 --yrange "$yrange" | |
57 --rows_per_page "$rows_per_page" | |
58 ]]> | |
51 </command> | 59 </command> |
52 <inputs> | 60 <inputs> |
53 <conditional name="refGenomeSource"> | 61 <conditional name="refGenomeSource"> |
54 <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options"> | 62 <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options"> |
55 <option value="indexed">Use a built-in index</option> | 63 <option value="indexed">Use a built-in index</option> |
56 <option value="history">Use one from the history</option> | 64 <option value="history">Use one from the history</option> |
57 </param> | 65 </param> |
58 <when value="indexed"> | 66 <when value="indexed"> |
59 <repeat name="series" title="Add alignment files"> | 67 <repeat name="series" title="Add alignment files"> |
60 <param name="input" type="data" label="Select multiple alignments to parse" format="tabular,sam,bam"> | 68 <param name="input" type="data" label="Select multiple alignments to parse" format="tabular,sam,bam"> |
61 <validator type="dataset_metadata_in_data_table" table_name="bowtie_indexes" metadata_name="dbkey" metadata_column="0" message="database not set for this bowtie output. Select the database(=genome used for matching) manually, or select a reference fasta from your history."/> | 69 <validator type="dataset_metadata_in_data_table" table_name="bowtie_indexes" metadata_name="dbkey" metadata_column="0" message="database not set for this bowtie output. Select the database(=genome used for matching) manually, or select a reference fasta from your history."/> |
62 </param> | 70 </param> |
63 <param name="norm" type="float" value="1" label="Indicate a normalization factor to compare multiple aligments"/> | 71 <param name="norm" type="float" value="1" label="Indicate a normalization factor to compare multiple aligments"/> |
64 </repeat> | 72 </repeat> |
65 </when> | 73 </when> |
66 <when value="history"> | 74 <when value="history"> |
67 <param name="ownFile" type="data" format="fasta" label="Select a fasta file, that served as the reference index for the alignments" /> | 75 <param name="ownFile" type="data" format="fasta" label="Select a fasta file, that served as the reference index for the alignments" /> |
68 <repeat name="series" title="Add alignment files"> | 76 <repeat name="series" title="Add alignment files"> |
69 <param name="input" type="data" label="Select multiple alignments to parse" format="tabular,sam,bam"/> | 77 <param name="input" type="data" label="Select multiple alignments to parse" format="tabular,sam,bam"/> |
70 <param name="norm" type="float" value="1" label="Indicate a normalization factor to compare multiple aligments"/> | 78 <param name="norm" type="float" value="1" label="Indicate a normalization factor to compare multiple aligments"/> |
71 </repeat> | 79 </repeat> |
72 </when> | 80 </when> |
73 </conditional> | 81 </conditional> |
74 <param name="gff" type="data" format="gff3" optional="true" label="Optional: select a GFF to investigate regions of interest" help="GFF must match genome build"/> | 82 <param name="gff" type="data" format="gff3" optional="true" label="Optional: select a GFF to investigate regions of interest" help="GFF must match genome build"/> |
75 <!-- <validator type="dataset_metadata_in_data_table" table_name="bowtie_indexes" metadata_name="dbkey" metadata_column="0" message="GFF database and alignment file database do not match!"/> --> | 83 <!-- <validator type="dataset_metadata_in_data_table" table_name="bowtie_indexes" metadata_name="dbkey" metadata_column="0" message="GFF database and alignment file database do not match!"/> --> |
76 <param name="minquery" type="integer" size="3" value="18" label="Min size of query small RNAs" help="'18' = 18 nucleotides"/> | 84 <param name="minquery" type="integer" size="3" value="18" label="Min size of query small RNAs" help="'18' = 18 nucleotides"/> |
77 <param name="maxquery" type="integer" size="3" value="28" label="Max size of query small RNAs" help="'28' = 28 nucleotides"/> | 85 <param name="maxquery" type="integer" size="3" value="28" label="Max size of query small RNAs" help="'28' = 28 nucleotides"/> |
78 <param name="title" type="text" size="15" value= "Readmaps and size distributions" label="Main Titles"/> | 86 <param name="title" type="text" size="15" value= "Readmaps and size distributions" label="Main Titles"/> |
79 <param name="xlabel" type="text" size="15" value="Coordinates/read size" label="x axis label"/> | 87 <param name="xlabel" type="text" size="15" value="Coordinates/read size" label="x axis label"/> |
80 <param name="ylabel" type="text" size="15" value="Number of reads" label="y axis label"/> | 88 <param name="ylabel" type="text" size="15" value="Number of reads" label="y axis label"/> |
81 <param name="yrange" type="integer" size="3" value="0" label="y axis range for readmaps. 0 means auto-scaling."/> | 89 <param name="yrange" type="integer" size="3" value="0" label="y axis range for readmaps. 0 means auto-scaling."/> |
82 <param name="rows_per_page" type="text" size="9" value="8" label="How many items to display per page?"> | 90 <param name="rows_per_page" type="text" size="9" value="8" label="How many items to display per page?"> |
83 <validator type="in_range" min="6" max="20" message="Select between 6 and 20 rows, as the readability will suffer otherwise."/> | 91 <validator type="in_range" min="6" max="20" message="Select between 6 and 20 rows, as the readability will suffer otherwise."/> |
84 </param> | 92 </param> |
85 </inputs> | 93 </inputs> |
86 <configfiles> | 94 <outputs> |
87 <configfile name="plotCode"> | 95 <data format="tabular" name="readmap_dataframe" label="Readmap dataframe"/> |
88 ## Setup R error handling to go to stderr | 96 <data format="tabular" name="size_distribution_dataframe" label="Size distribution dataframe"/> |
89 options( show.error.messages=F, | 97 <data format="pdf" name="readmap_PDF" label="Readmaps"/> |
90 error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) | 98 <data format="pdf" name="size_PDF" label="Size distribution"/> |
91 library(RColorBrewer) | 99 <data format="pdf" name="combi_PDF" label="Size distribution and Readmaps"/> |
92 library(lattice) | 100 </outputs> |
93 library(latticeExtra) | 101 <help> |
94 library(grid) | |
95 library(gridExtra) | |
96 | |
97 ## data frames implementation | |
98 | |
99 rm=read.delim("${readmap_dataframe}", header=T, row.names=NULL) | |
100 n_samples=length(unique(rm\$sample)) | |
101 genes=unique(levels(rm\$gene)) | |
102 per_gene_readmap=lapply(genes, function(x) subset(rm, gene==x)) ####### ? | |
103 n_genes=length(per_gene_readmap) | |
104 | |
105 size=read.delim("${size_distribution_dataframe}", header=T, row.names=NULL) | |
106 per_gene_size=lapply(genes, function(x) subset(size, gene==x)) ###### ? | |
107 | |
108 ## end of data frames implementation | |
109 | |
110 ## functions | |
111 | |
112 plot_readmap=function(df, ...) { | |
113 combineLimits(xyplot(count~coord|factor(sample, levels=unique(sample))+reorder(gene, count, function(x) -sum(abs(x))), | |
114 data=df, | |
115 type='h', | |
116 scales= list(relation="free", x=list(rot=0, cex=0.7, axs="i", tck=0.5), y=list(tick.number=4, rot=90, cex=0.7)), | |
117 xlab=NULL, main=NULL, ylab=NULL, | |
118 as.table=T, | |
119 origin = 0, | |
120 horizontal=FALSE, | |
121 group=polarity, | |
122 col=c("red","blue"), | |
123 par.strip.text = list(cex=0.7), | |
124 ...)) | |
125 } | |
126 | |
127 plot_size_distribution= function(df, ...) { | |
128 smR.prepanel=function(x,y,...){; yscale=c(-max(abs(y)), max(abs(y)));list(ylim=yscale);} | |
129 bc= barchart(count~as.factor(size)|factor(sample, levels=unique(sample))+gene, data = df, origin = 0, | |
130 horizontal=FALSE, | |
131 group=polarity, | |
132 stack=TRUE, | |
133 col=c('red', 'blue'), | |
134 cex=0.75, | |
135 scales=list(y=list(tick.number=4, rot=90, relation="free", cex=0.7), x=list(cex=0.7) ), | |
136 prepanel=smR.prepanel, | |
137 xlab = NULL, | |
138 ylab = NULL, | |
139 main = NULL, | |
140 as.table=TRUE, | |
141 newpage = T, | |
142 par.strip.text = list(cex=0.7), | |
143 ...) | |
144 combineLimits(bc) | |
145 } | |
146 | |
147 ## end of functions | |
148 | |
149 ## function parameters' | |
150 | |
151 par.settings.readmap=list(layout.heights=list(top.padding=0, bottom.padding=-2.5), strip.background = list(col=c("lightblue","lightgreen")) ) | |
152 par.settings.size=list(layout.heights=list(top.padding=-1, bottom.padding=-2.5), strip.background = list(col=c("lightblue","lightgreen")) ) | |
153 par.settings.combination.readmap=list(layout.heights=list(top.padding=0, bottom.padding=-3), strip.background=list(col=c("lightblue","lightgreen")) ) | |
154 par.settings.combination.size=list(layout.heights=list(top.padding=-2, bottom.padding=-0.5), strip.background=list(col=c("lightblue", "lightgreen")) ) | |
155 | |
156 ## end of function parameters' | |
157 | |
158 ## GRAPHS | |
159 | |
160 if (n_genes > 7) {page_height_simple = 11.69; page_height_combi=11.69; rows_per_page=${rows_per_page}; extrarow=0 } else { | |
161 rows_per_page= n_genes; page_height_simple = 2.5*n_genes; page_height_combi=page_height_simple*2; extrarow=0 } | |
162 ## rows_per_page= 8; page_height_simple = 11.69/7*n_genes; page_height_combi=11.69/9*(n_genes*2); extrarow=0 } | |
163 ## rows_per_page= n_genes; page_height_simple = 11.69/n_genes/4; page_height_combi=11.69/(n_genes*2); extrarow=1 } | |
164 if (n_samples > 4) {page_width = 8.2677*n_samples/4} else {page_width = 8.2677*n_samples/3} # to test | |
165 | |
166 pdf(file="${readmap_PDF}", paper="special", height=page_height_simple, width=page_width) | |
167 for (i in seq(1,n_genes,rows_per_page)) { | |
168 start=i | |
169 end=i+rows_per_page-1 | |
170 if (end>n_genes) {end=n_genes} | |
171 if (${yrange} == 0) { readmap_plot.list=lapply(per_gene_readmap[start:end], function(x) plot_readmap(x, par.settings=par.settings.readmap)) } else { | |
172 readmap_plot.list=lapply(per_gene_readmap[start:end], function(x) plot_readmap(x, ylim=c(-${yrange}, ${yrange}) , par.settings=par.settings.readmap)) } | |
173 args.list=c(readmap_plot.list, list(nrow=rows_per_page, ncol=1, | |
174 main=textGrob("Read Maps (nucleotide coordinates)", gp=gpar(cex=1), just="top"), | |
175 left=textGrob("${ylabel}", gp=gpar(cex=1), vjust=1, rot=90) | |
176 #sub=textGrob("readmap coordinates", gp=gpar(cex=.75), just="bottom") | |
177 ) | |
178 ) | |
179 do.call(grid.arrange, args.list) | |
180 } | |
181 devname=dev.off() | |
182 | |
183 | |
184 pdf(file="${size_PDF}", paper="special", height=page_height_simple, width=page_width) | |
185 for (i in seq(1,n_genes,rows_per_page)) { | |
186 start=i | |
187 end=i+rows_per_page-1 | |
188 if (end>n_genes) {end=n_genes} | |
189 plot.list=lapply(per_gene_size[start:end], function(x) plot_size_distribution(x, par.settings=par.settings.size) ) | |
190 args.list=c(plot.list, list(nrow=rows_per_page, ncol=1, | |
191 main=textGrob("Size distributions (in nucleotides)", gp=gpar(cex=1), just="top"), | |
192 left=textGrob("${ylabel}", gp=gpar(cex=1), vjust=1, rot=90) | |
193 #sub="readsize in nucleotides" | |
194 ) | |
195 ) | |
196 do.call(grid.arrange, args.list) | |
197 } | |
198 devname=dev.off() | |
199 | |
200 pdf(file="${combi_PDF}", paper="special", height=page_height_combi, width=page_width) | |
201 if (rows_per_page %% 2 != 0) { rows_per_page = rows_per_page + 1} | |
202 for (i in seq(1,n_genes,rows_per_page/2)) { | |
203 start=i | |
204 end=i+rows_per_page/2-1 | |
205 if (end>n_genes) {end=n_genes} | |
206 if (${yrange} == 0) {readmap_plot.list=lapply(per_gene_readmap[start:end], function(x) plot_readmap(x, par.settings=par.settings.readmap)) } else { | |
207 readmap_plot.list=lapply(per_gene_readmap[start:end], function(x) plot_readmap(x, ylim=c(-${yrange}, ${yrange}), par.settings=par.settings.readmap)) } | |
208 size_plot.list=lapply(per_gene_size[start:end], function(x) plot_size_distribution(x, strip=FALSE, par.settings=par.settings.combination.size)) | |
209 plot.list=rbind(readmap_plot.list, size_plot.list ) | |
210 args.list=c(plot.list, list(nrow=rows_per_page + extrarow, ncol=1, | |
211 main=textGrob("${title}", gp=gpar(cex=1), just="top"), | |
212 left=textGrob("${ylabel}", gp=gpar(cex=1), vjust=1, rot=90), | |
213 sub=textGrob("${xlabel}", gp=gpar(cex=1), just="bottom") | |
214 ) | |
215 ) | |
216 do.call(grid.arrange, args.list) | |
217 } | |
218 devname=dev.off() | |
219 | |
220 | |
221 </configfile> | |
222 </configfiles> | |
223 | |
224 <outputs> | |
225 <data format="tabular" name="readmap_dataframe" label="Readmap dataframe"/> | |
226 <data format="tabular" name="size_distribution_dataframe" label="Size distribution dataframe"/> | |
227 <data format="pdf" name="readmap_PDF" label="Readmaps"/> | |
228 <data format="pdf" name="size_PDF" label="Size distribution"/> | |
229 <data format="pdf" name="combi_PDF" label="Size distribution and Readmaps"/> | |
230 </outputs> | |
231 <help> | |
232 | 102 |
233 **What it does** | 103 **What it does** |
234 | 104 |
235 Takes one or more alignment files (BAM, SAM or tabular bowtie output) as input and produces a "Readmap", | 105 Takes one or more alignment files (BAM, SAM or tabular bowtie output) as input and produces a "Readmap", |
236 where by default for each "chromosome" the position of the read is recorded on the x-axis, and the y-axis indicates | 106 where by default for each "chromosome" the position of the read is recorded on the x-axis, and the y-axis indicates |
237 the number of reads per position. Reads that map in sense are on the top, reads that map antisense are on the bottom. | 107 the number of reads per position. Reads that map in sense are on the top, reads that map antisense are on the bottom. |
238 | 108 |
239 | 109 |
240 .. class:: warningmark | 110 .. class:: warningmark |
241 | 111 |
246 **Example** | 116 **Example** |
247 | 117 |
248 Query sequence:: | 118 Query sequence:: |
249 For a SAM file like the following: | 119 For a SAM file like the following: |
250 | 120 |
251 5 16 2L_79 24393 255 17M * 0 0 CCTTCATCTTTTTTTTT IIIIIIIIIIIIIIIII XA:i:0 MD:Z:17 NM:i:0 | 121 5 16 2L_79 24393 255 17M * 0 0 CCTTCATCTTTTTTTTT IIIIIIIIIIIIIIIII XA:i:0 MD:Z:17 NM:i:0 |
252 | 122 |
253 11 0 2R_1 12675 255 21M * 0 0 AAAAAAAACGCGTCCTTGTGC IIIIIIIIIIIIIIIIIIIII XA:i:0 MD:Z:21 NM:i:0 | 123 11 0 2R_1 12675 255 21M * 0 0 AAAAAAAACGCGTCCTTGTGC IIIIIIIIIIIIIIIIIIIII XA:i:0 MD:Z:21 NM:i:0 |
254 | 124 |
255 2 16 2L_5 669 255 23M * 0 0 TGTTGCTGCATTTCTTTTTTTTT IIIIIIIIIIIIIIIIIIIIIII XA:i:0 MD:Z:23 NM:i:0 | 125 2 16 2L_5 669 255 23M * 0 0 TGTTGCTGCATTTCTTTTTTTTT IIIIIIIIIIIIIIIIIIIIIII XA:i:0 MD:Z:23 NM:i:0 |
256 | 126 |
257 the tool produces a plot like this: | 127 the tool produces a plot like this: |
258 | 128 |
259 ---- | 129 ---- |
260 | 130 |
261 .. image:: static/images/readmap.png | 131 .. image:: static/images/readmap.png |
262 :height: 800 | 132 :height: 800 |
263 :width: 500 | 133 :width: 500 |
264 | 134 |
265 </help> | 135 </help> |
266 <tests> | 136 <tests> |
267 <test> | 137 <test> |
268 <param name="genomeSource" value="history" /> | 138 <param name="genomeSource" value="history" /> |
269 <param name="ownFile" value ="transposons.fasta" ftype="fasta" /> | 139 <param name="ownFile" value ="transposons.fasta" ftype="fasta" /> |
270 <param name="series_0|input" value="sample1.srbowtie_out" ftype="tabular"/> | 140 <param name="series_0|input" value="sample1.srbowtie_out" ftype="tabular"/> |
271 <param name="series_0|norm" value="1" /> | 141 <param name="series_0|norm" value="1" /> |
272 <param name="series_1|input" value="sample2.srbowtie_out" ftype="tabular"/> | 142 <param name="series_1|input" value="sample2.srbowtie_out" ftype="tabular"/> |
273 <param name="series_1|norm" value="1" /> | 143 <param name="series_1|norm" value="1" /> |
274 <param name="series_2|input" value="sample3.srbowtie_out" ftype="tabular"/> | 144 <param name="series_2|input" value="sample3.srbowtie_out" ftype="tabular"/> |
275 <param name="series_2|norm" value="1" /> | 145 <param name="series_2|norm" value="1" /> |
276 <param name="minquery" value="20" /> | 146 <param name="minquery" value="20" /> |
277 <param name="maxquery" value="30" /> | 147 <param name="maxquery" value="30" /> |
278 <param name="title" value="Readmaps and size distributions" /> | 148 <param name="title" value="Readmaps and size distributions" /> |
279 <param name="xlabel" value="Coordinates/read size" /> | 149 <param name="xlabel" value="Coordinates/read size" /> |
280 <param name="ylabel" value="Number of reads" /> | 150 <param name="ylabel" value="Number of reads" /> |
281 <param name="rows_per_page" value="8" /> | 151 <param name="rows_per_page" value="8" /> |
282 <output name="readmap_dataframe" ftype="tabular" file="Readmap_dataframe.tab" /> | 152 <output name="readmap_dataframe" ftype="tabular" file="Readmap_dataframe.tab" /> |
283 <output name="size_distribution_dataframe" ftype="tabular" file="Size_distribution_dataframe.tab" /> | 153 <output name="size_distribution_dataframe" ftype="tabular" file="Size_distribution_dataframe.tab" /> |
284 <output name="readmap_PDF" ftype="pdf" file="Readmaps.pdf" /> | 154 </test> |
285 <output name="size_PDF" ftype="pdf" file="Size_distribution.pdf" /> | 155 </tests> |
286 <output name="combi_PDF" ftype="pdf" file="Size_distribution_and_Readmaps.pdf" /> | |
287 </test> | |
288 </tests> | |
289 </tool> | 156 </tool> |