comparison readmap.xml @ 0:ac7d8e55bb67 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit fe40dec87779c1fcfbd03330e653aa886f4a2cda
author drosofff
date Wed, 21 Oct 2015 11:13:18 -0400
parents
children e4874d1ae69d
comparison
equal deleted inserted replaced
-1:000000000000 0:ac7d8e55bb67
1 <tool id="Readmap" name="Generate readmap and histograms from alignment files" version="1.0.4">
2 <description>from sRbowtie alignment</description>
3 <requirements>
4 <requirement type="package" version="0.12.7">bowtie</requirement>
5 <requirement type="package" version="0.7.7">pysam</requirement>
6 <requirement type="package" version="3.1.2">R</requirement>
7 <requirement type="package" version="2.14">biocbasics</requirement>
8 <requirement type="package" version="1.9">numpy</requirement>
9 </requirements>
10 <command interpreter="python">
11 readmap.py ## wrapper script; all options below are appended to its command line
12 #if $refGenomeSource.genomeSource == "history":
13 --reference_fasta ## sys.argv[2]
14 $refGenomeSource.ownFile ## index source: the fasta dataset picked from the history
15 #else:
16 #silent reference= filter( lambda x: str( x[0] ) == str( $refGenomeSource.series[0].input.dbkey ), $__app__.tool_data_tables[ 'bowtie_indexes' ].get_fields() )[0][-1]
17 --reference_bowtie_index ## built-in index: first bowtie_indexes row whose first field equals the dbkey of the first alignment
18 $reference ## last field of that row (computed by the silent directive above)
19 #end if
20 --rcode ## path of the R script rendered from the plotCode configfile
21 $plotCode
22 --output_readmap ## tabular output later read back by the R script
23 $readmap_dataframe
24 --output_size_distribution ## tabular output later read back by the R script
25 $size_distribution_dataframe
26 --minquery
27 $minquery
28 --maxquery
29 $maxquery
30 --input ## one value per entry of the series repeat; the four loops below emit parallel lists in the same order
31 #for $i in $refGenomeSource.series
32 $i.input
33 #end for
34 --ext ## datatype extension of each alignment
35 #for $i in $refGenomeSource.series
36 $i.input.ext
37 #end for
38 --label ## dataset names, double-quoted for the shell
39 #for $i in $refGenomeSource.series
40 "$i.input.name" ## NOTE(review): a dataset name containing a double quote would break this quoting - confirm whether names are sanitised upstream
41 #end for
42 --normalization_factor ## per-alignment factor from the norm parameter
43 #for $i in $refGenomeSource.series
44 $i.norm
45 #end for
46 #if $gff:
47 --gff ## only emitted when the optional GFF dataset is set
48 $gff
49 #end if
50
51 </command>
52 <inputs>
53 <conditional name="refGenomeSource">
54 <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
55 <option value="indexed">Use a built-in index</option>
56 <option value="history">Use one from the history</option>
57 </param>
58 <when value="indexed">
59 <repeat name="series" title="Add alignment files">
60 <param name="input" type="data" label="Select multiple alignments to parse" format="tabular,sam,bam">
61 <validator type="dataset_metadata_in_data_table" table_name="bowtie_indexes" metadata_name="dbkey" metadata_column="0" message="database not set for this bowtie output. Select the database(=genome used for matching) manually, or select a reference fasta from your history."/>
62 </param>
63 <param name="norm" type="float" value="1" label="Indicate a normalization factor to compare multiple alignments"/>
64 </repeat>
65 </when>
66 <when value="history">
67 <param name="ownFile" type="data" format="fasta" label="Select a fasta file, that served as the reference index for the alignments" />
68 <repeat name="series" title="Add alignment files">
69 <param name="input" type="data" label="Select multiple alignments to parse" format="tabular,sam,bam"/>
70 <param name="norm" type="float" value="1" label="Indicate a normalization factor to compare multiple alignments"/>
71 </repeat>
72 </when>
73 </conditional>
74 <param name="gff" type="data" format="gff3" optional="true" label="Optional: select a GFF to investigate regions of interest" help="GFF must match genome build"/>
75 <!-- <validator type="dataset_metadata_in_data_table" table_name="bowtie_indexes" metadata_name="dbkey" metadata_column="0" message="GFF database and alignment file database do not match!"/> -->
76 <param name="minquery" type="integer" size="3" value="18" label="Min size of query small RNAs" help="'18' = 18 nucleotides"/>
77 <param name="maxquery" type="integer" size="3" value="28" label="Max size of query small RNAs" help="'28' = 28 nucleotides"/>
78 <param name="title" type="text" size="15" value= "Readmaps and size distributions" label="Main Titles"/>
79 <param name="xlabel" type="text" size="15" value="Coordinates/read size" label="x axis label"/>
80 <param name="ylabel" type="text" size="15" value="Number of reads" label="y axis label"/>
81 <param name="rows_per_page" type="text" size="9" value="8" label="How many items to display per page?">
82 <validator type="in_range" min="6" max="20" message="Select between 6 and 20 rows, as the readability will suffer otherwise."/>
83 </param>
84 </inputs>
85 <configfiles>
86 <configfile name="plotCode">
87 ## Setup R error handling to go to stderr
88 options( show.error.messages=F, ## turn off the default error printing; the handler on the next line reports instead
89 error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) ## print the last error message to stderr, then quit without saving the workspace, exit status 1
90 library(RColorBrewer) ## NOTE(review): no RColorBrewer function is called in the visible script - confirm it is still needed
91 library(lattice) ## xyplot, barchart
92 library(latticeExtra) ## combineLimits
93 library(grid) ## textGrob, gpar
94 library(gridExtra) ## grid.arrange
95
96 ## data frames implementation
97
98 rm=read.delim("${readmap_dataframe}", header=T, row.names=NULL) ## readmap table; same path that is passed to readmap.py as --output_readmap (note: the name rm masks base::rm for variable lookups)
99 n_samples=length(unique(rm\$sample)) ## number of distinct values in the sample column
100 genes=unique(levels(rm\$gene)) ## NOTE(review): levels() presumes the gene column was read as a factor - confirm for the R version in use
101 per_gene_readmap=lapply(genes, function(x) subset(rm, gene==x)) ## list with one readmap data frame per gene, in the order of genes
102 n_genes=length(per_gene_readmap)
103
104 size=read.delim("${size_distribution_dataframe}", header=T, row.names=NULL) ## size table; same path that is passed to readmap.py as --output_size_distribution
105 per_gene_size=lapply(genes, function(x) subset(size, gene==x)) ## split by the same genes vector, so indices line up with per_gene_readmap
106
107 ## end of data frames implementation
108
109 ## functions
110
111 plot_readmap=function(df, ...) { ## builds the readmap trellis for the rows of df: count against coord, conditioned on sample and gene
112 readmap_panels=xyplot(count~coord|factor(sample, levels=unique(sample))+reorder(gene, count, function(x) -sum(abs(x))),
113 data=df, type='h', origin = 0, horizontal=FALSE,
114 group=polarity, col=c("red","blue"), ## rows are grouped by the polarity column
115 as.table=TRUE,
116 xlab=NULL, ylab=NULL, main=NULL, ## no axis labels or title on the individual plot
117 par.strip.text = list(cex=0.7),
118 scales= list(relation="free",
119 x=list(rot=0, cex=0.7, axs="i", tck=0.5),
120 y=list(tick.number=4, rot=90, cex=0.7)),
121 ...) ## extra arguments from the caller (the script passes par.settings) go straight to xyplot
122 ## the function value is the trellis object after latticeExtra combineLimits has been applied
123 combineLimits(readmap_panels)
124 }
125
126 plot_size_distribution= function(df, ...) { ## builds the stacked size-distribution bar chart for the rows of df: count against size, conditioned on sample and gene
127 ## prepanel hook: y limits symmetric around zero, reaching the largest absolute count of the panel
128 symmetric_ylim=function(x,y,...) {
129 list(ylim=c(-max(abs(y)), max(abs(y))))
130 }
131 size_bars=barchart(count~as.factor(size)|factor(sample, levels=unique(sample))+gene,
132 data = df,
133 origin = 0, horizontal=FALSE, stack=TRUE,
134 group=polarity, col=c('red', 'blue'), cex=0.75,
135 prepanel=symmetric_ylim,
136 scales=list(x=list(cex=0.7),
137 y=list(tick.number=4, rot=90, relation="free", cex=0.7)),
138 xlab = NULL, ylab = NULL, main = NULL,
139 as.table=TRUE, newpage = TRUE,
140 par.strip.text = list(cex=0.7),
141 ...)
142 ## the function value is the trellis object after latticeExtra combineLimits has been applied
143 combineLimits(size_bars)
144 }
145
146 ## end of functions
147
148 ## function parameters'
149
150 par.settings.readmap=list(layout.heights=list(top.padding=0, bottom.padding=-2.5), strip.background = list(col=c("lightblue","lightgreen")) )
151 par.settings.size=list(layout.heights=list(top.padding=-1, bottom.padding=-2.5), strip.background = list(col=c("lightblue","lightgreen")) )
152 par.settings.combination.readmap=list(layout.heights=list(top.padding=0, bottom.padding=-3), strip.background=list(col=c("lightblue","lightgreen")) )
153 par.settings.combination.size=list(layout.heights=list(top.padding=-2, bottom.padding=-0.5), strip.background=list(col=c("lightblue", "lightgreen")) )
154
155 ## end of function parameters'
156
157 ## GRAPHS
158
159 if (n_genes > 7) {page_height_simple = 11.69; page_height_combi=11.69; rows_per_page=${rows_per_page}; extrarow=0 } else { ## more than 7 genes: fixed page height of 11.69 and the user-supplied rows_per_page
160 rows_per_page= n_genes; page_height_simple = 11.69/n_genes/4; page_height_combi=11.69/(n_genes*2); extrarow=1 } ## 7 genes or fewer: rows_per_page equals n_genes; NOTE(review): both heights get smaller as n_genes grows - confirm this is intended
161 if (n_samples > 4) {page_width = 8.2677*n_samples/4} else {page_width = 8.2677*n_samples/3} # to test; page width grows with the number of samples
162
163 pdf(file="${readmap_PDF}", paper="special", height=page_height_simple, width=page_width) ## readmap PDF: one page per chunk of rows_per_page genes
164 for (i in seq(1,n_genes,rows_per_page)) {
165 start=i
166 end=i+rows_per_page-1
167 if (end>n_genes) {end=n_genes} ## the last page may hold fewer genes
168 readmap_plot.list=lapply(per_gene_readmap[start:end], function(x) plot_readmap(x, par.settings=par.settings.readmap))
169 args.list=c(readmap_plot.list, list(nrow=rows_per_page, ncol=1, ## plots first, then the layout and title arguments handed to grid.arrange
170 main=textGrob("Read Maps (nucleotide coordinates)", gp=gpar(cex=1), just="top"),
171 left=textGrob("${ylabel}", gp=gpar(cex=1), vjust=1, rot=90)
172 #sub=textGrob("readmap coordinates", gp=gpar(cex=.75), just="bottom")
173 )
174 )
175 do.call(grid.arrange, args.list) ## draws one page
176 }
177 devname=dev.off() ## close the PDF device; the return value is stored but not used in the visible script
178
179
180 pdf(file="${size_PDF}", paper="special", height=page_height_simple, width=page_width) ## size-distribution PDF: one page per chunk of rows_per_page genes
181 for (i in seq(1,n_genes,rows_per_page)) {
182 start=i
183 end=i+rows_per_page-1
184 if (end>n_genes) {end=n_genes} ## the last page may hold fewer genes
185 plot.list=lapply(per_gene_size[start:end], function(x) plot_size_distribution(x, par.settings=par.settings.size) )
186 args.list=c(plot.list, list(nrow=rows_per_page, ncol=1, ## plots first, then the layout and title arguments handed to grid.arrange
187 main=textGrob("Size distributions (in nucleotides)", gp=gpar(cex=1), just="top"),
188 left=textGrob("${ylabel}", gp=gpar(cex=1), vjust=1, rot=90)
189 #sub="readsize in nucleotides"
190 )
191 )
192 do.call(grid.arrange, args.list) ## draws one page
193 }
194 devname=dev.off() ## close the PDF device; the return value is stored but not used in the visible script
195
196 pdf(file="${combi_PDF}", paper="special", height=page_height_combi, width=page_width) ## combined PDF: every gene contributes its readmap followed by its size distribution
197 genes_per_page=max(1, floor(rows_per_page/2)) ## whole number of genes per page, two plots each; the former fractional step (odd rows_per_page) made start:end skip genes
198 for (i in seq(1,n_genes,genes_per_page)) {
199 start=i
200 end=min(i+genes_per_page-1, n_genes) ## the last page may hold fewer genes
201 read_plot.list=lapply(per_gene_readmap[start:end], function(x) plot_readmap(x, par.settings=par.settings.combination.readmap))
202 size_plot.list=lapply(per_gene_size[start:end], function(x) plot_size_distribution(x, strip=FALSE, par.settings=par.settings.combination.size))
203 plot.list=rbind(read_plot.list, size_plot.list ) ## two-row list matrix; c() below flattens it column by column, so each readmap is followed by the size plot of the same gene
204 args.list=c(plot.list, list(nrow=rows_per_page + extrarow, ncol=1,
205 main=textGrob("${title}", gp=gpar(cex=1), just="top"),
206 left=textGrob("${ylabel}", gp=gpar(cex=1), vjust=1, rot=90),
207 sub=textGrob("${xlabel}", gp=gpar(cex=1), just="bottom")
208 )
209 )
210 do.call(grid.arrange, args.list) ## draws one page
211 }
212 devname=dev.off() ## close the PDF device
213
214
215 </configfile>
216 </configfiles>
217
218 <outputs>
219 <data format="tabular" name="readmap_dataframe" label="Readmap dataframe"/>
220 <data format="tabular" name="size_distribution_dataframe" label="Size distribution dataframe"/>
221 <data format="pdf" name="readmap_PDF" label="Readmaps"/>
222 <data format="pdf" name="size_PDF" label="Size distribution"/>
223 <data format="pdf" name="combi_PDF" label="Size distribution and Readmaps"/>
224 </outputs>
225 <help>
226
227 **What it does**
228
229 Takes one or more alignment files (BAM, SAM or tabular bowtie output) as input and produces a "Readmap",
230 where by default for each "chromosome" the position of the read is recorded on the x-axis, and the y-axis indicates
231 the number of reads per position. Reads that map in sense are on the top, reads that map antisense are on the bottom.
232
233
234 .. class:: warningmark
235
236 **TIP** The input data can be produced using the sRbowtie tool.
237
238 ----
239
240 **Example**
241
242 Query sequence::
243 For a SAM file as the following:
244
245 5 16 2L_79 24393 255 17M * 0 0 CCTTCATCTTTTTTTTT IIIIIIIIIIIIIIIII XA:i:0 MD:Z:17 NM:i:0
246
247 11 0 2R_1 12675 255 21M * 0 0 AAAAAAAACGCGTCCTTGTGC IIIIIIIIIIIIIIIIIIIII XA:i:0 MD:Z:21 NM:i:0
248
249 2 16 2L_5 669 255 23M * 0 0 TGTTGCTGCATTTCTTTTTTTTT IIIIIIIIIIIIIIIIIIIIIII XA:i:0 MD:Z:23 NM:i:0
250
251 produce a plot like this:
252
253 ----
254
255 .. image:: static/images/readmap.png
256 :height: 800
257 :width: 500
258
259 </help>
260 <tests>
261 <test>
262 <param name="genomeSource" value="history" />
263 <param name="ownFile" value ="transposons.fasta" ftype="fasta" />
264 <param name="series_0|input" value="sample1.srbowtie_out" ftype="tabular"/>
265 <param name="series_0|norm" value="1" />
266 <param name="series_1|input" value="sample2.srbowtie_out" ftype="tabular"/>
267 <param name="series_1|norm" value="1" />
268 <param name="series_2|input" value="sample3.srbowtie_out" ftype="tabular"/>
269 <param name="series_2|norm" value="1" />
270 <param name="minquery" value="20" />
271 <param name="maxquery" value="30" />
272 <param name="title" value="Readmaps and size distributions" />
273 <param name="xlabel" value="Coordinates/read size" />
274 <param name="ylabel" value="Number of reads" />
275 <param name="rows_per_page" value="8" />
276 <output name="readmap_dataframe" ftype="tabular" file="Readmap_dataframe.tab" />
277 <output name="size_distribution_dataframe" ftype="tabular" file="Size_distribution_dataframe.tab" />
278 <output name="readmap_PDF" ftype="pdf" file="Readmaps.pdf" />
279 <output name="size_PDF" ftype="pdf" file="Size_distribution.pdf" />
280 <output name="combi_PDF" ftype="pdf" file="Size_distribution_and_Readmaps.pdf" />
281 </test>
282 </tests>
283 </tool>