comparison size_histogram.xml @ 2:a95419680ce4 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_size_histograms commit 89caea4594db1ae6d6bb9c651bc6019bb6dd3ce6
author drosofff
date Thu, 10 Mar 2016 11:00:00 -0500
parents 00852209fd9f
children 31782dbb7d85
comparison
equal deleted inserted replaced
1:00852209fd9f 2:a95419680ce4
1 <tool id="Size_histogram" name="Generate size histograms from alignment files" version="0.9.7"> 1 <tool id="Size_histogram" name="Generate size histograms from alignment files" version="0.9.8">
2 <description>from sRbowtie alignment</description> 2 <description>from sRbowtie alignment</description>
3 <requirements> 3 <requirements>
4 <requirement type="package" version="0.12.7">bowtie</requirement> 4 <requirement type="package" version="0.12.7">bowtie</requirement>
5 <requirement type="package" version="0.7.7">pysam</requirement> 5 <requirement type="package" version="0.7.7">pysam</requirement>
6 <requirement type="package" version="3.1.2">R</requirement> 6 <requirement type="package" version="3.1.2">R</requirement>
7 <requirement type="package" version="2.14">biocbasics</requirement> 7 <requirement type="package" version="2.14">biocbasics</requirement>
8 <requirement type="package" version="1.9">numpy</requirement> 8 <requirement type="package" version="1.9">numpy</requirement>
9 </requirements> 9 </requirements>
10 <command interpreter="python"> 10 <command interpreter="python">
11 size_histogram.py 11 size_histogram.py
12 #if $refGenomeSource.genomeSource == "history": 12 #if $refGenomeSource.genomeSource == "history":
13 --reference_fasta ## sys.argv[2] 13 --reference_fasta ## sys.argv[2]
14 $refGenomeSource.ownFile ## index source 14 $refGenomeSource.ownFile ## index source
15 #else: 15 #else:
16 #silent reference= filter( lambda x: str( x[0] ) == str( $refGenomeSource.series[0].input.dbkey ), $__app__.tool_data_tables[ 'bowtie_indexes' ].get_fields() )[0][-1] 16 #silent reference= filter( lambda x: str( x[0] ) == str( $refGenomeSource.series[0].input.dbkey ), $__app__.tool_data_tables[ 'bowtie_indexes' ].get_fields() )[0][-1]
17 --reference_bowtie_index 17 --reference_bowtie_index
18 $reference 18 $reference
19 #end if 19 #end if
20 --rcode 20 --rcode
21 $plotCode 21 $plotCode
22 --output_size_distribution 22 --output_size_distribution
23 $size_distribution_dataframe 23 $size_distribution_dataframe
24 --minquery 24 --minquery
25 $minquery 25 $minquery
26 --maxquery 26 --maxquery
27 $maxquery 27 $maxquery
28 --input 28 --input
29 #for $i in $refGenomeSource.series 29 #for $i in $refGenomeSource.series
30 $i.input 30 $i.input
31 #end for 31 #end for
32 --ext 32 --ext
33 #for $i in $refGenomeSource.series 33 #for $i in $refGenomeSource.series
34 $i.input.ext 34 $i.input.ext
35 #end for 35 #end for
36 --label 36 --label
37 #for $i in $refGenomeSource.series 37 #for $i in $refGenomeSource.series
38 "$i.input.name" 38 "$i.input.name"
39 #end for 39 #end for
40 --normalization_factor 40 --normalization_factor
41 #for $i in $refGenomeSource.series 41 #for $i in $refGenomeSource.series
42 $i.norm 42 $i.norm
43 #end for 43 #end for
44 #if $gff: 44 #if $gff:
45 --gff 45 --gff $gff
46 $gff 46 #end if
47 #end if 47 #if $global.value == 'yes':
48 #if $global.value == 'yes': 48 --global_size
49 --global_size 49 #end if
50 #end if 50 #if $collapsestrands.value == 'yes':
51 #if $collapsestrands.value == 'yes': 51 --collapse
52 --collapse 52 #end if
53 #end if 53
54 54 </command>
55 </command> 55 <inputs>
56 <inputs> 56 <conditional name="refGenomeSource">
57 <conditional name="refGenomeSource"> 57 <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
58 <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options"> 58 <option value="indexed">Use a built-in index</option>
59 <option value="indexed">Use a built-in index</option> 59 <option value="history">Use one from the history</option>
60 <option value="history">Use one from the history</option> 60 </param>
61 </param> 61 <when value="indexed">
62 <when value="indexed"> 62 <repeat name="series" title="Add alignment files">
63 <repeat name="series" title="Add alignment files"> 63 <param name="input" type="data" label="Select multiple alignments to parse" format="tabular,sam,bam">
64 <param name="input" type="data" label="Select multiple alignments to parse" format="tabular,sam,bam"> 64 <validator type="dataset_metadata_in_data_table" table_name="bowtie_indexes" metadata_name="dbkey" metadata_column="0" message="database not set for this bowtie output. Select the database(=genome used for matching) manually, or select a reference fasta from your history."/>
65 <validator type="dataset_metadata_in_data_table" table_name="bowtie_indexes" metadata_name="dbkey" metadata_column="0" message="database not set for this bowtie output. Select the database(=genome used for matching) manually, or select a reference fasta from your history."/> 65 </param>
66 </param> 66 <param name="norm" type="float" value="1" label="Indicate a normalization factor to compare multiple alignments"/>
67 <param name="norm" type="float" value="1" label="Indicate a normalization factor to compare multiple alignments"/> 67 </repeat>
68 </repeat> 68 </when>
69 </when> 69 <when value="history">
70 <when value="history"> 70 <param name="ownFile" type="data" format="fasta" label="Select a fasta file, to serve as index reference" />
71 <param name="ownFile" type="data" format="fasta" label="Select a fasta file, to serve as index reference" /> 71 <repeat name="series" title="Add alignment files">
72 <repeat name="series" title="Add alignment files"> 72 <param name="input" type="data" label="Select multiple alignments to parse" format="tabular,sam,bam"/>
73 <param name="input" type="data" label="Select multiple alignments to parse" format="tabular,sam,bam"/> 73 <param name="norm" type="float" value="1" label="Indicate a normalization factor to compare multiple alignments"/>
74 <param name="norm" type="float" value="1" label="Indicate a normalization factor to compare multiple alignments"/> 74 </repeat>
75 </repeat> 75 </when>
76 </when> 76 </conditional>
77 </conditional> 77 <param name="gff" type="data" format="gff,gff3" optional="true" label="Optional: select a GFF to investigate regions of interest" help="GFF must match genome build"/>
78 <param name="gff" type="data" format="gff,gff3" optional="true" label="Optional: select a GFF to investigate regions of interest" help="GFF must match genome build"/> 78 <!-- <validator type="dataset_metadata_in_data_table" table_name="bowtie_indexes" metadata_name="dbkey" metadata_column="0" message="GFF database and alignment file database do not match!"/> -->
79 <!-- <validator type="dataset_metadata_in_data_table" table_name="bowtie_indexes" metadata_name="dbkey" metadata_column="0" message="GFF database and alignment file database do not match!"/> --> 79 <param name="global" type="select" label="Generate size distribution for each item, or generate a global alignment">
80 <param name="global" type="select" label="Generate size distribution for each item, or generate a global alignment"> 80 <option value="no">for each item</option>
81 <option value="no">for each item</option> 81 <option value="yes">global</option>
82 <option value="yes">global</option> 82 </param>
83 </param> 83 <param name="collapsestrands" type="select" label="Whether + and - reads should be collapsed or not">
84 <param name="collapsestrands" type="select" label="Whether + and - reads should be collapsed or not"> 84 <option value="no">Do not collapse</option>
85 <option value="no">Do not collapse</option> 85 <option value="yes">Collapse + and - reads</option>
86 <option value="yes">Collapse + and - reads</option> 86 </param>
87 </param> 87 <param name="minquery" type="integer" size="3" value="18" label="Min size of reads to plot" help="'15' = 15 nucleotides"/>
88 <param name="minquery" type="integer" size="3" value="18" label="Min size of reads to plot" help="'15' = 15 nucleotides"/> 88 <param name="maxquery" type="integer" size="3" value="28" label="Max size of reads to plot" help="'30' = 30 nucleotides"/>
89 <param name="maxquery" type="integer" size="3" value="28" label="Max size of reads to plot" help="'30' = 30 nucleotides"/> 89 <param name="title" type="text" size="15" value="Size distribution" label="Main Titles"/>
90 <param name="title" type="text" size="15" value="Size distribution" label="Main Titles"/> 90 <param name="xlabel" type="text" size="15" value="Size in nucleotides" label="x axis label"/>
91 <param name="xlabel" type="text" size="15" value="Size in nucleotides" label="x axis label"/> 91 <param name="ylabel" type="text" size="15" value="Number of reads" label="y axis label"/>
92 <param name="ylabel" type="text" size="15" value="Number of reads" label="y axis label"/> 92 <param name="yrange" type="integer" size="3" value="0" label="y axis range for size distributions. 0 means auto-scaling."/>
93 <param name="rows_per_page" type="text" size="9" value="8" label="How many items to display per page?"> 93 <param name="rows_per_page" type="text" size="9" value="8" label="How many items to display per page?">
94 <validator type="in_range" min="6" max="20" message="Select between 6 and 20 rows, as the readability will suffer otherwise."/> 94 <validator type="in_range" min="6" max="20" message="Select between 6 and 20 rows, as the readability will suffer otherwise."/>
95 </param> 95 </param>
96 </inputs> 96 </inputs>
97 <configfiles> 97 <configfiles>
98 <configfile name="plotCode"> 98 <configfile name="plotCode">
99 ## Setup R error handling to go to stderr 99 ## Setup R error handling to go to stderr
100 options( show.error.messages=F, 100 options( show.error.messages=F,
101 error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) 101 error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
107 107
108 ##cheetahtemplate data frame implementation 108 ##cheetahtemplate data frame implementation
109 size=read.delim("${size_distribution_dataframe}", header=T, row.names=NULL) 109 size=read.delim("${size_distribution_dataframe}", header=T, row.names=NULL)
110 n_samples = length(unique (size\$sample)) 110 n_samples = length(unique (size\$sample))
111 n_genes = length (unique (levels(size\$gene))) 111 n_genes = length (unique (levels(size\$gene)))
112
113 if (${yrange} != 0) {
114 # This is used for specifying the y-axis limits
115 ylim=c(-${yrange}, ${yrange})
116 } else { ylim="" }
112 117
113 par.settings.size=list(layout.heights=list(top.padding=1, bottom.padding=1), 118 par.settings.size=list(layout.heights=list(top.padding=1, bottom.padding=1),
114 strip.background = list(col = c("lightblue", "lightgreen")) 119 strip.background = list(col = c("lightblue", "lightgreen"))
115 ) 120 )
116 121
130 par.strip.text = list(cex=0.75), 135 par.strip.text = list(cex=0.75),
131 as.table=TRUE, 136 as.table=TRUE,
132 newpage = T, 137 newpage = T,
133 ...) 138 ...)
134 139
135 combineLimits(update(useOuterStrips(bc, 140 combineLimits(update(useOuterStrips(bc,
136 strip.left = strip.custom(par.strip.text = list(cex=0.5)) 141 strip.left = strip.custom(par.strip.text = list(cex=0.5))
137 ), 142 ),
138 layout=c(n_samples,${rows_per_page})), 143 layout=c(n_samples,${rows_per_page})),
139 margin.x=F, margin.y=1) 144 margin.x=F, margin.y=1)
140 } 145 }
145 #if $global.value == 'yes': 150 #if $global.value == 'yes':
146 global = "yes" 151 global = "yes"
147 #end if 152 #end if
148 153
149 if (global=="no") { 154 if (global=="no") {
155 width = 8.2677*n_samples/4
156 } else { width = 8.2677 }
150 157
151 options(warn=-1) 158 options(warn=-1)
152 pdf(file="${size_PDF}", paper="special", height=11.69, width=8.2677*n_samples/4) 159 pdf(file="${size_PDF}", paper="special", height=11.69, width=width)
153 plot_size_distribution(size, par.settings=par.settings.size) # removed , prepanel=smR.prepanel 160
154 161 if (ylim == "" &amp;&amp; global=="no") {
155 } else { 162 plot_size_distribution(size, par.settings=par.settings.size)
156 163 }
157 pdf(file="${size_PDF}", paper="special", height=11.69, width=8.2677) 164 if (ylim != "" &amp;&amp; global=="no") { plot_size_distribution(size, par.settings=par.settings.size, ylim=ylim)
158 bc= barchart(count~as.factor(size)|factor(sample, levels=unique(sample)), data = size, origin = 0, 165 }
159 horizontal=FALSE, 166 if (ylim == "" &amp;&amp; global=="yes") { bc= barchart(count~as.factor(size)|factor(sample, levels=unique(sample)), data = size, origin = 0,
160 group=polarity, 167 horizontal=FALSE,
161 stack=TRUE, 168 group=polarity,
162 col=c('red', 'blue'), 169 stack=TRUE,
163 # par.settings=list(fontsize = list(text=8, points=8)), 170 col=c('red', 'blue'),
164 scales=list(y=list(tick.number=4, rot=90, relation="same"), cex=1), 171 scales=list(y=list(tick.number=4, rot=90, relation="same"), cex=1),
165 xlab = "readsize in nucleotides", 172 xlab = "readsize in nucleotides",
166 ylab = "${ylabel}", 173 ylab = "${ylabel}",
167 main="${title}" , as.table=TRUE, newpage = T, 174 main="${title}" , as.table=TRUE, newpage = T,
168 aspect=0.5, 175 aspect=0.5,
169 strip = strip.custom(par.strip.text = list(cex = 1), which.given=1, bg="lightblue") 176 strip = strip.custom(par.strip.text = list(cex = 1), which.given=1, bg="lightblue")
170 ) 177 )
171 bc 178 bc
172 } 179 }
180 if (ylim != "" &amp;&amp; global=="yes") { bc= barchart(count~as.factor(size)|factor(sample, levels=unique(sample)), data = size, origin = 0,
181 horizontal=FALSE,
182 group=polarity,
183 stack=TRUE,
184 col=c('red', 'blue'),
185 scales=list(y=list(tick.number=4, rot=90, relation="same"), cex=1),
186 xlab = "readsize in nucleotides",
187 ylab = "${ylabel}",
188 ylim = ylim,
189 main="${title}" , as.table=TRUE, newpage = T,
190 aspect=0.5,
191 strip = strip.custom(par.strip.text = list(cex = 1), which.given=1, bg="lightblue")
192 )
193 bc
194 }
195
173 devname=dev.off() 196 devname=dev.off()
174 197
175 </configfile> 198 </configfile>
176 </configfiles> 199 </configfiles>
177 200 <outputs>
178 <outputs> 201 <data format="tabular" name="size_distribution_dataframe" label="Size_distribution_dataframe.tab"/>
179 <data format="tabular" name="size_distribution_dataframe" label="Size_distribution_dataframe.tab"/> 202 <data format="pdf" name="size_PDF" label="Size_distribution.pdf"/>
180 <data format="pdf" name="size_PDF" label="Size_distribution.pdf"/> 203 </outputs>
181 </outputs>
182 <help> 204 <help>
183 205
184 **What it does** 206 **What it does**
185 207
186 Takes one or more alignment files (BAM, SAM or tabular bowtie output) as input and produces a histogram of read sizes, 208 Takes one or more alignment files (BAM, SAM or tabular bowtie output) as input and produces a histogram of read sizes,
187 where by default for each "chromosome" a histogram of read sizes is drawn. 209 where by default for each "chromosome" a histogram of read sizes is drawn.
188 Reads that map in sense are on the top (red), reads that map antisense are on the bottom (blue). 210 Reads that map in sense are on the top (red), reads that map antisense are on the bottom (blue).
189 211
190 212
191 .. class:: warningmark 213 .. class:: warningmark
192 214
207 229
208 produce a plot like this: 230 produce a plot like this:
209 231
210 ---- 232 ----
211 233
212 .. image:: static/images/size_histogram.png 234 .. image:: static/images/size_histogram.png
213 :height: 800 235 :height: 800
214 :width: 500 236 :width: 500
215 237
216 </help> 238 </help>
217 <tests> 239 <tests>
218 <test> 240 <test>
219 <param name="genomeSource" value="history" /> 241 <param name="genomeSource" value="history" />
220 <param name="ownFile" value="transposons.fasta" ftype="fasta" /> 242 <param name="ownFile" value="transposons.fasta" ftype="fasta" />
221 <param name="series_0|input" value="sample1.srbowtie_out" ftype="tabular"/> 243 <param name="series_0|input" value="sample1.srbowtie_out" ftype="tabular"/>
222 <param name="series_0|norm" value="1" /> 244 <param name="series_0|norm" value="1" />
223 <param name="series_1|input" value="sample2.srbowtie_out" ftype="tabular"/> 245 <param name="series_1|input" value="sample2.srbowtie_out" ftype="tabular"/>
224 <param name="series_1|norm" value="1" /> 246 <param name="series_1|norm" value="1" />
225 <param name="series_2|input" value="sample3.srbowtie_out" ftype="tabular"/> 247 <param name="series_2|input" value="sample3.srbowtie_out" ftype="tabular"/>
226 <param name="series_2|norm" value="1" /> 248 <param name="series_2|norm" value="1" />
227 <param name="global" value="no" /> 249 <param name="global" value="no" />
228 <param name="collapsestrands" value="no" /> 250 <param name="collapsestrands" value="no" />
229 <param name="minquery" value="18"/> 251 <param name="minquery" value="18"/>
230 <param name="maxquery" value="30"/> 252 <param name="maxquery" value="30"/>
231 <param name="title" value="Size distribution"/> 253 <param name="title" value="Size distribution"/>
232 <param name="xlabel" value="Size in nucleotides"/> 254 <param name="xlabel" value="Size in nucleotides"/>
233 <param name="ylabel" value="Number of reads"/> 255 <param name="ylabel" value="Number of reads"/>
234 <param name="rows_per_page" value="10"/> 256 <param name="rows_per_page" value="10"/>
235 <output name="size_distribution_dataframe" ftype="tabular" file="Size_distribution_dataframe.tab" /> 257 <output name="size_distribution_dataframe" ftype="tabular" file="Size_distribution_dataframe.tab" />
236 <output name="size_PDF" ftype="pdf" file="Size_distribution.pdf" /> 258 <output name="size_PDF" ftype="pdf" file="Size_distribution.pdf" />
237 </test> 259 </test>
238 </tests> 260 </tests>
239 </tool> 261 </tool>
240 262