comparison alfa/ALFA.xml @ 0:e360f840a92e draft default tip

Uploaded
author biocomp-ibens
date Wed, 16 May 2018 09:49:18 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:e360f840a92e
1 <tool id="alfa" name="ALFA" version="0.1.0">
2 <description>- Plot the distribution of the genomic features captured by aligned reads </description>
3
4 <!-- Packages resolved for the tool at run time. ALFA requires bedtools suite v2.20.0 and above; samtools and matplotlib are declared alongside it. -->
5 <requirements>
6 <requirement type="package" version="2.24">bedtools</requirement>
7 <requirement type="package" version="1.2">samtools</requirement>
8 <requirement type="package" version="1.4">matplotlib</requirement>
9 </requirements>
10
11 <command>
12 <![CDATA[
13 python '$__tool_directory__/ALFA_wrapper.py'
14 ## Cheetah template: every option below is appended to the ALFA_wrapper.py call above.
15 --project_name "${projectName}"
16
17 ##__INPUT 1: ANNOTATION OF THE SEQ/GENOME (exactly one of --index, --bi_index, --annotation is emitted)__##
18 #if str ( $annotation.annotationSource['annotationSourceSelection'] ) == "index"
19 --index "$annotation.annotationSource['strandedIndex']" "$annotation.annotationSource['unstrandedIndex']"
20 #else if str ( $annotation.annotationSource['annotationSourceSelection'] ) == "built_in_index"
21 --bi_index "$annotation.annotationSource.built_in_index_prefix.fields.prefix"
22 #else
23 --annotation "$annotation.annotationSource['annotationFile']"
24 #end if
25
26 ##__INPUT 2: ALIGNED READS (each repeat entry is passed as a __fname__ / __label__ pair)__##
27 --reads_format $reads.readsType['readsTypeSelection']
28 --reads
29 #for $r in $reads.readsType['readsList']
30 "__fname__$r.readsFile"
31 "__label__$r.readsLabel"
32 #end for
33 --strandness $reads['strandness']
34
35 ##__OUTPUT FILES (each option is emitted only when the matching checkbox is ticked)__##
36 #if str ( $outputFiles['plot'] ) == "True"
37 #if str ( $outputOptions['plotFormat'] ) == "pdf"
38 --output_pdf "$outputPdf"
39 #else if str ( $outputOptions['plotFormat'] ) == "png"
40 --output_png "$outputCategoriesPng" "$outputBiotypesPng"
41 #else
42 --output_svg "$outputCategoriesSvg" "$outputBiotypesSvg"
43 #end if
44 #end if
45 #if str ( $outputFiles['countFile'] ) == "True"
46 --output_count "$outputCountFile"
47 #end if
48 #if str ( $outputFiles['index'] ) == "True"
49 --output_index "$outputStrandedIndex" "$outputUnstrandedIndex"
50 #end if
51
52 ##__OUTPUT OPTIONS (plot format and y-axis threshold only matter when plots are requested)__##
53 --categories_depth $outputOptions['categoriesDepth']
54 #if str ( $outputFiles['plot'] ) == "True"
55 --plot_format $outputOptions['plotFormat']
56 #if str ( $outputOptions.plotThreshold['plotThresholdChoice'] ) == "True"
57 --threshold $outputOptions.plotThreshold.yMin $outputOptions.plotThreshold.yMax
58 #end if
59 #end if
60 ## The log report path and the tool directory are always passed.
61 --log_report "$logReport"
62 --tool_dir "$__tool_directory__"
63 ]]>
64 </command>
65 <inputs>
66 <param name="projectName" value="ALFA" type="text" size="20" label="Project Name">
67 <validator type="empty_field" message="Please, specify a name for your project."/>
68 </param> <!-- projectName is passed to the wrapper as the project_name option and is embedded in every output label -->
69
70 <section name="annotation" title="INPUT 1: Annotation of your genome / sequence" expanded="True">
71 <conditional name="annotationSource"> <!-- one "when" branch per annotation source; the command template tests annotationSourceSelection -->
72 <param name="annotationSourceSelection" type="select" label="Select the type of your annotation">
73 <option value="personal_gtf" selected="true">Personal annotation file (GTF format)</option>
74 <option value="index">Stranded and Unstranded Indexes previously generated by ALFA (Index format)</option>
75 <option value="built_in_index">Built-in indexes among a list of referenced genome (Index format)</option>
76 </param>
77 <when value="personal_gtf">
78 <param name="annotationFile" type="data" format="gff,gtf" label="Select your personal annotation file (GTF format)">
79 </param>
80 </when>
81 <when value="index">
82 <param name="strandedIndex" type="data" label="Select your ALFA Stranded index file (index format)"/>
83 <param name="unstrandedIndex" type="data" label="Select your ALFA Unstranded index file (index format)"/>
84 </when>
85 <when value="built_in_index">
86 <param name="built_in_index_prefix" type="select" label="Select Genome">
87 <options from_data_table="alfa_indexes"> <!-- choices are read from the alfa_indexes data table -->
88 <validator type="no_options" message="No indexes are available for the selected input dataset. Ask your Galaxy Admin to use the ALFA_data_manager tool to build such indexes!" />
89 </options>
90 </param>
91 </when>
92 </conditional>
93 </section>
94
95 <section name="reads" title="INPUT 2: Mapped reads" expanded="True">
96 <conditional name="readsType"> <!-- both branches expose the same readsList repeat so the command template can loop over either -->
97 <param name="readsTypeSelection" type="select" label="Select the format of your mapped reads">
98 <option value="bam" selected="true">BAM</option>
99 <option value="bedgraph">BEDGRAPH</option>
100 </param>
101 <when value="bam">
102 <repeat name="readsList" title="Mapped Reads" min="1" >
103 <param name="readsFile" type="data" format="bam" label="Select the file (BAM format)"/>
104 <param name="readsLabel" type="text" size="20" value="" label="Label of the reads" optional="True"/>
105 </repeat>
106 </when>
107 <when value="bedgraph">
108 <repeat name="readsList" title="Mapped Reads" min="1">
109 <param name="readsFile" type="data" format="bedgraph,bed" label="Select the file (BEDGRAPH format)"/> <!-- "bed" is kept so datasets already typed as bed stay selectable -->
110 <param name="readsLabel" type="text" size="20" value="" label="Label of the reads" optional="True"/>
111 </repeat>
112 </when>
113 </conditional>
114 <param name="strandness" type="select" label="Select the strandness of your library of reads">
115 <option value="unstranded" selected="true">Unstranded (reads will be intersected with both forward and reverse strands of the annotated sequence)</option>
116 <option value="forward">Forward (reads will be intersected with only the forward strand of the annotated sequence)</option>
117 <option value="reverse">Reverse (reads will be intersected with only the reverse strand of the annotated sequence)</option>
118 </param>
119 </section>
120
121 <section name="outputFiles" title="OUTPUT FILES: Choose the output files" expanded="False"> <!-- each checkbox is tested by the command template and by the output filters -->
122 <param name="plot" type="boolean" truevalue="True" falsevalue="False" checked="True" label="Categories and Biotypes Histograms" help="Plot the distribution of genomic categories and biotypes captured by your reads"/>
123 <param name="countFile" type="boolean" truevalue="True" falsevalue="False" checked="True" label="Categories Count File" help="Return the exact count of nucleotides per genomic categories and biotypes"/>
124 <param name="index" type="boolean" truevalue="True" falsevalue="False" checked="False" label ="Indexes" help="Return the stranded and unstranded ALFA indexes generated from the GTF input file (useful if you plan to run ALFA again with the same annotated sequence)"/>
125 </section>
126
127 <section name="outputOptions" title="ADVANCED SETTINGS" expanded="False">
128 <param name="categoriesDepth" type="select" label="Categories to Display"> <!-- the selected value is passed to the wrapper as the categories depth -->
129 <option value="1">gene | intergenic | antisense</option>
130 <option value="2">exon | intron | undescribed genes | intergenic | antisense</option>
131 <option value="3" selected="true">5’-UTR | CDS | 3’-UTR | undescribed exons | intron | undescribed genes | intergenic | antisense</option>
132 <option value="4">5’-UTR | start_codon | CDS | undescribed CDS | stop_codon | 3’-UTR | undescribed exons | intron | undescribed genes | intergenic | antisense</option>
133 </param>
134 <param name="plotFormat" type="select" label="Plot Options: Select graph format" help="Ignore if you did not choose the histograms output file">
135 <option value="png" selected="true">png</option>
136 <!--<option value="pdf" selected="true">pdf</option>-->
137 <option value="svg">svg</option>
138 </param>
139 <conditional name="plotThreshold"> <!-- yMin and yMax are only collected when the checkbox is ticked -->
140 <param name="plotThresholdChoice" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Plot Options: Modify y axis range of the normalized counts of bio-features" help="Ignore if you did not choose the histograms output file"/>
141 <when value="True">
142 <param name="yMin" type="float" value="-2.0" label="y min"/>
143 <param name="yMax" type="float" value="2.0" label="y max"/>
144 </when>
145 <when value="False"></when>
146 </conditional>
147 </section>
148 </inputs>
149
150 <outputs> <!-- every dataset except the log report is created only when its filter holds -->
151 <data name="logReport" format="txt" label="${projectName}-Log Report"/>
152 <!-- PDF output disabled; the "pdf" choice of plotFormat is commented out as well
153 <data name="outputPdf" format="pdf" label="${projectName}-BioFeatures Distribution">
154 <filter>outputFiles['plot'] is True and outputOptions['plotFormat'] == 'pdf'</filter>
155 </data>
156 -->
157 <data name="outputCategoriesPng" format="png" label="${projectName}-Categories Distribution">
158 <filter>outputFiles['plot'] is True and outputOptions['plotFormat'] == 'png'</filter>
159 </data>
160 <data name="outputBiotypesPng" format="png" label="${projectName}-Biotypes Distribution">
161 <filter>outputFiles['plot'] is True and outputOptions['plotFormat'] == 'png'</filter>
162 </data>
163 <data name="outputCategoriesSvg" format="svg" label="${projectName}-Categories Distribution">
164 <filter>outputFiles['plot'] is True and outputOptions['plotFormat'] == 'svg'</filter>
165 </data>
166 <data name="outputBiotypesSvg" format="svg" label="${projectName}-Biotypes Distribution">
167 <filter>outputFiles['plot'] is True and outputOptions['plotFormat'] == 'svg'</filter>
168 </data>
169 <data name="outputCountFile" format="txt" label="${projectName}-Categories Count">
170 <filter>outputFiles['countFile'] is True</filter>
171 </data>
172 <data name="outputStrandedIndex" format="txt" label="${projectName}-Stranded Index"> <!-- the two index datasets share one checkbox -->
173 <filter>outputFiles['index'] is True</filter>
174 </data>
175 <data name="outputUnstrandedIndex" format="txt" label="${projectName}-Unstranded Index">
176 <filter>outputFiles['index'] is True</filter>
177 </data>
178 </outputs>
179
180 <tests>
181 <test> <!-- toy run: GTF annotation plus one BAM file, with count file and both indexes requested -->
182 <param name="projectName" value="alfa_toy" />
183 <section name="annotation">
184 <conditional name="annotationSource">
185 <param name="annotationSourceSelection" value="personal_gtf" />
186 <param name="annotationFile" value="alfa_toy.gtf" ftype="gtf" />
187 </conditional>
188 </section>
189 <section name="reads">
190 <conditional name="readsType">
191 <param name="readsTypeSelection" value="bam" />
192 <repeat name="readsList">
193 <param name="readsFile" value="alfa_toy.bam" ftype="bam" />
194 <param name="readsLabel" value="alfa_toy" />
195 </repeat>
196 </conditional>
197 <param name="strandness" value="unstranded" /> <!-- declared directly under the reads section, not inside the readsType conditional -->
198 </section>
199 <section name="outputFiles">
200 <param name="plot" value="True" />
201 <param name="countFile" value="True" />
202 <param name="index" value="True" />
203 </section>
204 <section name="outputOptions">
205 <param name="categoriesDepth" value="3" />
206 <param name="plotFormat" value="pdf" /> <!-- NOTE(review): "pdf" is not an enabled plotFormat option and the outputPdf dataset is commented out in the outputs; switch this test to png or svg once matching expected files exist in test-data -->
207 <conditional name="plotThreshold">
208 <param name="plotThresholdChoice" value="False" />
209 </conditional>
210 </section>
211 <output name="outputPdf" file="alfa_toy-Biofeatures Distribution.pdf" ftype="pdf" />
212 <output name="outputCountFile" file="alfa_toy.categories_count" ftype="txt" />
213 <output name="outputStrandedIndex" file="alfa_toy.stranded.index" ftype="txt" />
214 <output name="outputUnstrandedIndex" file="alfa_toy.unstranded.index" ftype="txt" />
215 <assert_stdout>
216 <has_text text="### End of the program" />
217 </assert_stdout>
218 </test>
219 </tests>
220
221 <help>
222 <![CDATA[
223 **What it does**
224
225
226 | ALFA provides a global overview of the distribution of features in Next Generation Sequencing dataset(s).
227 |
228 | Given a set of aligned reads (BAM files) and an annotation file (GTF format), the tool produces plots of the raw and normalized distributions of those reads among genomic categories (stop codon, 5'-UTR, CDS, intergenic, etc.) and biotypes (protein coding genes, miRNA, tRNA, etc.). Whatever the sequencing technique, whatever the organism.
229
230 ----
231
232 **ALFA acronym**
233
234 - Annotation Landscape For Aligned reads
235
236 ----
237
238 **Official documentation of the tool**
239
240
241 - https://github.com/biocompibens/ALFA
242
243 ----
244
245 **Detailed example**
246
247 - https://github.com/biocompibens/ALFA#detailed-example
248
249 ----
250
251 **Nota Bene**
252
253 * **Input 1: Annotation File**
254
255
256 | ALFA requires as first input an annotation file (sequence, genome...) in gtf format in order to generate alfa indexes needed in a second round of the program.
257 | Indexes are files which list all the coordinates of the categories (stop codon, 5'-UTR, CDS, intergenic...) and biotypes (protein coding genes, miRNA, tRNA, ...) encountered in the annotated sequence.
258 |
259
260 .. class:: warningmark
261
262 | Gtf File must be sorted.
263 |
264
265 .. class:: infomark
266
267 | Generating indexes from an annotation file can be time-consuming (e.g. ~10 min for the human genome). ALFA therefore lets the user directly submit indexes generated in previous runs as inputs for a new run.
268 |
269
270 .. class:: infomark
271
272 | ALFA also enables the use of built-in indexes to save even more computational time. In order to generate easily these built-in indexes, install the data manager tool `ALFA_data_manager`_ available on the toolshed.
273
274 .. _ALFA_data_manager: https://toolshed.g2.bx.psu.edu/view/charles-bernard/data_manager_build_alfa_indexes
275
276 * **Input 2: Reads**
277
278 | ALFA requires as second input one or more mapped reads files in either bam or bedgraph format. The coordinates of the mapped reads will be intersected with the corresponding categories and biotypes listed in the indexes.
279 | The strandness option determines which strand of the annotated sequence will be taken into account during this intersection.
280 |
281
282 .. class:: warningmark
283
284 | Bam or Bedgraph file(s) must be sorted.
285 |
286
287 .. class:: warningmark
288
289 | Chromosome names in the reads and in the annotation file (gtf or indexes) must be the same for the intersection to occur.
290 |
291
292 * **Output files**
293
294 | The result of the intersection is a count file giving the number of nucleotides in the reads for each genomic category and biotype. From this count file, plots of the raw and normalized distributions of the reads among these categories are generated.
295 | In the output files section, the user can choose what kind of files he/she desires as ALFA output. Categories Count File and Plots are proposed by default.
296 |
297
298 .. class:: infomark
299
300 | The user can also select the 'indexes' option as output. This option is interesting if you plan to run ALFA again with the same submitted annotation file. *See Nota Bene/Input 1: Annotation File for more information.*
301 |
302
303 - `What the plots look like`_
304
305 .. _What the plots look like: https://github.com/biocompibens/ALFA#plots
306
307 |
308
309 - `How they are generated`_
310
311 .. _How they are generated: https://github.com/biocompibens/ALFA#detailed-example
312
313 ----
314
315 **ALFA Developers**
316
317 | Benoît Noël and Mathieu Bahin: *compbio team, Institut de Biologie de l'Ecole Normale Supérieure de Paris*
318
319 ]]>
320 </help>
321
322 <citations> <!-- BibTeX entry: a citation key and comma-separated fields are required for the entry to parse -->
323 <citation type="bibtex">@misc{alfa,
324 author = "Benoît Noël and Mathieu Bahin",
325 title = "ALFA: Annotation Landscape For Aligned reads",
326 url = "https://github.com/biocompibens/ALFA",
327 institution = "Institut de Biologie de l'Ecole Normale Supérieure de Paris"
328 }
329 </citation>
330 </citations>
331 </tool>