Mercurial > repos > iuc > ribowaltz_process
comparison ribowaltz.xml @ 0:6d4c94373bba draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ribowaltz commit ff002df702f544829d1b500ac4b517c1e70ad14d
author | iuc |
---|---|
date | Thu, 22 Sep 2022 20:30:54 +0000 |
parents | |
children | 042cab870a39 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:6d4c94373bba |
---|---|
1 <tool id="ribowaltz_process" name="riboWaltz" version="@VERSION@" profile="@PROFILE@"> | |
2 <description>calculation of optimal P-site offsets and diagnostic analysis</description> | |
3 <macros> | |
4 <import>macros.xml</import> | |
5 </macros> | |
6 <expand macro='requirements'/> | |
7 <expand macro='edam_ontology' /> | |
8 <expand macro='xrefs'/> | |
9 <command detect_errors="exit_code"><![CDATA[ | |
10 #for $sample in $rep_samples: | |
11 ln -s '$sample.bam_file' '${sample.sample_name}.bam' && | |
12 #end for | |
13 Rscript '${__tool_directory__}/ribowaltz.R' -b . -g '$gtf' --refseq_sep '$refseq_sep' | |
14 #import json | |
15 #if $filtering.duplicates.filter == 'yes': | |
16 #set params_duplicate_filterting = [] | |
17 #silent $params_duplicate_filterting.append({"extremity": str($filtering.duplicates.extremity), "keep": str($filtering.duplicates.keep)}) | |
18 --params_duplicate_filterting '#echo json.dumps($params_duplicate_filterting)#' | |
19 #end if | |
20 #if $filtering.length.filter == 'periodicity': | |
21 #set params_peridiocity_filterting = [] | |
22 #silent $params_peridiocity_filterting.append({"periodicity_threshold": int($filtering.length.periodicity_threshold)}) | |
23 --params_peridiocity_filterting '#echo json.dumps($params_peridiocity_filterting)#' | |
24 #end if | |
25 #if $filtering.length.filter == 'custom': | |
26 #set params_custom_filterting = [] | |
27 #silent $params_custom_filterting.append({"length_range": str($filtering.length.length_range_min)+":"+str($filtering.length.length_range_max)}) | |
28 --params_custom_filterting '#echo json.dumps($params_custom_filterting)#' | |
29 #end if | |
30 #set params_psite_additional = [] | |
31 #silent $params_psite_additional.append( | |
32 {"flanking":int($psite_additional.flanking), "use_start":bool($psite_additional.use_start), | |
33 "psite_extrimity":str($psite_additional.psite_extrimity), "cl":int($psite_additional.cl)}) | |
34 --params_psite_additional '#echo json.dumps($params_psite_additional)#' | |
35 #set params_coverage_additional = [] | |
36 #silent $params_coverage_additional.append( | |
37 {"psites_per_region":bool($coverage_additional.psites_per_region), "min_overlap":int($coverage_additional.min_overlap), | |
38 "start_nts":int($coverage_additional.start_nts), "stop_nts":int($coverage_additional.stop_nts)}) | |
39 --params_coverage_additional '#echo json.dumps($params_coverage_additional)#' | |
40 #if $save_rdata: | |
41 --psite_info_rdata '$psite_rdata_out' | |
42 #end if | |
43 --codon_coverage_info '$codon_coverage_out' | |
44 --cds_coverage_info '$cds_coverage_out' && | |
45 cd plots && | |
46 for i in */*.pdf; do mv "\$i" "\${i/\//-}"; done; | |
47 ]]></command> | |
48 <inputs> | |
49 <repeat name="rep_samples" min="1" default="1" title="BAM file"> | |
50 <param name="sample_name" type="text" label="Specify sample name" value="SampleName" | |
51 help="Only letters, numbers and underscores will be retained in this field"> | |
52 <sanitizer> | |
53 <valid initial="string.letters,string.digits"><add value="_"/></valid> | |
54 </sanitizer> | |
55 </param> | |
56 <param name="bam_file" type="data" multiple="false" format="bam,sam" label="Input BAM file" | |
57 help="riboWaltz only works for read alignments based on transcript coordinates"/> | |
58 </repeat> | |
59 <param name="gtf" type="data" format="gtf,gff" label="Annotation in GTF/GFF format"/> | |
60 <param name="refseq_sep" type="text" optional="true" label="Separator between reference sequences' name and additional information to discard" | |
61 help="All characters before the first occurrence of the specified separator are kept"/> | |
62 <param name="save_rdata" type="boolean" truevalue="1" falsevalue="0" checked="true" | |
63 label="Save p-site info RDATA file?" | |
64 help="Useful for advanced plotting using riboWaltz-plot tool"/> | |
65 <section name="filtering" title="Filtering Options"> | |
66 <conditional name="duplicates"> | |
67 <param name="filter" type="select" label="Perform duplicate filtering?"> | |
68 <option value="yes">yes</option> | |
69 <option value="no">no</option> | |
70 </param> | |
71 <when value="yes"> | |
72 <param name="extremity" type="select" label="Which reads should be considered duplicates?"> | |
73 <option value="both" selected="true">Share both the 5' extremity and the 3' extremity</option> | |
74 <option value="5end">Share only the 5' extremity</option> | |
75 <option value="3end">Share only the 3' extremity</option> | |
76 </param> | |
77 <param name="keep" type="select" label="Which read to keep if duplicates display different lengths?" | |
78 help="This parameter is considered only if the 5' or the 3' extremity was chosen"> | |
79 <option value="shortest" selected="true">Keep the shortest reads</option> | |
80 <option value="longest">Keep the longest reads</option> | |
81 </param> | |
82 </when> | |
83 <when value="no"/> | |
84 </conditional> | |
85 <conditional name="length"> | |
86 <param name="filter" type="select" label="Perform read length filtering"> | |
87 <option value="periodicity">yes, in periodicity mode</option> | |
88 <option value="custom">yes, based on read length ranges</option> | |
89 <option value="no">no</option> | |
90 </param> | |
91 <when value="periodicity"> | |
92 <param name="periodicity_threshold" type="integer" value="50" min="10" max="100" | |
93 label="Only read lengths satisfying this threshold are kept"/> | |
94 </when> | |
95 <when value="custom"> | |
96 <param name="length_range_min" value="1" type="integer" min="1" | |
97 label="Read lengths ranging from"/> | |
98 <param name="length_range_max" value="100" type="integer" min="1" | |
99 label="Read lengths ranging to"/> | |
100 </when> | |
101 <when value="no"/> | |
102 </conditional> | |
103 </section> | |
104 <section name="psite_additional" title="Additional options for P-site offset computation"> | |
105 <param name="flanking" type="integer" value="6" label="Min number of nucleotides that must flank the reference codon in both directions"/> | |
106 <param name="use_start" type="boolean" truevalue="1" falsevalue="0" checked="true" | |
107 label="Use the translation initiation site as reference codon?" | |
108 help="If not checked, the second to last codon is used instead"/> | |
109 <param name="psite_extrimity" type="select" label="On which extremity should the correction step be based?"> | |
110 <option value="auto" selected="true">Automatically select the optimal extremity</option> | |
111 <option value="5end">Use 5' extremities</option> | |
112 <option value="3end">Use 3' extremities</option> | |
113 </param> | |
114 <param name="cl" type="integer" value="99" min="1" max="100" | |
115 label="Confidence level for generating occupancy metaprofiles for a sub-range of read lengths"/> | |
116 </section> | |
117 <section name="coverage_additional" title="Options for codon and CDS coverage"> | |
118 <param name="psites_per_region" type="boolean" truevalue="1" falsevalue="0" checked="true" | |
119 label="Write number of P-Sites falling per region?" | |
120 help="If not checked, the number of read footprints per region is returned"/> | |
121 <param name="min_overlap" type="integer" value="1" min="1" label="Min number of overlapping positions between reads and codons to be considered"/> | |
122 <param name="start_nts" type="integer" value="0" min="0" label="Number of nucleotides at the beginning of the coding sequences to be excluded"/> | |
123 <param name="stop_nts" type="integer" value="0" min="0" label="Number of nucleotides at the end of the coding sequences to be excluded"/> | |
124 </section> | |
125 </inputs> | |
126 <outputs> | |
127 <collection name="psite_out" type="list" label="P-site offsets information on ${on_string}"> | |
128 <discover_datasets pattern="(?P<designation>.+)_psite_info\.tsv" format="tabular" directory="." visible="false"/> | |
129 </collection> | |
130 <collection name="basic_plots" type="list:list" label="Ribosome occupancy profiles by read length on ${on_string}"> | |
131 <discover_datasets pattern="(?P<identifier_0>[^-]+)-(?P<identifier_1>[^-]+)\.pdf" format="pdf" directory="plots/" visible="false"/> | |
132 </collection> | |
133 <data name="codon_coverage_out" format="tabular" label="Codon coverage on ${on_string}"/> | |
134 <data name="cds_coverage_out" format="tabular" label="CDS coverage on ${on_string}"/> | |
135 <data name="psite_rdata_out" format="rdata" label="Psite offset R object on ${on_string}"> | |
136 <filter>save_rdata</filter> | |
137 </data> | |
138 </outputs> | |
139 <tests> | |
140 <test expect_num_outputs="5"> | |
141 <param name="gtf" value="rep1_annot.gtf.gz"/> | |
142 <param name="refseq_sep" value="."/> | |
143 <param name="save_rdata" value="true"/> | |
144 <repeat name="rep_samples"> | |
145 <param name="sample_name" value="Replicate1"/> | |
146 <param name="bam_file" value="rep1.bam"/> | |
147 </repeat> | |
148 <section name="filtering"></section> | |
149 <section name="psite_additional"> | |
150 <param name="flanking" value="6"/> | |
151 <param name="use_start" value="true"/> | |
152 <param name="psite_extrimity" value="auto"/> | |
153 <param name="cl" value="99"/> | |
154 </section> | |
155 <section name="coverage_additional"> | |
156 <param name="psites_per_region" value="true"/> | |
157 <param name="min_overlap" value="1"/> | |
158 <param name="start_nts" value="0"/> | |
159 <param name="stop_nts" value="0"/> | |
160 </section> | |
161 <output_collection type="list" name="psite_out"> | |
162 <element name="Replicate1"> | |
163 <assert_contents> | |
164 <has_text_matching expression="transcript\tend5\tpsite\tend3\tlength\tcds_start\tcds_stop\tpsite_from_start\tpsite_from_stop\tpsite_region"/> | |
165 <has_text_matching expression="ENSMUST00000015812\t1096\t1106\t1134\t39\t697\t1119\t409\t-13\tcds"/> | |
166 </assert_contents> | |
167 </element> | |
168 </output_collection> | |
169 <output_collection type="list:list" name="basic_plots"> | |
170 <element name="Replicate1"> | |
171 <element name="21"> | |
172 <assert_contents> | |
173 <has_size delta="200" value="5501"/> | |
174 </assert_contents> | |
175 </element> | |
176 <element name="48"> | |
177 <assert_contents> | |
178 <has_size delta="200" value="6035"/> | |
179 </assert_contents> | |
180 </element> | |
181 </element> | |
182 </output_collection> | |
183 <output name="codon_coverage_out"> | |
184 <assert_contents> | |
185 <has_text_matching expression="transcript\tstart\tend\tfrom_cds_start\tfrom_cds_stop\tregion\tReplicate1"/> | |
186 <has_text_matching expression="ENSMUST00000000137\t2656\t2659\t865\t471\t3utr\t1"/> | |
187 </assert_contents> | |
188 </output> | |
189 <output name="cds_coverage_out"> | |
190 <assert_contents> | |
191 <has_text_matching expression="transcript\tlength_cds\tReplicate1"/> | |
192 <has_text_matching expression="ENSMUST00000019109\t741\t5"/> | |
193 </assert_contents> | |
194 </output> | |
195 <output name="psite_rdata_out"> | |
196 <assert_contents> | |
197 <has_size delta="1000" value="171803"/> | |
198 </assert_contents> | |
199 </output> | |
200 </test> | |
201 </tests> | |
202 <help><![CDATA[ | |
203 riboWaltz is an R package for calculation of optimal P-site offsets, diagnostic analysis and visual inspection of ribosome profiling data. Taking advantage of a two-step algorithm where offset information is passed through populations of reads with different length in order to maximize offset coherence, riboWaltz computes with high precision the P-site offset. riboWaltz also provides a variety of graphical representations, laying the foundations for further accurate RiboSeq analyses and improved interpretation of positional information. More information can be found here: https://github.com/LabTranslationalArchitectomics/riboWaltz | |
204 | |
205 **Inputs** | |
206 | |
207 It takes BAM files and a GTF file as inputs. Most reads from RiboSeq are supposed to map on mRNAs and not on introns and intergenic regions. Hence, riboWaltz currently works for read alignments based on transcript coordinates. Reads should have been mapped to a reference *transcriptome*, NOT a reference genome. | |
208 | |
209 **Outputs** | |
210 | |
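211 riboWaltz generates 4 outputs, plus an optional RData file with the P-site information (useful for the riboWaltz-plot tool) when requested: | |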
212 * P-site offsets info: a collection containing P-site offsets information of each input sample. | |
213 * Codon coverage: a tabular file containing the number of read footprints or P-sites mapping on each triplet of annotated coding sequences and UTRs. | |
214 * CDS coverage: a tabular file containing the number of in-frame P-sites mapping on annotated coding sequence. | |
215 * Plots: a collection of ribosome occupancy profiles, based on the 5' and 3' extremities of the reads mapping on the reference codon, generated for every sample and read length. | |
216 | |
217 P-site offsets info file: | |
218 | |
219 ====== ========================================================== | |
220 Column Description | |
221 ------ ---------------------------------------------------------- | |
222 1 transcript: the name of the corresponding reference sequence | |
223 2 end5: its leftmost position with respect to the 1st nucleotide of the reference sequence | |
224 3 psite: the P-site position with respect to the 1st nucleotide of the transcript | |
225 4 end3: its rightmost position with respect to the 1st nucleotide of the reference sequence | |
226 5 length: length of the read, in nucleotides (end3 - end5 + 1) | |
227 6 cds_start: the leftmost position of the annotated CDS of the reference sequence (if any) with respect to its 1st nucleotide | |
228 7 cds_stop: the rightmost position of the annotated CDS of the reference sequence (if any) with respect to its 1st nucleotide | |
229 8 psite_from_start: the P-site position with respect to the start codon of the annotated coding sequence (if any) | |
230 9 psite_from_stop: the P-site position with respect to the stop codon of the annotated coding sequence (if any) | |
231 10 psite_region: the region of the transcript (5' UTR, CDS, 3' UTR) that includes the P-site | |
232 ====== ========================================================== | |
233 | |
234 Codon coverage: | |
235 | |
236 ====== ========================================================== | |
237 Column Description | |
238 ------ ---------------------------------------------------------- | |
239 1 transcript: the name of the corresponding reference sequence | |
240 2 start: its leftmost position with respect to the 1st nucleotide of the reference sequence | |
241 3 end: its rightmost position with respect to the 1st nucleotide of the reference sequence | |
242 4 from_cds_start: its position with respect to the first codon of the annotated CDS of the reference sequence | |
243 5 from_cds_stop: its position with respect to the last codon of the annotated CDS of the reference sequence | |
244 6 region: the region of the transcript (5' UTR, CDS, 3' UTR) it is in | |
245 7-end the number of read footprints or P-sites mapping on that triplet for each sample | |
246 ====== ========================================================== | |
247 | |
248 CDS coverage: | |
249 | |
250 ====== ========================================================== | |
251 Column Description | |
252 ------ ---------------------------------------------------------- | |
253 1 transcript: the name of the corresponding reference sequence | |
254 2 length_cds: length of the annotated coding sequence of the reference sequence | |
255 3-end Number of in-frame P-sites mapping on its annotated coding region for each sample | |
256 ====== ========================================================== | |
257 | |
258 ]]></help> | |
259 <expand macro="citations" /> | |
260 </tool> |