Mercurial > repos > rnateam > paralyzer
comparison paralyzer.xml @ 0:4dbe81be8b81 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/paralyzer commit 52c43a8b9958fc46ab0284638038e690f5a0da3a
author | rnateam |
---|---|
date | Tue, 06 Dec 2016 03:28:59 -0500 |
parents | |
children | f880686a9194 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4dbe81be8b81 |
---|---|
1 <tool id="paralyzer" name="PARalyzer" version="1.5"> | |
2 | |
3 <description>A method to map interaction sites between RNA-binding proteins | |
4 and their targets</description> | |
5 | |
6 <requirements> | |
7 <!-- conda dependencies: PARalyzer itself, plus the package presumed to supply the faToTwoBit converter called in the command section --> | |
8 <requirement type="package" version="1.5">paralyzer</requirement> | |
9 <requirement type="package" version="324">ucsc-fatotwobit</requirement> | |
10 </requirements> | |
11 | |
12 <command> | |
13 <![CDATA[ | |
14 #if $refGenomeSource.genomeSource == "history": | |
15 faToTwoBit '$refGenomeSource.ownFile' ownFile.2bit | |
16 && | |
17 #end if | |
18 | |
19 ## Run PARalyzer on the generated ini file (quoted so the path is passed as one argument). | |
20 ## "2G" is the memory argument handed to PARalyzer. | |
21 ## It can be overridden in job_conf.xml, e.g. | |
22 ## <env id="_JAVA_OPTIONS">-Xmx12G</env> | |
23 PARalyzer 2G '$input_ini' | |
24 | |
25 ]]> | |
26 </command> | |
27 <inputs> | |
28 <param name="input_sam" type="data" | |
29 format="sam" label="Alignment" | |
30 help="The sam file containing alignment of the read data."/> | |
31 <!-- When checked, "=COLLAPSED" is appended to the SAM_FILE entry written to the ini file --> | |
32 <param name="collapse" type="boolean" truevalue="=COLLAPSED" | |
33 falsevalue="" checked="True" | |
34 label="Incorporate the read copy number" | |
35 help="If reads were collapsed before alignment and you want | |
36 to incorporate the read copy number, please select Yes, | |
37 otherwise select No" /> | |
38 | |
39 <!-- Genome source: either an entry of the lastz_seqs data table (its path is used as the 2bit file) or a FASTA dataset from the history (converted with faToTwoBit in the command section). --> | |
40 <conditional name="refGenomeSource"> | |
41 <param name="genomeSource" type="select" | |
42 label="Will you select a reference genome from your | |
43 history or use a built-in genome?" | |
44 help="The version of genome against which the reads were aligned."> | |
45 <option value="2bit" selected="True"> | |
46 Use a built-in genome</option> | |
47 <option value="history"> | |
48 Use a genome from my current history</option> | |
49 </param> | |
50 <when value="2bit"> | |
51 <param name="builtin" type="select" | |
52 label="Select a reference genome"> | |
53 <options from_data_table="lastz_seqs"> | |
54 <filter type="sort_by" column="1" /> | |
55 <validator type="no_options" | |
56 message="A built-in reference genome is not available | |
57 for the build associated with the selected input file"/> | |
58 </options> | |
59 </param> | |
60 </when> | |
61 <when value="history"> | |
62 <param name="ownFile" type="data" format="fasta" | |
63 label="Select the reference genome" /> | |
64 </when> | |
65 </conditional> | |
66 <!-- Cluster-extension approach. The selected value is written verbatim into the ini file, so each option value must match the keyword documented in the help section (HAFNER_APPROACH, single F). --> | |
67 <conditional name="methods"> | |
68 <param name="choice" type="select" | |
69 label="Please select one of the approaches" | |
70 help=""> | |
71 <option value="EXTEND_BY_READ"> | |
72 EXTEND_BY_READ | |
73 </option> | |
74 <option value="HAFNER_APPROACH"> | |
75 HAFNER_APPROACH | |
76 </option> | |
77 <option value="ADDITIONAL_NUCLEOTIDES_BEYOND_SIGNAL"> | |
78 ADDITIONAL_NUCLEOTIDES_BEYOND_SIGNAL | |
79 </option> | |
80 </param> | |
81 <when value="EXTEND_BY_READ" /> | |
82 <when value="HAFNER_APPROACH" /> | |
83 <when value="ADDITIONAL_NUCLEOTIDES_BEYOND_SIGNAL"> | |
84 <param name="max_num" type="integer" | |
85 value="0" label="The maximum number" | |
86 help=" | |
87 The maximum number of reads to extend beyond the positive | |
88 signal in each direction (default 0) the cluster is defined | |
89 "> | |
90 <validator type="in_range" | |
91 message="Minimum allowed value is 0" min="0"/> | |
92 </param> | |
93 </when> | |
94 </conditional> | |
95 <!-- Nucleotide conversion: "custom" writes a CONVERSION=from>to line into the ini file; "default" writes no CONVERSION line. --> | |
96 <conditional name="conversion"> | |
97 <param name="selection" type="select" | |
98 label="Conversion" | |
99 help="Please specify characters"> | |
100 <option value="default">Use defaults: T to C</option> | |
101 <option value="custom">Specify other characters</option> | |
102 </param> | |
103 <when value="default" /> | |
104 <when value="custom"> | |
105 <param name="character_from" type="text" | |
106 size="1" value="T" label="Character from" | |
107 help="Character representing the modified ribonucleotide | |
108 (default 'T')"> | |
109 </param> | |
110 <param name="character_to" type="text" | |
111 size="1" value="C" label="Character to" | |
112 help="Character representing what the modified ribonucleotide | |
113 is read as by rTranscriptase (default 'C')"> | |
114 </param> | |
115 </when> | |
116 </conditional> | |
117 | |
118 <!-- Tuning parameters: with "default" nothing is written to the ini file; with "custom" every value below is written out. --> | |
119 <conditional name="params"> | |
120 <param name="settingsType" type="select" | |
121 label="Required parameters" | |
122 help="You can use the default settings or | |
123 set custom values for any of paralyzer's parameters."> | |
124 <option value="default">Use defaults</option> | |
125 <option value="custom">Full parameter list</option> | |
126 </param> | |
127 <when value="default" /> | |
128 <!-- Full parameter list; each param maps to the ini key named in its label. --> | |
129 <when value="custom"> | |
130 <param name="BANDWIDTH" type="integer" | |
131 value="3" label="BANDWIDTH" | |
132 help="Size of bandwidth for KDE calculation (default 3)"> | |
133 <validator type="in_range" | |
134 message="Minimum allowed value is 1" min="1"/> | |
135 </param> | |
136 | |
137 <param name="min_read_group" type="integer" | |
138 value="5" label="MINIMUM_READ_COUNT_PER_GROUP" | |
139 help="Minimum number of reads required to call a group (default 5)"> | |
140 <validator type="in_range" | |
141 message="Minimum allowed value is 1" min="1"/> | |
142 </param> | |
143 | |
144 <param name="min_read_cluster" type="integer" | |
145 value="2" label="MINIMUM_READ_COUNT_PER_CLUSTER" | |
146 help="Minimum number of reads required to call a cluster (default 2)"> | |
147 <validator type="in_range" | |
148 message="Minimum allowed value is 1" min="1"/> | |
149 </param> | |
150 | |
151 <param name="min_read_kde" type="integer" | |
152 value="3" label="MINIMUM_READ_COUNT_FOR_KDE" | |
153 help="Minimum read depth at a location to | |
154 make a KDE estimate (default 3)"> | |
155 <validator type="in_range" | |
156 message="Minimum allowed value is 1" min="1"/> | |
157 </param> | |
158 | |
159 <param name="min_read_cluster_inc" type="integer" | |
160 value="1" label="MINIMUM_READ_COUNT_FOR_CLUSTER_INCLUSION" | |
161 help="Minimum read depth for a location to be included | |
162 within a cluster (default 1)"> | |
163 <validator type="in_range" | |
164 message="Minimum allowed value is 1" min="1"/> | |
165 </param> | |
166 | |
167 <param name="min_cluster_size" type="integer" | |
168 value="11" label="MINIMUM_CLUSTER_SIZE" | |
169 help="Minimum length required for a cluster | |
170 to be reported (default 11)"> | |
171 <validator type="in_range" | |
172 message="Minimum allowed value is 1" min="1"/> | |
173 </param> | |
174 | |
175 <param name="min_conv_loc_cluster" type="integer" | |
176 value="2" label="MINIMUM_CONVERSION_LOCATIONS_FOR_CLUSTER" | |
177 help="Minimum number of separate locations to have a | |
178 reported conversion for a cluster to be | |
179 reported (default 2)"> | |
180 <validator type="in_range" | |
181 message="Minimum allowed value is 1" min="1"/> | |
182 </param> | |
183 | |
184 <param name="min_conv_cluster" type="integer" | |
185 value="2" label="MINIMUM_CONVERSION_COUNT_FOR_CLUSTER" | |
186 help="Minimum number of conversion events within a | |
187 region to report a cluster (default 2)"> | |
188 <validator type="in_range" | |
189 message="Minimum allowed value is 1" min="1"/> | |
190 </param> | |
191 | |
192 <param name="min_read_len" type="integer" | |
193 value="20" label="MINIMUM_READ_LENGTH" | |
194 help="Minimum length of mapped read to be included | |
195 in the analysis (default 20)"> | |
196 <validator type="in_range" | |
197 message="Minimum allowed value is 1" min="1"/> | |
198 </param> | |
199 | |
200 <param name="max_num_conv_mis" type="integer" | |
201 value="1" label="MAXIMUM_NUMBER_OF_NON_CONVERSION_MISMATCHES" | |
202 help="Maximum number of non-conversion mismatches of | |
203 a mapped read to be included in the analysis (default 1)"> | |
204 <validator type="in_range" | |
205 message="Minimum allowed value is 0" min="0"/> | |
206 </param> | |
207 </when> <!-- custom: full parameter list; max_num_conv_mis is an upper bound, so 0 (no mismatches allowed) is accepted --> | |
208 </conditional> | |
209 </inputs> | |
210 <configfiles> | |
211 <configfile name="input_ini"> | |
212 ## genome source: the 2bit file built in the command section, or the path field of the selected built-in genome | |
213 #if $refGenomeSource.genomeSource == "history": | |
214 GENOME_2BIT_FILE=ownFile.2bit | |
215 #else: | |
216 GENOME_2BIT_FILE=$refGenomeSource.builtin.fields.path | |
217 #end if | |
218 | |
219 SAM_FILE=$input_sam$collapse | |
220 | |
221 #if $methods.choice == "ADDITIONAL_NUCLEOTIDES_BEYOND_SIGNAL": | |
222 ADDITIONAL_NUCLEOTIDES_BEYOND_SIGNAL=$methods.max_num | |
223 #else: | |
224 $methods.choice | |
225 #end if | |
226 | |
227 #if $conversion.selection == "custom": | |
228 CONVERSION=$conversion.character_from>$conversion.character_to | |
229 #end if | |
230 | |
231 ## tuning parameters, written only when the full parameter list is selected | |
232 #if $params.settingsType == "custom": | |
233 BANDWIDTH=$params.BANDWIDTH | |
234 MINIMUM_READ_COUNT_PER_GROUP=$params.min_read_group | |
235 MINIMUM_READ_COUNT_PER_CLUSTER=$params.min_read_cluster | |
236 MINIMUM_READ_COUNT_FOR_KDE=$params.min_read_kde | |
237 MINIMUM_READ_COUNT_FOR_CLUSTER_INCLUSION=$params.min_read_cluster_inc | |
238 MINIMUM_CLUSTER_SIZE=$params.min_cluster_size | |
239 MINIMUM_CONVERSION_LOCATIONS_FOR_CLUSTER=$params.min_conv_loc_cluster | |
240 MINIMUM_CONVERSION_COUNT_FOR_CLUSTER=$params.min_conv_cluster | |
241 MINIMUM_READ_LENGTH=$params.min_read_len | |
242 MAXIMUM_NUMBER_OF_NON_CONVERSION_MISMATCHES=$params.max_num_conv_mis | |
243 #end if | |
244 | |
245 OUTPUT_DISTRIBUTIONS_FILE=out.distribution | |
246 OUTPUT_GROUPS_FILE=out.groups | |
247 OUTPUT_CLUSTERS_FILE=out.clusters | |
248 </configfile> | |
249 </configfiles> | |
250 <outputs> | |
251 <data name="distribution" format="txt" | |
252 from_work_dir="out.distribution" | |
253 label="${tool.name} on ${on_string}: DISTRIBUTIONS"/> | |
254 | |
255 <data name="groups" format="txt" | |
256 from_work_dir="out.groups" | |
257 label="${tool.name} on ${on_string}: GROUPS"/> | |
258 | |
259 <data name="clusters" format="txt" | |
260 from_work_dir="out.clusters" | |
261 label="${tool.name} on ${on_string}: CLUSTERS"/> | |
262 | |
263 <!-- Disabled output, kept for reference (no matching entry is written to the ini file): | |
264 <data name="PARalyzer_Utilized" format="sam" | |
265 from_work_dir="out_PARalyzer_Utilized.sam" | |
266 label="${tool.name} on ${on_string}: PARalyzer_Utilized.sam"/> | |
267 --> | |
268 </outputs> | |
269 <tests> | |
270 <test> | |
271 <param name="input_sam" value="input.sam" ftype="sam" /> | |
272 <param name="genomeSource" value="history" /> | |
273 <param name="ownFile" value="input.fa" /> | |
274 <param name="choice" value="EXTEND_BY_READ" /> | |
275 <param name="selection" value="custom" /> | |
276 <param name="character_from" value="T" /> | |
277 <param name="character_to" value="C" /> | |
278 <param name="settingsType" value="custom" /> | |
279 <param name="BANDWIDTH" value="3" /> | |
280 <param name="min_read_group" value="5" /> | |
281 <param name="min_read_cluster" value="2" /> | |
282 <param name="min_read_kde" value="3" /> | |
283 <param name="min_read_cluster_inc" value="1" /> | |
284 <param name="min_cluster_size" value="11" /> | |
285 <param name="min_conv_loc_cluster" value="2" /> | |
286 <param name="min_conv_cluster" value="2" /> | |
287 <param name="min_read_len" value="20" /> | |
288 <param name="max_num_conv_mis" value="1" /> | |
289 <output name="distribution" file="out.distribution" | |
290 ftype="txt"/> | |
291 <output name="groups" file="out.groups" | |
292 ftype="txt"/> | |
293 <output name="clusters" file="out.clusters" | |
294 ftype="txt"/> | |
295 <!-- Disabled: the tool declares no active PARalyzer_Utilized output to compare against. | |
296 <output name="PARalyzer_Utilized" file="out_PARalyzer_Utilized.sam" | |
297 ftype="sam"/> | |
298 --> | |
299 </test> | |
300 </tests> | |
301 <help> | |
302 <![CDATA[ | |
303 .. class:: infomark | |
304 | |
305 **What it does** | |
306 | |
307 `paralyzer`_ is an algorithm to generate a high resolution | |
308 map of interaction sites between RNA-binding proteins and their targets. The | |
309 algorithm utilizes the deep sequencing reads generated by `PAR-CLIP`_ | |
310 (Photoactivatable-Ribonucleoside-Enhanced Crosslinking and | |
311 Immunoprecipitation) protocol. The use of photoactivatable nucleotides in the | |
312 PAR-CLIP protocol results in more efficient crosslinking between the | |
313 RNA-binding protein and its target relative to other CLIP methods; in addition | |
314 a nucleotide substitution occurs at the site of crosslinking, providing for | |
315 single-nucleotide resolution binding information. PARalyzer utilizes this | |
316 nucleotide substitution in a kernel density estimate classifier to generate | |
317 the high resolution set of Protein-RNA interaction sites. | |
318 | |
319 .. _paralyzer: https://ohlerlab.mdc-berlin.de/software/PARalyzer_85/ | |
320 .. _PAR-CLIP: http://www.ncbi.nlm.nih.gov/pubmed/20371350 | |
321 | |
322 .. class:: infomark | |
323 | |
324 **Approaches** | |
325 | |
326 ``EXTEND_BY_READ``: including this line means that the cluster will be extended | |
327 beyond the signal to include a region such that it extends to | |
328 the end of any read that falls within the cluster and contained | |
329 a conversion, or until the minimum read depth | |
330 (MINIMUM_READ_COUNT_FOR_CLUSTER_INCLUSION parameter) is no longer met | |
331 | |
332 ``HAFNER_APPROACH``: identifies the location with the largest number of conversion | |
333 events and extends the cluster up to | |
334 (parameter ADDITIONAL_NUCLEOTIDES_BEYOND_SIGNAL) nt | |
335 in each direction from that point, or until the minimum | |
336 read depth (MINIMUM_READ_COUNT_FOR_CLUSTER_INCLUSION parameter) is no longer met | |
337 | |
338 ``ADDITIONAL_NUCLEOTIDES_BEYOND_SIGNAL``: the maximum number of reads to | |
339 extend beyond the positive signal in each direction (default 0) | |
340 the cluster is defined as the region where the conversion KDE is above | |
341 the background KDE and then extended up to #integer#, or until the minimum | |
342 read depth (MINIMUM_READ_COUNT_FOR_CLUSTER_INCLUSION parameter) is no longer met | |
343 | |
344 .. class:: infomark | |
345 | |
346 **Outputs** | |
347 | |
348 DISTRIBUTIONS: contains the signal KDE, background KDE, read count & conversion for all locations within each group | |
349 * The data will be in blocks of four lines for each group | |
350 * groups on the reverse strand do not need to be reversed; the values always equal nucleotides from GroupStart to GroupEnd, regardless of Strand | |
351 * First Column = Chromosome = chromosome on which the group resides | |
352 * Second Column = Strand = orientation in which the group resides | |
353 * Third Column = GroupStart = beginning coordinate on the chromosome of the group | |
354 * Fourth Column = GroupEnd = ending coordinate on the chromosome of the group | |
355 * Fifth Column = GroupID = unique ID for the group | |
356 * Sixth Column = Information = reports if the current line contains the Signal, Background, Conversion Percent, or ReadCount | |
357 * All nucleotides that do not have any possibility of having a conversion event are given a value of -1 | |
358 * All Subsequent Columns: the values for each nucleotide from GroupStart until GroupEnd | |
359 | |
360 | |
361 GROUPS: a comma separated file containing the information about the resulting groups | |
362 * Chromosome = chromosome on which the group resides | |
363 * Strand = orientation in which the group resides | |
364 * GroupStart = beginning coordinate on the chromosome of the group | |
365 * GroupEnd = ending coordinate on the chromosome of the group | |
366 * GroupID = unique ID for the group | |
367 * ReadCount = number of reads within the group | |
368 | |
369 CLUSTERS: a comma separated file containing the information about the resulting clusters | |
370 * Chromosome = chromosome on which the cluster resides | |
371 * Strand = orientation in which the cluster resides | |
372 * ClusterStart = beginning coordinate on the chromosome of the cluster | |
373 * ClusterEnd = ending coordinate on the chromosome of the cluster | |
374 * ClusterID = unique ID for the cluster | |
375 * ClusterSequence = sequence of the cluster | |
376 * ReadCount = number of reads that overlap the cluster by at least 1 nucleotide | |
377 * ModeLocation = coordinate of the location with the highest signal / (signal + background) value | |
378 * ModeScore = score of the highest signal / (signal + background) value | |
379 * ConversionLocationCount = number of unique locations where at least 1 conversion occurred | |
380 * ConversionEventCount = total number of conversions that occurred within the cluster | |
381 * NonConversionEventCount = total number of possible conversion events that did not occur | |
382 | |
383 ]]></help> | |
384 <citations> | |
385 <citation type="doi">10.1186/gb-2011-12-8-r79</citation> | |
386 </citations> | |
387 </tool> |