comparison paralyzer.xml @ 0:4dbe81be8b81 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/paralyzer commit 52c43a8b9958fc46ab0284638038e690f5a0da3a
author rnateam
date Tue, 06 Dec 2016 03:28:59 -0500
parents
children f880686a9194
comparison
equal deleted inserted replaced
-1:000000000000 0:4dbe81be8b81
1 <tool id="paralyzer" name="PARalyzer" version="1.5">
2
<!-- Short summary of what the tool is for. -->
3 <description>A method to map interaction sites between RNA-binding proteins
4 and their targets</description>
5
<!-- Packages the tool depends on. -->
6 <requirements>
7 <!-- conda dependency -->
<!-- presumably supplies the PARalyzer executable called in the command section (TODO confirm) -->
8 <requirement type="package" version="1.5">paralyzer</requirement>
<!-- presumably supplies faToTwoBit, which the command section runs on a FASTA genome taken from the history (TODO confirm) -->
9 <requirement type="package" version="324">ucsc-fatotwobit</requirement>
10 </requirements>
11
<!-- Command line: when the genome comes from the history it is first converted
     to 2bit, then PARalyzer is run on the generated input_ini configuration file. -->
12 <command>
13 <![CDATA[
14 #if $refGenomeSource.genomeSource == "history":
## ownFile.2bit is the file name the input_ini configfile writes to GENOME_2BIT_FILE
15 faToTwoBit '$refGenomeSource.ownFile' ownFile.2bit
16 &&
17 #end if
18
19 ## execute paralyzer
20 ## note the 2nd argument is the memory parameter
21 ## the parameter can be overridden in job_conf.xml e.g.
22 ## <env id="_JAVA_OPTIONS">-Xmx12G</env>
## the configfile path is quoted, like the genome path above, so that a path
## containing spaces or shell metacharacters stays a single argument
23 PARalyzer 2G '$input_ini'
24
25 ]]>
26 </command>
27 <inputs>
<!-- SAM alignment to analyse; it is written to the SAM_FILE entry of the
     generated input_ini configuration file. -->
28 <param name="input_sam" type="data"
29 format="sam" label="Alignment"
30 help="The sam file containing alignment of the read data."/>
31
<!-- Boolean flag whose value is appended directly after the SAM path in the
     SAM_FILE entry of the generated configuration file: "=COLLAPSED" when
     checked, an empty string otherwise. -->
32 <param name="collapse" type="boolean" truevalue="=COLLAPSED"
33 falsevalue="" checked="True"
34 label="Incorporate the read copy number"
35 help="If reads were collapsed before alignment and you want
36 to incorporate the read copy number, please select Yes,
37 otherwise select No" />
38
39 <!-- Genome source. -->
<!-- "2bit": pick an entry of the lastz_seqs data table; the entry's path field is
     written to GENOME_2BIT_FILE in the generated configuration file.
     "history": pick a FASTA dataset; the command section converts it with
     faToTwoBit to ownFile.2bit, which is then used as GENOME_2BIT_FILE. -->
40 <conditional name="refGenomeSource">
41 <param name="genomeSource" type="select"
42 label="Will you select a reference genome from your
43 history or use a built-in genome?"
44 help="The version of genome against which the reads were aligned.">
45 <option value="2bit" selected="True">
46 Use a built-in genome</option>
47 <option value="history">
48 Use a genome from my current history</option>
49 </param>
<!-- Built-in genome: options come from the data table, sorted on column 1. -->
50 <when value="2bit">
51 <param name="builtin" type="select"
52 label="Select a reference genome">
53 <options from_data_table="lastz_seqs">
54 <filter type="sort_by" column="1" />
55 <validator type="no_options"
56 message="A built-in reference genome is not available
57 for the build associated with the selected input file"/>
58 </options>
59 </param>
60 </when>
<!-- Genome supplied by the user as a FASTA dataset. -->
61 <when value="history">
62 <param name="ownFile" type="data" format="fasta"
63 label="Select the reference genome" />
64 </when>
65 </conditional>
66
<!-- Cluster extension approach. The selection decides which approach directive
     the input_ini configfile writes; only ADDITIONAL_NUCLEOTIDES_BEYOND_SIGNAL
     takes an extra value (max_num). -->
67 <conditional name="methods">
68 <param name="choice" type="select"
69 label="Please select one of the approaches"
70 help="">
71 <option value="EXTEND_BY_READ">
72 EXTEND_BY_READ
73 </option>
<!-- The value keeps its historical spelling so that saved workflows and reruns
     stay valid; the displayed label uses the HAFNER_APPROACH spelling of the
     help section.
     NOTE(review): confirm the configuration file emits the spelling PARalyzer expects. -->
74 <option value="HAFFNER_APPROACH">
75 HAFNER_APPROACH
76 </option>
77 <option value="ADDITIONAL_NUCLEOTIDES_BEYOND_SIGNAL">
78 ADDITIONAL_NUCLEOTIDES_BEYOND_SIGNAL
79 </option>
80 </param>
81 <when value="EXTEND_BY_READ" />
82 <when value="HAFFNER_APPROACH" />
83 <when value="ADDITIONAL_NUCLEOTIDES_BEYOND_SIGNAL">
<!-- Written to the configuration file as ADDITIONAL_NUCLEOTIDES_BEYOND_SIGNAL=max_num. -->
84 <param name="max_num" type="integer"
85 value="0" label="The maximum number"
86 help="
87 The maximum number of nucleotides to extend the cluster beyond
88 the positive signal in each direction (default 0)
89 ">
90 <validator type="in_range"
91 message="Minimum allowed value is 0" min="0"/>
92 </param>
93 </when>
94 </conditional>
95
<!-- Nucleotide conversion to look for. With "default" no CONVERSION entry is
     written to the generated configuration file; with "custom" the entry
     CONVERSION=character_from>character_to is written. -->
96 <conditional name="conversion">
97 <param name="selection" type="select"
98 label="Conversion"
99 help="Please specify characters">
100 <option value="default">Use defaults: T to C</option>
101 <option value="custom">Specify other characters</option>
102 </param>
103 <when value="default" />
104 <when value="custom">
<!-- Left-hand side of the CONVERSION entry. -->
105 <param name="character_from" type="text"
106 size="1" value="T" label="Character from"
107 help="Character representing the modified ribonucleotide
108 (default 'T')">
109 </param>
<!-- Right-hand side of the CONVERSION entry. -->
110 <param name="character_to" type="text"
111 size="1" value="C" label="Character to"
112 help="Character representing what the modified ribonucleotide
113 is read as by rTranscriptase (default 'C')">
114 </param>
115 </when>
116 </conditional>
117
118 <!-- optional parameters -->
<!-- With "default" none of the entries below is written to the generated
     configuration file. With "custom" each value is written under the key
     shown in its label (KEY=value).
     NOTE(review): the select is labelled "Required parameters" although the
     comment above calls them optional; confirm the intended wording. -->
119 <conditional name="params">
120 <param name="settingsType" type="select"
121 label="Required parameters"
122 help="You can use the default settings or
123 set custom values for any of paralyzer's parameters.">
124 <option value="default">Use defaults</option>
125 <option value="custom">Full parameter list</option>
126 </param>
127 <when value="default" />
128 <!-- Full/advanced params. -->
129 <when value="custom">
<!-- Every integer below is validated with in_range min="1". -->
130 <param name="BANDWIDTH" type="integer"
131 value="3" label="BANDWIDTH"
132 help="Size of bandwidth for KDE calculation (default 3)">
133 <validator type="in_range"
134 message="Minimum allowed value is 1" min="1"/>
135 </param>
136
137 <param name="min_read_group" type="integer"
138 value="5" label="MINIMUM_READ_COUNT_PER_GROUP"
139 help="Minimum number of reads required to call a group (default 5)">
140 <validator type="in_range"
141 message="Minimum allowed value is 1" min="1"/>
142 </param>
143
144 <param name="min_read_cluster" type="integer"
145 value="2" label="MINIMUM_READ_COUNT_PER_CLUSTER"
146 help="Minimum number of reads required to call a cluster (default 2)">
147 <validator type="in_range"
148 message="Minimum allowed value is 1" min="1"/>
149 </param>
150
151 <param name="min_read_kde" type="integer"
152 value="3" label="MINIMUM_READ_COUNT_FOR_KDE"
153 help="Minimum read depth at a location to
154 make a KDE estimate (default 3)">
155 <validator type="in_range"
156 message="Minimum allowed value is 1" min="1"/>
157 </param>
158
159 <param name="min_read_cluster_inc" type="integer"
160 value="1" label="MINIMUM_READ_COUNT_FOR_CLUSTER_INCLUSION"
161 help="Minimum read depth for a location to be included
162 within a cluster (default 1)">
163 <validator type="in_range"
164 message="Minimum allowed value is 1" min="1"/>
165 </param>
166
167 <param name="min_cluster_size" type="integer"
168 value="11" label="MINIMUM_CLUSTER_SIZE"
169 help="Minimum length required for a cluster
170 to be reported (default 11)">
171 <validator type="in_range"
172 message="Minimum allowed value is 1" min="1"/>
173 </param>
174
175 <param name="min_conv_loc_cluster" type="integer"
176 value="2" label="MINIMUM_CONVERSION_LOCATIONS_FOR_CLUSTER"
177 help="Minimum number of separate locations to have a
178 reported conversion for a cluster to be
179 reported (default 2)">
180 <validator type="in_range"
181 message="Minimum allowed value is 1" min="1"/>
182 </param>
183
184 <param name="min_conv_cluster" type="integer"
185 value="2" label="MINIMUM_CONVERSION_COUNT_FOR_CLUSTER"
186 help="Minimum number of conversion events within a
187 region to report a cluster (default 2)">
188 <validator type="in_range"
189 message="Minimum allowed value is 1" min="1"/>
190 </param>
191
192 <param name="min_read_len" type="integer"
193 value="20" label="MINIMUM_READ_LENGTH"
194 help="Minimum length of mapped read to be included
195 in the analysis (default 20)">
196 <validator type="in_range"
197 message="Minimum allowed value is 1" min="1"/>
198 </param>
199
<!-- NOTE(review): this is a maximum, yet the validator rejects 0, so reads with
     zero non-conversion mismatches only cannot be requested; confirm whether
     min="0" is what PARalyzer allows. -->
200 <param name="max_num_conv_mis" type="integer"
201 value="1" label="MAXIMUM_NUMBER_OF_NON_CONVERSION_MISMATCHES"
202 help="Maximum number of non-conversion mismatches of
203 a mapped read to be included in the analysis (default 1)">
204 <validator type="in_range"
205 message="Minimum allowed value is 1" min="1"/>
206 </param>
207 </when> <!-- full -->
208 </conditional>
209 </inputs>
<!-- Generated PARalyzer configuration file (KEY=VALUE lines plus one bare
     approach directive), passed to PARalyzer as $input_ini in the command section. -->
210 <configfiles>
211 <configfile name="input_ini">
212 ## genome source
## a history genome is converted to ownFile.2bit by the command section;
## a built-in genome uses the path field of the selected data table entry
213 #if $refGenomeSource.genomeSource == "history":
214 GENOME_2BIT_FILE=ownFile.2bit
215 #else
216 GENOME_2BIT_FILE=$refGenomeSource.builtin.fields.path
217 #end if
218
## $collapse expands to "=COLLAPSED" or to an empty string
219 SAM_FILE=$input_sam$collapse
220
## approach directive
221 #if $methods.choice == "ADDITIONAL_NUCLEOTIDES_BEYOND_SIGNAL":
222 ADDITIONAL_NUCLEOTIDES_BEYOND_SIGNAL=$methods.max_num
#elif $methods.choice == "HAFFNER_APPROACH":
## the option value is misspelled in the inputs section; write the
## HAFNER_APPROACH spelling documented in the help section rather than
## copying the misspelled value into the configuration file
HAFNER_APPROACH
223 #else:
224 $methods.choice
225 #end if
226
## CONVERSION is only written when the user supplied custom characters
227 #if $conversion.selection == "custom":
228 CONVERSION=$conversion.character_from>$conversion.character_to
229 #end if
230
231 ## required parameters
## only written when the full parameter list was selected
232 #if $params.settingsType == "custom":
233 BANDWIDTH=$params.BANDWIDTH
234 MINIMUM_READ_COUNT_PER_GROUP=$params.min_read_group
235 MINIMUM_READ_COUNT_PER_CLUSTER=$params.min_read_cluster
236 MINIMUM_READ_COUNT_FOR_KDE=$params.min_read_kde
237 MINIMUM_READ_COUNT_FOR_CLUSTER_INCLUSION=$params.min_read_cluster_inc
238 MINIMUM_CLUSTER_SIZE=$params.min_cluster_size
239 MINIMUM_CONVERSION_LOCATIONS_FOR_CLUSTER=$params.min_conv_loc_cluster
240 MINIMUM_CONVERSION_COUNT_FOR_CLUSTER=$params.min_conv_cluster
241 MINIMUM_READ_LENGTH=$params.min_read_len
242 MAXIMUM_NUMBER_OF_NON_CONVERSION_MISMATCHES=$params.max_num_conv_mis
243 #end if
244
## fixed output names; the outputs section collects these files from the working directory
245 OUTPUT_DISTRIBUTIONS_FILE=out.distribution
246 OUTPUT_GROUPS_FILE=out.groups
247 OUTPUT_CLUSTERS_FILE=out.clusters
248 </configfile>
249 </configfiles>
<!-- Three text outputs collected from the working directory. The from_work_dir
     names match the OUTPUT_DISTRIBUTIONS_FILE, OUTPUT_GROUPS_FILE and
     OUTPUT_CLUSTERS_FILE entries of the generated configuration file. -->
250 <outputs>
251 <data name="distribution" format="txt"
252 from_work_dir="out.distribution"
253 label="${tool.name} on ${on_string}: DISTRIBUTIONS"/>
254
255 <data name="groups" format="txt"
256 from_work_dir="out.groups"
257 label="${tool.name} on ${on_string}: GROUPS"/>
258
259 <data name="clusters" format="txt"
260 from_work_dir="out.clusters"
261 label="${tool.name} on ${on_string}: CLUSTERS"/>
262
<!-- Disabled output: a SAM file named out_PARalyzer_Utilized.sam. The generated
     configuration file contains no entry for it. -->
263 <!--
264 <data name="PARalyzer_Utilized" format="sam"
265 from_work_dir="out_PARalyzer_Utilized.sam"
266 label="${tool.name} on ${on_string}: PARalyzer_Utilized.sam"/>
267 -->
268 </outputs>
<!-- Functional test: history genome, EXTEND_BY_READ, custom T to C conversion and
     the full parameter list set explicitly to the documented default values. -->
269 <tests>
270 <test>
271 <param name="input_sam" value="input.sam" ftype="sam" />
272 <param name="genomeSource" value="history" />
273 <param name="ownFile" value="input.fa" />
274 <param name="choice" value="EXTEND_BY_READ" />
275 <param name="selection" value="custom" />
276 <param name="character_from" value="T" />
277 <param name="character_to" value="C" />
278 <param name="settingsType" value="custom" />
<!-- the name must match the input parameter "BANDWIDTH" exactly (no "$" prefix) -->
279 <param name="BANDWIDTH" value="3" />
280 <param name="min_read_group" value="5" />
281 <param name="min_read_cluster" value="2" />
282 <param name="min_read_kde" value="3" />
283 <param name="min_read_cluster_inc" value="1" />
284 <param name="min_cluster_size" value="11" />
285 <param name="min_conv_loc_cluster" value="2" />
286 <param name="min_conv_cluster" value="2" />
287 <param name="min_read_len" value="20" />
288 <param name="max_num_conv_mis" value="1" />
289 <output name="distribution" file="out.distribution"
290 ftype="txt"/>
291 <output name="groups" file="out.groups"
292 ftype="txt"/>
293 <output name="clusters" file="out.clusters"
294 ftype="txt"/>
295 <!--
296 <output name="PARalyzer_Utilized" file="out_PARalyzer_Utilized.sam"
297 ftype="sam"/>
298 -->
299 </test>
300 </tests>
301 <help>
302 <![CDATA[
303 .. class:: infomark
304
305 **What it does**
306
307 `paralyzer`_ is an algorithm to generate a high resolution
308 map of interaction sites between RNA-binding proteins and their targets. The
309 algorithm utilizes the deep sequencing reads generated by `PAR-CLIP`_
310 (Photoactivatable-Ribonucleoside-Enhanced Crosslinking and
311 Immunoprecipitation) protocol. The use of photoactivatable nucleotides in the
312 PAR-CLIP protocol results in more efficient crosslinking between the
313 RNA-binding protein and its target relative to other CLIP methods; in addition
314 a nucleotide substitution occurs at the site of crosslinking, providing for
315 single-nucleotide resolution binding information. PARalyzer utilizes this
316 nucleotide substitution in a kernel density estimate classifier to generate
317 the high resolution set of Protein-RNA interaction sites.
318
319 .. _paralyzer: https://ohlerlab.mdc-berlin.de/software/PARalyzer_85/
320 .. _PAR-CLIP: http://www.ncbi.nlm.nih.gov/pubmed/20371350
321
322 .. class:: infomark
323
324 **Approaches**
325
326 ``EXTEND_BY_READ``: including this line means that the cluster will be extended
327 beyond the signal to include a region such that it extends to
328 the end of any read that falls within the cluster and contained
329 a conversion, or until the minimum read depth
330 (MINIMUM_READ_COUNT_FOR_CLUSTER_INCLUSION parameter) is no longer met
331
332 ``HAFNER_APPROACH``: identifies the location with the largest number of conversion
333 events and extends the cluster up to
334 (parameter ADDITIONAL_NUCLEOTIDES_BEYOND_SIGNAL) nt
335 in each direction from that point, or until the minimum
336 read depth (MINIMUM_READ_COUNT_FOR_CLUSTER_INCLUSION parameter) is no longer met
337
338 ``ADDITIONAL_NUCLEOTIDES_BEYOND_SIGNAL``: the maximum number of nucleotides to
339 extend beyond the positive signal in each direction (default 0);
340 the cluster is defined as the region where the conversion KDE is above
341 the background KDE and then extended up to #integer#, or until the minimum
342 read depth (MINIMUM_READ_COUNT_FOR_CLUSTER_INCLUSION parameter) is no longer met
343
344 .. class:: infomark
345
346 **Outputs**
347
348 DISTRIBUTIONS: contains the signal KDE, background KDE, read count & conversion for all locations within each group
349 * The data will be in blocks of four lines for each group
350 * groups on the reverse strand do not need to be reversed; the values always equal nucleotides from GroupStart to GroupEnd, regardless of Strand
351 * First Column = Chromosome = chromosome on which the group resides
352 * Second Column = Strand = orientation in which the group resides
353 * Third Column = GroupStart = beginning coordinate on the chromosome of the group
354 * Fourth Column = GroupEnd = ending coordinate on the chromosome of the group
355 * Fifth Column = GroupID = unique ID for the group
356 * Sixth Column = Information = reports if the current line contains the Signal, Background, Conversion Percent, or ReadCount
357 * All nucleotides that do not have any possibility of having a conversion event are given a value of -1
358 * All Subsequent Columns: the values for each nucleotide from GroupStart until GroupEnd
359
360
361 GROUPS: a comma separated file containing the information about the resulting groups
362 * Chromosome = chromosome on which the group resides
363 * Strand = orientation in which the group resides
364 * GroupStart = beginning coordinate on the chromosome of the group
365 * GroupEnd = ending coordinate on the chromosome of the group
366 * GroupID = unique ID for the group
367 * ReadCount = number of reads within the group
368
369 CLUSTERS: a comma separated file containing the information about the resulting clusters
370 * Chromosome = chromosome on which the cluster resides
371 * Strand = orientation in which the cluster resides
372 * ClusterStart = beginning coordinate on the chromosome of the cluster
373 * ClusterEnd = ending coordinate on the chromosome of the cluster
374 * ClusterID = unique ID for the cluster
375 * ClusterSequence = sequence of the cluster
376 * ReadCount = number of reads that overlap the cluster by at least 1 nucleotide
377 * ModeLocation = coordinate of the location with the highest signal / (signal + background) value
378 * ModeScore = score of the highest signal / (signal + background) value
379 * ConversionLocationCount = number of unique location where at least 1 conversion occurred
380 * ConversionEventCount = total number of conversions that occurred within the cluster
381 * NonConversionEventCount = total number of possible conversion events that did not occur
382
383 ]]></help>
<!-- Publication to cite when using this tool, referenced by DOI. -->
384 <citations>
385 <citation type="doi">10.1186/gb-2011-12-8-r79</citation>
386 </citations>
387 </tool>