comparison pipmir.xml @ 0:16209195224c draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/pipmir commit 5937e8d61ae1203ebf2a536c669ba701b485cd1a
author rnateam
date Fri, 25 Nov 2016 09:38:28 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:16209195224c
1 <tool id="pipmir" name="PIPmiR PIPELINE" version="0.1.0">
2
3 <description>a method to identify novel plant miRNA</description>
4
5 <requirements>
6 <!-- conda dependencies. NOTE(review): the command in this tool also invokes samtools and RNAfold, which are not declared here; presumably they are pulled in by the pipmir package. Confirm. -->
7 <requirement type="package" version="1.1">pipmir</requirement>
8 <requirement type="package" version="324">ucsc-fatotwobit</requirement>
9 </requirements>
10
11 <command>
12 <![CDATA[
13 ## Steps, chained with &&: (history genome only) convert the FASTA to 2bit, sort and index the alignment, locate RNAfold, then run PIPmiR PIPELINE.
14 #if $refGenomeSource.genomeSource == "history":
15 faToTwoBit '$refGenomeSource.ownFile' ownFile.2bit
16 &&
17 #end if
18 ## write a sorted copy of the input alignment to test_sorted.bam
19 samtools sort
20
21 ## output in BAM format
22 -O BAM
23
24 ## output file name
25 -o test_sorted.bam
26 ## input alignment dataset (declared as sam or bam in the inputs section)
27 '$input_bam'
28
29 &&
30 samtools index test_sorted.bam test_sorted.bai
31
32 ## the tool requires the location of RNAfold binary
33 &&
34 RNAfold_location=\$(which RNAfold)
35
36 &&
37 PIPmiR PIPELINE
38 ## the sorted and indexed alignment produced by the samtools steps
39 -a test_sorted.bam
40
41 ## genome source
42 #if $refGenomeSource.genomeSource == "history":
43 -t ownFile.2bit
44 #else
45 -t '$refGenomeSource.builtin.fields.path'
46 #end if
47 ## output name; the outputs section collects test_*.bed and test_*.txt from the working directory
48 -o test
49
50 ## optional parameters
51 #if $params.settingsType == "custom":
52 ## default: 50
53 -l $params.min_precursor
54
55 ## default: 500
56 -L $params.max_precursor
57
58 ## default: 2
59 -s $params.step_size
60
61 ## default: 10
62 -m $params.min_read
63 #end if
64
65 ## value taken from GALAXY_SLOTS, falling back to 1 when it is unset (default: 1)
66 -p \${GALAXY_SLOTS:-1}
67 ## path stored in RNAfold_location by the which call earlier in this command
68 -R \$RNAfold_location
69
70 ]]>
71 </command>
72 <inputs>
73 <param name="input_bam" type="data"
74 format="sam,bam" label="Alignment"
75 help="The bam or sam file containing alignment of the read data."/>
76
77 <!-- Genome source: either a built-in genome chosen from the lastz_seqs data table, or a FASTA dataset from the history (the command converts it to 2bit with faToTwoBit). -->
78 <conditional name="refGenomeSource">
79 <param name="genomeSource" type="select"
80 label="Will you select a reference genome from your
81 history or use a built-in genome?"
82 help="The version of genome against which the reads were aligned.">
83 <option value="2bit" selected="True">
84 Use a built-in genome</option>
85 <option value="history">
86 Use a genome from my current history</option>
87 </param>
88 <when value="2bit">
89 <param name="builtin" type="select"
90 label="Select a reference genome">
91 <options from_data_table="lastz_seqs">
92 <filter type="sort_by" column="1" />
93 <validator type="no_options"
94 message="A built-in reference genome is not available
95 for the build associated with the selected input file"/>
96 </options>
97 </param>
98 </when>
99 <when value="history">
100 <param name="ownFile" type="data" format="fasta"
101 label="Select the reference genome" />
102 </when>
103 </conditional>
104
105 <!-- Optional parameters: the command passes these flags to PIPmiR only when "custom" is selected; with "default" the flags are omitted. -->
106 <conditional name="params">
107 <param name="settingsType" type="select"
108 label="Optional parameters"
109 help="You can use the default settings or
110 set custom values for any of pipmir's parameters.">
111 <option value="default">Use defaults</option>
112 <option value="custom">Full parameter list</option>
113 </param>
114 <when value="default" />
115 <!-- Full/advanced params; every value must be at least 1. -->
116 <when value="custom">
117 <param name="min_precursor" type="integer"
118 value="50" label="Minimum size"
119 help="Minimum size of a precursor sequence (Default: 50)">
120 <validator type="in_range"
121 message="Minimum allowed value is 1" min="1"/>
122 </param>
123
124 <param name="max_precursor" type="integer"
125 value="500" label="Maximum size"
126 help="Maximum size of a precursor sequence (Default: 500)">
127 <validator type="in_range"
128 message="Minimum allowed value is 1" min="1"/>
129 </param>
130
131 <param name="step_size" type="integer"
132 value="2" label="Step size"
133 help="The step size used to identify the precursor
134 from the minimum to the maximum possible
135 size of precursor (Default: 2)">
136 <validator type="in_range"
137 message="Minimum allowed value is 1" min="1"/>
138 </param>
139
140 <param name="min_read" type="integer"
141 value="10" label="Minimum read count"
142 help="Minimum read count for a mature to be
143 considered expressed (Default: 10)">
144 <validator type="in_range"
145 message="Minimum allowed value is 1" min="1"/>
146 </param>
147 </when> <!-- custom -->
148 </conditional>
149 </inputs>
150 <outputs> <!-- Each dataset is picked up from the job working directory; the test_ file prefix matches the "-o test" argument in the command. -->
151 <data name="putativeMatures" format="bed"
152 from_work_dir="test_putativeMatures.bed"
153 label="${tool.name} on ${on_string}: putative mature miRNAs"/>
154
155 <data name="predictedPrecursors" format="txt"
156 from_work_dir="test_predictedPrecursors.txt"
157 label="${tool.name} on ${on_string}: predicted precursor"/>
158
159 <data name="predicted_miRNA" format="txt"
160 from_work_dir="test_predicted_miRNAs.txt"
161 label="${tool.name} on ${on_string}: predicted miRNAs"/>
162 </outputs>
163 <tests>
164 <test> <!-- Exercises the history-genome branch with custom settings; only step_size (10) is set explicitly, the other custom parameters keep their declared values. -->
165 <param name="input_bam" value="Aligned.out.sam" ftype="sam" />
166 <param name="genomeSource" value="history" />
167 <param name="ownFile" value="test_seq.fa" />
168 <param name="settingsType" value="custom" />
169 <param name="step_size" value="10" />
170 <output name="putativeMatures" file="test_putativeMatures.bed"
171 ftype="bed"/>
172 <output name="predictedPrecursors" file="test_predictedPrecursors.txt"
173 ftype="txt"/>
174 <output name="predicted_miRNA" file="test_predicted_miRNAs.txt"
175 ftype="txt"/>
176 </test>
177 </tests>
178 <help>
179 <![CDATA[
180 .. class:: infomark
181
182 **What it does**
183
184 `pipmir`_ is an algorithm to identify novel plant miRNA genes from a combination
185 of deep sequencing data and genomic features.
186
187 .. _pipmir: https://ohlerlab.mdc-berlin.de/software/Pipeline_for_the_Identification_of_Plant_miRNAs_84/
188
189 .. class:: infomark
190
191 **Optional parameters**
192
193 Minimum size
194 * The MINIMUM size the precursor predictor can search.
195 * 50 is recommended (and is the default).
196
197 Maximum size
198 * The MAXIMUM size the precursor predictor can search.
199
200 * The larger you make this, the longer PIPmiR will take as folding time is exponential with increased size.
201
202 * 500 is what was used in the manuscript and will include almost all currently known Arabidopsis thaliana miRNAs.
203
204 * 300 is a limit that will include most known miRNAs and still have a reasonable search time.
205
206 Step size
207 * The step size used to identify the precursor from the minimum to the maximum possible size of a precursor.
208
209 * Increasing this value will speed up the precursor prediction step but limit the number of possible precursor sequences.
210
211 * 2 is the default and is what was used in the manuscript; however, 5 still works well enough and is only slightly less accurate.
212
213 .. class:: infomark
214
215 **Outputs**
216
217 A `bed`_ file of putative mature miRNAs
218 * 'name' column = an arbitrary name given to the putative mature
219 * 'score' column = the read count
220
221 .. _bed: https://genome.ucsc.edu/FAQ/FAQformat#format1
222
223 A text file of predicted precursors
224 * Form: Chromosome,Strand,Precursor_Start,Precursor_End,Sequence,Fold_Structure,Normalized_Minimum_Free_Energy,Mature_miRNA_Location
225
226 * Chromosome = chromosome that the precursor is on
227
228 * Strand = strand that the precursor is on
229
230 * Precursor_Start = start nucleotide (1 based) of the precursor
231
232 * Precursor_End = last nucleotide (1 based, inclusive) of the precursor
233
234 * Sequence = sequence of the precursor
235
236 * Fold_Structure = dot-bracket notation of the precursor fold centroid secondary structure (generated from RNAfold -p -d2 -noLP -noPS)
237
238 * Normalized_Minimum_Free_Energy = minimum free energy of the centroid structure divided by the length of the sequence
239
240 * Mature_miRNA_Location = name & location of the mature miRNA within the precursor sequence
241
242 A text file of predicted miRNAs
243 * This file will contain the miRNAs that had a positive classifier score, meaning that PIPmiR believes that they may be novel miRNA genes.
244
245 * This file contains the genomic coordinates of the predicted precursor as well as for the mature and star sequences.
246
247 * The file also contains the precursor sequence and the dot-bracket notation of its secondary structure.
248 ]]></help>
249 <citations>
250 <citation type="doi">10.1101/gr.123547.111</citation> <!-- presumably the PIPmiR publication; verify the DOI resolves to it -->
251 </citations>
252 </tool>