comparison sshmm.xml @ 0:4b01f0d7b350 draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/sshmm/ commit b578a90031fd7061fbdaef48b6a66d895ac077c3
author rnateam
date Fri, 06 Jul 2018 09:01:40 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4b01f0d7b350
1 <tool id="sshmm" name="ssHMM" version="1.0.7">
2 <description>- RNA sequence-structure motif finder</description>
<!-- Runtime dependency: the sshmm package, pinned to version 1.0.7 (the same number as the tool's own version attribute). -->
3 <requirements>
4 <requirement type="package" version="1.0.7">sshmm</requirement>
5 </requirements>
<!--
  Job script (Cheetah template). The stages are chained with the shell AND operator,
  so a failing stage stops the job; errors are detected from the exit code.

  1. fasta_report_sequence_lengths.py (shipped in the tool directory) is run on the
     genome FASTA and its stdout is saved as chrom.sizes.
  2. preprocess_dataset is called with prepro_out, prepro_id, the BED file, the genome
     FASTA and chrom.sizes. The paths used later (prepro_out/.../prepro_id/positive.*)
     suggest the first two are the output directory and the dataset name.
     Selecting rnashapes passes the flag that disables RNAstructure and vice versa.
     The filtering/elongation options are only added when the advanced preprocessing
     selector is set to ap_specify.
  3. train_seqstructhmm is run on positive.fasta plus the structure file of the
     selected prediction method and writes into the results directory (-o).
     The training options are only added when the advanced training selector is
     set to ap_specify.
  4. The results/job_* directory is renamed to results/res_out, the fixed path that
     the from_work_dir attributes of the outputs read from.
-->
6 <command detect_errors="exit_code"><![CDATA[
7 python '$__tool_directory__/fasta_report_sequence_lengths.py' '$genome_fasta_file' > chrom.sizes &&
8 mkdir prepro_out &&
9 preprocess_dataset
10 prepro_out
11 prepro_id
12 '$input_bed_file'
13 '$genome_fasta_file'
14 chrom.sizes
15 #if $str_pred_method == 'rnashapes':
16 --disable_RNAstructure
17 #elif $str_pred_method == 'rnastructures':
18 --disable_RNAshapes
19 #end if
20 #if $advanced_prepro_params.advanced_prepro_params_selector == 'ap_specify':
21 $advanced_prepro_params.disable_filtering
22 --min_score $advanced_prepro_params.min_score
23 --min_length $advanced_prepro_params.min_length
24 --max_length $advanced_prepro_params.max_length
25 --elongation $advanced_prepro_params.elongation
26 #end if
27 &&
28 mkdir results
29 &&
30 train_seqstructhmm
31 prepro_out/fasta/prepro_id/positive.fasta
32 #if $str_pred_method == 'rnashapes':
33 prepro_out/shapes/prepro_id/positive.txt
34 #elif $str_pred_method == 'rnastructures':
35 prepro_out/structures/prepro_id/positive.txt
36 #end if
37 -o results
38 #if $advanced_train_params.advanced_train_params_selector == 'ap_specify':
39 --motif_length $advanced_train_params.motif_length
40 $advanced_train_params.init_random
41 --flexibility $advanced_train_params.flexibility
42 --block_size $advanced_train_params.block_size
43 --threshold $advanced_train_params.threshold
44 $advanced_train_params.only_best_shape
45 #end if
46 &&
47 mv results/job_* results/res_out
48 ]]></command>
49 <inputs>
<!-- Required inputs: the binding-site BED file, the reference genome FASTA, and the structure prediction method.
     The value of str_pred_method is tested both in the command template and in the output filters. -->
50 <param name="input_bed_file" type="data" format="bed"
51 label="Genomic binding sites BED file"
52 help="Genomic BED file containing protein binding sites"/>
53 <param name="genome_fasta_file" type="data" format="fasta"
54 label="Genome reference FASTA file"
55 help="Genomic FASTA file for extracting sequences defined in BED file"/>
56 <param name="str_pred_method" type="select" label="Select structure prediction method">
57 <option value="rnashapes" selected="true">RNAshapes</option>
58 <option value="rnastructures">RNAstructures</option>
59 </param>
<!-- Optional settings for the preprocess_dataset call.
     ap_not_specify (default): no extra options are added to the command.
     ap_specify: the four numeric options below are always passed (with these defaults unless edited),
     and the disable_filtering flag is added only when its checkbox is ticked. -->
60 <conditional name="advanced_prepro_params">
61 <param name="advanced_prepro_params_selector" type="select" label="Advanced preprocessing parameters">
62 <option value="ap_not_specify" selected="true">Do not specify</option>
63 <option value="ap_specify">Manually specify</option>
64 </param>
65 <when value="ap_not_specify" />
66 <when value="ap_specify">
67 <param name="disable_filtering" truevalue="--disable_filtering" falsevalue="" checked="False"
68 label="Skip the filtering step?" type="boolean"/>
69 <param name="min_score" type="float" value="0.0"
70 label="Filtering: minimum score for binding site" />
71 <param name="min_length" type="integer" value="8"
72 label="Filtering: minimum binding site length" />
73 <param name="max_length" type="integer" value="75"
74 label="Filtering: maximum binding site length" />
75 <param name="elongation" type="integer" value="20"
76 label="Elongation: span for up- and downstream elongation of binding sites" />
77 </when>
78 </conditional>
<!-- Optional settings for the train_seqstructhmm call.
     ap_not_specify (default): no extra options are added to the command.
     ap_specify: motif_length, flexibility, block_size and threshold are always passed (with these defaults
     unless edited); the two boolean flags are added only when ticked.
     The truevalue of each boolean is inserted verbatim into the command line, so it has to match the
     option name of train_seqstructhmm exactly; ssHMM spells its long options with underscores. -->
79 <conditional name="advanced_train_params">
80 <param name="advanced_train_params_selector" type="select" label="Advanced training parameters">
81 <option value="ap_not_specify" selected="true">Do not specify</option>
82 <option value="ap_specify">Manually specify</option>
83 </param>
84 <when value="ap_not_specify" />
85 <when value="ap_specify">
86 <param name="motif_length" type="integer" value="6"
87 label="Length of the motif that shall be found"/>
88 <param name="init_random" truevalue="--random" falsevalue="" checked="False"
89 label="Initialize the model randomly?" type="boolean"
90 help="By default model is initialized with Baum-Welch optimized sequence motif"/>
91 <param name="flexibility" type="integer" value="10"
92 label="Greediness of Gibbs sampler"
93 help="Model parameters are sampled from among the top f configurations (default: 10); set f to 0 in order to include all possible configurations"/>
94 <param name="block_size" type="integer" value="1"
95 label="Number of sequences to be held out in each iteration"/>
96 <param name="threshold" type="integer" value="10"
97 label="Termination threshold"
98 help="The iterative algorithm is terminated if the given reduction in sequence structure log-likelihood is not reached for any of the 3 last measurements (default: 10)"/>
99 <param name="only_best_shape" truevalue="--only_best_shape" falsevalue="" checked="False"
100 label="Use only best structure for each sequence?" type="boolean"
101 help="Train only using best structure for each sequence (default: use all structures)"/>
102 </when>
103 </conditional>
<!-- Switches that decide which optional datasets are collected. They are read only by the filter
     expressions in the outputs section and are never placed on the command line.
     All three default to unchecked; the default of a boolean parameter is declared with the
     checked attribute, as for the other boolean parameters of this tool. -->
104 <section name="output_options" title="Output options">
105 <param name="output_prepro_files" type="boolean" checked="False"
106 help="Set to output FASTA file and corresponding structures file for the extracted genomic regions"
107 label="Output FASTA file and corresponding structures file?"/>
108 <param name="output_logo_files" type="boolean" checked="False"
109 help="Set to output logo_global.png, logo_hairpin.png, and logo_best_sequences.png files"
110 label="Output logo files?"/>
111 <param name="output_raw_files" type="boolean" checked="False"
112 help="Set to output final_model.xml, logo_best_sequences.txt, logo_global.txt, logo_hairpin.txt files"
113 label="Output raw logo and model files?"/>
114 </section>
115 </inputs>
116 <outputs>
<!-- Preprocessing artefacts, collected only when output_prepro_files is set.
     The FASTA file is collected for either prediction method; of the two structure files only the one
     matching str_pred_method is collected. -->
117 <data format="fasta" name="positive_fasta_out_file"
118 label="${tool.name} on ${on_string} FASTA sequences (fasta)"
119 from_work_dir="prepro_out/fasta/prepro_id/positive.fasta">
120 <filter>
121 output_options['output_prepro_files'] is True
122 </filter>
123 </data>
124 <data format="txt" name="positive_rnashapes_out_file"
125 label="${tool.name} on ${on_string} RNAshapes structures for sequences (txt)"
126 from_work_dir="prepro_out/shapes/prepro_id/positive.txt">
127 <filter>
128 output_options['output_prepro_files'] is True and str_pred_method == 'rnashapes'
129 </filter>
130 </data>
131 <data format="txt" name="positive_rnastructures_out_file"
132 label="${tool.name} on ${on_string} RNAstructures structures for sequences (txt)"
133 from_work_dir="prepro_out/structures/prepro_id/positive.txt">
134 <filter>
135 output_options['output_prepro_files'] is True and str_pred_method == 'rnastructures'
136 </filter>
137 </data>
<!-- Motif graph image. It has no filter, so it is the one output that is always collected. -->
138 <data format="png" name="final_graph_png"
139 label="${tool.name} on ${on_string} final graph (png)"
140 from_work_dir="results/res_out/final_graph.png"/>
<!-- Logo images (best sequences, global, hairpin), read from results/res_out.
     All three are collected only when output_logo_files is set. -->
141 <data format="png" name="logo_best_sequences_png"
142 label="${tool.name} on ${on_string} logo best sequences (png)"
143 from_work_dir="results/res_out/logo_best_sequences.png">
144 <filter>
145 output_options['output_logo_files'] is True
146 </filter>
147 </data>
148 <data format="png" name="logo_global_png"
149 label="${tool.name} on ${on_string} logo global (png)"
150 from_work_dir="results/res_out/logo_global.png">
151 <filter>
152 output_options['output_logo_files'] is True
153 </filter>
154 </data>
155 <data format="png" name="logo_hairpin_png"
156 label="${tool.name} on ${on_string} logo hairpin (png)"
157 from_work_dir="results/res_out/logo_hairpin.png">
158 <filter>
159 output_options['output_logo_files'] is True
160 </filter>
161 </data>
<!-- Text counterparts of the logos (best sequences, global, hairpin), read from results/res_out.
     All three are collected only when output_raw_files is set. -->
162 <data format="txt" name="logo_best_sequences_txt"
163 label="${tool.name} on ${on_string} logo best sequences (txt)"
164 from_work_dir="results/res_out/logo_best_sequences.txt">
165 <filter>
166 output_options['output_raw_files'] is True
167 </filter>
168 </data>
169 <data format="txt" name="logo_global_txt"
170 label="${tool.name} on ${on_string} logo global (txt)"
171 from_work_dir="results/res_out/logo_global.txt">
172 <filter>
173 output_options['output_raw_files'] is True
174 </filter>
175 </data>
176 <data format="txt" name="logo_hairpin_txt"
177 label="${tool.name} on ${on_string} logo hairpin (txt)"
178 from_work_dir="results/res_out/logo_hairpin.txt">
179 <filter>
180 output_options['output_raw_files'] is True
181 </filter>
182 </data>
183
<!-- Final model file (final_model.xml); gated by the same output_raw_files switch as the raw logo text files. -->
184 <data format="xml" name="final_model_xml"
185 label="${tool.name} on ${on_string} final model (xml)"
186 from_work_dir="results/res_out/final_model.xml">
187 <filter>
188 output_options['output_raw_files'] is True
189 </filter>
190 </data>
191 </outputs>
192 <tests>
<!-- Test 1: RNAstructures run with the advanced preprocessing branch enabled (elongation kept at 20)
     and all three output switches on, which yields 10 datasets.
     The preprocessing FASTA and structure files are compared by content; images, logo text files and the
     model are compared by file size only (sim_size).
     NOTE(review): presumably size comparison is used because training results differ between runs; confirm. -->
193 <test expect_num_outputs="10">
194 <param name="input_bed_file" value="PUM2_sites_hsa_chrM.bed" ftype="bed"/>
195 <param name="genome_fasta_file" value="hsa_chrM.fa" ftype="fasta"/>
196 <param name="str_pred_method" value="rnastructures"/>
197 <param name="advanced_prepro_params_selector" value="ap_specify"/>
198 <param name="elongation" value="20"/>
199 <param name="output_prepro_files" value="True"/>
200 <param name="output_logo_files" value="True"/>
201 <param name="output_raw_files" value="True"/>
202 <output name="positive_fasta_out_file" file="hsa_chrM_positive.fasta" ftype="fasta"/>
203 <output name="positive_rnastructures_out_file" file="hsa_chrM_structures_positive.txt" ftype="txt"/>
204 <output name="final_graph_png" file="test_structure_final_graph.png" ftype="png" compare="sim_size" delta="40000"/>
205 <output name="logo_global_png" file="test_structure_logo_global.png" ftype="png" compare="sim_size" delta="5000"/>
206 <output name="logo_hairpin_png" file="test_structure_logo_hairpin.png" ftype="png" compare="sim_size" delta="5000"/>
207 <output name="logo_best_sequences_png" file="test_structure_logo_best_sequences.png" ftype="png" compare="sim_size" delta="5000"/>
208 <output name="logo_global_txt" file="test_structure_logo_global.txt" ftype="txt" compare="sim_size"/>
209 <output name="logo_hairpin_txt" file="test_structure_logo_hairpin.txt" ftype="txt" compare="sim_size"/>
210 <output name="logo_best_sequences_txt" file="test_structure_logo_best_sequences.txt" ftype="txt" compare="sim_size"/>
211 <output name="final_model_xml" file="test_structure_final_model.xml" ftype="xml" compare="sim_size"/>
212 </test>
<!-- Test 2: RNAshapes run with both advanced selectors left at their defaults and all three output
     switches on, which yields 10 datasets (the RNAshapes structure file replaces the RNAstructures one).
     The preprocessing FASTA and structure files are compared by content; images, logo text files and the
     model are compared by file size only (sim_size). -->
213 <test expect_num_outputs="10">
214 <param name="input_bed_file" value="PUM2_sites_hsa_chrM.bed" ftype="bed"/>
215 <param name="genome_fasta_file" value="hsa_chrM.fa" ftype="fasta"/>
216 <param name="str_pred_method" value="rnashapes"/>
217 <param name="output_prepro_files" value="True"/>
218 <param name="output_logo_files" value="True"/>
219 <param name="output_raw_files" value="True"/>
220 <output name="positive_fasta_out_file" file="hsa_chrM_positive.fasta" ftype="fasta"/>
221 <output name="positive_rnashapes_out_file" file="hsa_chrM_shapes_positive.txt" ftype="txt"/>
222 <output name="final_graph_png" file="test_shapes_final_graph.png" ftype="png" compare="sim_size" delta="40000"/>
223 <output name="logo_global_png" file="test_shapes_logo_global.png" ftype="png" compare="sim_size" delta="5000"/>
224 <output name="logo_hairpin_png" file="test_shapes_logo_hairpin.png" ftype="png" compare="sim_size" delta="5000"/>
225 <output name="logo_best_sequences_png" file="test_shapes_logo_best_sequences.png" ftype="png" compare="sim_size" delta="5000"/>
226 <output name="logo_global_txt" file="test_shapes_logo_global.txt" ftype="txt" compare="sim_size"/>
227 <output name="logo_hairpin_txt" file="test_shapes_logo_hairpin.txt" ftype="txt" compare="sim_size"/>
228 <output name="logo_best_sequences_txt" file="test_shapes_logo_best_sequences.txt" ftype="txt" compare="sim_size"/>
229 <output name="final_model_xml" file="test_shapes_final_model.xml" ftype="xml" compare="sim_size"/>
230 </test>
231 </tests>
232 <help><![CDATA[
233
234 ssHMM is an RNA motif finder that recovers sequence-structure motifs from RNA-binding protein data, such as CLIP-Seq data. The tool input consists of a BED file with genomic binding regions and the corresponding genome reference FASTA file. For structure prediction, the user can select between RNAshapes and RNAstructures. Advanced parameters can be set for both the preprocessing and the training stage.
235
236 The output consists of a graph showing the found sequence motifs for the 5 structural contexts multiloop, hairpin, stem, internal loop, and exterior loop (output in .png format). The height of the nucleotides corresponds to their emission probabilities, while the thickness of the arrows corresponds to their transition probabilities. Additional files (intermediate, logo, model, raw) can be selected for output in the "Output options" section.
237
238 For more details have a look at the online documentation:
239
240 https://sshmm.readthedocs.io/en/latest/index.html
241
242 ]]></help>
<!-- Reference to cite for this tool, given as a DOI. -->
243 <citations>
244 <citation type="doi">10.1093/nar/gkx756</citation>
245 </citations>
246 </tool>