Mercurial > repos > rnateam > sshmm
comparison sshmm.xml @ 0:4b01f0d7b350 draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/sshmm/ commit b578a90031fd7061fbdaef48b6a66d895ac077c3
author | rnateam |
---|---|
date | Fri, 06 Jul 2018 09:01:40 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4b01f0d7b350 |
---|---|
1 <tool id="sshmm" name="ssHMM" version="1.0.7"> | |
<!-- Short tagline for the tool, followed by its single package dependency.
     The sshmm package is pinned to 1.0.7, the same version string that the
     enclosing tool element declares. -->
<description>- RNA sequence-structure motif finder</description>
<requirements>
    <requirement version="1.0.7" type="package">sshmm</requirement>
</requirements>
<!-- Shell pipeline rendered from a Cheetah template. Every stage is chained
     with the shell AND operator, so a failing stage stops the job; job
     failure is detected from the exit code. -->
<command detect_errors="exit_code"><![CDATA[
    ## Stage 1: run the bundled helper script on the genome FASTA and
    ## capture its standard output as chrom.sizes.
    python '$__tool_directory__/fasta_report_sequence_lengths.py' '$genome_fasta_file' > chrom.sizes &&

    ## Stage 2: preprocess the binding sites into prepro_out under the
    ## fixed dataset name prepro_id.
    mkdir prepro_out &&
    preprocess_dataset
        prepro_out
        prepro_id
        '$input_bed_file'
        '$genome_fasta_file'
        chrom.sizes
        ## Disable whichever structure predictor was NOT selected.
        #if $str_pred_method == 'rnashapes':
            --disable_RNAstructure
        #elif $str_pred_method == 'rnastructures':
            --disable_RNAshapes
        #end if
        ## Filtering and elongation options are only passed when the user
        ## chose to specify them manually.
        #if $advanced_prepro_params.advanced_prepro_params_selector == 'ap_specify':
            $advanced_prepro_params.disable_filtering
            --min_score $advanced_prepro_params.min_score
            --min_length $advanced_prepro_params.min_length
            --max_length $advanced_prepro_params.max_length
            --elongation $advanced_prepro_params.elongation
        #end if
    &&

    ## Stage 3: train the model on the positive set written by stage 2.
    mkdir results
    &&
    train_seqstructhmm
        prepro_out/fasta/prepro_id/positive.fasta
        ## The structure file location depends on the chosen predictor.
        #if $str_pred_method == 'rnashapes':
            prepro_out/shapes/prepro_id/positive.txt
        #elif $str_pred_method == 'rnastructures':
            prepro_out/structures/prepro_id/positive.txt
        #end if
        -o results
        #if $advanced_train_params.advanced_train_params_selector == 'ap_specify':
            --motif_length $advanced_train_params.motif_length
            $advanced_train_params.init_random
            --flexibility $advanced_train_params.flexibility
            --block_size $advanced_train_params.block_size
            --threshold $advanced_train_params.threshold
            $advanced_train_params.only_best_shape
        #end if
    &&

    ## Stage 4: rename the job_* directory created under results to the
    ## fixed path res_out that the output definitions read from.
    mv results/job_* results/res_out
]]></command>
<!-- User-facing parameters. Two required datasets, the structure prediction
     method, two optional groups of advanced settings (preprocessing and
     training) and a section of switches that control which optional
     outputs are produced. -->
<inputs>
    <!-- Required datasets -->
    <param name="input_bed_file" type="data" format="bed"
           label="Genomic binding sites BED file"
           help="Genomic BED file containing protein binding sites"/>
    <param name="genome_fasta_file" type="data" format="fasta"
           label="Genome reference FASTA file"
           help="Genomic FASTA file for extracting sequences defined in BED file"/>

    <!-- The selected value decides which predictor is disabled on the
         command line and which structure file is used for training. -->
    <param name="str_pred_method" type="select" label="Select structure prediction method">
        <option value="rnashapes" selected="true">RNAshapes</option>
        <option value="rnastructures">RNAstructures</option>
    </param>

    <!-- Preprocessing options; passed to preprocess_dataset only when
         "Manually specify" is chosen. -->
    <conditional name="advanced_prepro_params">
        <param name="advanced_prepro_params_selector" type="select" label="Advanced preprocessing parameters">
            <option value="ap_not_specify" selected="true">Do not specify</option>
            <option value="ap_specify">Manually specify</option>
        </param>
        <when value="ap_not_specify" />
        <when value="ap_specify">
            <param name="disable_filtering" type="boolean" truevalue="--disable_filtering" falsevalue="" checked="false"
                   label="Skip the filtering step?"/>
            <param name="min_score" type="float" value="0.0"
                   label="Filtering: minimum score for binding site" />
            <param name="min_length" type="integer" value="8"
                   label="Filtering: minimum binding site length" />
            <param name="max_length" type="integer" value="75"
                   label="Filtering: maximum binding site length" />
            <param name="elongation" type="integer" value="20"
                   label="Elongation: span for up- and downstream elongation of binding sites" />
        </when>
    </conditional>

    <!-- Training options; passed to train_seqstructhmm only when
         "Manually specify" is chosen. -->
    <conditional name="advanced_train_params">
        <param name="advanced_train_params_selector" type="select" label="Advanced training parameters">
            <option value="ap_not_specify" selected="true">Do not specify</option>
            <option value="ap_specify">Manually specify</option>
        </param>
        <when value="ap_not_specify" />
        <when value="ap_specify">
            <param name="motif_length" type="integer" value="6"
                   label="Length of the motif that shall be found"/>
            <param name="init_random" type="boolean" truevalue="--random" falsevalue="" checked="false"
                   label="Initialize the model randomly?"
                   help="By default model is initialized with Baum-Welch optimized sequence motif"/>
            <param name="flexibility" type="integer" value="10"
                   label="Greediness of Gibbs sampler"
                   help="Model parameters are sampled from among the top f configurations (default: 10); set f to 0 in order to include all possible configurations"/>
            <param name="block_size" type="integer" value="1"
                   label="Number of sequences to be held out in each iteration"/>
            <param name="threshold" type="integer" value="10"
                   label="Termination threshold"
                   help="The iterative algorithm is terminated if the given reduction in sequence structure log-likelihood is not reached for any of the 3 last measurements (default: 10)"/>
            <!-- NOTE(review): the flag is spelled with underscores so that it
                 follows the same convention as every other long option used
                 by this wrapper; the previous hyphenated spelling was the
                 only exception. Confirm against the train_seqstructhmm
                 help output. -->
            <param name="only_best_shape" type="boolean" truevalue="--only_best_shape" falsevalue="" checked="false"
                   label="Use only best structure for each sequence?"
                   help="Train only using best structure for each sequence (default: use all structures)"/>
        </when>
    </conditional>

    <!-- Switches read by the output filters. Boolean defaults are declared
         with the "checked" attribute, which is the attribute Galaxy uses
         for the default state of a boolean parameter. -->
    <section name="output_options" title="Output options">
        <param name="output_prepro_files" type="boolean" checked="false"
               help="Set to output FASTA file and corresponding structures file for the extracted genomic regions"
               label="Output FASTA file and corresponding structures file?"/>
        <param name="output_logo_files" type="boolean" checked="false"
               help="Set to output logo_global.png, logo_hairpin.png, and logo_best_sequences.png files"
               label="Output logo files?"/>
        <param name="output_raw_files" type="boolean" checked="false"
               help="Set to output final_model.xml, logo_best_sequences.txt, logo_global.txt, logo_hairpin.txt files"
               label="Output raw logo and model files?"/>
    </section>
</inputs>
<!-- Output datasets. Only final_graph_png is unconditional; every other
     dataset is gated by a filter on one of the output_options switches,
     and the two structure files are additionally gated on the selected
     prediction method. All files are collected from fixed paths in the
     job working directory. -->
<outputs>
    <!-- Preprocessing results (output_prepro_files) -->
    <data name="positive_fasta_out_file" format="fasta"
          from_work_dir="prepro_out/fasta/prepro_id/positive.fasta"
          label="${tool.name} on ${on_string} FASTA sequences (fasta)">
        <filter>output_options['output_prepro_files'] is True</filter>
    </data>
    <data name="positive_rnashapes_out_file" format="txt"
          from_work_dir="prepro_out/shapes/prepro_id/positive.txt"
          label="${tool.name} on ${on_string} RNAshapes structures for sequences (txt)">
        <filter>output_options['output_prepro_files'] is True and str_pred_method == 'rnashapes'</filter>
    </data>
    <data name="positive_rnastructures_out_file" format="txt"
          from_work_dir="prepro_out/structures/prepro_id/positive.txt"
          label="${tool.name} on ${on_string} RNAstructures structures for sequences (txt)">
        <filter>output_options['output_prepro_files'] is True and str_pred_method == 'rnastructures'</filter>
    </data>

    <!-- Always produced -->
    <data name="final_graph_png" format="png"
          from_work_dir="results/res_out/final_graph.png"
          label="${tool.name} on ${on_string} final graph (png)"/>

    <!-- Logo images (output_logo_files) -->
    <data name="logo_best_sequences_png" format="png"
          from_work_dir="results/res_out/logo_best_sequences.png"
          label="${tool.name} on ${on_string} logo best sequences (png)">
        <filter>output_options['output_logo_files'] is True</filter>
    </data>
    <data name="logo_global_png" format="png"
          from_work_dir="results/res_out/logo_global.png"
          label="${tool.name} on ${on_string} logo global (png)">
        <filter>output_options['output_logo_files'] is True</filter>
    </data>
    <data name="logo_hairpin_png" format="png"
          from_work_dir="results/res_out/logo_hairpin.png"
          label="${tool.name} on ${on_string} logo hairpin (png)">
        <filter>output_options['output_logo_files'] is True</filter>
    </data>

    <!-- Raw logo text files and the model file (output_raw_files) -->
    <data name="logo_best_sequences_txt" format="txt"
          from_work_dir="results/res_out/logo_best_sequences.txt"
          label="${tool.name} on ${on_string} logo best sequences (txt)">
        <filter>output_options['output_raw_files'] is True</filter>
    </data>
    <data name="logo_global_txt" format="txt"
          from_work_dir="results/res_out/logo_global.txt"
          label="${tool.name} on ${on_string} logo global (txt)">
        <filter>output_options['output_raw_files'] is True</filter>
    </data>
    <data name="logo_hairpin_txt" format="txt"
          from_work_dir="results/res_out/logo_hairpin.txt"
          label="${tool.name} on ${on_string} logo hairpin (txt)">
        <filter>output_options['output_raw_files'] is True</filter>
    </data>
    <data name="final_model_xml" format="xml"
          from_work_dir="results/res_out/final_model.xml"
          label="${tool.name} on ${on_string} final model (xml)">
        <filter>output_options['output_raw_files'] is True</filter>
    </data>
</outputs>
<!-- Two functional tests on the same BED and FASTA inputs, one per
     structure prediction method. Both switch on every optional output, so
     ten datasets are expected: FASTA, one structure file, the final graph,
     three logo images, three logo text files and the model file.
     The FASTA and structure files are compared exactly; all other outputs
     are compared by size only. -->
<tests>
    <!-- RNAstructures, with advanced preprocessing parameters specified -->
    <test expect_num_outputs="10">
        <param name="input_bed_file" value="PUM2_sites_hsa_chrM.bed" ftype="bed"/>
        <param name="genome_fasta_file" value="hsa_chrM.fa" ftype="fasta"/>
        <param name="str_pred_method" value="rnastructures"/>
        <conditional name="advanced_prepro_params">
            <param name="advanced_prepro_params_selector" value="ap_specify"/>
            <param name="elongation" value="20"/>
        </conditional>
        <section name="output_options">
            <param name="output_prepro_files" value="True"/>
            <param name="output_logo_files" value="True"/>
            <param name="output_raw_files" value="True"/>
        </section>
        <output name="positive_fasta_out_file" file="hsa_chrM_positive.fasta" ftype="fasta"/>
        <output name="positive_rnastructures_out_file" file="hsa_chrM_structures_positive.txt" ftype="txt"/>
        <output name="final_graph_png" file="test_structure_final_graph.png" ftype="png" compare="sim_size" delta="40000"/>
        <output name="logo_global_png" file="test_structure_logo_global.png" ftype="png" compare="sim_size" delta="5000"/>
        <output name="logo_hairpin_png" file="test_structure_logo_hairpin.png" ftype="png" compare="sim_size" delta="5000"/>
        <output name="logo_best_sequences_png" file="test_structure_logo_best_sequences.png" ftype="png" compare="sim_size" delta="5000"/>
        <output name="logo_global_txt" file="test_structure_logo_global.txt" ftype="txt" compare="sim_size"/>
        <output name="logo_hairpin_txt" file="test_structure_logo_hairpin.txt" ftype="txt" compare="sim_size"/>
        <output name="logo_best_sequences_txt" file="test_structure_logo_best_sequences.txt" ftype="txt" compare="sim_size"/>
        <output name="final_model_xml" file="test_structure_final_model.xml" ftype="xml" compare="sim_size"/>
    </test>

    <!-- RNAshapes, with default preprocessing and training parameters -->
    <test expect_num_outputs="10">
        <param name="input_bed_file" value="PUM2_sites_hsa_chrM.bed" ftype="bed"/>
        <param name="genome_fasta_file" value="hsa_chrM.fa" ftype="fasta"/>
        <param name="str_pred_method" value="rnashapes"/>
        <section name="output_options">
            <param name="output_prepro_files" value="True"/>
            <param name="output_logo_files" value="True"/>
            <param name="output_raw_files" value="True"/>
        </section>
        <output name="positive_fasta_out_file" file="hsa_chrM_positive.fasta" ftype="fasta"/>
        <output name="positive_rnashapes_out_file" file="hsa_chrM_shapes_positive.txt" ftype="txt"/>
        <output name="final_graph_png" file="test_shapes_final_graph.png" ftype="png" compare="sim_size" delta="40000"/>
        <output name="logo_global_png" file="test_shapes_logo_global.png" ftype="png" compare="sim_size" delta="5000"/>
        <output name="logo_hairpin_png" file="test_shapes_logo_hairpin.png" ftype="png" compare="sim_size" delta="5000"/>
        <output name="logo_best_sequences_png" file="test_shapes_logo_best_sequences.png" ftype="png" compare="sim_size" delta="5000"/>
        <output name="logo_global_txt" file="test_shapes_logo_global.txt" ftype="txt" compare="sim_size"/>
        <output name="logo_hairpin_txt" file="test_shapes_logo_hairpin.txt" ftype="txt" compare="sim_size"/>
        <output name="logo_best_sequences_txt" file="test_shapes_logo_best_sequences.txt" ftype="txt" compare="sim_size"/>
        <output name="final_model_xml" file="test_shapes_final_model.xml" ftype="xml" compare="sim_size"/>
    </test>
</tests>
<!-- Help text shown on the tool form. The body is kept flush left because
     it is rendered as reStructuredText, where leading indentation is
     significant. The documentation link uses https. -->
<help><![CDATA[

ssHMM is an RNA motif finder that recovers sequence-structure motifs from RNA-binding protein data, such as CLIP-Seq data. The tool input consists of a BED file with genomic binding regions and the corresponding genome reference FASTA file. For structure prediction, the user can select between RNAshapes and RNAstructures. Advanced parameters can be set for both the preprocessing and the training stage.

The output consists of a graph showing the found sequence motifs for the 5 structural contexts multiloop, hairpin, stem, internal loop, and exterior loop (output in .png format). The height of the nucleotides corresponds to their emission probabilities, while the thickness of the arrows corresponds to their transition probabilities. Additional files (intermediate, logo, model, raw) can be selected for output in the "Output options" section.

For more details have a look at the online documentation:

https://sshmm.readthedocs.io/en/latest/index.html

]]></help>
<!-- Reference to cite when using this tool, given as a DOI
     (presumably the ssHMM publication; verify before changing). -->
<citations>
    <citation type="doi">10.1093/nar/gkx756</citation>
</citations>
246 </tool> |