comparison scoary.xml @ 0:42a1a5750539 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scoary commit ce823d6021a7afbc2c49ba60e32faababaffd870"
author iuc
date Sun, 21 Mar 2021 12:21:41 +0000
parents
children 77d50ec2bcf2
comparison
equal deleted inserted replaced
-1:000000000000 0:42a1a5750539
1 <tool id="scoary" name="Scoary" version="@TOOL_VERSION@+galaxy0" profile="20.01">
2 <description>calculates the associations between all genes in the accessory genome and the traits</description>
3 <macros>
4 <token name="@TOOL_VERSION@">1.6.16</token>
5 </macros>
6 <requirements>
7 <requirement type="package" version="@TOOL_VERSION@">scoary</requirement>
8 </requirements>
9 <version_command>scoary --version</version_command>
10 <command detect_errors="exit_code"><![CDATA[
11 scoary
12
13 ###########
14 ## Input ##
15 ###########
16
17 -t '$input_traits'
18 -g '$input_genes'
19
20 #if $input_restricts:
21 -r '$input_restricts'
22 #end if
23
24 ########################
25 ## Additional Options ##
26 ########################
27
28 #if len($additional_options.series_pc) != 0
29 -p #echo " ".join([ "'%s'" % $s.pvalue for $i, $s in enumerate($additional_options.series_pc) ])
30 -c #echo " ".join([ "'%s'" % $s.correction for $i, $s in enumerate($additional_options.series_pc) ])
31 #end if
32 ## A value of 0 means the option is disabled, so -e and -m are only passed for non-zero values.
33 #if $additional_options.permute != 0:
34 -e $additional_options.permute
35 #end if
36
37 #if $additional_options.maxhits != 0:
38 -m $additional_options.maxhits
39 #end if
40
41 $additional_options.collapse
42 $output_options.upgma
43 ## The custom tree is declared inside the additional_options section, so it is referenced fully qualified.
44 #if $additional_options.input_newicktree:
45 -n '$additional_options.input_newicktree'
46 #end if
47
48 #########
49 ## END ##
50 #########
51 ## Scoary writes one CSV per trait; they are merged into one tab-separated file. The tree is only moved when -u was requested.
52 --no-time
53 && tail -n +1 *.csv | sed "s/\,/\\t/g" > scoary_output.tsv
54 #if $output_options.upgma:
55 && mv *.nwk scoary_output.nwk
56 #end if
57 ]]></command>
58 <inputs>
59 <param name="input_traits" argument="-t" type="data" format="csv" label="Trait table"/>
60 <param name="input_genes" argument="-g" type="data" format="csv" label="Gene Presence/Absence table from ROARY (default output)"/>
61 <param name="input_restricts" optional="true" argument="-r" type="data" format="tabular" label="Table to analyze a subset of strains" />
62
63 <!-- Additional Options: p-value cutoffs and corrections, permutations, hit limit, gene collapsing and an optional custom tree -->
64 <section name="additional_options" title="Additional Options">
65 <repeat name="series_pc" title="P-value cutoff(s) and Correction(s)">
66 <param name="pvalue" argument="-p" type="float" min="0" max="1.0" value="0.05" label="P-value cutoff for one Trait" help="SCOARY will not report genes with higher p-values than this (Default=1.0=All). Provide a single value (applied to all) or exactly as many values as correction criteria and in corresponding order (e.g., 0.05 0.05 for two traits)."/>
67 <param name="correction" argument="-c" type="select" label="P-value correction" help="Apply the p-value corrections to the p-value cutoffs you have entered (Default = Individual p-value)." >
68 <option value="I" selected="true">Individual (naive) p-value</option>
69 <option value="B">Bonferroni adjusted p-value</option>
70 <option value="BH">Benjamini-Hochberg adjusted p</option>
71 <option value="PW">Best (lowest) pairwise comparison</option>
72 <option value="EPW">Entire range of pairwise comparison p-values</option>
73 <option value="P">Empirical p-value from permutations</option>
74 </param>
75 </repeat>
76 <param name="permute" argument="-e" type="integer" min="0" value="0" label="Permutations" help="Perform N number of permutations of the significant results post-analysis. (Default = 0 = None)" />
77 <param name="maxhits" argument="-m" type="integer" min="0" value="0" label="Maximal number of hits to report" help="SCOARY will only report the top max hits results per trait. (Default = 0 = All)" />
78 <param name="collapse" argument="--collapse" type="boolean" checked="false" truevalue="--collapse" falsevalue="" label="Collapse correlated genes" help="Collapse correlated genes (genes that have identical distribution patterns in the sample) into merged units. (Default=false)"/>
79 <param name="input_newicktree" optional="true" argument="-n" type="data" format="newick" label="Supply a custom tree (Newick format) for phylogenetic analyses instead of calculating it internally." />
80 </section>
81
82 <!-- Output Options: controls whether the optional Newick tree output is produced -->
83 <section name="output_options" title="Output Options" expanded="true">
84 <param name="upgma" argument="-u" type="boolean" checked="false" truevalue="-u" falsevalue="" label="UPGMA tree" help="Calculate UPGMA tree to a newick file."/>
85 </section>
86 </inputs>
87 <outputs> <!-- The table output is declared unconditionally; the Newick tree output is filtered on the UPGMA option. -->
88 <data name="out_tabular" format="tabular" from_work_dir="scoary_output.tsv" label="${tool.name} on ${on_string}: Table" />
89 <data name="out_newick" format="newick" from_work_dir="scoary_output.nwk" label="${tool.name} on ${on_string}: Tree">
90 <filter>(output_options['upgma'] is True)</filter>
91 </data>
92 </outputs>
93 <tests>
94 <test expect_num_outputs="2"> <!-- One cutoff (naive p-value, 0.05) with the UPGMA tree enabled; expects table and tree -->
95 <param name="input_traits" ftype="csv" value="Tetracycline_resistance.csv" />
96 <param name="input_genes" ftype="csv" value="Gene_presence_absence.csv" />
97 <param name="upgma" value="Yes" />
98 <repeat name="series_pc">
99 <param name="pvalue" value="0.05"/>
100 <param name="correction" value="I"/>
101 </repeat>
102 <output name="out_tabular" file="scoary_output.tsv" ftype="tabular" sort="true">
103 <assert_contents>
104 <has_n_lines n="573" />
105 <has_line line="==&gt; Bogus_trait.results.csv &lt;==" />
106 <has_line line="==&gt; Tetracycline_resistance.results.csv &lt;==" />
107 </assert_contents>
108 </output>
109 <output name="out_newick" file="scoary_output.nwk" ftype="newick" />
110 </test>
111 <test expect_num_outputs="2"> <!-- Two combined cutoffs (naive p-value plus entire pairwise range) with the UPGMA tree enabled -->
112 <param name="input_traits" ftype="csv" value="Tetracycline_resistance.csv" />
113 <param name="input_genes" ftype="csv" value="Gene_presence_absence.csv" />
114 <param name="upgma" value="Yes" />
115 <repeat name="series_pc">
116 <param name="pvalue" value="0.05"/>
117 <param name="correction" value="I"/>
118 </repeat>
119 <repeat name="series_pc">
120 <param name="pvalue" value="0.05"/>
121 <param name="correction" value="EPW"/>
122 </repeat>
123 <output name="out_tabular" file="scoary_output_2.tsv" ftype="tabular" sort="true">
124 <assert_contents>
125 <has_n_lines n="27" />
126 <has_line line="==&gt; Bogus_trait.results.csv &lt;==" />
127 <has_line line="==&gt; Tetracycline_resistance.results.csv &lt;==" />
128 </assert_contents>
129 </output>
130 <output name="out_newick" file="scoary_output_2.nwk" ftype="newick" />
131 </test>
132 </tests>
133
134 <help><![CDATA[
135
136 .. class:: infomark
137
138 **What it does**
139
140 -------------------
141
142 **Scoary**
143
144 Scoary is designed to take the csv file from Roary as well as a traits file created by the user and calculate the associations between all genes in the accessory genome and the traits. It reports a list of genes sorted by strength of association per trait.
145
146 -------------------
147
148 **Inputs**
149
150 -------------------
151
152 Scoary requires two input files: csv file from Roary and a list of traits to test associations to.
153 Traits can be anything as long as you can classify it into binary categories (e.g. antibiotic resistance, group membership (yes/no), MIC value higher/lower than 16).
154 Make sure your entries are separated by ','.
155 The traits file needs to be formatted in a specific way (please take a look at the `documentation <https://github.com/AdmiralenOla/Scoary>`__).
156
157 You can also use as input the pan-genome as called from Jason Sahl's program LS-BSR (Large-Scale Blast Score Ratio).
158 The program includes a python script for converting LS-BSR output to the Roary/Scoary format.
159
160 Trait presence is indicated by 1, trait absence by 0.
161 Assumes strain names in the first column and trait names in the first row.
162
163 Input gene presence/absence table (comma-separated-values) from ROARY.
164 Strain names must be equal to those in the trait table.
165
166 -----------
167
168 **Outputs**
169
170 -----------
171
172 Scoary outputs a single csv traits file. It uses comma "," as a delimiter.
173 The results consist of genes that were found to be associated with the trait, sorted according to significance.
174 By default, Scoary reports all genes with a naive p-value < 0.05.
175
176 You can find the description of the columns in the `documentation <https://github.com/AdmiralenOla/Scoary>`__.
177
178 --------------------
179
180 **More Information**
181
182 --------------------
183
184 See the excellent `Scoary documentation`_
185
186 .. _`Scoary documentation`: https://github.com/AdmiralenOla/Scoary
187
188
189 **P-value cutoff (-p)**: For Fishers, Bonferronis, and Benjamini-Hochbergs tests, SCOARY will not report genes with higher p-values than this.
190 For empirical p-values, this is treated as an alpha level instead.
191 I.e. 0.02 will filter all genes except the lower and upper percentile from this test.
192 Run with "-p 1.0" to report all genes. Accepts standard form (e.g. 1E-8).
193 Provide a single value (applied to all) or exactly as many values as correction criteria and in corresponding order (e.g., 0.05 0.1 0.05 0.02).
194
195 **Correction (-c)**: Apply the indicated filtration measure: I=Individual (naive) p-value, B=Bonferroni adjusted p-value, BH=Benjamini-Hochberg adjusted p, PW=Best (lowest) pairwise comparison, EPW=Entire range of pairwise comparison p-values, P=Empirical p-value from permutations.
196 You can enter as many correction criteria as you would like.
197 These will be associated with the p-value cutoffs you enter.
198 For example "-c I EPW -p 0.1 0.05" will apply the following cutoffs: Naive p-value must be lower than 0.1 AND the entire range of pairwise comparison values are below 0.05 for this gene.
199 Note that the empirical p-values should be interpreted at both tails.
200 Therefore, running "-c P -p 0.05" will apply an alpha of 0.05 to the empirical (permuted) p-values, i.e. it will filter everything except the upper and lower 2.5 percent of the distribution.
201
202 **Permute (-e)**: Perform N number of permutations of the significant results post-analysis.
203 Each permutation will do a label switching of the phenotype and a new p-value is calculated according to this new dataset.
204 After all N permutations are completed, the results are ordered in ascending order, and the percentile of the original result in the permuted p-value distribution is reported.
205
206 --------------------
207
208 **Galaxy Wrapper Development**
209
210 --------------------
211
212 Author: Florian Heyl
213
214 ]]></help>
215 <citations>
216 <citation type="doi">10.1038/s41467-020-15171-6</citation>
217 </citations>
218 </tool>