comparison fastspar_pvalues.xml @ 0:7ff1304e12aa draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/fastspar commit 0e305d21d0634a1788b9105ec4d0ab1c2da62359
author iuc
date Thu, 19 Jun 2025 21:51:18 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:7ff1304e12aa
1 <tool id="fastspar_pvalues" name="FastSpar: estimate p-values" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
<!-- Short summary displayed next to the tool name. -->
<description>
    Bootstrap-based estimation of p-values from FastSpar correlations
</description>
<!-- Shared macro definitions are imported from macros.xml
     (presumably also the version/profile tokens used on the tool element; confirm there). -->
<macros>
    <import>macros.xml</import>
</macros>
<!-- Expanded from the imported macros: bio.tools reference and the
     software requirements for the p-value pipeline. -->
<expand macro="biotools"/>
<expand macro="requirements_pvalues"/>
<command detect_errors="exit_code"><![CDATA[
## Pipeline overview:
##   0. (optional) compute the reference correlation/covariance matrices
##   1. draw bootstrap replicates of the OTU table
##   2. run fastspar on every replicate (GNU parallel, one job per slot)
##   3. derive p-values by comparing the replicates to the reference matrix

## Stage 0: either compute the reference correlation matrix here or take the
## one supplied by the user; correlation_file is consumed in stage 3.
#if $correlation.select == "new"
    fastspar
        --otu_table '$otu_table'
        --iterations $iterations
        --exclude_iterations $exclude_iterations
        --threshold $threshold
        --seed $seed
        --correlation '$output_correlation'
        --covariance '$output_covariance'
        --threads \${GALAXY_SLOTS:-1}
        ## Skip warning prompt and continue analysis even if the input contains OTUs with just one permutation.
        --yes &&
    #set $correlation_file = $output_correlation
#else
    #set $correlation_file = $correlation.input_file
#end if

## Stage 1: write the bootstrap replicates using the prefix bootstrap_counts/data.
mkdir bootstrap_counts
&& fastspar_bootstrap
    --otu_table '$otu_table'
    --number $number
    --prefix bootstrap_counts/data
    --seed $seed
    --threads \${GALAXY_SLOTS:-1}

## Stage 2: one single-threaded fastspar run per replicate; {/} is the
## basename of the replicate, so outputs are named cor_<replicate> and cov_<replicate>.
&& mkdir bootstrap_correlation
&& parallel
    --max-procs \${GALAXY_SLOTS:-1}
    fastspar
        --otu_table {}
        --correlation bootstrap_correlation/cor_{/}
        --covariance bootstrap_correlation/cov_{/}
        --iterations $iterations
        --exclude_iterations $exclude_iterations
        --threshold $threshold
        --seed $seed
        ## Same as in stage 0: never wait for an interactive answer. Resampled
        ## tables can contain OTUs with just one permutation, and a batch job
        ## cannot respond to the prompt.
        --yes
    ::: bootstrap_counts/*

## Stage 3: the prefix must match the cor_ files written in stage 2.
&& fastspar_pvalues
    --otu_table '$otu_table'
    --correlation '$correlation_file'
    --prefix bootstrap_correlation/cor_data_
    --permutations $number
    $pseudo
    --threads \${GALAXY_SLOTS:-1}
    --outfile '$pvalues'
]]></command>
<inputs>
    <!-- Count table used by every stage of the command. -->
    <param argument="--otu_table"
           type="data"
           format="tabular"
           label="Input OTU table"
           help="The table must contain absolute OTU counts in plain tabular (TSV) format, with OTUs as rows and samples as columns. Do not include any metadata rows or columns."/>

    <!-- Source of the correlation matrix the bootstrap results are tested against. -->
    <conditional name="correlation">
        <param name="select"
               type="select"
               label="Tested correlation matrix"
               help="For meaningful p-values, the parameters used during bootstrapped correlation estimation should be identical to those used for the FastSpar run which produced the correlation matrix. &lt;br&gt;For your convenience you can choose to calculate the correlation matrix here. In that case the seed used for the calculation is the same one used for generating the bootstrapped samples.">
            <option value="new">Recalculate the correlation matrix</option>
            <option value="original">Use an existing correlation matrix</option>
        </param>
        <!-- "new" needs no extra input: the matrix is computed by the command itself. -->
        <when value="new"/>
        <when value="original">
            <param name="input_file"
                   argument="--correlation"
                   type="data"
                   format="tabular"
                   label="Correlation table"
                   help="The correlation matrix generated by the original FastSpar analysis."/>
        </when>
    </conditional>

    <!-- Number of bootstrap replicates; also used as the permutation count for the p-value step. -->
    <param argument="--number"
           type="integer"
           min="10"
           max="10000"
           value="1000"
           label="Number of bootstrap samples"
           help="Recommended minimum: 1000 bootstrap samples for robust estimation."/>

    <!-- Parameters expanded from the imported macros (the command reads
         iterations, exclude_iterations and threshold; presumably defined here). -->
    <expand macro="fastspar_tool_parameters"/>

    <!-- One seed is shared by all seeded steps of the command. -->
    <param argument="--seed"
           type="integer"
           value="1"
           label="Seed to ensure reproducibility of bootstrapped samples."/>

    <!-- Rendered as the flag itself when checked and as an empty string otherwise. -->
    <param argument="--pseudo"
           type="boolean"
           truevalue="--pseudo"
           falsevalue=""
           label="Use pseudo p-values"
           help="If selected, pseudo p-values are calculated instead of exact p-values. This can provide faster estimates but may be less precise."/>
</inputs>
<outputs>
    <!-- The two matrices below exist only when the correlation matrix is recalculated. -->
    <data name="output_correlation"
          format="tabular"
          label="${tool.name} on ${on_string}: median_correlation.tsv">
        <filter>correlation['select'] == "new"</filter>
    </data>
    <data name="output_covariance"
          format="tabular"
          label="${tool.name} on ${on_string}: median_covariance.tsv">
        <filter>correlation['select'] == "new"</filter>
    </data>

    <!-- Always produced: the p-value table written by the final step. -->
    <data name="pvalues"
          format="tabular"
          label="${tool.name} on ${on_string}: pvalues.tsv"/>
</outputs>
<tests>
    <!-- Case 1: an existing correlation matrix is supplied, so only the p-value table is produced. -->
    <test expect_num_outputs="1">
        <param name="otu_table" value="fake_data.tsv" ftype="tabular"/>
        <conditional name="correlation">
            <param name="select" value="original"/>
            <param name="input_file" value="fake_data_cor.tsv" ftype="tabular"/>
        </conditional>
        <param name="number" value="10"/>
        <output name="pvalues" file="fake_pvalues.tsv" compare="diff"/>
    </test>

    <!-- Case 2: the correlation matrix is recalculated, adding the correlation
         and covariance matrices to the outputs. -->
    <test expect_num_outputs="3">
        <param name="otu_table" value="fake_data.tsv" ftype="tabular"/>
        <conditional name="correlation">
            <param name="select" value="new"/>
        </conditional>
        <param name="number" value="10"/>
        <output name="output_correlation" file="fake_data_cor.tsv" compare="diff"/>
        <output name="output_covariance" file="fake_data_cov.tsv" compare="diff"/>
        <output name="pvalues" file="fake_pvalues.tsv" compare="diff"/>
    </test>
</tests>
107 <help><![CDATA[
108 What it does
109 ============
110
111 This tool estimates **empirical p-values** for correlation values generated by FastSpar. It uses a **bootstrap-based permutation approach** to assess the statistical significance of observed correlations.
112
113 You can choose to recalculate the correlation matrix with the same parameters or use an existing correlation matrix.
114
115 How it works
116 ============
117
118 1. Generates multiple bootstrapped versions of the OTU table.
119 2. Runs FastSpar on each bootstrap replicate.
120 3. Compares bootstrapped correlations to the original correlation matrix to calculate empirical p-values.
121
122 Required Inputs
123 ===============
124
125 - **OTU table**: TSV file with absolute counts (no metadata).
126 - **Correlation table** (only needed when "Use an existing correlation matrix" is selected): Output from the original FastSpar run.
127 - **Bootstrap samples**: Number of bootstrap replicates (≥1000 recommended).
128
129 Important Parameters
130 ====================
131
132 - **Iterations**: Must match the number used in the original FastSpar run.
133 - **Exclude Iterations** and **Threshold**: Should also match the original settings, if used.
134 - **Seed**: Integer seed (default: 1) passed to the bootstrap and correlation steps; reuse the same value to reproduce results.
135 - **Pseudo**: Choose whether to calculate pseudo p-values instead of exact values.
136
137 IMPORTANT
138 =========
139
140 For meaningful p-values, the parameters used during bootstrapped correlation estimation (**iterations, exclude iterations, threshold**) should be identical to those used in the original FastSpar run.
141
142 Output
143 ======
144
145 - `pvalues.tsv`: A table of empirical p-values for all pairwise correlations.
146
147 When "Recalculate the correlation matrix" is selected the tool will also output:
148
149 - `median_correlation.tsv`: Correlation matrix between all OTUs.
150 - `median_covariance.tsv`: Covariance matrix between all OTUs.
151
152 Additional Resources
153 ====================
154
155 - FastSpar GitHub: https://github.com/scwatts/fastspar
156 ]]></help>
157 <expand macro="citations"/>
158 </tool>