Mercurial > repos > galaxyp > diffacto
comparison diffacto.xml @ 0:3cc7ce0822a1 draft default tip
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/diffacto commit 507bb20a2c246bb0a1a0c7dae1555a851730e4a6"
author | galaxyp |
---|---|
date | Mon, 21 Jun 2021 12:50:54 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:3cc7ce0822a1 |
---|---|
1 <tool id="diffacto" name="Diffacto" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" python_template_version="3.5"> | |
<description>Comparative Protein Abundance from Covariation of Peptide Abundances</description>
<macros>
    <!-- Upstream diffacto release; reused by the tool version string and the package requirement. -->
    <token name="@TOOL_VERSION@">1.0.6</token>
    <!-- Wrapper revision appended after "+galaxy" in the tool version string. -->
    <token name="@VERSION_SUFFIX@">0</token>
</macros>
<requirements>
    <!-- The package version is pinned to the same token as the tool version. -->
    <requirement version="@TOOL_VERSION@" type="package">diffacto</requirement>
</requirements>
<command detect_errors="exit_code"><![CDATA[
    ## Translate every tab to a comma so that a tabular dataset reaches diffacto as csv.
    tr '\t' ',' < '$input' > input.csv &&
    diffacto
        -i input.csv
        ## Optional datasets are forwarded only when the user supplied them.
        #if $db
            -db '$db'
        #end if
        #if $samples
            -samples '$samples'
        #end if
        ## NOTE(review): -reference is passed even when the text field is left empty;
        ## confirm that diffacto treats an empty string the same as omitting the flag.
        -reference '$reference'
        #if $normalize
            -normalize $normalize
        #end if
        ## The two FARMS hyperparameters are optional floats whose minimum is 0.0.
        ## A plain truthiness test would drop an explicit zero (0.0 is falsy), and the
        ## run would silently use the documented default of 0.1 instead, so compare
        ## the rendered value against the empty string.
        #if str($farms_mu) != ''
            -farms_mu $farms_mu
        #end if
        #if str($farms_alpha) != ''
            -farms_alpha $farms_alpha
        #end if
        -min_samples $min_samples
        -impute_threshold $impute_threshold
        -cutoff_weight $cutoff_weight
        ## Boolean params expand to the complete flag text or to nothing.
        $use_unique
        #if $scale == 'log2'
            -log2 True
        #else
            -log2 False
        #end if
        $fast
        -out '$output'
        ## Optional result files are requested only when the matching checkbox is set,
        ## mirroring the filters on the mc_out and loadings_out outputs.
        #if $mcfdr
            -mc_out '$mc_out'
        #end if
        #if $loadings
            -loadings_out '$loadings_out'
        #end if
]]></command>
<inputs>
    <!-- Peptide abundance table; the command section turns tabs into commas before diffacto reads it. -->
    <param name="input" argument="-i" type="data" format="tabular,csv" label="Peptides abundances">
        <help><![CDATA[
            Peptides abundances in tabular or csv format.
            <ul>
                <li>The first row is column headers and should contain the sample name for each sample column. </li>
                <li>The first column should contain unique peptide sequences. </li>
                <li><i>Optionally, the second column may be ProteinID assignments, else the <b>Protein database</b> input is required.</i></li>
                <li>Each remaining column is a sample column with numeric abundance values.</li>
                <li>Missing values should be empty instead of zeros.</li>
            </ul>
        ]]></help>
    </param>

    <!-- Optional datasets: forwarded to diffacto only when provided. -->
    <param argument="-db" type="data" format="fasta" label="Protein database" optional="true"
           help="Required if the Peptide abundances input does not have Protein IDs in the second column"/>
    <param argument="-samples" type="data" format="tabular" label="Sample Groups" optional="true">
        <help><![CDATA[
            <i>Optional: By default, each Sample column in Peptide abundances is treated as a singleton group.</i>
            <br>
            Groups the samples from the Peptides abundance input for comparison.
            Each sample column from Peptides abundance input should be on a line with 2 columns:
            <ol>
                <li>Sample name for header line of the Peptides abundance input.</li>
                <li>Group Name assignment for the sample</li>
            </ol>
        ]]></help>
    </param>
    <param argument="-reference" type="text" value="" label="Reference sample groups" optional="true">
        <help><![CDATA[
            <i>Optional: By default, Diffacto uses the average of all samples/groups as the reference.</i>
            <br>
            Names of sample groups <i>(separated by semicolon)</i> treated as the comparison reference.
            <ul>
                <li>If a Sample Groups input was used, the reference names should be Group names from column 2.</li>
                <li>Otherwise, the reference names should be Sample names from the Peptides abundance column header line.</li>
            </ul>
        ]]></help>
    </param>

    <!-- The selected scale is mapped to "-log2 True" or "-log2 False" in the command section. -->
    <param name="scale" argument="-log2" type="select" label="Peptides abundance scale">
        <option value="linear">linear</option>
        <option value="log2">log2</option>
    </param>
    <param argument="-normalize" type="select" label="Sample-wise normalization" optional="true">
        <option value="average">average</option>
        <option value="median">median</option>
        <option value="GMM">GMM</option>
    </param>

    <!-- Optional FARMS hyperparameters; 0.0 is a permitted value for both. -->
    <param argument="-farms_mu" type="float" value="0.1" min="0.0" max="1.0" optional="true" label="Hyperparameter mu"
           help="Hyperparameter mu (default: 0.1)"/>
    <param argument="-farms_alpha" type="float" value="0.1" min="0.0" max="1.0" optional="true" label="Hyperparameter alpha"
           help="Hyperparameter weight of prior probability (default: 0.1)"/>

    <!-- Filtering and imputation thresholds; these are always passed on the command line. -->
    <param argument="-min_samples" type="integer" value="1" min="1" label="Minimum samples for peptide"
           help="Minimum number of samples peptides needed to be quantified in"/>
    <param argument="-impute_threshold" type="float" value="0.99" min="0.1" max="1.0" label="Minimum fraction of missing values in the group"
           help="Impute missing values if missing fraction is larger than the threshold."/>
    <param argument="-cutoff_weight" type="float" value="0.5" min="0.0" max="1.0" label="Peptide cutoff weight"
           help="Peptides weighted lower than the cutoff will be excluded."/>

    <!-- These booleans expand to the complete flag text when checked and to nothing otherwise. -->
    <param argument="-use_unique" type="boolean" truevalue="-use_unique True" falsevalue="" checked="false" label="Use unique peptides only"/>
    <param argument="-fast" type="boolean" truevalue="-fast True" falsevalue="" checked="false" label="Allow early termination in EM calculation when noise is sufficiently small."/>

    <!-- These booleans switch the optional output datasets on; see the filters in the outputs section. -->
    <param name="mcfdr" argument="-mc_out" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Perform Monte Carlo FDR simulation"/>
    <param name="loadings" argument="-loadings_out" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Output Protein Peptide loadings file"/>
</inputs>
<outputs>
    <!-- Main result: always produced. -->
    <data format="tabular" name="output" label="${tool.name} on ${on_string}: Protein Abundance">
        <actions>
            <action type="metadata" name="comment_lines" default="1"/>
            <action type="metadata" name="column_names" default="Protein,N.Pept,Q.Pept,S/N,P(PECA)"/>
        </actions>
    </data>

    <!-- Created only when the "mcfdr" checkbox is set. -->
    <data format="tabular" name="mc_out" label="${tool.name} on ${on_string}: MC FDR">
        <filter>mcfdr == True</filter>
        <actions>
            <action type="metadata" name="comment_lines" default="1"/>
            <action type="metadata" name="column_names" default="Protein,P(MC),MCFDR"/>
        </actions>
    </data>

    <!-- Created only when the "loadings" checkbox is set. -->
    <data format="tabular" name="loadings_out" label="${tool.name} on ${on_string}: Protein Peptide loading">
        <filter>loadings == True</filter>
        <actions>
            <action type="metadata" name="comment_lines" default="1"/>
            <action type="metadata" name="column_names" default="Protein,Peptide,Loading"/>
        </actions>
    </data>
</outputs>
<tests>
    <!--
        Two of the three outputs are guarded by filters, so every test states how
        many datasets it expects; a filter regression then fails the test instead
        of passing unnoticed.
    -->

    <!-- csv input, protein IDs resolved through the fasta database. -->
    <test expect_num_outputs="1">
        <param name="input" ftype="csv" value="HBY20Mix.peptides.csv"/>
        <param name="db" ftype="fasta" value="UP000002311_559292.fasta"/>
        <param name="samples" ftype="tabular" value="HBY20Mix.samples.lst"/>
        <output name="output">
            <assert_contents>
                <has_text text="P19097"/>
            </assert_contents>
        </output>
    </test>

    <!-- Same inputs with a tabular peptide table, exercising the tab-to-comma conversion. -->
    <test expect_num_outputs="1">
        <param name="input" ftype="tabular" value="HBY20Mix.peptides.tsv"/>
        <param name="db" ftype="fasta" value="UP000002311_559292.fasta"/>
        <param name="samples" ftype="tabular" value="HBY20Mix.samples.lst"/>
        <output name="output">
            <assert_contents>
                <has_text text="P19097"/>
            </assert_contents>
        </output>
    </test>

    <!-- No database supplied, with a raised minimum sample count. -->
    <test expect_num_outputs="1">
        <param name="input" ftype="csv" value="iPRG.novo.pep.csv"/>
        <param name="samples" ftype="tabular" value="iPRG.samples.lst"/>
        <param name="min_samples" value="2"/>
        <output name="output">
            <assert_contents>
                <has_text text="FAS2"/>
            </assert_contents>
        </output>
    </test>

    <!-- Unique peptides only, plus the Monte Carlo FDR output. -->
    <test expect_num_outputs="2">
        <param name="input" ftype="csv" value="iPRG.novo.pep.csv"/>
        <param name="samples" ftype="tabular" value="iPRG.samples.lst"/>
        <param name="min_samples" value="4"/>
        <param name="use_unique" value="True"/>
        <param name="mcfdr" value="True"/>
        <output name="output">
            <assert_contents>
                <has_text text="FAS2"/>
            </assert_contents>
        </output>
        <output name="mc_out">
            <assert_contents>
                <has_text text="FAS2"/>
            </assert_contents>
        </output>
    </test>
</tests>
183 <help><![CDATA[ | |
184 **Diffacto** | |
185 | |
186 Diffacto_ quantifies comparative protein abundance from the covariation of peptide abundances. | |
187 | |
188 Diffacto_ applies factor analysis to extract the covariation of peptides' abundances. The method enables a weighted geometrical average summarization and automatic elimination of incoherent peptides, which may result from suboptimal digestion or being partially modified, and are not representative of the protein concentration. | |
189 | |
190 **Inputs** | |
191 | |
192 - **Peptides abundances** *in tabular or csv format* | |
193 | |
194 - The first row is column headers and should contain the sample name for each sample column. | |
195 - The first column should contain unique peptide sequences. | |
196 - *Optionally, the second column may be Protein ID assignments, else the* **Protein database** *input is required.* | |
197 - Each remaining column is a sample column with numeric abundance values. | |
198 - Missing values should be empty instead of zeros. | |
199 - Example: | |
200 | |
201 ============ ========== ========= ========= ========= ========= | |
202 sequences Protein Sample1-A Sample1_B Sample2_A Sample2_B | |
203 ============ ========== ========= ========= ========= ========= | |
204 AAATAAMTK EF3A 127.35209 142.58217 135.89206 162.54500 | |
205 AAATTGEWDK PDC1 100.35922 114.68676 922.60617 833.97955 | |
206 LPVLLADACCSR HSP72;PDC1 120.21570 194.99594 977.48321 219.23281 | |
207 AAEEAGVTDVK FAS2 442.67501 457.52266 448.52837 424.15980 | |
208 ============ ========== ========= ========= ========= ========= | |
209 | |
210 | |
211 - **Protein database** *(optional)* | |
212 | |
213 - The Protein database in fasta format that has protein sequences containing the peptides. | |
214 - Required if the **Peptides abundances** input does not have a second column containing Protein ID assignments | |
215 | |
216 | |
217 - **Sample Groups** *(optional)* | |
218 | |
219 - First column has the sample name | |
220 - Second column has the group name | |
221 - Example: | |
222 | |
223 ========= == | |
224 Sample1-A S1 | |
225 Sample1_B S1 | |
226 Sample2_A S2 | |
227 Sample2_B S2 | |
228 ========= == | |
229 | |
230 | |
231 **Outputs** | |
232 | |
233 - **Protein Abundance** | |
234 | |
235 ======= ====== ====== =================== =================== ================== ================== | |
236 Protein N.Pept Q.Pept S/N P(PECA) S1 S2 | |
237 ======= ====== ====== =================== =================== ================== ================== | |
238 EF3A 2 2 -2.874362404756714 0.2608189432601452 463172795.59269696 489796576.81520355 | |
239 FAS2 6 4 -0.5901265476375578 0.8395809777778386 52093246.23323742 53280470.3811749 | |
240 PDC1 3 2 6.634988423694361 0.25491030879514676 203769831.79809052 174641994.14231393 | |
241 ======= ====== ====== =================== =================== ================== ================== | |
242 | |
243 - **FDR Estimate from Monte Carlo Simulation** *(optional)* | |
244 | |
245 ======= =================== =================== | |
246 Protein P(MC) MCFDR | |
247 ======= =================== =================== | |
248 EF3A 0.1419053964023984 0.5287482885321804 | |
249 FAS2 0.9867109634551495 0.9132662960822688 | |
250 PDC1 0.3338088445078459 0.5287482885321804 | |
251 ======= =================== =================== | |
252 | |
253 - **Protein Peptide Loadings** *(optional)* | |
254 | |
255 ======= =========== =================== | |
256 EF3A AAATAAMTK 0.5287482885321804 | |
257 FAS2 AAEEAGVTDVK 0.9132662960822688 | |
258 PDC1 AAATTGEWDK 0.5287482885321804 | |
259 ======= =========== =================== | |
260 | |
261 .. _Diffacto: https://github.com/statisticalbiotechnology/diffacto | |
262 | |
263 ]]></help> | |
<citations>
    <!-- DOI of the publication cited for this tool. -->
    <citation type="doi">10.1074/mcp.O117.067728</citation>
</citations>
267 </tool> |