Mercurial > repos > galaxyp > diffacto
comparison diffacto.xml @ 0:3cc7ce0822a1 draft default tip
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/diffacto commit 507bb20a2c246bb0a1a0c7dae1555a851730e4a6"
| author | galaxyp |
|---|---|
| date | Mon, 21 Jun 2021 12:50:54 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:3cc7ce0822a1 |
|---|---|
| 1 <tool id="diffacto" name="Diffacto" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" python_template_version="3.5"> | |
| 2 <description>Comparative Protein Abundance from Covariation of Peptide Abundances</description> | |
| 3 <macros> | |
| 4 <token name="@TOOL_VERSION@">1.0.6</token> | |
| 5 <token name="@VERSION_SUFFIX@">0</token> | |
| 6 </macros> | |
| 7 <requirements> | |
| 8 <requirement type="package" version="@TOOL_VERSION@">diffacto</requirement> | |
| 9 </requirements> | |
| 10 <command detect_errors="exit_code"><![CDATA[ | |
| 11 tr '\t' ',' < '$input' > input.csv && | |
| 12 diffacto | |
| 13 -i input.csv | |
| 14 #if $db | |
| 15 -db '$db' | |
| 16 #end if | |
| 17 #if $samples | |
| 18 -samples '$samples' | |
| 19 #end if | |
| 20 -reference '$reference' | |
| 21 #if $normalize | |
| 22 -normalize $normalize | |
| 23 #end if | |
| 24 #if $farms_mu | |
| 25 -farms_mu $farms_mu | |
| 26 #end if | |
| 27 #if $farms_alpha | |
| 28 -farms_alpha $farms_alpha | |
| 29 #end if | |
| 30 -min_samples $min_samples | |
| 31 -impute_threshold $impute_threshold | |
| 32 -cutoff_weight $cutoff_weight | |
| 33 $use_unique | |
| 34 #if $scale == 'log2' | |
| 35 -log2 True | |
| 36 #else | |
| 37 -log2 False | |
| 38 #end if | |
| 39 $fast | |
| 40 -out '$output' | |
| 41 #if $mcfdr | |
| 42 -mc_out '$mc_out' | |
| 43 #end if | |
| 44 #if $loadings | |
| 45 -loadings_out '$loadings_out' | |
| 46 #end if | |
| 47 ]]></command> | |
| 48 <inputs> | |
| 49 <param name="input" argument="-i" type="data" format="tabular,csv" label="Peptides abundances"> | |
| 50 <help><![CDATA[ | |
| 51 Peptides abundances in tabular or csv format. | |
| 52 <ul> | |
| 53 <li>The first row is column headers and should contain the sample name for each sample column. </li> | |
| 54 <li>The first column should contain unique peptide sequences. </li> | |
| 55 <li><i>Optionally, the second column may be ProteinID assignments, else the <b>Protein database</b> input is required.</i></li> | |
| 56 <li>Each remaining column is a sample column with numeric abundance values.</li> | |
| 57 <li>Missing values should be empty instead of zeros.</li> | |
| 58 </ul> | |
| 59 ]]></help> | |
| 60 </param> | |
| 61 <param argument="-db" type="data" format="fasta" label="Protein database" optional="true" | |
| 62 help="Required if the Peptide abundances input does not have Protein IDs in the second column"/> | |
| 63 <param argument="-samples" type="data" format="tabular" label="Sample Groups" optional="true"> | |
| 64 <help><![CDATA[ | |
| 65 <i>Optional: By default, each Sample column in Peptide abundances is treated as a singleton group.</i> | |
| 66 <br> | |
| 67 Groups the samples from the Peptides abundance input for comparison. | |
| 68 Each sample column from Peptides abundance input should be on a line with 2 columns: | |
| 69 <ol> | |
| 70 <li>Sample name for header line of the Peptides abundance input.</li> | |
| 71 <li>Group Name assignment for the sample</li> | |
| 72 </ol> | |
| 73 ]]></help> | |
| 74 </param> | |
| 75 <param argument="-reference" type="text" value="" label="Reference sample groups" optional="true"> | |
| 76 <help><![CDATA[ | |
| 77 <i>Optional: By default, Diffacto uses the average of all samples/groups as the reference.</i> | |
| 78 <br> | |
| 79 Names of sample groups <i>(separated by semicolon)</i> treated as the comparison reference. | |
| 80 <ul> | |
| 81 <li>If a Sample Groups input was used, the reference names should be Group names from column 2.</li> | |
| 82 <li>Otherwise, the reference names should be Sample names from the Peptides abundance column header line.</li> | |
| 83 </ul> | |
| 84 ]]></help> | |
| 85 </param> | |
| 86 <param name="scale" argument="-log2" type="select" label="Peptides abundance scale"> | |
| 87 <option value="linear">linear</option> | |
| 88 <option value="log2">log2</option> | |
| 89 </param> | |
| 90 <param argument="-normalize" type="select" label="Sample-wise normalization" optional="true"> | |
| 91 <option value="average">average</option> | |
| 92 <option value="median">median</option> | |
| 93 <option value="GMM">GMM</option> | |
| 94 </param> | |
| 95 <param argument="-farms_mu" type="float" value="0.1" min="0.0" max="1.0" optional="true" label="Hyperparameter mu" | |
| 96 help="Hyperparameter mu (default: 0.1)"/> | |
| 97 <param argument="-farms_alpha" type="float" value="0.1" min="0.0" max="1.0" optional="true" label="Hyperparameter alpha" | |
| 98 help="Hyperparameter weight of prior probability (default: 0.1)"/> | |
| 99 <param argument="-min_samples" type="integer" value="1" min="1" label="Minimum samples for peptide" | |
| 100 help="Minimum number of samples in which a peptide must be quantified"/> | |
| 101 <param argument="-impute_threshold" type="float" value="0.99" min="0.1" max="1.0" label="Minimum fraction of missing values in the group" | |
| 102 help="Impute missing values if missing fraction is larger than the threshold."/> | |
| 103 <param argument="-cutoff_weight" type="float" value="0.5" min="0." max="1.0" label="Peptide cutoff weight" | |
| 104 help="Peptides weighted lower than the cutoff will be excluded."/> | |
| 105 <param argument="-use_unique" type="boolean" truevalue="-use_unique True" falsevalue="" checked="false" label="Use unique peptides only"/> | |
| 106 <param argument="-fast" type="boolean" truevalue="-fast True" falsevalue="" checked="false" label="Allow early termination in EM calculation when noise is sufficiently small."/> | |
| 107 <param name="mcfdr" argument="-mc_out" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Perform Monte Carlo FDR simulation"/> | |
| 108 <param name="loadings" argument="-loadings_out" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Output Protein Peptide loadings file"/> | |
| 109 </inputs> | |
| 110 <outputs> | |
| 111 <data name="output" format="tabular" label="${tool.name} on ${on_string}: Protein Abundance"> | |
| 112 <actions> | |
| 113 <action name="comment_lines" type="metadata" default="1" /> | |
| 114 <action name="column_names" type="metadata" default="Protein,N.Pept,Q.Pept,S/N,P(PECA)" /> | |
| 115 </actions> | |
| 116 </data> | |
| 117 <data name="mc_out" format="tabular" label="${tool.name} on ${on_string}: MC FDR"> | |
| 118 <filter>mcfdr == True</filter> | |
| 119 <actions> | |
| 120 <action name="comment_lines" type="metadata" default="1" /> | |
| 121 <action name="column_names" type="metadata" default="Protein,P(MC),MCFDR" /> | |
| 122 </actions> | |
| 123 </data> | |
| 124 <data name="loadings_out" format="tabular" label="${tool.name} on ${on_string}: Protein Peptide loading"> | |
| 125 <filter>loadings == True</filter> | |
| 126 <actions> | |
| 127 <action name="comment_lines" type="metadata" default="1" /> | |
| 128 <action name="column_names" type="metadata" default="Protein,Peptide,Loading" /> | |
| 129 </actions> | |
| 130 </data> | |
| 131 </outputs> | |
| 132 <tests> | |
| 133 <test> | |
| 134 <param name="input" ftype="csv" value="HBY20Mix.peptides.csv"/> | |
| 135 <param name="db" ftype="fasta" value="UP000002311_559292.fasta"/> | |
| 136 <param name="samples" ftype="tabular" value="HBY20Mix.samples.lst"/> | |
| 137 <output name="output"> | |
| 138 <assert_contents> | |
| 139 <has_text text="P19097" /> | |
| 140 </assert_contents> | |
| 141 </output> | |
| 142 </test> | |
| 143 <test> | |
| 144 <param name="input" ftype="tabular" value="HBY20Mix.peptides.tsv"/> | |
| 145 <param name="db" ftype="fasta" value="UP000002311_559292.fasta"/> | |
| 146 <param name="samples" ftype="tabular" value="HBY20Mix.samples.lst"/> | |
| 147 <output name="output"> | |
| 148 <assert_contents> | |
| 149 <has_text text="P19097" /> | |
| 150 </assert_contents> | |
| 151 </output> | |
| 152 </test> | |
| 153 | |
| 154 <test> | |
| 155 <param name="input" ftype="csv" value="iPRG.novo.pep.csv"/> | |
| 156 <param name="samples" ftype="tabular" value="iPRG.samples.lst"/> | |
| 157 <param name="min_samples" value="2"/> | |
| 158 <output name="output"> | |
| 159 <assert_contents> | |
| 160 <has_text text="FAS2" /> | |
| 161 </assert_contents> | |
| 162 </output> | |
| 163 </test> | |
| 164 <test> | |
| 165 <param name="input" ftype="csv" value="iPRG.novo.pep.csv"/> | |
| 166 <param name="samples" ftype="tabular" value="iPRG.samples.lst"/> | |
| 167 <param name="min_samples" value="4"/> | |
| 168 <param name="use_unique" value="True"/> | |
| 169 <param name="mcfdr" value="True"/> | |
| 170 <output name="output"> | |
| 171 <assert_contents> | |
| 172 <has_text text="FAS2" /> | |
| 173 </assert_contents> | |
| 174 </output> | |
| 175 <output name="mc_out"> | |
| 176 <assert_contents> | |
| 177 <has_text text="FAS2" /> | |
| 178 </assert_contents> | |
| 179 </output> | |
| 180 </test> | |
| 181 | |
| 182 </tests> | |
| 183 <help><![CDATA[ | |
| 184 **Diffacto** | |
| 185 | |
| 186 Diffacto_ quantifies comparative protein abundance from the covariation of peptide abundances. | |
| 187 | |
| 188 Diffacto_ applies factor analysis to extract the covariation of peptides' abundances. The method enables a weighted geometrical average summarization and automatic elimination of incoherent peptides, which may result from suboptimal digestion or being partially modified, and are not representative of the protein concentration. | |
| 189 | |
| 190 **Inputs** | |
| 191 | |
| 192 - **Peptides abundances** *in tabular or csv format* | |
| 193 | |
| 194 - The first row is column headers and should contain the sample name for each sample column. | |
| 195 - The first column should contain unique peptide sequences. | |
| 196 - *Optionally, the second column may be Protein ID assignments, else the* **Protein database** *input is required.* | |
| 197 - Each remaining column is a sample column with numeric abundance values. | |
| 198 - Missing values should be empty instead of zeros. | |
| 199 - Example: | |
| 200 | |
| 201 ============ ========== ========= ========= ========= ========= | |
| 202 sequences Protein Sample1-A Sample1_B Sample2_A Sample2_B | |
| 203 ============ ========== ========= ========= ========= ========= | |
| 204 AAATAAMTK EF3A 127.35209 142.58217 135.89206 162.54500 | |
| 205 AAATTGEWDK PDC1 100.35922 114.68676 922.60617 833.97955 | |
| 206 LPVLLADACCSR HSP72;PDC1 120.21570 194.99594 977.48321 219.23281 | |
| 207 AAEEAGVTDVK FAS2 442.67501 457.52266 448.52837 424.15980 | |
| 208 ============ ========== ========= ========= ========= ========= | |
| 209 | |
| 210 | |
| 211 - **Protein database** *(optional)* | |
| 212 | |
| 213 - The Protein database in fasta format that has protein sequences containing the peptides. | |
| 214 - Required if the **Peptides abundances** input does not have a second column containing Protein ID assignments | |
| 215 | |
| 216 | |
| 217 - **Sample Groups** *(optional)* | |
| 218 | |
| 219 - First column has the sample name | |
| 220 - Second column has the group name | |
| 221 - Example: | |
| 222 | |
| 223 ========= == | |
| 224 Sample1-A S1 | |
| 225 Sample1_B S1 | |
| 226 Sample2_A S2 | |
| 227 Sample2_B S2 | |
| 228 ========= == | |
| 229 | |
| 230 | |
| 231 **Outputs** | |
| 232 | |
| 233 - **Protein Abundance** | |
| 234 | |
| 235 ======= ====== ====== =================== =================== ================== ================== | |
| 236 Protein N.Pept Q.Pept S/N P(PECA) S1 S2 | |
| 237 ======= ====== ====== =================== =================== ================== ================== | |
| 238 EF3A 2 2 -2.874362404756714 0.2608189432601452 463172795.59269696 489796576.81520355 | |
| 239 FAS2 6 4 -0.5901265476375578 0.8395809777778386 52093246.23323742 53280470.3811749 | |
| 240 PDC1 3 2 6.634988423694361 0.25491030879514676 203769831.79809052 174641994.14231393 | |
| 241 ======= ====== ====== =================== =================== ================== ================== | |
| 242 | |
| 243 - **FDR Estimate from Monte Carlo Simulation** *(optional)* | |
| 244 | |
| 245 ======= =================== =================== | |
| 246 Protein P(MC) MCFDR | |
| 247 ======= =================== =================== | |
| 248 EF3A 0.1419053964023984 0.5287482885321804 | |
| 249 FAS2 0.9867109634551495 0.9132662960822688 | |
| 250 PDC1 0.3338088445078459 0.5287482885321804 | |
| 251 ======= =================== =================== | |
| 252 | |
| 253 - **Protein Peptide Loadings** *(optional)* | |
| 254 | |
| 255 ======= =========== =================== | |
| 256 EF3A AAATAAMTK 0.5287482885321804 | |
| 257 FAS2 AAEEAGVTDVK 0.9132662960822688 | |
| 258 PDC1 AAATTGEWDK 0.5287482885321804 | |
| 259 ======= =========== =================== | |
| 260 | |
| 261 .. _Diffacto: https://github.com/statisticalbiotechnology/diffacto | |
| 262 | |
| 263 ]]></help> | |
| 264 <citations> | |
| 265 <citation type="doi">10.1074/mcp.O117.067728</citation> | |
| 266 </citations> | |
| 267 </tool> |
