Mercurial > repos > galaxyp > diffacto
view diffacto.xml @ 0:3cc7ce0822a1 draft default tip
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/diffacto commit 507bb20a2c246bb0a1a0c7dae1555a851730e4a6"
author | galaxyp |
---|---|
date | Mon, 21 Jun 2021 12:50:54 +0000 |
parents | |
children |
line wrap: on
line source
<tool id="diffacto" name="Diffacto" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" python_template_version="3.5">
    <!--
        Galaxy wrapper for the diffacto command line program.
        The command converts the peptide abundance dataset to CSV, runs diffacto on it,
        and optionally collects the Monte Carlo FDR and protein/peptide loadings files.
    -->
    <description>Comparative Protein Abundance from Covariation of Peptide Abundances</description>
    <macros>
        <token name="@TOOL_VERSION@">1.0.6</token>
        <!-- Wrapper revision: bumped because the generated command line changed
             (reference is only passed when set, farms_* accept 0.0, ';' survives sanitizing). -->
        <token name="@VERSION_SUFFIX@">1</token>
    </macros>
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">diffacto</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
        ## Replace tab delimiters with commas so tabular and csv datasets are both handed over as CSV.
        tr '\t' ',' < '$input' > input.csv
        && diffacto
            -i input.csv
            #if $db
                -db '$db'
            #end if
            #if $samples
                -samples '$samples'
            #end if
            ## Only pass a reference when one was entered; otherwise diffacto applies its own default.
            #if str($reference).strip() != ''
                -reference '$reference'
            #end if
            #if $normalize
                -normalize $normalize
            #end if
            ## Compare against the empty string: a plain truth test would drop an explicit 0.0.
            #if str($farms_mu) != ''
                -farms_mu $farms_mu
            #end if
            #if str($farms_alpha) != ''
                -farms_alpha $farms_alpha
            #end if
            -min_samples $min_samples
            -impute_threshold $impute_threshold
            -cutoff_weight $cutoff_weight
            $use_unique
            #if $scale == 'log2'
                -log2 True
            #else
                -log2 False
            #end if
            $fast
            -out '$output'
            #if $mcfdr
                -mc_out '$mc_out'
            #end if
            #if $loadings
                -loadings_out '$loadings_out'
            #end if
    ]]></command>
    <inputs>
        <param name="input" argument="-i" type="data" format="tabular,csv" label="Peptides abundances">
            <help><![CDATA[
                Peptides abundances in tabular or csv format.
                <ul>
                <li>The first row is column headers and should contain the sample name for each sample column. </li>
                <li>The first column should contain unique peptide sequences. </li>
                <li><i>Optionally, the second column may be ProteinID assignments, else the <b>Protein database</b> input is required.</i></li>
                <li>Each remaining column is a sample column with numeric abundance values.</li>
                <li>Missing values should be empty instead of zeros.</li>
                </ul>
            ]]></help>
        </param>
        <param argument="-db" type="data" format="fasta" label="Protein database" optional="true" help="Required if the Peptide abundances input does not have Protein IDs in the second column"/>
        <param argument="-samples" type="data" format="tabular" label="Sample Groups" optional="true">
            <help><![CDATA[
                <i>Optional: By default, each Sample column in Peptide abundances is treated as a singleton group.</i>
                <br>
                Groups the samples from the Peptides abundance input for comparison.
                Each sample column from Peptides abundance input should be on a line with 2 columns:
                <ol>
                <li>Sample name for header line of the Peptides abundance input.</li>
                <li>Group Name assignment for the sample</li>
                </ol>
            ]]></help>
        </param>
        <param argument="-reference" type="text" value="" label="Reference sample groups" optional="true">
            <help><![CDATA[
                <i>Optional: By default, Diffacto uses the average of all samples/groups as the reference.</i>
                <br>
                Names of sample groups <i>(separated by semicolon)</i> treated as the comparison reference.
                <ul>
                <li>If a Sample Groups input was used, the reference names should be Group names from column 2.</li>
                <li>Otherwise, the reference names should be Sample names from the Peptides abundance column header line.</li>
                </ul>
            ]]></help>
            <!-- The group separator ';' is not in Galaxy's default set of valid characters and would be
                 substituted before reaching the command line. Quote characters stay mapped by the
                 default mapping, so the single-quoted value in the command remains shell safe. -->
            <sanitizer>
                <valid initial="default">
                    <add value=";"/>
                </valid>
            </sanitizer>
        </param>
        <param name="scale" argument="-log2" type="select" label="Peptides abundance scale">
            <option value="linear">linear</option>
            <option value="log2">log2</option>
        </param>
        <param argument="-normalize" type="select" label="Sample-wise normalization" optional="true">
            <option value="average">average</option>
            <option value="median">median</option>
            <option value="GMM">GMM</option>
        </param>
        <param argument="-farms_mu" type="float" value="0.1" min="0.0" max="1.0" optional="true" label="Hyperparameter mu" help="Hyperparameter mu (default: 0.1)"/>
        <param argument="-farms_alpha" type="float" value="0.1" min="0.0" max="1.0" optional="true" label="Hyperparameter alpha" help="Hyperparameter weight of prior probability (default: 0.1)"/>
        <param argument="-min_samples" type="integer" value="1" min="1" label="Minimum samples for peptide" help="Minimum number of samples peptides needed to be quantified in"/>
        <param argument="-impute_threshold" type="float" value="0.99" min="0.1" max="1.0" label="Minimum fraction of missing values in the group" help="Impute missing values if missing fraction is larger than the threshold."/>
        <param argument="-cutoff_weight" type="float" value="0.5" min="0.0" max="1.0" label="Peptide cutoff weight" help="Peptides weighted lower than the cutoff will be excluded."/>
        <!-- The two flags below carry their complete command line fragment in truevalue. -->
        <param argument="-use_unique" type="boolean" truevalue="-use_unique True" falsevalue="" checked="false" label="Use unique peptides only"/>
        <param argument="-fast" type="boolean" truevalue="-fast True" falsevalue="" checked="false" label="Allow early termination in EM calculation when noise is sufficiently small."/>
        <!-- The two switches below only decide whether the matching optional output is requested and kept. -->
        <param name="mcfdr" argument="-mc_out" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Perform Monte Carlo FDR simulation"/>
        <param name="loadings" argument="-loadings_out" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Output Protein Peptide loadings file"/>
    </inputs>
    <outputs>
        <data name="output" format="tabular" label="${tool.name} on ${on_string}: Protein Abundance">
            <actions>
                <action name="comment_lines" type="metadata" default="1" />
                <action name="column_names" type="metadata" default="Protein,N.Pept,Q.Pept,S/N,P(PECA)" />
            </actions>
        </data>
        <data name="mc_out" format="tabular" label="${tool.name} on ${on_string}: MC FDR">
            <filter>mcfdr == True</filter>
            <actions>
                <action name="comment_lines" type="metadata" default="1" />
                <action name="column_names" type="metadata" default="Protein,P(MC),MCFDR" />
            </actions>
        </data>
        <data name="loadings_out" format="tabular" label="${tool.name} on ${on_string}: Protein Peptide loading">
            <filter>loadings == True</filter>
            <actions>
                <action name="comment_lines" type="metadata" default="1" />
                <action name="column_names" type="metadata" default="Protein,Peptide,Loading" />
            </actions>
        </data>
    </outputs>
    <tests>
        <!-- CSV peptide table, proteins assigned from the fasta database -->
        <test>
            <param name="input" ftype="csv" value="HBY20Mix.peptides.csv"/>
            <param name="db" ftype="fasta" value="UP000002311_559292.fasta"/>
            <param name="samples" ftype="tabular" value="HBY20Mix.samples.lst"/>
            <output name="output">
                <assert_contents>
                    <has_text text="P19097" />
                </assert_contents>
            </output>
        </test>
        <!-- Same data supplied as a tabular dataset -->
        <test>
            <param name="input" ftype="tabular" value="HBY20Mix.peptides.tsv"/>
            <param name="db" ftype="fasta" value="UP000002311_559292.fasta"/>
            <param name="samples" ftype="tabular" value="HBY20Mix.samples.lst"/>
            <output name="output">
                <assert_contents>
                    <has_text text="P19097" />
                </assert_contents>
            </output>
        </test>
        <!-- Peptide table without a protein database -->
        <test>
            <param name="input" ftype="csv" value="iPRG.novo.pep.csv"/>
            <param name="samples" ftype="tabular" value="iPRG.samples.lst"/>
            <param name="min_samples" value="2"/>
            <output name="output">
                <assert_contents>
                    <has_text text="FAS2" />
                </assert_contents>
            </output>
        </test>
        <!-- Unique peptides only, with the Monte Carlo FDR output requested -->
        <test>
            <param name="input" ftype="csv" value="iPRG.novo.pep.csv"/>
            <param name="samples" ftype="tabular" value="iPRG.samples.lst"/>
            <param name="min_samples" value="4"/>
            <param name="use_unique" value="True"/>
            <param name="mcfdr" value="True"/>
            <output name="output">
                <assert_contents>
                    <has_text text="FAS2" />
                </assert_contents>
            </output>
            <output name="mc_out">
                <assert_contents>
                    <has_text text="FAS2" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
**Diffacto**

Diffacto_ quantifies comparative protein abundance from the covariation of peptide abundances.

Diffacto_ applies factor analysis to extract the covariation of peptides' abundances. The method enables a weighted geometrical average summarization and automatic elimination of incoherent peptides, which may result from suboptimal digestion or being partially modified, and are not representative of the protein concentration.

**Inputs**

- **Peptides abundances** *in tabular or csv format*

  - The first row is column headers and should contain the sample name for each sample column.
  - The first column should contain unique peptide sequences.
  - *Optionally, the second column may be Protein ID assignments, else the* **Protein database** *input is required.*
  - Each remaining column is a sample column with numeric abundance values.
  - Missing values should be empty instead of zeros.
  - Example:

    ============ ========== ========= ========= ========= =========
    sequences    Protein    Sample1-A Sample1_B Sample2_A Sample2_B
    ============ ========== ========= ========= ========= =========
    AAATAAMTK    EF3A       127.35209 142.58217 135.89206 162.54500
    AAATTGEWDK   PDC1       100.35922 114.68676 922.60617 833.97955
    LPVLLADACCSR HSP72;PDC1 120.21570 194.99594 977.48321 219.23281
    AAEEAGVTDVK  FAS2       442.67501 457.52266 448.52837 424.15980
    ============ ========== ========= ========= ========= =========

- **Protein database** *(optional)*

  - The Protein database in fasta format that has protein sequences containing the peptides.
  - Required if the **Peptides abundances** input does not have a second column containing Protein ID assignments

- **Sample Groups** *(optional)*

  - First column has the sample name
  - Second column has the group name
  - Example:

    ========= ==
    Sample1-A S1
    Sample1_B S1
    Sample2_A S2
    Sample2_B S2
    ========= ==

**Outputs**

- **Protein Abundance**

  ======= ====== ====== =================== =================== ================== ==================
  Protein N.Pept Q.Pept S/N                 P(PECA)             S1                 S2
  ======= ====== ====== =================== =================== ================== ==================
  EF3A    2      2      -2.874362404756714  0.2608189432601452  463172795.59269696 489796576.81520355
  FAS2    6      4      -0.5901265476375578 0.8395809777778386  52093246.23323742  53280470.3811749
  PDC1    3      2      6.634988423694361   0.25491030879514676 203769831.79809052 174641994.14231393
  ======= ====== ====== =================== =================== ================== ==================

- **FDR Estimate from Monte Carlo Simulation** *(optional)*

  ======= =================== ===================
  Protein P(MC)               MCFDR
  ======= =================== ===================
  EF3A    0.1419053964023984  0.5287482885321804
  FAS2    0.9867109634551495  0.9132662960822688
  PDC1    0.3338088445078459  0.5287482885321804
  ======= =================== ===================

- **Protein Peptide Loadings** *(optional)*

  ======= =========== ===================
  EF3A    AAATAAMTK   0.5287482885321804
  FAS2    AAEEAGVTDVK 0.9132662960822688
  PDC1    AAATTGEWDK  0.5287482885321804
  ======= =========== ===================

.. _Diffacto: https://github.com/statisticalbiotechnology/diffacto
    ]]></help>
    <citations>
        <citation type="doi">10.1074/mcp.O117.067728</citation>
    </citations>
</tool>