view idpassemble.xml @ 5:39eaac8f3f42 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/bumbershoot commit baf4379f0cf287238a4c988723611fe0983917db
author galaxyp
date Tue, 21 Nov 2017 13:19:10 -0500
parents 123813b3eed3
children 4b418a632bb0
line wrap: on
line source

<?xml version="1.0"?>
<tool id="idpassemble" name="idpAssemble" version="@VERSION@.0">
    <description>Merge IDPicker databases from single files into a merged database, and filter the result at PSM/spectrum/peptide/protein/gene levels.</description>
    <!-- Shared definitions live in macros.xml; the "requirements" macro expanded below is expected to come from that import. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <!-- Job failure detection: two independent rules, either of which marks the job as failed. -->
    <stdio>
        <!-- Any exit code of 1 or above is fatal. -->
        <exit_code level="fatal" range="1:" description="Job Failed" />
        <!-- A line beginning with "Error:" on stdout or stderr is fatal. -->
        <regex level="fatal" source="both" match="^Error:.*$" />
    </stdio>
    <command>
<![CDATA[
        ## Single input: idpAssemble is pointed at the file named "output" (see the
        ## final #else branch), so first copy the input dataset to that name and
        ## let the tool work on the copy rather than on the input dataset itself.
        #if len($input) < 2
        cp '${input}' output &&
        #end if

        idpAssemble
            -MaxFDRScore $MaxFDRScore
            -MinDistinctPeptides $filter_at_gene_level_condition.MinDistinctPeptides
            -MinSpectra $filter_at_gene_level_condition.MinSpectra
            -MinAdditionalPeptides $filter_at_gene_level_condition.MinAdditionalPeptides
            -MinSpectraPerDistinctMatch $MinSpectraPerDistinctMatch
            -MinSpectraPerDistinctPeptide $MinSpectraPerDistinctPeptide
            -MaxProteinGroupsPerPeptide $MaxProteinGroupsPerPeptide
            #if $filter_at_gene_level_condition.FilterAtGeneLevel
            -FilterAtGeneLevel 1
            #end if
            -SummarizeSources 1

            ## The hierarchy dataset is declared optional="true": when the checkbox
            ## is ticked but no dataset is selected the parameter renders as the
            ## string None, which must not be handed to idpAssemble as a file path.
            #if $AssignSourceHierarchyCondition.HasAssignSourceHierarchy
                #if str($AssignSourceHierarchyCondition.AssignSourceHierarchy) != 'None'
            -AssignSourceHierarchy '$AssignSourceHierarchyCondition.AssignSourceHierarchy'
                #end if
            #end if

            ## Same guard for the optional isobaric sample mapping dataset.
            #if $IsobaricSampleMappingCondition.HasIsobaricSampleMapping
                #if str($IsobaricSampleMappingCondition.IsobaricSampleMapping) != 'None'
            -IsobaricSampleMapping '$IsobaricSampleMappingCondition.IsobaricSampleMapping'
                #end if
            #end if

            ## Several inputs: merge them into a new file named "output".
            ## One input: operate on the copy created at the top of this template.
            #if len($input) > 1
                -MergedOutputFilepath output
                #for $i in $input
                    '${i.file_name}'
                #end for
            #else
                output
            #end if
]]>
    </command>
    <inputs>
        <!-- One or more idpDB datasets; the command template merges them when more than one is given. -->
        <param name="input" type="data" format="idpdb" label="Input idpDB(s)" multiple="true"/>
        <param argument="-MaxFDRScore" type="float" label="Max FDR Score" min="0.00000001" value="0.05" help="Peptide-spectrum-matches (PSMs) with an FDR score (interpolated Q-value) higher than this will be excluded from the filtered data set." />
        <!-- Both branches expose the same three parameters with the same defaults; only the labels and help
             text differ (gene group vs. protein group), so the command template can read them unconditionally. -->
        <conditional name="filter_at_gene_level_condition">
            <param argument="-FilterAtGeneLevel" type="boolean" truevalue="1" falsevalue="0" label="Filter at Gene Level" help="Apply filters at the gene level (i.e. 'min distinct peptides per gene group' instead of 'min distinct peptides per protein group')"/>
            <when value="1">
                <param argument="-MinDistinctPeptides" type="integer" label="Min Distinct Peptides per Gene Group" min="1" value="2" help="Gene groups with fewer than this number of peptides will be excluded from the filtered data set." />
                <param argument="-MinSpectra" type="integer" label="Min Filtered Spectra per Gene Group" min="1" value="2" help="Gene groups with fewer than this number of spectra will be excluded from the filtered data set." />
                <param argument="-MinAdditionalPeptides" type="integer" label="Min Additional Peptides per Gene Group" min="0" value="1" help="Gene groups that are not necessary to explain the presence of at least this many extra peptides will be excluded from the filtered data set. A value of 1 means that each gene group must explain at least 1 peptide that other gene groups do not explain." />
            </when>
            <when value="0">
                <param argument="-MinDistinctPeptides" type="integer" label="Min Distinct Peptides per Protein Group" min="1" value="2" help="Protein groups with fewer than this number of peptides will be excluded from the filtered data set." />
                <param argument="-MinSpectra" type="integer" label="Min Filtered Spectra per Protein Group" min="1" value="2" help="Protein groups with fewer than this number of spectra will be excluded from the filtered data set." />
                <param argument="-MinAdditionalPeptides" type="integer" label="Min Additional Peptides per Protein Group" min="0" value="1" help="Protein groups that are not necessary to explain the presence of at least this many extra peptides will be excluded from the filtered data set. A value of 1 means that each protein group must explain at least 1 peptide that other protein groups do not explain." />
            </when>
        </conditional>
        <param argument="-MinSpectraPerDistinctMatch" type="integer" label="Min Filtered Spectra per Distinct Match" min="1" value="1" help="Distinct matches with fewer than this number of spectra will be excluded from the filtered data set." />
        <param argument="-MinSpectraPerDistinctPeptide" type="integer" label="Min Filtered Spectra per Distinct Peptide" min="1" value="1" help="Distinct peptides with fewer than this number of spectra will be excluded from the filtered data set." />
        <param argument="-MaxProteinGroupsPerPeptide" type="integer" label="Max Protein Groups per Distinct Peptide" min="0" value="10" help="Peptides that map to more than this number of protein groups will be excluded from the filtered data set. Highly ambiguous peptides are not very useful for quantitation." />

        <!-- Optional tab-delimited file assigning source files to groups; only offered when the checkbox is ticked. -->
        <conditional name="AssignSourceHierarchyCondition">
            <param name="HasAssignSourceHierarchy" type="boolean" truevalue="1" falsevalue="0" label="Assign sources to a hierarchy?"/>
            <when value="1">
                <param argument="-AssignSourceHierarchy" type="data" format="tabular" optional="true" label="Assign source files to groups" help="A tab-delimited file that organizes source files (e.g. individual runs in a fractionated experiment) into groups. See below for more details." />
            </when>
            <when value="0">
            </when>
        </conditional>
        <!-- Optional tab-delimited file naming the isobaric reporter ion channels; only offered when the checkbox is ticked. -->
        <conditional name="IsobaricSampleMappingCondition">
            <param name="HasIsobaricSampleMapping" type="boolean" truevalue="1" falsevalue="0" label="Assign sample names to reporter ions?"/>
            <when value="1">
                <param argument="-IsobaricSampleMapping" type="data" format="tabular" optional="true" label="Assign sample names to reporter ions" help="A tab-delimited file that gives sample names to isobaric reporter ion channels (i.e. iTRAQ, TMT) across a given source group. See below for more details." />
            </when>
            <when value="0">
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- The command template always leaves its result in a working-directory file named "output". -->
        <data name="output" format="idpdb" from_work_dir="output" />
    </outputs>
    <!-- Parameters nested in a conditional are addressed as "conditional_name|param_name"; a dot-separated
         path does not match any input and would leave the parameter at its default. Outputs are compared
         by size only (sim_size) with a 500000-byte tolerance. -->
    <tests>
        <!-- Single input, filters only. -->
        <test>
            <param name="input" value="201208-378803-mm.idpDB" />
            <param name="MaxFDRScore" value="0.05" />
            <param name="filter_at_gene_level_condition|MinDistinctPeptides" value="2" />
            <param name="filter_at_gene_level_condition|MinSpectra" value="2" />
            <param name="filter_at_gene_level_condition|MinAdditionalPeptides" value="1" />
            <param name="MinSpectraPerDistinctMatch" value="1" />
            <param name="MinSpectraPerDistinctPeptide" value="1" />
            <param name="MaxProteinGroupsPerPeptide" value="10" />
            <output name="output" file="201208-378803-mm-filtered.idpDB" compare="sim_size" delta="500000" />
        </test>
        <!-- Single input, filters only. -->
        <test>
            <param name="input" value="201208-378803-msgf.idpDB" />
            <param name="MaxFDRScore" value="0.05" />
            <param name="filter_at_gene_level_condition|MinDistinctPeptides" value="2" />
            <param name="filter_at_gene_level_condition|MinSpectra" value="2" />
            <param name="filter_at_gene_level_condition|MinAdditionalPeptides" value="1" />
            <param name="MinSpectraPerDistinctMatch" value="1" />
            <param name="MinSpectraPerDistinctPeptide" value="1" />
            <param name="MaxProteinGroupsPerPeptide" value="10" />
            <output name="output" file="201208-378803-msgf-filtered.idpDB" compare="sim_size" delta="500000" />
        </test>
        <!-- Single input with a source hierarchy file. -->
        <test>
            <param name="input" value="201208-378803-cm.idpDB" />
            <param name="MaxFDRScore" value="0.05" />
            <param name="filter_at_gene_level_condition|MinDistinctPeptides" value="2" />
            <param name="filter_at_gene_level_condition|MinSpectra" value="2" />
            <param name="filter_at_gene_level_condition|MinAdditionalPeptides" value="1" />
            <param name="MinSpectraPerDistinctMatch" value="1" />
            <param name="MinSpectraPerDistinctPeptide" value="1" />
            <param name="MaxProteinGroupsPerPeptide" value="10" />
            <param name="HasAssignSourceHierarchy" value="1" />
            <param name="AssignSourceHierarchy" value="assembly.tsv" ftype="tabular" />
            <output name="output" file="201208-378803-cm-filtered.idpDB" compare="sim_size" delta="500000" />
        </test>
        <!-- Three inputs merged into one database. -->
        <test>
            <param name="input" value="201208-378803-mm.idpDB,201208-378803-msgf.idpDB,201208-378803-cm.idpDB" />
            <param name="MaxFDRScore" value="0.05" />
            <param name="filter_at_gene_level_condition|MinDistinctPeptides" value="2" />
            <param name="filter_at_gene_level_condition|MinSpectra" value="2" />
            <param name="filter_at_gene_level_condition|MinAdditionalPeptides" value="1" />
            <param name="MinSpectraPerDistinctMatch" value="1" />
            <param name="MinSpectraPerDistinctPeptide" value="1" />
            <param name="MaxProteinGroupsPerPeptide" value="10" />
            <output name="output" file="201208-378803.idpDB" compare="sim_size" delta="500000" />
        </test>
        <!-- Single input with both a source hierarchy file and an isobaric sample mapping file; filters left at their defaults. -->
        <test>
            <param name="input" value="201208-378803-embeddedGenesAndQuantitation.idpDB" />
            <param name="HasAssignSourceHierarchy" value="1" />
            <param name="HasIsobaricSampleMapping" value="1" />
            <param name="IsobaricSampleMapping" value="mapping.tsv" ftype="tabular" />
            <param name="AssignSourceHierarchy" value="assembly.tsv" ftype="tabular" />
            <output name="output" file="201208-378803-embeddedGenesAndQuantitationWithMapping.idpDB" compare="sim_size" delta="500000" />
        </test>
    </tests>
    <help>
<![CDATA[
**What it does**

Merges and filters one or more IDPicker 3 idpDB files into a combined idpDB file. Protein assembly (e.g. parsimony) is conducted on the combined set of proteins.

====

**AssignSourceHierarchy**

The assembly file is a tab-delimited file with two columns that organizes the sources (individual runs) into a hierarchy.
The first column is the name of a source group, the second column is the source path or name to assign to that group.
A forward slash in the group name adds another level to the hierarchy (just like a directory path).

*A simple example:*

::

/repA	repA1.idpDB
/repA	repA2.idpDB
/repB	repB1.idpDB
/repB	repB2.idpDB


*A multi-level example:*

::

/A/1	A1_f1
/A/1	A1_f2
/A/2	A2_f1
/A/2	A2_f2
/B/1	B1_f1
/B/1	B1_f2
/B/2	B2_f1
/B/2	B2_f2

====

**IsobaricSampleMapping**

The mapping file is a tab-delimited file with two columns. The first column is the full path to a source group,
the second column is a comma-delimited list of sample names, in ascending order of reporter ion mass. The special
sample name *Reference*, if present, will be used to normalize the other channels. Samples named *Empty* will be
ignored.


*iTRAQ-4plex example:*

::

/Case/Group1_A123_B456_C789	A123,B456,C789,Reference
/Case/Group2_D123_E456_F789	D123,E456,F789,Reference
/Ctrl/Group3_X123_Y456_Z789	Reference,X123,Y456,Z789
/Ctrl/Group4_U123_V456_None	U123,Reference,V456,Empty


*TMT-10plex example:*

::

/Group1_Cases1-4_Controls1-4	Case1,Case2,Case3,Case4,Reference,Control1,Control2,Control3,Control4,Reference
/Group2_Cases5-8_Controls5-8	Case5,Case6,Case7,Case8,Reference,Control5,Control6,Control7,Control8,Reference

]]>
    </help>
    <!-- Two citations: one publication referenced by DOI, and a BibTeX entry for the
         tools-galaxyp GitHub repository that hosts this wrapper. -->
    <citations>
        <citation type="doi">10.1021/pr900360j</citation>
        <citation type="bibtex">@misc{toolsGalaxyP, author = {Chilton, J, Chambers MC, et al.}, title = {Galaxy Proteomics Tools}, publisher = {GitHub}, journal = {GitHub repository},
          year = {2015}, url = {https://github.com/galaxyproteomics/tools-galaxyp}}</citation> <!-- TODO: fix substitution of commit ", commit = {$sha1$}" -->
    </citations>
</tool>