Mercurial > repos > galaxyp > percolator

<tool id="batched_set_list_creator" name="Create nested list" version="3.2">
    <description>based on filenames and batch sizes</description>
    <!-- Any exit status of 1 or higher from the command marks the job as failed. -->
    <stdio>
        <exit_code range="1:" level="fatal" />
    </stdio>
    <!--
        Cheetah template that builds the nested_collection.py command line.

        Options emitted, in order:
          * batchsize: only when the optional batchsize parameter evaluates as true
            in the template (so an empty value emits nothing);
          * pool-ids: one quoted argument per "Batch pools" repeat entry, omitted
            entirely when no repeat entries exist;
          * real-names: the element identifiers of the input list;
          * galaxy-files: the dataset stored under each identifier, iterated over the
            same keys so both argument lists line up position by position.

        The template is wrapped in CDATA so characters such as ">" need no XML escaping.
        The script path is quoted so a tool directory containing spaces still resolves.
        Element identifiers are user-supplied, so any single quote inside them is
        shell-escaped (close quote, escaped quote, reopen quote); the script still
        receives the identifier unchanged.
    -->
    <command><![CDATA[
        python '$__tool_directory__/nested_collection.py'
        #if $batchsize
            --batchsize $batchsize
        #end if
        #if len($poolids) > 0
            --pool-ids
            #for $pool in $poolids
                '${pool.pool_identifier}'
            #end for
        #end if
        --real-names
        #for $fname in $listtobatch.keys()
            '${str($fname).replace("'", "'\\''")}'
        #end for
        --galaxy-files
        #for $fname in $listtobatch.keys()
            '$listtobatch[$fname]'
        #end for
    ]]></command>
    <inputs>
        <!--
            Optional upper bound on the number of files per batch; an empty value means
            no limit. min="0" rejects negative sizes while still accepting every value
            that was meaningful before.
        -->
        <param name="batchsize" type="integer" optional="true" value="" min="0" label="Maximal amount of fractions to batch." help="No value means no maximum" />
        <!-- Selects which of the three output collections (and which datatype) is produced. -->
        <param name="filetype" type="select" label="Which datatype to nest?">
            <option value="mzid">mzIdentML</option>
            <option value="percout">Percolator out</option>
            <option value="tabular">Tabular</option>
        </param>
        <!--
            Zero or more filename fragments, each naming one pool. Three consecutive
            underscores are the separator used by the output discovery patterns, so the
            validator enforces the restriction that the help text only asks for.
        -->
        <repeat name="poolids" title="Batch pools" help="Pools containing batches (optional, not using this will result in one pool)">
            <param name="pool_identifier" type="text" label="Filename part identifying batchpool"
            help="Specify part of the input filenames that are unique for each pool, e.g set_A. Do not use 3 consecutive underscores.">
                <validator type="regex" message="Pool identifiers must not contain 3 consecutive underscores.">^(?!.*___).*$</validator>
            </param>
        </repeat>
        <!-- Flat list collection whose element identifiers are matched against the pool identifiers. -->
        <param name="listtobatch" type="data_collection" collection_type="list" label="List of files to batch" />
    </inputs>
    <outputs>
        <!--
            One list:list collection per selectable datatype; each filter keeps its
            collection only when the filetype parameter equals the matching option value.
            All three share the same discovery pattern: the part before the three
            underscores becomes the outer element identifier (identifier_0), the part
            between the underscores and ".data" becomes the inner one (identifier_1).
            Only the assigned extension differs between the collections.
        -->
        <collection name="batched_fractions_mzid"
                    type="list:list"
                    label="Pooled batched mzIdentML data">
            <filter>filetype == "mzid"</filter>
            <discover_datasets
                pattern="(?P&lt;identifier_0&gt;\w+[^_][^_][^_])___(?P&lt;identifier_1&gt;[^_]+)\.data"
                ext="mzid"
                visible="false" />
        </collection>
        <collection name="batched_fractions_perco"
                    type="list:list"
                    label="Pooled batched percolator data">
            <filter>filetype == "percout"</filter>
            <discover_datasets
                pattern="(?P&lt;identifier_0&gt;\w+[^_][^_][^_])___(?P&lt;identifier_1&gt;[^_]+)\.data"
                ext="percout"
                visible="false" />
        </collection>
        <collection name="batched_fractions_tab"
                    type="list:list"
                    label="Pooled batched tabular data">
            <filter>filetype == "tabular"</filter>
            <discover_datasets
                pattern="(?P&lt;identifier_0&gt;\w+[^_][^_][^_])___(?P&lt;identifier_1&gt;[^_]+)\.data"
                ext="tabular"
                visible="false" />
        </collection>
    </outputs>
    <tests>
        <!--
            Batch size only, no pool identifiers: four mzid inputs with a batch size of 2
            are expected to end up in a single default pool ("pool0") split into two
            batches of two files each.
        -->
        <test>
            <param name="filetype" value="mzid" />
            <param name="batchsize" value="2" />
            <param name="listtobatch">
                <collection type="list">
                    <element name="fraction_one_spectra" value="empty_file1.mzid" />
                    <element name="fraction_two_spectra" value="empty_file2.mzid" />
                    <element name="fraction_three_spectra" value="empty_file3.mzid" />
                    <element name="fraction_four_spectra" value="empty_file4.mzid" />
                </collection>
            </param>
            <output_collection name="batched_fractions_mzid" type="list:list">
                <element name="pool0_batch0">
                    <element name="inputfn0" file="empty_file1.mzid" ftype="mzid" />
                    <element name="inputfn1" file="empty_file2.mzid" ftype="mzid" />
                </element>
                <element name="pool0_batch1">
                    <element name="inputfn0" file="empty_file3.mzid" ftype="mzid" />
                    <element name="inputfn1" file="empty_file4.mzid" ftype="mzid" />
                </element>
            </output_collection>
        </test>
        <!--
            Pool identifiers only, no batch size: six inputs whose names contain "set1"
            or "set2" are expected to form one batch of three files per pool. The batch
            number is not restarted for the second pool (set2 gets batch1). The .mzid
            fixture files are reused here with the tabular output type.
        -->
        <test>
            <param name="filetype" value="tabular" />
            <repeat name="poolids">
                <param name="pool_identifier" value="set1" />
            </repeat>
            <repeat name="poolids">
                <param name="pool_identifier" value="set2" />
            </repeat>
            <param name="listtobatch">
                <collection type="list">
                    <element name="fr_one_set1_spectra" value="empty_file1.mzid" />
                    <element name="fr_two_set1_spectra" value="empty_file2.mzid" />
                    <element name="fr_three_set1_spectra" value="empty_file3.mzid" />
                    <element name="fr_one_set2_spectra" value="empty_file4.mzid" />
                    <element name="fr_two_set2_spectra" value="empty_file5.mzid" />
                    <element name="fr_three_set2_spectra" value="empty_file6.mzid" />
                </collection>
            </param>
            <output_collection name="batched_fractions_tab" type="list:list">
                <element name="set1_batch0">
                    <element name="inputfn0" file="empty_file1.mzid" ftype="tabular" />
                    <element name="inputfn1" file="empty_file2.mzid" ftype="tabular" />
                    <element name="inputfn2" file="empty_file3.mzid" ftype="tabular" />
                </element>
                <element name="set2_batch1">
                    <element name="inputfn0" file="empty_file4.mzid" ftype="tabular" />
                    <element name="inputfn1" file="empty_file5.mzid" ftype="tabular" />
                    <element name="inputfn2" file="empty_file6.mzid" ftype="tabular" />
                </element>
            </output_collection>
        </test>
        <!--
            Batch size and pool identifiers combined: with a batch size of 2, the three
            files of each pool are expected to be split into a batch of two and a batch
            of one, and batching never crosses from set1 into set2. Batch numbers keep
            counting across pools (set1: batch0, batch1; set2: batch2, batch3). The .mzid
            fixture files are reused here with the percout output type.
        -->
        <test>
            <param name="filetype" value="percout" />
            <param name="batchsize" value="2" />
            <repeat name="poolids">
                <param name="pool_identifier" value="set1" />
            </repeat>
            <repeat name="poolids">
                <param name="pool_identifier" value="set2" />
            </repeat>
            <param name="listtobatch">
                <collection type="list">
                    <element name="fr_one_set1_spectra" value="empty_file1.mzid" />
                    <element name="fr_two_set1_spectra" value="empty_file2.mzid" />
                    <element name="fr_three_set1_spectra" value="empty_file3.mzid" />
                    <element name="fr_one_set2_spectra" value="empty_file4.mzid" />
                    <element name="fr_two_set2_spectra" value="empty_file5.mzid" />
                    <element name="fr_three_set2_spectra" value="empty_file6.mzid" />
                </collection>
            </param>
            <output_collection name="batched_fractions_perco" type="list:list">
                <element name="set1_batch0">
                    <element name="inputfn0" file="empty_file1.mzid" ftype="percout" />
                    <element name="inputfn1" file="empty_file2.mzid" ftype="percout" />
                </element>
                <element name="set1_batch1">
                    <element name="inputfn0" file="empty_file3.mzid" ftype="percout" />
                </element>
                <element name="set2_batch2">
                    <element name="inputfn0" file="empty_file4.mzid" ftype="percout" />
                    <element name="inputfn1" file="empty_file5.mzid" ftype="percout" />
                </element>
                <element name="set2_batch3">
                    <element name="inputfn0" file="empty_file6.mzid" ftype="percout" />
                </element>
            </output_collection>
        </test>
    </tests>
    <help>
    Creates a nested collection from a list of datasets, grouping the datasets into batches of
    a specific size. Useful when e.g. creating inputs to sqt2pin, msgfplus2pin or tandem2pin.
    Use this feature when you want to batch a specific amount of datasets, or datasets with
    a common file name pattern, or both.
    The tool generates a nested collection whose outer elements are batches, named after their
    pool and batch number, and whose inner elements are the input files of that batch.
    A pool is specified as a pattern that occurs in the input file names,
    which makes sure the batching isn't continued through very different input sets
    if that is a problem. Instead, for each filename pattern it generates batches of the
    specified size.
    </help>
</tool>
author	galaxyp
date	Sat, 08 Apr 2017 08:23:12 -0400
parents	7a0951d0e13e
children	154147805a33