view ebi_metagenomics_run_downloader.xml @ 0:e2e9fae080ad draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ebi_tools commit 7a9c88c1c80b80aaa63e55e9d9125b6a4dd695ac
author iuc
date Thu, 01 Dec 2016 15:27:59 -0500
parents
children
line wrap: on
line source

<tool id="ebi_metagenomics_run_downloader" name="Download run data" version="0.1.0">
    <description>from EBI Metagenomics database</description>

    <macros>
        <import>macros.xml</import>
    </macros>

    <!-- Expands the "requirements" macro (not defined in this file, so presumably
         provided by the imported macros.xml) and supplies the curl 7.49.0
         package requirement as the content of the expansion. -->
    <expand macro="requirements">
        <requirement type="package" version="7.49.0">curl</requirement>
    </expand>

    <command detect_errors="exit_code"><![CDATA[
        ## Pick the chunking argument handed to the download script: every
        ## sequence download except the predicted tRNAs, and the InterProScan
        ## functional download, use 'multiple_chunks'; all others 'single_chunk'.
        #if ( $information.type == 'sequences' and $information.to_download != 'ncRNA-tRNA-FASTA') or ($information.type == 'function' and $information.to_download == 'InterProScan')
            #set $chunk_mode = 'multiple_chunks'
        #else
            #set $chunk_mode = 'single_chunk'
        #end if

        ## Positional arguments: helper script, run id, information type,
        ## item to download, chunking mode, output file name.
        '$__tool_directory__/download_ebi_metagenomics_run_data'
            '$__tool_directory__/ebeye_urllib.py'
            '$run_id'
            '$information.type'
            '$information.to_download'
            '$chunk_mode'
            'output.dat'
    ]]></command>

    <inputs>
        <!-- Run identifier; passed to the download script as its second
             argument. An empty value is rejected up front because it would be
             passed to the script as an empty string. -->
        <param name="run_id" type="text" label="Identifier of the run in EBI Metagenomics">
            <validator type="empty_field" message="A run identifier is required" />
        </param>

        <!-- The selected information type determines which list of
             downloadable items is offered in "to_download". Both values are
             passed to the download script and are used by the output filters
             to pick the datatype of the result. -->
        <conditional name="information">
            <param name="type" type="select" label="Type of information to download">
                <option value="sequences" selected="true">Sequence data</option>
                <option value="qc-stats">Quality control statistics</option>
                <option value="taxonomy">Taxonomic analysis</option>
                <option value="function">Functional analysis</option>
            </param>

            <when value="sequences">
                <param name="to_download" type="select" label="Sequences to download">
                    <option value="ProcessedReads" selected="true">Processed nucleotide reads</option>
                    <option value="ReadsWithPredictedCDS">Processed reads with pCDS</option>
                    <option value="ReadsWithMatches">Processed reads with annotation</option>
                    <option value="ReadsWithoutMatches">Processed reads without annotation</option>
                    <option value="PredictedCDSWithAnnotation">Predicted CDS with annotation</option>
                    <option value="PredictedCDSWithoutAnnotation">Predicted CDS without annotation</option>
                    <option value="PredictedORFWithoutAnnotation">Predicted ORF without annotation</option>
                    <option value="ncRNA-tRNA-FASTA">Predicted tRNAs</option>
                </param>
            </when>

            <when value="qc-stats">
                <param name="to_download" type="select" label="Quality control statistics to download">
                    <option value="summary" selected="true">Number of sequence reads per Quality Control step</option>
                    <option value="stats">Quality control statistics</option>
                    <option value="length">Read length distribution</option>
                    <option value="gc_bin">Read GC distribution</option>
                    <option value="base">Nucleotide position distribution</option>
                </param>
            </when>

            <when value="taxonomy">
                <param name="to_download" type="select" label="Taxonomic analysis to download">
                    <option value="5S-rRNA-FASTA" selected="true">Reads encoding 5S rRNA</option>
                    <option value="16S-rRNA-FASTA">Reads encoding 16S rRNA</option>
                    <option value="23S-rRNA-FASTA">Reads encoding 23S rRNA</option>
                    <option value="OTU-TSV">OTUs, reads and taxonomic assignments (TSV)</option>
                    <option value="NewickPrunedTree">Phylogenetic tree</option>
                </param>
            </when>

            <when value="function">
                <param name="to_download" type="select" label="Functional analysis to download">
                    <option value="InterProScan" selected="true">InterPro matches</option>
                    <option value="GOAnnotations">Complete GO annotation</option>
                    <option value="GOSlimAnnotations">GO slim annotation</option>
                </param>
            </when>
        </conditional>
    </inputs>

    <outputs>
        <!-- All four outputs collect the same work-dir file (output.dat). The
             filters are mutually exclusive and together cover every selectable
             type/to_download pair, so exactly one dataset is created, with the
             datatype matching the selected download. -->

        <!-- FASTA: every sequence download and the rRNA read sets. -->
        <data name="output_file_fasta" format="fasta" label="${tool.name} for ${run_id}" from_work_dir="output.dat">
            <filter>( information['type'] == 'sequences' or
                ( information['type'] == 'taxonomy' and ( information['to_download'] == '5S-rRNA-FASTA' or
                information['to_download'] == '16S-rRNA-FASTA' or information['to_download'] == '23S-rRNA-FASTA' )))
            </filter>
        </data>
        <!-- TSV: quality control statistics, the OTU table and the InterPro
             matches. The InterPro comparison must use the select option value
             'InterProScan'; comparing against any other string leaves that
             selection without an output dataset. -->
        <data name="output_file_tsv" format="tsv" label="${tool.name} for ${run_id}" from_work_dir="output.dat">
            <filter>( information['type'] == 'qc-stats' or
                ( information['type'] == 'taxonomy' and information['to_download'] == 'OTU-TSV' ) or
                ( information['type'] == 'function' and information['to_download'] == 'InterProScan' ))
            </filter>
        </data>
        <!-- CSV: complete and slim GO annotations. -->
        <data name="output_file_csv" format="csv" label="${tool.name} for ${run_id}" from_work_dir="output.dat">
            <filter>( information['type'] == 'function' and ( information['to_download'] == 'GOAnnotations' or information['to_download'] == 'GOSlimAnnotations' ))
            </filter>
        </data>
        <!-- Newick: the pruned phylogenetic tree. -->
        <data name="output_file_newick" format="newick" label="${tool.name} for ${run_id}" from_work_dir="output.dat">
            <filter>( information['type'] == 'taxonomy' and information['to_download'] == 'NewickPrunedTree')
            </filter>
        </data>
    </outputs>

    <tests>
        <!-- Quality control statistics of a run, compared with a stored
             reference file. -->
        <test>
            <param name="run_id" value="ERR675640" />
            <conditional name="information">
                <param name="type" value="qc-stats" />
                <param name="to_download" value="stats" />
            </conditional>
            <output name="output_file_tsv" file="ERR675640_qc-stats_stats" />
        </test>
        <!-- Predicted CDS with annotation: the FASTA output is checked for
             three complete header lines. The expressions are regular
             expressions, so the literal "." and "|" characters of the headers
             are escaped; an unescaped "|" would be read as an alternation and
             only part of the header would be checked. -->
        <test>
            <param name="run_id" value="ERR675573" />
            <conditional name="information">
                <param name="type" value="sequences" />
                <param name="to_download" value="PredictedCDSWithAnnotation" />
            </conditional>
            <output name="output_file_fasta">
                <assert_contents>
                    <has_line_matching expression="^>ERR675573\.1772-FC81EB0ABXX:7:1101:19215:2564-ATCACGAT-1_1_117_- IPR010690/PF06898/2-34$" />
                    <has_line_matching expression="^>ERR675573\.6692248-FC81EB0ABXX:7:1202:20007:191300-ATCACGAT-1_1_127_- IPR004089/PF00015/2-41\|G3DSA:1\.10\.287\.950/2-41$" />
                    <has_line_matching expression="^>ERR675573\.21754698-FC81EB0ABXX:7:2208:13328:189792-ATCACGAT-1_1_150_- IPR027414/G3DSA:2\.70\.98\.50/7-45$" />
                </assert_contents>
            </output>
        </test>
    </tests>

    <help><![CDATA[
**What it does**

The European Bioinformatics Institute (EMBL-EBI) maintains the world’s most comprehensive range of freely available and up-to-date molecular databases.

This tool downloads data related to a run in the EBI Metagenomics database.
    ]]></help>

    <!-- Expands the "citations" macro; it is not defined in this file, so it
         presumably comes from the imported macros.xml. -->
    <expand macro="citations" />
</tool>