view chewbbaca.xml @ 0:a3b5bee8ae1b draft

Uploaded
author iss
date Fri, 03 May 2019 10:06:09 -0400
parents
children 872ed5ee7b98
line wrap: on
line source

<tool id="chewBBACA" name="chewBBACA" version="2.0">
    <!-- Short tagline for the tool. Kept on a single line so that no newline or
         indentation ends up inside the description text. -->
    <description>BSR-Based Allele Calling Algorithm</description>
    <!-- Software packages this tool declares as dependencies. Every entry is of
         type "package" and pins one exact version; the last entry is chewbbaca
         itself.
         NOTE(review): "Python" and "SPARQLWrapper" are written in mixed case while
         all other names are lowercase; confirm the dependency resolver in use
         matches these names as written. -->
    <requirements>
        <requirement type="package" version="3.6">Python</requirement>
        <requirement type="package" version="1.15.3">numpy</requirement>
        <requirement type="package" version="0.19.1">scipy</requirement>
        <requirement type="package" version="5.22.2.1">perl</requirement>
        <requirement type="package" version="1.72">biopython</requirement>
        <requirement type="package" version="3.8.1">plotly</requirement>
        <requirement type="package" version="1.8.2">SPARQLWrapper</requirement>
        <requirement type="package" version="0.23.4">pandas</requirement>
        <requirement type="package" version="2.5.0">blast</requirement>
        <requirement type="package" version="2.6.3">prodigal</requirement>
        <requirement type="package" version="2.1">clustalw</requirement>
        <requirement type="package" version="7.313">mafft</requirement>
        <requirement type="package" version="2.0.16">chewbbaca</requirement>
    </requirements>
    <!-- Error handling: only exit status 0 counts as success. -->
    <stdio>
        <!-- Negative exit codes are fatal ... -->
        <exit_code range=":-1" level="fatal" description="Tool exception" />
        <!-- ... and so is every positive exit code. -->
        <exit_code range="1:" level="fatal" description="Tool exception" />
    </stdio>
    <!-- Command template (Cheetah). Runs the chewBBACA.pl wrapper shipped in the
         tool directory with five positional arguments:
           1. the selected function name;
           2. the input dataset(s): a comma-joined list for CreateSchema and
              AlleleCall, a single path for SchemaEvaluator, TestGenomeQuality,
              ExtractCgMLST and RemoveGenes;
           3. the comma-joined element identifiers of the inputs (AlleleCall
              only, otherwise 'NULL');
           4. the comma-joined output dataset paths for the selected function
              (or 'NULL' for ExtractCgMLST);
           5. one double-quoted string holding the chewBBACA.py command line
              assembled from the selected options.
         The body is wrapped in CDATA so shell and Cheetah characters never need
         XML escaping. Do not place XML comments inside the command element. -->
    <command><![CDATA[
        perl 
        $__tool_directory__/chewBBACA.pl $selectFunction.myFunctions
          ## Argument 2: input dataset path(s).
          #if str( $selectFunction.myFunctions ) in ('CreateSchema', 'AlleleCall')
            '${",".join(map(str, $selectFunction.input1))}'
          #elif str( $selectFunction.myFunctions ) in ('SchemaEvaluator', 'TestGenomeQuality', 'ExtractCgMLST', 'RemoveGenes')
            $selectFunction.input1
          #else
            'NULL'
          #end if
          ## Argument 3: element identifiers of the inputs (AlleleCall only).
          #if str( $selectFunction.myFunctions ) == 'AlleleCall'
                #set $input_names = []
                #for $input in $selectFunction.input1
                    $input_names.append($input.element_identifier)
                #end for
                #set $input_name = ",".join(map(str, $input_names))
            '$input_name'
          #else
            'NULL'
          #end if
          ## Argument 4: output dataset path(s) for the selected function.
          #if str( $selectFunction.myFunctions ) == 'CreateSchema'
            '$schema'
          #elif str( $selectFunction.myFunctions ) == 'AlleleCall'
            '$statistics,$contigsinfo,$alleles,$logginginfo,$repeatedloci'
          #elif str( $selectFunction.myFunctions ) == 'SchemaEvaluator'
            '$schemaplot'
          #elif str( $selectFunction.myFunctions ) == 'TestGenomeQuality'
            '$thresholdplot,$removedgenomes'
          #elif str( $selectFunction.myFunctions ) == 'RemoveGenes'
            '$removedgenes'
          #else
            'NULL'
          #end if

          ## Argument 5: the chewBBACA.py command line, passed as one quoted string.
          "chewBBACA.py $selectFunction.myFunctions
          #if str( $selectFunction.myFunctions ) in ('CreateSchema', 'AlleleCall', 'TestGenomeQuality', 'ExtractCgMLST')
            -o output_dir
          #end if
          #if str( $selectFunction.myFunctions ) == 'RemoveGenes'
            -o output_removegenes
          #end if
          #if str( $selectFunction.myFunctions ) in ('CreateSchema', 'AlleleCall')
            -i input_dir
            --cpu \${GALAXY_SLOTS:-4}
          #end if
          #if str( $selectFunction.myFunctions ) in ('SchemaEvaluator', 'TestGenomeQuality', 'ExtractCgMLST', 'RemoveGenes')
            -i $selectFunction.input1
          #end if
          #if str( $selectFunction.myFunctions ) in ('CreateSchema', 'AlleleCall')
            #if $selectFunction.bsr
              --bsr $selectFunction.bsr
            #end if

            ## Data table lookup: take the last column of the first row whose first
            ## column equals the selected value. list() is required because filter()
            ## returns a non-subscriptable iterator on Python 3.
            #if str( $selectFunction.ptfmode.ptf_select ) == "system"
              --ptf ${ list( filter( lambda x: str( x[0] ) == str( $selectFunction.ptfmode.ptf_system ), $__app__.tool_data_tables[ 'chewbbaca_ptfs' ].get_fields() ) )[0][-1] }
            #elif str( $selectFunction.ptfmode.ptf_select ) == "user"
              --ptf $selectFunction.ptfmode.ptf_user
            #end if
          #end if
          #if str( $selectFunction.myFunctions ) == 'CreateSchema'
            #if $selectFunction.minBpLocus
              -l $selectFunction.minBpLocus
            #end if
          #end if
          #if str( $selectFunction.myFunctions ) == 'AlleleCall'
            ## Same first-column / last-column lookup as for the training file above.
            #if str( $selectFunction.mode.schema_select ) == "system"
              -g ${ list( filter( lambda x: str( x[0] ) == str( $selectFunction.mode.reference ), $__app__.tool_data_tables[ 'chewbbaca_schemas' ].get_fields() ) )[0][-1] }
            #else
              -g $selectFunction.mode.genes
            #end if
          #end if
          #if str( $selectFunction.myFunctions ) == 'SchemaEvaluator'
            --cpu \${GALAXY_SLOTS:-4}
            #if $selectFunction.conserved
              -p
            #end if
            -l output_rms/SchemaEvaluator.html
            -ta $selectFunction.ncbiTA
            -t $selectFunction.threshold
            #if $selectFunction.title
              --title '$selectFunction.title'
            #end if
            -s $selectFunction.numBoxplots
            #if $selectFunction.light
              --light
            #end if
          #end if
          #if str( $selectFunction.myFunctions ) == 'TestGenomeQuality'
            -n $selectFunction.maxNumIterations
            -t $selectFunction.maxThreshold
            -s $selectFunction.stepThreshold
          #end if
          #if str( $selectFunction.myFunctions ) == 'ExtractCgMLST'
            #if $selectFunction.genes
              -r $selectFunction.genes
            #end if
            #if $selectFunction.genomes
              -g $selectFunction.genomes
            #end if
            #if $selectFunction.maxPresence
              -p $selectFunction.maxPresence
            #end if
          #end if
          #if str( $selectFunction.myFunctions ) == 'RemoveGenes'
            -g $selectFunction.genes
          #end if
          "
    ]]></command>

    <inputs>
        <!-- Top-level switch: the selected function decides which parameter set is
             shown and which branch of the command template is rendered. -->
        <conditional name="selectFunction">
            <param name="myFunctions" type="select" label="Select function">
                <option value="CreateSchema">CreateSchema: Create a gene by gene schema based on genomes</option>
                <option value="AlleleCall" selected="true">AlleleCall: Perform allele call for target genomes</option>
                <option value="SchemaEvaluator">SchemaEvaluator: Tool that builds an html output to better navigate/visualize your schema</option>
                <option value="TestGenomeQuality">TestGenomeQuality: Analyze your allele call output to refine schemas</option>
                <option value="ExtractCgMLST">ExtractCgMLST: Select a subset of loci without missing data (to be used as PHYLOViZ input)</option>
                <option value="RemoveGenes">RemoveGenes: Remove a provided list of loci from your allele call output</option>
            </param>
            <when value="CreateSchema">
                <!-- One or more genome FASTA files, an optional BSR cut-off, a Prodigal
                     training file (system or user supplied) and an optional minimum
                     locus length. -->
                <param name="input1" format="fasta" type="data" multiple="true" label="Selection of genome files (fasta)" />
                <param name="bsr" type="text" value="0.6" optional="true" label="minimum BSR score" />
                <conditional name="ptfmode">
                  <param name="ptf_select" type="select" display="radio" label="Which Prodigal training file would you like to use?">
                    <option value="system" selected="True">System training file</option>
                    <option value="user">Own training file</option>
                  </param>
                  <when value="system">
                    <param name="ptf_system" type="select" label="Choose Prodigal training file">
                      <options from_data_table="chewbbaca_ptfs" />
                    </param>
                  </when>
                  <when value="user">
                    <param name="ptf_user" format="binary" type="data" optional="true" multiple="false" label="Prodigal Training File" />
                  </when>
                </conditional>
                <param name="minBpLocus" type="integer" value="200" optional="true" label="minimum bp locus length" help="Integer" />
            </when>
            <when value="AlleleCall">
                <!-- One or more genome FASTA files, a reference schema (system data
                     table or user file), an optional BSR cut-off and an optional
                     Prodigal training file ("No training file" is the default). -->
                <param name="input1" format="fasta" type="data" multiple="true" label="Selection of genome files (fasta)" />
                <conditional name="mode">
                  <param name="schema_select" type="select" display="radio" label="Which schema would you like to use as a reference?">
                    <option value="system" selected="True">System reference</option>
                    <option value="user">Own reference</option>
                  </param>
                  <when value="system">
                    <param name="reference" type="select" label="Choose reference">
                      <options from_data_table="chewbbaca_schemas" />
                    </param>
                  </when>
                  <when value="user">
                    <param name="genes" format="txt" type="data" label="File with list of genes (fasta)" />
                  </when>
                </conditional>
                <param name="bsr" type="text" value="0.6" optional="true" label="minimum BSR score" />
                <conditional name="ptfmode">
                  <param name="ptf_select" type="select" display="radio" label="Which Prodigal training file would you like to use?">
                    <option value="system">System training file</option>
                    <option value="user">Own training file</option>
                    <option value="noptf" selected="True">No training file</option>
                  </param>
                  <when value="system">
                    <param name="ptf_system" type="select" label="Choose Prodigal training file">
                      <options from_data_table="chewbbaca_ptfs" />
                    </param>
                  </when>
                  <when value="user">
                    <param name="ptf_user" format="binary" type="data" optional="true" multiple="false" label="Prodigal Training File" />
                  </when>
                  <when value="noptf" />
                </conditional>
                <!--<param name="forceContinue" type="boolean" truevalue="true" falsevalue="false" checked="False" label="force continue" />-->
                <!--<param name="forceReset" type="boolean" truevalue="true" falsevalue="false" checked="False" label="force reset" />-->
                <!--<param name="jsonFile" type="boolean" truevalue="true" falsevalue="false" checked="False" label="report in json file" />-->
            </when>
            <when value="SchemaEvaluator">
                <!-- A gene list file plus the report options that the command template
                     passes on as -p, -ta, -t, title, -s and light. -->
                <param name="input1" format="txt" type="data" label="File with list of genes (fasta)" />
                <param name="conserved" type="boolean" truevalue="true" falsevalue="false" checked="False" label="One bad allele still makes gene conserved" />
                <param name="ncbiTA" type="integer" value="11" optional="true" label="ncbi translation table" help="Integer" />
                <param name="threshold" type="float" value="0.05" optional="true" label="Threshold" />
                <param name="title" type="text" value="My Analyzed wg/cg MLST Schema - Rate My Schema" optional="true" label="title on the html plot" />
                <param name="numBoxplots" type="integer" value="500" optional="true" label="number of boxplots per page (more than 500 can make the page very slow)" help="Integer" />
                <param name="light" type="boolean" truevalue="true" falsevalue="false" checked="False" label="skip clustal and mafft run" />
            </when>
            <when value="TestGenomeQuality">
                <!-- Raw allele call matrix plus the iteration and threshold settings
                     passed on as -n, -t and -s. -->
                <param name="input1" format="tsv" type="data" label="raw allele call matrix file" />
                <param name="maxNumIterations" type="integer" value="12" label="maximum number of iterations" help="Each iteration removes a set of genomes over the defined threshold (-t) and recalculates all loci presence percentages" />
                <param name="maxThreshold" type="integer" value="200" label="maximum threshold of bad calls above 95 percent" help="This threshold represents the maximum number of missing loci allowed, for each genome independently, before removing it (genome)" />
                <param name="stepThreshold" type="integer" value="5" label="step between each threshold analysis (suggested 5)" help="Integer" />
            </when>
            <when value="ExtractCgMLST">
                <!-- Matrix to clean, optional lists of genes and genomes to remove, and
                     an optional presence value restricted to the range 0 to 1. -->
                <param name="input1" format="tsv" type="data" label="input file to clean" />
                <param name="genes" format="txt" type="data" optional="true" label="list of genes to remove, one per line" help="e.g. the list of gene detected by ParalogPrunning.py" />
                <param name="genomes" format="txt" type="data" optional="true" label="list of genomes to remove, one per line" help="e.g. list of genomes to be removed selected based on testGenomeQuality results" />
                <param name="maxPresence" type="float" value="1.0" optional="true" label="maximum presence (e.g 0.95)" >
                    <validator type="in_range" min="0" max="1" />
                </param>
            </when>
            <when value="RemoveGenes">
                <!-- Matrix file and the list of genes to remove from it. -->
                <param name="input1" format="tsv" type="data" label="main matrix file from which to remove" />
                <param name="genes" format="txt" type="data" label="list of genes to remove" />
            </when>
        </conditional>
    </inputs>

    <!-- Output datasets. Each one carries a filter on the selected function, so
         only the datasets belonging to that function are created. -->
    <outputs>
        <!-- CreateSchema -->
        <data name="schema" format="txt" label="${tool.name}:CreateSchema on ${on_string}">
            <filter>selectFunction['myFunctions'] == "CreateSchema"</filter>
        </data>

        <!-- AlleleCall: five datasets whose paths are handed to the wrapper script -->
        <data name="statistics" format="tsv" label="${tool.name}:AlleleCall on ${on_string}: Statistics">
            <filter>selectFunction['myFunctions'] == "AlleleCall"</filter>
        </data>
        <data name="contigsinfo" format="tsv" label="${tool.name}:AlleleCall on ${on_string}: Contigs Info">
            <filter>selectFunction['myFunctions'] == "AlleleCall"</filter>
        </data>
        <data name="alleles" format="tsv" label="${tool.name}:AlleleCall on ${on_string}: Alleles">
            <filter>selectFunction['myFunctions'] == "AlleleCall"</filter>
        </data>
        <data name="logginginfo" format="txt" label="${tool.name}:AlleleCall on ${on_string}: Logging info">
            <filter>selectFunction['myFunctions'] == "AlleleCall"</filter>
        </data>
        <data name="repeatedloci" format="txt" label="${tool.name}:AlleleCall on ${on_string}: Repeated Loci">
            <filter>selectFunction['myFunctions'] == "AlleleCall"</filter>
        </data>

        <!-- SchemaEvaluator -->
        <data name="schemaplot" format="tar" label="${tool.name}:SchemaEvaluator on ${on_string}">
            <filter>selectFunction['myFunctions'] == "SchemaEvaluator"</filter>
        </data>

        <!-- TestGenomeQuality: collected from files inside output_dir -->
        <data name="thresholdplot" format="html" from_work_dir="output_dir/GenomeQualityPlot.html" label="${tool.name}:TestGenomeQuality on ${on_string}: Plot">
            <filter>selectFunction['myFunctions'] == "TestGenomeQuality"</filter>
        </data>
        <data name="removedgenomes" format="tsv" from_work_dir="output_dir/removedGenomes.txt" label="${tool.name}:TestGenomeQuality on ${on_string}: Removed genomes">
            <filter>selectFunction['myFunctions'] == "TestGenomeQuality"</filter>
        </data>

        <!-- ExtractCgMLST: collected from files inside output_dir -->
        <data name="cgmlst" format="tsv" from_work_dir="output_dir/cgMLST.tsv" label="${tool.name}:ExtractCgMLST on ${on_string}: cgMLST">
            <filter>selectFunction['myFunctions'] == "ExtractCgMLST"</filter>
        </data>
        <data name="cgmlstschema" format="txt" from_work_dir="output_dir/cgMLSTschema.txt" label="${tool.name}:ExtractCgMLST on ${on_string}: cgMLSTschema">
            <filter>selectFunction['myFunctions'] == "ExtractCgMLST"</filter>
        </data>

        <!-- RemoveGenes: collected from output_removegenes.tsv in the working directory -->
        <data name="removedgenes" format="tsv" from_work_dir="output_removegenes.tsv" label="${tool.name}:RemoveGenes on ${on_string}">
            <filter>selectFunction['myFunctions'] == "RemoveGenes"</filter>
        </data>
    </outputs>
    <tests>
        <!-- AlleleCall on a single FASTA file against the system schema
             "schema_pubMLST", without a Prodigal training file. Nested parameters
             are addressed with pipe-separated paths that mirror the conditionals
             declared in the inputs section (ptf_select lives in "ptfmode", not in
             "mode"). The test passes when the statistics output mentions the input
             file name. -->
        <test>
            <param name="selectFunction|myFunctions" value="AlleleCall" />
            <param name="selectFunction|input1" value="a_contigs.fasta" ftype="fasta" />
            <param name="selectFunction|mode|schema_select" value="system" />
            <param name="selectFunction|mode|reference" value="schema_pubMLST" />
            <param name="selectFunction|ptfmode|ptf_select" value="noptf" />
            <output name="statistics">
                <assert_contents>
                    <has_text text="a_contigs.fasta" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help>
**chewBBACA** stands for "BSR-Based Allele Calling Algorithm". The "chew" part could be thought of as "Comprehensive and Highly Efficient Workflow",
but at this point it still needs a bit of work to justify that claim, so we just add "chew" to add extra coolness to the software name.
This tool is in beta testing.

The development of the tools has been supported by the INNUENDO project (https://www.innuendoweb.org) co-funded by the European Food Safety Authority (EFSA), grant agreement GP/EFSA/AFSCO/2015/01/CT2
("New approaches in identifying and characterizing microbial and chemical hazards") and by the ONEIDA project (LISBOA-01-0145-FEDER-016417) co-funded by FEEI - “Fundos Europeus Estruturais e de Investimento”
from “Programa Operacional Regional Lisboa 2020” and by national funds from FCT - “Fundação para a Ciência e a Tecnologia” and BacGenTrack (TUBITAK/0004/2014)
[FCT/ Scientific and Technological Research Council of Turkey (Türkiye Bilimsel ve Teknolojik Araştırma Kurumu, TÜBITAK)].
    </help>
    <!-- References attached to the tool: one BibTeX entry pointing at the
         chewBBACA_schemas repository, followed by two DOI citations. -->
    <citations>
      <citation type="bibtex">@ARTICLE{andrews_s,
            author = {Rossi, M and Silva, M and Ribeiro-Gonçalves, B and Silva, DN and Machado, MP and Oleastro, M and Borges, V and Isidro, J and Gomes, JP and Vieira, L and Barker, DOR and Llarena, AK and Halkilahti,
            J and Jaakkonen, A and Palma, F and Culebro, A and Kivistö, R and Hänninen, ML and Laukkanen-Ninios, R and Fredriksson-Ahomaa, M and Salmenlinna, S and Hakkinen, M and Garaizer, J and Bikandi, J and Hilbert,
            F and Taboada, EN and Carriço, JA},
            keywords = {bioinformatics, ngs, mlst},
            title = {{INNUENDO whole and core genome MLST databases and schemas for foodborne pathogens}},
            url = {https://github.com/TheInnuendoProject/chewBBACA_schemas}
        }</citation>
      <citation type="doi">10.1099/mgen.0.000166</citation>
      <citation type="doi">10.1371/journal.pgen.1007261</citation>
    </citations>
</tool>