view plasmidfinder.xml @ 0:2b6e795b22a9 draft

planemo upload for repository https://github.com/mesocentre-clermont-auvergne/galaxy-tools/tree/master/tools/plasmidfinder commit 32b1446d0da698b945d7f8b5df6d251b9989d3b1
author iuc
date Mon, 19 Sep 2022 08:47:42 +0000
parents
children eccc7495c3d9
line wrap: on
line source

<tool id="plasmidfinder" name="PlasmidFinder" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>Plasmid identification in bacteria.</description>
    <macros>
        <!-- Only macro file imported here; presumably it supplies the @...@ tokens
             and the xrefs / requirements / citations macros expanded in this tool. -->
        <import>macro.xml</import>
    </macros>
    <expand macro="xrefs"/>
    <expand macro="requirements"/>
    <command detect_errors="aggressive"><![CDATA[
        ## Without pipefail the pipeline's exit status is the one of tee,
        ## which would hide a failing plasmidfinder.py run.
        set -o pipefail &&
        mkdir output_dir temp_dir &&
        plasmidfinder.py
        -i '$input.input_file'
        -p '$input.database_name.fields.path'
        -l '$options.min_cov'
        -t '$options.threshold'
        ## Search method: blastn for assemblies (fasta), KMA for raw reads (fastq).
        ## is_of_type also matches datatype subtypes, so every format accepted by
        ## the input_file parameter gets an explicit -mp value.
        #if $input.input_file.is_of_type('fasta', 'fasta.gz')
            -mp blastn
        #else if $input.input_file.is_of_type('fastq', 'fastq.gz')
            -mp kma
        #end if
        -x
        -o output_dir
        -tmp temp_dir
        #*======================================
                            LOG file
        ======================================*#
        | tee '$log_file'
        ]]>
    </command>
    <inputs>
        <!-- Sequence file to screen and the reference database to screen it against. -->
        <section name="input" title="Input parameters" expanded="true">
            <param name="input_file"
                   type="data"
                   format="fasta,fastq,fasta.gz,fastq.gz"
                   label="Choose a fasta or fastq file"
                   help="File to be analyzed"/>
            <param name="database_name" type="select" label="PlasmidFinder database">
                <options from_data_table="plasmidfinder_db">
                    <validator type="no_options" message="No PlasmidFinder database is available"/>
                </options>
            </param>
        </section>
        <!-- Thresholds passed to plasmidfinder.py as -l (coverage) and -t (identity). -->
        <section name="options" title="Options">
            <param name="min_cov"
                   type="float"
                   min="0"
                   max="1"
                   value="0.6"
                   label="Minimal coverage"
                   help="Choose a minimum coverage value (default: 0.6)"/>
            <param name="threshold"
                   type="float"
                   min="0"
                   max="1"
                   value="0.95"
                   label="Minimal identity"
                   help="Choose a minimum identity value (default: 0.95)"/>
        </section>
        <!-- The selected values are tested by the filters of the output datasets. -->
        <section name="output_files" title="Output file selection">
            <param name="output_selection"
                   type="select"
                   display="checkboxes"
                   multiple="true"
                   label="Output files selection">
                <option value="data_json">JSON file result</option>
                <option value="hit_fasta" selected="true">Hits in genome</option>
                <option value="plasmid_fasta" selected="true">Plasmid hits</option>
                <option value="result_tsv" selected="true">Plasmid results</option>
                <option value="result_txt" selected="true">Raw results</option>
                <option value="logfile">Log file</option>
            </param>
        </section>
    </inputs>
    <outputs>
        <!-- Each dataset is only created when its key is selected in
             output_files/output_selection. -->
        <data name="json_file" format="json" from_work_dir="output_dir/data.json" label="${tool.name} on ${on_string}: data.json">
            <filter>"data_json" in output_files['output_selection']</filter>
        </data>
        <data name="hit_file" format="fasta" from_work_dir="output_dir/Hit_in_genome_seq.fsa" label="${tool.name} on ${on_string}: hit_in_genome.fasta">
            <filter>"hit_fasta" in output_files['output_selection']</filter>
        </data>
        <data name="plasmid_file" format="fasta" from_work_dir="output_dir/Plasmid_seqs.fsa" label="${tool.name} on ${on_string}: plasmid.fasta">
            <filter>"plasmid_fasta" in output_files['output_selection']</filter>
        </data>
        <data name="result_file" format="tabular" from_work_dir="output_dir/results_tab.tsv" label="${tool.name} on ${on_string}: results.tsv">
            <filter>"result_tsv" in output_files['output_selection']</filter>
        </data>
        <data name="raw_file" format="txt" from_work_dir="output_dir/results.txt" label="${tool.name} on ${on_string}: raw_result.txt">
            <filter>"result_txt" in output_files['output_selection']</filter>
        </data>
        <!-- The command writes the log directly to $log_file through tee, so no
             from_work_dir is declared; it used to name the output_dir directory,
             which is not a file that can be collected as a dataset. -->
        <data name="log_file" format="txt" label="${tool.name} on ${on_string}: log file">
            <filter>"logfile" in output_files['output_selection']</filter>
        </data>
    </outputs>
    <tests>
        <!-- test_1: default thresholds, assembled contigs (fasta), all six outputs -->
        <test expect_num_outputs="6">
            <section name="input">
                <param name="input_file" value="contigs.fasta"/>
                <param name="database_name" value="test-plasmindfinder-db"/>
            </section>
            <section name="output_files">
                <param name="output_selection" value="data_json,hit_fasta,plasmid_fasta,result_tsv,result_txt,logfile"/>
            </section>
            <output name="json_file" value="test_1/data_test1.json" ftype="json" compare="sim_size"/>
            <output name="hit_file" value="test_1/Hit_in_genome_seq_test1.fsa" ftype="fasta"/>
            <output name="plasmid_file" value="test_1/Plasmid_seqs_test1.fsa" ftype="fasta"/>
            <output name="result_file" value="test_1/results_tab_test1.tsv"/>
            <output name="raw_file" value="test_1/results_test1.txt" lines_diff="2"/>
            <output name="log_file" value="test_1/logfile_test1.log" lines_diff="7"/>
        </test>
        <!-- test_2: default thresholds, raw reads (gzipped fastq), four outputs -->
        <test expect_num_outputs="4">
            <section name="input">
                <param name="input_file" value="data.fastq.gz"/>
                <param name="database_name" value="test-plasmindfinder-db"/>
            </section>
            <section name="output_files">
                <param name="output_selection" value="data_json,result_tsv,result_txt,logfile"/>
            </section>
            <output name="json_file" value="test_2/data_test2.json" ftype="json" compare="sim_size"/>
            <output name="result_file" value="test_2/results_tab_test2.tsv"/>
            <output name="raw_file" value="test_2/results_test2.txt" lines_diff="2"/>
            <output name="log_file" value="test_2/logfile_test2.log" lines_diff="7"/>
        </test>
        <!-- test_3: custom coverage and identity thresholds, assembled contigs (fasta) -->
        <test expect_num_outputs="3">
            <section name="input">
                <param name="input_file" value="contigs.fasta"/>
                <param name="database_name" value="test-plasmindfinder-db"/>
            </section>
            <section name="options">
                <param name="min_cov" value="0.2"/>
                <param name="threshold" value="0.6"/>
            </section>
            <section name="output_files">
                <param name="output_selection" value="hit_fasta,plasmid_fasta,result_tsv"/>
            </section>
            <output name="hit_file" value="test_3/Hit_in_genome_seq_test3.fsa" ftype="fasta"/>
            <output name="plasmid_file" value="test_3/Plasmid_seqs_test3.fsa" ftype="fasta"/>
            <output name="result_file" value="test_3/results_tab_test3.tsv"/>
        </test>
    </tests>
    <help><![CDATA[
      **What it does**

      PlasmidFinder characterizes plasmid sequences in whole genome sequencing data.
      It is based on the `PlasmidFinder database <https://bitbucket.org/genomicepidemiology/plasmidfinder_db/>`_, which contains hundreds of sequences.

      **Input**

      It can analyse raw reads using a k-mer approach based on KMA, or genome assemblies using blastn.
      A database obtained from the PlasmidFinder database is needed.

      **Options**

      - You can modify the minimal coverage (fraction of the total target sequence covered by the match, between 0 and 1).
      - You can modify the minimal identity (fraction of nucleotides shared within the match, between 0 and 1).

      **Output**

      Several output files are available:

      - A fasta file with all available hits detected in the genome
      - A fasta file with all plasmid sequences from the database
      - A summary of the analysis in tabular format
      - A raw result file in text format
      - A JSON file that can be used for other bioinformatics analyses
      - A log file with analysis parameters

    ]]></help>
      <!-- Citation entries come from the "citations" macro, presumably defined in the imported macro.xml. -->
      <expand macro="citations"/>
</tool>