<!--
view plasmidfinder.xml @ 3:7075b7a5441b draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/plasmidfinder commit 464661a59ed12a642d365f485a222764b55048a5
author iuc
date Thu, 02 Mar 2023 14:58:17 +0000
parents eccc7495c3d9
children
line wrap: on
line source
-->

<tool id="plasmidfinder" name="PlasmidFinder" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <!-- One-line summary of the tool. -->
    <description>Plasmid identification in bacteria.</description>
    <!-- The macros expanded in this tool are imported from macro.xml. -->
    <macros>
        <import>macro.xml</import>
    </macros>
    <expand macro="xrefs"/>
    <expand macro="requirements"/>
    <command detect_errors="aggressive"><![CDATA[
        ## The run ends in a pipe to tee; without pipefail the job would report
        ## the exit status of tee and a failing plasmidfinder.py could go unnoticed.
        set -o pipefail;
        mkdir output_dir &&
        mkdir temp_dir &&
        plasmidfinder.py
        -i '$input.input_file'
        -p '$input.database_name.fields.path'
        -l '$options.min_cov'
        -t '$options.threshold'
        ## Pick the search method from the input datatype: blastn for fasta, kma for fastq.
        ## is_of_type also matches subtypes, unlike a string comparison on the extension.
        #if $input.input_file.is_of_type('fasta', 'fasta.gz')
            -mp blastn
        #elif $input.input_file.is_of_type('fastqsanger', 'fastqsanger.gz')
            -mp kma
        #end if
        -x
        -o output_dir
        -tmp temp_dir
        ## Copy the standard output of the run into the log dataset.
        | tee '$log_file'
    ]]></command>
    <inputs>
        <!-- Sequence file to analyze and the PlasmidFinder database to search. -->
        <section name="input" title="Input parameters" expanded="true">
            <param name="input_file" type="data" format="fasta,fasta.gz,fastqsanger,fastqsanger.gz"
                   label="Choose a fasta or fastq file" help="File to be analyzed"/>
            <param name="database_name" type="select" label="PlasmidFinder database">
                <options from_data_table="plasmidfinder_database">
                    <validator message="No PlasmidFinder database is available" type="no_options"/>
                </options>
            </param>
        </section>
        <!-- Thresholds passed to plasmidfinder.py as -l (min_cov) and -t (threshold). -->
        <section name="options" title="Options">
            <param name="min_cov" type="float" min="0" max="1" value="0.6"
                   label="Minimal coverage"
                   help=" Fraction of matching sequence on the total target sequence(default: 0.6)"/>
            <param name="threshold" type="float" min="0" max="1" value="0.95"
                   label="Minimal identity"
                   help=" Fraction of shared nucleotide on the match (default: 0.95)"/>
        </section>
        <!-- Each option value is tested by the filter of the matching output dataset. -->
        <section name="output_files" title="Output file selection">
            <!-- optional="false": at least one output must be chosen, so the output
                 filters never run a membership test against an empty selection. -->
            <param name="output_selection" type="select" display="checkboxes" multiple="true" optional="false"
                   label="Output files selection">
                <option value="data_json">JSON file result</option>
                <option value="hit_fasta" selected="true">Matching sequences in the genome for plasmid</option>
                <option value="plasmid_fasta" selected="true">Plasmid sequences discovered in the genome</option>
                <option value="result_tsv" selected="true">Plasmid match list</option>
                <option value="result_txt" selected="true">Plasmid match list in raw format</option>
                <option value="logfile">Log file</option>
            </param>
        </section>
    </inputs>
    <outputs>
        <!-- Each dataset is only created when its value is ticked in output_files/output_selection. -->
        <data name="json_file" format="json" from_work_dir="output_dir/data.json" label="${tool.name} on ${on_string}: data.json">
            <filter>"data_json" in output_files['output_selection']</filter>
        </data>
        <data name="hit_file" format="fasta" from_work_dir="output_dir/Hit_in_genome_seq.fsa" label="${tool.name} on ${on_string}: hit_in_genome.fasta">
            <filter>"hit_fasta" in output_files['output_selection']</filter>
        </data>
        <data name="plasmid_file" format="fasta" from_work_dir="output_dir/Plasmid_seqs.fsa" label="${tool.name} on ${on_string}: plasmid.fasta">
            <filter>"plasmid_fasta" in output_files['output_selection']</filter>
        </data>
        <data name="result_file" format="tabular" from_work_dir="output_dir/results_tab.tsv" label="${tool.name} on ${on_string}: results.tsv">
            <filter>"result_tsv" in output_files['output_selection']</filter>
        </data>
        <data name="raw_file" format="txt" from_work_dir="output_dir/results.txt" label="${tool.name} on ${on_string}: raw_result.txt">
            <filter>"result_txt" in output_files['output_selection']</filter>
        </data>
        <!-- The command writes this dataset directly (tee '$log_file'), so it takes no
             from_work_dir; output_dir is a directory, not a log file. -->
        <data name="log_file" format="txt" label="${tool.name} on ${on_string}: log file">
            <filter>"logfile" in output_files['output_selection']</filter>
        </data>
    </outputs>
    <tests>
        <!-- test_1: default thresholds, contigs (fasta) input, all six outputs selected -->
        <test expect_num_outputs="6">
            <section name="input">
                <param name="input_file" value="contigs.fasta"/>
                <param name="database_name" value="plasmidfinder_9002e7282dd0_2022-12-20"/>
            </section>
            <section name="output_files">
                <param name="output_selection" value="data_json,hit_fasta,plasmid_fasta,result_tsv,result_txt,logfile"/>
            </section>
            <output name="json_file" value="test_1/data_test1.json" ftype="json" compare="sim_size"/>
            <output name="hit_file" value="test_1/Hit_in_genome_seq_test1.fsa" ftype="fasta"/>
            <output name="plasmid_file" value="test_1/Plasmid_seqs_test1.fsa" ftype="fasta"/>
            <output name="result_file" value="test_1/results_tab_test1.tsv"/>
            <output name="raw_file" value="test_1/results_test1.txt" lines_diff="2"/>
            <output name="log_file" value="test_1/logfile_test1.log" lines_diff="7"/>
        </test>
        <!-- test_2: default thresholds, gzipped fastq input, four outputs selected -->
        <test expect_num_outputs="4">
            <section name="input">
                <param name="input_file" value="data.fastq.gz"/>
                <param name="database_name" value="plasmidfinder_9002e7282dd0_2022-12-20"/>
            </section>
            <section name="output_files">
                <param name="output_selection" value="data_json,result_tsv,result_txt,logfile"/>
            </section>
            <output name="json_file" value="test_2/data_test2.json" ftype="json" compare="sim_size"/>
            <output name="result_file" value="test_2/results_tab_test2.tsv"/>
            <output name="raw_file" value="test_2/results_test2.txt" lines_diff="2"/>
            <output name="log_file" value="test_2/logfile_test2.log" lines_diff="7"/>
        </test>
        <!-- test_3: custom coverage and identity thresholds, contigs (fasta) input, three outputs selected -->
        <test expect_num_outputs="3">
            <section name="input">
                <param name="input_file" value="contigs.fasta"/>
                <param name="database_name" value="plasmidfinder_9002e7282dd0_2022-12-20"/>
            </section>
            <section name="options">
                <param name="min_cov" value="0.2"/>
                <param name="threshold" value="0.6"/>
            </section>
            <section name="output_files">
                <param name="output_selection" value="hit_fasta,plasmid_fasta,result_tsv"/>
            </section>
            <output name="hit_file" value="test_3/Hit_in_genome_seq_test3.fsa" ftype="fasta"/>
            <output name="plasmid_file" value="test_3/Plasmid_seqs_test3.fsa" ftype="fasta"/>
            <output name="result_file" value="test_3/results_tab_test3.tsv"/>
        </test>
    </tests>
    <!-- Help text is reStructuredText; it is kept flush-left so that indentation is not
         interpreted as a block quote, and blank lines separate headings, paragraphs and lists. -->
    <help><![CDATA[
**What it does**

PlasmidFinder characterizes plasmid sequences in whole genome sequencing data.
It is based on the `PlasmidFinder database <https://bitbucket.org/genomicepidemiology/plasmidfinder_db/>`_, which contains hundreds of sequences.

**Input**

PlasmidFinder takes either raw data as reads (k-mer analysis) or a genome assembly (blastn analysis) to search for plasmids.

**Output**

Several output files are available:

- A fasta file with all available sequences detected in the genome
- A fasta file with all plasmid sequences from the database
- A summary of the analysis in tabular format
- A raw result file in text format
- A JSON file that can be used for other bioinformatic analyses
- A log file with analysis parameters

    ]]></help>
    <expand macro="citations"/>
</tool>