view phage_term_virome.xml @ 0:69e8f12c8b31 draft

"planemo upload"
author bioit_sciensano
date Fri, 11 Mar 2022 15:06:20 +0000
parents
children ee73cdf35532
line wrap: on
line source

<tool id="phage_term_virome" name="Phage Term Virome" version="2.0.0"  python_template_version="3.5" profile="19.05">
    <description> Determine phage genome termini and genome packaging mode on single phage or multiple contigs at once. </description>
    <requirements>
        <!-- Version-pinned package requirements for the PhageTerm.py runtime.
             Order and versions are kept exactly as declared by the tool author. -->
        <requirement type="package" version="1.0">backports</requirement>
        <requirement type="package" version="1.6.4">backports.functools_lru_cache</requirement>
        <requirement type="package" version="0.5">backports_abc</requirement>
        <requirement type="package" version="0.11.0">cycler</requirement>
        <requirement type="package" version="1.2.2">libwebp-base</requirement>
        <requirement type="package" version="1.9.3">lz4-c</requirement>
        <requirement type="package" version="3.5.1">matplotlib-base</requirement>
        <requirement type="package" version="3.5.1">matplotlib</requirement>
        <requirement type="package" version="1.21.5">numpy</requirement>
        <requirement type="package" version="3.0.0">openssl</requirement>
        <requirement type="package" version="1.3.5">pandas</requirement>
        <requirement type="package" version="0.5.2">patsy</requirement>
        <requirement type="package" version="9.0.1">pillow</requirement>
        <requirement type="package" version="3.0.7">pyparsing</requirement>
        <requirement type="package" version="2.8.2">python-dateutil</requirement>
        <requirement type="package" version="3.7">python_abi</requirement>
        <requirement type="package" version="2021.3">pytz</requirement>
        <requirement type="package" version="8.1">readline</requirement>
        <requirement type="package" version="3.5.68">reportlab</requirement>
        <requirement type="package" version="1.0.2">scikit-learn</requirement>
        <requirement type="package" version="1.7.3">scipy</requirement>
        <requirement type="package" version="59.8.0">setuptools</requirement>
        <requirement type="package" version="3.6.1">singledispatch</requirement>
        <requirement type="package" version="0.13.2">statsmodels</requirement>
        <requirement type="package" version="8.6.12">tk</requirement>
        <requirement type="package" version="6.1">tornado</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
      ## Run PhageTerm.py from the tool directory on the selected reads and reference.
      ## Every user-supplied path/text value is single-quoted so that spaces or shell
      ## metacharacters cannot split or alter the command line.
      python3 '$__tool_directory__/PhageTerm.py'
          -c \${GALAXY_SLOTS:-1}
          -r '$reference'
          --report_title '$output_prefix'

      ## Reads: one single-end file, two paired-end files, or one paired collection.
      #if $single_paired_inputs.input_selector == "single"
          -f '$single_paired_inputs.fastq'
      #elif $single_paired_inputs.input_selector == "paired"
          -f '$single_paired_inputs.fastq_fw' -p '$single_paired_inputs.fastq_rv'
      #else
          -f '$single_paired_inputs.fastq_inputs.forward' -p '$single_paired_inputs.fastq_inputs.reverse'
      #end if

      ## Optional integers are compared as strings against '' rather than tested for
      ## truthiness, so that an explicit value of 0 is still forwarded to the script.
      #if str($seed) != ''
          -s $seed
      #end if

      #if str($surrounding) != ''
          -d $surrounding
      #end if

      ## Optional host genome dataset; omitted when no dataset is selected.
      #if $host
          -g '$host'
      #end if

      #if str($coverage) != ''
          -m $coverage
      #end if

      #if str($min_phage_lg) != ''
          -l $min_phage_lg
      #end if
    ]]></command>
    <inputs>
        <!-- Read layout selector: each branch declares the dataset parameter(s) that the
             command template passes to -f (and -p for paired data). -->
        <conditional name="single_paired_inputs">
            <param name="input_selector" type="select" label="Single or Paired-end reads" help="Select between paired and single end data">
                <option value="paired">Paired-end</option>
                <option value="single">Single-end</option>
                <option value="paired_collection">Paired-end collection</option>
            </param>
            <when value="paired">
                <param name="fastq_fw" type="data" format="fastq" label="Select first set of reads" help="Specify dataset with forward reads"/>
                <param name="fastq_rv" type="data" format="fastq" label="Select second set of reads" help="Specify dataset with reverse reads"/>
            </when>
            <when value="single">
                <param name="fastq" type="data" format="fastq" label="Select fastq dataset" help="Specify dataset with single reads"/>
            </when>
            <when value="paired_collection">
                <param name="fastq_inputs" format="fastq" type="data_collection" collection_type="paired" label="Select paired-end dataset collection" help="Specify paired dataset collection containing paired reads"/>
            </when>
        </conditional>
        <!-- Reads are selected only through the conditional above; the command template
             does not read any top-level reads parameter, so none is declared here. -->
        <param name="reference" format="fasta" type="data" label="Phage genome (FASTA)" help="Phage reference genome file in fasta format (a single genome, or a multi-fasta file of contigs)."/>
        <param name="output_prefix" type="text" size="20" optional="true" label="Output files prefix" value="Phage" help="Prefix for the output file name."/>
        <param name="host" format="fasta" type="data" optional="true" label="Bacterial host genome (FASTA)" help="Host reference genome file in fasta format, Multi-fasta NOT accepted. Warning : increases process time."/>
        <!-- Optional numeric settings; each is forwarded to PhageTerm.py only when a value is set. -->
        <param name="seed" type="integer" optional="true" value="20" label="Seed length" help="Seed length value for alignment of reads."/>
        <param name="surrounding" type="integer" optional="true" value="20" label="Peak surrounding region" help="Length of the surrounding region defining close peaks to be merged in the analysis process."/>
        <param name="coverage" type="integer" optional="true" value="250" label="Limit coverage" help="Phage upper limit coverage."/>
        <param name="min_phage_lg" type="integer" optional="true" value="500" label="Limit phage length" help="Minimum phage fasta length."/>
    </inputs>
    <outputs>
        <!-- Each dataset is collected from the job working directory through its
             from_work_dir pattern and labelled with the user-chosen output prefix. -->
        <data name="Statistics"
              format="csv"
              label="${output_prefix}_statistics.csv"
              from_work_dir="*_statistics.csv"/>
        <data name="CohesiveSequence"
              format="fasta"
              label="${output_prefix}_cohesive-sequence.fasta"
              from_work_dir="*_cohesive-sequence.fasta"/>
        <data name="PhageSequence"
              format="fasta"
              label="${output_prefix}_sequence.fasta"
              from_work_dir="*_sequence.fasta"/>
        <data name="PDFReport"
              format="pdf"
              label="${output_prefix}_PhageTerm_report.pdf"
              from_work_dir="*_PhageTerm_report.pdf"/>
        <data name="DirectTermRepeats"
              format="fasta"
              label="${output_prefix}_direct-term-repeats.fasta"
              from_work_dir="*_direct-term-repeats.fasta"/>
    </outputs>
    <tests>
        <test>
            <!-- Single-end run on the COS-5 dataset; every checked output is validated by approximate file size. -->
            <param name="input_selector" value="single"/>
            <param name="fastq" value="COS-5.fastq"/>
            <param name="reference" value="COS-5.fasta"/>
            <param name="output_prefix" value="Test-cohesive-5"/>
            <output name="Statistics" ftype="csv">
                <assert_contents>
                    <has_size value="150731" delta="1000"/>
                </assert_contents>
            </output>
            <output name="CohesiveSequence" ftype="fasta">
                <assert_contents>
                    <has_size value="108" delta="50"/>
                </assert_contents>
            </output>
            <output name="PhageSequence" ftype="fasta">
                <assert_contents>
                    <has_size value="3089" delta="100"/>
                </assert_contents>
            </output>
            <output name="PDFReport" ftype="pdf">
                <assert_contents>
                    <has_size value="396227" delta="5000"/>
                </assert_contents>
            </output>
        </test>
        <test>
            <!-- Single-end run on the COS-3 dataset; every checked output is validated by approximate file size. -->
            <param name="input_selector" value="single"/>
            <param name="fastq" value="COS-3.fastq"/>
            <param name="reference" value="COS-3.fasta"/>
            <param name="output_prefix" value="Test-cohesive-3"/>
            <output name="Statistics" ftype="csv">
                <assert_contents>
                    <has_size value="154427" delta="1000"/>
                </assert_contents>
            </output>
            <output name="CohesiveSequence" ftype="fasta">
                <assert_contents>
                    <has_size value="106" delta="50"/>
                </assert_contents>
            </output>
            <output name="PhageSequence" ftype="fasta">
                <assert_contents>
                    <has_size value="3091" delta="100"/>
                </assert_contents>
            </output>
            <output name="PDFReport" ftype="pdf">
                <assert_contents>
                    <has_size value="400459" delta="5000"/>
                </assert_contents>
            </output>
        </test>
        <test>
            <!-- Single-end run on the DTR-short dataset; every checked output is validated by approximate file size. -->
            <param name="input_selector" value="single"/>
            <param name="fastq" value="DTR-short.fastq"/>
            <param name="reference" value="DTR-short.fasta"/>
            <param name="output_prefix" value="Test-short-direct-terminal-repeats"/>
            <output name="Statistics" ftype="csv">
                <assert_contents>
                    <has_size value="165289" delta="1000"/>
                </assert_contents>
            </output>
            <output name="DirectTermRepeats" ftype="fasta">
                <assert_contents>
                    <has_size value="438" delta="50"/>
                </assert_contents>
            </output>
            <output name="PhageSequence" ftype="fasta">
                <assert_contents>
                    <has_size value="3402" delta="100"/>
                </assert_contents>
            </output>
            <output name="PDFReport" ftype="pdf">
                <assert_contents>
                    <has_size value="289115" delta="5000"/>
                </assert_contents>
            </output>
        </test>
        <test>
            <!-- Single-end run on the DTR-long dataset; every checked output is validated by approximate file size. -->
            <param name="input_selector" value="single"/>
            <param name="fastq" value="DTR-long.fastq"/>
            <param name="reference" value="DTR-long.fasta"/>
            <param name="output_prefix" value="Test-long-direct-terminal-repeats"/>
            <output name="Statistics" ftype="csv">
                <assert_contents>
                    <has_size value="1800253" delta="10000"/>
                </assert_contents>
            </output>
            <output name="DirectTermRepeats" ftype="fasta">
                <assert_contents>
                    <has_size value="8615" delta="100"/>
                </assert_contents>
            </output>
            <output name="PhageSequence" ftype="fasta">
                <assert_contents>
                    <has_size value="16759" delta="100"/>
                </assert_contents>
            </output>
            <output name="PDFReport" ftype="pdf">
                <assert_contents>
                    <has_size value="360967" delta="5000"/>
                </assert_contents>
            </output>
        </test>
        <test>
            <!-- Single-end run on the Headful dataset with the peak surrounding region set to 0;
                 every checked output is validated by approximate file size. -->
            <param name="input_selector" value="single"/>
            <param name="fastq" value="Headful.fastq"/>
            <param name="reference" value="Headful.fasta"/>
            <param name="surrounding" value="0"/>
            <param name="output_prefix" value="Test-Headfull"/>
            <output name="Statistics" ftype="csv">
                <assert_contents>
                    <has_size value="148887" delta="1000"/>
                </assert_contents>
            </output>
            <output name="PhageSequence" ftype="fasta">
                <assert_contents>
                    <has_size value="3078" delta="100"/>
                </assert_contents>
            </output>
            <output name="PDFReport" ftype="pdf">
                <assert_contents>
                    <has_size value="277156" delta="5000"/>
                </assert_contents>
            </output>
        </test>
        <!-- The Mu-like test is absent for two reasons: its running time is too high (3 minutes with 10 cores), and the tool crashes
             on Mu paired reads because its code tries to sum two ranges. Error in 'testMu': _modules/common_readsCoverage_processing.py, line 385.
         -->
    </tests>
    <help><![CDATA[

       **What it does**
     
	PhageTermVirome software is a tool to determine phage genome termini and genome packaging mode on single phage or multiple contigs at once.
	The software uses phage and virome sequencing reads obtained from libraries prepared with DNA fragmented randomly (e.g. Covaris fragmentation,
	and library preparation using Illumina TruSeq). Phage or virome sequencing reads (fastq files) are aligned to the assembled phage genome or assembled
	virome (fasta or multifasta files) in order to  calculate two types of coverage values (whole genome coverage and the Starting Position Coverage (SPC)). The starting position coverage is used to perform a detailed termini and packaging mode analysis. 

	Mu-type phage analysis: this can be done if the user suspects the phage genome to be of Mu-like type (only for single phage genome analysis, not possible with a multi-fasta file).
	The user can also provide the host (bacterial) genome sequence. The Mu-type phage analysis will take the reads that do not match the phage
	genome and align them on the bacterial genome using the same mapping function. The analysis to identify Mu-like phages is available only when providing a single phage genome (not possible if the user provides a multi-fasta file with multiple assembled phage contigs).
        
        **Inputs**
        
        - Raw reads file(s) in fastq format : paired-ends or single-ends.
        - Phage genome in fasta format
        - Host genome in fasta format (optional)

        **Outputs**

 	- PDF report
 	
	- Statistical table (csv) 

        - FASTA file(s) re-organized to start at the predicted termini

       **Tool version**
 
       *Version:* 4.0.0
       *Commit SHA:* 5e92822b3f289a329d18914b2183159642abdba4 

    ]]></help>
    <citations>
        <!-- Publication to cite when using this tool, referenced by its DOI. -->
        <citation type="doi">10.1101/108100</citation>
    </citations>
</tool>