<!--
view sequel_wrapper.xml @ 0:58e1eb37fddc draft

Uploaded
author crs4
date Tue, 15 Oct 2013 11:15:28 -0400
parents
children ccadfae70b02
line wrap: on
line source
-->

<tool id="sequel_wrapper" name="SEQuel" version="0.2">
  <description></description>
  <!-- External packages this tool depends on, each pinned to an exact version. -->
  <requirements>
    <requirement type="package" version="0.6.2">bwa</requirement>
    <requirement type="package" version="35">blat</requirement>
    <requirement type="package" version="1.0.2">sequel</requirement>
  </requirements>
  <!--
    Command line for sequel_wrapper.py, written as a Cheetah template.
    Every file-path argument is quoted so that a path containing whitespace or
    shell metacharacters reaches the wrapper as one single argument.
  -->
  <command interpreter="python">
    sequel_wrapper.py
    ## Shell parameter expansion with a fallback: when SEQUEL_SITE_OPTIONS is unset or
    ## empty, the default "-t 8 -p 8 -u 1" is used. Left unquoted on purpose so that the
    ## shell splits it into separate arguments.
    \${SEQUEL_SITE_OPTIONS:--t 8 -p 8 -u 1}
    ## SEQUEL_JAR_PATH is expanded by the shell at run time (the backslash hides the
    ## dollar sign from Cheetah); the double quotes keep the expanded path in one piece.
    --sequel_jar_path="\$SEQUEL_JAR_PATH" --read1='$read1' --read2='$read2' --contigs='$contigs'
    ## Optional numeric parameters. The test is on str() rather than on the value itself
    ## so that a value of 0 still counts as "set"; presumably an empty optional field
    ## stringifies to '' and the option is then omitted (verify against the Galaxy version in use).
    #if str($bases_length)
      --bases_length=$bases_length
    #end if
    #if str($kmer_size)
      --kmer_size=$kmer_size
    #end if
    #if str($max_positional_error)
      --max_positional_error=$max_positional_error
    #end if
    #if str($min_fraction)
      --min_fraction=$min_fraction
    #end if
    #if str($min_aln_length)
      --min_aln_length=$min_aln_length
    #end if
    #if str($min_avg_coverage)
      --min_avg_coverage=$min_avg_coverage
    #end if
    #if str($discard_kmers)
      --discard_kmers=$discard_kmers
    #end if
    #if str($discard_positional)
      --discard_positional=$discard_positional
    #end if
    #if str($min_aln_score)
      --min_aln_score=$min_aln_score
    #end if
    ## Boolean switches: the bare flag is emitted only when the checkbox is ticked.
    #if $single_cell_mode
      --single_cell_mode
    #end if
    #if $report_changes
      --report_changes
    #end if
    #if $extend_contig
      --extend_contig
    #end if
    ## Optional reference genome dataset.
    #if $reference_genome
      --reference_genome='$reference_genome'
    #end if
    ## Output datasets declared in the outputs section of this tool.
    --contigs_refined='$contigs_refined'
    --logprep='$logprep'
    --logseq='$logseq'
    --logfile_prep='$logfile_prep'
    --logfile_seq='$logfile_seq'
  </command>

  <!--
    User-facing parameters of the tool. Each param name is referenced under the
    same name in the command template of this file.
  -->
  <inputs>
    <!-- Mandatory datasets: the two paired-end read files and the assembled contigs. -->
    <param name="read1" type="data" format="fasta,fastq" label="Paired-end reads 1 from sequencing (-r1)" help="FASTA or FASTQ format" />
    <param name="read2" type="data" format="fasta,fastq" label="Paired-end reads 2 from sequencing (-r2)" help="FASTA or FASTQ format" />
    <param name="contigs" type="data" format="fasta,fastq" label="Contigs from assembly (-c)" help="FASTA or FASTQ format" />

    <!-- Optional numeric settings; each one is passed on only when the field is not empty. -->
    <param name="bases_length" type="integer" value="0" optional="true" label="Preprocessing: do not refine contigs shorter than n bases (-l)" help="Contigs shorter than n bases will appear unchanged in the final output file" />

    <param name="kmer_size" type="integer" value="50" optional="true" label="K-mer size (-k)" help="" />

    <param name="max_positional_error" type="integer" value="25" optional="true" label="Max positional error Delta (-d)" help="" />

    <param name="min_fraction" type="float" value="0.9" optional="true" label="Min fraction of matches in alignment (-f)" help="" />

    <!--
      This is an integer field, so only a whole number of bp can be entered.
      The help text previously also advertised "fraction of contig", which an
      integer field cannot accept; the text now matches what the form allows.
    -->
    <param name="min_aln_length" type="integer" value="" optional="true" label="Min alignment length (-l)" help="Alignment length in bp (whole number). Optional." />

    <param name="min_avg_coverage" type="float" value="20.0" optional="true" label="Min average coverage to incorporate changes (-v)" help="" />

    <param name="discard_kmers" type="integer" value="1" optional="true" label="Discard k-mers observed less than m times (-m)" help="" />

    <param name="discard_positional" type="integer" value="1" optional="true" label="Discard positional k-mers observed less than n times (-n)" help="" />

    <param name="min_aln_score" type="integer" value="1" optional="true" label="Min alignment score (MAPQ) of reads to consider (-q)" help="" />

    <!-- Boolean switches, all off by default. -->
    <param name="single_cell_mode" type="boolean" optional="true" checked="false" label="Single cell mode, sort partial-contigs by coverage (-s)" />

    <param name="report_changes" type="boolean" optional="true" checked="false" label="Report changes (slow) for all input-contigs (-r)" />

    <param name="extend_contig" type="boolean" optional="true" checked="false" label="Extend contig with flanking regions of alignment (-e)" />

    <!-- Optional reference genome dataset. -->
    <param name="reference_genome" type="data" format="fasta,twobit" optional="true" label="Evaluate refinement using reference genome (-g)" help="FASTA or 2bit format" />
  </inputs>

  <!--
    Output datasets: four plain-text logs and the refined contigs in FASTA.
    Each name is handed to sequel_wrapper.py through the command-line option
    of the same name in the command template of this file.
    NOTE(review): the labels distinguish plain logs (logfile_prep, logfile_seq)
    from "official" logs (logprep, logseq); what each one contains is decided
    by the wrapper script, which is not part of this file.
  -->
  <outputs>
    <data name="logfile_prep" format="txt" label="${tool.name} on ${on_string}: log (pre-processing)" />
    <data name="logfile_seq" format="txt" label="${tool.name} on ${on_string}: log (SEQuel)" />
    <data name="logprep" format="txt" label="${tool.name} on ${on_string}: log (pre-processing, official)" />
    <data name="logseq" format="txt" label="${tool.name} on ${on_string}: log (SEQuel, official)" />
    <data name="contigs_refined" format="fasta" label="${tool.name} on ${on_string}: refined contigs" />
  </outputs>

  <tests>
    <!-- NOTE(review): no functional tests are defined for this tool. -->

  </tests>
  <help>
**What it does**

SEQuel is a tool for correcting errors (i.e., insertions, deletions, and substitutions) in contigs output from assembly. While assemblies of next generation sequencing (NGS) data are accurate, they still contain a substantial number of errors that need to be corrected after the assembly process. The algorithm behind SEQuel makes use of a graph structure called the positional de Bruijn graph, which models k-mers within reads while incorporating their approximate positions into the model.

SEQuel substantially reduces the number of small insertion, deletion and substitution errors in assemblies of both standard (multi-cell) and single-cell sequencing data. SEQuel was tested mainly on Illumina sequence data, in combination with multiple NGS assemblers, such as Euler-SR, Velvet, SoapDeNovo, ALLPATHS and SPAdes.

**Known issues**

.. class:: warningmark

During the pre-processing stage, one SAM file per contig is created. Due to runtime considerations, these files are all kept open simultaneously. As a result, the program will crash when the number of contigs in the assembly is too high.

**License and citation**

This Galaxy tool is Copyright © 2013 `CRS4 Srl.`_ and is released under the `MIT license`_.

.. _CRS4 Srl.: http://www.crs4.it/
.. _MIT license: http://opensource.org/licenses/MIT

If you use this tool in Galaxy, please cite |Cuccuru2013|_.

.. |Cuccuru2013| replace:: Cuccuru, G., Orsini, M., Pinna, A., Sbardellati, A., Soranzo, N., Travaglione, A., Uva, P., Zanetti, G., Fotia, G. (2013) Orione, a web-based framework for NGS analysis in microbiology. *Submitted*
.. _Cuccuru2013: http://orione.crs4.it/

This tool uses `SEQuel`_, which is licensed separately. Please cite |Ronen2012|_.

.. _SEQuel: http://bix.ucsd.edu/SEQuel/
.. |Ronen2012| replace:: Ronen R., *et al.* (2012) SEQuel: improving the accuracy of genome assemblies. *Bioinformatics* 28 (12), i188-i196
.. _Ronen2012: http://bioinformatics.oxfordjournals.org/content/28/12/i188
  </help>
</tool>