unicycler: unicycler.xml comparison

comparison unicycler.xml @ 0:e9c1cdb9f9dc draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/unicycler commit 97c19413f9d7e85c013b82ffafa73632c79c8f0b

author	iuc
date	Thu, 06 Jul 2017 17:07:43 -0400
parents
children	f13d0498a199

comparison

equal deleted inserted replaced

--1:000000000000
+:e9c1cdb9f9dc
+<tool id="unicycler" name="Create assemblies with Unicycler" version="0.2.0">
+<!-- Packages the tool needs at run time: only unicycler 0.3.0b is declared. NOTE(review): the command template also calls a "pilon" executable; presumably it is installed as a dependency of the unicycler package (confirm). -->
+<requirements>
+<requirement type="package" version="0.3.0b">unicycler</requirement>
+</requirements>
+<command detect_errors="exit_code"><![CDATA[
+## ----------------------------------------------------------
+## Cheetah template producing the shell command line:
+##   1. symlink the short read datasets to names carrying a .fastq or
+##      .fastq.gz extension (the link names are kept in fq1 / fq2 / fq),
+##   2. ask the pilon launcher for its jar directory,
+##   3. symlink the optional long reads (the link name is kept in lr),
+##   4. call unicycler with the links and every option the user has set.
+## ----------------------------------------------------------
+## Preparing files
+#if str( $paired_unpaired.fastq_input_selector ) == "paired":
+## Two separate datasets: fastq_input1 = forward, fastq_input2 = reverse
+#if $paired_unpaired.fastq_input1.is_of_type('fastqsanger'):
+#set fq1 = "fq1.fastq"
+ln -s '${paired_unpaired.fastq_input1}' fq1.fastq &&
+#elif $paired_unpaired.fastq_input1.is_of_type('fastqsanger.gz'):
+#set fq1 = "fq1.fastq.gz"
+ln -s '${paired_unpaired.fastq_input1}' fq1.fastq.gz &&
+#end if
+#if $paired_unpaired.fastq_input2.is_of_type('fastqsanger'):
+#set fq2 = "fq2.fastq"
+ln -s '${paired_unpaired.fastq_input2}' fq2.fastq &&
+#elif $paired_unpaired.fastq_input2.is_of_type('fastqsanger.gz'):
+#set fq2 = "fq2.fastq.gz"
+## The reverse mate must be linked here (fastq_input2, not fastq_input1)
+ln -s '${paired_unpaired.fastq_input2}' fq2.fastq.gz &&
+#end if
+#elif str( $paired_unpaired.fastq_input_selector ) == "paired_collection":
+## One paired collection: both mates live under fastq_input1 (forward / reverse);
+## there is no fastq_input2 parameter in this branch
+#if $paired_unpaired.fastq_input1.forward.is_of_type('fastqsanger'):
+#set fq1 = "fq1.fastq"
+ln -s '${paired_unpaired.fastq_input1.forward}' fq1.fastq &&
+#elif $paired_unpaired.fastq_input1.forward.is_of_type('fastqsanger.gz'):
+#set fq1 = "fq1.fastq.gz"
+ln -s '${paired_unpaired.fastq_input1.forward}' fq1.fastq.gz &&
+#end if
+#if $paired_unpaired.fastq_input1.reverse.is_of_type('fastqsanger'):
+#set fq2 = "fq2.fastq"
+ln -s '${paired_unpaired.fastq_input1.reverse}' fq2.fastq &&
+#elif $paired_unpaired.fastq_input1.reverse.is_of_type('fastqsanger.gz'):
+#set fq2 = "fq2.fastq.gz"
+ln -s '${paired_unpaired.fastq_input1.reverse}' fq2.fastq.gz &&
+#end if
+#elif str( $paired_unpaired.fastq_input_selector ) == "single":
+## One dataset of unpaired reads
+#if $paired_unpaired.fastq_input1.is_of_type('fastqsanger'):
+#set fq = "fq.fastq"
+ln -s '${paired_unpaired.fastq_input1}' fq.fastq &&
+#elif $paired_unpaired.fastq_input1.is_of_type('fastqsanger.gz'):
+#set fq = "fq.fastq.gz"
+ln -s '${paired_unpaired.fastq_input1}' fq.fastq.gz &&
+#end if
+#end if
+## Get location for pilon installation
+pilon=`pilon --jar_dir` &&
+## Long reads are optional; when given, link them under an extension matching their datatype
+#if $long_reads:
+#if $long_reads.is_of_type('fastqsanger'):
+#set lr = "lr.fastq"
+ln -s '${long_reads}' lr.fastq &&
+#elif $long_reads.is_of_type('fastqsanger.gz'):
+#set lr = "lr.fastq.gz"
+ln -s '${long_reads}' lr.fastq.gz &&
+#elif $long_reads.is_of_type('fasta'):
+#set lr = "lr.fasta"
+ln -s '${long_reads}' lr.fasta &&
+#end if
+#end if
+## Running Unicycler
+unicycler -t "\${GALAXY_SLOTS:-4}"
+-o ./
+--verbosity 3
+--pilon_path \$pilon
+## Short reads: reuse the link names recorded while preparing the files, so the
+## same arguments work for separate paired datasets and for a paired collection
+#if str( $paired_unpaired.fastq_input_selector ) == "single":
+-s $fq
+#else:
+-1 $fq1
+-2 $fq2
+#end if
+#if $long_reads:
+-l $lr
+#end if
+## General Unicycler Options section
+## ----------------------------------------------------------
+--mode '${uc_opt.mode}'
+#if $uc_opt.min_fasta_length:
+--min_fasta_length $uc_opt.min_fasta_length
+#end if
+#if $uc_opt.lin_seq:
+--expected_linear_seqs $uc_opt.lin_seq
+#end if
+## Boolean switches render as their flag when checked and as an empty string otherwise
+$uc_opt.no_correct
+$uc_opt.no_rotate
+$uc_opt.no_pilon
+## SPAdes Options section
+## ----------------------------------------------------------
+#if $spades.min_kmer_frac:
+--min_kmer_frac $spades.min_kmer_frac
+#end if
+#if $spades.max_kmer_frac:
+--max_kmer_frac $spades.max_kmer_frac
+#end if
+#if $spades.kmer_count:
+--kmer_count $spades.kmer_count
+#end if
+## Rotation Options section
+## ----------------------------------------------------------
+#if $rotation.start_genes:
+--start_genes '${rotation.start_genes}'
+#end if
+#if $rotation.start_gene_id:
+--start_gene_id $rotation.start_gene_id
+#end if
+#if $rotation.start_gene_cov:
+--start_gene_cov $rotation.start_gene_cov
+#end if
+## Pilon Options section
+## ----------------------------------------------------------
+#if $pilon.min_polish_size:
+--min_polish_size $pilon.min_polish_size
+#end if
+## Graph Cleaning Options section
+## ----------------------------------------------------------
+#if $graph_clean.min_component_size:
+--min_component_size $graph_clean.min_component_size
+#end if
+#if $graph_clean.min_dead_end_size:
+--min_dead_end_size $graph_clean.min_dead_end_size
+#end if
+## Long Read Alignment Options
+## ----------------------------------------------------------
+#if $lr_align.contamination_fasta:
+--contamination '${lr_align.contamination_fasta}'
+#end if
+#if $lr_align.scores:
+--scores '${lr_align.scores}'
+#end if
+#if $lr_align.low_score:
+--low_score $lr_align.low_score
+#end if
+]]></command>
+<inputs>
+<!-- Short-read input. The selector picks one of three layouts: two separate datasets (fastq_input1 and fastq_input2), one paired collection (fastq_input1 only), or one unpaired dataset (fastq_input1 only). All accept fastqsanger or fastqsanger.gz. -->
+<conditional name="paired_unpaired">
+<param name="fastq_input_selector" type="select" label="Paired or Single end data?" help="Select between paired and single end data">
+<option selected="True" value="paired">Paired</option>
+<option value="paired_collection">Paired Collection</option>
+<option value="single">Single</option>
+</param>
+<when value="paired">
+<param name="fastq_input1" argument="-1" type="data" format="fastqsanger,fastqsanger.gz" label="Select first set of reads" help="Specify dataset with forward reads"/>
+<param name="fastq_input2" argument="-2" type="data" format="fastqsanger,fastqsanger.gz" label="Select second set of reads" help="Specify dataset with reverse reads"/>
+</when>
+<when value="paired_collection">
+<param name="fastq_input1" format="fastqsanger,fastqsanger.gz" type="data_collection" collection_type="paired" label="Select a paired collection" />
+</when>
+<when value="single">
+<param name="fastq_input1" argument="-s" type="data" format="fastqsanger,fastqsanger.gz" label="Select unpaired reads" help="Specify dataset with unpaired reads"/>
+</when>
+</conditional>
+<!-- Optional long reads; accepted formats are fastqsanger, fastqsanger.gz and fasta. -->
+<param name="long_reads" argument="--long" optional="True" type="data" format="fastqsanger,fastqsanger.gz,fasta" label="Select long reads. If there are no long reads, leave this empty"/>
+<!-- General Unicycler options (section shown expanded): bridging mode, minimum contig length for the FASTA output, three boolean switches whose checked value is the corresponding command-line flag, and the expected number of linear sequences (parameter name lin_seq). -->
+<section name="uc_opt" expanded="True" title="Unicycler options">
+<param argument="--mode" type="select" label="Select Bridging mode">
+<option value="conservative">Conservative (smaller contigs, lower misassembly)</option>
+<option value="normal" selected="True">Normal (moderate contig size and misassembly rate)</option>
+<option value="bold">Bold (longest contigs, higher misassembly rate)</option>
+</param>
+<param argument="--min_fasta_length" optional="True" type="integer" value="" label="Exclude contigs from the FASTA file which are shorter than this length (bp)" help="default = 1"/>
+<param argument="--no_correct" optional="True" type="boolean" checked="False" truevalue="--no_correct" falsevalue="" label="Skip SPAdes error correction step" help="This option turns off SPAdes error correction. Generally it is highly recommended to use correction."/>
+<param argument="--no_rotate" optional="True" type="boolean" checked="False" truevalue="--no_rotate" falsevalue="" label="Do not rotate completed replicons to start at a standard gene." help="Unicycler uses TBLASTN to search for dnaA or repA alleles in each completed replicon. If one is found, the sequence is rotated and/or flipped so that it begins with that gene encoded on the forward strand. This provides consistently oriented assemblies and reduces the risk that a gene will be split across the start and end of the sequence."/>
+<param argument="--no_pilon" optional="True" type="boolean" checked="False" truevalue="--no_pilon" falsevalue="" label="Do not use Pilon to polish the final assembly." help="Unicycler uses Pilon tool for polishing final assembly."/>
+<param name="lin_seq" argument="--expected_linear_seqs" optional="True" type="integer" value="" label="The expected number of linear (i.e. non-circular) sequences in the assembly" help="default = 0"/>
+</section>
+<!-- SPAdes k-mer settings (section collapsed by default). All three parameters are optional and start empty; the two fractions are limited to the range 0 to 1. -->
+<section name="spades" expanded="False" title="SPAdes options" help="Unicycler uses SPAdes to construct assembly graphs. You can modify some of the SPAdes settings here. Use this ONLY if you know what you are doing!">
+<param argument="--min_kmer_frac" optional="True" type="float" min="0" max="1" value="" label="Lowest k-mer size for SPAdes assembly, expressed as a fraction of the read length" help="default = 0.2"/>
+<param argument="--max_kmer_frac" optional="True" type="float" min="0" max="1" value="" label="Highest k-mer size for SPAdes assembly, expressed as a fraction of the read length" help="default = 0.95"/>
+<param argument="--kmer_count" optional="True" type="integer" value="" label="Number of k-mer steps to use in SPAdes assembly" help="default = 10"/>
+</section>
+<!-- Rotation settings (section collapsed by default): an optional FASTA dataset of start genes plus two optional integer percentages, each limited to 0 to 100. -->
+<section name="rotation" expanded="False" title="Rotation options" help="These options control the rotation of completed circular sequence near the end of the Unicycler pipeline. Use this ONLY if you know what you are doing!">
+<param argument="--start_genes" optional="True" type="data" format="fasta" label="FASTA file of genes for start point of rotated replicons" />
+<param argument="--start_gene_id" optional="True" type="integer" min="0" max="100" value="" label="The minimum required BLAST percent identity for a start gene search" help="default = 90"/>
+<param argument="--start_gene_cov" optional="True" type="integer" min="0" max="100" value="" label="The minimum required BLAST percent coverage for a start gene search" help="default = 95"/>
+</section>
+<!-- Pilon settings (section collapsed by default): one optional, non-negative integer size threshold. Unlike the other optional integers in this file, it declares no value attribute. -->
+<section name="pilon" title="Pilon options" expanded="False">
+<param argument="--min_polish_size" optional="True" type="integer" min="0" label="Contigs shorter than this value (bp) will not be polished using Pilon" help="default = 1000"/>
+</section>
+<!-- Graph cleaning settings (section collapsed by default): two optional integer size thresholds, both starting empty. -->
+<section name="graph_clean" expanded="False" title="Graph cleaning options" help="These options control the removal of small leftover sequences after bridging is complete.">
+<param argument="--min_component_size" optional="True" type="integer" value="" label="Unbridged graph components smaller than this size will be removed from the final graph" help="default = 1000"/>
+<param argument="--min_dead_end_size" optional="True" type="integer" value="" label="Graph dead ends smaller than this size will be removed from the final graph" help="default = 1000"/>
+</section>
+<!-- Long-read alignment settings (section collapsed by default): an optional contamination FASTA dataset (parameter name contamination_fasta), a free-text comma-delimited score string, and an optional integer score threshold. -->
+<section name="lr_align" expanded="False" title="Long read alignment parameters" help="These options control the alignment of long reads to the assembly graph.">
+<param name="contamination_fasta" argument="--contamination" optional="True" type="data" format="fasta" label="FASTA file of known contamination in long reads, e.g. lambda, phiX, or puc18 spike-ins." />
+<param argument="--scores" optional="True" type="text" value="" label="Comma-delimited string of alignment scores: match, mismatch, gap open, gap extend" help="default = 3,-6,-5,-2"/>
+<param argument="--low_score" optional="True" type="integer" value="" label="Score threshold - alignments below this are considered poor" help="default = set automatically"/>
+</section>
+</inputs>
+<!-- Two datasets are collected from the job working directory: assembly.gfa (stored as txt) and assembly.fasta (stored as fasta). NOTE(review): the name "assembly_grapth" looks like a misspelling of "assembly_graph"; it is the identifier of the output, so confirm nothing refers to it before renaming. -->
+<outputs>
+<data format="txt" name="assembly_grapth" from_work_dir="assembly.gfa" label="${tool.name} on ${on_string}: Final Assembly Graph" />
+<data format="fasta" name="assembly" from_work_dir="assembly.fasta" label="${tool.name} on ${on_string}: Final Assembly"/>
+</outputs>
+<!-- Both tests use the paired phiX read files, normal mode and no_correct enabled, and check that the FASTA output contains the text "length=5386". -->
+<tests>
+<!-- Test 1: short reads only. -->
+<test>
+<param name="fastq_input_selector" value="paired" />
+<param name="fastq_input1" value="phix_f.fq.gz" ftype="fastqsanger" />
+<param name="fastq_input2" value="phix_r.fq.gz" ftype="fastqsanger" />
+<param name="mode" value="normal" />
+<param name="no_correct" value="true" />
+<param name="no_rotate" value="false" />
+<param name="no_pilon" value="false" />
+<output ftype="fasta" name="assembly">
+<assert_contents>
+<has_text text="length=5386" />
+</assert_contents>
+</output>
+</test>
+<!--
+Test 2: same short reads plus the long reads in onp.fa.
+Following test corresponds to the command:
+unicycler -t "${GALAXY_SLOTS:-8}"  -o ./ - -verbosity 3 - -pilon_path `pilon - -jar_dir` \
+-1 test-data/phix_f.fq.gz  -2 test-data/phix_r.fq.gz  -l test-data/onp.fa \
+- -mode 'normal' - -no_correct
+("- -" stands for a double hyphen, which cannot be written inside an XML comment.)
+This command causes a segfault with the current version of unicycler on bioconda for Linux
+during the minimap step (which seems to be compiled C code). A gist of the log can be found
+at: https://gist.github.com/jmchilton/b411b695170c1daea6589f5d76e326cb.
+NOTE(review): the test below is nevertheless enabled; confirm whether it is expected to pass.
+-->
+<test>
+<param name="fastq_input_selector" value="paired" />
+<param name="fastq_input1" value="phix_f.fq.gz" ftype="fastqsanger" />
+<param name="fastq_input2" value="phix_r.fq.gz" ftype="fastqsanger" />
+<param name="long_reads" value="onp.fa" ftype="fasta" />
+<param name="mode" value="normal" />
+<param name="no_correct" value="true" />
+<param name="no_rotate" value="false" />
+<param name="no_pilon" value="false" />
+<output ftype="fasta" name="assembly">
+<assert_contents>
+<has_text text="length=5386" />
+</assert_contents>
+</output>
+</test>
+</tests>
+<help><![CDATA[
+**Unicycler**
+Unicycler is a hybrid assembly pipeline for bacterial genomes. It uses both Illumina reads and long reads (PacBio or Nanopore) to produce complete and accurate assemblies. It is written by `Ryan Wick`_ at the University of Melbourne's Centre for Systems Genomics. Much of the description below is lifted from Unicycler's `github page`_.
+.. _`Ryan Wick`: https://github.com/rrwick
+.. _`github page`: https://github.com/rrwick/Unicycler
+-----
+**Input data**
+Unicycler accepts short (Illumina) reads in FASTQ format. Galaxy places the additional requirement of having these in FASTQ format with `Sanger encoding`_ of quality scores. Long reads (from Oxford Nanopore or PacBio) can be in either FASTQ or FASTA format.
+.. _`Sanger encoding`: https://en.wikipedia.org/wiki/FASTQ_format#Quality
+The input options are::
+-1 SHORT1, --short1 SHORT1
+FASTQ file of short reads (first reads in each pair)
+-2 SHORT2, --short2 SHORT2
+FASTQ file of short reads (second reads in each pair)
+-s SHORT_UNPAIRED, --short_unpaired SHORT_UNPAIRED
+FASTQ file of unpaired short reads
+-l LONG, --long LONG
+FASTQ or FASTA file of long reads, if all reads are available at start.
+-----
+**Bridging mode**
+Unicycler can be run in three modes: conservative, normal (the default) and bold, set with the --mode option. Conservative mode is least likely to produce a complete assembly but has a very low risk of misassembly. Bold mode is most likely to produce a complete assembly but carries greater risk of misassembly. Normal mode is intermediate regarding both completeness and misassembly risk. See `description of modes`_ for more information.
+.. _`description of modes`: https://github.com/rrwick/Unicycler#conservative-normal-and-bold
+The available modes are::
+--mode {conservative,normal,bold}
+Bridging mode (default: normal)
+conservative = smaller contigs, lowest misassembly rate
+normal = moderate contig size and misassembly rate
+bold = longest contigs, higher misassembly rate
+----
+**Skip SPAdes error correction step**
+Sequencing data contains a substantial number of sequencing errors that manifest themselves as deviations (bulges and non-connected components) within the assembly graph. One of the ways to improve the graph even before constructing it is to minimize the number of sequencing errors by performing error correction. SPAdes, which is used by Unicycler for error correction and assembly, uses `BayesHammer`_ to correct the reads. Here is a brief summary of what it does:
+1. SPAdes (or rather BayesHammer) counts *k*-mers in reads and computes *k*-mer statistics that take into account base quality values.
+2. A `Hamming graph`_ is constructed in which *k*-mers are nodes. In this graph edges connect nodes (*k*-mers) if they differ from each other by a number of nucleotides up to a certain threshold (the `Hamming distance`_). The graph is central to the error correction algorithm.
+3. At this step Bayesian subclustering of the graph produced in the previous step is performed. For each *k*-mer we now know the center of its subcluster.
+4. Solid *k*-mers are derived from cluster centers and are assumed to be *error free*.
+5. Solid *k*-mers are mapped back to the reads and used to correct them.
+This step takes considerable time, so if one needs to quickly evaluate assemblies this step can be skipped. However, this is not recommended if one is trying to produce a final high quality assembly.
+.. _`BayesHammer`: https://goo.gl/1iGkMe
+.. _`Hamming graph`: https://en.wikipedia.org/wiki/Hamming_graph
+.. _`Hamming distance`: https://en.wikipedia.org/wiki/Hamming_distance
+The following option turns error correction on and off::
+--no_correct
+Skip SPAdes error correction step
+(default: conduct SPAdes error correction)
+-----
+**Do not rotate completed replicons to start at a standard gene**
+Unicycler uses TBLASTN to search for dnaA or repA alleles in each completed replicon. If one is found, the sequence is rotated and/or flipped so that it begins with that gene encoded on the forward strand. This provides consistently oriented assemblies and reduces the risk that a gene will be split across the start and end of the sequence.
+The following option turns rotation on and off::
+--no_rotate
+Do not rotate completed replicons
+to start at a standard gene
+(default: completed replicons are rotated)
+**Do not use Pilon to polish the final assembly**
+`Pilon`_ is a tool for improving overall quality of draft assemblies and finding variation among strains. Unicycler uses it for assembly *polishing*.
+The following option turns pilon part of Unicycler pipeline on and off::
+--no_pilon
+Do not use Pilon to polish the
+final assembly (default: Pilon is used)
+.. _`Pilon`: https://github.com/broadinstitute/pilon/wiki
+------
+**Expected number of linear sequences**
+If you expect your sample to contain linear (non circular) sequences, set this option::
+--expected_linear_seqs EXPECTED_LINEAR_SEQS
+The expected number of linear (i.e. non-circular)
+sequences in the underlying sequence
+----
+**SPAdes options**
+This section provides control of SPAdes options::
+--min_kmer_frac MIN_KMER_FRAC
+Lowest k-mer size for SPAdes assembly,
+expressed as a fraction of the read length
+(default: 0.2)
+--max_kmer_frac MAX_KMER_FRAC
+Highest k-mer size for SPAdes assembly,
+expressed as a fraction of the read length
+(default: 0.95)
+--kmer_count KMER_COUNT
+Number of k-mer steps to use in
+SPAdes assembly (default: 10)
+----
+**Rotation options**
+Unicycler attempts to rotate circular assemblies to make sure that they begin at a consistent starting gene. The following parameters control assembly rotation::
+--start_genes START_GENES
+FASTA file of genes for start point
+of rotated replicons
+(default: start_genes.fasta)
+--start_gene_id START_GENE_ID
+The minimum required BLAST percent identity
+for a start gene search
+(default: 90.0)
+--start_gene_cov START_GENE_COV
+The minimum required BLAST percent coverage
+for a start gene search
+(default: 95.0)
+-----
+**Graph cleaning options**
+These options control the removal of small leftover sequences after bridging is complete::
+--min_component_size MIN_COMPONENT_SIZE
+Unbridged graph components smaller
+than this size (bp) will be removed
+from the final graph (default: 1000)
+--min_dead_end_size MIN_DEAD_END_SIZE
+Graph dead ends smaller than this size (bp)
+will be removed from the final graph
+(default: 1000)
+-----
+**Long read alignment options**
+These options control the alignment of long reads to the assembly graph::
+--contamination CONTAMINATION
+FASTA file of known contamination in long reads
+--scores SCORES
+Comma-delimited string of alignment scores:
+match, mismatch, gap open, gap extend
+(default: 3,-6,-5,-2)
+--low_score LOW_SCORE
+Score threshold - alignments below this
+are considered poor
+(default: set threshold automatically)
+-----
+**Outputs**
+Galaxy's wrapper for Unicycler produces two outputs:
+* final assembly in FASTA format
+* final assembly graph in GFA format
+While most will likely be interested in the FASTA dataset, the graph dataset is also quite useful and can be visualized using tools such as `Bandage`_.
+.. _`Bandage`: https://github.com/rrwick/Bandage
+]]></help>
+<!-- Single DOI citation; presumably the Unicycler publication/preprint (confirm that the DOI is still the preferred reference). -->
+<citations>
+<citation type="doi">10.1101/096412</citation>
+</citations>
+</tool>

Mercurial > repos > iuc > unicycler

comparison unicycler.xml @ 0:e9c1cdb9f9dc draft