annotate sequel_wrapper.xml @ 0:58e1eb37fddc draft

Uploaded
author crs4
date Tue, 15 Oct 2013 11:15:28 -0400
parents
children ccadfae70b02
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
1 <tool id="sequel_wrapper" name="SEQuel" version="0.2">
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
2 <description></description>
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
3 <requirements>
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
4 <requirement type="package" version="0.6.2">bwa</requirement>
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
5 <requirement type="package" version="35">blat</requirement>
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
6 <requirement type="package" version="1.0.2">sequel</requirement>
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
7 </requirements>
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
8 <command interpreter="python">
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
9 sequel_wrapper.py
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
10 \${SEQUEL_SITE_OPTIONS:--t 8 -p 8 -u 1}
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
11 --sequel_jar_path=\$SEQUEL_JAR_PATH --read1=$read1 --read2=$read2 --contigs=$contigs
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
12 #if str($bases_length)
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
13 --bases_length=$bases_length
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
14 #end if
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
15 #if str($kmer_size)
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
16 --kmer_size=$kmer_size
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
17 #end if
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
18 #if str($max_positional_error)
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
19 --max_positional_error=$max_positional_error
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
20 #end if
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
21 #if str($min_fraction)
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
22 --min_fraction=$min_fraction
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
23 #end if
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
24 #if str($min_aln_length)
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
25 --min_aln_length=$min_aln_length
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
26 #end if
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
27 #if str($min_avg_coverage)
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
28 --min_avg_coverage=$min_avg_coverage
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
29 #end if
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
30 #if str($discard_kmers)
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
31 --discard_kmers=$discard_kmers
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
32 #end if
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
33 #if str($discard_positional)
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
34 --discard_positional=$discard_positional
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
35 #end if
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
36 #if str($min_aln_score)
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
37 --min_aln_score=$min_aln_score
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
38 #end if
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
39 #if $single_cell_mode
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
40 --single_cell_mode
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
41 #end if
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
42 #if $report_changes
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
43 --report_changes
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
44 #end if
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
45 #if $extend_contig
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
46 --extend_contig
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
47 #end if
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
48 #if $reference_genome
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
49 --reference_genome=$reference_genome
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
50 #end if
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
51 --contigs_refined=$contigs_refined
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
52 --logprep=$logprep
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
53 --logseq=$logseq
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
54 --logfile_prep=$logfile_prep
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
55 --logfile_seq=$logfile_seq
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
56 </command>
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
57
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
58 <inputs>
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
59 <param name="read1" type="data" format="fasta,fastq" label="Paired-end reads 1 from sequencing (-r1)" help="FASTA or FASTQ format" />
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
60 <param name="read2" type="data" format="fasta,fastq" label="Paired-end reads 2 from sequencing (-r2)" help="FASTA or FASTQ format" />
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
61 <param name="contigs" type="data" format="fasta,fastq" label="Contigs from assembly (-c)" help="FASTA or FASTQ format" />
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
62
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
63 <param name="bases_length" type="integer" value="0" optional="true" label="Preprocessing: do not refine contigs shorter than n bases (-l)" help="Contigs shorter than n bases will appear unchanged in the final output file" />
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
64
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
65 <param name="kmer_size" type="integer" value="50" optional="true" label="K-mer size (-k)" help="" />
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
66
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
67 <param name="max_positional_error" type="integer" value="25" optional="true" label="Max positional error Delta (-d)" help="" />
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
68
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
69 <param name="min_fraction" type="float" value="0.9" optional="true" label="Min fraction of matches in alignment (-f)" help="" />
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
70
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
71 <param name="min_aln_length" type="integer" value="" optional="true" label="Min alignment length (-l)" help="bp or fraction of contig. Optional." />
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
72
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
73 <param name="min_avg_coverage" type="float" value="20.0" optional="true" label="Min average coverage to incorporate changes (-v)" help="" />
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
74
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
75 <param name="discard_kmers" type="integer" value="1" optional="true" label="Discard k-mers observed less than m times (-m)" help="" />
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
76
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
77 <param name="discard_positional" type="integer" value="1" optional="true" label="Discard positional k-mers observed less than n times (-n)" help="" />
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
78
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
79 <param name="min_aln_score" type="integer" value="1" optional="true" label="Min alignment score (MAPQ) of reads to consider (-q)" help="" />
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
80
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
81 <param name="single_cell_mode" type="boolean" optional="true" checked="false" label="Single cell mode, sort partial-contigs by coverage (-s)" />
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
82
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
83 <param name="report_changes" type="boolean" optional="true" checked="false" label="Report changes (slow) for all input-contigs (-r)" />
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
84
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
85 <param name="extend_contig" type="boolean" optional="true" checked="false" label="Extend contig with flanking regions of alignment (-e)" />
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
86
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
87 <param name="reference_genome" type="data" format="fasta,twobit" optional="true" label="Evaluate refinement using reference genome (-g)" help="FASTA or 2bit format" />
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
88 </inputs>
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
89
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
90 <outputs>
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
91 <data name="logfile_prep" format="txt" label="${tool.name} on ${on_string}: log (pre-processing)" />
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
92 <data name="logfile_seq" format="txt" label="${tool.name} on ${on_string}: log (SEQuel)" />
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
93 <data name="logprep" format="txt" label="${tool.name} on ${on_string}: log (pre-processing, official)" />
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
94 <data name="logseq" format="txt" label="${tool.name} on ${on_string}: log (SEQuel, official)" />
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
95 <data name="contigs_refined" format="fasta" label="${tool.name} on ${on_string}: refined contigs" />
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
96 </outputs>
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
97
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
98 <tests>
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
99
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
100 </tests>
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
101 <help>
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
102 **What it does**
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
103
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
104 SEQuel is a tool for correcting errors (i.e., insertions, deletions, and substitutions) in the contigs output by an assembler. While assemblies of next-generation sequencing (NGS) data are generally accurate, they still contain a substantial number of errors that need to be corrected after the assembly process. The algorithm behind SEQuel makes use of a graph structure called the positional de Bruijn graph, which models k-mers within reads while incorporating their approximate positions into the model.
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
105
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
106 SEQuel substantially reduces the number of small insertion, deletion and substitution errors in assemblies of both standard (multi-cell) and single-cell sequencing data. SEQuel was tested mainly on Illumina sequence data, in combination with multiple NGS assemblers, such as EULER-SR, Velvet, SOAPdenovo, ALLPATHS and SPAdes.
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
107
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
108 **Known issues**
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
109
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
110 .. class:: warningmark
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
111
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
112 During the pre-processing stage, one SAM file is created per contig. Due to runtime considerations, all of these files are kept open simultaneously, so the program will crash when the number of contigs in the assembly is too high.
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
113
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
114 **License and citation**
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
115
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
116 This Galaxy tool is Copyright © 2013 `CRS4 Srl.`_ and is released under the `MIT license`_.
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
117
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
118 .. _CRS4 Srl.: http://www.crs4.it/
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
119 .. _MIT license: http://opensource.org/licenses/MIT
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
120
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
121 If you use this tool in Galaxy, please cite |Cuccuru2013|_.
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
122
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
123 .. |Cuccuru2013| replace:: Cuccuru, G., Orsini, M., Pinna, A., Sbardellati, A., Soranzo, N., Travaglione, A., Uva, P., Zanetti, G., Fotia, G. (2013) Orione, a web-based framework for NGS analysis in microbiology. *Submitted*
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
124 .. _Cuccuru2013: http://orione.crs4.it/
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
125
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
126 This tool uses `SEQuel`_, which is licensed separately. Please cite |Ronen2012|_.
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
127
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
128 .. _SEQuel: http://bix.ucsd.edu/SEQuel/
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
129 .. |Ronen2012| replace:: Ronen R., *et al.* (2012) SEQuel: improving the accuracy of genome assemblies. *Bioinformatics* 28 (12), i188-i196
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
130 .. _Ronen2012: http://bioinformatics.oxfordjournals.org/content/28/12/i188
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
131 </help>
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
132 </tool>