annotate sequel_wrapper.xml @ 2:208ce57f9221 draft default tip

Fix version for blat requirement (reported by Bjoern Gruening). Upgrade BWA dependency to v. 0.7.7 . Update Orione citation.
author crs4
date Fri, 18 Jul 2014 09:19:43 -0400
parents ccadfae70b02
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
1 <tool id="sequel_wrapper" name="SEQuel" version="0.2">
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
2 <description></description>
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
3 <requirements>
2
208ce57f9221 Fix version for blat requirement (reported by Bjoern Gruening). Upgrade BWA dependency to v. 0.7.7 . Update Orione citation.
crs4
parents: 1
diff changeset
4 <requirement type="package" version="0.7.7">bwa</requirement>
208ce57f9221 Fix version for blat requirement (reported by Bjoern Gruening). Upgrade BWA dependency to v. 0.7.7 . Update Orione citation.
crs4
parents: 1
diff changeset
5 <requirement type="package" version="35x1">blat</requirement>
0
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
6 <requirement type="package" version="1.0.2">sequel</requirement>
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
7 </requirements>
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
8 <command interpreter="python">
1
ccadfae70b02 Use $GALAXY_SLOTS instead of $SEQUEL_SITE_OPTIONS. Depend on package_blat_35x1 . Add readme.rst . Update Orione citation.
crs4
parents: 0
diff changeset
9 sequel_wrapper.py -t \${GALAXY_SLOTS:-8} -p \${GALAXY_SLOTS:-8} -u 1
0
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
10 --sequel_jar_path=\$SEQUEL_JAR_PATH --read1=$read1 --read2=$read2 --contigs=$contigs
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
11 #if str($bases_length)
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
12 --bases_length=$bases_length
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
13 #end if
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
14 #if str($kmer_size)
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
15 --kmer_size=$kmer_size
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
16 #end if
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
17 #if str($max_positional_error)
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
18 --max_positional_error=$max_positional_error
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
19 #end if
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
20 #if str($min_fraction)
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
21 --min_fraction=$min_fraction
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
22 #end if
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
23 #if str($min_aln_length)
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
24 --min_aln_length=$min_aln_length
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
25 #end if
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
26 #if str($min_avg_coverage)
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
27 --min_avg_coverage=$min_avg_coverage
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
28 #end if
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
29 #if str($discard_kmers)
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
30 --discard_kmers=$discard_kmers
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
31 #end if
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
32 #if str($discard_positional)
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
33 --discard_positional=$discard_positional
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
34 #end if
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
35 #if str($min_aln_score)
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
36 --min_aln_score=$min_aln_score
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
37 #end if
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
38 #if $single_cell_mode
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
39 --single_cell_mode
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
40 #end if
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
41 #if $report_changes
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
42 --report_changes
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
43 #end if
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
44 #if $extend_contig
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
45 --extend_contig
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
46 #end if
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
47 #if $reference_genome
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
48 --reference_genome=$reference_genome
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
49 #end if
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
50 --contigs_refined=$contigs_refined
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
51 --logprep=$logprep
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
52 --logseq=$logseq
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
53 --logfile_prep=$logfile_prep
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
54 --logfile_seq=$logfile_seq
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
55 </command>
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
56
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
57 <inputs>
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
58 <param name="read1" type="data" format="fasta,fastq" label="Paired-end reads 1 from sequencing (-r1)" help="FASTA or FASTQ format" />
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
59 <param name="read2" type="data" format="fasta,fastq" label="Paired-end reads 2 from sequencing (-r2)" help="FASTA or FASTQ format" />
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
60 <param name="contigs" type="data" format="fasta,fastq" label="Contigs from assembly (-c)" help="FASTA or FASTQ format" />
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
61
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
62 <param name="bases_length" type="integer" value="0" optional="true" label="Preprocessing: do not refine contigs shorter than n bases (-l)" help="Contigs shorter than n bases will appear unchanged in the final output file" />
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
63
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
64 <param name="kmer_size" type="integer" value="50" optional="true" label="K-mer size (-k)" help="" />
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
65
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
66 <param name="max_positional_error" type="integer" value="25" optional="true" label="Max positional error Delta (-d)" help="" />
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
67
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
68 <param name="min_fraction" type="float" value="0.9" optional="true" label="Min fraction of matches in alignment (-f)" help="" />
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
69
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
70 <param name="min_aln_length" type="integer" value="" optional="true" label="Min alignment length (-l)" help="bp or fraction of contig. Optional." />
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
71
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
72 <param name="min_avg_coverage" type="float" value="20.0" optional="true" label="Min average coverage to incorporate changes (-v)" help="" />
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
73
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
74 <param name="discard_kmers" type="integer" value="1" optional="true" label="Discard k-mers observed less than m times (-m)" help="" />
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
75
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
76 <param name="discard_positional" type="integer" value="1" optional="true" label="Discard positional k-mers observed less than n times (-n)" help="" />
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
77
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
78 <param name="min_aln_score" type="integer" value="1" optional="true" label="Min alignment score (MAPQ) of reads to consider (-q)" help="" />
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
79
1
ccadfae70b02 Use $GALAXY_SLOTS instead of $SEQUEL_SITE_OPTIONS. Depend on package_blat_35x1 . Add readme.rst . Update Orione citation.
crs4
parents: 0
diff changeset
80 <param name="single_cell_mode" type="boolean" checked="false" label="Single cell mode, sort partial-contigs by coverage (-s)" />
0
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
81
1
ccadfae70b02 Use $GALAXY_SLOTS instead of $SEQUEL_SITE_OPTIONS. Depend on package_blat_35x1 . Add readme.rst . Update Orione citation.
crs4
parents: 0
diff changeset
82 <param name="report_changes" type="boolean" checked="false" label="Report changes (slow) for all input-contigs (-r)" />
0
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
83
1
ccadfae70b02 Use $GALAXY_SLOTS instead of $SEQUEL_SITE_OPTIONS. Depend on package_blat_35x1 . Add readme.rst . Update Orione citation.
crs4
parents: 0
diff changeset
84 <param name="extend_contig" type="boolean" checked="false" label="Extend contig with flanking regions of alignment (-e)" />
0
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
85
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
86 <param name="reference_genome" type="data" format="fasta,twobit" optional="true" label="Evaluate refinement using reference genome (-g)" help="FASTA or 2bit format" />
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
87 </inputs>
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
88
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
89 <outputs>
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
90 <data name="logfile_prep" format="txt" label="${tool.name} on ${on_string}: log (pre-processing)" />
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
91 <data name="logfile_seq" format="txt" label="${tool.name} on ${on_string}: log (SEQuel)" />
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
92 <data name="logprep" format="txt" label="${tool.name} on ${on_string}: log (pre-processing, official)" />
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
93 <data name="logseq" format="txt" label="${tool.name} on ${on_string}: log (SEQuel, official)" />
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
94 <data name="contigs_refined" format="fasta" label="${tool.name} on ${on_string}: refined contigs" />
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
95 </outputs>
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
96
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
97 <tests>
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
98
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
99 </tests>
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
100 <help>
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
101 **What it does**
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
102
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
103 SEQuel is a tool for correcting errors (i.e., insertions, deletions, and substitutions) in contigs output from assembly. While assemblies of next generation sequencing (NGS) data are accurate, they still contain a substantial number of errors that need to be corrected after the assembly process. The algorithm behind SEQuel makes use of a graph structure called the positional de Bruijn graph, which models k-mers within reads while incorporating their approximate positions into the model.
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
104
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
105 SEQuel substantially reduces the number of small insertion, deletion and substitution errors in assemblies of both standard (multi-cell) and single-cell sequencing data. SEQuel was tested mainly on Illumina sequence data, in combination with multiple NGS assemblers, such as Euler-SR, Velvet, SoapDeNovo, ALLPATHS and SPAdes.
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
106
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
107 **Known issues**
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
108
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
109 .. class:: warningmark
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
110
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
111 During the pre-processing stage, a SAM file per contig is created. Due to runtime considerations, these files are kept open simultaneously. The program will crash when the number of contigs in the assembly is too high.
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
112
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
113 **License and citation**
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
114
2
208ce57f9221 Fix version for blat requirement (reported by Bjoern Gruening). Upgrade BWA dependency to v. 0.7.7 . Update Orione citation.
crs4
parents: 1
diff changeset
115 This Galaxy tool is Copyright © 2013-2014 `CRS4 Srl.`_ and is released under the `MIT license`_.
0
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
116
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
117 .. _CRS4 Srl.: http://www.crs4.it/
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
118 .. _MIT license: http://opensource.org/licenses/MIT
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
119
2
208ce57f9221 Fix version for blat requirement (reported by Bjoern Gruening). Upgrade BWA dependency to v. 0.7.7 . Update Orione citation.
crs4
parents: 1
diff changeset
120 You can use this tool only if you agree to the license terms of: `SEQuel`_.
0
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
121
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
122 .. _SEQuel: http://bix.ucsd.edu/SEQuel/
2
208ce57f9221 Fix version for blat requirement (reported by Bjoern Gruening). Upgrade BWA dependency to v. 0.7.7 . Update Orione citation.
crs4
parents: 1
diff changeset
123
208ce57f9221 Fix version for blat requirement (reported by Bjoern Gruening). Upgrade BWA dependency to v. 0.7.7 . Update Orione citation.
crs4
parents: 1
diff changeset
124 If you use this tool, please cite:
208ce57f9221 Fix version for blat requirement (reported by Bjoern Gruening). Upgrade BWA dependency to v. 0.7.7 . Update Orione citation.
crs4
parents: 1
diff changeset
125
208ce57f9221 Fix version for blat requirement (reported by Bjoern Gruening). Upgrade BWA dependency to v. 0.7.7 . Update Orione citation.
crs4
parents: 1
diff changeset
126 - |Cuccuru2014|_
208ce57f9221 Fix version for blat requirement (reported by Bjoern Gruening). Upgrade BWA dependency to v. 0.7.7 . Update Orione citation.
crs4
parents: 1
diff changeset
127 - |Ronen2012|_
208ce57f9221 Fix version for blat requirement (reported by Bjoern Gruening). Upgrade BWA dependency to v. 0.7.7 . Update Orione citation.
crs4
parents: 1
diff changeset
128
208ce57f9221 Fix version for blat requirement (reported by Bjoern Gruening). Upgrade BWA dependency to v. 0.7.7 . Update Orione citation.
crs4
parents: 1
diff changeset
129 .. |Cuccuru2014| replace:: Cuccuru, G., Orsini, M., Pinna, A., Sbardellati, A., Soranzo, N., Travaglione, A., Uva, P., Zanetti, G., Fotia, G. (2014) Orione, a web-based framework for NGS analysis in microbiology. *Bioinformatics* 30(13), 1928-1929
208ce57f9221 Fix version for blat requirement (reported by Bjoern Gruening). Upgrade BWA dependency to v. 0.7.7 . Update Orione citation.
crs4
parents: 1
diff changeset
130 .. _Cuccuru2014: http://bioinformatics.oxfordjournals.org/content/30/13/1928
0
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
131 .. |Ronen2012| replace:: Ronen R., *et al.* (2012) SEQuel: improving the accuracy of genome assemblies. *Bioinformatics* 28 (12), i188-i196
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
132 .. _Ronen2012: http://bioinformatics.oxfordjournals.org/content/28/12/i188
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
133 </help>
58e1eb37fddc Uploaded
crs4
parents:
diff changeset
134 </tool>