annotate velveth.xml @ 0:08256557922f draft

planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
author devteam
date Tue, 13 Oct 2015 16:38:28 -0400
parents
children 5da9a0e2fb2d
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
1 <tool id="velveth" name="velveth" version="@WRAPPER_VERSION@.0">
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
2 <description>Prepare a dataset for the Velvet velvetg Assembler</description>
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
3 <version_command>velveth 2&gt;&amp;1 | grep "Version" | sed -e 's/Version //'</version_command>
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
4 <macros>
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
5 <import>macros.xml</import>
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
6 </macros>
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
7 <expand macro="requirements"/>
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
8 <expand macro="stdio"/>
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
9 <command interpreter="python">
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
10 velveth_wrapper.py
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
11 '$out_file1' '$out_file1.extra_files_path'
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
12 $hash_length
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
13 $strand_specific
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
14 #for $i in $inputs
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
15 ${i.file_format}
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
16 ${i.read_type}
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
17 ${i.input}
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
18 #end for
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
19 </command>
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
20 <inputs>
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
21 <param label="Hash Length" name="hash_length" type="select" help="k-mer length in base pairs of the words being hashed.">
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
22 <option value="11">11</option>
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
23 <option value="13">13</option>
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
24 <option value="15">15</option>
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
25 <option value="17">17</option>
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
26 <option value="19">19</option>
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
27 <option value="21" selected="yes">21</option>
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
28 <option value="23">23</option>
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
29 <option value="25">25</option>
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
30 <option value="27">27</option>
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
31 <option value="29">29</option>
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
32 </param>
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
33 <param name="strand_specific" type="boolean" checked="false" truevalue="-strand_specific" falsevalue="" label="Use strand specific transcriptome sequencing" help="If you are using a strand specific transcriptome sequencing protocol, you may wish to use this option for better results."/>
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
34 <repeat name="inputs" title="Input Files">
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
35 <param label="file format" name="file_format" type="select">
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
36 <option value="-fasta" selected="yes">fasta</option>
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
37 <option value="-fastq">fastq</option>
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
38 <option value="-eland">eland</option>
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
39 <option value="-gerald">gerald</option>
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
40 </param>
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
41 <param label="read type" name="read_type" type="select">
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
42 <option value="-short" selected="yes">short reads</option>
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
43 <option value="-shortPaired">shortPaired reads</option>
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
44 <option value="-short2">short2 reads</option>
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
45 <option value="-shortPaired2">shortPaired2 reads</option>
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
46 <option value="-long">long reads</option>
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
47 <option value="-longPaired">longPaired reads</option>
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
48 </param>
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
49
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
50 <param name="input" type="data" format="fasta,fastq,eland,gerald" label="Dataset"/>
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
51 </repeat>
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
52 </inputs>
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
53 <outputs>
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
54 <data format="velvet" name="out_file1" />
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
55 </outputs>
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
56 <requirements>
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
57 <requirement type="package">velvet</requirement>
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
58 </requirements>
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
59 <tests>
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
60 <test>
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
61 <param name="hash_length" value="21" />
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
62 <param name="read_type" value="-shortPaired" />
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
63 <!-- <repeat name="inputs"> -->
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
64 <param name="file_format" value="fasta" />
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
65 <param name="read_type" value="shortPaired reads" />
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
66 <param name="input" value="velvet_test_reads.fa" ftype="fasta" />
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
67 <!-- </repeat> -->
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
68 <param name="strand_specific" value="" />
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
69 <output name="out_file1" file="velveth_test1/output.html" lines_diff="4">
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
70 <extra_files type="file" name='Sequences' value="velveth_test1/Sequences" compare="diff" />
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
71 <extra_files type="file" name='Roadmaps' value="velveth_test1/Roadmaps" compare="diff" />
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
72 </output>
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
73 </test>
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
74 </tests>
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
75 <help>
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
76
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
77 **What it does**
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
78
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
79 Velvet_ is a de novo genomic assembler specially designed for short read sequencing technologies, such as Solexa or 454, developed by Daniel Zerbino and Ewan Birney at the European Bioinformatics Institute (EMBL-EBI), near Cambridge, in the United Kingdom.
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
80
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
81 Velvet currently takes in short read sequences, removes errors then produces high quality unique contigs. It then uses paired-end read and long read information, when available, to retrieve the repeated areas between contigs.
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
82
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
83 Read the Velvet `documentation`__ for details on using the Velvet Assembler.
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
84
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
85 .. _Velvet: http://www.ebi.ac.uk/~zerbino/velvet/
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
86
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
87 .. __: http://www.ebi.ac.uk/~zerbino/velvet/Manual.pdf
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
88
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
89 ------
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
90
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
91 **Velveth**
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
92
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
93 Velveth takes in a number of sequence files, produces a hashtable, then outputs two files in an output directory (creating it if necessary), Sequences and Roadmaps, which are necessary to velvetg.
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
94
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
95 ------
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
96
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
97 **Hash Length**
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
98
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
99 The hash length, also known as k-mer length, corresponds to the length, in base pairs, of the words being hashed.
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
100
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
101 The hash length is the length of the k-mers being entered in the hash table. Firstly, you must observe three technical constraints::
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
102
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
103 # it must be an odd number, to avoid palindromes. If you put in an even number, Velvet will just decrement it and proceed.
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
104 # it must be below or equal to MAXKMERLENGTH (cf. 2.3.3, by default 31bp), because it is stored on 64 bits
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
105 # it must be strictly less than the read length, otherwise you simply will not observe any overlaps between reads, for obvious reasons.
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
106
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
107 Now you still have quite a lot of possibilities. As is often the case, it's a trade-off between specificity and sensitivity. Longer kmers bring you more specificity (i.e. fewer spurious overlaps) but lower coverage (cf. below)... so there's a sweet spot to be found with time and experience.
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
108 We like to think in terms of "k-mer coverage", i.e. how many times a k-mer has been seen among the reads. The relation between k-mer coverage Ck and standard (nucleotide-wise) coverage C is Ck = C * (L - k + 1)/L, where k is your hash length and L is your read length.
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
109 Experience shows that this kmer coverage should be above 10 to start getting decent results. If Ck is above 20, you might be "wasting" coverage. Experience also shows that empirical tests with different values for k are not that costly to run!
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
110
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
111 **Input Files**
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
112
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
113 Velvet works mainly with fasta and fastq formats. For paired-end reads, the assumption is that each read is next to its mate
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
114 read. In other words, if the reads are indexed from 0, then reads 0 and 1 are paired, 2 and 3, 4 and 5, etc.
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
115
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
116 Supported file formats are::
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
117
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
118 fasta
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
119 fastq
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
120 fasta.gz
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
121 fastq.gz
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
122 eland
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
123 gerald
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
124
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
125 Read categories are::
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
126
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
127 short (default)
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
128 shortPaired
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
129 short2 (same as short, but for a separate insert-size library)
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
130 shortPaired2 (see above)
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
131 long (for Sanger, 454 or even reference sequences)
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
132 longPaired
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
133
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
134 </help>
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
135 <expand macro="citation"/>
08256557922f planemo upload commit 4720b3dfa114d790b597fef6ccf3c17e8c11e111
devteam
parents:
diff changeset
136 </tool>