sharplabtool: tools/sr_assembly/velveth.xml annotate

annotate tools/sr_assembly/velveth.xml @ 1:cdcb0ce84a1b

Uploaded

author	xuebing
date	Fri, 09 Mar 2012 19:45:15 -0500
parents	9071e359b9a3
children

rev	line source
0 9071e359b9a3 Uploaded xuebing parents: diff changeset	1 <tool id="velveth" name="velveth" version="1.0.0">
9071e359b9a3 Uploaded xuebing parents: diff changeset	2 <description>Prepare a dataset for the Velvet velvetg Assembler</description>
9071e359b9a3 Uploaded xuebing parents: diff changeset	3 <command interpreter="python">
9071e359b9a3 Uploaded xuebing parents: diff changeset	4 velveth_wrapper.py
9071e359b9a3 Uploaded xuebing parents: diff changeset	5 '$out_file1' '$out_file1.extra_files_path'
9071e359b9a3 Uploaded xuebing parents: diff changeset	6 $hash_length
9071e359b9a3 Uploaded xuebing parents: diff changeset	7 $strand_specific
9071e359b9a3 Uploaded xuebing parents: diff changeset	8 #for $i in $inputs
9071e359b9a3 Uploaded xuebing parents: diff changeset	9 ${i.file_format}
9071e359b9a3 Uploaded xuebing parents: diff changeset	10 ${i.read_type}
9071e359b9a3 Uploaded xuebing parents: diff changeset	11 ${i.input}
9071e359b9a3 Uploaded xuebing parents: diff changeset	12 #end for
9071e359b9a3 Uploaded xuebing parents: diff changeset	13 </command>
9071e359b9a3 Uploaded xuebing parents: diff changeset	14 <inputs>
9071e359b9a3 Uploaded xuebing parents: diff changeset	15 <param label="Hash Length" name="hash_length" type="select" help="k-mer length in base pairs of the words being hashed.">
9071e359b9a3 Uploaded xuebing parents: diff changeset	16 <option value="11">11</option>
9071e359b9a3 Uploaded xuebing parents: diff changeset	17 <option value="13">13</option>
9071e359b9a3 Uploaded xuebing parents: diff changeset	18 <option value="15">15</option>
9071e359b9a3 Uploaded xuebing parents: diff changeset	19 <option value="17">17</option>
9071e359b9a3 Uploaded xuebing parents: diff changeset	20 <option value="19">19</option>
9071e359b9a3 Uploaded xuebing parents: diff changeset	21 <option value="21" selected="yes">21</option>
9071e359b9a3 Uploaded xuebing parents: diff changeset	22 <option value="23">23</option>
9071e359b9a3 Uploaded xuebing parents: diff changeset	23 <option value="25">25</option>
9071e359b9a3 Uploaded xuebing parents: diff changeset	24 <option value="27">27</option>
9071e359b9a3 Uploaded xuebing parents: diff changeset	25 <option value="29">29</option>
9071e359b9a3 Uploaded xuebing parents: diff changeset	26 </param>
9071e359b9a3 Uploaded xuebing parents: diff changeset	27 <param name="strand_specific" type="boolean" checked="false" truevalue="-strand_specific" falsevalue="" label="Use strand specific transcriptome sequencing" help="If you are using a strand specific transcriptome sequencing protocol, you may wish to use this option for better results."/>
9071e359b9a3 Uploaded xuebing parents: diff changeset	28 <repeat name="inputs" title="Input Files">
9071e359b9a3 Uploaded xuebing parents: diff changeset	29 <param label="file format" name="file_format" type="select">
9071e359b9a3 Uploaded xuebing parents: diff changeset	30 <option value="-fasta" selected="yes">fasta</option>
9071e359b9a3 Uploaded xuebing parents: diff changeset	31 <option value="-fastq">fastq</option>
9071e359b9a3 Uploaded xuebing parents: diff changeset	32 <option value="-eland">eland</option>
9071e359b9a3 Uploaded xuebing parents: diff changeset	33 <option value="-gerald">gerald</option>
9071e359b9a3 Uploaded xuebing parents: diff changeset	34 </param>
9071e359b9a3 Uploaded xuebing parents: diff changeset	35 <param label="read type" name="read_type" type="select">
9071e359b9a3 Uploaded xuebing parents: diff changeset	36 <option value="-short" selected="yes">short reads</option>
9071e359b9a3 Uploaded xuebing parents: diff changeset	37 <option value="-shortPaired">shortPaired reads</option>
9071e359b9a3 Uploaded xuebing parents: diff changeset	38 <option value="-short2">short2 reads</option>
9071e359b9a3 Uploaded xuebing parents: diff changeset	39 <option value="-shortPaired2">shortPaired2 reads</option>
9071e359b9a3 Uploaded xuebing parents: diff changeset	40 <option value="-long">long reads</option>
9071e359b9a3 Uploaded xuebing parents: diff changeset	41 <option value="-longPaired">longPaired reads</option>
9071e359b9a3 Uploaded xuebing parents: diff changeset	42 </param>
9071e359b9a3 Uploaded xuebing parents: diff changeset	43
9071e359b9a3 Uploaded xuebing parents: diff changeset	44 <param name="input" type="data" format="fasta,fastq,eland,gerald" label="Dataset"/>
9071e359b9a3 Uploaded xuebing parents: diff changeset	45 </repeat>
9071e359b9a3 Uploaded xuebing parents: diff changeset	46 </inputs>
9071e359b9a3 Uploaded xuebing parents: diff changeset	47 <outputs>
9071e359b9a3 Uploaded xuebing parents: diff changeset	48 <data format="velvet" name="out_file1" />
9071e359b9a3 Uploaded xuebing parents: diff changeset	49 </outputs>
9071e359b9a3 Uploaded xuebing parents: diff changeset	50 <requirements>
9071e359b9a3 Uploaded xuebing parents: diff changeset	51 <requirement type="package">velvet</requirement>
9071e359b9a3 Uploaded xuebing parents: diff changeset	52 </requirements>
9071e359b9a3 Uploaded xuebing parents: diff changeset	53 <tests>
9071e359b9a3 Uploaded xuebing parents: diff changeset	54 <test>
9071e359b9a3 Uploaded xuebing parents: diff changeset	55 <param name="hash_length" value="21" />
9071e359b9a3 Uploaded xuebing parents: diff changeset	56 <param name="read_type" value="-shortPaired" />
9071e359b9a3 Uploaded xuebing parents: diff changeset	57 <!-- <repeat name="inputs"> -->
9071e359b9a3 Uploaded xuebing parents: diff changeset	58 <param name="file_format" value="fasta" />
9071e359b9a3 Uploaded xuebing parents: diff changeset	59 <param name="read_type" value="shortPaired reads" />
9071e359b9a3 Uploaded xuebing parents: diff changeset	60 <param name="input" value="velvet_test_reads.fa" ftype="fasta" />
9071e359b9a3 Uploaded xuebing parents: diff changeset	61 <!-- </repeat> -->
9071e359b9a3 Uploaded xuebing parents: diff changeset	62 <param name="strand_specific" value="" />
9071e359b9a3 Uploaded xuebing parents: diff changeset	63 <output name="out_file1" file="velveth_test1/output.html" lines_diff="4">
9071e359b9a3 Uploaded xuebing parents: diff changeset	64 <extra_files type="file" name='Sequences' value="velveth_test1/Sequences" compare="diff" />
9071e359b9a3 Uploaded xuebing parents: diff changeset	65 <extra_files type="file" name='Roadmaps' value="velveth_test1/Roadmaps" compare="diff" />
9071e359b9a3 Uploaded xuebing parents: diff changeset	66 </output>
9071e359b9a3 Uploaded xuebing parents: diff changeset	67 </test>
9071e359b9a3 Uploaded xuebing parents: diff changeset	68 </tests>
9071e359b9a3 Uploaded xuebing parents: diff changeset	69 <help>
9071e359b9a3 Uploaded xuebing parents: diff changeset	70 Velvet Overview
9071e359b9a3 Uploaded xuebing parents: diff changeset	71
9071e359b9a3 Uploaded xuebing parents: diff changeset	72 Velvet_ is a de novo genomic assembler specially designed for short read sequencing technologies, such as Solexa or 454, developed by Daniel Zerbino and Ewan Birney at the European Bioinformatics Institute (EMBL-EBI), near Cambridge, in the United Kingdom.
9071e359b9a3 Uploaded xuebing parents: diff changeset	73
9071e359b9a3 Uploaded xuebing parents: diff changeset	74 Velvet currently takes in short read sequences, removes errors then produces high quality unique contigs. It then uses paired-end read and long read information, when available, to retrieve the repeated areas between contigs.
9071e359b9a3 Uploaded xuebing parents: diff changeset	75
9071e359b9a3 Uploaded xuebing parents: diff changeset	76 Read the Velvet `documentation`__ for details on using the Velvet Assembler.
9071e359b9a3 Uploaded xuebing parents: diff changeset	77
9071e359b9a3 Uploaded xuebing parents: diff changeset	78 .. _Velvet: http://www.ebi.ac.uk/~zerbino/velvet/
9071e359b9a3 Uploaded xuebing parents: diff changeset	79
9071e359b9a3 Uploaded xuebing parents: diff changeset	80 .. __: http://www.ebi.ac.uk/~zerbino/velvet/Manual.pdf
9071e359b9a3 Uploaded xuebing parents: diff changeset	81
9071e359b9a3 Uploaded xuebing parents: diff changeset	82 ------
9071e359b9a3 Uploaded xuebing parents: diff changeset	83
9071e359b9a3 Uploaded xuebing parents: diff changeset	84 Velveth
9071e359b9a3 Uploaded xuebing parents: diff changeset	85
9071e359b9a3 Uploaded xuebing parents: diff changeset	86 Velveth takes in a number of sequence files, produces a hashtable, then outputs two files in an output directory (creating it if necessary), Sequences and Roadmaps, which are required by velvetg.
9071e359b9a3 Uploaded xuebing parents: diff changeset	87
9071e359b9a3 Uploaded xuebing parents: diff changeset	88 ------
9071e359b9a3 Uploaded xuebing parents: diff changeset	89
9071e359b9a3 Uploaded xuebing parents: diff changeset	90 Hash Length
9071e359b9a3 Uploaded xuebing parents: diff changeset	91
9071e359b9a3 Uploaded xuebing parents: diff changeset	92 The hash length, also known as k-mer length, corresponds to the length, in base pairs, of the words being hashed.
9071e359b9a3 Uploaded xuebing parents: diff changeset	93
9071e359b9a3 Uploaded xuebing parents: diff changeset	94 The hash length is the length of the k-mers being entered in the hash table. Firstly, you must observe three technical constraints::
9071e359b9a3 Uploaded xuebing parents: diff changeset	95
9071e359b9a3 Uploaded xuebing parents: diff changeset	96 # it must be an odd number, to avoid palindromes. If you put in an even number, Velvet will just decrement it and proceed.
9071e359b9a3 Uploaded xuebing parents: diff changeset	97 # it must be below or equal to MAXKMERHASH length (cf. 2.3.3, by default 31bp), because it is stored on 64 bits
9071e359b9a3 Uploaded xuebing parents: diff changeset	98 # it must be strictly less than the read length, otherwise you simply will not observe any overlaps between reads, for obvious reasons.
9071e359b9a3 Uploaded xuebing parents: diff changeset	99
9071e359b9a3 Uploaded xuebing parents: diff changeset	100 Now you still have quite a lot of possibilities. As is often the case, it's a trade-off between specificity and sensitivity. Longer k-mers bring you more specificity (i.e. fewer spurious overlaps) but lower coverage (cf. below)... so there's a sweet spot to be found with time and experience.
9071e359b9a3 Uploaded xuebing parents: diff changeset	101 We like to think in terms of "k-mer coverage", i.e. how many times a k-mer has been seen among the reads. The relation between k-mer coverage Ck and standard (nucleotide-wise) coverage C is Ck = C * (L - k + 1) / L, where k is your hash length and L is your read length.
9071e359b9a3 Uploaded xuebing parents: diff changeset	102 Experience shows that this kmer coverage should be above 10 to start getting decent results. If Ck is above 20, you might be "wasting" coverage. Experience also shows that empirical tests with different values for k are not that costly to run!
9071e359b9a3 Uploaded xuebing parents: diff changeset	103
9071e359b9a3 Uploaded xuebing parents: diff changeset	104 Input Files
9071e359b9a3 Uploaded xuebing parents: diff changeset	105
9071e359b9a3 Uploaded xuebing parents: diff changeset	106 Velvet works mainly with fasta and fastq formats. For paired-end reads, the assumption is that each read is next to its mate
9071e359b9a3 Uploaded xuebing parents: diff changeset	107 read. In other words, if the reads are indexed from 0, then reads 0 and 1 are paired, 2 and 3, 4 and 5, etc.
9071e359b9a3 Uploaded xuebing parents: diff changeset	108
9071e359b9a3 Uploaded xuebing parents: diff changeset	109 Supported file formats are::
9071e359b9a3 Uploaded xuebing parents: diff changeset	110
9071e359b9a3 Uploaded xuebing parents: diff changeset	111 fasta
9071e359b9a3 Uploaded xuebing parents: diff changeset	112 fastq
9071e359b9a3 Uploaded xuebing parents: diff changeset	113 fasta.gz
9071e359b9a3 Uploaded xuebing parents: diff changeset	114 fastq.gz
9071e359b9a3 Uploaded xuebing parents: diff changeset	115 eland
9071e359b9a3 Uploaded xuebing parents: diff changeset	116 gerald
9071e359b9a3 Uploaded xuebing parents: diff changeset	117
9071e359b9a3 Uploaded xuebing parents: diff changeset	118 Read categories are::
9071e359b9a3 Uploaded xuebing parents: diff changeset	119
9071e359b9a3 Uploaded xuebing parents: diff changeset	120 short (default)
9071e359b9a3 Uploaded xuebing parents: diff changeset	121 shortPaired
9071e359b9a3 Uploaded xuebing parents: diff changeset	122 short2 (same as short, but for a separate insert-size library)
9071e359b9a3 Uploaded xuebing parents: diff changeset	123 shortPaired2 (see above)
9071e359b9a3 Uploaded xuebing parents: diff changeset	124 long (for Sanger, 454 or even reference sequences)
9071e359b9a3 Uploaded xuebing parents: diff changeset	125 longPaired
9071e359b9a3 Uploaded xuebing parents: diff changeset	126
9071e359b9a3 Uploaded xuebing parents: diff changeset	127 </help>
9071e359b9a3 Uploaded xuebing parents: diff changeset	128 </tool>

Mercurial > repos > xuebing > sharplabtool

annotate tools/sr_assembly/velveth.xml @ 1:cdcb0ce84a1b