annotate tools/ngs_simulation/grinder.xml @ 4:8c1cbee38ffd

Uploaded
author fangly
date Tue, 04 Oct 2011 01:52:39 -0400
parents 27a15723d4f0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
27a15723d4f0 Uploaded
fangly
parents: 1
diff changeset
1 <tool id="grinder" name="Grinder" version="0.3.8" force_history_refresh="True">
1
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
2
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
3 <!--
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
4 Author: florent.angly@gmail.com
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
5 TODO:
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
6 • See bfast tool (tools/sr_mapping/bfast_wrapper.xml) for how to use datatables easily
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
7 • Basic tests
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
8 • Link to full manual
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
9 • Better sync with Grinder parameters, defaults and help
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
10 -->
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
11
2
27a15723d4f0 Uploaded
fangly
parents: 1
diff changeset
12 <description>genomic, metagenomic and amplicon read simulator</description>
1
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
13
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
14 <requirements>
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
15 <requirement type="binary">grinder</requirement>
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
16 </requirements>
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
17
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
18 <version_string>grinder --version</version_string>
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
19
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
20 <command>
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
21 #set $tool_dir = os.path.join( os.path.abspath($__root_dir__), 'tools', 'ngs_simulation' )
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
22 #set $script1 = os.path.join( $tool_dir, 'stderr_wrapper.py' )
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
23 #set $script2 = os.path.join( $tool_dir, 'grinder_multiple_outputs.py' )
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
24
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
25 $script1
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
26 grinder
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
27 #if $reference_file.specify == "builtin":
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
28 -reference_file ${ filter( lambda x: str( x[0] ) == str( $reference_file.value ), $__app__.tool_data_tables[ 'all_fasta' ].get_fields() )[0][-1] }
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
29 #else if $reference_file.specify == "uploaded":
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
30 -reference_file $reference_file.value
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
31 #end if
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
32 #if str($coverage_fold):
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
33 -coverage_fold $coverage_fold
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
34 #end if
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
35 #if str($total_reads):
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
36 -total_reads $total_reads
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
37 #end if
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
38 #if str($read_dist):
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
39 -read_dist $read_dist
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
40 #end if
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
41 #if str($insert_dist):
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
42 -insert_dist $insert_dist
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
43 #end if
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
44 #if str($exclude_chars):
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
45 -exclude_chars $exclude_chars
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
46 #end if
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
47 #if str($delete_chars):
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
48 -delete_chars $delete_chars
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
49 #end if
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
50 #if str($forward_reverse) != "None":
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
51 -forward_reverse $forward_reverse
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
52 #end if
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
53 #if str($unidirectional):
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
54 -unidirectional $unidirectional
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
55 #end if
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
56 #if str($length_bias):
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
57 -length_bias $length_bias
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
58 #end if
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
59 #if str($copy_bias):
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
60 -copy_bias $copy_bias
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
61 #end if
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
62 #if str($mutation_dist):
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
63 -mutation_dist $mutation_dist
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
64 #end if
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
65 #if str($mutation_ratio):
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
66 -mutation_ratio $mutation_ratio
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
67 #end if
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
68 #if str($homopolymer_dist):
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
69 -homopolymer_dist $homopolymer_dist
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
70 #end if
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
71 #if str($chimera_perc):
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
72 -chimera_perc $chimera_perc
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
73 #end if
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
74 #if str($abundance_file) != "None":
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
75 -abundance_file $abundance_file
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
76 #end if
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
77 #if str($abundance_model):
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
78 -abundance_model $abundance_model
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
79 #end if
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
80 #if str($num_libraries):
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
81 -num_libraries $num_libraries
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
82 #end if
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
83 #if str($multiplex_ids) != "None":
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
84 -multiplex_ids $multiplex_ids
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
85 #end if
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
86 #if str($diversity):
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
87 -diversity $diversity
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
88 #end if
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
89 #if str($shared_perc):
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
90 -shared_perc $shared_perc
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
91 #end if
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
92 #if str($permuted_perc):
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
93 -permuted_perc $permuted_perc
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
94 #end if
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
95 #if str($random_seed):
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
96 -random_seed $random_seed
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
97 #end if
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
98 #if str($desc_track):
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
99 -desc_track $desc_track
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
100 #end if
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
101 #if str($qual_levels):
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
102 -qual_levels $qual_levels
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
103 #end if
2
27a15723d4f0 Uploaded
fangly
parents: 1
diff changeset
104 #if str($fastq_output):
27a15723d4f0 Uploaded
fangly
parents: 1
diff changeset
105 -fastq_output $fastq_output
27a15723d4f0 Uploaded
fangly
parents: 1
diff changeset
106 #end if
1
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
107 #if str($profile_file) != "None":
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
108 -profile_file $profile_file.value
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
109 #end if
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
110 <!-- When Galaxy bug #661 is resolved, then we can use the same method to check for all optional arguments -->
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
111 <!-- i.e. either if str($param) != "None": or if str($param): -->
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
112 <!-- URL: https://bitbucket.org/galaxy/galaxy-central/issue/661/optional-arguments-problems#comment-655611 -->
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
113
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
114 #set $output_dir = $__new_file_path__
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
115 -output_dir $output_dir
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
116
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
117 #set $base_name = $output.id
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
118 -base_name $base_name
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
119 ;
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
120
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
121 $script2 $output_dir $base_name
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
122
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
123 </command>
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
124
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
125 <inputs>
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
126
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
127 <conditional name="reference_file">
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
128 <param name="specify" type="select" label="Specify">
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
129 <option value="builtin">Built-in file</option>
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
130 <option value="uploaded">Uploaded file</option>
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
131 </param>
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
132 <when value="builtin">
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
133 <param name="value" type="select" label="Reference sequences" help="Galaxy built-in FASTA file">
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
134 <options from_data_table="all_fasta" />
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
135 </param>
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
136 </when>
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
137 <when value="uploaded">
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
138 <param name="value" type="data" format="fasta" label="Reference sequences" help="FASTA file that contains the input reference sequences" />
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
139 </when>
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
140 </conditional>
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
141
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
142 <param name="total_reads" type="text" value="100" optional="true" label="Number of reads" help="Number of shotgun or amplicon reads to generate for each library. Do not specify this if you specify the fold coverage." />
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
143
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
144 <param name="coverage_fold" type="text" optional="true" label="Coverage fold" help="Generate the number of reads needed to achieve the specified fold coverage of the input reference sequences for each library (the output FASTA length divided by the input FASTA length). Do not specify this if you specify the number of reads directly" />
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
145
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
146 <param name="read_dist" type="text" value="100" optional="true" label="Sequence length distribution" help="Desired sequence length distribution specified as:
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
147 average length, distribution ('uniform' or 'normal') and standard deviation
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
148 Only the first element is required.
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
149 Examples:
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
150 1/ All sequences exactly 250 bp long: 250
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
151 2/ Uniform distribution around 100+-10 bp: 100 uniform 10
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
152 3/ Read normally distributed with an average of 800 and a standard deviation
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
153 of 100 bp: 800 normal 100" />
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
154
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
155 <param name="insert_dist" type="text" value="0" optional="true" label="Insert size distribution" help="Create shotgun paired end reads (mate pairs) spanning the given insert length (the reads are interior to the insert):
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
156 0 : off,
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
157 or: insert size distribution in bp, in the same format as the read length
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
158 distribution (a typical value is 2,500 bp)
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
159 Two distinct reads are generated whether or not the mate pair overlaps.
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
160 Default: 0" />
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
161
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
162 <param name="exclude_chars" type="text" optional="true" label="Characters to exclude" help="Do not create reads containing any of the specified characters (case insensitive), e.g. 'N-' to prevent reads with gaps (-) or ambiguities (N)." />
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
163
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
164 <param name="delete_chars" type="text" optional="true" label="Characters to delete" help="Remove the specified characters from the reference sequences (case insensitive), e.g. 'N-' to remove gaps (-) and ambiguities (N)." />
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
165
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
166 <param name="forward_reverse" type="data" format="fasta" optional="true" label="Amplicon primers" help="Use amplicon sequencing using the given forward and reverse PCR primer sequences (in a FASTA file, in this order). The second sequence in the FASTA file (the reverse primer) is optional. The sequences should use the IUPAC convention for degenerate residues. Example: AAACTYAAAKGAATTGRCGG and ACGGGCGGTGTGTRC for the 926F and 1392R primers respectively (primers that target the v6 to v9 region of the 16S rRNA gene). Genome sequences that do not match the specified primers are excluded. It is recommended to use the unidirectional and no genome length bias options to generate amplicon reads." />
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
167
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
168 <param name="unidirectional" type="select" display="radio" value="0" label="Sequencing direction" help="Produce reads from just one strand, as opposed to from both the reference strand and its reverse complement.">
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
169 <option value="0">both strands</option>
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
170 <option value="1">forward strand only</option>
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
171 <option value="-1">reverse strand only</option>
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
172 </param>
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
173
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
174 <param name="length_bias" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Length bias" help="In shotgun libraries, sample species proportionally to their genome length: at the same relative abundance, larger genomes contribute more reads than smaller genomes." />
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
175
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
176 <param name="copy_bias" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Copy number bias" help="In amplicon libraries, sample species proportionally to the number of copies of the target gene: at equal relative abundance, genomes that have multiple copies of the target gene contribute more amplicon reads than genomes that have a single copy. Note: you should use full genomes in the reference file to make use of this option." />
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
177
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
178 <param name="mutation_dist" type="text" value="0" optional="true" label="Mutation distribution" help="Introduce sequencing errors in the reads, in the form of mutations (substitutions, insertions and deletions) using a specified frequency distribution:
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
179 average probability (%),
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
180 model (uniform, linear),
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
181 value at 3&apos; end (not applicable for uniform model).
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
182 For example, for Sanger-type errors, use:
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
183 1.5 linear 2." />
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
184
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
185 <param name="mutation_ratio" type="text" value="80 20" optional="true" label="Mutation ratio" help="Indicate the percentage of substitutions and indels (insertions and deletions). For example, use 80 20 (4 substitutions for each indel) for Sanger reads. Note that this parameter has no effect unless you specify the mutation distribution option." />
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
186
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
187 <param name="homopolymer_dist" type="text" value="0" optional="true" label="Homopolymer distribution" help="Introduce sequencing errors in the reads in the form of homopolymeric stretches (e.g. AAA, CCCCC) using a specified model (n: homopolymer length).
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
188 Margulies: N(n, 0.15 * n), Margulies et al. 2005.
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
189 Richter: N(n, 0.15 * sqrt(n)), Richter et al. 2008.
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
190 Balzer: N(n, 0.03494 + n * 0.06856), Balzer et al. 2010." />
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
191
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
192 <param name="chimera_perc" type="text" value="0" optional="true" label="Percentage of chimeras" help="Specify the percent of reads in amplicon libraries that should be chimeric sequences. A typical value is 10%." />
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
193
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
194 <param name="abundance_file" type="data" format="tabular" optional="true" label="Abundance file" help="Specify the relative abundance of the genomes manually in an input file. Each line of the file should contain a sequence name and its relative abundance (%), e.g. 'seqABC 82.1' or 'seqABC 82.1 10.2' if you are specifying 2 different communities." />
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
195
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
196 <param name="abundance_model" type="text" value="uniform 1" optional="true" label="Rank abundance model" help="Relative abundance model for the input genomes:
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
197 uniform, linear, powerlaw, logarithmic or exponential.
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
198 Examples:
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
199 1/ uniform distribution: uniform,
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
200 2/ powerlaw distribution with parameter 0.1: powerlaw 0.1." />
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
201
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
202 <param name="num_libraries" type="text" value="1" optional="true" label="Number of libraries" help="Number of independent libraries to create. Specify how diverse and similar they should be using the options diversity, shared percent, and permuted percent. Assign them different MID tags with the multiplex mids option." />
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
203
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
204 <param name="multiplex_ids" type="data" format="fasta" optional="true" label="Specify MID tags file" help="Specify an optional FASTA file that contains sequence identifiers (a.k.a MIDs or barcodes) to add to the sequences (one per library)."/>
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
205
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
206 <!-- When Galaxy bug #661 is resolved, then we can really have optional parameters of type "integer" or "float" -->
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
207 <!-- URL: https://bitbucket.org/galaxy/galaxy-central/issue/661/optional-arguments-problems#comment-655611 -->
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
208 <!-- Affected params: diversity (int), shared_perc (float), permuted_perc (float), random_seed (int), num_libraries (int), chimera_perc (float) -->
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
209 <param name="diversity" type="text" optional="true" label="Diversity (richness)" help="Richness, or number of genomes to include in the shotgun libraries. Use 0 for the maximum diversity possible, i.e. all the genomes from the input file when a single independent library is requested." />
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
210
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
211 <param name="shared_perc" type="text" value="0" optional="true" label="Percent shared" help="For multiple libraries, percent of genomes they should have in common." />
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
212
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
213 <param name="permuted_perc" type="text" value="0" optional="true" label="Percent permuted" help="For multiple libraries, percent of the most-abundant genomes to permute in rank-abundance." />
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
214
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
215 <param name="random_seed" type="text" optional="true" label="Random seed" help="Seed number to use for the pseudo-random number generator." />
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
216
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
217 <param name="desc_track" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Read tracking" help="Track read information (reference sequence, position, errors, ...) by writing it in the FASTA read description." />
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
218
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
219 <param name="qual_levels" type="text" optional="true" label="Quality score levels" help="Generate basic quality scores for the simulated reads. Good residues are given a specified good score (e.g. 30) and residues that are the result of an insertion or substitution are given a specified bad score (e.g. 10). Specify first the good score and then the bad score, e.g. '30 10'" />
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
220
2
27a15723d4f0 Uploaded
fangly
parents: 1
diff changeset
221 <param name="fastq_output" type="boolean" truevalue="1" falsevalue="0" checked="false" label="FASTQ output" help="
27a15723d4f0 Uploaded
fangly
parents: 1
diff changeset
222 Write the generated reads in FASTQ format (Sanger variant) instead of FASTA and
27a15723d4f0 Uploaded
fangly
parents: 1
diff changeset
223 QUAL. Quality score levels need to be specified for this option to be effective." />
27a15723d4f0 Uploaded
fangly
parents: 1
diff changeset
224
1
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
225 <param name="profile_file" type="data" format="txt" optional="true" label="Profile file" help="A file that contains Grinder arguments. This is useful if you use many options or often use the same options. Lines with comments (#) are ignored. Consider the profile file, 'simple_profile.txt':
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
226
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
227 # A simple Grinder profile
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
228 -read_dist 105 normal 12
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
229 -total_reads 1000
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
230
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
231 Running: grinder -reference_file viral_genomes.fa -profile_file simple_profile.txt
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
232
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
233 Translates into: grinder -reference_file viral_genomes.fa -read_dist 105 normal 12 -total_reads 1000
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
234
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
235 Note that the arguments specified in the profile should not be specified again on the command line." />
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
236
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
237 </inputs>
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
238
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
239 <!--
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
240 <outputs>
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
241 <data format="tabular" name="ranks" from_work_dir="grinder-ranks.txt" label="${tool.name} ranks from ${on_string}" />
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
242 <conditional/>
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
243 <data format="fasta" name="fasta" from_work_dir="grinder-reads.fa" label="${tool.name} reads from ${on_string}" />
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
244 <data format="qual" name="qual" from_work_dir="grinder-reads.qual" label="${tool.name} read quals from ${on_string}" >
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
245 <filter>(str(qual_levels))</filter>
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
246 </data>
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
247 </outputs>
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
248 -->
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
249
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
250 <outputs>
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
251 <data format="text" name="output" />
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
252 </outputs>
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
253
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
254 <tests>
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
255 <!-- no tests since they would not always return the same results -->
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
256 <!--
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
257 <test>
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
258 <param name="specify" value="uploaded" />
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
259 <param name="value" value="ngs_simulation_in1.fasta" ftype="fasta" />
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
260 <output name="ranks" file="" />
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
261 <output name="fasta" file="" />
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
262 <output name="qual" file="" />
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
263 </test>
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
264
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
265 <test>
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
266 <param name="specify" value="builtin" />
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
267 <param name="builtin" value="pUC18" />
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
268 <output name="ranks" file="" />
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
269 <output name="fasta" file="" />
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
270 <output name="qual" file="" />
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
271 </test>
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
272 -->
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
273
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
274 </tests>
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
275
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
276 <help>
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
277
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
278 **What it does**
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
279
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
280 Grinder is a program to create random shotgun and amplicon sequence libraries
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
281 based on reference sequences in a FASTA file. Features include:
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
282
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
283 * shotgun library or amplicon library
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
284 * arbitrary read length distribution and number of reads
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
285 * simulation of PCR and sequencing errors (chimeras, point mutations, homopolymers)
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
286 * support for creating paired-end (mate pair) datasets
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
287 * specific rank-abundance settings or manually given abundance for each genome
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
288 * creation of datasets with a given richness (alpha diversity)
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
289 * independent datasets can share a variable number of genomes (beta diversity)
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
290 * modeling of the bias created by varying genome lengths or gene copy number
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
291 * profile mechanism to store preferred options
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
292 * API to automate the creation of a large number of simulated datasets
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
293
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
294
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
295 **Input**
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
296
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
297 A variety of FASTA databases containing genes or genomes can be used as input
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
298 for Grinder, such as the NCBI RefSeq collection (ftp://ftp.ncbi.nih.gov/refseq/release/microbial/),
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
299 the GreenGenes 16S rRNA database (http://greengenes.lbl.gov/Download/Sequence_Data/Fasta_data_files/Isolated_named_strains_16S_aligned.fasta), the human genome and transcriptome (ftp://ftp.ncbi.nih.gov/refseq/H_sapiens/RefSeqGene/, ftp://ftp.ncbi.nih.gov/refseq/H_sapiens/mRNA_Prot/human.rna.fna.gz), ...
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
300
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
301 These input files can either be provided as a Galaxy dataset, or can be uploaded
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
302 by Galaxy users in their history.
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
303
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
304
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
305 **Output**
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
306
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
307 For each library requested, a first file contains the abundance of the species
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
308 in the simulated community created, e.g.::
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
309
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
310 # rank seqID rel. abundance
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
311 1 86715_Lachnospiraceae 0.367936925098555
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
312 2 6439_Neisseria_polysaccharea 0.183968462549277
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
313 3 103712_Fusobacterium_nucleatum 0.122645641699518
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
314 4 103024_Frigoribacterium 0.0919842312746386
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
315 5 129066_Streptococcus_pyogenes 0.0735873850197109
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
316 6 106485_Pseudomonas_aeruginosa 0.0613228208497591
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
317 7 13824_Veillonella_criceti 0.0525624178712221
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
318 8 28044_Lactosphaera 0.0459921156373193
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
319
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
320 The second file is a FASTA file containing shotgun or amplicon reads, e.g.::
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
321
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
322 >1 reference=13824_Veillonella_criceti position=89-1088 strand=+
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
323 ACCAACCTGCCCTTCAGAGGGGGATAACAACGGGAAACCGTTGCTAATACCGCGTACGAA
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
324 TGGACTTCGGCATCGGAGTTCATTGAAAGGTGGCCTCTATTTATAAGCTATCGCTGAAGG
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
325 AGGGGGTTGCGTCTGATTAGCTAGTTGGAGGGGTAATGGCCCACCAAGGCAA
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
326
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
327 >2 reference=103712_Fusobacterium_nucleatum position=2-1001 strand=+
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
328 TGAACGAAGAGTTTGATCCTGGCTCAGGATGAACGCTGACAGAATGCTTAACACATGCAA
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
329 GTCAACTTGAATTTGGGTTTTTAACTTAGGTTTGGG
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
330
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
331 If you specify the quality score levels option, a third file representing the
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
332 quality scores of the reads is created::
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
333
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
334 >1 reference=13824_Veillonella_criceti position=89-1088 strand=+
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
335 30 30 30 10 30 30 ...
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
336
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
337
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
338 </help>
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
339
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
340 </tool>
7d26d64539b2 Uploaded
fangly
parents:
diff changeset
341