annotate tools/grinder.xml @ 0:b35ec780aac1

Uploaded
author fangly
date Mon, 19 Sep 2011 01:01:58 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
1 <tool id="grinder" name="Grinder" version="0.3.7" force_history_refresh="True">
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
2
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
3 <!--
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
4 Author: florent.angly@gmail.com
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
5 TODO:
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
6 • See bfast tool (tools/sr_mapping/bfast_wrapper.xml) for how to use datatables easily
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
7 • Basic tests
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
8 • Link to full manual
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
9 • Better sync with Grinder parameters, defaults and help
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
10 -->
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
11
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
12 <description>genomic, metagenomic and amplicon read simulator (BETA)</description>
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
13
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
14 <requirements>
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
15 <requirement type="binary">grinder</requirement>
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
16 </requirements>
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
17
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
18 <version_string>grinder --version</version_string>
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
19
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
20 <command>
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
21 #set $tool_dir = os.path.join( os.path.abspath($__root_dir__), 'tools', 'ngs_simulation' )
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
22 #set $script1 = os.path.join( $tool_dir, 'stderr_wrapper.py' )
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
23 #set $script2 = os.path.join( $tool_dir, 'grinder_multiple_outputs.py' )
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
24
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
25 $script1
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
26 grinder
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
27 #if $reference_file.specify == "builtin":
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
28 -reference_file ${ filter( lambda x: str( x[0] ) == str( $reference_file.value ), $__app__.tool_data_tables[ 'all_fasta' ].get_fields() )[0][-1] }
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
29 #else if $reference_file.specify == "uploaded":
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
30 -reference_file $reference_file.value
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
31 #end if
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
32 #if str($coverage_fold):
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
33 -coverage_fold $coverage_fold
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
34 #end if
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
35 #if str($total_reads):
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
36 -total_reads $total_reads
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
37 #end if
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
38 #if str($read_dist):
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
39 -read_dist $read_dist
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
40 #end if
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
41 #if str($insert_dist):
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
42 -insert_dist $insert_dist
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
43 #end if
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
44 #if str($exclude_chars):
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
45 -exclude_chars $exclude_chars
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
46 #end if
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
47 #if str($delete_chars):
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
48 -delete_chars $delete_chars
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
49 #end if
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
50 #if str($forward_reverse) != "None":
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
51 -forward_reverse $forward_reverse
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
52 #end if
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
53 #if str($unidirectional):
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
54 -unidirectional $unidirectional
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
55 #end if
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
56 #if str($length_bias):
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
57 -length_bias $length_bias
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
58 #end if
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
59 #if str($copy_bias):
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
60 -copy_bias $copy_bias
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
61 #end if
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
62 #if str($mutation_dist):
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
63 -mutation_dist $mutation_dist
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
64 #end if
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
65 #if str($mutation_ratio):
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
66 -mutation_ratio $mutation_ratio
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
67 #end if
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
68 #if str($homopolymer_dist):
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
69 -homopolymer_dist $homopolymer_dist
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
70 #end if
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
71 #if str($chimera_perc):
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
72 -chimera_perc $chimera_perc
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
73 #end if
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
74 #if str($abundance_file) != "None":
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
75 -abundance_file $abundance_file
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
76 #end if
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
77 #if str($abundance_model):
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
78 -abundance_model $abundance_model
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
79 #end if
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
80 #if str($num_libraries):
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
81 -num_libraries $num_libraries
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
82 #end if
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
83 #if str($multiplex_ids) != "None":
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
84 -multiplex_ids $multiplex_ids
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
85 #end if
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
86 #if str($diversity):
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
87 -diversity $diversity
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
88 #end if
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
89 #if str($shared_perc):
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
90 -shared_perc $shared_perc
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
91 #end if
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
92 #if str($permuted_perc):
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
93 -permuted_perc $permuted_perc
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
94 #end if
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
95 #if str($random_seed):
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
96 -random_seed $random_seed
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
97 #end if
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
98 #if str($desc_track):
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
99 -desc_track $desc_track
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
100 #end if
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
101 #if str($qual_levels):
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
102 -qual_levels $qual_levels
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
103 #end if
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
104 #if str($profile_file) != "None":
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
105 -profile_file $profile_file.value
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
106 #end if
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
107 <!-- When Galaxy bug #661 is resolved, then we can use the same method to check for all optional arguments -->
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
108 <!-- i.e. either if str($param) != "None": or if str($param): -->
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
109 <!-- URL: https://bitbucket.org/galaxy/galaxy-central/issue/661/optional-arguments-problems#comment-655611 -->
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
110
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
111 #set $output_dir = $__new_file_path__
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
112 -output_dir $output_dir
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
113
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
114 #set $base_name = $output.id
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
115 -base_name $base_name
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
116 ;
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
117
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
118 $script2 $output_dir $base_name
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
119
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
120 </command>
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
121
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
122 <inputs>
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
123
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
124 <conditional name="reference_file">
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
125 <param name="specify" type="select" label="Specify">
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
126 <option value="builtin">Built-in file</option>
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
127 <option value="uploaded">Uploaded file</option>
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
128 </param>
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
129 <when value="builtin">
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
130 <param name="value" type="select" label="Reference sequences" help="Galaxy built-in FASTA file">
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
131 <options from_data_table="all_fasta" />
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
132 </param>
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
133 </when>
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
134 <when value="uploaded">
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
135 <param name="value" type="data" format="fasta" label="Reference sequences" help="FASTA file that contains the input reference sequences" />
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
136 </when>
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
137 </conditional>
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
138
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
139 <param name="total_reads" type="text" value="100" optional="true" label="Number of reads" help="Number of shotgun or amplicon reads to generate for each library. Do not specify this if you specify the fold coverage." />
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
140
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
141 <param name="coverage_fold" type="text" optional="true" label="Coverage fold" help="Generate the number of reads needed to achieve the specified fold coverage of the input reference sequences for each library (the output FASTA length divided by the input FASTA length). Do not specify this if you specify the number of reads directly" />
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
142
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
143 <param name="read_dist" type="text" value="100" optional="true" label="Sequence length distribution" help="Desired sequence length distribution specified as:
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
144 average length, distribution ('uniform' or 'normal') and standard deviation
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
145 Only the first element is required.
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
146 Examples:
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
147 1/ All sequences exactly 250 bp long: 250
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
148 2/ Uniform distribution around 100+-10 bp: 100 uniform 10
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
149 3/ Read normally distributed with an average of 800 and a standard deviation
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
150 of 100 bp: 800 normal 100" />
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
151
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
152 <param name="insert_dist" type="text" value="0" optional="true" label="Insert size distribution" help="Create shotgun paired end reads (mate pairs) spanning the given insert length (the reads are interior to the insert):
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
153 0 : off,
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
154 or: insert size distribution in bp, in the same format as the read length
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
155 distribution (a typical value is 2,500 bp)
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
156 Two distinct reads are generated whether or not the mate pair overlaps.
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
157 Default: 0" />
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
158
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
159 <param name="exclude_chars" type="text" optional="true" label="Characters to exclude" help="Do not create reads containing any of the specified characters (case insensitive), e.g. 'N-' to prevent reads with gaps (-) or ambiguities (N)." />
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
160
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
161 <param name="delete_chars" type="text" optional="true" label="Characters to delete" help="Remove the specified characters from the reference sequences (case insensitive), e.g. 'N-' to remove gaps (-) and ambiguities (N)." />
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
162
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
163 <param name="forward_reverse" type="data" format="fasta" optional="true" label="Amplicon primers" help="Simulate amplicon sequencing using the given forward and reverse PCR primer sequences (in a FASTA file, in this order). The second sequence in the FASTA file (the reverse primer) is optional. The sequences should use the IUPAC convention for degenerate residues. Example: AAACTYAAAKGAATTGRCGG and ACGGGCGGTGTGTRC for the 926F and 1392R primers respectively (primers that target the v6 to v9 region of the 16S rRNA gene). Genome sequences that do not match the specified primers are excluded. It is recommended to use the unidirectional and no genome length bias options to generate amplicon reads." />
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
164
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
165 <param name="unidirectional" type="select" display="radio" value="0" label="Sequencing direction" help="Produce reads from just one strand, as opposed to from both the reference strand and its reverse complement.">
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
166 <option value="0">both strands</option>
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
167 <option value="1">forward strand only</option>
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
168 <option value="-1">reverse strand only</option>
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
169 </param>
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
170
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
171 <param name="length_bias" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Length bias" help="In shotgun libraries, sample species proportionally to their genome length: at the same relative abundance, larger genomes contribute more reads than smaller genomes." />
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
172
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
173 <param name="copy_bias" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Copy number bias" help="In amplicon libraries, sample species proportionally to the number of copies of the target gene: at equal relative abundance, genomes that have multiple copies of the target gene contribute more amplicon reads than genomes that have a single copy. Note: you should use full genomes in the reference file to make use of this option." />
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
174
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
175 <param name="mutation_dist" type="text" value="0" optional="true" label="Mutation distribution" help="Introduce sequencing errors in the reads, in the form of mutations (substitutions, insertions and deletions) using a specified frequency distribution:
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
176 average probability (%),
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
177 model (uniform, linear),
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
178 value at 3&apos; end (not applicable for uniform model).
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
179 For example, for Sanger-type errors, use:
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
180 1.5 linear 2." />
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
181
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
182 <param name="mutation_ratio" type="text" value="80 20" optional="true" label="Mutation ratio" help="Indicate the percentage of substitutions and indels (insertions and deletions). For example, use 80 20 (4 substitutions for each indel) for Sanger reads. Note that this parameter has no effect unless you specify the mutation distribution option." />
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
183
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
184 <param name="homopolymer_dist" type="text" value="0" optional="true" label="Homopolymer distribution" help="Introduce sequencing errors in the reads in the form of homopolymeric stretches (e.g. AAA, CCCCC) using a specified model (n: homopolymer length).
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
185 Margulies: N(n, 0.15 * n), Margulies et al. 2005.
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
186 Richter: N(n, 0.15 * sqrt(n)), Richter et al. 2008.
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
187 Balzer: N(n, 0.03494 + n * 0.06856), Balzer et al. 2010." />
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
188
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
189 <param name="chimera_perc" type="text" value="0" optional="true" label="Percentage of chimeras" help="Specify the percent of reads in amplicon libraries that should be chimeric sequences. A typical value is 10%." />
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
190
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
191 <param name="abundance_file" type="data" format="tabular" optional="true" label="Abundance file" help="Specify the relative abundance of the genomes manually in an input file. Each line of the file should contain a sequence name and its relative abundance (%), e.g. 'seqABC 82.1' or 'seqABC 82.1 10.2' if you are specifying 2 different communities." />
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
192
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
193 <param name="abundance_model" type="text" value="uniform 1" optional="true" label="Rank abundance model" help="Relative abundance model for the input genomes:
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
194 uniform, linear, powerlaw, logarithmic or exponential.
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
195 Examples:
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
196 1/ uniform distribution: uniform,
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
197 2/ powerlaw distribution with parameter 0.1: powerlaw 0.1." />
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
198
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
199 <param name="num_libraries" type="text" value="1" optional="true" label="Number of libraries" help="Number of independent libraries to create. Specify how diverse and similar they should be using the diversity, percent shared and percent permuted options. Assign them different MID tags with the MID tags file option." />
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
200
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
201 <param name="multiplex_ids" type="data" format="fasta" optional="true" label="Specify MID tags file" help="Specify an optional FASTA file that contains sequence identifiers (a.k.a MIDs or barcodes) to add to the sequences (one per library)."/>
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
202
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
203 <!-- When Galaxy bug #661 is resolved, then we can really have optional parameters of type "integer" or "float" -->
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
204 <!-- URL: https://bitbucket.org/galaxy/galaxy-central/issue/661/optional-arguments-problems#comment-655611 -->
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
205 <!-- Affected params: diversity (int), shared_perc (float), permuted_perc (float), random_seed (int), num_libraries (int), chimera_perc (float) -->
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
206 <param name="diversity" type="text" optional="true" label="Diversity (richness)" help="Richness, or number of genomes to include in the shotgun libraries. Use 0 for the maximum diversity possible, i.e. all the genomes from the input file when a single independent library is requested." />
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
207
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
208 <param name="shared_perc" type="text" value="0" optional="true" label="Percent shared" help="For multiple libraries, percent of genomes they should have in common." />
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
209
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
210 <param name="permuted_perc" type="text" value="0" optional="true" label="Percent permuted" help="For multiple libraries, percent of the most-abundant genomes to permute in rank-abundance." />
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
211
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
212 <param name="random_seed" type="text" optional="true" label="Random seed" help="Seed number to use for the pseudo-random number generator." />
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
213
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
214 <param name="desc_track" type="boolean" truevalue="1" falsevalue="0" checked="true" label="Read tracking" help="Track read information (reference sequence, position, errors, ...) by writing it in the FASTA read description." />
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
215
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
216 <param name="qual_levels" type="text" optional="true" label="Quality score levels" help="Generate basic quality scores for the simulated reads. Good residues are given a specified good score (e.g. 30) and residues that are the result of an insertion or substitution are given a specified bad score (e.g. 10). Specify first the good score and then the bad score, e.g. '30 10'" />
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
217
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
218 <param name="profile_file" type="data" format="txt" optional="true" label="Profile file" help="A file that contains Grinder arguments. This is useful if you use many options or often use the same options. Lines with comments (#) are ignored. Consider the profile file, 'simple_profile.txt':
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
219
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
220 # A simple Grinder profile
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
221 -read_dist 105 normal 12
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
222 -total_reads 1000
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
223
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
224 Running: grinder -reference_file viral_genomes.fa -profile_file simple_profile.txt
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
225
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
226 Translates into: grinder -reference_file viral_genomes.fa -read_dist 105 normal 12 -total_reads 1000
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
227
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
228 Note that the arguments specified in the profile should not be specified again on the command line." />
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
229
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
230 </inputs>
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
231
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
232 <!--
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
233 <outputs>
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
234 <data format="tabular" name="ranks" from_work_dir="grinder-ranks.txt" label="${tool.name} ranks from ${on_string}" />
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
235 <conditional/>
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
236 <data format="fasta" name="fasta" from_work_dir="grinder-reads.fa" label="${tool.name} reads from ${on_string}" />
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
237 <data format="qual" name="qual" from_work_dir="grinder-reads.qual" label="${tool.name} read quals from ${on_string}" >
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
238 <filter>(str(qual_levels))</filter>
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
239 </data>
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
240 </outputs>
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
241 -->
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
242
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
243 <outputs>
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
244 <data format="text" name="output" />
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
245 </outputs>
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
246
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
247 <tests>
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
248 <!-- no tests since they would not always return the same results -->
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
249 <!--
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
250 <test>
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
251 <param name="specify" value="uploaded" />
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
252 <param name="value" value="ngs_simulation_in1.fasta" ftype="fasta" />
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
253 <output name="ranks" file="" />
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
254 <output name="fasta" file="" />
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
255 <output name="qual" file="" />
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
256 </test>
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
257
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
258 <test>
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
259 <param name="specify" value="builtin" />
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
260 <param name="builtin" value="pUC18" />
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
261 <output name="ranks" file="" />
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
262 <output name="fasta" file="" />
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
263 <output name="qual" file="" />
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
264 </test>
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
265 -->
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
266
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
267 </tests>
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
268
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
269 <help>
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
270
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
271 **What it does**
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
272
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
273 Grinder is a program to create random shotgun and amplicon sequence libraries
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
274 based on reference sequences in a FASTA file. Features include:
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
275
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
276 * shotgun library or amplicon library
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
277 * arbitrary read length distribution and number of reads
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
278 * simulation of PCR and sequencing errors (chimeras, point mutations, homopolymers)
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
279 * support for creating paired-end (mate pair) datasets
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
280 * specific rank-abundance settings or manually given abundance for each genome
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
281 * creation of datasets with a given richness (alpha diversity)
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
282 * independent datasets can share a variable number of genomes (beta diversity)
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
283 * modeling of the bias created by varying genome lengths or gene copy number
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
284 * profile mechanism to store preferred options
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
285 * API to automate the creation of a large number of simulated datasets
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
286
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
287
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
288 **Input**
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
289
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
290 A variety of FASTA databases containing genes or genomes can be used as input
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
291 for Grinder, such as the NCBI RefSeq collection (ftp://ftp.ncbi.nih.gov/refseq/release/microbial/),
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
292 the GreenGenes 16S rRNA database (http://greengenes.lbl.gov/Download/Sequence_Data/Fasta_data_files/Isolated_named_strains_16S_aligned.fasta), the human genome and transcriptome (ftp://ftp.ncbi.nih.gov/refseq/H_sapiens/RefSeqGene/, ftp://ftp.ncbi.nih.gov/refseq/H_sapiens/mRNA_Prot/human.rna.fna.gz), ...
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
293
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
294 These input files can either be provided as a Galaxy dataset, or can be uploaded
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
295 by Galaxy users in their history.
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
296
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
297
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
298 **Output**
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
299
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
300 For each library requested, a first file contains the abundance of the species
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
301 in the simulated community created, e.g.::
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
302
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
303 # rank seqID rel. abundance
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
304 1 86715_Lachnospiraceae 0.367936925098555
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
305 2 6439_Neisseria_polysaccharea 0.183968462549277
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
306 3 103712_Fusobacterium_nucleatum 0.122645641699518
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
307 4 103024_Frigoribacterium 0.0919842312746386
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
308 5 129066_Streptococcus_pyogenes 0.0735873850197109
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
309 6 106485_Pseudomonas_aeruginosa 0.0613228208497591
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
310 7 13824_Veillonella_criceti 0.0525624178712221
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
311 8 28044_Lactosphaera 0.0459921156373193
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
312
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
313 The second file is a FASTA file containing shotgun or amplicon reads, e.g.::
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
314
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
315 >1 reference=13824_Veillonella_criceti position=89-1088 strand=+
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
316 ACCAACCTGCCCTTCAGAGGGGGATAACAACGGGAAACCGTTGCTAATACCGCGTACGAA
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
317 TGGACTTCGGCATCGGAGTTCATTGAAAGGTGGCCTCTATTTATAAGCTATCGCTGAAGG
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
318 AGGGGGTTGCGTCTGATTAGCTAGTTGGAGGGGTAATGGCCCACCAAGGCAA
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
319
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
320 >2 reference=103712_Fusobacterium_nucleatum position=2-1001 strand=+
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
321 TGAACGAAGAGTTTGATCCTGGCTCAGGATGAACGCTGACAGAATGCTTAACACATGCAA
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
322 GTCAACTTGAATTTGGGTTTTTAACTTAGGTTTGGG
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
323
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
324 If you specify the quality score levels option, a third file representing the
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
325 quality scores of the reads is created::
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
326
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
327 >1 reference=103712_Fusobacterium_nucleatum position=2-1001 strand=+
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
328 30 30 30 10 30 30 ...
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
329
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
330
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
331 </help>
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
332
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
333 </tool>
b35ec780aac1 Uploaded
fangly
parents:
diff changeset
334