annotate snap_caller.xml @ 0:6231ae8f87b8

Uploaded
author wolma
date Wed, 11 Feb 2015 08:29:02 -0500
parents
children a548b3c6ed00
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
1 <tool id="read_alignment" name="SNAP Read Alignment">
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
2 <description>Map sequence reads to a reference genome using SNAP</description>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
3 <version_command>mimodd version -q</version_command>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
4 <command>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
5 mimodd snap-batch -s
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
6 ## SNAP calls (considering different cases)
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
7
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
8 #for $i in $datasets
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
9 "snap ${i.mode_choose.mode} '$ref_genome'
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
10 #if $str($i.mode_choose.mode) == "paired" and $str($i.mode_choose.input.iformat) in ("fastq", "gz"):
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
11 '${i.mode_choose.input.ifile1}' '${i.mode_choose.input.ifile2}'
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
12 #else:
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
13 '${i.mode_choose.input.ifile}'
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
14 #end if
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
15 --ofile '$outputfile' --iformat ${i.mode_choose.input.iformat} --oformat $oformat
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
16 --idx-seedsize '$set.seedsize'
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
17 --idx-slack '$set.slack' --maxseeds '$set.maxseeds' --maxhits '$set.maxhits' --clipping=$set.clipping --maxdist '$set.maxdist' --confdiff '$set.confdiff' --confadapt '$set.confadpt'
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
18 #if $i.mode_choose.input.header:
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
19 --header '${i.mode_choose.input.header}'
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
20 #end if
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
21 #if $str($i.mode_choose.mode) == "paired":
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
22 --spacing '$set.sp_min' '$set.sp_max'
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
23 #end if
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
24 #if $str($set.selectivity) != "off":
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
25 --selectivity '$set.selectivity'
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
26 #end if
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
27 #if $str($set.filter_output) != "off":
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
28 --filter-output $set.filter_output
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
29 #end if
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
30 #if $str($set.sort) != "off":
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
31 --sort $set.sort
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
32 #end if
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
33 #if $str($set.mmatch_notation) == "general":
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
34 -M
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
35 #end if
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
36 --max-mate-overlap '$set.max_mate_overlap'
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
37 --verbose
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
38 "
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
39 #end for
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
40 </command>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
41
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
42 <inputs>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
43 ## mandatory arguments (and mode-conditionals)
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
44
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
45 <param name="ref_genome" type="data" format="fasta" label="reference genome" help="The fasta reference genome that SNAP should align reads against."/>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
46
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
47 <repeat name="datasets" title="datasets" default="1" min="1">
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
48 <conditional name="mode_choose">
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
49 <param name="mode" type="select" label="choose mode" help="Reads obtained from single-end sequencing runs should be aligned in 'single' mode, paired-end reads in 'paired' mode. **WARNING**: if the read input file is in SAM/BAM format, the current version of this tool will **not** verify the mode and may produce erroneous alignments with wrong settings!">
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
50 <option value="single">single-end</option>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
51 <option value="paired">paired-end</option>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
52 </param>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
53
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
54 <when value="single">
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
55 <conditional name="input">
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
56 <param name="iformat" type="select" label="input file format">
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
57 <option value="bam">BAM</option>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
58 <option value="sam">SAM</option>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
59 <option value="gz">gz</option>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
60 <option value="fastq">fastq</option>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
61 </param>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
62 <when value="bam">
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
63 <param name="ifile" type="data" format="bam" label="input file"/>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
64 <param name="header" type="data" optional="true" format="sam" label="custom header file" />
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
65 </when>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
66 <when value="sam">
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
67 <param name="ifile" type="data" format="sam" label="input file"/>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
68 <param name="header" type="data" optional="true" format="sam" label="custom header file" />
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
69 </when>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
70 <when value="gz">
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
71 <param name="ifile" type="data" label="input file"/>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
72 <param name="header" type="data" format="sam" label="header file" />
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
73 </when>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
74 <when value="fastq">
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
75 <param name="ifile" type="data" format="fastq" label="input file"/>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
76 <param name="header" type="data" format="sam" label="header file" />
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
77 </when>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
78 </conditional>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
79 </when>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
80 <when value="paired">
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
81 <conditional name="input">
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
82 <param name="iformat" type="select" label="input file format">
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
83 <option value="bam">BAM</option>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
84 <option value="sam">SAM</option>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
85 <option value="gz">gz</option>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
86 <option value="fastq">fastq</option>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
87 </param>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
88 <when value="bam">
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
89 <param name="ifile" type="data" format="bam" label="input file"/>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
90 <param name="header" type="data" optional="true" format="sam" label="custom header file" />
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
91 </when>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
92 <when value="sam">
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
93 <param name="ifile" type="data" format="sam" label="input file"/>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
94 <param name="header" type="data" optional="true" format="sam" label="custom header file" />
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
95 </when>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
96 <when value="fastq">
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
97 <param name="ifile1" type="data" format="fastq" label="inputfile with the first set of reads of paired-end data"/>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
98 <param name="ifile2" type="data" format="fastq" label="inputfile with the second set of reads of paired-end data"/>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
99 <param name="header" type="data" format="sam" label="header file" help="required" />
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
100 </when>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
101 <when value="gz">
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
102 <param name="ifile1" type="data" label="inputfile with the first set of reads of paired-end data"/>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
103 <param name="ifile2" type="data" label="inputfile with the second set of reads of paired-end data"/>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
104 <param name="header" type="data" format="sam" label="header file" help="required" />
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
105 </when>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
106 </conditional>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
107 </when>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
108 </conditional>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
109 </repeat>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
110
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
111 <param name="oformat" type="select" label="output file format">
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
112 <option value="bam">BAM</option>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
113 <option value="sam">SAM</option>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
114 </param>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
115
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
116 ## optional arguments
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
117
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
118 <conditional name="set">
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
119 <param name="settings_mode" type="select" label="further parameter settings" help="This section lets you specify the detailed parameter settings for the SNAP aligner. Only change them if you know what you are doing, i.e., read the documentation first.">
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
120 <option value="default">default settings</option>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
121 <option value="change">change settings</option>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
122 </param>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
123
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
124 ## default settings
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
125
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
126 <when value="default">
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
127 <param name="seedsize" type="hidden" value="20"/>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
128 <param name="slack" type="hidden" value="0.3"/>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
129 <param name="sp_min" type="hidden" value="100"/>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
130 <param name="sp_max" type="hidden" value="10000"/>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
131 <param name="maxdist" type="hidden" value="8"/>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
132 <param name="confdiff" type="hidden" value="2"/>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
133 <param name="confadpt" type="hidden" value="7"/>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
134
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
135 <param name="maxseeds" type="hidden" value="25"/>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
136 <param name="maxhits" type="hidden" value="250"/>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
137 <param name="clipping" type="hidden" value="++"/>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
138
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
139 <param name="selectivity" type="hidden" value="off"/>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
140 <param name="filter_output" type="hidden" value="off"/>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
141 <param name="sort" type="hidden" value="0"/>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
142 <param name="mmatch_notation" type="hidden" value="general"/>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
143 <param name="max_mate_overlap" type="hidden" value="0" />
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
144 </when>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
145
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
146 ## change settings
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
147
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
148 <when value="change">
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
149 <param name="seedsize" type="integer" value="20" label="seed size (default: 20)" help="Length of the seeds used in the reference genome hash table (SNAP index option -s)."/>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
150 <param name="slack" type="float" value="0.3" label="hash table slack size (default: 0.3)" help="Corresponds to the -h option of SNAP index."/>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
151
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
152 ## paired-end specific options
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
153 <param name="sp_min" type="integer" value="100" label="minimum spacing to allow between paired ends (default: 100)" help="Corresponds to the first value of the SNAP option -s."/>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
154 <param name="sp_max" type="integer" value="10000" label="maximum spacing to allow between paired ends (default: 10000)" help="Corresponds to the second value of the SNAP option -s."/>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
155 <param name="max_mate_overlap" type="float" value="0" label="Maximal overlap between the reads in a pair (as a fraction of their combined length; default: 0, no overlap allowed)" help="If the reads of a read pair overlap by more than this fraction of their combined length, they are filtered out" />
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
156
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
157 <param name="maxdist" type="integer" value="8" label="edit distance (default: 8)" help="maximum edit distance allowed per read or pair (SNAP option -d); higher values allow more divergent alignments to be found, but increase the rate of misalignments."/>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
158 <param name="maxhits" type="integer" value="250" label="maximum hits per seed (default: 250)" help="Maximum hits to consider per seed (SNAP option -h); don't use a seed region in the alignment process if it matches more than maxhits regions in the reference genome. Higher values reduce the rate of misalignments, but reduce performance."/>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
159 <param name="confdiff" type="integer" value="2" label="confidence threshold (default: 2)" help="Confidence threshold (SNAP option -c); the minimum edit distance difference between two alternate alignments required to reject the poorer alignment as suboptimal; higher values increase the rate of ambiguously aligned reads."/>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
160 <param name="confadpt" type="integer" value="7" label="adaptive confdiff behaviour (default: 7)" help="Specifies how many seeds of a read may be ignored (based on the maximum hits value above) before the confidence threshold above gets increased by one for that read; helps fine-tuning alignment accuracy in repetitive regions of the genome."/>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
161 <param name="maxseeds" type="integer" value="25" label="maximum seeds per read (default: 25)" help="Number of seeds to use per read (SNAP option -n) when trying to match it to the reference genome; higher numbers will increase the rate of aligned reads and reduce the rate of misalignments, but will reduce performance."/>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
162 <param name="clipping" type="select" label="read clipping (default: from back and front)" help="Specifies from which end of a read low-quality bases should be clipped (SNAP option -Cxx)">
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
163 <option value="++">from back and front</option>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
164 <option value="-+">from back only</option>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
165 <option value="+-">from front only</option>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
166 <option value="--">no clipping</option>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
167 </param>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
168 <param name="selectivity" type="integer" value="1" label="selectivity (default: 1)" help="randomly choose 1/selectivity of the reads to score (SNAP option -S). The tool uses the default of 1 (or a 0 setting) to indicate that all reads should be worked with." />
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
169 <param name="filter_output" type="select" label="filter output (default: no filtering)" help="Restrict the output to certain classes of reads (SNAP option -F).">
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
170 <option value="off">no filtering</option>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
171 <option value="a">aligned only</option>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
172 <option value="s">single-aligned only</option>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
173 <option value="u">unaligned only</option>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
174 </param>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
175 <param name="sort" type="select" label="output sorting (default: sort by read coordinates)" help="Sort the output file by alignment location (SNAP option --so).">
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
176 <option value="0">sort by read coordinates</option>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
177 <option value="off">no sorting</option>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
178 </param>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
179 <param name="mmatch_notation" type="select" label="CIGAR symbols for alignment matches/mismatches (default: M notation)" help="Indicates whether CIGAR strings in the generated SAM/BAM file should use M (alignment match) rather than = and X (sequence (mis-)match). Warning: Downstream variant calling based on samtools currently relies on the old-style M notation!!" >
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
180 <option value="general">use M for both matches and mismatches</option>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
181 <option value="differentiate">use = for matches, X for mismatches</option>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
182 </param>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
183 </when>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
184 </conditional>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
185 </inputs>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
186
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
187 <outputs>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
188 <data name="outputfile" format="bam" label="Aligned reads from MiModd ${tool.name} on ${on_string}">
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
189 <change_format>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
190 <when input="oformat" value="sam" format="sam"/>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
191 </change_format>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
192 </data>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
193 </outputs>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
194
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
195 <help>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
196 .. class:: infomark
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
197
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
198 **What it does**
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
199
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
200 The tool aligns the sequenced reads in an arbitrary number of input datasets against a common reference genome and stores the results in a single, possibly multi-sample output file. It supports a variety of different sequenced reads input formats, i.e., SAM, BAM, fastq and gzipped fastq, and both single-end and paired-end data.
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
201
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
202 Internally, the tool uses the ultrafast, hashtable-based aligner SNAP (http://snap.cs.berkeley.edu), hence its name.
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
203
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
204 **Notes:**
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
205
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
206 1) In its standard configuration Galaxy will decompress any .gz files during their upload, so the option to align gzipped fastq input is useful only with customized Galaxy instances or by using linked files as explained in our `recipe for using gzipped fastq files in Galaxy`_ from the `MiModD user guide`_.
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
207
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
208 2) To use paired-end fastq data with the tool the read mate information needs to be split over two fastq files in corresponding order.
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
209
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
210 **TIP:** If your paired-end data is arranged differently, you may look into the *fastq splitter* and *fastq de-interlacer* tools for Galaxy from the `Fastq Manipulation category`_ of the Galaxy Tool Shed to see if they can convert your files to the expected format.
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
211
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
212 3) The tool supports the alignment of reads from the same sequencing run, but distributed across several input files.
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
213
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
214 Generally, it expects the reads from each input dataset to belong to one read-group and will abort with an error message if any input dataset declares more than one read-group or sample name in its header. Different datasets, however, are allowed to contain reads from the same read-group (as indicated by matching read-group IDs and sample names in their headers), in which case the reads will be combined into one group in the output.
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
215
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
216 4) Read-group information is required for every input dataset!
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
217
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
218 We generally recommend storing NGS datasets in SAM/BAM format with run metadata stored in the file header. You can use the *NGS Run Annotation* and *Convert* tools to convert data in fastq format to SAM/BAM with added run information.
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
219
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
220 While it is not our recommended approach, you can, if you prefer it, align reads from fastq files or SAM/BAM files without header read-group information. To do so, you **must** specify a SAM file that provides the missing information in its header along with the input dataset. You can generate a SAM header file with the *NGS Run Annotation* tool.
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
221
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
222 Optionally, a SAM header file can also be used to replace existing read-group information in a headered SAM/BAM input file. This can be used to resolve read-group ID conflicts between multiple input files at tool runtime.
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
223
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
224 5) Currently, you cannot configure aligner-specific options separately for specific input files from within this Galaxy tool. If you need this advanced level of control, you should use the command line tool ``mimodd snap-batch``.
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
225
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
226 .. _Fastq Manipulation category: https://toolshed.g2.bx.psu.edu/repository/browse_repositories_in_category?id=310ff67d4caf6531
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
227 .. _recipe for using gzipped fastq files in Galaxy: http://mimodd.readthedocs.org/en/latest/recipes.html#use-gzipped-fastq-files-in-galaxy
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
228 .. _MiModD user guide: http://mimodd.readthedocs.org/en/latest
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
229
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
230 </help>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
231 </tool>
6231ae8f87b8 Uploaded
wolma
parents:
diff changeset
232