annotate MUMmer/mummer_clustering.xml @ 0:61f30d177448 default tip

initial commit on Mummer toolsuite on toolshed
author eric
date Tue, 31 Mar 2015 14:19:49 +0200
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
1 <tool id="mummer_clustering" name="MUMmer Clustering" version="0.9.alx" force_history_refresh="True">
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
2 <description>: order sequence matches in clusters</description>
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
3 <command>
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
4 <!-- update this path to the installed location -->
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
5 $tool.cmd
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
6 #if $tool.cmd=="gaps":
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
7 $tool.in_reference
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
8 #if $tool.gaps_r=="yes":
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
9 -r
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
10 #end if
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
11 #end if
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
12 #if $tool.cmd=="mgaps":
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
13 #if $tool.cmd_C=="yes":
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
14 -C
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
15 #end if
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
16 -d $tool.cmd_d
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
17 #if $tool.cmd_e=="yes":
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
18 -e
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
19 #end if
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
20 -f $tool.cmd_f
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
21 -l $tool.cmd_l
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
22 -s $tool.cmd_s
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
23 #end if
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
24 &lt; $tool.in_match_list
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
25 &gt; $out_tool
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
26
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
27 </command>
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
28 <inputs>
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
29 <conditional name="tool">
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
30 <param name="cmd" type="select" label="MUMmer maximal matching" help="Algorithms are run with default parameters (none). For specific args see help below" >
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
31 <option value="gaps" selected="true">gaps</option>
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
32 <option value="mgaps">mgaps</option>
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
33 </param>
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
34 <when value="gaps">
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
35 <param name="in_reference" type="data" format="fasta" label="Reference FastA file" />
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
36 <param name="gaps_r" type="select" label="Use reversed [-r]" >
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
37 <option value="no" selected="true">No</option>
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
38 <option value="yes">Yes</option>
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
39 </param>
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
40 <param name="in_match_list" type="data" format="text" label="MUMmer match list" help="See help for more details" />
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
41 </when>
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
42 <when value="mgaps">
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
43 <param name="in_match_list" type="data" format="text" label="MUMmer match list" help="See help for more details" />
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
44 <param name="cmd_C" type="select" label="Check input header labels have reversed keyword [-C]" >
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
45 <option value="no" selected="true">No</option>
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
46 <option value="yes">Yes</option>
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
47 </param>
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
48 <param name="cmd_d" type="integer" size="5" value="5" label="Max fixed diagonal difference [-d]" />
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
49 <param name="cmd_e" type="select" label="Use extent of cluster [-e]" >
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
50 <option value="no" selected="true">No</option>
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
51 <option value="yes">Yes</option>
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
52 </param>
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
53 <param name="cmd_f" type="float" size="5" value="0.05" label="Max fraction separation for diagonal difference [-f]" />
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
54 <param name="cmd_l" type="integer" size="5" value="200" label="Min cluster length [-l]" />
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
55 <param name="cmd_s" type="integer" size="5" value="1000" label="Max separation between adjacent matches in cluster [-s]" />
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
56 </when>
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
57 </conditional>
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
58 </inputs>
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
59 <outputs>
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
60 <data name="out_tool" format="text" label="Clustering output" />
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
61 </outputs>
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
62 <requirements>
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
63 <!-- <requirement type="set_environment" version="3.23">MUMMER_PATH</requirement> -->
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
64 <requirement type="package" version="4.6.4">gnuplot</requirement>
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
65 <requirement type="package" version="3.23">mummer</requirement>
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
66 </requirements>
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
67 <tests>
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
68 <test>
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
69 </test>
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
70 </tests>
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
71 <help>
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
72 |
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
73
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
74
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
75 **Reference**
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
76 =============
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
77
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
78 - **MUMmer clustering Galaxy tool wrapper:** Alex Bossers, CVI of Wageningen UR, The Netherlands.
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
79
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
80 - **MUMmer suite v3.22:** http://mummer.sourceforge.net
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
81
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
82 - **MUMmer tutorials:** http://mummer.sourceforge.net/examples/
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
83
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
84 If you found these tools/wrappers useful in your research, please acknowledge our work. If you improve
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
85 or modify the wrappers, please add yourself to the acknowledgement section rather than replacing existing names :)
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
86
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
87
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
88 **MUMmer Clustering**
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
89 =====================
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
90
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
91 MUMmer's clustering algorithms attempt to order small individual matches into larger match clusters
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
92 in order to make the output of mummer more intelligible. A dot plot makes it easy to spot alignment
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
93 regions from a match list, however when examining the data without graphic aids, it is very difficult
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
94 to draw any reasonable conclusions from the simple flat file list of matches. Clustering the matches
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
95 together into larger groups of neighboring matches makes this process much easier by ordering the
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
96 data and removing spurious matches.
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
97
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
98
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
99 Gaps
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
100 ----
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
101
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
102 *gaps* is the primary clustering algorithm for run-mummer1, and although classified as a "clustering"
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
103 step, gaps is more of a sorting routine. It implements the LIS (longest increasing subsequence) algorithm
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
104 to extract the longest consistent set of matches between two sequences, and generates a single
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
105 cluster that represents the best "straight-line" arrangement of matches between the sequences. By
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
106 straight-line, we mean no rearrangements or inversions, just a simple path of agreeing matches
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
107 between the two sequences. This limits the usability of this program to the alignment of genomes
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
108 that are very similar and with no large scale mutations. *gaps* is best suited for the comparison of
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
109 near identical sequences with the goal of finding minor mutations like SNPs and small indels.
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
110
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
111 Input can be filtered mummer output. The strange syntax is a result of a legacy issue described in
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
112 the Known problems (manual) section, and requires the header be stripped from the mummer output. In
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
113 addition, gaps is only designed to handle a single reference and a single query sequence, thus the
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
114 preceding mummer run must also follow this constraint. The -r is optional and designates the incoming
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
115 matches as reverse complement matches which must reference the reverse complement of the sequence,
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
116 therefore forcing mummer to be run without the -c option.
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
117
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
118 Reference: http://mummer.sourceforge.net/manual/#gaps
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
119
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
120 **Output:**
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
121 ::
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
122
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
123 > /home/aphillip/data/GHP.1con Consistent matches
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
124 183 17 22 none - -
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
125 238 72 108 none 33 33
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
126 347 181 92 none 1 1
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
127 458 292 50 none 19 19
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
128 705 539 44 none 1 1
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
129 750 584 38 none 1 1
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
130 807 641 23 -16 0 4
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
131 (output continues ...)
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
132 > Wrap around
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
133 334398 329917 47 none - 225
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
134 334446 329965 62 none 1 1
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
135 334539 330058 20 none 31 31
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
136 334560 330079 92 none 1 1
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
137 334653 330172 77 none 1 1
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
138 334740 330259 41 none 10 10
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
139 (output continues ...)
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
140 > /home/aphillip/data/GHP.1con Other matches
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
141 1317231 4891 21 none - -
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
142 1317275 4927 21 none - -
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
143 1317804 5399 25 none 508 451
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
144 947580 5436 36 none - -
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
145 23406 5518 34 none - -
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
146 333079 6592 32 none - -
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
147 (output continues ...)
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
148
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
149 Where the first line is the location of the reference file, and the first three columns are the same
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
150 as the three column match format described in the mummer section. The final three columns are the
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
151 overlap between this match and the previous match, the gap between the start of this match and the
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
152 end of the previous match in the reference, and the gap between the start of this match and the end
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
153 of the previous match in the query respectively.
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
154
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
155
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
156 mgaps
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
157 -----
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
158
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
159 *mgaps* was introduced into the MUMmer pipeline in an effort to better handle large-scale
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
160 rearrangements and duplications. Unlike gaps, mgaps is a full clustering algorithm that is capable
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
161 of generating multiple groups of consistently ordered matches. Clustering is controlled by a set of
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
162 command-line parameters that adjust the minimum cluster size, maximum gap between matches, etc. Only
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
163 matches that were included in clusters will appear in the output, so by adjusting the command-line
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
164 parameters it is possible to filter out many of the spurious matches, thus leaving only the larger
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
165 areas of conservation between the input sequences. The major advantage of mgaps is its ability to
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
166 identify these "islands" of conservation. This frees the user from the single LIS restraints of the
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
167 gaps program and allows for the identification of large-scale rearrangements, duplications, gene
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
168 families and so on.
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
169
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
170 Gaps can fail to identify clusters because they were not consistent with the LIS. However, by using
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
171 mgaps, all regions of conservation can now be identified. The only drawback is the increased
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
172 complexity of the output, where you once had only one cluster for the whole comparison, you usually
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
173 now get more. Because of this, it can sometimes be difficult separating the repetitive clusters from
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
174 "correct" clusters, *making mgaps more suited for global alignments instead of localized error detection*.
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
175
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
176 Input can be raw mummer output. *mgaps* is only designed to handle a single reference and one or
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
177 more query sequences, thus the preceding mummer run must also follow this constraint. Please refer
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
178 to the run-mummer3 script (see online manual) for an example of how to use this program in an
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
179 alignment pipeline. Note that in order to cluster reverse complement matches, the reverse complement
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
180 matches must reference the reverse complement strand of the query sequence, therefore forcing mummer
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
181 to be run without the -c option. A rewrite of this algorithm to handle multiple reference sequences
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
182 and a better coordinate system (forward coordinates for reverse complement matches) is doubtful but
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
183 may eventually appear.
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
184
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
185 The -d option can be interpreted as the number of insertions allowed between two matches in the same
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
186 cluster, while the -f option is a fraction equal to (diagonal difference / match separation) where
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
187 a higher value will increase the indel tolerance. Minimum cluster length is the sum of the contained
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
188 matches unless the -e option is used. The best way to get a feel for what each parameter controls
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
189 is to cluster the same data set numerous times with different values and observe the resulting
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
190 differences. It can also be helpful to set these parameters to the size of the element you wish to
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
191 capture, i.e. set the minimum cluster size to say the smallest exon you expect and set the max gap
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
192 to the smallest intron you expect to obtain clusters that could represent single exons (depending
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
193 of course on the similarity of the two sequences).
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
194
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
195 Reference: http://mummer.sourceforge.net/manual/#mgaps
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
196
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
197 **Output format**
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
198
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
199 Output of *mgaps* shares much in common with the output of mummer and gaps, with a slightly different
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
200 header formatting than gaps to allow for multiple query sequences and multiple clusters. The output
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
201 of mgaps run on both forward and reverse complement matches is as follows:
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
202 ::
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
203
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
204 > ID41
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
205 > ID41 Reverse
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
206 5177399 1 232 none - -
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
207 5177632 234 6794 none 1 1
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
208 5184433 7035 24 none 7 7
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
209 5184468 7069 23 none 11 10
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
210 > ID42
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
211 10181 43 1521 none - -
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
212 > ID42 Reverse
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
213 4654536 17 36 none - -
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
214 4654578 57 298 none 6 4
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
215 4654877 356 226 none 1 1
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
216 #
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
217 4655139 845 28 none - -
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
218 4655178 884 694 none 11 11
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
219 4655873 1579 20 none 1 1
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
220 #
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
221 4850044 17 1492 none - -
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
222 4851537 1510 711 none 1 1
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
223 4852249 2222 42 none 1 1
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
224 (output continues ...)
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
225
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
226
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
227 Headers containing the ID for each query sequence are listed after the '>' characters, and a
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
228 following Reverse keyword identifies the reverse matches for that query sequence. Individual clusters
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
229 for each sequence are separated by a '#' character, and the six columns are exactly the same as the
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
230 gaps output (see the gaps section for more details).
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
231
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
232
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
233 |
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
234 |
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
235
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
236 </help>
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
237 </tool>
61f30d177448 initial commit on Mummer toolsuite on toolshed
eric
parents:
diff changeset
238