diff MUMmer/mummer_maxmatch.xml @ 0:61f30d177448 default tip

initial commit on Mummer toolsuite on toolshed
author eric
date Tue, 31 Mar 2015 14:19:49 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MUMmer/mummer_maxmatch.xml	Tue Mar 31 14:19:49 2015 +0200
@@ -0,0 +1,170 @@
+<tool id="mummer_maxmatch" name="MUMmer MaxMatch" version="0.9.alx" force_history_refresh="True">
+  <description>: Maximal exact sequence matching</description>
+  <command>
+	<!-- Cheetah template: emits the selected MUMmer executable followed by the arguments of the matching conditional branch -->
+		$tool.cmd
+		#if $tool.cmd=="mummer":
+			$tool.cmd_extra
+			$tool.mum_ref_in
+			$tool.mum_q_in
+		#end if
+		#if $tool.cmd=="repeat-match":
+			-n $tool.rm_n
+			#if $tool.rm_E=="yes":
+				-E
+			#end if
+			$tool.cmd_extra
+			$tool.in_seq
+		#end if
+		#if $tool.cmd=="exact-tandems":
+			$tool.in_seq
+			$tool.et_minl
+		#end if
+		<!-- The error state was also being set on successful jobs, so stderr is discarded: redirect it to /dev/null instead of closing the descriptor -->
+		2&gt;/dev/null
+		> $out_tool
+
+  </command>
+	<inputs> <!-- parameters are grouped per executable inside the conditional; the command template reads them through the tool prefix -->
+	  <conditional name="tool">
+		<param name="cmd" type="select" label="MUMmer maximal matching" help="Algorithms are run with default parameters (none). For specific args see help below" >
+			<option value="mummer" selected="true">mummer</option>
+			<option value="repeat-match">repeat-match</option>
+			<option value="exact-tandems">exact-tandems</option>
+		</param>
+		<when value="mummer">
+			<param name="mum_ref_in" type="data" format="fasta" label="Reference FastA file" />
+			<param name="mum_q_in" type="data" format="fasta" label="Query (multi) FastA sequence" />
+			<param name="cmd_extra" type="text" size="40" value="" label="Extra cmd line options" help="See specific cmd line options below for each tool" />
+		</when>
+		<when value="repeat-match">
+			<param name="in_seq" type="data" format="fasta" label="FastA sequence file" />
+			<param name="rm_n" type="integer" min="1" value="20" label="Minimum exact match length [-n]" /> <!-- integer type keeps free text out of the command line -->
+			<param name="rm_E" type="select" label="Use exhaustive (slow) search to find matches [-E]" >
+				<option value="no" selected="true">No</option>
+				<option value="yes">Yes</option>
+			</param>
+			<param name="cmd_extra" type="text" size="40" value="" label="Extra cmd line options" help="-n and -E are configured above. More specific cmd line options in help below." />
+		</when>
+		<when value="exact-tandems">
+			<param name="in_seq" type="data" format="fasta" label="FastA sequence file" />
+			<param name="et_minl" type="integer" min="1" value="20" label="Minimum length" /> <!-- the help text requires a positive integer here -->
+		</when>
+	  </conditional>
+	</inputs>
+	<outputs>
+		<data name="out_tool" format="txt" label="Max exact match output" /> <!-- receives the stdout of the selected program; "txt" is Galaxy's plain-text datatype extension -->
+	</outputs>
+    <requirements>
+<!-- Disabled set_environment requirement, kept for reference: <requirement type="set_environment" version="3.23">MUMMER_PATH</requirement> -->
+        <requirement type="package" version="4.6.4">gnuplot</requirement> <!-- NOTE(review): gnuplot is not referenced by this tool's command template; confirm it is needed here -->
+        <requirement type="package" version="3.23">mummer</requirement>
+    </requirements>
+	<tests>
+		<test> <!-- TODO: placeholder only; this test declares no input params and no expected outputs -->
+		</test>
+	</tests>
+	<help>
+|
+
+
+**Reference**
+=============
+ 
+- **MUMmer MaxExactMatch Galaxy tool wrapper:** Alex Bossers, CVI of Wageningen UR, The Netherlands.
+
+- **MUMmer suite v3.22:** http://mummer.sourceforge.net
+
+- **MUMmer tutorials:** http://mummer.sourceforge.net/examples/
+
+Please do not use any of the command line options that modify prefixes or file names. These options 
+serve no purpose within Galaxy and are likely to cause the job to fail!
+
+If you found these tools/wrappers useful in your research, please acknowledge our work. If you improve 
+or modify the wrappers, please add yourself to the acknowledgement section instead of replacing existing names :)
+
+
+
+**MUMmer Maximal exact matching**
+=================================
+
+The heart of the MUMmer package is its suffix tree based maximal matching routines. These can be 
+used for repeat detection within a single sequence as is done by *repeat-match* and *exact-tandems*, 
+or can be used for the alignment of two or more sequences as is done by *mummer*.
+
+Mummer
+------
+
+mummer is a suffix tree algorithm designed to find maximal exact matches of some minimum length 
+between two input sequences. By default mummer will only find maximal matches that are unique in 
+the entire set of reference sequences. The match lists produced by mummer can be used alone to 
+generate alignment dot plots, or can be passed on to the clustering algorithms for the identification 
+of longer non-exact regions of conservation. These match lists have great versatility because they 
+contain huge amounts of information and can be passed forward to other interpretation programs for 
+clustering, analysis, searching, etc.
+
+
+Repeat-match
+------------
+
+repeat-match is a suffix tree algorithm designed to find maximal exact repeats within a single input 
+sequence. It uses a similar algorithm to mummer, but altered slightly to find maximal exact matches 
+within a single sequence.
+
+Output formatting varies depending on the command line parameters and the output can be quite large. 
+The standard output format that results from running repeat-match with default parameters is as follows:
+::
+
+ Long Exact Matches:
+    Start1     Start2    Length
+   4919485    4919506r       22
+
+The three columns are the first position of the repeat, the second position of the repeat, and the 
+length of the repeat respectively. Reverse complement repeat positions are denoted by an 'r' 
+following the Start2 position, and are relative to the forward strand of the sequence.
+
+
+Exact-tandems
+-------------
+
+exact-tandems is a wrapper script for the repeat-match program. It provides a list of exact tandem 
+repeats within a single input sequence. As with repeat-match the sequence file should contain only 
+one sequence in FastA format, however if multiple sequences exist the first one will be used. The 
+sequence may contain any set of upper and lowercase characters, thus DNA and protein sequence are 
+both allowed and matching is case insensitive. The minimum match length parameter should be a 
+positive integer; this value will be passed to the repeat-match program via the -n option.
+
+The output format of exact-tandems is as follows:
+::
+
+ Finding matches
+ Tandem repeats
+    Start   Extent  UnitLen     Copies
+   416173      150       45        3.3
+
+The four columns are the first position of the tandem, the extent of the repeat region, the length 
+of each tandem repeat unit, and the number of repeat units respectively.
+
+
+
+**Manuals and CMD line options (specific for each tool!):** 
+===========================================================
+
+**Mummer**
+
+http://mummer.sourceforge.net/manual/#mummer
+
+**Repeat-match**
+
+http://mummer.sourceforge.net/manual/#repeat
+
+**exact-tandems**
+
+http://mummer.sourceforge.net/manual/#exact
+
+| 
+| 
+
+	</help>
+</tool>
+