# HG changeset patch
# User bjoern-gruening
# Date 1359707377 18000
# Node ID f8f1a3878edd1a0672bb01176009e3f487d2cb28
# Parent 13df908a02b0b4b33a90b98ed8b265eb77e7c277
0.1.1 version, prevent a crash if no repeat is found. Thanks to Simon Guest
diff -r 13df908a02b0 -r f8f1a3878edd RepeatMasker.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/RepeatMasker.xml Fri Feb 01 03:29:37 2013 -0500
@@ -0,0 +1,314 @@
+
+ Masks different kinds of repeats
+
+## The command is a Cheetah template, which allows some Python-based syntax.
+## Lines starting with "##" are comments. Galaxy turns newlines into spaces,
+## so every separate shell command in this template must be terminated with ";".
+## Create a private temp directory that RepeatMasker is pointed at via -dir below.
+#import tempfile, os
+#set $dirname = os.path.abspath(tempfile.mkdtemp())
+#set $input_filename = os.path.split(str($query))[-1]
+#set $output_basename = os.path.join($dirname, $input_filename)
+## $output_basename = temp dir + input file name; the copy-out steps append suffixes to it.
+
+RepeatMasker
+-parallel 8
+## Presumably each of these expands to a RepeatMasker flag or to nothing (parameter definitions are not visible here).
+$nolow
+$noint
+$norna
+## The species argument is only emitted when something other than "all" was selected.
+#if str($species)!="all":
+ $species
+#end if
+
+## Output directory for this RepeatMasker run.
+-dir $dirname
+
+#if $adv_opts.adv_opts_selector=="advanced":
+ ## -gc is only emitted when the value is not "0".
+ #if str($adv_opts.gc)!="0":
+ -gc $adv_opts.gc
+ #end if
+
+ $adv_opts.gccalc
+ ## The requested outputs arrive as a comma-separated string; split it once and reuse the list.
+ #set $output_files_list = str($adv_opts.output_files).split(',')
+ #if "gff" in $output_files_list:
+ -gff
+ #end if
+ #if "html" in $output_files_list:
+ -html
+ #end if
+ ## Search-mode and repeat-subset switches; presumably flag-or-empty as well (TODO confirm against the param definitions).
+ $adv_opts.slow_search
+ $adv_opts.quick_search
+ $adv_opts.rush_search
+ $adv_opts.only_alus
+ $adv_opts.is_only
+
+#else:
+ ## Basic mode: always request the GFF output.
+ -gff
+
+## End of advanced options
+#end if
+## The input sequence file is the last argument.
+$query
+
+## Discard RepeatMasker's stdout and stderr; the ";" ends this shell command.
+> /dev/null 2> /dev/null;
+## Copy the output files to Galaxy.
+## AgR: if there are no repeats, the output files may not exist, which makes the job fail,
+## so touch each file first. Newlines become spaces, so every touch must end with ";".
+#if $adv_opts.adv_opts_selector=="advanced":
+
+ #if "summary" in $output_files_list:
+ ## Write out the summary file (default)
+ #set $summary_file = $output_basename + '.tbl'
+ touch $summary_file;
+ cp $summary_file $output_summary;
+ #end if
+
+ #if "gff" in $output_files_list:
+ ## Write out the gff file (default)
+ #set $gff_file = $output_basename + '.out.gff'
+ touch $gff_file;
+ cp $gff_file $output_gff;
+ #end if
+
+ #if "html" in $output_files_list:
+ ## Write out the html file
+ #set $html_file = $output_basename + '.out.html'
+ touch $html_file;
+ cp $html_file $output_html;
+ #end if
+
+#else:
+
+ ## Write out the summary file (default)
+ #set $summary_file = $output_basename + '.tbl'
+ touch $summary_file;
+ cp $summary_file $output_summary;
+
+ ## Write out the gff file (default)
+ #set $gff_file = $output_basename + '.out.gff'
+ touch $gff_file;
+ cp $gff_file $output_gff;
+
+
+## End of advanced options
+#end if
+
+## Write out the masked sequence file (copied in both basic and advanced mode)
+#set $mask_sequence_file = $output_basename + '.masked'
+touch $mask_sequence_file;
+cp $mask_sequence_file $output_mask;
+
+## Write out the standard annotation file (default).
+## The default '.out' file from RepeatMasker has a 3-line header and uses spaces rather
+## than tabs: drop the header, then squeeze each run of spaces into a single tab.
+#set $standard_file = $output_basename + '.out'
+tail -n +4 $standard_file | tr -s ' ' '\t' > $output_std;
+
+## Delete all temporary files (option placed before the operand so non-GNU rm accepts it)
+rm -r $dirname;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ (adv_opts['adv_opts_selector'] == 'advanced' and 'mask' in adv_opts['output_files'])
+
+
+
+ (
+ (adv_opts['adv_opts_selector'] == 'advanced' and 'summary' in adv_opts['output_files'])
+ or
+ (adv_opts['adv_opts_selector'] == 'basic')
+ )
+
+
+
+ (adv_opts['adv_opts_selector'] == 'advanced' and 'html' in adv_opts['output_files'])
+
+
+
+ (adv_opts['adv_opts_selector'] == 'advanced' and 'gff' in adv_opts['output_files'])
+
+
+
+
+ RepeatMasker
+
+
+
+.. class:: warningmark
+
+**What it does**
+
+RepeatMasker is a program that screens DNA sequences for *interspersed repeats*
+and *low complexity* DNA sequences. The output of the program is a detailed
+annotation of the repeats that are present in the query sequence as well as a
+modified version of the query sequence in which all the annotated repeats have
+been masked (default: replaced by Ns).
+
+-----
+
+**How to read the results**
+
+
+
+The annotation file contains the cross_match output lines. It lists all best matches
+(above a set minimum score) between the query sequence and any of the sequences in
+the repeat database or with low complexity DNA. The term "best matches" reflects
+that a match is not shown if its domain is over 80% contained within the domain
+of a higher scoring match, where the "domain" of a match is the region in
+the query sequence that is defined by the alignment start and stop. These domains
+have been masked in the returned masked sequence file. In the output, matches are
+ordered by query name, and for each query by position of the start of the alignment.
+
+Example:
+
+======== ========= ========= ========= ========== =========== ========= ========= ============ =============== =================== ================ ============== ======= ==
+SW score perc div. perc del. perc ins. query seq. q-pos begin q-pos end (left) w complement matching repeat repeat class/family repeat-pos begin repeat-pos end (left) ID
+======== ========= ========= ========= ========== =========== ========= ========= ============ =============== =================== ================ ============== ======= ==
+ 1306 15.6 6.2 0.0 HSU08988 6563 6781 \(22462) C MER7A DNA/MER2_type 336 103 \(0) 1
+ 12204 10.0 2.4 1.8 HSU08988 6782 7714 \(21529) C TIGGER1 DNA/MER2_type 2418 1493 \(0) 2
+ 279 3.0 0.0 0.0 HSU08988 7719 7751 \(21492) + (TTTTA)n Simple_repeat 1 33 \(0) 3
+ 1765 13.4 6.5 1.8 HSU08988 7752 8022 \(21221) C AluSx SINE/Alu 289 1 \(23) 4
+ 12204 10.0 2.4 1.8 HSU08988 8023 8694 \(20549) C TIGGER1 DNA/MER2_type 1493 827 \(925) 5
+ 1984 11.1 0.3 0.7 HSU08988 8695 9000 \(20243) C AluSg SINE/Alu 305 1 \(5) 6
+ 12204 10.0 2.4 1.8 HSU08988 9001 9695 \(19548) C TIGGER1 DNA/MER2_type 827 2 \(1591) 7
+ 711 21.2 1.4 0.0 HSU08988 9696 9816 \(19427) C MER7A DNA/MER2_type 122 2 \(224) 8
+======== ========= ========= ========= ========== =========== ========= ========= ============ =============== =================== ================ ============== ======= ==
+
+This is a sequence in which a Tigger1 DNA transposon has integrated into a MER7 DNA transposon copy.
+Subsequently two Alus integrated in the Tigger1 sequence. The simple repeat is derived from the
+poly A of the Alu element. The first line is interpreted like this:
+
+:Table description:
+
+1. **1306** = Smith-Waterman score of the match, usually complexity adjusted
+ The SW scores are not always directly comparable. Sometimes
+ the complexity adjustment has been turned off, and a variety of
+ scoring-matrices are used.
+
+#. **15.6** = % substitutions in matching region compared to the consensus
+#. **6.2** = % of bases opposite a gap in the query sequence (deleted bp)
+#. **0.0** = % of bases opposite a gap in the repeat consensus (inserted bp)
+#. **HSU08988** = name of query sequence
+#. **6563** = starting position of match in query sequence
+#. **6781** = ending position of match in query sequence
+#. **(22462)** = no. of bases in query sequence past the ending position of match
+#. **C** = match is with the Complement of the consensus sequence in the database
+#. **MER7A** = name of the matching interspersed repeat
+#. **DNA/MER2_type** = the class of the repeat, in this case a DNA transposon fossil of the MER2 group (see below for list and references)
+#. **336** = starting position of match in database sequence (using top-strand numbering)
+#. **103** = ending position of match in database sequence
+#. **(0)** = no. of bases in (complement of) the repeat consensus sequence prior to beginning of the match (so 0 means that the match extended all the way to the end of the repeat consensus sequence)
+#. **1** = Identifier
+
+An asterisk (\*) in the final column (no example shown) indicates that there is
+a higher-scoring match whose domain partly (<80%) includes the domain of this match.
+
+Note that the SW score and divergence numbers for the three Tigger1 lines are identical.
+This is because the information is derived from a single alignment (the Alus were deleted
+from the query before the alignment with the Tigger element was performed).
+The program makes educated guesses about many fragments if they are derived from
+the same element (e.g. it knows that the MER7A fragments represent one insert).
+In a next version I can identify each element with a unique ID, if interest exists
+(this could help to represent repeats cleaner in graphic displays).
+
+
+-------
+
+**References**
+
+Smit, AFA, Hubley, R and Green, P. RepeatMasker Open-3.0.
+
+http://www.repeatmasker.org/
+
+
+
diff -r 13df908a02b0 -r f8f1a3878edd readme.txt
--- a/readme.txt Wed Jan 11 04:50:59 2012 -0500
+++ b/readme.txt Fri Feb 01 03:29:37 2013 -0500
@@ -1,7 +1,7 @@
Galaxy wrapper for RepeatMasker
=====================================
-This wrapper is copyright 2012 by Björn Grüning.
+This wrapper is copyright 2013 by Björn Grüning.
This is a wrapper for the command line tool of RepeatMasker from the Institute for Systems Biology.
http://www.repeatmasker.org/
@@ -34,6 +34,7 @@
=======
v0.1 - Initial public release
+v0.1.1 - patch from Simon Guest, to create empty files if no repeat is found
Wrapper Licence (MIT/BSD style)
diff -r 13df908a02b0 -r f8f1a3878edd tools/RepeatMasker.xml
--- a/tools/RepeatMasker.xml Wed Jan 11 04:50:59 2012 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,309 +0,0 @@
-
- Masks different kind of repeats
-
-## The command is a Cheetah template which allows some Python based syntax.
-## Lines starting hash hash are comments. Galaxy will turn newlines into spaces
-
-## create temp directory
-#import tempfile, os
-#set $dirname = os.path.abspath(tempfile.mkdtemp())
-#set $input_filename = os.path.split(str($query))[-1]
-#set $output_basename = os.path.join($dirname, $input_filename)
-
-
-RepeatMasker
--parallel 8
-
-$nolow
-$noint
-$norna
-
-#if str($species)!="all":
- $species
-#end if
-
-
--dir $dirname
-
-#if $adv_opts.adv_opts_selector=="advanced":
-
- #if str($adv_opts.gc)!="0":
- -gc $adv_opts.gc
- #end if
-
- $adv_opts.gccalc
-
- #set $output_files_list = str($adv_opts.output_files).split(',')
- #if "gff" in $output_files_list:
- -gff
- #end if
- #if "html" in $output_files_list:
- -html
- #end if
-
- $adv_opts.slow_search
- $adv_opts.quick_search
- $adv_opts.rush_search
- $adv_opts.only_alus
- $adv_opts.is_only
-
-#else:
- ## Set defaults
- -gff
-
-## End of advanced options:
-#end if
-
-$query
-
-
-> /dev/null 2> /dev/null;
-## Copy the output files to galaxy
-#if $adv_opts.adv_opts_selector=="advanced":
-
- #if "summary" in $output_files_list:
- ## Write out the summary file (default)
- #set $summary_file = $output_basename + '.tbl'
- cp $summary_file $output_summary;
- #end if
-
- #if "gff" in $output_files_list:
- ## Write out the gff file (default)
- #set $gff_file = $output_basename + '.out.gff'
- cp $gff_file $output_gff;
- #end if
-
- #if "html" in $output_files_list:
- ## Write out the html file
- #set $html_file = $output_basename + '.out.html'
- cp $html_file $output_html;
- #end if
-
-#else:
-
- ## Write out the summary file (default)
- #set $summary_file = $output_basename + '.tbl'
- cp $summary_file $output_summary;
-
- ## Write out the gff file (default)
- #set $gff_file = $output_basename + '.out.gff'
- cp $gff_file $output_gff;
-
-
-## End of advanced options:
-#end if
-
-## Write out mask sequence file
-#set $mask_sequence_file = $output_basename + '.masked'
-cp $mask_sequence_file $output_mask;
-
-## Write out standard file (default)
-## The default '.out' file from RepeatMasker has a 3-line header and spaces rather
-## than tabs. Remove the header and replace the whitespaces with tab
-#set $standard_file = $output_basename + '.out'
-tail -n +4 $standard_file | tr -s ' ' '\t' > $output_std;
-
-## Delete all temporary files
-rm $dirname -r;
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- (adv_opts['adv_opts_selector'] == 'advanced' and 'mask' in adv_opts['output_files'])
-
-
-
- (
- (adv_opts['adv_opts_selector'] == 'advanced' and 'summary' in adv_opts['output_files'])
- or
- (adv_opts['adv_opts_selector'] == 'basic')
- )
-
-
-
- (adv_opts['adv_opts_selector'] == 'advanced' and 'html' in adv_opts['output_files'])
-
-
-
- (adv_opts['adv_opts_selector'] == 'advanced' and 'gff' in adv_opts['output_files'])
-
-
-
-
- RepeatMasker
-
-
-
-.. class:: warningmark
-
-
------
-
-**What it does**
-
-RepeatMasker is a program that screens DNA sequences for *interspersed repeats*
-and *low complexity* DNA sequences. The output of the program is a detailed
-annotation of the repeats that are present in the query sequence as well as a
-modified version of the query sequence in which all the annotated repeats have
-been masked (default: replaced by Ns).
-
------
-
-**How to read the results**
-
-
-
-The annotation file contains the cross_match output lines. It lists all best matches
-(above a set minimum score) between the query sequence and any of the sequences in
-the repeat database or with low complexity DNA. The term "best matches" reflects
-that a match is not shown if its domain is over 80% contained within the domain
-of a higher scoring match, where the "domain" of a match is the region in
-the query sequence that is defined by the alignment start and stop. These domains
-have been masked in the returned masked sequence file. In the output, matches are
-ordered by query name, and for each query by position of the start of the alignment.
-
-Example:
-
-======== ========= ========= ========= ========== =========== ========= ========= ============ =============== =================== ================ ============== ======= ==
-SW score perc div. perc del. perc ins. query seq. q-pos begin q-pos end (left) w complement matching repeat repeat class/family repeat-pos begin repeat-pos end (left) ID
-======== ========= ========= ========= ========== =========== ========= ========= ============ =============== =================== ================ ============== ======= ==
- 1306 15.6 6.2 0.0 HSU08988 6563 6781 \(22462) C MER7A DNA/MER2_type 336 103 \(0) 1
- 12204 10.0 2.4 1.8 HSU08988 6782 7714 \(21529) C TIGGER1 DNA/MER2_type 2418 1493 \(0) 2
- 279 3.0 0.0 0.0 HSU08988 7719 7751 \(21492) + (TTTTA)n Simple_repeat 1 33 \(0) 3
- 1765 13.4 6.5 1.8 HSU08988 7752 8022 \(21221) C AluSx SINE/Alu 289 1 \(23) 4
- 12204 10.0 2.4 1.8 HSU08988 8023 8694 \(20549) C TIGGER1 DNA/MER2_type 1493 827 \(925) 5
- 1984 11.1 0.3 0.7 HSU08988 8695 9000 \(20243) C AluSg SINE/Alu 305 1 \(5) 6
- 12204 10.0 2.4 1.8 HSU08988 9001 9695 \(19548) C TIGGER1 DNA/MER2_type 827 2 \(1591) 7
- 711 21.2 1.4 0.0 HSU08988 9696 9816 \(19427) C MER7A DNA/MER2_type 122 2 \(224) 8
-======== ========= ========= ========= ========== =========== ========= ========= ============ =============== =================== ================ ============== ======= ==
-
-This is a sequence in which a Tigger1 DNA transposon has integrated into a MER7 DNA transposon copy.
-Subsequently two Alus integrated in the Tigger1 sequence. The simple repeat is derived from the
-poly A of the Alu element. The first line is interpreted like this:
-
-:Table description:
-
-1. **1306** = Smith-Waterman score of the match, usually complexity adjusted
- The SW scores are not always directly comparable. Sometimes
- the complexity adjustment has been turned off, and a variety of
- scoring-matrices are used.
-
-#. **15.6** = % substitutions in matching region compared to the consensus
-#. **6.2** = % of bases opposite a gap in the query sequence (deleted bp)
-#. **0.0** = % of bases opposite a gap in the repeat consensus (inserted bp)
-#. **HSU08988** = name of query sequence
-#. **6563** = starting position of match in query sequence
-#. **7714** = ending position of match in query sequence
-#. **(22462)** = no. of bases in query sequence past the ending position of match
-#. **C** = match is with the Complement of the consensus sequence in the database
-#. **MER7A** = name of the matching interspersed repeat
-#. **DNA/MER2_type** = the class of the repeat, in this case a DNA transposon fossil of the MER2 group (see below for list and references)
-#. **2418** = starting position of match in database sequence (using top-strand numbering)
-#. **1465** = ending position of match in database sequence
-#. **(0)** = no. of bases in (complement of) the repeat consensus sequence prior to beginning of the match (so 0 means that the match extended all the way to the end of the repeat consensus sequence)
-#. **1** = Identifier
-
-An asterisk (\*) in the final column (no example shown) indicates that there is
-a higher-scoring match whose domain partly (<80%) includes the domain of this match.
-
-Note that the SW score and divergence numbers for the three Tigger1 lines are identical.
-This is because the information is derived from a single alignment (the Alus were deleted
-from the query before the alignment with the Tigger element was performed).
-The program makes educated guesses about many fragments if they are derived from
-the same element (e.g. it knows that the MER7A fragments represent one insert).
-In a next version I can identify each element with a unique ID, if interest exists
-(this could help to represent repeats cleaner in graphic displays).
-
-
--------
-
-**References**
-
-Smit, AFA, Hubley, R and Green, P. RepeatMasker Open-3.0.
-
-http://www.repeatmasker.org/
-
-
-