Mercurial > repos > bgruening > repeat_masker
comparison repeatmasker.xml @ 3:bdfc22c1c3e3 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/repeat_masker commit 8dacb4321122d92df0983e0794cf23572be03224
author | iuc |
---|---|
date | Wed, 02 May 2018 20:18:11 -0400 |
parents | |
children | 04f5c3d7448e |
comparison
equal
deleted
inserted
replaced
2:5673e72241aa | 3:bdfc22c1c3e3 |
---|---|
1 <tool id="repeatmasker_wrapper" name="RepeatMasker" version="4.0.7" profile="17.01"> | |
2 <description>RepeatMasker</description> | |
3 <!-- Runtime dependency: the repeatmasker package, pinned to 4.0.7, the same version string the tool element declares. --> | |
4 <requirements> | |
5 <requirement type="package" version="4.0.7">repeatmasker</requirement> | |
6 </requirements> | |
7 <!-- Command template: builds a private lib/ directory of symlinks to the packaged RepeatMasker libraries (copying in the user's RepBase file when that source is chosen), runs RepeatMasker on a symlink to the input FASTA, then moves or reformats the result files into the Galaxy outputs. Boolean and select flags are emitted unquoted so an unset option expands to nothing rather than an empty '' argument; the numeric settings carry their flag names (-frag, -maxsize, -gc); -gc is only emitted when a value was entered; flag comparisons go through str() so they do not depend on wrapper equality. --> | |
8 <command detect_errors="exit_code"><![CDATA[ | |
9 RM_LIB_PATH=\$(dirname \$(which RepeatMasker))/../share/RepeatMasker/Libraries && | |
10 mkdir lib && | |
11 export REPEATMASKER_LIB_DIR=\$(pwd)/lib && | |
12 for file in \$(ls \$RM_LIB_PATH) ; do ln -s \$RM_LIB_PATH/\$file lib/\$file ; done && | |
13 #if $repeat_source.source_type == "repbase": | |
14 cp '${repeat_source.repbase_file}' lib/RMRBSeqs.embl && | |
15 #end if | |
16 ln -s '${input_fasta}' rm_input.fasta && | |
17 RepeatMasker -dir \$(pwd) | |
18 #if $repeat_source.source_type == "library": | |
19 -lib '${repeat_source.repeat_lib}' | |
20 -cutoff '${repeat_source.cutoff}' | |
21 #else if $repeat_source.source_type == "repbase": | |
22 #if $repeat_source.species_source.species_from_list == 'yes': | |
23 $repeat_source.species_source.species_list | |
24 #else | |
25 -species '${repeat_source.species_source.species_name}' | |
26 #end if | |
27 #end if | |
28 -parallel \${GALAXY_SLOTS:-1} | |
29 $gff | |
30 $ignore_n_stretches | |
31 $advanced.is_only | |
32 $advanced.is_clip | |
33 $advanced.no_is | |
34 $advanced.rodspec | |
35 $advanced.primspec | |
36 $advanced.nolow | |
37 $advanced.noint | |
38 $advanced.norna | |
39 $advanced.alu | |
40 $advanced.div | |
41 $advanced.search_speed | |
42 -frag '${advanced.frag}' | |
43 -maxsize '${advanced.maxsize}' | |
44 #if str($advanced.gc) != '': | |
45 -gc '${advanced.gc}' | |
46 #end if | |
47 $advanced.gccalc | |
48 $advanced.nocut | |
49 $advanced.keep_alignments | |
50 $advanced.invert_alignments | |
51 $advanced.xout | |
52 $advanced.xsmall | |
53 $advanced.poly | |
54 rm_input.fasta && | |
55 #if str($advanced.is_only) != '-is_only': | |
56 mv rm_input.fasta.masked '${output_masked_genome}' && | |
57 sed -r 's/^ *// ; s/ *$//; s/\+ //; s/ +/\t/g ; 1,2c SW score\t% div.\t% del.\t% ins.\tquery sequence\tpos in query: begin\tend\t(left)\trepeat\tclass/family\tpos in repeat: begin\tend\t(left)\tID' rm_input.fasta.out >'${output_log}' && | |
58 mv rm_input.fasta.tbl '${output_table}' && | |
59 #if str($gff) == '-gff': | |
60 mv rm_input.fasta.out.gff '${output_gff}' && | |
61 #end if | |
62 #if str($advanced.keep_alignments) == '-ali': | |
63 mv rm_input.fasta.align '${output_alignment}' && | |
64 #end if | |
65 #if str($advanced.poly) == '-poly': | |
66 sed -r 's/^ *// ; s/ *$//; s/\+ //; s/ +/\t/g' rm_input.fasta.polyout >'${output_polymorphic}' && | |
67 #end if | |
68 #end if | |
69 mv rm_input.fasta.cat '${output_repeat_catalog}' | |
70 ]]> | |
71 </command> | |
72 <!-- User-facing parameters. Boolean options carry their RepeatMasker flag as truevalue and an empty falsevalue so the command template can emit them directly. Species options carry the complete "-species NAME" argument as their value. --> | |
73 <inputs> | |
74 <param name="input_fasta" type="data" format="fasta" label="Genomic DNA" /> | |
75 <conditional name="repeat_source"> | |
76 <param label="Repeat library source" name="source_type" type="select"> | |
77 <option selected="true" value="repbase">RepBase</option> | |
78 <option value="library">Custom library of repeats</option> | |
79 </param> | |
80 <when value="repbase"> | |
81 <param name="repbase_file" type="data" format="embl" label="RepBase (RMRBSeqs.embl) file" /> | |
82 <conditional name="species_source"> | |
83 <param label="Select species name from a list?" name="species_from_list" type="select"> | |
84 <option value="yes" selected="true">Yes</option> | |
85 <option value="no">No</option> | |
86 </param> | |
87 <when value="yes"> | |
88 <param name="species_list" type="select" label="Species"> | |
89 <option value="-species anopheles" selected="true">anopheles</option> | |
90 <option value="-species arabidopsis">arabidopsis</option> | |
91 <option value="-species artiodactyl">artiodactyl</option> | |
92 <option value="-species aspergillus">aspergillus</option> | |
93 <option value="-species carnivore">carnivore</option> | |
94 <option value="-species cat">cat</option> | |
95 <option value="-species chicken">chicken</option> | |
96 <option value="-species 'ciona intestinalis'">ciona intestinalis</option> | |
97 <option value="-species 'ciona savignyi'">ciona savignyi</option> | |
98 <option value="-species cow">cow</option> | |
99 <option value="-species danio">danio</option> | |
100 <option value="-species diatomea">diatomea</option> | |
101 <option value="-species dog">dog</option> | |
102 <option value="-species drosophila">drosophila</option> | |
103 <option value="-species elegans">elegans</option> | |
104 <option value="-species fugu">fugu</option> | |
105 <option value="-species fungi">fungi</option> | |
106 <option value="-species human">human</option> | |
107 <option value="-species maize">maize</option> | |
108 <option value="-species mammal">mammal</option> | |
109 <option value="-species mouse">mouse</option> | |
110 <option value="-species pig">pig</option> | |
111 <option value="-species rat">rat</option> | |
112 <option value="-species rice">rice</option> | |
113 <option value="-species rodentia">rodentia</option> | |
114 <option value="-species ruminantia">ruminantia</option> | |
115 <option value="-species wheat">wheat</option> | |
116 </param> | |
117 </when> | |
118 <when value="no"> | |
119 <param name="species_name" type="text" value="homo sapiens" label="Repeat source species" help="Source species (or clade name) used to select repeats from RepBase" /> | |
120 </when> | |
121 </conditional> | |
122 </when> | |
123 <when value="library"> | |
124 <param name="repeat_lib" type="data" format="fasta" label="Custom library of repeats" /> | |
125 <param name="cutoff" type="integer" argument="-cutoff" value="225" label="Cutoff score for masking repeats" /> | |
126 </when> | |
127 </conditional> | |
128 <param type="boolean" argument="-gff" truevalue="-gff" falsevalue="" label="Output annotation of repeats in GFF format" checked="false" /> | |
129 <param name="ignore_n_stretches" type="boolean" argument="-excln" truevalue="-excln" falsevalue="" label="Ignore stretches of Ns when computing statistics" checked="true" help="Scaffolds are sometimes joined with stretches of 25 or more Ns. This option ignores them when calculating repeat statistics" /> | |
130 <section name="advanced" title="Advanced options" expanded="false"> | |
131 <param argument="-is_only" type="boolean" truevalue="-is_only" falsevalue="" checked="false" label="Only clip E coli insertion elements" /> | |
132 <param argument="-is_clip" type="boolean" truevalue="-is_clip" falsevalue="" checked="false" label="Clip IS elements before analysis" help="Normally RepeatMasker will report on IS element, with this option selected it will clip them before analysis" /> | |
133 <param argument="-no_is" type="boolean" truevalue="-no_is" falsevalue="" checked="false" label="Skip bacterial insertion element check" /> | |
134 <param argument="-rodspec" type="boolean" truevalue="-rodspec" falsevalue="" checked="false" label="Only check for rodent specific repeats" help="If this option is select a check for rodent specific repeats is done instead of a full RepeatMasker run" /> | |
135 <param argument="-primspec" type="boolean" truevalue="-primspec" falsevalue="" checked="false" label="Only check for primate specific repeats" help="If this option is select a check for primate specific repeats is done instead of a full RepeatMasker run" /> | |
136 <param argument="-nolow" type="boolean" truevalue="-nolow" falsevalue="" checked="false" label="No low complexity masking" help="Skip masking of simple tandem repeats and low complexity regions." /> | |
137 <param argument="-noint" type="boolean" truevalue="-noint" falsevalue="" checked="false" label="No interspersed repeat masking" help="Only mask simple repeats, skip masking of interspersed repeats." /> | |
138 <param argument="-norna" type="boolean" truevalue="-norna" falsevalue="" checked="false" label="No repeat-like-RNA masking" help="Skip masking of small pol III transcribed RNA (these are masked by default because they resemble SINEs)" /> | |
139 <param argument="-alu" type="boolean" truevalue="-alu" falsevalue="" checked="false" label="Limit masking to (primate) Alu repeats" /> | |
140 <param argument="-div" type="boolean" truevalue="-div" falsevalue="" checked="false" label="Limit masking to less diverged (younger) repeats" /> <!-- NOTE(review): RepeatMasker's -div appears to expect a numeric divergence threshold; as a bare flag it would take the next argument as its value. Confirm against the RepeatMasker usage text and consider an integer parameter. --> | |
141 <param type="select" name="search_speed" label="Search speed vs sensitiviy trade-off"> | |
142 <option value="">Default</option> | |
143 <option value="-q">Quick (5-10% less sensitive, 3-4 times speedup)</option> | |
144 <option value="-qq">Rush (10% less sensitive)</option> | |
145 <option value="-s">Slow (0-5% more sensitive, 2.5 times slowdown)</option> | |
146 </param> | |
147 <param type="integer" argument="-frag" value="40000" label="Maximum contiguous sequence searched" help="Maximum length of sequencing that is search without fragmenting" /> | |
148 <param type="integer" argument="-maxsize" value="4000000" label="Maximum length for IS or repeat clipped sequences" /> | |
149 <param type="integer" argument="-gc" optional="True" label="Select matrices for this GC%" help="Valid values are a percentage or -1 to choose the default" /> | |
150 <param type="boolean" argument="-gccalc" truevalue="-gccalc" falsevalue="" checked="false" label="Calculate GC % for all sequences" help="By default RepeatMasker skips calculating GC % for small sequences" /> | |
151 <param type="boolean" argument="-nocut" truevalue="-nocut" falsevalue="" checked="false" label="Skips cutting of repeats" /> | |
152 <param name="xout" type="boolean" argument="-x" truevalue="-x" falsevalue="" checked="false" label="Mask with X instead of N characters" /> | |
153 <param name="keep_alignments" type="boolean" argument="-ali" truevalue="-ali" falsevalue="" checked="false" label="Output alignments file" /> | |
154 <param name="invert_alignments" type="boolean" argument="-inv" truevalue="-inv" falsevalue="" checked="false" label="Invert alignments in alignment file" help="Show alignments in the orientation of the repeat sequence, not the query sequence" /> | |
155 <param type="boolean" argument="-xsmall" truevalue="-xsmall" falsevalue="" checked="false" label="Output repetitive regions as lowercase, non-repetitive regions as uppercase" /> | |
156 <param type="boolean" argument="-poly" truevalue="-poly" falsevalue="" checked="false" label="Output list of potentially polymorphic microsatellites" /> | |
157 </section> | |
158 </inputs> | |
159 <outputs> <!-- Only the repeat catalogue has no filter. Every other dataset is dropped when the advanced is_only option is set; the alignment, polymorphic and GFF datasets additionally require keep_alignments, poly and gff respectively. --> | |
160 <data name="output_masked_genome" format="fasta" label="RepeatMasker masked sequence on ${on_string}"> | |
161 <filter>not advanced['is_only']</filter> | |
162 </data> | |
163 <data name="output_log" format="tabular" label="RepeatMasker output log on ${on_string}"> | |
164 <filter>not advanced['is_only']</filter> | |
165 </data> | |
166 <data name="output_table" format="txt" label="RepeatMasker repeat statistics on ${on_string}"> | |
167 <filter>not advanced['is_only']</filter> | |
168 </data> | |
169 <data name="output_repeat_catalog" format="txt" label="RepeatMasker repeat catalogue on ${on_string}" /> | |
170 <data name="output_alignment" format="txt" label="RepeatMasker alignment on ${on_string}"> | |
171 <filter>not advanced['is_only'] and advanced['keep_alignments']</filter> | |
172 </data> | |
173 <data name="output_polymorphic" format="tabular" label="RepeatMasker possible polymorphic repeats on ${on_string}"> | |
174 <filter>not advanced['is_only'] and advanced['poly']</filter> | |
175 </data> | |
176 <data name="output_gff" format="gff" label="RepeatMasker repeat annotation on ${on_string}"> | |
177 <filter>not advanced['is_only'] and gff is True</filter> | |
178 </data> | |
179 </outputs> | |
180 <tests> <!-- Both tests run against a custom repeat library (repeats.fasta) on small.fasta. The first checks the four outputs produced with default options; the second also switches on the GFF, alignment and polymorphic outputs and expects seven datasets. --> | |
181 <test expect_num_outputs="4"> | |
182 <param name="input_fasta" value="small.fasta" ftype="fasta" /> | |
183 <param name="source_type" value="library" /> | |
184 <param name="repeat_lib" value="repeats.fasta" ftype="fasta" /> | |
185 <output name="output_masked_genome" file="small.fasta.masked" /> | |
186 <output name="output_table" file="small.fasta.stats" lines_diff="2" /> | |
187 <output name="output_repeat_catalog" file="small.fasta.cat" /> | |
188 <output name="output_log" file="small.fasta.log" /> | |
189 </test> | |
190 <test expect_num_outputs="7"> | |
191 <param name="input_fasta" value="small.fasta" ftype="fasta" /> | |
192 <param name="source_type" value="library" /> | |
193 <param name="gff" value="-gff" /> | |
194 <!-- <param name="show" value="yes" /> --> | |
195 <param name="keep_alignments" value="-ali" /> | |
196 <param name="poly" value="-poly" /> | |
197 <param name="repeat_lib" value="repeats.fasta" ftype="fasta" /> | |
198 <output name="output_masked_genome" file="small.fasta.masked" /> | |
199 <output name="output_table" file="small.fasta.stats" lines_diff="4" /> | |
200 <output name="output_repeat_catalog" file="small.fasta.cat" /> | |
201 <output name="output_log" file="small.fasta.log" /> | |
202 <output name="output_alignment" file="small.fasta.align" /> | |
203 <output name="output_polymorphic" file="small.fasta.poly" /> | |
204 <output name="output_gff" file="small.fasta.gff" lines_diff="4" /> | |
205 </test> | |
206 </tests> | |
207 <help><![CDATA[ | |
208 RepeatMasker is a program that screens DNA for interspersed repeats and low | |
209 complexity DNA sequences. The database of repeats to screen for can be | |
210 provided as a FASTA file or downloaded from RepBase_. If the RepBase option is | |
211 chosen the RepBaseRepeatMaskerEdition file should be downloaded and | |
212 unpacked, and the enclosed EMBL format file ('RMRBSeqs.embl') should | |
213 be uploaded to Galaxy for use with this tool. | |
214 | |
215 Further documentation is available on the RepeatMasker homepage_. | |
216 | |
217 .. _RepBase: http://www.girinst.org/repbase/ | |
218 .. _homepage: http://www.repeatmasker.org/webrepeatmaskerhelp.html | |
219 ]]> | |
220 </help> | |
221 <citations> <!-- Single BibTeX @misc entry citing RepeatMasker Open-4.0 and its website. --> | |
222 <citation type="bibtex"> | |
223 @misc{RepeatMasker, | |
224 title = {RepeatMasker Open-4.0}, | |
225 howpublished = {\url{http://www.repeatmasker.org}}, | |
226 author = {Smit, AFA and Hubley, R and Green, P.}, | |
227 year = {2013-2015}} | |
228 </citation> | |
229 </citations> | |
230 </tool> |