Mercurial > repos > bgruening > repeat_masker
comparison repeatmasker.xml.orig @ 14:7563ea7a922d draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/repeatmasker commit 7a5f368a5859e659aa36d0358bb96ca12574e2cc
author | iuc |
---|---|
date | Mon, 24 Apr 2023 10:29:31 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
13:3f987772e283 | 14:7563ea7a922d |
---|---|
2 <tool id="repeatmasker_wrapper" name="RepeatMasker" version="@TOOL_VERSION@+@GALAXY_TOOL_VERSION@" profile="20.01"> | |
3 <description>screen DNA sequences for interspersed repeats and low complexity regions</description> | |
4 <macros> | |
5 <import>macros.xml</import> | |
6 </macros> | |
7 <expand macro='xrefs'/> | |
8 <expand macro='edam_ontology' /> | |
9 <expand macro='requirements' /> | |
10 <version_command>repeatmasker --version</version_command> | |
11 <!-- Links the input FASTA into the job directory, runs RepeatMasker there against the selected repeat source, then moves each result file onto its Galaxy output dataset. --> <command detect_errors="exit_code"><![CDATA[ | |
24 RM_PATH=\$(which RepeatMasker || true) && | |
25 if [ -z "\$RM_PATH" ] ; then echo "Failed to find RepeatMasker in PATH (\$PATH)" >&2 ; exit 1 ; fi && | |
26 | |
27 RM_LIB_PATH=\$(dirname "\$RM_PATH")/../share/RepeatMasker/Libraries && | |
28 #if $repeat_source.source_type == "dfam_up": | |
29 mkdir lib/ && | |
30 ln -s '${repeat_source.dfam_lib}' lib/RepeatMaskerLib.h5 && | |
31 RM_LIB_PATH=\$(pwd)/lib && | |
32 #end if | |
33 | |
34 ln -s '${input_fasta}' rm_input.fasta && | |
35 | |
36 RepeatMasker -dir "\$(pwd)" | |
37 -libdir "\$RM_LIB_PATH" | |
38 #if $repeat_source.source_type == "library": | |
39 -lib '${repeat_source.repeat_lib}' | |
40 -cutoff '${repeat_source.cutoff}' | |
41 #else if $repeat_source.source_type == "dfam": | |
42 #if $repeat_source.species_source.species_from_list == 'yes': | |
43 -species $repeat_source.species_source.species_list | |
44 #else | |
45 -species '${repeat_source.species_source.species_name}' | |
46 #end if | |
47 #else if $repeat_source.source_type == "dfam_up": | |
48 -species '${repeat_source.species_name}' | |
49 #end if | |
50 -parallel \${GALAXY_SLOTS:-1} | |
51 ${gff} | |
52 ${excln} | |
53 ${advanced.is_only} | |
54 ${advanced.is_clip} | |
55 ${advanced.no_is} | |
56 ${advanced.rodspec} | |
57 ${advanced.primspec} | |
58 ${advanced.nolow} | |
59 ${advanced.noint} | |
60 ${advanced.norna} | |
61 ${advanced.alu} | |
62 ${advanced.div} | |
63 ${advanced.search_speed} | |
64 -frag ${advanced.frag} | |
65 ## -maxsize ${advanced.maxsize} | |
66 #if str($advanced.gc): | |
67 -gc ${advanced.gc} | |
68 #end if | |
69 ${advanced.gccalc} | |
70 ${advanced.nocut} | |
71 ${advanced.keep_alignments} | |
72 ${advanced.invert_alignments} | |
73 ${advanced.xout} | |
74 ${advanced.xsmall} | |
75 ${advanced.poly} | |
76 rm_input.fasta && | |
77 #if $advanced.is_only != '-is_only': | |
78 mv rm_input.fasta.masked '${output_masked_genome}' && | |
79 sed -E 's/^ *// ; s/ *$//; s/\+ //; s/ +/\t/g ; 1,2c SW score\t% div.\t% del.\t% ins.\tquery sequence\tpos in query: begin\tend\t(left)\trepeat\tclass/family\tpos in repeat: begin\tend\t(left)\tID' rm_input.fasta.out >'${output_log}' && | |
80 mv rm_input.fasta.tbl '${output_table}' && | |
81 #if $gff == '-gff': | |
82 mv rm_input.fasta.out.gff '${output_gff}' && | |
83 #end if | |
84 #if $advanced.keep_alignments == '-ali': | |
85 mv rm_input.fasta.align '${output_alignment}' && | |
86 #end if | |
87 #if $advanced.poly == '-poly': | |
88 sed -E 's/^ *// ; s/ *$//; s/\+ //; s/ +/\t/g' rm_input.fasta.polyout >'${output_polymorphic}' && | |
89 #end if | |
90 #end if | |
91 if [ -f 'rm_input.fasta.cat.gz' ]; then | |
92 zcat 'rm_input.fasta.cat.gz' > '${output_repeat_catalog}'; | |
93 else | |
94 mv rm_input.fasta.cat '${output_repeat_catalog}'; | |
95 fi | |
96 ]]> | |
97 </command> | |
98 | |
99 <inputs> <!-- Parameter names below are referenced by the command template, output filters and tests; boolean truevalues are passed to RepeatMasker verbatim. --> | |
100 <param name="input_fasta" type="data" format="fasta" label="Genomic DNA" /> | |
101 <conditional name="repeat_source"> | |
102 <param label="Repeat library source" name="source_type" type="select" help="To use RepBase, choose 'Custom library of repeats' and select a fasta version of this non-free database."> | |
103 <option selected="true" value="dfam">DFam (curated only, bundled with RepeatMasker)</option> | |
104 <option value="dfam_up">DFam (full/specific version)</option> | |
105 <option value="library">Custom library of repeats</option> | |
106 </param> | |
107 <when value="dfam"> | |
108 <conditional name="species_source"> | |
109 <param label="Select species name from a list?" name="species_from_list" type="select"> | |
110 <option value="yes" selected="true">Yes</option> | |
111 <option value="no">No</option> | |
112 </param> | |
113 <when value="yes"> | |
114 <param name="species_list" type="select" label="Species"> | |
115 <option value="human" selected="true">Human (Homo sapiens)</option> | |
116 <option value="rodent">Rodent (Order Rodentia)</option> | |
117 <option value="mouse">Mouse (Mus musculus)</option> | |
118 <option value="rattus">Rat (Rattus sp.)</option> | |
119 <option value="danio">Danio (zebra fish)</option> | |
120 <option value="drosophila">Fruit fly (Drosophila melanogaster)</option> | |
121 <option value="elegans">Caenorhabditis elegans (nematode)</option> | |
122 </param> | |
123 </when> | |
124 <when value="no"> | |
125 <param name="species_name" type="text" value="human" label="Repeat source species" help="Source species (or clade name) used to select repeats from DFam" /> | |
126 </when> | |
127 </conditional> | |
128 </when> | |
129 <when value="dfam_up"> | |
130 <param name="dfam_lib" type="data" format="h5" label="DFam library" help="The full DFam library can be downloaded from https://www.dfam.org/releases/current/families/Dfam.h5.gz" /> | |
131 <param name="species_name" type="text" value="human" label="Repeat source species" help="Source species (or clade name) used to select repeats from DFam" /> | |
132 </when> | |
133 <when value="library"> | |
134 <param name="repeat_lib" type="data" format="fasta" label="Custom library of repeats" /> | |
135 <param name="cutoff" type="integer" argument="-cutoff" value="225" label="Cutoff score for masking repeats" /> | |
136 </when> | |
137 </conditional> | |
138 <param type="boolean" argument="-gff" truevalue="-gff" falsevalue="" label="Output annotation of repeats in GFF format" checked="false" /> | |
139 <param argument="-excln" type="boolean" truevalue="-excln" falsevalue="" label="Ignore stretches of Ns when computing statistics" checked="true" help="Scaffolds are sometimes joined with stretches of 25 or more Ns. This option ignores them when calculating repeat statistics" /> | |
140 <section name="advanced" title="Advanced options" expanded="false"> | |
141 <param argument="-is_only" type="boolean" truevalue="-is_only" falsevalue="" checked="false" label="Only clip E coli insertion elements" /> | |
142 <param argument="-is_clip" type="boolean" truevalue="-is_clip" falsevalue="" checked="false" label="Clip IS elements before analysis" help="Normally RepeatMasker will report on IS element, with this option selected it will clip them before analysis" /> | |
143 <param argument="-no_is" type="boolean" truevalue="-no_is" falsevalue="" checked="false" label="Skip bacterial insertion element check" /> | |
144 <param argument="-rodspec" type="boolean" truevalue="-rodspec" falsevalue="" checked="false" label="Only check for rodent specific repeats" help="If this option is selected, a check for rodent specific repeats is done instead of a full RepeatMasker run" /> | |
145 <param argument="-primspec" type="boolean" truevalue="-primspec" falsevalue="" checked="false" label="Only check for primate specific repeats" help="If this option is selected, a check for primate specific repeats is done instead of a full RepeatMasker run" /> | |
146 <param argument="-nolow" type="boolean" truevalue="-nolow" falsevalue="" checked="false" label="No low complexity masking" help="Skip masking of simple tandem repeats and low complexity regions." /> | |
147 <param argument="-noint" type="boolean" truevalue="-noint" falsevalue="" checked="false" label="No interspersed repeat masking" help="Only mask simple repeats, skip masking of interspersed repeats." /> | |
148 <param argument="-norna" type="boolean" truevalue="-norna" falsevalue="" checked="false" label="No repeat-like-RNA masking" help="Skip masking of small pol III transcribed RNA (these are masked by default because they resemble SINEs)" /> | |
149 <param argument="-alu" type="boolean" truevalue="-alu" falsevalue="" checked="false" label="Limit masking to (primate) Alu repeats" /> | |
150 <param argument="-div" type="boolean" truevalue="-div" falsevalue="" checked="false" label="Limit masking to less diverged (younger) repeats" /> <!-- NOTE(review): RepeatMasker's usage text appears to list -div with a numeric argument; confirm that passing the bare flag is intended --> | |
151 <param type="select" name="search_speed" label="Search speed vs sensitivity trade-off"> | |
152 <option value="">Default</option> | |
153 <option value="-q">Quick (5-10% less sensitive, 3-4 times speedup)</option> | |
154 <option value="-qq">Rush (10% less sensitive)</option> | |
155 <option value="-s">Slow (0-5% more sensitive, 2.5 times slowdown)</option> | |
156 </param> | |
157 <param type="integer" argument="-frag" value="40000" label="Maximum contiguous sequence searched" help="Maximum length of sequence that is searched without fragmenting" /> | |
158 <!-- -maxsize option is in the help, but not in the code of repeatmasker--> | |
159 <!--param type="integer" argument="-maxsize" value="4000000" label="Maximum length for IS or repeat clipped sequences" /--> | |
160 <param type="integer" argument="-gc" optional="true" label="Select matrices for this GC%" help="Valid values are a percentage or -1 to choose the default" /> | |
161 <param type="boolean" argument="-gccalc" truevalue="-gccalc" falsevalue="" checked="false" label="Calculate GC % for all sequences" help="By default RepeatMasker skips calculating GC % for small sequences" /> | |
162 <param type="boolean" argument="-nocut" truevalue="-nocut" falsevalue="" checked="false" label="Skips cutting of repeats" /> | |
163 <param name="xout" type="boolean" argument="-x" truevalue="-x" falsevalue="" checked="false" label="Mask with X instead of N characters" /> | |
164 <param name="keep_alignments" type="boolean" argument="-ali" truevalue="-ali" falsevalue="" checked="false" label="Output alignments file" /> | |
165 <param name="invert_alignments" type="boolean" argument="-inv" truevalue="-inv" falsevalue="" checked="false" label="Invert alignments in alignment file" help="Show alignments in the orientation of the repeat sequence, not the query sequence" /> | |
166 <param type="boolean" argument="-xsmall" truevalue="-xsmall" falsevalue="" checked="false" label="Output repetitive regions as lowercase, non-repetitive regions as uppercase" /> | |
167 <param type="boolean" argument="-poly" truevalue="-poly" falsevalue="" checked="false" label="Output list of potentially polymorphic microsatellites" /> | |
168 </section> | |
169 </inputs> | |
170 <outputs> <!-- Only the repeat catalogue is unconditional; every other dataset is filtered out when advanced is_only is set. --> | |
171 <data name="output_masked_genome" format="fasta" label="RepeatMasker masked sequence on ${on_string}"> | |
172 <filter>not advanced['is_only']</filter> | |
173 </data> | |
174 <data name="output_log" format="tabular" label="RepeatMasker output log on ${on_string}"> | |
175 <filter>not advanced['is_only']</filter> | |
176 </data> | |
177 <data name="output_table" format="txt" label="RepeatMasker repeat statistics on ${on_string}"> | |
178 <filter>not advanced['is_only']</filter> | |
179 </data> | |
180 <data name="output_repeat_catalog" format="txt" label="RepeatMasker repeat catalogue on ${on_string}" /> | |
181 <data name="output_alignment" format="txt" label="RepeatMasker alignment on ${on_string}"> | |
182 <filter>not advanced['is_only'] and advanced['keep_alignments']</filter> | |
183 </data> | |
184 <data name="output_polymorphic" format="tabular" label="RepeatMasker possible polymorphic repeats on ${on_string}"> | |
185 <filter>not advanced['is_only'] and advanced['poly']</filter> | |
186 </data> | |
187 <data name="output_gff" format="gff" label="RepeatMasker repeat annotation on ${on_string}"> <!-- gff is a top-level parameter, so it is referenced without the advanced section prefix --> | |
188 <filter>not advanced['is_only'] and gff is True</filter> | |
189 </data> | |
190 </outputs> | |
191 <tests> | |
192 <test expect_num_outputs="4"> <!-- Custom repeat library, all other options left at their defaults --> | |
193 <param name="input_fasta" value="small.fasta" ftype="fasta" /> | |
194 <param name="source_type" value="library" /> | |
195 <param name="repeat_lib" value="repeats.fasta" ftype="fasta" /> | |
196 <output name="output_masked_genome" file="small.fasta.masked" /> | |
197 <output name="output_table" file="small.fasta.stats" lines_diff="6" /> | |
198 <output name="output_repeat_catalog" file="small.fasta.cat" lines_diff="2" /> | |
199 <output name="output_log" file="small.fasta.log" lines_diff="2"/> | |
200 </test> | |
201 <test expect_num_outputs="7"> <!-- Custom repeat library with the GFF, alignment and polymorphic outputs switched on --> | |
202 <param name="input_fasta" value="small.fasta" ftype="fasta" /> | |
203 <param name="source_type" value="library" /> | |
204 <param name="gff" value="-gff" /> | |
205 <param name="keep_alignments" value="-ali" /> | |
206 <param name="poly" value="-poly" /> | |
207 <param name="repeat_lib" value="repeats.fasta" ftype="fasta" /> | |
208 <output name="output_masked_genome" file="small.fasta.masked" /> | |
209 <output name="output_table" file="small.fasta.stats" lines_diff="6" /> | |
210 <output name="output_repeat_catalog" file="small.fasta.cat" lines_diff="2" /> | |
211 <output name="output_log" file="small.fasta.log" lines_diff="2"/> | |
212 <output name="output_alignment" file="small.fasta.align" /> | |
213 <output name="output_polymorphic" file="small.fasta.poly" /> | |
214 <output name="output_gff" file="small.fasta.gff" lines_diff="4" /> | |
215 </test> | |
216 <test expect_num_outputs="4"> <!-- Bundled Dfam source, species "human" picked from the list --> | |
217 <param name="input_fasta" value="small.fasta" ftype="fasta" /> | |
218 <param name="source_type" value="dfam" /> | |
219 <param name="species_list" value="human" /> | |
220 <output name="output_masked_genome" file="small_dfam.fasta.masked" /> | |
221 <output name="output_table" file="small_dfam.fasta.stats" lines_diff="2" /> | |
222 <output name="output_repeat_catalog" file="small_dfam.fasta.cat" lines_diff="2" /> | |
223 <output name="output_log" file="small_dfam.fasta.log" lines_diff="2"/> | |
224 </test> | |
225 <test expect_num_outputs="4"> <!-- User-supplied Dfam .h5 file with species "rodent" --> | |
226 <param name="input_fasta" value="small.fasta" ftype="fasta" /> | |
227 <param name="source_type" value="dfam_up" /> | |
228 <param name="dfam_lib" value="Dfam_partial_test.h5" ftype="h5" /> | |
229 <param name="species_name" value="rodent" /> | |
230 <output name="output_masked_genome" file="small_dfam_up.fasta.masked" /> | |
231 <output name="output_table" file="small_dfam_up.fasta.stats" lines_diff="2" /> | |
232 <output name="output_repeat_catalog" file="small_dfam_up.fasta.cat" lines_diff="2" /> | |
233 <output name="output_log" file="small_dfam_up.fasta.log" lines_diff="2"/> | |
234 </test> | |
235 <test expect_num_outputs="4"> <!-- Bundled Dfam source, species "rattus" picked from the list --> | |
236 <param name="input_fasta" value="small.fasta" ftype="fasta" /> | |
237 <param name="source_type" value="dfam" /> | |
238 <param name="species_list" value="rattus" /> | |
239 <output name="output_masked_genome" file="small_dfam_rattus.fasta.masked" /> | |
240 <output name="output_table" file="small_dfam_rattus.fasta.stats" lines_diff="2" /> | |
241 <output name="output_repeat_catalog" file="small_dfam_rattus.fasta.cat" lines_diff="2" /> | |
242 <output name="output_log" file="small_dfam_rattus.fasta.log" lines_diff="2"/> | |
243 </test> | |
244 </tests> | |
245 <help><![CDATA[ | |
246 RepeatMasker is a program that screens DNA for interspersed repeats and low | |
247 complexity DNA sequences. The database of repeats to screen for can be | |
248 provided as a FASTA file or downloaded from RepBase_. If the RepBase option is | |
249 chosen the RepBaseRepeatMaskerEdition file should be downloaded and | |
250 unpacked, and the enclosed EMBL format file ('RMRBSeqs.embl') should | |
251 be uploaded to Galaxy for use with this tool. | |
252 | |
253 Further documentation is available on the RepeatMasker homepage_. | |
254 | |
255 .. _RepBase: http://www.girinst.org/repbase/ | |
256 .. _homepage: http://www.repeatmasker.org/webrepeatmaskerhelp.html | |
257 ]]> | |
258 </help> | |
259 <expand macro="citations" /> | |
260 </tool> |