Mercurial > repos > bgruening > repeat_masker
changeset 14:7563ea7a922d draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/repeatmasker commit 7a5f368a5859e659aa36d0358bb96ca12574e2cc
author | iuc |
---|---|
date | Mon, 24 Apr 2023 10:29:31 +0000 |
parents | 3f987772e283 |
children | ba6d2c32f797 |
files | macros.xml repeatmasker.xml.orig test-data/Dfam_partial_test.h5 test-data/README.md test-data/small.fasta.cat test-data/small.fasta.gff test-data/small.fasta.log test-data/small.fasta.stats test-data/small_dfam.fasta.cat test-data/small_dfam.fasta.log test-data/small_dfam.fasta.stats test-data/small_dfam_rattus.fasta.cat test-data/small_dfam_rattus.fasta.log test-data/small_dfam_rattus.fasta.stats test-data/small_dfam_up.fasta.cat test-data/small_dfam_up.fasta.log test-data/small_dfam_up.fasta.stats |
diffstat | 17 files changed, 312 insertions(+), 47 deletions(-) [+] |
line wrap: on
line diff
--- a/macros.xml Thu Oct 21 15:49:31 2021 +0000 +++ b/macros.xml Mon Apr 24 10:29:31 2023 +0000 @@ -1,6 +1,6 @@ <macros> - <token name="@TOOL_VERSION@">4.1.2-p1</token> - <token name="@VERSION_SUFFIX@">galaxy1</token> + <token name="@TOOL_VERSION@">4.1.5</token> + <token name="@VERSION_SUFFIX@">galaxy0</token> <xml name="edam_ontology"> <edam_topics> <edam_topic>topic_0157</edam_topic>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/repeatmasker.xml.orig Mon Apr 24 10:29:31 2023 +0000 @@ -0,0 +1,260 @@ +<<<<<<< HEAD +<tool id="repeatmasker_wrapper" name="RepeatMasker" version="@TOOL_VERSION@+@GALAXY_TOOL_VERSION@" profile="20.01"> + <description>screen DNA sequences for interspersed repeats and low complexity regions</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro='xrefs'/> + <expand macro='edam_ontology' /> + <expand macro='requirements' /> + <version_command>repeatmasker --version</version_command> + <command detect_errors="exit_code"><![CDATA[ +======= +<tool id="repeatmasker_wrapper" name="RepeatMasker" version="4.1.1" profile="17.01"> + <description>screen DNA sequences for interspersed repeats and low complexity regions</description> + <xrefs> + <xref type="bio.tools">RepeatMasker</xref> + </xrefs> + <requirements> + <requirement type="package" version="4.1.1">repeatmasker</requirement> + </requirements> + + <command detect_errors="exit_code"><![CDATA[ +>>>>>>> c895e2728 (Update repeatmasker.xml) + RM_PATH=\$(which RepeatMasker) && + if [ -z "\$RM_PATH" ] ; then echo "Failed to find RepeatMasker in PATH (\$PATH)" >&2 ; exit 1 ; fi && + + RM_LIB_PATH=\$(dirname \$RM_PATH)/../share/RepeatMasker/Libraries && + #if $repeat_source.source_type == "dfam_up": + mkdir lib/ && + ln -s '${repeat_source.dfam_lib}' lib/RepeatMaskerLib.h5 && + RM_LIB_PATH=\$(pwd)/lib && + #end if + + ln -s '${input_fasta}' rm_input.fasta && + + RepeatMasker -dir \$(pwd) + -libdir \$RM_LIB_PATH + #if $repeat_source.source_type == "library": + -lib '${repeat_source.repeat_lib}' + -cutoff '${repeat_source.cutoff}' + #else if $repeat_source.source_type == "dfam": + #if $repeat_source.species_source.species_from_list == 'yes': + -species $repeat_source.species_source.species_list + #else + -species '${repeat_source.species_source.species_name}' + #end if + #else if $repeat_source.source_type == "dfam_up": + -species 
'${repeat_source.species_name}' + #end if + -parallel \${GALAXY_SLOTS:-1} + ${gff} + ${excln} + ${advanced.is_only} + ${advanced.is_clip} + ${advanced.no_is} + ${advanced.rodspec} + ${advanced.primspec} + ${advanced.nolow} + ${advanced.noint} + ${advanced.norna} + ${advanced.alu} + ${advanced.div} + ${advanced.search_speed} + -frag ${advanced.frag} + ## -maxsize ${advanced.maxsize} + #if str($advanced.gc): + -gc ${advanced.gc} + #end if + ${advanced.gccalc} + ${advanced.nocut} + ${advanced.keep_alignments} + ${advanced.invert_alignments} + ${advanced.xout} + ${advanced.xsmall} + ${advanced.poly} + rm_input.fasta && + #if $advanced.is_only != '-is_only': + mv rm_input.fasta.masked '${output_masked_genome}' && + sed -E 's/^ *// ; s/ *$//; s/\+ //; s/ +/\t/g ; 1,2c SW score\t% div.\t% del.\t% ins.\tquery sequence\tpos in query: begin\tend\t(left)\trepeat\tclass/family\tpos in repeat: begin\tend\t(left)\tID' rm_input.fasta.out >'${output_log}' && + mv rm_input.fasta.tbl '${output_table}' && + #if $gff == '-gff': + mv rm_input.fasta.out.gff '${output_gff}' && + #end if + #if $advanced.keep_alignments == '-ali': + mv rm_input.fasta.align '${output_alignment}' && + #end if + #if $advanced.poly == '-poly': + sed -E 's/^ *// ; s/ *$//; s/\+ //; s/ +/\t/g' rm_input.fasta.polyout >'${output_polymorphic}' && + #end if + #end if + if [ -f 'rm_input.fasta.cat.gz' ]; then + zcat 'rm_input.fasta.cat.gz' > '${output_repeat_catalog}'; + else + mv rm_input.fasta.cat '${output_repeat_catalog}'; + fi + ]]> + </command> + + <inputs> + <param name="input_fasta" type="data" format="fasta" label="Genomic DNA" /> + <conditional name="repeat_source"> + <param label="Repeat library source" name="source_type" type="select" help="To use RepBase, choose 'Custom library of repeats' and select a fasta version of this non-free database."> + <option selected="true" value="dfam">DFam (curated only, bundled with RepeatMasker)</option> + <option value="dfam_up">DFam (full/specific version)</option> + 
<option value="library">Custom library of repeats</option> + </param> + <when value="dfam"> + <conditional name="species_source"> + <param label="Select species name from a list?" name="species_from_list" type="select"> + <option value="yes" selected="true">Yes</option> + <option value="no">No</option> + </param> + <when value="yes"> + <param name="species_list" type="select" label="Species"> + <option value="human" selected="true">Human (Homo sapiens)</option> + <option value="rodent">Rodent (Order Rodentia)</option> + <option value="mouse">Mouse (Mus musculus)</option> + <option value="rattus">Rat (Rattus sp.)</option> + <option value="danio">Danio (zebra fish)</option> + <option value="drosophila">Fruit fly (Drosophila melanogaster)</option> + <option value="elegans">Caenorhabditis elegans (nematode)</option> + </param> + </when> + <when value="no"> + <param name="species_name" type="text" value="human" label="Repeat source species" help="Source species (or clade name) used to select repeats from DFam" /> + </when> + </conditional> + </when> + <when value="dfam_up"> + <param name="dfam_lib" type="data" format="h5" label="DFam library" help="The full DFam library can be downloaded from https://www.dfam.org/releases/current/families/Dfam.h5.gz" /> + <param name="species_name" type="text" value="human" label="Repeat source species" help="Source species (or clade name) used to select repeats from DFam" /> + </when> + <when value="library"> + <param name="repeat_lib" type="data" format="fasta" label="Custom library of repeats" /> + <param name="cutoff" type="integer" argument="-cutoff" value="225" label="Cutoff score for masking repeats" /> + </when> + </conditional> + <param type="boolean" argument="-gff" truevalue="-gff" falsevalue="" label="Output annotation of repeats in GFF format" checked="false" /> + <param argument="-excln" type="boolean" truevalue="-excln" falsevalue="" label="Ignore stretches of Ns when computing statistics" checked="true" help="Scaffolds 
are sometimes joined with stretches of 25 or more Ns. This option ignores them when calculating repeat statistics" /> + <section name="advanced" title="Advanced options" expanded="false"> + <param argument="-is_only" type="boolean" truevalue="-is_only" falsevalue="" checked="false" label="Only clip E coli insertion elements" /> + <param argument="-is_clip" type="boolean" truevalue="-is_clip" falsevalue="" checked="false" label="Clip IS elements before analysis" help="Normally RepeatMasker will report on IS elements; with this option selected it will clip them before analysis" /> + <param argument="-no_is" type="boolean" truevalue="-no_is" falsevalue="" checked="false" label="Skip bacterial insertion element check" /> + <param argument="-rodspec" type="boolean" truevalue="-rodspec" falsevalue="" checked="false" label="Only check for rodent specific repeats" help="If this option is selected, a check for rodent specific repeats is done instead of a full RepeatMasker run" /> + <param argument="-primspec" type="boolean" truevalue="-primspec" falsevalue="" checked="false" label="Only check for primate specific repeats" help="If this option is selected, a check for primate specific repeats is done instead of a full RepeatMasker run" /> + <param argument="-nolow" type="boolean" truevalue="-nolow" falsevalue="" checked="false" label="No low complexity masking" help="Skip masking of simple tandem repeats and low complexity regions." /> + <param argument="-noint" type="boolean" truevalue="-noint" falsevalue="" checked="false" label="No interspersed repeat masking" help="Only mask simple repeats, skip masking of interspersed repeats." 
/> + <param argument="-norna" type="boolean" truevalue="-norna" falsevalue="" checked="false" label="No repeat-like-RNA masking" help="Skip masking of small pol III transcribed RNA (these are masked by default because they resemble SINEs)" /> + <param argument="-alu" type="boolean" truevalue="-alu" falsevalue="" checked="false" label="Limit masking to (primate) Alu repeats" /> + <param argument="-div" type="boolean" truevalue="-div" falsevalue="" checked="false" label="Limit masking to less diverged (younger) repeats" /> + <param type="select" name="search_speed" label="Search speed vs sensitivity trade-off"> + <option value="">Default</option> + <option value="-q">Quick (5-10% less sensitive, 3-4 times speedup)</option> + <option value="-qq">Rush (10% less sensitive)</option> + <option value="-s">Slow (0-5% more sensitive, 2.5 times slowdown)</option> + </param> + <param type="integer" argument="-frag" value="40000" label="Maximum contiguous sequence searched" help="Maximum length of sequence that is searched without fragmenting" /> + <!-- -maxsize option is in the help, but not in the code of repeatmasker--> + <!--param type="integer" argument="-maxsize" value="4000000" label="Maximum length for IS or repeat clipped sequences" /--> + <param type="integer" argument="-gc" optional="true" label="Select matrices for this GC%" help="Valid values are a percentage or -1 to choose the default" /> + <param type="boolean" argument="-gccalc" truevalue="-gcccalc" falsevalue="" checked="false" label="Calculate GC % for all sequences" help="By default RepeatMasker skips calculating GC % for small sequences" /> + <param type="boolean" argument="-nocut" truevalue="-nocut" falsevalue="" checked="false" label="Skips cutting of repeats" /> + <param name="xout" type="boolean" argument="-x" truevalue="-x" falsevalue="" checked="false" label="Mask with X instead of N characters" /> + <param name="keep_alignments" type="boolean" argument="-ali" truevalue="-ali" falsevalue="" 
checked="false" label="Output alignments file" /> + <param name="invert_alignments" type="boolean" argument="-inv" truevalue="-inv" falsevalue="" checked="false" label="Invert alignments in alignment file" help="Show alignments in the orientation of the repeat sequence, not the query sequence" /> + <param type="boolean" argument="-xsmall" truevalue="-xsmall" falsevalue="" checked="false" label="Output repetitive regions as lowercase, non-repetitive regions as uppercase" /> + <param type="boolean" argument="-poly" truevalue="-poly" falsevalue="" checked="false" label="Output list of potentially polymorphic microsatellites" /> + </section> + </inputs> + <outputs> + <data name="output_masked_genome" format="fasta" label="RepeatMasker masked sequence on ${on_string}"> + <filter>not advanced['is_only']</filter> + </data> + <data name="output_log" format="tabular" label="RepeatMasker output log on ${on_string}"> + <filter>not advanced['is_only']</filter> + </data> + <data name="output_table" format="txt" label="RepeatMasker repeat statistics on ${on_string}"> + <filter>not advanced['is_only']</filter> + </data> + <data name="output_repeat_catalog" format="txt" label="RepeatMasker repeat catalogue on ${on_string}" /> + <data name="output_alignment" format="txt" label="RepeatMasker alignment on ${on_string}"> + <filter>not advanced['is_only'] and advanced['keep_alignments']</filter> + </data> + <data name="output_polymorphic" format="tabular" label="RepeatMasker possible polymorphic repeats on ${on_string}"> + <filter>not advanced['is_only'] and advanced['poly']</filter> + </data> + <data name="output_gff" format="gff" label="RepeatMasker repeat annotation on ${on_string}"> + <filter>not advanced['is_only'] and gff is True</filter> + </data> + </outputs> + <tests> + <test expect_num_outputs="4"> + <param name="input_fasta" value="small.fasta" ftype="fasta" /> + <param name="source_type" value="library" /> + <param name="repeat_lib" value="repeats.fasta" ftype="fasta" /> + 
<output name="output_masked_genome" file="small.fasta.masked" /> + <output name="output_table" file="small.fasta.stats" lines_diff="6" /> + <output name="output_repeat_catalog" file="small.fasta.cat" lines_diff="2" /> + <output name="output_log" file="small.fasta.log" lines_diff="2"/> + </test> + <test expect_num_outputs="7"> + <param name="input_fasta" value="small.fasta" ftype="fasta" /> + <param name="source_type" value="library" /> + <param name="gff" value="-gff" /> + <param name="keep_alignments" value="-ali" /> + <param name="poly" value="-poly" /> + <param name="repeat_lib" value="repeats.fasta" ftype="fasta" /> + <output name="output_masked_genome" file="small.fasta.masked" /> + <output name="output_table" file="small.fasta.stats" lines_diff="6" /> + <output name="output_repeat_catalog" file="small.fasta.cat" lines_diff="2" /> + <output name="output_log" file="small.fasta.log" lines_diff="2"/> + <output name="output_alignment" file="small.fasta.align" /> + <output name="output_polymorphic" file="small.fasta.poly" /> + <output name="output_gff" file="small.fasta.gff" lines_diff="4" /> + </test> + <test expect_num_outputs="4"> + <param name="input_fasta" value="small.fasta" ftype="fasta" /> + <param name="source_type" value="dfam" /> + <param name="species_list" value="human" /> + <output name="output_masked_genome" file="small_dfam.fasta.masked" /> + <output name="output_table" file="small_dfam.fasta.stats" lines_diff="2" /> + <output name="output_repeat_catalog" file="small_dfam.fasta.cat" lines_diff="2" /> + <output name="output_log" file="small_dfam.fasta.log" lines_diff="2"/> + </test> + <test expect_num_outputs="4"> + <param name="input_fasta" value="small.fasta" ftype="fasta" /> + <param name="source_type" value="dfam_up" /> + <param name="dfam_lib" value="Dfam_partial_test.h5" ftype="h5" /> + <param name="species_name" value="rodent" /> + <output name="output_masked_genome" file="small_dfam_up.fasta.masked" /> + <output name="output_table" 
file="small_dfam_up.fasta.stats" lines_diff="2" /> + <output name="output_repeat_catalog" file="small_dfam_up.fasta.cat" lines_diff="2" /> + <output name="output_log" file="small_dfam_up.fasta.log" lines_diff="2"/> + </test> + <test expect_num_outputs="4"> + <param name="input_fasta" value="small.fasta" ftype="fasta" /> + <param name="source_type" value="dfam" /> + <param name="species_list" value="rattus" /> + <output name="output_masked_genome" file="small_dfam_rattus.fasta.masked" /> + <output name="output_table" file="small_dfam_rattus.fasta.stats" lines_diff="2" /> + <output name="output_repeat_catalog" file="small_dfam_rattus.fasta.cat" lines_diff="2" /> + <output name="output_log" file="small_dfam_rattus.fasta.log" lines_diff="2"/> + </test> + </tests> + <help><![CDATA[ +RepeatMasker is a program that screens DNA for interspersed repeats and low +complexity DNA sequences. The database of repeats to screen for can be +provided as a FASTA file or downloaded from RepBase_. If the RepBase option is +chosen the RepBaseRepeatMaskerEdition file should be downloaded and +unpacked, and the enclosed EMBL format file ('RMRBSeqs.embl') should +be uploaded to Galaxy for use with this tool. + +Further documentation is available on the RepeatMasker homepage_. + +.. _RepBase: http://www.girinst.org/repbase/ +.. _homepage: http://www.repeatmasker.org/webrepeatmaskerhelp.html + ]]> + </help> + <expand macro="citations" /> +</tool>
--- a/test-data/README.md Thu Oct 21 15:49:31 2021 +0000 +++ b/test-data/README.md Mon Apr 24 10:29:31 2023 +0000 @@ -1,1 +1,4 @@ -Dfam_partial_test.h5 was generated from the test dataset in https://github.com/Dfam-consortium/FamDB/ (commit: 6b28b66) +Dfam_partial_test.h5 was generated from the test dataset in https://github.com/Dfam-consortium/FamDB/ (commit: 20c436d) + +`./export_dfam.py --from-tax-dump /path/to/taxonomy_dump/from_ncbi/ --from-hmm test_data/Dfam_partial.hmm --db-version 1.0 Dfam_partial_test.h5` +
--- a/test-data/small.fasta.cat Thu Oct 21 15:49:31 2021 +0000 +++ b/test-data/small.fasta.cat Mon Apr 24 10:29:31 2023 +0000 @@ -98,6 +98,6 @@ ## Total Length: 14220 ## Total NonMask ( excluding >20bp runs of N/X bases ): 14220 ## Total NonSub ( excluding all non ACGT bases ):14220 -RepeatMasker version 4.1.2-p1 , default mode -run with rmblastn version 2.10.0+ +RepeatMasker version 4.1.5 , default mode +run with rmblastn version 2.13.0+ RM Library:
--- a/test-data/small.fasta.gff Thu Oct 21 15:49:31 2021 +0000 +++ b/test-data/small.fasta.gff Mon Apr 24 10:29:31 2023 +0000 @@ -1,11 +1,10 @@ -##gff-version 2 -##date 2021-05-20 -##sequence-region rm_input.fasta -scaffold_1 RepeatMasker similarity 613 632 0.0 + . Target "Motif:(GT)n" 1 20 -scaffold_1 RepeatMasker similarity 780 824 18.3 + . Target "Motif:(ATAATA)n" 1 45 -scaffold_1 RepeatMasker similarity 2231 2274 23.9 + . Target "Motif:(CAGA)n" 1 46 -scaffold_1 RepeatMasker similarity 4853 4901 18.4 + . Target "Motif:(TC)n" 1 54 -scaffold_1 RepeatMasker similarity 6230 6284 19.1 + . Target "Motif:(TAATTAA)n" 1 52 -scaffold_1 RepeatMasker similarity 6548 6606 28.3 + . Target "Motif:(GACA)n" 1 57 -scaffold_1 RepeatMasker similarity 11981 12050 2.9 + . Target "Motif:(CT)n" 1 71 -scaffold_1 RepeatMasker similarity 12078 12113 15.4 + . Target "Motif:(CT)n" 1 37 +##gff-version 3 +##sequence-region scaffold_1 1 14220 +scaffold_1 RepeatMasker dispersed_repeat 613 632 0.0 + . ID=1;Target "Motif:(GT)n" 1 20 +scaffold_1 RepeatMasker dispersed_repeat 780 824 18.3 + . ID=2;Target "Motif:(ATAATA)n" 1 45 +scaffold_1 RepeatMasker dispersed_repeat 2231 2274 23.9 + . ID=3;Target "Motif:(CAGA)n" 1 46 +scaffold_1 RepeatMasker dispersed_repeat 4853 4901 18.4 + . ID=4;Target "Motif:(TC)n" 1 54 +scaffold_1 RepeatMasker dispersed_repeat 6230 6284 19.1 + . ID=5;Target "Motif:(TAATTAA)n" 1 52 +scaffold_1 RepeatMasker dispersed_repeat 6548 6606 28.3 + . ID=6;Target "Motif:(GACA)n" 1 57 +scaffold_1 RepeatMasker dispersed_repeat 11981 12050 2.9 + . ID=7;Target "Motif:(CT)n" 1 71 +scaffold_1 RepeatMasker dispersed_repeat 12078 12113 15.4 + . ID=8;Target "Motif:(CT)n" 1 37
--- a/test-data/small.fasta.log Thu Oct 21 15:49:31 2021 +0000 +++ b/test-data/small.fasta.log Mon Apr 24 10:29:31 2023 +0000 @@ -1,4 +1,4 @@ -SW scoret% div.t% del.t% ins.tquery sequencetpos in query: begintendt(left)trepeattclass/familytpos in repeat: begintendt(left)tID +SW score % div. % del. % ins. query sequence pos in query: begin end (left) repeat class/family pos in repeat: begin end (left) ID 18 0.0 0.0 0.0 scaffold_1 613 632 (13588) (GT)n Simple_repeat 1 20 (0) 1 16 18.3 2.2 2.2 scaffold_1 780 824 (13396) (ATAATA)n Simple_repeat 1 45 (0) 2
--- a/test-data/small.fasta.stats Thu Oct 21 15:49:31 2021 +0000 +++ b/test-data/small.fasta.stats Mon Apr 24 10:29:31 2023 +0000 @@ -10,7 +10,7 @@ -------------------------------------------------- Retroelements 0 0 bp 0.00 % SINEs: 0 0 bp 0.00 % - Penelope 0 0 bp 0.00 % + Penelope: 0 0 bp 0.00 % LINEs: 0 0 bp 0.00 % CRE/SLACS 0 0 bp 0.00 % L2/CR1/Rex 0 0 bp 0.00 % @@ -28,7 +28,7 @@ hobo-Activator 0 0 bp 0.00 % Tc1-IS630-Pogo 0 0 bp 0.00 % En-Spm 0 0 bp 0.00 % - MuDR-IS905 0 0 bp 0.00 % + MULE-MuDR 0 0 bp 0.00 % PiggyBac 0 0 bp 0.00 % Tourist/Harbinger 0 0 bp 0.00 % Other (Mirage, 0 0 bp 0.00 % @@ -53,8 +53,8 @@ Runs of >=20 X/Ns in query were excluded in % calcs -RepeatMasker version 4.1.2-p1 , default mode - -run with rmblastn version 2.10.0+ -The query was compared to unclassified sequences in ".../dataset_a3b3078d-de09-4651-9e83-62019a3d45ba.dat" +RepeatMasker version 4.1.5 , default mode + +run with rmblastn version 2.13.0+ +The query was compared to unclassified sequences in ".../dataset_9e3ddbd2-0776-4c6d-bed6-0f4cd415796c.dat" FamDB:
--- a/test-data/small_dfam.fasta.cat Thu Oct 21 15:49:31 2021 +0000 +++ b/test-data/small_dfam.fasta.cat Mon Apr 24 10:29:31 2023 +0000 @@ -82,6 +82,7 @@ Matrix = 25p39g.matrix Kimura (with divCpGMod) = 29.45 +CpG sites = 10, Kimura (unadjusted) = 31.65 Transitions / transversions = 1.43 (10/7) Gap_init rate = 0.07 (5 / 70), avg. gap size = 1.00 (5 / 5) @@ -113,6 +114,6 @@ ## Total Length: 14220 ## Total NonMask ( excluding >20bp runs of N/X bases ): 14220 ## Total NonSub ( excluding all non ACGT bases ):14220 -RepeatMasker version 4.1.2-p1 , default mode -run with rmblastn version 2.10.0+ -RM Library: CONS-Dfam_3.3 +RepeatMasker version 4.1.5 , default mode +run with rmblastn version 2.13.0+ +RM Library: CONS-Dfam_3.7
--- a/test-data/small_dfam.fasta.log Thu Oct 21 15:49:31 2021 +0000 +++ b/test-data/small_dfam.fasta.log Mon Apr 24 10:29:31 2023 +0000 @@ -1,4 +1,4 @@ -SW scoret% div.t% del.t% ins.tquery sequencetpos in query: begintendt(left)trepeattclass/familytpos in repeat: begintendt(left)tID +SW score % div. % del. % ins. query sequence pos in query: begin end (left) repeat class/family pos in repeat: begin end (left) ID 18 0.0 0.0 0.0 scaffold_1 613 632 (13588) (GT)n Simple_repeat 1 20 (0) 1 16 18.3 2.2 2.2 scaffold_1 780 824 (13396) (ATAATA)n Simple_repeat 1 45 (0) 2
--- a/test-data/small_dfam.fasta.stats Thu Oct 21 15:49:31 2021 +0000 +++ b/test-data/small_dfam.fasta.stats Mon Apr 24 10:29:31 2023 +0000 @@ -45,7 +45,7 @@ The query species was assumed to be human -RepeatMasker version 4.1.2-p1 , default mode - -run with rmblastn version 2.10.0+ -FamDB: CONS-Dfam_3.3 +RepeatMasker version 4.1.5 , default mode + +run with rmblastn version 2.13.0+ +FamDB: CONS-Dfam_3.7
--- a/test-data/small_dfam_rattus.fasta.cat Thu Oct 21 15:49:31 2021 +0000 +++ b/test-data/small_dfam_rattus.fasta.cat Mon Apr 24 10:29:31 2023 +0000 @@ -98,6 +98,6 @@ ## Total Length: 14220 ## Total NonMask ( excluding >20bp runs of N/X bases ): 14220 ## Total NonSub ( excluding all non ACGT bases ):14220 -RepeatMasker version 4.1.2-p1 , default mode -run with rmblastn version 2.10.0+ -RM Library: CONS-Dfam_3.3 +RepeatMasker version 4.1.5 , default mode +run with rmblastn version 2.13.0+ +RM Library: CONS-Dfam_3.7
--- a/test-data/small_dfam_rattus.fasta.log Thu Oct 21 15:49:31 2021 +0000 +++ b/test-data/small_dfam_rattus.fasta.log Mon Apr 24 10:29:31 2023 +0000 @@ -1,4 +1,4 @@ -SW scoret% div.t% del.t% ins.tquery sequencetpos in query: begintendt(left)trepeattclass/familytpos in repeat: begintendt(left)tID +SW score % div. % del. % ins. query sequence pos in query: begin end (left) repeat class/family pos in repeat: begin end (left) ID 18 0.0 0.0 0.0 scaffold_1 613 632 (13588) (GT)n Simple_repeat 1 20 (0) 1 16 18.3 2.2 2.2 scaffold_1 780 824 (13396) (ATAATA)n Simple_repeat 1 45 (0) 2
--- a/test-data/small_dfam_rattus.fasta.stats Thu Oct 21 15:49:31 2021 +0000 +++ b/test-data/small_dfam_rattus.fasta.stats Mon Apr 24 10:29:31 2023 +0000 @@ -14,6 +14,7 @@ IDs 0 0 bp 0.00 % MIRs 0 0 bp 0.00 % + LINEs: 0 0 bp 0.00 % LINE1 0 0 bp 0.00 % LINE2 0 0 bp 0.00 % @@ -47,7 +48,7 @@ The query species was assumed to be rattus -RepeatMasker version 4.1.2-p1 , default mode - -run with rmblastn version 2.10.0+ -FamDB: CONS-Dfam_3.3 +RepeatMasker version 4.1.5 , default mode + +run with rmblastn version 2.13.0+ +FamDB: CONS-Dfam_3.7
--- a/test-data/small_dfam_up.fasta.cat Thu Oct 21 15:49:31 2021 +0000 +++ b/test-data/small_dfam_up.fasta.cat Mon Apr 24 10:29:31 2023 +0000 @@ -98,6 +98,6 @@ ## Total Length: 14220 ## Total NonMask ( excluding >20bp runs of N/X bases ): 14220 ## Total NonSub ( excluding all non ACGT bases ):14220 -RepeatMasker version 4.1.2-p1 , default mode -run with rmblastn version 2.10.0+ -RM Library: CONS-_ +RepeatMasker version 4.1.5 , default mode +run with rmblastn version 2.13.0+ +RM Library: CONS-Dfam_1.0
--- a/test-data/small_dfam_up.fasta.log Thu Oct 21 15:49:31 2021 +0000 +++ b/test-data/small_dfam_up.fasta.log Mon Apr 24 10:29:31 2023 +0000 @@ -1,4 +1,4 @@ -SW scoret% div.t% del.t% ins.tquery sequencetpos in query: begintendt(left)trepeattclass/familytpos in repeat: begintendt(left)tID +SW score % div. % del. % ins. query sequence pos in query: begin end (left) repeat class/family pos in repeat: begin end (left) ID 18 0.0 0.0 0.0 scaffold_1 613 632 (13588) (GT)n Simple_repeat 1 20 (0) 1 16 18.3 2.2 2.2 scaffold_1 780 824 (13396) (ATAATA)n Simple_repeat 1 45 (0) 2
--- a/test-data/small_dfam_up.fasta.stats Thu Oct 21 15:49:31 2021 +0000 +++ b/test-data/small_dfam_up.fasta.stats Mon Apr 24 10:29:31 2023 +0000 @@ -14,6 +14,7 @@ IDs 0 0 bp 0.00 % MIRs 0 0 bp 0.00 % + LINEs: 0 0 bp 0.00 % LINE1 0 0 bp 0.00 % LINE2 0 0 bp 0.00 % @@ -47,7 +48,7 @@ The query species was assumed to be rodent -RepeatMasker version 4.1.2-p1 , default mode - -run with rmblastn version 2.10.0+ -FamDB: CONS-_ +RepeatMasker version 4.1.5 , default mode + +run with rmblastn version 2.13.0+ +FamDB: CONS-Dfam_1.0