# HG changeset patch
# User petr-novak
# Date 1567502402 14400
# Node ID 3151a72a667155298e2375287133b4656b2010ed
# Parent a6c55d1bdb6cfb469ea0e051a7d9c72189fcedd1
Uploaded
diff -r a6c55d1bdb6c -r 3151a72a6671 coverage2gff.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/coverage2gff.py Tue Sep 03 05:20:02 2019 -0400
@@ -0,0 +1,65 @@
+#!/usr/bin/env python3
+import argparse
+import tempfile
+import shutil
+import sys
+
+def parse_args():
+    '''Argument parsing'''
+    description = """
+    append mean coverage of each feature to a gff3 file
+    """
+
+    parser = argparse.ArgumentParser(
+        description=description,
+        formatter_class=argparse.RawTextHelpFormatter)
+    parser.add_argument(
+        '-g',
+        '--gff_file',
+        default=None,
+        required=True,
+        help="input gff3 file for appending coverage information",
+        type=str,
+        action='store')
+    parser.add_argument(
+        '-p',
+        '--profile',
+        default=None,
+        required=True,
+        help="input file with coverage profile",
+        type=str,
+        action="store")
+    return parser.parse_args()
+
+def read_coverage(profile):
+ with open(profile) as p:
+ d = {}
+ for name, prof in zip(p, p):
+ d[name[1:].strip()] = [int(i) for i in prof.split()]
+ print(d, file=sys.stderr)
+ return d
+
+
+def main():
+    '''Append a Coverage attribute (mean per-base coverage) to every gff3 feature.'''
+    args = parse_args()
+    coverage_hash = read_coverage(args.profile)
+    gff_tmp = tempfile.NamedTemporaryFile()
+    with open(args.gff_file) as f, open(gff_tmp.name, 'w') as out:
+        for line in f:
+            # comments and blank lines carry no feature; copy them through unchanged
+            if line.startswith("#") or not line.strip():
+                out.write(line)
+            else:
+                line_parts = line.split()
+                start, end = int(line_parts[3]), int(line_parts[4])
+                profile = coverage_hash[line_parts[0]][(start - 1):end]
+                coverage = round(sum(profile) / (end - start + 1), 3)
+                out.write("{};Coverage={}\n".format(line.strip(), coverage))
+
+    shutil.copyfile(gff_tmp.name, args.gff_file)
+
+
+if __name__ == "__main__":
+
+ main()
diff -r a6c55d1bdb6c -r 3151a72a6671 dante.py
--- a/dante.py Wed Aug 28 08:08:47 2019 -0400
+++ b/dante.py Tue Sep 03 05:20:02 2019 -0400
@@ -586,10 +586,10 @@
if count_region == len(indices_plus):
strand_gff = "-"
if strand_gff == "+":
- feature_start = min(start_hit[regions_above_threshold])-1
+ feature_start = min(start_hit[regions_above_threshold]) + 1
feature_end = max(end_hit[regions_above_threshold])
else:
- feature_end = seq_len[region][0] - min(start_hit[regions_above_threshold]) - 1
+ feature_end = seq_len[region][0] - min(start_hit[regions_above_threshold])
feature_start = seq_len[region][0] - max(end_hit[regions_above_threshold]) + 1
create_gff3(domain_type, ann_substring, unique_annotations,
ann_pos_counts, feature_start,feature_end,
diff -r a6c55d1bdb6c -r 3151a72a6671 dante.xml
--- a/dante.xml Wed Aug 28 08:08:47 2019 -0400
+++ b/dante.xml Tue Sep 03 05:20:02 2019 -0400
@@ -6,123 +6,185 @@
rexdbREXDB
-
-
-
-
-
-python3 ${__tool_directory__}/dante.py --query ${input} --domain_gff ${DomGff}
- --protein_database \${REXDB}/${db_type}_pdb
- --classification \${REXDB}/${db_type}_class
- --scoring_matrix ${scoring_matrix}
- &&
+
+
+
+
+
+ #if str($input_type.input_type_selector) == "aln"
+ python3 ${__tool_directory__}/parse_aln.py -a $(input_sequences) -f sequences.fasta -p sequences.profile
+ &&
+ INPUT_SEQUENCES="sequences.fasta"
+ #else
+ INPUT_SEQUENCES=$(input_sequences)
+ #end if
+ &&
+
+
+ python3 ${__tool_directory__}/dante.py --query \${INPUT_SEQUENCES} --domain_gff ${DomGff}
+ --protein_database \${REXDB}/${db_type}_pdb
+ --classification \${REXDB}/${db_type}_class
+ --scoring_matrix ${scoring_matrix}
+
+
+ #if str($input_type.input_type_selector) == "aln"
+ &&
+ python3 ${__tool_directory__}/coverage2gff.py -p sequences.profile -g ${DomGff}
+ #end if
-python3 ${__tool_directory__}/dante_gff_output_filtering.py --dom_gff ${DomGff}
---domains_prot_seq domains_filtered.fasta --domains_filtered domains_filtered.gff
---output_dir .
---selected_dom All --th_identity 0.35
---th_similarity 0.45 --th_length 0.9
---interruptions 1 --max_len_proportion 1.1
---element_type '' &&
+ #if str($iterative) == "Yes"
+ &&
+ python3 ${__tool_directory__}/dante_gff_output_filtering.py --dom_gff ${DomGff}
+ --domains_prot_seq domains_filtered.fasta --domains_filtered domains_filtered.gff
+ --output_dir .
+ --selected_dom All --th_identity 0.35
+ --th_similarity 0.45 --th_length 0.9
+ --interruptions 1 --max_len_proportion 1.1
+ --element_type ''
+ &&
-python3 ${__tool_directory__}/fasta2database.py domains_filtered.fasta domains_filtered.db
-domains_filtered.class &&
+
+
+ python3 ${__tool_directory__}/fasta2database.py domains_filtered.fasta domains_filtered.db
+ domains_filtered.class
+ &&
-lastdb -p domains_filtered.db domains_filtered.db &&
+ lastdb -p domains_filtered.db domains_filtered.db
+ &&
+
+ python3 ${__tool_directory__}/dante.py --query \${INPUT_SEQUENCES} --domain_gff ${DomGff2}
+ --protein_database domains_filtered.db
+ --classification domains_filtered.class
+ --scoring_matrix BL80
+
-python3 ${__tool_directory__}/dante.py --query ${input} --domain_gff ${DomGff2}
- --protein_database domains_filtered.db
- --classification domains_filtered.class
- --scoring_matrix BL80
+ #if str($input_type.input_type_selector) == "aln"
+ &&
+ python3 ${__tool_directory__}/coverage2gff.py -p sequences.profile -g ${DomGff2}
+ #end if
+ #end if
-
-
-
+
+
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
+
+
+
+
+
-
-
-
-
-
+
+
+
+
+
-THIS IS A PRIMARY OUTPUT THAT SHOULD UNDERGO FURTHER QUALITY FILTERING TO GET RID OFF POTENTIAL FALSE POSITIVE DOMAINS
-
-**WHAT IT DOES**
+
+
+
+ iterative == "Yes"
+
+
+
+
+
+
+
+
+
+
+
-This tool uses external aligning programme `LAST`_ and RepeatExplorer database of TE protein domains(REXdb) (Viridiplantae and Metazoa)
+
+
-.. _LAST: http://last.cbrc.jp/
-*Lastal* runs similarity search to find hits between query DNA sequence and our database of protein domains from all Viridiplantae repetitive elements. Hits with overlapping positions in the sequence (even through other hits) forms a cluster which represents one potential protein domain. Strand orientation is taken into consideration when forming the clusters which means each cluster is built from forward or reverse stranded hits exclusively. The clusters are subsequently processed separately; within one cluster positions are scanned base-by-base and classification strings are assigned for each of them based on the database sequences which were mapped on that place. These asigned classification strings consist of a domain type as well as class and lineage of the repetitive element where the database protein comes from. Different classification levels are separated by "|" character. Every hit is scored according to the scoring matrix used for DNA-protein alignment (BLOSUM80). For single position only the hits reaching certain percentage (80% by default) of the overall best score within the whole cluster are reported. One cluster of overlapping hits represents one domain region and is recorded as one line in the resulting GFF3 file. Regarding the classition strings assigned to one region (cluster) there are three situations that can occur:
+
- 1. There is a single classification string assigned to each position as well as classifications along all the positions in the region are mutually uniform, in this case domain's final classification is equivalent to this unique classification.
- 2. There are multiple classification strings assigned to one cluster, i.e. one domain, which leads to classification to the common (less specific) level of all the strings
- 3. There is a conflict at the domain type level, domains are reported with slash (e.g. RT/INT) and the classification is in this case ambiguous
-
-**There are 2 outputs produced by this tool:**
-
-1. GFF3 file of all proteins domains built from all hits found by LAST. Domains are reported per line as regions (start - end) on the original DNA sequence including the seq ID, alignment score and strand orientation. The last "Attributes" column contains several semicolon-separated information related to annotation, repetitive classification, alignment and its quality. This file can undergo further filtering using *Protein Domain Filter* tool
+
+ THIS IS A PRIMARY OUTPUT THAT SHOULD UNDERGO FURTHER QUALITY FILTERING TO GET RID OF POTENTIAL FALSE POSITIVE DOMAINS
+
+ **WHAT IT DOES**
+
+ This tool uses external aligning programme `LAST`_ and RepeatExplorer database of TE protein domains(REXdb) (Viridiplantae and Metazoa)
+
+ .. _LAST: http://last.cbrc.jp/
+
+ *Lastal* runs similarity search to find hits between query DNA sequence and our database of protein domains from all Viridiplantae repetitive elements. Hits with overlapping positions in the sequence (even through other hits) form a cluster which represents one potential protein domain. Strand orientation is taken into consideration when forming the clusters which means each cluster is built from forward or reverse stranded hits exclusively. The clusters are subsequently processed separately; within one cluster positions are scanned base-by-base and classification strings are assigned for each of them based on the database sequences which were mapped on that place. These assigned classification strings consist of a domain type as well as class and lineage of the repetitive element where the database protein comes from. Different classification levels are separated by "|" character. Every hit is scored according to the scoring matrix used for DNA-protein alignment (BLOSUM80). For single position only the hits reaching certain percentage (80% by default) of the overall best score within the whole cluster are reported. One cluster of overlapping hits represents one domain region and is recorded as one line in the resulting GFF3 file. Regarding the classification strings assigned to one region (cluster) there are three situations that can occur:
-- Attributes reported always:
+ 1. There is a single classification string assigned to each position as well as classifications along all the positions in the region are mutually uniform, in this case domain's final classification is equivalent to this unique classification.
+ 2. There are multiple classification strings assigned to one cluster, i.e. one domain, which leads to classification to the common (less specific) level of all the strings
+ 3. There is a conflict at the domain type level, domains are reported with slash (e.g. RT/INT) and the classification is in this case ambiguous
+
+ **There are 2 outputs produced by this tool:**
+
+ 1. GFF3 file of all protein domains built from all hits found by LAST. Domains are reported per line as regions (start - end) on the original DNA sequence including the seq ID, alignment score and strand orientation. The last "Attributes" column contains several semicolon-separated pieces of information related to annotation, repetitive classification, alignment and its quality. This file can undergo further filtering using *Protein Domain Filter* tool
- Name
+ - Attributes reported always:
+
+ Name
type of domain; if ambiguous reported with slash
- Final_classification
+ Final_classification
definite classification based on all partial classifications of Region_hits_classifications attribute or
"Ambiguous_domain" when there is an ambiguous domain type
- Region_Hits_Classifications
+ Region_Hits_Classifications
all hits classifications (comma separated) from a certain domain region that reach the set score threshold; in case of multiple annotations the square brackets indicate the number of bases having this particular classification
-
-- Attributes only reported in case of unambiguous domain type (all the attributes including quality information are related to the Best_Hit of the region):
-
- Best_hit
+
+ - Attributes only reported in case of unambiguous domain type (all the attributes including quality information are related to the Best_Hit of the region):
+
+ Best_hit
classification and position of the best alignment with the highest score within the cluster; in the square brackets is the percentage of the whole cluster range that this best hit covers
- Best_Hit_DB_Pos
+ Best_Hit_DB_Pos
showing which part of the original datatabase domain corresponding to the Best Hit was aligned on query DNA (e.g. **Best_Hit_DB_Pos=17:75of79** means the Best Hit reported in GFF represents region from 17th to 75th of total 79 aminoacids in the original domain from the database)
- DB_Seq
+ DB_Seq
database protein sequence of the best hit mapped to the query DNA
- Query_Seq
+ Query_Seq
alignment sequence of the query DNA for the best hit
- Identity
+ Identity
ratio of identical amino acids in alignment sequence to the length of alignment
- Similarity
+ Similarity
ratio of alignment positions with positive score (according to the scoring matrix) to the length of alignment
- Relat_Length
+ Relat_Length
ratio of gapless length of the aligned protein sequence to the whole length of the database protein
- Relat_Interruptions
+ Relat_Interruptions
number of the interruptions (frameshifts + stop codons) in aligned translated query sequence per each starting 100 AA
- Hit_to_DB_Length
+ Hit_to_DB_Length
proportion of alignment length to the original length of the protein domain from database
-
-
+
+
-!NOTE: Tool can in average process 0.5 Gbps of the DNA sequence per day. This is only a rough estimate and it is highly dependent on input data (repetive elements occurence) as well as computing resources. Maximum running time of the tool is 7 days.
+ !NOTE: Tool can on average process 0.5 Gbps of the DNA sequence per day. This is only a rough estimate and it is highly dependent on input data (repetitive elements occurrence) as well as computing resources. Maximum running time of the tool is 7 days.
-
+
diff -r a6c55d1bdb6c -r 3151a72a6671 dante_gff_output_filtering.py
--- a/dante_gff_output_filtering.py Wed Aug 28 08:08:47 2019 -0400
+++ b/dante_gff_output_filtering.py Tue Sep 03 05:20:02 2019 -0400
@@ -82,6 +82,22 @@
return count_comment, lines
+def parse_gff_line(line):
+    '''Return dictionary with gff fields and attributes
+    Note - type of fields is strings
+    '''
+    # order of first 9 column is fixed; drop the line terminator before splitting
+    gff_line = dict(
+        zip(
+            ['seqid', 'source', 'type', 'start', 'end',
+             'score', 'strand', 'phase', 'attributes'],
+            line.rstrip("\r\n").split("\t")
+        )
+    )
+    # split on the first "=" only (values may contain "="); skip empty items left by a trailing ";"
+    gff_line['attributes'] = dict(i.split("=", 1) for i in gff_line['attributes'].split(";") if "=" in i)
+    return gff_line
+
def filter_qual_dom(DOM_GFF, FILT_DOM_GFF, TH_IDENTITY, TH_SIMILARITY,
TH_LENGTH, TH_INTERRUPT, TH_LEN_RATIO, SELECTED_DOM,
ELEMENT):
@@ -90,7 +106,7 @@
filt_dom_tmp = NamedTemporaryFile(delete=False)
with open(DOM_GFF, "r") as gff_all, open(filt_dom_tmp.name,
"w") as gff_filtered:
- for comment_idx in range(count_comment):
+ for _ in range(count_comment):
next(gff_all)
dom_dict = defaultdict(lambda: defaultdict(int))
orig_class_dict = defaultdict(int)
@@ -109,20 +125,22 @@
orig_class_dict[classification] += 1
## ambiguous domains filtered out automatically
if classification != configuration.AMBIGUOUS_TAG:
- al_identity = float(attributes.split(";")[-5].split("=")[1])
- al_similarity = float(attributes.split(";")[-4].split("=")[1])
- al_length = float(attributes.split(";")[-3].split("=")[1])
- relat_interrupt = float(attributes.split(";")[-2].split("=")[
- 1])
- db_len_proportion = float(attributes.split(";")[-1].split("=")[
- 1])
- dom_type = attributes.split(";")[0].split("=")[1]
- seq_id = line.split("\t")[0]
- xminimal = int(line.split("\t")[3])
- xmaximal = int(line.split("\t")[4])
- if al_identity >= TH_IDENTITY and al_similarity >= TH_SIMILARITY and al_length >= TH_LENGTH and relat_interrupt <= TH_INTERRUPT and db_len_proportion <= TH_LEN_RATIO and (
- dom_type == SELECTED_DOM or
- SELECTED_DOM == "All") and (ELEMENT in classification):
+ gff_line = parse_gff_line(line)
+ al_identity = float(gff_line['attributes']['Identity'])
+ al_similarity = float(gff_line['attributes']['Similarity'])
+ al_length = float(gff_line['attributes']['Relat_Length'])
+ relat_interrupt = float(gff_line['attributes']['Relat_Interruptions'])
+ db_len_proportion = float(gff_line['attributes']['Hit_to_DB_Length'])
+ dom_type = gff_line['attributes']['Name']  # domain type (e.g. RT), not the classification string
+ seq_id = gff_line['seqid']
+ xminimal = int(gff_line['start'])
+ xmaximal = int(gff_line['end'])
+ c1 = al_identity >= TH_IDENTITY
+ c2 = al_similarity >= TH_SIMILARITY
+ if (c1 and c2 and al_length >= TH_LENGTH and relat_interrupt <= TH_INTERRUPT and
+ db_len_proportion <= TH_LEN_RATIO and
+ (dom_type == SELECTED_DOM or SELECTED_DOM == "All") and
+ (ELEMENT in classification)):
gff_filtered.writelines(line)
filt_class_dict[classification] += 1
dom_dict[seq_id][dom_type] += 1
diff -r a6c55d1bdb6c -r 3151a72a6671 dante_pyan_scheme.png
Binary file dante_pyan_scheme.png has changed
diff -r a6c55d1bdb6c -r 3151a72a6671 dante_pyan_scheme.svg
--- a/dante_pyan_scheme.svg Wed Aug 28 08:08:47 2019 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,326 +0,0 @@
-
-
-
-
-
diff -r a6c55d1bdb6c -r 3151a72a6671 parse_aln.py
--- a/parse_aln.py Wed Aug 28 08:08:47 2019 -0400
+++ b/parse_aln.py Tue Sep 03 05:20:02 2019 -0400
@@ -4,6 +4,7 @@
profile file
'''
import argparse
+import re
def parse_args():
@@ -11,33 +12,126 @@
description = """
parsing cap3 assembly aln output
"""
- parser = argparse.ArgumentParser(description=description,
- formatter_class=argparse.RawTextHelpFormatter)
- parser.add_argument(
- '-a', '--aln_file',
- default=None, required=True,
- help="Aln file input",
- type=str,
- action='store')
- parser.add_argument(
- '-f', '--fasta',
- default=None, required=True,
- help="fasta output file name",
- type=str,
- action='store')
- parser.add_argument(
- '-p', '--profile',
- default=None, required=True,
- help="output file for coverage profile",
- type=str,
- action="store"
- )
+
+ parser = argparse.ArgumentParser(
+ description=description,
+ formatter_class=argparse.RawTextHelpFormatter)
+ parser.add_argument('-a',
+ '--aln_file',
+ default=None,
+ required=True,
+ help="Aln file input",
+ type=str,
+ action='store')
+ parser.add_argument('-f',
+ '--fasta',
+ default=None,
+ required=True,
+ help="fasta output file name",
+ type=str,
+ action='store')
+ parser.add_argument('-p',
+ '--profile',
+ default=None,
+ required=True,
+ help="output file for coverage profile",
+ type=str,
+ action="store")
return parser.parse_args()
+def get_header(f):
+ aln_header = ". : . : . : . : . : . :"
+ contig_lead = "******************"
+ aln_start = -1
+ while True:
+ line = f.readline()
+ if not line:
+ return None, None
+ if line[0:18] == contig_lead:
+ line2 = f.readline()
+ else:
+ continue
+ if aln_header in line2:
+ aln_start = line2.index(aln_header)
+ break
+ contig_name = line.split()[1] + line.split()[2]
+ return contig_name, aln_start
+
+
+def segment_start(f):
+ pos = f.tell()
+ line = f.readline()
+ # detect next contig or end of file
+ if "********" in line or line == "":
+ segment = False
+ else:
+ segment = True
+ f.seek(pos)
+ return segment
+
+
+def get_segment(f, seq_start):
+    '''Read one alignment segment; return (rows, consensus) or (None, None).'''
+    if not segment_start(f):
+        return None, None
+    aln, consensus = [], None
+    for line in iter(f.readline, ""):  # "" sentinel: stop at EOF instead of spinning
+        if ". : . :" in line:
+            continue
+        if "__________" in line:
+            consensus = f.readline().rstrip('\n')[seq_start:]
+            f.readline()  # empty line
+            break
+        aln.append(line.rstrip('\n')[seq_start:])
+    # no consensus row means the segment was cut off by EOF: report "no segment"
+    return (aln, consensus) if consensus is not None else (None, None)
+
+
+def aln2coverage(aln, length=None):
+    '''Per-column count of non-gap characters; `length` fixes the column count.'''
+    coverage = [0] * (max(map(len, aln)) if length is None else length)
+    for a in aln:
+        for i, c in enumerate(a[:len(coverage)]):
+            if c not in " -":
+                coverage[i] += 1
+    return coverage
+
+
+def read_contig(f, seq_start):
+    contig, coverage = "", []
+    while True:
+        aln, consensus = get_segment(f, seq_start)
+        if not aln:
+            break
+        contig += consensus
+        # one coverage value per consensus column keeps sequence and profile aligned
+        coverage += aln2coverage(aln, len(consensus))
+    return contig, coverage
+
+def remove_gaps(consensus, coverage):
+ if "-" not in consensus:
+ return consensus, coverage
+ new_coverage = [cov for cons, cov in zip(consensus, coverage)
+ if cons != "-"]
+ new_consensus = consensus.replace("-", "")
+ return new_consensus, new_coverage
+
+def main():
+ args = parse_args()
+ with open(args.aln_file, 'r') as f1, open(args.fasta, 'w') as ffasta, open(args.profile, 'w') as fprofile:
+ while True:
+ contig_name, seq_start = get_header(f1)
+ if contig_name:
+ consensus, coverage = remove_gaps(*read_contig(f1, seq_start))
+ ffasta.write(">{}\n".format(contig_name))
+ ffasta.write("{}\n".format(consensus))
+ fprofile.write(">{}\n".format(contig_name))
+ fprofile.write("{}\n".format(" ".join([str(i) for i in coverage])))
+ else:
+ break
+
+
if __name__ == "__main__":
- args = parse_args()
- print(args.profile)
-
-
+ main()
diff -r a6c55d1bdb6c -r 3151a72a6671 test-data/GEPY_test_long_1_output_unfiltered.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/GEPY_test_long_1_output_unfiltered.gff3 Tue Sep 03 05:20:02 2019 -0400
@@ -0,0 +1,26 @@
+##gff-version 3
+##-----------------------------------------------
+##PIPELINE VERSION : iter_search_optional-rv-3168(0b80fa0)
+##PROTEIN DATABASE VERSION : Viridiplantae_v3.0_pdb
+##-----------------------------------------------
+scaffold146.1|size86774 dante protein_domain 976 1289 293 + . Name=RH;Final_Classification=Class_I|LTR|Ty1/copia|Bianca;Region_Hits_Classifications=RH|Class_I|LTR|Ty1/copia|Bianca;Best_Hit=Ty1-RH__REXdb_ID2558|Class_I|LTR|Ty1/copia|Bianca:976-1289[100percent];Best_Hit_DB_Pos=26:134of134;DB_Seq=ISWRSVKQTITATSSNHAELLALHEASRECVWLRSMIQHIQKNCG-LSSGRMDATIIYEDNTACIAQLKEGYIKGDRTKHISPKFF-FTHDLQKDGDISIQQIRSCDNLAD;Region_Seq=ISWRSTKQTIVAISSNHVELLAIHDTSRECVWLRFMIESI\IMXXXXXXXXXXXXXXXXXXQLKE*YIKCDRTKHISPKFF\FTQDLQKNGDVIIQQIRSNDNVVD;Query_Seq=ISWRSTKQTIVAISSNHVELLAIHDTSRECVWLRFMIESI-----\IMXXXXXXXXXXXXXXXXXXQLKE*YIKCDRTKHISPKFF\FTQDLQKNGDVIIQQIRSNDNVVD;Identity=0.59;Similarity=0.66;Relat_Length=0.813;Relat_Interruptions=1.5;Hit_to_DB_Length=0.83
+scaffold146.1|size86774 dante protein_domain 6810 7049 153 + . Name=PROT;Final_Classification=Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Retand;Region_Hits_Classifications=PROT|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Retand;Best_Hit=Ty3-PROT__REXdb_ID9702|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Retand:6810-7049[100percent];Best_Hit_DB_Pos=1:80of80;DB_Seq=LVDDGSKVNLLPYRVFQQMGIPEEQLVRDQAPVKGIGGVPVLVEGKVKLALTLGEAPRTRTHYAVFLVVKPPLSYNAILG;Region_Seq=LVDSGASCNLMSKRVMKQMGIPDEKLEFLDATLYAFDRRTIIPAGKIQLPVTLGEEERTRSEMVEFIIVDMDLAYNAILG;Query_Seq=LVDSGASCNLMSKRVMKQMGIPDEKLEFLDATLYAFDRRTIIPAGKIQLPVTLGEEERTRSEMVEFIIVDMDLAYNAILG;Identity=0.44;Similarity=0.62;Relat_Length=1.0;Relat_Interruptions=0.0;Hit_to_DB_Length=1.0
+scaffold146.1|size86774 dante protein_domain 7656 8296 . + . Name=RT/INT;Final_Classification=Ambiguous_domain;Region_Hits_Classifications_=RT|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Retand[246bp],INT|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Retand[468bp]
+scaffold146.1|size86774 dante protein_domain 8756 9241 538 + . Name=RT;Final_Classification=Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat;Region_Hits_Classifications=RT|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Retand[486bp],RT|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Ogre[441bp];Best_Hit=Ty3-RT__REXdb_ID8210|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Retand:8801-9241[90percent];Best_Hit_DB_Pos=27:173of173;DB_Seq=DFTDLNKACPKDSFPLPHIDRLVDSTAGNELLTFMDAFSGYNQIMMNPEDQEKTSFITDRGIYCYKVMPFGLKNAGATYQRLVNKMFHNHLGKTMEVYIDDMLVKSLKKEDHVKHLEECFDILNKYQMKLNPAKCTFGVPSGEFLGY;Region_Seq=TSIATASGGRTSDGADFKGVNKHCQPDPFPLPHIDRLVDAVAGSSLLSTMDAYSGYHQISLAREDQAKSSFLTEDGVFCYVVMPFGLRNAGATYQRLVNKIFADLLGKEMEIYVDDMIVKSLNDEDHIIYLSHCFEVCRTHRLKLNPAKCCFGVRSGKFLGY;Query_Seq=DFKGVNKHCQPDPFPLPHIDRLVDAVAGSSLLSTMDAYSGYHQISLAREDQAKSSFLTEDGVFCYVVMPFGLRNAGATYQRLVNKIFADLLGKEMEIYVDDMIVKSLNDEDHIIYLSHCFEVCRTHRLKLNPAKCCFGVRSGKFLGY;Identity=0.63;Similarity=0.8;Relat_Length=0.85;Relat_Interruptions=0.0;Hit_to_DB_Length=0.85
+scaffold146.1|size86774 dante protein_domain 9434 9781 343 + . Name=RH;Final_Classification=Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Retand;Region_Hits_Classifications=RH|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Retand;Best_Hit=Ty3-RH__REXdb_ID9729|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Retand:9434-9772[97percent];Best_Hit_DB_Pos=1:113of149;DB_Seq=WTEECEEAFQKLKEYLGSPHLLVKPIQGEPLFLYLAVSEHATSSVLVREDDGVQRPIYYTSRALVDAETRYLSLEKIVLALIVSARRLRPYFQAHTIIVLTDQPIRQVLAKPD;Region_Seq=WTDQCDRAFKELKTYLASPPLIVSPTPTETLGLYLAVSEHAVSSVLVAERDGVQHPVYYVSHTLLPAESRYSTVEKFVLALLKSVAKLRHYFESRKVIVYTDQPIKAVLGQSDHTS;Query_Seq=WTDQCDRAFKELKTYLASPPLIVSPTPTETLGLYLAVSEHAVSSVLVAERDGVQHPVYYVSHTLLPAESRYSTVEKFVLALLKSVAKLRHYFESRKVIVYTDQPIKAVLGQSD;Identity=0.58;Similarity=0.73;Relat_Length=0.758;Relat_Interruptions=0.0;Hit_to_DB_Length=0.76
+scaffold146.1|size86774 dante protein_domain 10810 11667 747 + . Name=INT;Final_Classification=Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Retand;Region_Hits_Classifications=INT|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Retand;Best_Hit=Ty3-INT__REXdb_ID9633|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Retand:10819-11667[98percent];Best_Hit_DB_Pos=30:310of310;DB_Seq=RDTHQYVQRCIQCQKFAPLIHKPGEEMTIMSAPCPFAQWGIDLVGPFPQTAGRKKFFIVAVDYFTKWVEAEALSKITEDEVMHFIWKYICCRFGLPRSLVSDNGTQFNGKKIRAWCEEMKITQKFVAVAHPQANGQVESTNRTIVNGLKKRIDELGGSWVDELPSVLWSYRTSAKAATGETPFRLTYGTEAVIPVEVAMDTLRIATF--DEEANDGALRTRLDEIFDLREAAYLHMERSKNLIKARYDQGVRSRSFQIGDLILRRADALKHTGKLEANWEGPY;Region_Seq=SVLRDAMDCVRRCQSCQYFAPINRKPGAEITLTELPCPFDRWGIDILGPFPQSVRQRRFCIVAVEYHSKWIEAEAVASITSEAVKKFVMNNIIVRFGCPRVLVSDNGPQFISDKFATFCEEYGIQQRTSSVYHPQTNGQAEASNKIILHGLRRNLDSLGGSWPDQLPHVLWAYRTTPKSSTGETPFSLVYGSEAVAPVESTIITPRIAAYMHTESANTEFRELDLDLLEERRNEVYGRVRKQQRALRKRYNQRVRPRQFEKGDLILRSVESQGHKGKLDRAWEGPY;Query_Seq=RDAMDCVRRCQSCQYFAPINRKPGAEITLTELPCPFDRWGIDILGPFPQSVRQRRFCIVAVEYHSKWIEAEAVASITSEAVKKFVMNNIIVRFGCPRVLVSDNGPQFISDKFATFCEEYGIQQRTSSVYHPQTNGQAEASNKIILHGLRRNLDSLGGSWPDQLPHVLWAYRTTPKSSTGETPFSLVYGSEAVAPVESTIITPRIAAYMHTESANTEFRELDLDLLEERRNEVYGRVRKQQRALRKRYNQRVRPRQFEKGDLILRSVESQGHKGKLDRAWEGPY;Identity=0.49;Similarity=0.66;Relat_Length=0.906;Relat_Interruptions=0.0;Hit_to_DB_Length=0.91
+scaffold146.1|size86774 dante protein_domain 14592 14828 289 + . Name=PROT;Final_Classification=Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Athila;Region_Hits_Classifications=PROT|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Athila;Best_Hit=Ty3-PROT__REXdb_ID6659|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Athila:14592-14828[100percent];Best_Hit_DB_Pos=1:80of80;DB_Seq=MLDLGASINVMPYSIYNSLNLGPMEETCIIIQLADRSNAYPKGVMEDVLVQVNELVFPADFYILKMEDELSPNPTPILLG;Region_Seq=MVDLGASINLMPYSIYSALQLGPLQGTAIVIKLADRSNTHPEGVIEDVLVQVNNLVFPADFYVLKMGKAENNDCPLLLG;Query_Seq=MVDLGASINLMPYSIYSALQLGPLQGTAIVIKLADRSNTHPEGVIEDVLVQVNNLVFPADFYVLKM-GKAENNDCPLLLG;Identity=0.68;Similarity=0.84;Relat_Length=1.0;Relat_Interruptions=0.0;Hit_to_DB_Length=1.0
+scaffold146.1|size86774 dante protein_domain 15420 15995 871 + . Name=RT;Final_Classification=Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Athila;Region_Hits_Classifications=RT|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Athila;Best_Hit=Ty3-RT__REXdb_ID6635|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Athila:15420-15995[100percent];Best_Hit_DB_Pos=1:192of192;DB_Seq=IYPITDSKWVAPIHVVPKKTGITLVKNKNDELIPTRISSGWRMCVDYRKLNLATRKDHFPLPFMDQMLERLAGKSFYCFLDGYSGYNQIVINPEDQEKTTFTCPFGTYAYRRMPFGLCNAPATFQRCMMSIFSDYVERIIEVFMDDFTVYGDSFDKCLENLSLILKRCIETNLVLNYEKCYFMVEQGIVLGH;Region_Seq=IYAISDSDWVSPVHVVPKKTGFTVERNKNGELVPKRVTNGWRVCIDYRKLNDATRKDHFPLPFIDQMLERLAGKKFYCFLDGYSGYNQVAIAPEDQEKTTFTCTYGTYAFRKMPFGLCNAPATFQRCMLSIFSEFTGKFIEVFMDDFTVYGDSFEGALENLEKVLQRCVEKKLVLNSEKCHFMVRQGIVLGH;Query_Seq=IYAISDSDWVSPVHVVPKKTGFTVERNKNGELVPKRVTNGWRVCIDYRKLNDATRKDHFPLPFIDQMLERLAGKKFYCFLDGYSGYNQVAIAPEDQEKTTFTCTYGTYAFRKMPFGLCNAPATFQRCMLSIFSEFTGKFIEVFMDDFTVYGDSFEGALENLEKVLQRCVEKKLVLNSEKCHFMVRQGIVLGH;Identity=0.76;Similarity=0.88;Relat_Length=1.0;Relat_Interruptions=0.0;Hit_to_DB_Length=1.0
+scaffold146.1|size86774 dante protein_domain 16188 16634 623 + . Name=RH;Final_Classification=Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Athila;Region_Hits_Classifications=RH|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Athila;Best_Hit=Ty3-RH__REXdb_ID6648|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Athila:16188-16634[100percent];Best_Hit_DB_Pos=1:149of149;DB_Seq=FNEACKVAFDKLKELLTSAPIIQPPDWSLPFEIMCDASNYVVGAVLGQRVGRAAHVIYYTSRTLDSAQCNYSTTEKELLAIVFALEKFRSYLLGTKVIIFSDHAALRYLLAKKEAKPRLIRWILLLQEFNLEIRDKKGTENLVADHLSR;Region_Seq=FNQECQEAFNKLKSLLTAAPIIQPPNWELPFELMCDASNYALGAVLGQKIEGKRHVIYYASKTLSEAQIHYTTTEKELLAIVYALEKFRSYLLGTKITVHSDHAALRHLLSKKESKPRLIRWILLLQEFDLEIKDRAGTENAVADNLSR;Query_Seq=FNQECQEAFNKLKSLLTAAPIIQPPNWELPFELMCDASNYALGAVLGQKIEGKRHVIYYASKTLSEAQIHYTTTEKELLAIVYALEKFRSYLLGTKITVHSDHAALRHLLSKKESKPRLIRWILLLQEFDLEIKDRAGTENAVADNLSR;Identity=0.74;Similarity=0.87;Relat_Length=1.0;Relat_Interruptions=0.0;Hit_to_DB_Length=1.0
+scaffold146.1|size86774 dante protein_domain 24522 24659 149 + . Name=PROT;Final_Classification=Class_I|LTR|Ty1/copia|Bianca;Region_Hits_Classifications=PROT|Class_I|LTR|Ty1/copia|Bianca;Best_Hit=Ty1-PROT__REXdb_ID2599|Class_I|LTR|Ty1/copia|Bianca:24531-24659[93percent];Best_Hit_DB_Pos=29:71of71;DB_Seq=STISGTTNLVEGSGRANIMLPNGTRFHINDALYSSKSRRNLLS;Region_Seq=IKASTIVCEANIVEGSGRATVVLPSGTHIRIDDALYANKSRRNLLS;Query_Seq=STIVCEANIVEGSGRATVVLPSGTHIRIDDALYANKSRRNLLS;Identity=0.65;Similarity=0.77;Relat_Length=0.606;Relat_Interruptions=0.0;Hit_to_DB_Length=0.61
+scaffold146.1|size86774 dante protein_domain 24873 25481 913 + . Name=INT;Final_Classification=Class_I|LTR|Ty1/copia|Bianca;Region_Hits_Classifications=INT|Class_I|LTR|Ty1/copia|Bianca;Best_Hit=Ty1-INT__REXdb_ID2558|Class_I|LTR|Ty1/copia|Bianca:24873-25481[100percent];Best_Hit_DB_Pos=1:203of203;DB_Seq=HERLGHPGSIMMRKIIEHSCGHQLKSREILQSNKFSCTSCSQGKLITRPSPTKIGSESLNFLERIHGDICGPIHPPCGPFRYFMVLIDASTRWSHVCLLSTRNQAFARLLAQLIRIRAHFPDYPVKKIRLDNAAEFSSQTFNDYCMSIGIDIEHPVAHVHTQNGLAESFIKRIQLIARPLLMRCKLPISTWGHAILHAATLIR;Region_Seq=HDRLGHPGMIMMRKIIRTTSGHSLKNREILHPREYICTACAQGKLITRPSPVKIMNERITFLERIQGDICGPIHPACGPFRYFIVLIDASSRWSHVSLLSTRNHAFARLLSQIIRLRAHFPDYPVKKIRLDNAAEFTSRTFNNYCLAMGIDVEHPVEYVHTQNGLAESLIKRLQLIARPLLMKSKLPVTCWGHAIIHASSLIR;Query_Seq=HDRLGHPGMIMMRKIIRTTSGHSLKNREILHPREYICTACAQGKLITRPSPVKIMNERITFLERIQGDICGPIHPACGPFRYFIVLIDASSRWSHVSLLSTRNHAFARLLSQIIRLRAHFPDYPVKKIRLDNAAEFTSRTFNNYCLAMGIDVEHPVEYVHTQNGLAESLIKRLQLIARPLLMKSKLPVTCWGHAIIHASSLIR;Identity=0.75;Similarity=0.9;Relat_Length=1.0;Relat_Interruptions=0.0;Hit_to_DB_Length=1.0
+scaffold146.1|size86774 dante protein_domain 26313 27071 1060 + . Name=RT;Final_Classification=Class_I|LTR|Ty1/copia|Bianca;Region_Hits_Classifications=RT|Class_I|LTR|Ty1/copia|Bianca;Best_Hit=Ty1-RT__REXdb_ID2558|Class_I|LTR|Ty1/copia|Bianca:26322-27032[93percent];Best_Hit_DB_Pos=1:237of262;DB_Seq=WKDAIKAELYSLNKRKVFGPVVRTPKGVKPVGYKWVFVRKRNENGEIARYKARLVAQGFSQRPGIDFNETYSPVVDATTFRYLISLIAYEGLNLHMMDVVTAYLYGSLDSDIYMKIPEGFNLPDTNSSGSREDYSIKLNKSLYGLKQSGRMWYNRLSEYLLKEGYKNDSVCPCIFMKRSENEFAIIAVYVDDINIIGTPEELPKAIDCLKKEFEMKDLGKTKFCLGLQIEHLNNGIF;Region_Seq=WPKWKDAIESELKSLNKRDVFGPVVRTPEGVQPVGYKWVFVRKRNDKGEISRYKARLVAQGFSQRPGIDYDETYSPVMDATTFRFLISLAIEYGLDLQLMDVVTAYLYGSLDCEIYMKIPEGFHMPERYSSEPRTDYAIKLNKSLYGLKQSGRMWYNRLSEYLIKEGYKNNLVCPCVFMKKFENEFVIIAVYVDDINIVGTQKALLDAVNCLKREFEMKDLGRTKYCLGLQIEYLKNGIFRTDYAIKLNKSLY;Query_Seq=WKDAIESELKSLNKRDVFGPVVRTPEGVQPVGYKWVFVRKRNDKGEISRYKARLVAQGFSQRPGIDYDETYSPVMDATTFRFLISLAIEYGLDLQLMDVVTAYLYGSLDCEIYMKIPEGFHMPERYSSEPRTDYAIKLNKSLYGLKQSGRMWYNRLSEYLIKEGYKNNLVCPCVFMKKFENEFVIIAVYVDDINIVGTQKALLDAVNCLKREFEMKDLGRTKYCLGLQIEYLKNGIF;Identity=0.78;Similarity=0.91;Relat_Length=0.905;Relat_Interruptions=0.0;Hit_to_DB_Length=0.9
+scaffold146.1|size86774 dante protein_domain 27723 28124 581 + . Name=RH;Final_Classification=Class_I|LTR|Ty1/copia|Bianca;Region_Hits_Classifications=RH|Class_I|LTR|Ty1/copia|Bianca;Best_Hit=Ty1-RH__REXdb_ID2558|Class_I|LTR|Ty1/copia|Bianca:27723-28124[100percent];Best_Hit_DB_Pos=1:134of134;DB_Seq=DAGYLSDPHHGRSQTGYLFTSGNTAISWRSVKQTITATSSNHAELLALHEASRECVWLRSMIQHIQKNCGLSSGRMDATIIYEDNTACIAQLKEGYIKGDRTKHISPKFFFTHDLQKDGDISIQQIRSCDNLAD;Region_Seq=DAGYRSDPHNGRSQTGYVFLNKGAAISWRSTKQTIAATSSNHAELLAIHETSRECVWLRSMIESIYNACGLFTDKMPPTVLYEDNSACIIQLKEGYIKGDRTKHISPKFFFTHDLQKNGEVIIQQIRSSDNVAD;Query_Seq=DAGYRSDPHNGRSQTGYVFLNKGAAISWRSTKQTIAATSSNHAELLAIHETSRECVWLRSMIESIYNACGLFTDKMPPTVLYEDNSACIIQLKEGYIKGDRTKHISPKFFFTHDLQKNGEVIIQQIRSSDNVAD;Identity=0.75;Similarity=0.84;Relat_Length=1.0;Relat_Interruptions=0.0;Hit_to_DB_Length=1.0
+scaffold146.1|size86774 dante protein_domain 9783 9956 178 - . Name=INT;Final_Classification=Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Retand;Region_Hits_Classifications=INT|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Retand;Best_Hit=Ty3-INT__REXdb_ID9635|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Retand:9783-9956[100percent];Best_Hit_DB_Pos=1:58of310;DB_Seq=HRGGCGEHGGARALIQKLHRAGYYWPGMKRDTHQYVQRCIQCQKFAPLIHKPGEEMTI;Region_Seq=HSGLCGNHPGARSLALRIQRAGYYWPTLLRDAMDCVRRCQSCQYFAPINRKPGAEITL;Query_Seq=HSGLCGNHPGARSLALRIQRAGYYWPTLLRDAMDCVRRCQSCQYFAPINRKPGAEITL;Identity=0.53;Similarity=0.69;Relat_Length=0.187;Relat_Interruptions=0.0;Hit_to_DB_Length=0.19
+scaffold146.1|size86774 dante protein_domain 10299 10658 303 - . Name=aRH;Final_Classification=Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat;Region_Hits_Classifications=aRH|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|TatII[360bp],aRH|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Ogre[360bp],aRH|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Retand[360bp];Best_Hit=Ty3-aRH__REXdb_ID9546|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Retand:10299-10658[100percent];Best_Hit_DB_Pos=1:121of121;DB_Seq=WILHVDGASSKQGSGIGIRLQSPYGEVIEQSFCLAFNASNNEAEYESLLAGLRLAVGIGVTKLRAFCNSQLVANQFSGDYEAKDSRMEAYLAQVQELSKKFLSFELARIPRSENSAADSLA;Region_Seq=WNMYIDGSTQSGAGVGVHYITPYGDWINLAVKLQFPATNNVAEYEALLAGMNFALSLGVTRLKTFSDSQLVVEQFSGHFQAKEPMLEAYKSRSQLLAAKFSEFSLEHIPRESNRAADSLA;Query_Seq=WNMYIDG-STQSGAGVGVHYITPYGDWINLAVKLQFPATNNVAEYEALLAGMNFALSLGVTRLKTFSDSQLVVEQFSGHFQAKEPMLEAYKSRSQLLAAKFSEFSLEHIPRESNRAADSLA;Identity=0.49;Similarity=0.7;Relat_Length=1.0;Relat_Interruptions=0.0;Hit_to_DB_Length=1.0
+scaffold146.1|size86774 dante protein_domain 10701 10817 136 - . Name=RH;Final_Classification=Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat;Region_Hits_Classifications=RH|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Retand[117bp],RH|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Ogre[99bp];Best_Hit=Ty3-RH__REXdb_ID8372|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Tat|Retand:10701-10817[100percent];Best_Hit_DB_Pos=279:317of317;DB_Seq=NREGTGRVVKWAIELSEFDLHFEPRHAIKSQALADFVVE;Region_Seq=NTDHTSRLAKWAIKVSAMDIAFEPRKAIKGQALADFVVE;Query_Seq=NTDHTSRLAKWAIKVSAMDIAFEPRKAIKGQALADFVVE;Identity=0.64;Similarity=0.77;Relat_Length=0.123;Relat_Interruptions=0.0;Hit_to_DB_Length=0.12
+scaffold146.1|size86774 dante protein_domain 16797 17666 1057 - . Name=INT;Final_Classification=Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Athila;Region_Hits_Classifications=INT|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Athila;Best_Hit=Ty3-INT__REXdb_ID6633|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Athila:16812-17666[98percent];Best_Hit_DB_Pos=1:285of313;DB_Seq=HSHSYGGHFGAKRTAHKVLESGFYWPSIFKDAYHFCKSCEKCQRTGNITHKNQMPLTNILVSEIFDVWGIDFMGPFPSSFGNLYILLVVDYVSKWIEAKATRTNDAKVVLDFVRTHIFNRFGIPKAIISDRGTHFCNRSMEALLRKYHVTHRTSTAYHPQTNGQAEISNREIKSILEKIVQPNRRDWSLRLGDALWAYRTAYKSPIGMSPYRMIYGKACHLPVELEHKAFWAIKQCNMDYDAAGIARKLQLQELEEIRNDAYENARIYKEKTKNLHDRMLTRKEF;Region_Seq=HASDYGGHFGPNRTARRILDVGFYWPSIFRDVYQFCRTCDACQRVGNITNRREMPQNYILANEIFDIWGLDFMGPFPQSQGNNYILVAVDYVSKWVEAIPTRTDDGKTVTEFLRKNIFTRYGVPKAIISDRGTHFCNSTMRAMMKKYNVIHKTTTAYHPQGNGQAEATNREIKSILEKVVNKKRSNWSQKLPDALWAYRTAYKTPIGTTPFRLIYGKHCNLPVGLEHKAYWAIREMNFEEGGDAELRQMQLQELDALRLEAYDNSRIYKERLKTYHDKKLLQQNFRERLS;Query_Seq=HASDYGGHFGPNRTARRILDVGFYWPSIFRDVYQFCRTCDACQRVGNITNRREMPQNYILANEIFDIWGLDFMGPFPQSQGNNYILVAVDYVSKWVEAIPTRTDDGKTVTEFLRKNIFTRYGVPKAIISDRGTHFCNSTMRAMMKKYNVIHKTTTAYHPQGNGQAEATNREIKSILEKVVNKKRSNWSQKLPDALWAYRTAYKTPIGTTPFRLIYGKHCNLPVGLEHKAYWAIREMNFEEGGDAELRQMQLQELDALRLEAYDNSRIYKERLKTYHDKKLLQQNF;Identity=0.61;Similarity=0.79;Relat_Length=0.911;Relat_Interruptions=0.0;Hit_to_DB_Length=0.91
+scaffold146.1|size86774 dante protein_domain 18554 18811 306 - . Name=INT;Final_Classification=Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Athila;Region_Hits_Classifications=INT|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Athila;Best_Hit=Ty3-INT__REXdb_ID6693|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Athila:18554-18802[96percent];Best_Hit_DB_Pos=231:313of313;DB_Seq=WALRLLNFDNNACGEKRKLQLQELEEMRLNAYESSRIYKERTKAYHDKKLQRREFQPGQQVLLFNSRLRLFPGKLKSKWSGPF;Region_Seq=QGNWAIREMNFEEGGDAELRQMQLQELDALRLEAYDNSRIYKERLKAYHDKKILQQNFREGQQVLLFNSKLRLFPGKLKSRWMGPF;Query_Seq=WAIREMNFEEGGDAELRQMQLQELDALRLEAYDNSRIYKERLKAYHDKKILQQNFREGQQVLLFNSKLRLFPGKLKSRWMGPF;Identity=0.65;Similarity=0.82;Relat_Length=0.265;Relat_Interruptions=0.0;Hit_to_DB_Length=0.27
+scaffold146.1|size86774 dante protein_domain 19158 19478 197 - . Name=INT;Final_Classification=Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Athila;Region_Hits_Classifications=INT|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Athila;Best_Hit=Ty3-INT__REXdb_ID6659|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Athila:19182-19448[83percent];Best_Hit_DB_Pos=216:304of314;DB_Seq=YGKPCHLPVELEHKAWWAVKQCNMELDVAGQHRxLQLQELEEIRNDAYESSxIYKEKTKAFHDKQILRKNFEVGQKVLIFHSRLKLFPG;Region_Seq=PRGTISIGLNFGKQCKVLVGMEHENYWEIREMNYEEGADVEQKQMQLQKMDALKLEAYDNSRIDKEKLKAHHAKRILQQNCKKRQQVLIFDSKLKMFPGIPRWMEPF;Query_Seq=FGKQCKVLVGMEHENYWEIREMNYEEGADVEQKQMQLQKMDALKLEAYDNSRIDKEKLKAHHAKRILQQNCKKRQQVLIFDSKLKMFPG;Identity=0.42;Similarity=0.71;Relat_Length=0.283;Relat_Interruptions=0.0;Hit_to_DB_Length=0.28
+scaffold146.1|size86774 dante protein_domain 19976 20212 259 - . Name=PROT;Final_Classification=Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Athila;Region_Hits_Classifications=PROT|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Athila;Best_Hit=Ty3-PROT__REXdb_ID6659|Class_I|LTR|Ty3/gypsy|non-chromovirus|OTA|Athila:19976-20212[100percent];Best_Hit_DB_Pos=1:80of80;DB_Seq=MLDLGASINVMPYSIYNSLNLGPMEETCIIIQLADRSNAYPKGVMEDVLVQVNELVFPADFYILKMEDELSPNPTPILLG;Region_Seq=MVDLGASINLMPYYIYSALKLGSLQGTAIIIKLADRSETHPEGVVKDVLAQVNNLVFPADFYVLKMGEAENDDCPLLLG;Query_Seq=MVDLGASINLMPYYIYSALKLGSLQGTAIIIKLADRSETHPEGVVKDVLAQVNNLVFPADFYVLKM-GEAENDDCPLLLG;Identity=0.62;Similarity=0.79;Relat_Length=1.0;Relat_Interruptions=0.0;Hit_to_DB_Length=1.0
+scaffold146.1|size86774 dante protein_domain 28912 29124 216 - . Name=PROT;Final_Classification=Class_I|LTR|Ty1/copia|Bianca;Region_Hits_Classifications=PROT|Class_I|LTR|Ty1/copia|Bianca;Best_Hit=Ty1-PROT__REXdb_ID2599|Class_I|LTR|Ty1/copia|Bianca:28912-29124[100percent];Best_Hit_DB_Pos=1:71of71;DB_Seq=CLADCATTHTILRDKRYFLELTLIKANVSTISGTTNLVEGSGRANIMLPNGTRFHINDALYSSKSRRNLLS;Region_Seq=CLVDSATTHTILKNMRYFTSFEKRDVNIATIVCEANIVEGSGRAVIVLPSGTHIRIDDALYANKSRRNLLS;Query_Seq=CLVDSATTHTILKNMRYFTSFEKRDVNIATIVCEANIVEGSGRAVIVLPSGTHIRIDDALYANKSRRNLLS;Identity=0.59;Similarity=0.7;Relat_Length=1.0;Relat_Interruptions=0.0;Hit_to_DB_Length=1.0