Mercurial > repos > artbio > repenrich
comparison RepEnrich.py @ 13:530626b0757c draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
author | artbio |
---|---|
date | Tue, 02 Apr 2024 21:16:37 +0000 |
parents | 89e05f831259 |
children | bf866bedd4b4 |
comparison
equal
deleted
inserted
replaced
12:89e05f831259 | 13:530626b0757c |
---|---|
2 import csv | 2 import csv |
3 import os | 3 import os |
4 import shlex | 4 import shlex |
5 import subprocess | 5 import subprocess |
6 import sys | 6 import sys |
7 | 7 from collections import defaultdict |
8 import numpy | 8 from concurrent.futures import ProcessPoolExecutor |
9 | 9 |
10 | 10 |
11 parser = argparse.ArgumentParser(description=''' | 11 parser = argparse.ArgumentParser(description=''' |
12 Repenrich aligns reads to Repeat Elements pseudogenomes\ | 12 Repenrich aligns reads to Repeat Elements pseudogenomes\ |
13 and counts aligned reads. RepEnrich_setup must be run\ | 13 and counts aligned reads. RepEnrich_setup must be run\ |
15 parser.add_argument('--annotation_file', action='store', | 15 parser.add_argument('--annotation_file', action='store', |
16 metavar='annotation_file', | 16 metavar='annotation_file', |
17 help='RepeatMasker.org annotation file for your\ | 17 help='RepeatMasker.org annotation file for your\ |
18 organism. The file may be downloaded from\ | 18 organism. The file may be downloaded from\ |
19 RepeatMasker.org. E.g. hg19_repeatmasker.txt') | 19 RepeatMasker.org. E.g. hg19_repeatmasker.txt') |
20 parser.add_argument('--outputfolder', action='store', metavar='outputfolder', | 20 parser.add_argument('--alignment_bam', action='store', metavar='alignment_bam', |
21 help='Folder that will contain results. Should be the\ | 21 help='Bam alignments of unique mapper reads.') |
22 same as the one used for RepEnrich_setup.\ | |
23 Example: ./outputfolder') | |
24 parser.add_argument('--outputprefix', action='store', metavar='outputprefix', | |
25 help='Prefix name for Repenrich output files.') | |
26 parser.add_argument('--setup_folder', action='store', metavar='setup_folder', | |
27 help='Folder produced by RepEnrich_setup which contains\ | |
28 repeat element pseudogenomes.') | |
29 parser.add_argument('--fastqfile', action='store', metavar='fastqfile', | 22 parser.add_argument('--fastqfile', action='store', metavar='fastqfile', |
30 help='File of fastq reads mapping to multiple\ | 23 help='File of fastq reads mapping to multiple\ |
31 locations. Example: /data/multimap.fastq') | 24 locations. Example: /data/multimap.fastq') |
32 parser.add_argument('--alignment_bam', action='store', metavar='alignment_bam', | |
33 help='Bam alignments of unique mapper reads.') | |
34 parser.add_argument('--pairedend', action='store', dest='pairedend', | |
35 default='FALSE', | |
36 help='Change to TRUE for paired-end fastq files.\ | |
37 Default FALSE') | |
38 parser.add_argument('--fastqfile2', action='store', dest='fastqfile2', | 25 parser.add_argument('--fastqfile2', action='store', dest='fastqfile2', |
39 metavar='fastqfile2', default='none', | 26 metavar='fastqfile2', default='', |
40 help='fastqfile #2 when using paired-end option.\ | 27 help='fastqfile #2 when using paired-end option.\ |
41 Default none') | 28 Default none') |
42 parser.add_argument('--cpus', action='store', dest='cpus', metavar='cpus', | 29 parser.add_argument('--cpus', action='store', dest='cpus', metavar='cpus', |
43 default="1", type=int, | 30 default="1", type=int, |
44 help='Number of CPUs. The more cpus the\ | 31 help='Number of CPUs. The more cpus the\ |
46 | 33 |
47 args = parser.parse_args() | 34 args = parser.parse_args() |
48 | 35 |
49 # parameters | 36 # parameters |
50 annotation_file = args.annotation_file | 37 annotation_file = args.annotation_file |
51 outputfolder = args.outputfolder | |
52 outputfile_prefix = args.outputprefix | |
53 setup_folder = args.setup_folder | |
54 repeat_bed = os.path.join(setup_folder, 'repnames.bed') | |
55 unique_mapper_bam = args.alignment_bam | 38 unique_mapper_bam = args.alignment_bam |
56 fastqfile_1 = args.fastqfile | 39 fastqfile_1 = args.fastqfile |
57 fastqfile_2 = args.fastqfile2 | 40 fastqfile_2 = args.fastqfile2 |
58 cpus = args.cpus | 41 cpus = args.cpus |
59 b_opt = "-k1 -p 1 --quiet" | |
60 # Change if simple repeats are differently annotated in your organism | 42 # Change if simple repeats are differently annotated in your organism |
61 simple_repeat = "Simple_repeat" | 43 simple_repeat = "Simple_repeat" |
62 paired_end = args.pairedend | 44 if args.fastqfile2: |
45 paired_end = True | |
46 else: | |
47 paired_end = False | |
63 | 48 |
64 # check that the programs we need are available | 49 # check that the programs we need are available |
65 try: | 50 try: |
66 subprocess.call(shlex.split("coverageBed -h"), | 51 subprocess.call(shlex.split("coverageBed -h"), |
67 stdout=open(os.devnull, 'wb'), | 52 stdout=open(os.devnull, 'wb'), |
71 stderr=open(os.devnull, 'wb')) | 56 stderr=open(os.devnull, 'wb')) |
72 except OSError: | 57 except OSError: |
73 print("Error: Bowtie or bedtools not loaded") | 58 print("Error: Bowtie or bedtools not loaded") |
74 raise | 59 raise |
75 | 60 |
76 # define a csv reader that reads space delimited files | 61 |
77 print('Preparing for analysis using RepEnrich...') | 62 def starts_with_numerical(list): |
78 csv.field_size_limit(sys.maxsize) | 63 try: |
79 | 64 if len(list) == 0: |
80 | 65 return False |
66 int(list[0]) | |
67 return True | |
68 except ValueError: | |
69 return False | |
70 | |
71 | |
72 # define a text importer for .out/.txt format of repbase | |
81 def import_text(filename, separator): | 73 def import_text(filename, separator): |
82 for line in csv.reader(open(filename), delimiter=separator, | 74 csv.field_size_limit(sys.maxsize) |
83 skipinitialspace=True): | 75 file = csv.reader(open(filename), delimiter=separator, |
84 if line: | 76 skipinitialspace=True) |
85 yield line | 77 return [line for line in file if starts_with_numerical(line)] |
86 | 78 |
87 | 79 |
88 # build dictionaries to convert repclass and rep families | 80 def run_bowtie(args): |
89 repeatclass, repeatfamily = {}, {} | 81 metagenome, fastqfile = args |
82 b_opt = "-k 1 -p 1 --quiet" | |
83 command = shlex.split(f"bowtie {b_opt} -x {metagenome} {fastqfile}") | |
84 bowtie_align = subprocess.run(command, check=True, | |
85 capture_output=True, text=True).stdout | |
86 bowtie_align = bowtie_align.rstrip('\r\n').split('\n') | |
87 readlist = [metagenome] | |
88 if paired_end: | |
89 for line in bowtie_align: | |
90 readlist.append(line.split("/")[0]) | |
91 else: | |
92 for line in bowtie_align: | |
93 readlist.append(line.split("\t")[0]) | |
94 return readlist | |
95 | |
96 | |
97 # set a reference repeat list for the script | |
98 repeat_list = [listline[9].translate( | |
99 str.maketrans( | |
100 '()/', '___')) for listline in import_text(annotation_file, ' ')] | |
101 repeat_list = sorted(list(set(repeat_list))) | |
102 | |
103 # unique mapper counting | |
104 cmd = f"bedtools bamtobed -i {unique_mapper_bam} | \ | |
105 bedtools coverage -b stdin -a repnames.bed" | |
106 p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) | |
107 bedtools_counts = p.communicate()[0].decode().rstrip('\r\n').split('\n') | |
108 | |
109 # parse bedtools output | |
110 counts = defaultdict(int) # key: repeat names, value: unique mapper counts | |
111 sumofrepeatreads = 0 | |
112 for line in bedtools_counts: | |
113 line = line.split('\t') | |
114 counts[line[3]] += int(line[4]) | |
115 sumofrepeatreads += int(line[4]) | |
116 print(f"Identified {sumofrepeatreads} unique reads that mapped to repeats.") | |
117 | |
118 # multimapper parsing | |
119 if not paired_end: | |
120 args_list = [(metagenome, fastqfile_1) for | |
121 metagenome in repeat_list] | |
122 with ProcessPoolExecutor(max_workers=cpus) as executor: | |
123 results = executor.map(run_bowtie, args_list) | |
124 else: | |
125 args_list = [(metagenome, fastqfile_1) for | |
126 metagenome in repeat_list] | |
127 args_list.extend([(metagenome, fastqfile_2) for | |
128 metagenome in repeat_list]) | |
129 with ProcessPoolExecutor(max_workers=cpus) as executor: | |
130 results = executor.map(run_bowtie, args_list) | |
131 | |
132 # Aggregate results (avoiding race conditions) | |
133 metagenome_reads = defaultdict(list) # repeat_name: list of multimap reads | |
134 for result in results: | |
135 metagenome_reads[result[0]] += result[1:] | |
136 | |
137 for name in metagenome_reads: | |
138 # reads appear only once in the list |
139 metagenome_reads[name] = list(set(metagenome_reads[name])) | |
140 # remove "no read" instances | |
141 metagenome_reads[name] = [read for read in metagenome_reads[name] | |
142 if read != ""] | |
143 | |
144 # implement repeats_by_reads from the inverse dictionary metagenome_reads |
145 repeats_by_reads = defaultdict(list) # readids: list of repeats names | |
146 for repname in metagenome_reads: | |
147 for read in metagenome_reads[repname]: | |
148 repeats_by_reads[read].append(repname) | |
149 for repname in repeats_by_reads: | |
150 repeats_by_reads[repname] = list(set(repeats_by_reads[repname])) | |
151 | |
152 # 3 dictionaries and 1 counter variable to be populated |
153 fractionalcounts = defaultdict(float) | |
154 familyfractionalcounts = defaultdict(float) | |
155 classfractionalcounts = defaultdict(float) | |
156 sumofrepeatreads = 0 | |
157 | |
158 # Update counts dictionary with sets of repeats (was "subfamilies") |
159 # matched by multimappers | |
160 for repeat_set in repeats_by_reads.values(): | |
161 repeat_set_string = ','.join(repeat_set) | |
162 counts[repeat_set_string] += 1 | |
163 sumofrepeatreads += 1 | |
164 | |
165 print(f'Identified {sumofrepeatreads} additional multimapper repeat reads') |
166 | |
167 # Populate fractionalcounts | |
168 for key, count in counts.items(): | |
169 key_list = key.split(',') | |
170 for i in key_list: | |
171 fractionalcounts[i] += count / len(key_list) | |
172 | |
173 # build repeat_ref for easy access to rep class and rep families | |
174 repeat_ref = defaultdict(dict) | |
90 repeats = import_text(annotation_file, ' ') | 175 repeats = import_text(annotation_file, ' ') |
91 # skip three first lines of the iterator | |
92 for line in range(3): | |
93 next(repeats) | |
94 for repeat in repeats: | 176 for repeat in repeats: |
95 classfamily = [] | 177 repeat_name = repeat[9].translate(str.maketrans('()/', '___')) |
96 classfamily = repeat[10].split('/') | 178 try: |
97 matching_repeat = repeat[9].translate(str.maketrans('()/', '___')) | 179 repclass = repeat[10].split('/')[0] |
98 repeatclass[matching_repeat] = classfamily[0] | 180 repfamily = repeat[10].split('/')[1] |
99 if len(classfamily) == 2: | 181 except IndexError: |
100 repeatfamily[matching_repeat] = classfamily[1] | 182 repclass, repfamily = repeat[10], repeat[10] |
101 else: | 183 repeat_ref[repeat_name]['class'] = repclass |
102 repeatfamily[matching_repeat] = classfamily[0] | 184 repeat_ref[repeat_name]['family'] = repfamily |
103 | 185 |
104 # build list of repeats initializing dictionaries for downstream analysis' | 186 # Populate classfractionalcounts and familyfractionalcounts |
105 repgenome_path = os.path.join(setup_folder, 'repgenomes_key.txt') | 187 for key, value in fractionalcounts.items(): |
106 reptotalcounts = {line[0]: 0 for line in import_text(repgenome_path, '\t')} | 188 classfractionalcounts[repeat_ref[key]['class']] += value |
107 fractionalcounts = {line[0]: 0 for line in import_text(repgenome_path, '\t')} | 189 familyfractionalcounts[repeat_ref[key]['family']] += value |
108 classtotalcounts = {repeatclass[line[0]]: 0 for line in import_text( | 190 |
109 repgenome_path, '\t') if line[0] in repeatclass} | 191 # print class-, family- and fraction-repeats counts to files |
110 classfractionalcounts = {repeatclass[line[0]]: 0 for line in import_text( | 192 with open("class_fraction_counts.tsv", 'w') as fout: |
111 repgenome_path, '\t') if line[0] in repeatclass} | 193 for key in sorted(classfractionalcounts): |
112 familytotalcounts = {repeatfamily[line[0]]: 0 for line in import_text( | |
113 repgenome_path, '\t') if line[0] in repeatfamily} | |
114 familyfractionalcounts = {repeatfamily[line[0]]: 0 for line in import_text( | |
115 repgenome_path, '\t') if line[0] in repeatfamily} | |
116 reptotalcounts_simple = {(simple_repeat if line[0] in repeatfamily and | |
117 repeatfamily[line[0]] == simple_repeat else | |
118 line[0]): 0 for line in import_text( | |
119 repgenome_path, '\t')} | |
120 repeat_key = {line[0]: int(line[1]) for line in import_text( | |
121 repgenome_path, '\t')} | |
122 rev_repeat_key = {int(line[1]): line[0] for line in import_text( | |
123 repgenome_path, '\t')} | |
124 repeat_list = [line[0] for line in import_text(repgenome_path, '\t')] | |
125 | |
126 # map the repeats to the psuedogenomes: | |
127 if not os.path.exists(outputfolder): | |
128 os.mkdir(outputfolder) | |
129 | |
130 # Conduct the regions sorting | |
131 fileout = os.path.join(outputfolder, f"{outputfile_prefix}_regionsorter.txt") | |
132 command = shlex.split(f"coverageBed -abam {unique_mapper_bam} -b \ | |
133 {os.path.join(setup_folder, 'repnames.bed')}") | |
134 with open(fileout, 'w') as stdout: | |
135 subprocess.run(command, stdout=stdout, check=True) | |
136 | |
137 counts = {} | |
138 sumofrepeatreads = 0 | |
139 with open(fileout) as filein: | |
140 for line in filein: | |
141 line = line.split('\t') | |
142 if not str(repeat_key[line[3]]) in counts: | |
143 counts[str(repeat_key[line[3]])] = 0 | |
144 counts[str(repeat_key[line[3]])] += int(line[4]) | |
145 sumofrepeatreads += int(line[4]) | |
146 print(f"Identified {sumofrepeatreads} unique reads that \ | |
147 mapped to repeats.") | |
148 | |
149 | |
150 def run_bowtie(metagenome, fastqfile, folder): | |
151 metagenomepath = os.path.join(setup_folder, metagenome) | |
152 output_file = os.path.join(folder, f"{metagenome}.bowtie") | |
153 command = shlex.split(f"bowtie {b_opt} {metagenomepath} {fastqfile}") | |
154 with open(output_file, 'w') as stdout: | |
155 return subprocess.Popen(command, stdout=stdout) | |
156 | |
157 | |
158 if paired_end == 'FALSE': | |
159 folder_pair1 = os.path.join(outputfolder, 'pair1_bowtie') | |
160 os.makedirs(folder_pair1, exist_ok=True) | |
161 | |
162 print("Processing repeat pseudogenomes...") | |
163 processes = [] | |
164 ticker = 0 | |
165 | |
166 for metagenome in repeat_list: | |
167 processes.append(run_bowtie(metagenome, fastqfile_1, folder_pair1)) | |
168 ticker += 1 | |
169 if ticker == cpus: | |
170 for p in processes: | |
171 p.communicate() | |
172 ticker = 0 | |
173 processes = [] | |
174 | |
175 for p in processes: | |
176 p.communicate() | |
177 # Combine the output from both read pairs: | |
178 print('Sorting and combining the output for both read pairs....') | |
179 sorted_bowtie = os.path.join(outputfolder, 'sorted_bowtie') | |
180 os.makedirs(sorted_bowtie, exist_ok=True) | |
181 for metagenome in repeat_list: | |
182 file1 = os.path.join(folder_pair1, f"{metagenome}.bowtie") | |
183 fileout = os.path.join(sorted_bowtie, f"{metagenome}.bowtie") | |
184 with open(fileout, 'w') as stdout: | |
185 p1 = subprocess.Popen(['cat', file1], stdout=subprocess.PIPE) | |
186 p2 = subprocess.Popen(['cut', '-f1'], stdin=p1.stdout, | |
187 stdout=subprocess.PIPE) | |
188 p3 = subprocess.Popen(['cut', '-f1', "-d/"], stdin=p2.stdout, | |
189 stdout=subprocess.PIPE) | |
190 p4 = subprocess.Popen(['sort'], stdin=p3.stdout, | |
191 stdout=subprocess.PIPE) | |
192 p5 = subprocess.Popen(['uniq'], stdin=p4.stdout, stdout=stdout) | |
193 p5.communicate() | |
194 stdout.close() | |
195 print('completed ...') | |
196 else: | |
197 folder_pair1 = os.path.join(outputfolder, 'pair1_bowtie') | |
198 folder_pair2 = os.path.join(outputfolder, 'pair2_bowtie') | |
199 os.makedirs(folder_pair1, exist_ok=True) | |
200 os.makedirs(folder_pair2, exist_ok=True) | |
201 | |
202 print("Processing repeat pseudogenomes...") | |
203 ps, psb, ticker = [], [], 0 | |
204 | |
205 for metagenome in repeat_list: | |
206 ps.append(run_bowtie(metagenome, fastqfile_1, folder_pair1)) | |
207 ticker += 1 | |
208 if fastqfile_2 != 'none': | |
209 psb.append(run_bowtie(metagenome, fastqfile_2, folder_pair2)) | |
210 ticker += 1 | |
211 if ticker >= cpus: | |
212 for p in ps: | |
213 p.communicate() | |
214 for p in psb: | |
215 p.communicate() | |
216 ticker = 0 | |
217 ps = [] | |
218 psb = [] | |
219 | |
220 for p in ps: | |
221 p.communicate() | |
222 for p in psb: | |
223 p.communicate() | |
224 # combine the output from both read pairs: | |
225 print('Sorting and combining the output for both read pairs...') | |
226 if not os.path.exists(outputfolder + os.path.sep + 'sorted_bowtie'): | |
227 os.mkdir(outputfolder + os.path.sep + 'sorted_bowtie') | |
228 sorted_bowtie = outputfolder + os.path.sep + 'sorted_bowtie' | |
229 for metagenome in repeat_list: | |
230 file1 = folder_pair1 + os.path.sep + metagenome + '.bowtie' | |
231 file2 = folder_pair2 + os.path.sep + metagenome + '.bowtie' | |
232 fileout = sorted_bowtie + os.path.sep + metagenome + '.bowtie' | |
233 with open(fileout, 'w') as stdout: | |
234 p1 = subprocess.Popen(['cat', file1, file2], | |
235 stdout=subprocess.PIPE) | |
236 p2 = subprocess.Popen(['cut', '-f1', "-d "], stdin=p1.stdout, | |
237 stdout=subprocess.PIPE) | |
238 p3 = subprocess.Popen(['cut', '-f1', "-d/"], stdin=p2.stdout, | |
239 stdout=subprocess.PIPE) | |
240 p4 = subprocess.Popen(['sort'], stdin=p3.stdout, | |
241 stdout=subprocess.PIPE) | |
242 p5 = subprocess.Popen(['uniq'], stdin=p4.stdout, stdout=stdout) | |
243 p5.communicate() | |
244 stdout.close() | |
245 print('completed ...') | |
246 | |
247 # build a file of repeat keys for all reads | |
248 print('Writing and processing intermediate files...') | |
249 sorted_bowtie = os.path.join(outputfolder, 'sorted_bowtie') | |
250 sumofrepeatreads = 0 | |
251 readid = {} | |
252 | |
253 for rep in repeat_list: | |
254 for data in import_text( | |
255 f"{os.path.join(sorted_bowtie, rep)}.bowtie", '\t'): | |
256 readid[data[0]] = '' | |
257 | |
258 for rep in repeat_list: | |
259 for data in import_text( | |
260 f"{os.path.join(sorted_bowtie, rep)}.bowtie", '\t'): | |
261 readid[data[0]] += f"{repeat_key[rep]}," | |
262 | |
263 for subfamilies in readid.values(): | |
264 if subfamilies not in counts: | |
265 counts[subfamilies] = 0 | |
266 counts[subfamilies] += 1 | |
267 sumofrepeatreads += 1 | |
268 | |
269 print(f'Identified {sumofrepeatreads} reads that mapped to \ | |
270 repeats for unique and multimappers.') | |
271 print("Conducting final calculations...") | |
272 | |
273 # building the total counts for repeat element enrichment... | |
274 for x in counts.keys(): | |
275 count = counts[x] | |
276 x = x.strip(',').split(',') | |
277 for i in x: | |
278 reptotalcounts[rev_repeat_key[int(i)]] += int(count) | |
279 # building the fractional counts for repeat element enrichment... | |
280 for x in counts.keys(): | |
281 count = counts[x] | |
282 x = x.strip(',') .split(',') | |
283 splits = len(x) | |
284 for i in x: | |
285 fractionalcounts[rev_repeat_key[int(i)]] += float( | |
286 numpy.divide(float(count), float(splits))) | |
287 # building categorized table of repeat element enrichment... | |
288 repcounts = {} | |
289 repcounts['other'] = 0 | |
290 for key in counts.keys(): | |
291 key_list = key.strip(',').split(',') | |
292 repname = '' | |
293 for i in key_list: | |
294 repname = os.path.join(repname, rev_repeat_key[int(i)]) | |
295 repcounts[repname] = counts[key] | |
296 # building the total counts for class enrichment... | |
297 for key in reptotalcounts.keys(): | |
298 classtotalcounts[repeatclass[key]] += reptotalcounts[key] | |
299 # building total counts for family enrichment... | |
300 for key in reptotalcounts.keys(): | |
301 familytotalcounts[repeatfamily[key]] += reptotalcounts[key] | |
302 # building unique counts table | |
303 repcounts2 = {} | |
304 for rep in repeat_list: | |
305 if "/" + rep in repcounts: | |
306 repcounts2[rep] = repcounts["/" + rep] | |
307 else: | |
308 repcounts2[rep] = 0 | |
309 # building the fractionalcounts counts for class enrichment... | |
310 for key in fractionalcounts.keys(): | |
311 classfractionalcounts[repeatclass[key]] += fractionalcounts[key] | |
312 # building fractional counts for family enrichment... | |
313 for key in fractionalcounts.keys(): | |
314 familyfractionalcounts[repeatfamily[key]] += fractionalcounts[key] | |
315 | |
316 # print output to file of the categorized counts and total overlapping counts: | |
317 print('Writing final output...') | |
318 with open(f"{os.path.join(outputfolder, outputfile_prefix)}_" | |
319 f"class_fraction_counts.txt", 'w') as fout: | |
320 for key in sorted(classfractionalcounts.keys()): | |
321 fout.write(f"{key}\t{classfractionalcounts[key]}\n") | 194 fout.write(f"{key}\t{classfractionalcounts[key]}\n") |
322 | 195 |
323 with open(f"{os.path.join(outputfolder, outputfile_prefix)}_" | 196 with open("family_fraction_counts.tsv", 'w') as fout: |
324 f"family_fraction_counts.txt", 'w') as fout: | 197 for key in sorted(familyfractionalcounts): |
325 for key in sorted(familyfractionalcounts.keys()): | |
326 fout.write(f"{key}\t{familyfractionalcounts[key]}\n") | 198 fout.write(f"{key}\t{familyfractionalcounts[key]}\n") |
327 | 199 |
328 with open(f"{os.path.join(outputfolder, outputfile_prefix)}_" | 200 with open("fraction_counts.tsv", 'w') as fout: |
329 f"fraction_counts.txt", 'w') as fout: | 201 for key in sorted(fractionalcounts): |
330 for key in sorted(fractionalcounts.keys()): | 202 fout.write(f"{key}\t{repeat_ref[key]['class']}\t" |
331 fout.write(f"{key}\t{repeatclass[key]}\t{repeatfamily[key]}\t" | 203 f"{repeat_ref[key]['family']}\t" |
332 f"{int(fractionalcounts[key])}\n") | 204 f"{fractionalcounts[key]}\n") |