Mercurial > repos > artbio > repenrich2
annotate RepEnrich2.py @ 9:2b61c6407efb draft default tip
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 961a14cd73dd8e40b2f30d5e4df136b98cd8f07a
author | artbio |
---|---|
date | Thu, 25 Apr 2024 16:22:34 +0000 |
parents | 567549a49eb2 |
children |
rev | line source |
---|---|
0
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
1 import argparse |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
2 import csv |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
3 import os |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
4 import shlex |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
5 import subprocess |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
6 import sys |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
7 from collections import defaultdict |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
8 from concurrent.futures import ProcessPoolExecutor |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
9 |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
10 |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
# Command-line interface.
# Help texts are assembled with implicit string concatenation: a backslash
# continuation inside a string literal would keep the indentation of the
# following source line as part of the message.
parser = argparse.ArgumentParser(
    description='Repenrich aligns reads to Repeat Elements pseudogenomes '
                'and counts aligned reads. RepEnrich_setup must be run '
                'before its use')
parser.add_argument('--annotation_file', action='store',
                    metavar='annotation_file',
                    help='RepeatMasker.org annotation file for your '
                         'organism. The file may be downloaded from '
                         'RepeatMasker.org. E.g. hg19_repeatmasker.txt')
parser.add_argument('--alignment_bam', action='store', metavar='alignment_bam',
                    help='Bam alignments of unique mapper reads.')
parser.add_argument('--fastqfile', action='store', metavar='fastqfile',
                    help='File of fastq reads mapping to multiple '
                         'locations. Example: /data/multimap.fastq')
parser.add_argument('--fastqfile2', action='store', dest='fastqfile2',
                    metavar='fastqfile2', default='',
                    help='fastqfile #2 when using paired-end option. '
                         'Default none')
# The default is given as an int so that it matches ``type=int`` directly.
parser.add_argument('--cpus', action='store', dest='cpus', metavar='cpus',
                    default=1, type=int,
                    help='Number of CPUs. The more cpus the '
                         'faster RepEnrich performs. Default: "1"')

args = parser.parse_args()

# parameters
annotation_file = args.annotation_file
unique_mapper_bam = args.alignment_bam
fastqfile_1 = args.fastqfile
fastqfile_2 = args.fastqfile2
cpus = args.cpus
# Change if simple repeats are differently annotated in your organism
simple_repeat = "Simple_repeat"
# A non-empty --fastqfile2 switches the run to paired-end mode.
paired_end = bool(args.fastqfile2)
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
48 |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
# check that the programs we need are available
# subprocess.call raises OSError when the executable cannot be started; the
# exit status of the probe commands themselves is not inspected.
# subprocess.DEVNULL discards the output without opening (and leaking)
# file objects on os.devnull.
try:
    for probe in ("coverageBed -h", "bowtie2 --version"):
        subprocess.call(shlex.split(probe),
                        stdout=subprocess.DEVNULL,
                        stderr=subprocess.DEVNULL)
except OSError:
    print("Error: Bowtie2 or bedtools not loaded")
    raise
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
60 |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
61 |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
def starts_with_numerical(list):
    """Return True when the first item of ``list`` parses as an integer.

    ``list`` is one parsed row (a sequence of fields). An empty row, or a
    row whose first field makes ``int()`` raise ValueError, yields False.
    """
    # NOTE: the parameter name shadows the builtin ``list``; it is kept
    # unchanged so that the function signature stays compatible.
    if not list:
        return False
    # Only the conversion itself is guarded.
    try:
        int(list[0])
    except ValueError:
        return False
    return True
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
70 |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
71 |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
72 # define a text importer for .out/.txt format of repbase |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
def import_text(filename, separator):
    """Read a delimited text file and keep the rows starting with an integer.

    filename: path of the text file to read.
    separator: one-character field delimiter handed to ``csv.reader``.

    Returns a list of rows (each a list of strings) for which
    ``starts_with_numerical`` is true; all other rows are dropped.
    """
    # Lift the csv per-field size limit.
    csv.field_size_limit(sys.maxsize)
    # The context manager closes the file once all rows have been read;
    # newline='' is the mode the csv module documents for its input files.
    with open(filename, newline='') as handle:
        reader = csv.reader(handle, delimiter=separator,
                            skipinitialspace=True)
        return [row for row in reader if starts_with_numerical(row)]
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
78 |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
79 |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
# set a reference repeat list for the script
# Column 10 of each annotation row is the repeat name; '(', ')' and '/' are
# replaced by '_' (presumably so that the names can be used as file names,
# e.g. "<name>.reads" - confirm against RepEnrich2_setup).
# The translation table is built once instead of once per row.
name_table = str.maketrans('()/', '___')
repeat_list = sorted({listline[9].translate(name_table)
                      for listline in import_text(annotation_file, ' ')})

# unique mapper counting
# The BAM path is shell-quoted because the pipeline is run through a shell.
cmd = (f"bedtools bamtobed -i {shlex.quote(unique_mapper_bam)} | "
       "bedtools coverage -b stdin -a repnames.bed")
# check=True raises CalledProcessError when the pipeline exits non-zero
# instead of silently parsing a truncated or empty output.
bedtools_counts = subprocess.run(
    cmd, shell=True, check=True,
    stdout=subprocess.PIPE).stdout.decode().split('\n')

# parse bedtools output
counts = defaultdict(int)  # key: repeat names, value: unique mapper counts
sumofrepeatreads = 0
for line in bedtools_counts:
    # Skip blank lines (e.g. the trailing one, or an empty output).
    if not line.strip():
        continue
    fields = line.split('\t')
    # fields[3] is used as the repeat name and fields[4] as its read count.
    n_reads = int(fields[4])
    counts[fields[3]] += n_reads
    sumofrepeatreads += n_reads
print(f"Identified {sumofrepeatreads} unique reads that mapped to repeats.")

# print unique mapper counts
with open("unique_mapper_counts.tsv", 'w') as fout:
    fout.write("#element\tcount\n")
    for repname in sorted(counts):
        fout.write(f"{repname}\t{counts[repname]}\n")
08e50af788f7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit b1761de76fd068b86a06d88e70c1ba1d8644e7b5
artbio
parents:
0
diff
changeset
|
106 |
7
61e0404f0d76
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 067aa46ea7482d640369b95d5b3dabec5793396b
artbio
parents:
6
diff
changeset
|
107 |
61e0404f0d76
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 067aa46ea7482d640369b95d5b3dabec5793396b
artbio
parents:
6
diff
changeset
|
def run_bowtie(args):
    '''
    Align the multimapper reads against one repeat pseudogenome with
    bowtie2 and append the names of the aligned reads, one per line, to
    "<metagenome>.reads".

    write to files to save memory

    args: name of the bowtie2 index (the repeat "metagenome") to align to.

    Reads the module-level settings paired_end, fastqfile_1 and
    fastqfile_2. Returns None. Raises subprocess.CalledProcessError when
    bowtie2 exits with a non-zero status.
    '''
    metagenome = args
    b_opt = "-k 1 -p 2 --quiet --no-hd --no-unal"
    # The command is built as a list so that paths containing whitespace
    # are passed to bowtie2 as single arguments.
    command = ["bowtie2", *b_opt.split(), "-x", metagenome]
    if paired_end is True:
        # mate 1 and mate 2 come from the two distinct fastq files
        command += ["-1", fastqfile_1, "-2", fastqfile_2]
    else:
        command.append(fastqfile_1)
    bowtie_align = subprocess.run(command, check=True,
                                  capture_output=True, text=True).stdout
    # Opening in append mode creates the file even when nothing aligned.
    with open(f"{metagenome}.reads", "a+") as readfile:
        for line in bowtie_align.split('\n'):
            fields = line.split()
            # An empty output yields a single blank line: nothing to record.
            if not fields:
                continue
            # First field is the read name; drop any "/1" or "/2" suffix.
            read = fields[0].split("/")[0]
            if read:
                readfile.write(f"{read}\n")
61e0404f0d76
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 067aa46ea7482d640369b95d5b3dabec5793396b
artbio
parents:
6
diff
changeset
|
127 |
61e0404f0d76
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 067aa46ea7482d640369b95d5b3dabec5793396b
artbio
parents:
6
diff
changeset
|
128 |
0
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
129 # multimapper parsing |
9
2b61c6407efb
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 961a14cd73dd8e40b2f30d5e4df136b98cd8f07a
artbio
parents:
8
diff
changeset
|
130 args_list = [metagenome for metagenome in repeat_list] |
7
61e0404f0d76
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 067aa46ea7482d640369b95d5b3dabec5793396b
artbio
parents:
6
diff
changeset
|
131 with ProcessPoolExecutor(max_workers=cpus) as executor: |
61e0404f0d76
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 067aa46ea7482d640369b95d5b3dabec5793396b
artbio
parents:
6
diff
changeset
|
132 results = executor.map(run_bowtie, args_list) |
0
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
133 |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
134 # Aggregate results (avoiding race conditions) |
7
61e0404f0d76
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 067aa46ea7482d640369b95d5b3dabec5793396b
artbio
parents:
6
diff
changeset
|
135 metagenome_reads = defaultdict(list) # metagenome: list of multimap reads |
0
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
136 |
9
2b61c6407efb
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 961a14cd73dd8e40b2f30d5e4df136b98cd8f07a
artbio
parents:
8
diff
changeset
|
137 # Now we read .reads files to populate metagnomes_reads |
7
61e0404f0d76
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 067aa46ea7482d640369b95d5b3dabec5793396b
artbio
parents:
6
diff
changeset
|
138 for metagenome in repeat_list: |
61e0404f0d76
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 067aa46ea7482d640369b95d5b3dabec5793396b
artbio
parents:
6
diff
changeset
|
139 with open(f"{metagenome}.reads") as readfile: |
61e0404f0d76
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 067aa46ea7482d640369b95d5b3dabec5793396b
artbio
parents:
6
diff
changeset
|
140 for read in readfile: |
61e0404f0d76
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 067aa46ea7482d640369b95d5b3dabec5793396b
artbio
parents:
6
diff
changeset
|
141 metagenome_reads[metagenome].append(read) |
61e0404f0d76
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 067aa46ea7482d640369b95d5b3dabec5793396b
artbio
parents:
6
diff
changeset
|
142 # keep each read only once in the list |
61e0404f0d76
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 067aa46ea7482d640369b95d5b3dabec5793396b
artbio
parents:
6
diff
changeset
|
143 metagenome_reads[metagenome] = list(set(metagenome_reads[metagenome])) |
0
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
144 |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
145 # implement repeats_by_reads from the inverse dictionary metagenome_reads |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
146 repeats_by_reads = defaultdict(list) # readids: list of repeats names |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
147 for repname in metagenome_reads: |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
148 for read in metagenome_reads[repname]: |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
149 repeats_by_reads[read].append(repname) |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
150 for repname in repeats_by_reads: |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
151 repeats_by_reads[repname] = list(set(repeats_by_reads[repname])) |
8
567549a49eb2
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit b3b166a8e991f49227e4888b8065b57cec0ba949
artbio
parents:
7
diff
changeset
|
152 # this repeats_by_reads dictionary is far too big |
0
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
153 |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
154 # 3 dictionaries and 1 counter variable to be populated |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
155 fractionalcounts = defaultdict(float) |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
156 familyfractionalcounts = defaultdict(float) |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
157 classfractionalcounts = defaultdict(float) |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
158 sumofrepeatreads = 0 |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
159 |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
160 # Update counts dictionary with sets of repeats (was "subfamilies") |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
161 # matched by multimappers |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
162 for repeat_set in repeats_by_reads.values(): |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
163 repeat_set_string = ','.join(repeat_set) |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
164 counts[repeat_set_string] += 1 |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
165 sumofrepeatreads += 1 |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
166 |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
167 print(f'Identified more {sumofrepeatreads} mutimapper repeat reads') |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
168 |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
169 # Populate fractionalcounts |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
170 for key, count in counts.items(): |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
171 key_list = key.split(',') |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
172 for i in key_list: |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
173 fractionalcounts[i] += count / len(key_list) |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
174 |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
175 # build repeat_ref for easy access to rep class and rep families |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
176 repeat_ref = defaultdict(dict) |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
177 repeats = import_text(annotation_file, ' ') |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
178 for repeat in repeats: |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
179 repeat_name = repeat[9].translate(str.maketrans('()/', '___')) |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
180 try: |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
181 repclass = repeat[10].split('/')[0] |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
182 repfamily = repeat[10].split('/')[1] |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
183 except IndexError: |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
184 repclass, repfamily = repeat[10], repeat[10] |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
185 repeat_ref[repeat_name]['class'] = repclass |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
186 repeat_ref[repeat_name]['family'] = repfamily |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
187 |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
188 # Populate classfractionalcounts and familyfractionalcounts |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
189 for key, value in fractionalcounts.items(): |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
190 classfractionalcounts[repeat_ref[key]['class']] += value |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
191 familyfractionalcounts[repeat_ref[key]['family']] += value |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
192 |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
193 # print class-, family- and fraction-repeats counts to files |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
194 with open("class_fraction_counts.tsv", 'w') as fout: |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
195 for key in sorted(classfractionalcounts): |
6
388a47ca4199
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit a9af1284d644055565b6cf3401569694218ab8e9
artbio
parents:
5
diff
changeset
|
196 fout.write(f"{key}\t{round(classfractionalcounts[key], 2)}\n") |
0
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
197 |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
198 with open("family_fraction_counts.tsv", 'w') as fout: |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
199 for key in sorted(familyfractionalcounts): |
6
388a47ca4199
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit a9af1284d644055565b6cf3401569694218ab8e9
artbio
parents:
5
diff
changeset
|
200 fout.write(f"{key}\t{round(familyfractionalcounts[key], 2)}\n") |
0
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
201 |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
202 with open("fraction_counts.tsv", 'w') as fout: |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
203 for key in sorted(fractionalcounts): |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
204 fout.write(f"{key}\t{repeat_ref[key]['class']}\t" |
4905a332a094
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
artbio
parents:
diff
changeset
|
205 f"{repeat_ref[key]['family']}\t" |
6
388a47ca4199
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit a9af1284d644055565b6cf3401569694218ab8e9
artbio
parents:
5
diff
changeset
|
206 f"{round(fractionalcounts[key], 2)}\n") |