Mercurial > repos > artbio > repenrich
annotate RepEnrich.py @ 13:530626b0757c draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
author | artbio |
---|---|
date | Tue, 02 Apr 2024 21:16:37 +0000 |
parents | 89e05f831259 |
children | bf866bedd4b4 |
rev | line source |
---|---|
0
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
1 import argparse |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
2 import csv |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
3 import os |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
4 import shlex |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
5 import subprocess |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
6 import sys |
13
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
7 from collections import defaultdict |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
8 from concurrent.futures import ProcessPoolExecutor |
0
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
9 |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
10 |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
11 parser = argparse.ArgumentParser(description=''' |
12
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
12 Repenrich aligns reads to Repeat Elements pseudogenomes\ |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
13 and counts aligned reads. RepEnrich_setup must be run\ |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
14 before its use''') |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
15 parser.add_argument('--annotation_file', action='store', |
0
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
16 metavar='annotation_file', |
12
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
17 help='RepeatMasker.org annotation file for your\ |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
18 organism. The file may be downloaded from\ |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
19 RepeatMasker.org. E.g. hg19_repeatmasker.txt') |
13
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
20 parser.add_argument('--alignment_bam', action='store', metavar='alignment_bam', |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
21 help='Bam alignments of unique mapper reads.') |
12
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
22 parser.add_argument('--fastqfile', action='store', metavar='fastqfile', |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
23 help='File of fastq reads mapping to multiple\ |
0
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
24 locations. Example: /data/multimap.fastq') |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
25 parser.add_argument('--fastqfile2', action='store', dest='fastqfile2', |
13
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
26 metavar='fastqfile2', default='', |
12
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
27 help='fastqfile #2 when using paired-end option.\ |
0
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
28 Default none') |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
29 parser.add_argument('--cpus', action='store', dest='cpus', metavar='cpus', |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
30 default="1", type=int, |
12
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
31 help='Number of CPUs. The more cpus the\ |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
32 faster RepEnrich performs. Default: "1"') |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
33 |
0
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
34 args = parser.parse_args() |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
35 |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
36 # parameters |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
37 annotation_file = args.annotation_file |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
38 unique_mapper_bam = args.alignment_bam |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
39 fastqfile_1 = args.fastqfile |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
40 fastqfile_2 = args.fastqfile2 |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
41 cpus = args.cpus |
12
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
42 # Change if simple repeats are differently annotated in your organism |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
43 simple_repeat = "Simple_repeat" |
13
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
44 if args.fastqfile2: |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
45 paired_end = True |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
46 else: |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
47 paired_end = False |
0
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
48 |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
49 # check that the programs we need are available |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
50 try: |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
51 subprocess.call(shlex.split("coverageBed -h"), |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
52 stdout=open(os.devnull, 'wb'), |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
53 stderr=open(os.devnull, 'wb')) |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
54 subprocess.call(shlex.split("bowtie --version"), |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
55 stdout=open(os.devnull, 'wb'), |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
56 stderr=open(os.devnull, 'wb')) |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
57 except OSError: |
12
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
58 print("Error: Bowtie or bedtools not loaded") |
0
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
59 raise |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
60 |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
61 |
13
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
62 def starts_with_numerical(list): |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
63 try: |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
64 if len(list) == 0: |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
65 return False |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
66 int(list[0]) |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
67 return True |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
68 except ValueError: |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
69 return False |
0
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
70 |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
71 |
13
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
72 # define a text importer for .out/.txt format of repbase |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
73 def import_text(filename, separator): |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
74 csv.field_size_limit(sys.maxsize) |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
75 file = csv.reader(open(filename), delimiter=separator, |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
76 skipinitialspace=True) |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
77 return [line for line in file if starts_with_numerical(line)] |
12
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
78 |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
79 |
13
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
80 def run_bowtie(args): |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
81 metagenome, fastqfile = args |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
82 b_opt = "-k 1 -p 1 --quiet" |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
83 command = shlex.split(f"bowtie {b_opt} -x {metagenome} {fastqfile}") |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
84 bowtie_align = subprocess.run(command, check=True, |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
85 capture_output=True, text=True).stdout |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
86 bowtie_align = bowtie_align.rstrip('\r\n').split('\n') |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
87 readlist = [metagenome] |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
88 if paired_end: |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
89 for line in bowtie_align: |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
90 readlist.append(line.split("/")[0]) |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
91 else: |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
92 for line in bowtie_align: |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
93 readlist.append(line.split("\t")[0]) |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
94 return readlist |
12
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
95 |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
96 |
13
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
97 # set a reference repeat list for the script |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
98 repeat_list = [listline[9].translate( |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
99 str.maketrans( |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
100 '()/', '___')) for listline in import_text(annotation_file, ' ')] |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
101 repeat_list = sorted(list(set(repeat_list))) |
12
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
102 |
13
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
103 # unique mapper counting |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
104 cmd = f"bedtools bamtobed -i {unique_mapper_bam} | \ |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
105 bedtools coverage -b stdin -a repnames.bed" |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
106 p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
107 bedtools_counts = p.communicate()[0].decode().rstrip('\r\n').split('\n') |
12
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
108 |
13
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
109 # parse bedtools output |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
110 counts = defaultdict(int) # key: repeat names, value: unique mapper counts |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
111 sumofrepeatreads = 0 |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
112 for line in bedtools_counts: |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
113 line = line.split('\t') |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
114 counts[line[3]] += int(line[4]) |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
115 sumofrepeatreads += int(line[4]) |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
116 print(f"Identified {sumofrepeatreads} unique reads that mapped to repeats.") |
12
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
117 |
13
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
118 # multimapper parsing |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
119 if not paired_end: |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
120 args_list = [(metagenome, fastqfile_1) for |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
121 metagenome in repeat_list] |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
122 with ProcessPoolExecutor(max_workers=cpus) as executor: |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
123 results = executor.map(run_bowtie, args_list) |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
124 else: |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
125 args_list = [(metagenome, fastqfile_1) for |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
126 metagenome in repeat_list] |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
127 args_list.extend([(metagenome, fastqfile_2) for |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
128 metagenome in repeat_list]) |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
129 with ProcessPoolExecutor(max_workers=cpus) as executor: |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
130 results = executor.map(run_bowtie, args_list) |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
131 |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
132 # Aggregate results (avoiding race conditions) |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
133 metagenome_reads = defaultdict(list) # repeat_name: list of multimap reads |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
134 for result in results: |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
135 metagenome_reads[result[0]] += result[1:] |
0
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
136 |
13
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
137 for name in metagenome_reads: |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
138 # read are only once in list |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
139 metagenome_reads[name] = list(set(metagenome_reads[name])) |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
140 # remove "no read" instances |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
141 metagenome_reads[name] = [read for read in metagenome_reads[name] |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
142 if read != ""] |
0
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
143 |
13
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
144 # implement repeats_by_reads from the inverse dictionnary metagenome_reads |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
145 repeats_by_reads = defaultdict(list) # readids: list of repeats names |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
146 for repname in metagenome_reads: |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
147 for read in metagenome_reads[repname]: |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
148 repeats_by_reads[read].append(repname) |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
149 for repname in repeats_by_reads: |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
150 repeats_by_reads[repname] = list(set(repeats_by_reads[repname])) |
12
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
151 |
13
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
152 # 3 dictionnaries and 1 pointer variable to be populated |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
153 fractionalcounts = defaultdict(float) |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
154 familyfractionalcounts = defaultdict(float) |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
155 classfractionalcounts = defaultdict(float) |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
156 sumofrepeatreads = 0 |
12
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
157 |
13
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
158 # Update counts dictionnary with sets of repeats (was "subfamilies") |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
159 # matched by multimappers |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
160 for repeat_set in repeats_by_reads.values(): |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
161 repeat_set_string = ','.join(repeat_set) |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
162 counts[repeat_set_string] += 1 |
0
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
163 sumofrepeatreads += 1 |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
164 |
13
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
165 print(f'Identified more {sumofrepeatreads} mutimapper repeat reads') |
0
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
166 |
13
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
167 # Populate fractionalcounts |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
168 for key, count in counts.items(): |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
169 key_list = key.split(',') |
12
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
170 for i in key_list: |
13
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
171 fractionalcounts[i] += count / len(key_list) |
0
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
172 |
13
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
173 # build repeat_ref for easy access to rep class and rep families |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
174 repeat_ref = defaultdict(dict) |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
175 repeats = import_text(annotation_file, ' ') |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
176 for repeat in repeats: |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
177 repeat_name = repeat[9].translate(str.maketrans('()/', '___')) |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
178 try: |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
179 repclass = repeat[10].split('/')[0] |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
180 repfamily = repeat[10].split('/')[1] |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
181 except IndexError: |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
182 repclass, repfamily = repeat[10], repeat[10] |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
183 repeat_ref[repeat_name]['class'] = repclass |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
184 repeat_ref[repeat_name]['family'] = repfamily |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
185 |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
186 # Populate classfractionalcounts and familyfractionalcounts |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
187 for key, value in fractionalcounts.items(): |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
188 classfractionalcounts[repeat_ref[key]['class']] += value |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
189 familyfractionalcounts[repeat_ref[key]['family']] += value |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
190 |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
191 # print class-, family- and fraction-repeats counts to files |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
192 with open("class_fraction_counts.tsv", 'w') as fout: |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
193 for key in sorted(classfractionalcounts): |
12
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
194 fout.write(f"{key}\t{classfractionalcounts[key]}\n") |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
195 |
13
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
196 with open("family_fraction_counts.tsv", 'w') as fout: |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
197 for key in sorted(familyfractionalcounts): |
12
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
198 fout.write(f"{key}\t{familyfractionalcounts[key]}\n") |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
199 |
13
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
200 with open("fraction_counts.tsv", 'w') as fout: |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
201 for key in sorted(fractionalcounts): |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
202 fout.write(f"{key}\t{repeat_ref[key]['class']}\t" |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
203 f"{repeat_ref[key]['family']}\t" |
530626b0757c
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit df6b9491ad06e8a85e67c663b68db3cce3eb0115
artbio
parents:
12
diff
changeset
|
204 f"{fractionalcounts[key]}\n") |