Mercurial > repos > artbio > repenrich
annotate RepEnrich.py @ 12:89e05f831259 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
author | artbio |
---|---|
date | Mon, 18 Mar 2024 09:39:44 +0000 |
parents | 6f4143893463 |
children | 530626b0757c |
rev | line source |
---|---|
0
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
1 import argparse |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
2 import csv |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
3 import os |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
4 import shlex |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
5 import subprocess |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
6 import sys |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
7 |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
8 import numpy |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
9 |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
10 |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
11 parser = argparse.ArgumentParser(description=''' |
12
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
12 Repenrich aligns reads to Repeat Elements pseudogenomes\ |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
13 and counts aligned reads. RepEnrich_setup must be run\ |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
14 before its use''') |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
15 parser.add_argument('--annotation_file', action='store', |
0
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
16 metavar='annotation_file', |
12
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
17 help='RepeatMasker.org annotation file for your\ |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
18 organism. The file may be downloaded from\ |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
19 RepeatMasker.org. E.g. hg19_repeatmasker.txt') |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
20 parser.add_argument('--outputfolder', action='store', metavar='outputfolder', |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
21 help='Folder that will contain results. Should be the\ |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
22 same as the one used for RepEnrich_setup.\ |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
23 Example: ./outputfolder') |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
24 parser.add_argument('--outputprefix', action='store', metavar='outputprefix', |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
25 help='Prefix name for Repenrich output files.') |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
26 parser.add_argument('--setup_folder', action='store', metavar='setup_folder', |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
27 help='Folder produced by RepEnrich_setup which contains\ |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
28 repeat element pseudogenomes.') |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
29 parser.add_argument('--fastqfile', action='store', metavar='fastqfile', |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
30 help='File of fastq reads mapping to multiple\ |
0
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
31 locations. Example: /data/multimap.fastq') |
12
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
32 parser.add_argument('--alignment_bam', action='store', metavar='alignment_bam', |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
33 help='Bam alignments of unique mapper reads.') |
0
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
34 parser.add_argument('--pairedend', action='store', dest='pairedend', |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
35 default='FALSE', |
12
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
36 help='Change to TRUE for paired-end fastq files.\ |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
37 Default FALSE') |
0
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
38 parser.add_argument('--fastqfile2', action='store', dest='fastqfile2', |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
39 metavar='fastqfile2', default='none', |
12
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
40 help='fastqfile #2 when using paired-end option.\ |
0
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
41 Default none') |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
42 parser.add_argument('--cpus', action='store', dest='cpus', metavar='cpus', |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
43 default="1", type=int, |
12
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
44 help='Number of CPUs. The more cpus the\ |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
45 faster RepEnrich performs. Default: "1"') |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
46 |
0
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
47 args = parser.parse_args() |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
48 |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
49 # parameters |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
50 annotation_file = args.annotation_file |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
51 outputfolder = args.outputfolder |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
52 outputfile_prefix = args.outputprefix |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
53 setup_folder = args.setup_folder |
12
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
54 repeat_bed = os.path.join(setup_folder, 'repnames.bed') |
0
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
55 unique_mapper_bam = args.alignment_bam |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
56 fastqfile_1 = args.fastqfile |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
57 fastqfile_2 = args.fastqfile2 |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
58 cpus = args.cpus |
12
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
59 b_opt = "-k1 -p 1 --quiet" |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
60 # Change if simple repeats are differently annotated in your organism |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
61 simple_repeat = "Simple_repeat" |
0
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
62 paired_end = args.pairedend |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
63 |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
64 # check that the programs we need are available |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
65 try: |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
66 subprocess.call(shlex.split("coverageBed -h"), |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
67 stdout=open(os.devnull, 'wb'), |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
68 stderr=open(os.devnull, 'wb')) |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
69 subprocess.call(shlex.split("bowtie --version"), |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
70 stdout=open(os.devnull, 'wb'), |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
71 stderr=open(os.devnull, 'wb')) |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
72 except OSError: |
12
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
73 print("Error: Bowtie or bedtools not loaded") |
0
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
74 raise |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
75 |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
76 # define a csv reader that reads space delimited files |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
77 print('Preparing for analysis using RepEnrich...') |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
78 csv.field_size_limit(sys.maxsize) |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
79 |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
80 |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
81 def import_text(filename, separator): |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
82 for line in csv.reader(open(filename), delimiter=separator, |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
83 skipinitialspace=True): |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
84 if line: |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
85 yield line |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
86 |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
87 |
12
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
88 # build dictionaries to convert repclass and rep families |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
89 repeatclass, repeatfamily = {}, {} |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
90 repeats = import_text(annotation_file, ' ') |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
91 # skip the first three lines of the iterator |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
92 for line in range(3): |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
93 next(repeats) |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
94 for repeat in repeats: |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
95 classfamily = [] |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
96 classfamily = repeat[10].split('/') |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
97 matching_repeat = repeat[9].translate(str.maketrans('()/', '___')) |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
98 repeatclass[matching_repeat] = classfamily[0] |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
99 if len(classfamily) == 2: |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
100 repeatfamily[matching_repeat] = classfamily[1] |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
101 else: |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
102 repeatfamily[matching_repeat] = classfamily[0] |
0
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
103 |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
104 # build list of repeats, initializing dictionaries for downstream analysis |
12
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
105 repgenome_path = os.path.join(setup_folder, 'repgenomes_key.txt') |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
106 reptotalcounts = {line[0]: 0 for line in import_text(repgenome_path, '\t')} |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
107 fractionalcounts = {line[0]: 0 for line in import_text(repgenome_path, '\t')} |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
108 classtotalcounts = {repeatclass[line[0]]: 0 for line in import_text( |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
109 repgenome_path, '\t') if line[0] in repeatclass} |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
110 classfractionalcounts = {repeatclass[line[0]]: 0 for line in import_text( |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
111 repgenome_path, '\t') if line[0] in repeatclass} |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
112 familytotalcounts = {repeatfamily[line[0]]: 0 for line in import_text( |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
113 repgenome_path, '\t') if line[0] in repeatfamily} |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
114 familyfractionalcounts = {repeatfamily[line[0]]: 0 for line in import_text( |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
115 repgenome_path, '\t') if line[0] in repeatfamily} |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
116 reptotalcounts_simple = {(simple_repeat if line[0] in repeatfamily and |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
117 repeatfamily[line[0]] == simple_repeat else |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
118 line[0]): 0 for line in import_text( |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
119 repgenome_path, '\t')} |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
120 repeat_key = {line[0]: int(line[1]) for line in import_text( |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
121 repgenome_path, '\t')} |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
122 rev_repeat_key = {int(line[1]): line[0] for line in import_text( |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
123 repgenome_path, '\t')} |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
124 repeat_list = [line[0] for line in import_text(repgenome_path, '\t')] |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
125 |
0
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
126 # map the repeats to the pseudogenomes: |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
127 if not os.path.exists(outputfolder): |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
128 os.mkdir(outputfolder) |
12
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
129 |
0
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
# Conduct the regions sorting: run coverageBed on the unique-mapper BAM
# against the repnames.bed file of the setup folder and keep its output in
# <outputfolder>/<outputfile_prefix>_regionsorter.txt.
fileout = os.path.join(outputfolder, f"{outputfile_prefix}_regionsorter.txt")
# The argument list is built directly (rather than shell-splitting a
# formatted string) so that paths containing spaces or quotes stay intact.
command = ['coverageBed', '-abam', str(unique_mapper_bam),
           '-b', os.path.join(setup_folder, 'repnames.bed')]
with open(fileout, 'w') as stdout:
    subprocess.run(command, stdout=stdout, check=True)

# Tally the coverageBed records: the 4th tab-separated field is translated
# through repeat_key and the integer in the 5th field is added both to that
# key's entry in counts and to the running total.
counts = {}
sumofrepeatreads = 0
with open(fileout) as filein:
    for line in filein:
        fields = line.split('\t')
        key = str(repeat_key[fields[3]])
        hits = int(fields[4])
        counts[key] = counts.get(key, 0) + hits
        sumofrepeatreads += hits
# Adjacent literals are used instead of a backslash continuation so that no
# source indentation ends up inside the printed message.
print(f"Identified {sumofrepeatreads} unique reads that "
      "mapped to repeats.")
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
148 |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
149 |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
def run_bowtie(metagenome, fastqfile, folder):
    """Start a bowtie run of *fastqfile* against one pseudogenome.

    The index path is ``metagenome`` joined onto the module-level
    ``setup_folder``; the module-level ``b_opt`` supplies the bowtie
    options.  Bowtie's standard output is redirected to
    ``<folder>/<metagenome>.bowtie``.

    Returns the ``subprocess.Popen`` object without waiting for it, so the
    caller decides when to wait for completion.
    """
    metagenomepath = os.path.join(setup_folder, metagenome)
    output_file = os.path.join(folder, f"{metagenome}.bowtie")
    # Only the option string is shell-split; the index and reads paths are
    # passed as single arguments so spaces or quotes in them are preserved.
    command = ['bowtie', *shlex.split(str(b_opt)),
               metagenomepath, str(fastqfile)]
    with open(output_file, 'w') as stdout:
        # The child process receives its own copy of the file descriptor, so
        # closing the parent's handle on return does not cut off its output.
        return subprocess.Popen(command, stdout=stdout)
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
156 |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
157 |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
def _wait_for_all(procs):
    """Block until every process in *procs* has terminated."""
    for proc in procs:
        proc.communicate()


def _write_sorted_read_ids(bowtie_files, destination, first_cut_args):
    """Run ``cat | cut | cut -f1 -d/ | sort | uniq`` into *destination*.

    bowtie_files: paths handed to ``cat``.
    destination: path of the file that receives the pipeline output.
    first_cut_args: arguments of the first ``cut`` stage.
    """
    with open(destination, 'w') as stdout:
        p1 = subprocess.Popen(['cat', *bowtie_files],
                              stdout=subprocess.PIPE)
        p2 = subprocess.Popen(['cut', *first_cut_args], stdin=p1.stdout,
                              stdout=subprocess.PIPE)
        p3 = subprocess.Popen(['cut', '-f1', "-d/"], stdin=p2.stdout,
                              stdout=subprocess.PIPE)
        p4 = subprocess.Popen(['sort'], stdin=p3.stdout,
                              stdout=subprocess.PIPE)
        p5 = subprocess.Popen(['uniq'], stdin=p4.stdout, stdout=stdout)
        upstream = (p1, p2, p3, p4)
        # Close the parent's copies of the pipe read ends: only the child
        # stages should hold them, and leaving them open leaks descriptors
        # across the many pseudogenomes processed here.
        for proc in upstream:
            proc.stdout.close()
        p5.communicate()
        # Reap the upstream stages so they do not linger as zombies.
        for proc in upstream:
            proc.wait()


if paired_end == 'FALSE':
    folder_pair1 = os.path.join(outputfolder, 'pair1_bowtie')
    os.makedirs(folder_pair1, exist_ok=True)

    print("Processing repeat pseudogenomes...")
    processes = []
    for metagenome in repeat_list:
        processes.append(run_bowtie(metagenome, fastqfile_1, folder_pair1))
        # Run at most `cpus` bowtie processes at a time; `>=` (as in the
        # paired-end branch) also throttles when cpus is not positive.
        if len(processes) >= cpus:
            _wait_for_all(processes)
            processes = []
    _wait_for_all(processes)

    # Combine the output from both read pairs:
    print('Sorting and combining the output for both read pairs....')
    sorted_bowtie = os.path.join(outputfolder, 'sorted_bowtie')
    os.makedirs(sorted_bowtie, exist_ok=True)
    for metagenome in repeat_list:
        file1 = os.path.join(folder_pair1, f"{metagenome}.bowtie")
        fileout = os.path.join(sorted_bowtie, f"{metagenome}.bowtie")
        _write_sorted_read_ids([file1], fileout, ['-f1'])
    print('completed ...')
else:
    folder_pair1 = os.path.join(outputfolder, 'pair1_bowtie')
    folder_pair2 = os.path.join(outputfolder, 'pair2_bowtie')
    os.makedirs(folder_pair1, exist_ok=True)
    os.makedirs(folder_pair2, exist_ok=True)

    print("Processing repeat pseudogenomes...")
    ps, psb = [], []
    for metagenome in repeat_list:
        ps.append(run_bowtie(metagenome, fastqfile_1, folder_pair1))
        if fastqfile_2 != 'none':
            psb.append(run_bowtie(metagenome, fastqfile_2, folder_pair2))
        # Both mates count towards the limit of concurrent processes.
        if len(ps) + len(psb) >= cpus:
            _wait_for_all(ps + psb)
            ps, psb = [], []
    _wait_for_all(ps + psb)

    # combine the output from both read pairs:
    print('Sorting and combining the output for both read pairs...')
    sorted_bowtie = os.path.join(outputfolder, 'sorted_bowtie')
    os.makedirs(sorted_bowtie, exist_ok=True)
    for metagenome in repeat_list:
        file1 = os.path.join(folder_pair1, f"{metagenome}.bowtie")
        file2 = os.path.join(folder_pair2, f"{metagenome}.bowtie")
        fileout = os.path.join(sorted_bowtie, f"{metagenome}.bowtie")
        # In this branch the first cut splits on a space, not on a tab.
        _write_sorted_read_ids([file1, file2], fileout, ['-f1', "-d "])
    print('completed ...')
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
246 |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
# build a file of repeat keys for all reads
print('Writing and processing intermediate files...')
sorted_bowtie = os.path.join(outputfolder, 'sorted_bowtie')
sumofrepeatreads = 0
readid = {}

# Map every read id (first field of each record) to the comma-terminated
# list of repeat keys of the pseudogenomes it was found in, in repeat_list
# order.  One pass over each file is enough: a missing entry starts out as
# the empty string.
for rep in repeat_list:
    for data in import_text(
            f"{os.path.join(sorted_bowtie, rep)}.bowtie", '\t'):
        readid[data[0]] = readid.get(data[0], '') + f"{repeat_key[rep]},"

# Each distinct key list becomes (or extends) an entry of counts, holding the
# number of reads that carry exactly that list.
for subfamilies in readid.values():
    counts[subfamilies] = counts.get(subfamilies, 0) + 1
sumofrepeatreads += len(readid)

# Adjacent literals are used instead of a backslash continuation so that no
# source indentation ends up inside the printed message.
print(f'Identified {sumofrepeatreads} reads that mapped to '
      'repeats for unique and multimappers.')
print("Conducting final calculations...")
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
272 |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
# building the total counts for repeat element enrichment...
# Every key of counts is a comma-separated list of integer repeat ids
# (possibly with a trailing comma); the whole count is credited to each
# repeat named in the list.
for id_list, count in counts.items():
    for repeat_id in id_list.strip(',').split(','):
        reptotalcounts[rev_repeat_key[int(repeat_id)]] += int(count)
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
# Fractional counts per repeat element: the count stored under a key is
# divided equally between all repeat ids listed in that key.
for id_string, n_reads in counts.items():
    repeat_ids = id_string.strip(',').split(',')
    # The share is identical for every repeat of this key, so it is
    # computed once before the inner loop.
    share = float(numpy.divide(float(n_reads), float(len(repeat_ids))))
    for repeat_id in repeat_ids:
        fractionalcounts[rev_repeat_key[int(repeat_id)]] += share
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
# building categorized table of repeat element enrichment...
# Each key of `counts` (a comma-separated list of repeat ids) is turned
# into a label of the form "/name1/name2...", and the count is stored
# under that label.
# The leading "/" matters: the unique-counts table built from repcounts
# looks entries up as "/" + rep. os.path.join('', name) returns just
# `name`, so labels built with it never matched that lookup and every
# unique count came out as 0.
repcounts = {'other': 0}
for key in counts.keys():
    key_list = key.strip(',').split(',')
    repname = ''.join(f"/{rev_repeat_key[int(i)]}" for i in key_list)
    repcounts[repname] = counts[key]
0
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
# Class-level totals: add each repeat's total count to the class that
# repeatclass assigns to it.
for repeat_name, total in reptotalcounts.items():
    classtotalcounts[repeatclass[repeat_name]] += total
# Family-level totals: add each repeat's total count to the family that
# repeatfamily assigns to it.
for repeat_name, total in reptotalcounts.items():
    familytotalcounts[repeatfamily[repeat_name]] += total
12
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
# Unique counts table: one entry per repeat in repeat_list, holding the
# value stored in repcounts under "/" + <repeat name>, or 0 when there is
# no such entry.
repcounts2 = {
    rep_name: repcounts.get("/" + rep_name, 0) for rep_name in repeat_list
}
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
# Class-level fractional counts: add each repeat's fractional count to
# the class that repeatclass assigns to it.
for repeat_name, fraction in fractionalcounts.items():
    classfractionalcounts[repeatclass[repeat_name]] += fraction
# Family-level fractional counts: add each repeat's fractional count to
# the family that repeatfamily assigns to it.
for repeat_name, fraction in fractionalcounts.items():
    familyfractionalcounts[repeatfamily[repeat_name]] += fraction
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
315 |
f6f0f1e5e940
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 61e203df0be5ed877ff92b917c7cde6eeeab8310
artbio
parents:
diff
changeset
|
# Write the class-level fractional counts: one "<class>\t<count>" row per
# class, in sorted class-name order.
print('Writing final output...')
class_table = (os.path.join(outputfolder, outputfile_prefix)
               + "_class_fraction_counts.txt")
with open(class_table, 'w') as fout:
    for class_name in sorted(classfractionalcounts):
        fout.write(f"{class_name}\t{classfractionalcounts[class_name]}\n")
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
322 |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
# Write the family-level fractional counts: one "<family>\t<count>" row
# per family, in sorted family-name order.
family_table = (os.path.join(outputfolder, outputfile_prefix)
                + "_family_fraction_counts.txt")
with open(family_table, 'w') as fout:
    for family_name in sorted(familyfractionalcounts):
        fout.write(
            f"{family_name}\t{familyfractionalcounts[family_name]}\n")
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
327 |
89e05f831259
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich commit 212b838f614f1f7b8e770473c026d9c1180722df
artbio
parents:
10
diff
changeset
|
# Write the per-repeat table: "<repeat>\t<class>\t<family>\t<count>" rows
# in sorted repeat-name order.
fraction_table = (os.path.join(outputfolder, outputfile_prefix)
                  + "_fraction_counts.txt")
with open(fraction_table, 'w') as fout:
    for repeat_name in sorted(fractionalcounts):
        # int() truncates the fractional count to a whole number on output.
        whole_count = int(fractionalcounts[repeat_name])
        fout.write(f"{repeat_name}\t{repeatclass[repeat_name]}\t"
                   f"{repeatfamily[repeat_name]}\t{whole_count}\n")