annotate RepEnrich.py @ 7:6df84986c146 draft default tip

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 18b0b149cdda97c8d01caeb6debb77002a3ac89f
author drosofff
date Wed, 31 May 2017 16:51:19 -0400
parents 1435d142041b
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
1 #!/usr/bin/env python
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
2 import argparse
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
3 import csv
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
4 import numpy
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
5 import os
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
6 import shlex
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
7 import shutil
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
8 import subprocess
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
9 import sys
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
10
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
# Command-line interface: six required positional arguments followed by
# optional flags.  The boolean-like options (--pairedend, --allcountmethod,
# --is_bed) are plain strings compared against 'TRUE' / 'FALSE' later on.
parser = argparse.ArgumentParser(
    description='Part II: Conducting the alignments to the psuedogenomes. Before doing this step you will require 1) a bamfile of the unique alignments with index 2) a fastq file of the reads mapping to more than one location. These files can be obtained using the following bowtie options [EXAMPLE: bowtie -S -m 1 --max multimap.fastq mm9 mate1_reads.fastq] Once you have the unique alignment bamfile and the reads mapping to more than one location in a fastq file you can run this step. EXAMPLE: python master_output.py /users/nneretti/data/annotation/hg19/hg19_repeatmasker.txt /users/nneretti/datasets/repeatmapping/POL3/Pol3_human/HeLa_InputChIPseq_Rep1 HeLa_InputChIPseq_Rep1 /users/nneretti/data/annotation/hg19/setup_folder HeLa_InputChIPseq_Rep1_multimap.fastq HeLa_InputChIPseq_Rep1.bam'
)
parser.add_argument('--version', action='version', version='%(prog)s 0.1')

# Required positional arguments, in the order they must be given.
parser.add_argument(
    'annotation_file',
    metavar='annotation_file',
    help='List RepeatMasker.org annotation file for your organism. The file may be downloaded from the RepeatMasker.org website. Example: /data/annotation/hg19/hg19_repeatmasker.txt'
)
parser.add_argument(
    'outputfolder',
    metavar='outputfolder',
    help='List folder to contain results. Example: /outputfolder'
)
parser.add_argument(
    'outputprefix',
    metavar='outputprefix',
    help='Enter prefix name for data. Example: HeLa_InputChIPseq_Rep1'
)
parser.add_argument(
    'setup_folder',
    metavar='setup_folder',
    help='List folder that contains the repeat element psuedogenomes. Example /data/annotation/hg19/setup_folder'
)
parser.add_argument(
    'fastqfile',
    metavar='fastqfile',
    help='Enter file for the fastq reads that map to multiple locations. Example /data/multimap.fastq'
)
parser.add_argument(
    'alignment_bam',
    metavar='alignment_bam',
    help='Enter bamfile output for reads that map uniquely. Example /bamfiles/old.bam'
)

# Optional arguments.
parser.add_argument(
    '--pairedend',
    dest='pairedend',
    default='FALSE',
    help='Designate this option for paired-end sequencing. Default FALSE change to TRUE'
)
parser.add_argument(
    '--collapserepeat',
    dest='collapserepeat',
    metavar='collapserepeat',
    default='Simple_repeat',
    help='Designate this option to generate a collapsed repeat type. Uncollapsed output is generated in addition to collapsed repeat type. Simple_repeat is default to simplify downstream analysis. You can change the default to another repeat name to collapse a seperate specific repeat instead or if the name of Simple_repeat is different for your organism. Default Simple_repeat'
)
parser.add_argument(
    '--fastqfile2',
    dest='fastqfile2',
    metavar='fastqfile2',
    default='none',
    help='Enter fastqfile2 when using paired-end option. Default none'
)
# The string default is converted by argparse through type=int, so args.cpus
# is always an integer.
parser.add_argument(
    '--cpus',
    dest='cpus',
    metavar='cpus',
    default="1",
    type=int,
    help='Enter available cpus per node. The more cpus the faster RepEnrich performs. RepEnrich is designed to only work on one node. Default: "1"'
)
parser.add_argument(
    '--allcountmethod',
    dest='allcountmethod',
    metavar='allcountmethod',
    default="FALSE",
    help='By default the pipeline only outputs the fraction count method. Consdidered to be the best way to count multimapped reads. Changing this option will include the unique count method, a conservative count, and the total count method, a liberal counting strategy. Our evaluation of simulated data indicated fraction counting is best. Default = FALSE, change to TRUE'
)
parser.add_argument(
    '--is_bed',
    dest='is_bed',
    metavar='is_bed',
    default='FALSE',
    help='Is the annotation file a bed file. This is also a compatible format. The file needs to be a tab seperated bed with optional fields. Ex. format chr\tstart\tend\tName_element\tclass\tfamily. The class and family should identical to name_element if not applicable. Default FALSE change to TRUE'
)
args = parser.parse_args()

# parameters
# Input and output locations.
annotation_file = args.annotation_file
outputfolder = args.outputfolder
outputfile_prefix = args.outputprefix
setup_folder = args.setup_folder
repeat_bed = os.path.sep.join([setup_folder, 'repnames.bed'])
unique_mapper_bam = args.alignment_bam
fastqfile_1 = args.fastqfile
fastqfile_2 = args.fastqfile2
# Run-time options.
cpus = args.cpus
# NOTE(review): the bowtie thread count is fixed at 1 here rather than taken
# from cpus -- confirm this is intended.
b_opt = "-k1 -p 1 --quiet"
simple_repeat = args.collapserepeat
paired_end = args.pairedend
allcountmethod = args.allcountmethod
is_bed = args.is_bed
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
42
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
43 ################################################################################
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
44 # check that the programs we need are available
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
# Probe for the two external programs by running each with a harmless flag.
# subprocess.call raises OSError when an executable cannot be started; the
# exit status of the probes is deliberately ignored.
try:
    # One handle to the null device is shared by every redirected stream and
    # is closed when the block exits, instead of opening (and never closing)
    # a new handle for each stdout/stderr redirection.
    with open(os.devnull, 'wb') as devnull:
        for probe in ("coverageBed -h", "bowtie --version"):
            subprocess.call(shlex.split(probe), stdout=devnull, stderr=devnull)
except OSError:
    print ("Error: Bowtie or BEDTools not loaded")
    raise
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
51
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
52 ################################################################################
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
# define a csv reader for delimited text files (each caller picks the separator)
print ('Preparing for analysis using RepEnrich...')
# Raise the csv module's per-field size cap to sys.maxsize.
csv.field_size_limit(sys.maxsize)


def import_text(filename, separator):
    """Yield the non-empty rows of a delimited text file.

    Each row is returned as a list of strings, as produced by csv.reader with
    ``skipinitialspace=True`` (whitespace directly after a delimiter is
    dropped).  Rows that parse to an empty list, such as blank lines, are
    skipped.

    filename  -- path of the file to read.
    separator -- single-character field delimiter handed to csv.reader.

    The file is opened inside a ``with`` block, so it is closed as soon as
    the generator is exhausted or its ``close()`` method is called, rather
    than being left open until garbage collection.
    """
    with open(filename) as handle:
        reader = csv.reader(handle, delimiter=separator,
                            skipinitialspace=True)
        for row in reader:
            if row:
                yield row
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
61
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
62 ################################################################################
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
63 # build dictionaries that map each repeat name to its class and to its family
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
# repeatclass / repeatfamily map a sanitised repeat name to its class and
# family.  Names are sanitised by replacing "(", ")" and "/" with "_".
repeatclass = {}
repeatfamily = {}
if is_bed == "FALSE":
    # Space-delimited annotation: the repeat name is read from column 9 and
    # the combined "class/family" value from column 10.
    fin = import_text(annotation_file, ' ')
    for row_number, line in enumerate(fin):
        # The first three rows yielded by the reader are skipped.
        if row_number < 3:
            continue
        # Split on a literal '/' rather than os.path.sep: the separator in
        # the annotation column does not depend on the operating system.
        classfamily = line[10].split('/')
        line9 = line[9].replace("(", "_").replace(")", "_").replace("/", "_")
        repeatclass[line9] = classfamily[0]
        if len(classfamily) == 2:
            repeatfamily[line9] = classfamily[1]
        else:
            # No single class/family pair: the class doubles as the family.
            repeatfamily[line9] = classfamily[0]
elif is_bed == "TRUE":
    # Tab-delimited BED-like annotation: name, class and family are read
    # from columns 3, 4 and 5.
    with open(annotation_file, 'r') as fin:
        for line in fin:
            line = line.strip('\n').split('\t')
            theclass = line[4]
            thefamily = line[5]
            line3 = line[3].replace("(", "_").replace(")", "_").replace("/", "_")
            repeatclass[line3] = theclass
            repeatfamily[line3] = thefamily
else:
    # Previously neither branch ran and the dictionaries were never created,
    # which surfaced later as a NameError; fail here with a clear message.
    raise ValueError("--is_bed must be 'TRUE' or 'FALSE', got %r" % (is_bed,))
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
93
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
94 ################################################################################
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
95 # build the list of repeats and initialize the count dictionaries used downstream
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
96 fin = import_text(setup_folder + os.path.sep + 'repgenomes_key.txt', '\t')
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
97 repeat_key ={}
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
98 rev_repeat_key ={}
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
99 repeat_list = []
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
100 reptotalcounts = {}
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
101 classfractionalcounts = {}
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
102 familyfractionalcounts = {}
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
103 classtotalcounts = {}
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
104 familytotalcounts = {}
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
105 reptotalcounts_simple = {}
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
106 fractionalcounts = {}
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
107 i = 0
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
108 for line in fin:
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
109 reptotalcounts[line[0]] = 0
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
110 fractionalcounts[line[0]] = 0
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
111 if line[0] in repeatclass:
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
112 classtotalcounts[repeatclass[line[0]]] = 0
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
113 classfractionalcounts[repeatclass[line[0]]] = 0
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
114 if line[0] in repeatfamily:
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
115 familytotalcounts[repeatfamily[line[0]]] = 0
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
116 familyfractionalcounts[repeatfamily[line[0]]] = 0
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
117 if line[0] in repeatfamily:
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
118 if repeatfamily[line[0]] == simple_repeat:
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
119 reptotalcounts_simple[simple_repeat] = 0
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
120 else:
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
121 reptotalcounts_simple[line[0]] = 0
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
122 repeat_list.append(line[0])
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
123 repeat_key[line[0]] = int(line[1])
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
124 rev_repeat_key[int(line[1])] = line[0]
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
125 fin.close()
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
################################################################################
# map the repeats to the pseudogenomes:
# First make sure the output folder exists, since every later step writes
# into it.  os.makedirs is used instead of os.mkdir so that a nested output
# path whose parent directories do not exist yet is created as well.
if not os.path.exists(outputfolder):
    os.makedirs(outputfolder)
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
################################################################################
# Conduct the regions sorting
# coverageBed is run with the unique-mapper BAM as -abam and
# <setup_folder>/repnames.bed as -b; its standard output is captured in
# <outputfolder>/<outputfile_prefix>_regionsorter.txt.
print('Conducting region sorting on unique mapping reads....')
fileout = outputfolder + os.path.sep + outputfile_prefix + '_regionsorter.txt'
with open(fileout, 'w') as stdout:
    command = shlex.split(
        "coverageBed -abam " + unique_mapper_bam
        + " -b " + setup_folder + os.path.sep + 'repnames.bed')
    p = subprocess.Popen(command, stdout=stdout)
    # Block until coverageBed has finished; leaving the with-block then
    # closes the output file.
    p.communicate()

# Sum the per-region values by repeat key.  For each tab-separated row, field
# 3 (0-based) is looked up in repeat_key and field 4 is added to that key's
# running total.
# NOTE(review): field 3 is presumably the repeat name and field 4 the read
# count reported by coverageBed - confirm against the bedtools version used.
counts = {}
sumofrepeatreads = 0
with open(fileout, 'r') as filein:
    for line in filein:
        line = line.split('\t')
        _repkey = str(repeat_key[line[3]])
        _nreads = int(line[4])
        counts[_repkey] = counts.get(_repkey, 0) + _nreads
        sumofrepeatreads += _nreads
print('Identified ' + str(sumofrepeatreads) + ' unique reads that mapped to repeats.')
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
################################################################################
if paired_end == 'TRUE':
    # One bowtie output folder per mate; each receives one '<repeat>.bowtie'
    # file per entry of repeat_list.
    folder_pair1 = outputfolder + os.path.sep + 'pair1_bowtie'
    folder_pair2 = outputfolder + os.path.sep + 'pair2_bowtie'
    if not os.path.exists(folder_pair1):
        os.mkdir(folder_pair1)
    if not os.path.exists(folder_pair2):
        os.mkdir(folder_pair2)
    ############################################################################
    print("Processing repeat psuedogenomes...")
    ps = []       # running bowtie processes for mate 1
    psb = []      # running bowtie processes for mate 2
    ticker = 0    # number of bowtie processes started in the current batch
    for metagenome in repeat_list:
        metagenomepath = setup_folder + os.path.sep + metagenome
        file1 = folder_pair1 + os.path.sep + metagenome + '.bowtie'
        file2 = folder_pair2 + os.path.sep + metagenome + '.bowtie'
        # The child process keeps its own copy of the output descriptor, so
        # closing our handle when the with-block ends does not cut it off.
        with open(file1, 'w') as stdout:
            command = shlex.split(
                "bowtie " + b_opt + " " + metagenomepath + " " + fastqfile_1)
            p = subprocess.Popen(command, stdout=stdout)
        with open(file2, 'w') as stdout:
            command = shlex.split(
                "bowtie " + b_opt + " " + metagenomepath + " " + fastqfile_2)
            pp = subprocess.Popen(command, stdout=stdout)
        ps.append(p)
        psb.append(pp)
        ticker += 2
        # Two processes are started per repeat, so ticker moves in steps of
        # two.  Comparing with ">=" guarantees the batch is drained even when
        # cpus is odd (an "==" test would then never match and every bowtie
        # process would be launched at once).
        # NOTE(review): assumes cpus is an int - confirm against the argument
        # parser.
        if ticker >= cpus:
            for p in ps + psb:
                p.communicate()
            ticker = 0
            ps = []
            psb = []
    # Drain the last, partially filled batch.  Both mates must be waited for,
    # otherwise the mate-2 files may still be incomplete when they are read
    # by the combining step below.
    for p in ps + psb:
        p.communicate()

    ############################################################################
    # combine the output from both read pairs:
    print('sorting and combining the output for both read pairs...')
    sorted_bowtie = outputfolder + os.path.sep + 'sorted_bowtie'
    if not os.path.exists(sorted_bowtie):
        os.mkdir(sorted_bowtie)
    for metagenome in repeat_list:
        file1 = folder_pair1 + os.path.sep + metagenome + '.bowtie'
        file2 = folder_pair2 + os.path.sep + metagenome + '.bowtie'
        fileout = sorted_bowtie + os.path.sep + metagenome + '.bowtie'
        with open(fileout, 'w') as stdout:
            # Same as the shell pipeline
            #   cat file1 file2 | cut -f1 -d' ' | cut -f1 -d/ | sort | uniq
            # with the result written to fileout.
            p1 = subprocess.Popen(['cat', file1, file2], stdout=subprocess.PIPE)
            p2 = subprocess.Popen(['cut', '-f1', "-d "], stdin=p1.stdout, stdout=subprocess.PIPE)
            p3 = subprocess.Popen(['cut', '-f1', "-d/"], stdin=p2.stdout, stdout=subprocess.PIPE)
            p4 = subprocess.Popen(['sort'], stdin=p3.stdout, stdout=subprocess.PIPE)
            p5 = subprocess.Popen(['uniq'], stdin=p4.stdout, stdout=stdout)
            # Waiting on the last stage is enough to know the file is complete.
            p5.communicate()
    print('completed ...')
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
################################################################################
if paired_end == 'FALSE':
    # Single-end run: all bowtie output goes into one folder.
    if not os.path.exists(outputfolder + os.path.sep + 'pair1_bowtie'):
        os.mkdir(outputfolder + os.path.sep + 'pair1_bowtie')
    folder_pair1 = os.path.sep.join([outputfolder, 'pair1_bowtie'])
    ############################################################################
    ps = []
    ticker = 0
    print("Processing repeat psuedogenomes...")
    for metagenome in repeat_list:
        metagenomepath = os.path.sep.join([setup_folder, metagenome])
        file1 = os.path.sep.join([folder_pair1, metagenome + '.bowtie'])
        with open(file1, 'w') as stdout:
            command = shlex.split(
                " ".join(["bowtie", b_opt, metagenomepath, fastqfile_1]))
            p = subprocess.Popen(command, stdout=stdout)
        ps.append(p)
        ticker += 1
        if ticker != cpus:
            continue
        # A full batch of cpus processes is running: wait for all of them
        # before starting the next batch.
        for p in ps:
            p.communicate()
        ticker = 0
        ps = []
    # Wait for whatever is left of the last batch (no-op when it is empty).
    for p in ps:
        p.communicate()
    stdout.close()

    ############################################################################
    # Reduce each bowtie output file to the sorted, de-duplicated list of its
    # first tab-separated fields, each truncated at the first '/'.
    print('Sorting and combining the output for both read pairs....')
    if not os.path.exists(outputfolder + os.path.sep + 'sorted_bowtie'):
        os.mkdir(outputfolder + os.path.sep + 'sorted_bowtie')
    sorted_bowtie = os.path.sep.join([outputfolder, 'sorted_bowtie'])
    for metagenome in repeat_list:
        file1 = os.path.sep.join([folder_pair1, metagenome + '.bowtie'])
        fileout = os.path.sep.join([sorted_bowtie, metagenome + '.bowtie'])
        with open(fileout, 'w') as stdout:
            # cat file1 | cut -f1 | cut -f1 -d/ | sort | uniq > fileout
            p1 = subprocess.Popen(['cat', file1],
                                  stdout=subprocess.PIPE)
            p2 = subprocess.Popen(['cut', '-f1'],
                                  stdin=p1.stdout, stdout=subprocess.PIPE)
            p3 = subprocess.Popen(['cut', '-f1', "-d/"],
                                  stdin=p2.stdout, stdout=subprocess.PIPE)
            p4 = subprocess.Popen(['sort'],
                                  stdin=p3.stdout, stdout=subprocess.PIPE)
            p5 = subprocess.Popen(['uniq'],
                                  stdin=p4.stdout, stdout=stdout)
            p5.communicate()
        stdout.close()
    print('completed ...')
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
253
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
################################################################################
# build a file of repeat keys for all reads
print('Writing and processing intermediate files...')
sorted_bowtie = outputfolder + os.path.sep + 'sorted_bowtie'
# readid maps the first field of every row found in the sorted bowtie files to
# a string of comma-terminated repeat keys (e.g. '3,17,'), one key for each
# repeat file in which that field occurs, in repeat_list order.
readid = {}
sumofrepeatreads = 0
for rep in repeat_list:
    # The key string is the same for every row of this repeat's file.
    _rep_tag = str(repeat_key[rep]) + str(',')
    # One pass per file is enough: dict.get supplies the empty starting value
    # the first time a field is seen, so the files need not be read twice.
    for data in import_text(sorted_bowtie + os.path.sep + rep + '.bowtie', '\t'):
        readid[data[0]] = readid.get(data[0], '') + _rep_tag
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
266 for subfamilies in readid.values():
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
267 if not subfamilies in counts:
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
268 counts[subfamilies]=0
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
269 counts[subfamilies] +=1
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
270 sumofrepeatreads+=1
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
271 del readid
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
272 print ('Identified ' + str(sumofrepeatreads) + ' reads that mapped to repeats for unique and multimappers.')
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
273
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
274 ################################################################################
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
275 print ("Conducting final calculations...")
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
276 # build a converter from numeric labels to repeat names that yields a combined string of repnames, each preceded by os.path.sep
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
277 def convert(x):
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
278 x = x.strip(',')
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
279 x = x.split(',')
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
280 global repname
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
281 repname = ""
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
282 for i in x:
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
283 repname = repname + os.path.sep + rev_repeat_key[int(i)]
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
284 # building the total counts for repeat element enrichment...
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
285 for x in counts.keys():
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
286 count= counts[x]
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
287 x = x.strip(',')
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
288 x = x.split(',')
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
289 for i in x:
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
290 reptotalcounts[rev_repeat_key[int(i)]] += int(count)
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
291 # building the fractional counts for repeat element enrichment...
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
292 for x in counts.keys():
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
293 count= counts[x]
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
294 x = x.strip(',')
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
295 x = x.split(',')
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
296 splits = len(x)
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
297 for i in x:
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
298 fractionalcounts[rev_repeat_key[int(i)]] += float(numpy.divide(float(count),float(splits)))
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
299 # building categorized table of repeat element enrichment...
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
300 repcounts = {}
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
301 repcounts['other'] = 0
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
302 for key in counts.keys():
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
303 convert(key)
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
304 repcounts[repname] = counts[key]
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
305 # building the total counts for class enrichment...
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
306 for key in reptotalcounts.keys():
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
307 classtotalcounts[repeatclass[key]] += reptotalcounts[key]
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
308 # building total counts for family enrichment...
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
309 for key in reptotalcounts.keys():
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
310 familytotalcounts[repeatfamily[key]] += reptotalcounts[key]
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
311 # building unique counts table
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
312 repcounts2 = {}
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
313 for rep in repeat_list:
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
314 if "/" +rep in repcounts:
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
315 repcounts2[rep] = repcounts["/" +rep]
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
316 else:
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
317 repcounts2[rep] = 0
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
318 # building the fractional counts for class enrichment...
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
319 for key in fractionalcounts.keys():
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
320 classfractionalcounts[repeatclass[key]] += fractionalcounts[key]
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
321 # building fractional counts for family enrichment...
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
322 for key in fractionalcounts.keys():
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
323 familyfractionalcounts[repeatfamily[key]] += fractionalcounts[key]
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
324
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
325 ################################################################################
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
326 print ('Writing final output and removing intermediate files...')
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
327 # print output to file of the categorized counts and total overlapping counts:
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
328 if allcountmethod == "TRUE":
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
329 fout1 = open(outputfolder + os.path.sep + outputfile_prefix + '_total_counts.txt' , 'w')
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
330 for key in reptotalcounts.keys():
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
331 fout1.write(str(key) + '\t' + repeatclass[key] + '\t' + repeatfamily[key] + '\t' + str(reptotalcounts[key]) + '\n')
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
332 fout2 = open(outputfolder + os.path.sep + outputfile_prefix + '_class_total_counts.txt' , 'w')
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
333 for key in classtotalcounts.keys():
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
334 fout2.write(str(key) + '\t' + str(classtotalcounts[key]) + '\n')
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
335 fout3 = open(outputfolder + os.path.sep + outputfile_prefix + '_family_total_counts.txt' , 'w')
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
336 for key in familytotalcounts.keys():
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
337 fout3.write(str(key) + '\t' + str(familytotalcounts[key]) + '\n')
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
338 fout4 = open(outputfolder + os.path.sep + outputfile_prefix + '_unique_counts.txt' , 'w')
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
339 for key in repcounts2.keys():
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
340 fout4.write(str(key) + '\t' + repeatclass[key] + '\t' + repeatfamily[key] + '\t' + str(repcounts2[key]) + '\n')
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
341 fout5 = open(outputfolder + os.path.sep + outputfile_prefix + '_class_fraction_counts.txt' , 'w')
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
342 for key in classfractionalcounts.keys():
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
343 fout5.write(str(key) + '\t' + str(classfractionalcounts[key]) + '\n')
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
344 fout6 = open(outputfolder + os.path.sep + outputfile_prefix + '_family_fraction_counts.txt' , 'w')
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
345 for key in familyfractionalcounts.keys():
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
346 fout6.write(str(key) + '\t' + str(familyfractionalcounts[key]) + '\n')
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
347 fout7 = open(outputfolder + os.path.sep + outputfile_prefix + '_fraction_counts.txt' , 'w')
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
348 for key in fractionalcounts.keys():
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
349 fout7.write(str(key) + '\t' + repeatclass[key] + '\t' + repeatfamily[key] + '\t' + str(int(fractionalcounts[key])) + '\n')
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
350 fout1.close()
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
351 fout2.close()
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
352 fout3.close()
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
353 fout4.close()
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
354 fout5.close()
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
355 fout6.close()
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
356 fout7.close()
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
357 else:
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
358 fout1 = open(outputfolder + os.path.sep + outputfile_prefix + '_class_fraction_counts.txt' , 'w')
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
359 for key in classfractionalcounts.keys():
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
360 fout1.write(str(key) + '\t' + str(classfractionalcounts[key]) + '\n')
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
361 fout2 = open(outputfolder + os.path.sep + outputfile_prefix + '_family_fraction_counts.txt' , 'w')
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
362 for key in familyfractionalcounts.keys():
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
363 fout2.write(str(key) + '\t' + str(familyfractionalcounts[key])+ '\n')
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
364 fout3 = open(outputfolder + os.path.sep + outputfile_prefix + '_fraction_counts.txt' , 'w')
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
365 for key in fractionalcounts.keys():
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
366 fout3.write(str(key) + '\t' + repeatclass[key] + '\t' + repeatfamily[key] + '\t' + str(int(fractionalcounts[key])) + '\n')
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
367 fout1.close()
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
368 fout2.close()
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
369 fout3.close()
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
370
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
371 ################################################################################
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
372 # Remove Large intermediate files
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
373 if os.path.exists(outputfolder + os.path.sep + outputfile_prefix + '_regionsorter.txt'):
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
374 os.remove(outputfolder + os.path.sep + outputfile_prefix + '_regionsorter.txt')
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
375 if os.path.exists(outputfolder + os.path.sep + 'pair1_bowtie'):
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
376 shutil.rmtree(outputfolder + os.path.sep + 'pair1_bowtie')
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
377 if os.path.exists(outputfolder + os.path.sep + 'pair2_bowtie'):
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
378 shutil.rmtree(outputfolder + os.path.sep + 'pair2_bowtie')
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
379 if os.path.exists(outputfolder + os.path.sep + 'sorted_bowtie'):
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
380 shutil.rmtree(outputfolder + os.path.sep + 'sorted_bowtie')
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
381
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
382 print ("... Done")