Mercurial > repos > drosofff > repenrich
annotate RepEnrich.py @ 0:1435d142041b draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
author | drosofff |
---|---|
date | Tue, 23 May 2017 18:37:22 -0400 |
parents | |
children |
rev | line source |
---|---|
0
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
1 #!/usr/bin/env python |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
2 import argparse |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
3 import csv |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
4 import numpy |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
5 import os |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
6 import shlex |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
7 import shutil |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
8 import subprocess |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
9 import sys |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
10 |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
# Command-line interface.  Six positional arguments are required; every
# optional flag takes an explicit value (the boolean-like ones expect the
# literal strings 'TRUE' / 'FALSE').
parser = argparse.ArgumentParser(
    description=(
        'Part II: Conducting the alignments to the psuedogenomes. '
        'Before doing this step you will require 1) a bamfile of the unique '
        'alignments with index 2) a fastq file of the reads mapping to more '
        'than one location. These files can be obtained using the following '
        'bowtie options [EXAMPLE: bowtie -S -m 1 --max multimap.fastq mm9 '
        'mate1_reads.fastq] Once you have the unique alignment bamfile and the '
        'reads mapping to more than one location in a fastq file you can run '
        'this step. EXAMPLE: python master_output.py '
        '/users/nneretti/data/annotation/hg19/hg19_repeatmasker.txt '
        '/users/nneretti/datasets/repeatmapping/POL3/Pol3_human/'
        'HeLa_InputChIPseq_Rep1 HeLa_InputChIPseq_Rep1 '
        '/users/nneretti/data/annotation/hg19/setup_folder '
        'HeLa_InputChIPseq_Rep1_multimap.fastq HeLa_InputChIPseq_Rep1.bam'
    )
)
parser.add_argument('--version', action='version', version='%(prog)s 0.1')

# Positional arguments ('store' is argparse's default action and the default
# metavar of a positional is its own name, so neither is spelled out).
parser.add_argument(
    'annotation_file',
    help=('List RepeatMasker.org annotation file for your organism. '
          'The file may be downloaded from the RepeatMasker.org website. '
          'Example: /data/annotation/hg19/hg19_repeatmasker.txt'))
parser.add_argument(
    'outputfolder',
    help='List folder to contain results. Example: /outputfolder')
parser.add_argument(
    'outputprefix',
    help='Enter prefix name for data. Example: HeLa_InputChIPseq_Rep1')
parser.add_argument(
    'setup_folder',
    help=('List folder that contains the repeat element psuedogenomes. '
          'Example /data/annotation/hg19/setup_folder'))
parser.add_argument(
    'fastqfile',
    help=('Enter file for the fastq reads that map to multiple locations. '
          'Example /data/multimap.fastq'))
parser.add_argument(
    'alignment_bam',
    help=('Enter bamfile output for reads that map uniquely. '
          'Example /bamfiles/old.bam'))

# Optional arguments (dest is derived from the flag name by argparse).
parser.add_argument(
    '--pairedend',
    default='FALSE',
    help=('Designate this option for paired-end sequencing. '
          'Default FALSE change to TRUE'))
parser.add_argument(
    '--collapserepeat',
    metavar='collapserepeat',
    default='Simple_repeat',
    help=('Designate this option to generate a collapsed repeat type. '
          'Uncollapsed output is generated in addition to collapsed repeat '
          'type. Simple_repeat is default to simplify downstream analysis. '
          'You can change the default to another repeat name to collapse a '
          'seperate specific repeat instead or if the name of Simple_repeat '
          'is different for your organism. Default Simple_repeat'))
parser.add_argument(
    '--fastqfile2',
    metavar='fastqfile2',
    default='none',
    help='Enter fastqfile2 when using paired-end option. Default none')
parser.add_argument(
    '--cpus',
    metavar='cpus',
    type=int,
    default=1,
    help=('Enter available cpus per node. The more cpus the faster '
          'RepEnrich performs. RepEnrich is designed to only work on one '
          'node. Default: "1"'))
parser.add_argument(
    '--allcountmethod',
    metavar='allcountmethod',
    default='FALSE',
    help=('By default the pipeline only outputs the fraction count method. '
          'Consdidered to be the best way to count multimapped reads. '
          'Changing this option will include the unique count method, a '
          'conservative count, and the total count method, a liberal '
          'counting strategy. Our evaluation of simulated data indicated '
          'fraction counting is best. Default = FALSE, change to TRUE'))
parser.add_argument(
    '--is_bed',
    metavar='is_bed',
    default='FALSE',
    help=('Is the annotation file a bed file. This is also a compatible '
          'format. The file needs to be a tab seperated bed with optional '
          'fields. Ex. format chr\tstart\tend\tName_element\tclass\tfamily. '
          'The class and family should identical to name_element if not '
          'applicable. Default FALSE change to TRUE'))
args = parser.parse_args()

# parameters
# Input / output locations.
annotation_file = args.annotation_file
outputfolder = args.outputfolder
outputfile_prefix = args.outputprefix
setup_folder = args.setup_folder
repeat_bed = os.path.sep.join([setup_folder, 'repnames.bed'])
unique_mapper_bam = args.alignment_bam
fastqfile_1 = args.fastqfile
fastqfile_2 = args.fastqfile2
# Run-time options.
cpus = args.cpus
# The bowtie option string always requests a single thread (-p 1); it does
# not read `cpus`.
b_opt = " ".join(["-k1", "-p", str(1), "--quiet"])
simple_repeat = args.collapserepeat
paired_end = args.pairedend
allcountmethod = args.allcountmethod
is_bed = args.is_bed
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
43 ################################################################################ |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
44 # check that the programs we need are available |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
# Probe the external tools by launching each one once with a harmless flag.
# subprocess raises OSError when an executable cannot be started, which is
# reported and re-raised so the script stops here.  Exit codes are ignored;
# only the ability to launch the program is checked.
try:
    # A single os.devnull handle swallows all output and is closed on exit
    # from the `with` block.
    with open(os.devnull, 'wb') as devnull:
        for probe in ("coverageBed -h", "bowtie --version"):
            subprocess.call(shlex.split(probe), stdout=devnull, stderr=devnull)
except OSError:
    print("Error: Bowtie or BEDTools not loaded")
    raise
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
51 |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
52 ################################################################################ |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
53 # define a csv reader that reads delimited text files (e.g. space- or tab-delimited) |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
54 print ('Preparing for analysis using RepEnrich...') |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
55 csv.field_size_limit(sys.maxsize) |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
def import_text(filename, separator):
    """Yield the non-empty rows of a delimited text file.

    The file is parsed with ``csv.reader`` using ``separator`` as the
    delimiter and ``skipinitialspace=True``.  Rows that parse to an empty
    list (blank lines) are dropped.

    Args:
        filename: path of the text file to read.
        separator: one-character field delimiter.

    Yields:
        Each non-empty row as a list of strings.

    The file is opened when iteration starts and is closed as soon as the
    generator is exhausted or its ``close()`` method is called.
    """
    with open(filename) as handle:
        rows = csv.reader(handle, delimiter=separator, skipinitialspace=True)
        for row in rows:
            if row:
                yield row
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
61 |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
62 ################################################################################ |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
63 # build dictionaries mapping each repeat name to its class and to its family |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
# Build two lookup tables keyed by the sanitized repeat name:
#   repeatclass[name]  -> class label
#   repeatfamily[name] -> family label
# "(", ")" and "/" in a repeat name are each replaced by "_" before the name
# is used as a key.
repeatclass = {}
repeatfamily = {}
if is_bed == "FALSE":
    # Space-delimited annotation: field 9 is used as the repeat name and
    # field 10 as a "class/family" label.
    fin = import_text(annotation_file, ' ')
    for x, line in enumerate(fin):
        # The first three rows returned by import_text are not parsed.
        # NOTE(review): presumably these are header rows; import_text already
        # drops blank lines, so confirm a data row is not skipped as well.
        if x > 2:
            # The label separator is always "/", independent of the platform
            # path separator, so split on it explicitly.
            classfamily = line[10].split('/')
            line9 = line[9].replace("(", "_").replace(")", "_").replace("/", "_")
            repeatclass[line9] = classfamily[0]
            if len(classfamily) == 2:
                repeatfamily[line9] = classfamily[1]
            else:
                # No distinct family given: the class doubles as the family.
                repeatfamily[line9] = classfamily[0]
    fin.close()
elif is_bed == "TRUE":
    # Tab-separated BED-like annotation: field 3 is the repeat name, field 4
    # the class and field 5 the family.
    with open(annotation_file, 'r') as fin:
        for line in fin:
            line = line.strip('\n').split('\t')
            theclass = line[4]
            thefamily = line[5]
            line3 = line[3].replace("(", "_").replace(")", "_").replace("/", "_")
            repeatclass[line3] = theclass
            repeatfamily[line3] = thefamily
else:
    # Any other value previously surfaced as a NameError further down.
    raise ValueError("--is_bed must be 'TRUE' or 'FALSE', got %r" % (is_bed,))
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
93 |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
94 ################################################################################ |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
95 # build the list of repeats and initialize the count dictionaries used in downstream analysis |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
96 fin = import_text(setup_folder + os.path.sep + 'repgenomes_key.txt', '\t') |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
97 repeat_key ={} |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
98 rev_repeat_key ={} |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
99 repeat_list = [] |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
100 reptotalcounts = {} |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
101 classfractionalcounts = {} |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
102 familyfractionalcounts = {} |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
103 classtotalcounts = {} |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
104 familytotalcounts = {} |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
105 reptotalcounts_simple = {} |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
106 fractionalcounts = {} |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
107 i = 0 |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
108 for line in fin: |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
109 reptotalcounts[line[0]] = 0 |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
110 fractionalcounts[line[0]] = 0 |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
111 if line[0] in repeatclass: |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
112 classtotalcounts[repeatclass[line[0]]] = 0 |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
113 classfractionalcounts[repeatclass[line[0]]] = 0 |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
114 if line[0] in repeatfamily: |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
115 familytotalcounts[repeatfamily[line[0]]] = 0 |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
116 familyfractionalcounts[repeatfamily[line[0]]] = 0 |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
117 if line[0] in repeatfamily: |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
118 if repeatfamily[line[0]] == simple_repeat: |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
119 reptotalcounts_simple[simple_repeat] = 0 |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
120 else: |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
121 reptotalcounts_simple[line[0]] = 0 |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
122 repeat_list.append(line[0]) |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
123 repeat_key[line[0]] = int(line[1]) |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
124 rev_repeat_key[int(line[1])] = line[0] |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
125 fin.close() |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
################################################################################
# map the repeats to the pseudogenomes:
# Every result file produced below is written under `outputfolder`, so make
# sure it exists first.  os.makedirs() also creates missing parent folders,
# and the isdir() re-check avoids the check-then-create race of
# "if not exists: mkdir" (another process may create the folder in between).
try:
    os.makedirs(outputfolder)
except OSError:
    # Only swallow the error when the folder is really there; anything else
    # (permission denied, a regular file in the way, ...) is re-raised.
    if not os.path.isdir(outputfolder):
        raise
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
################################################################################
# Conduct the regions sorting
# Runs coverageBed on the unique-mapper BAM against <setup_folder>/repnames.bed,
# stores its output in <outputfolder>/<prefix>_regionsorter.txt, then sums the
# per-region values into `counts` (keyed by the string form of the repeat's
# numeric key) and into the grand total `sumofrepeatreads`.
print ('Conducting region sorting on unique mapping reads....')
fileout = outputfolder + os.path.sep + outputfile_prefix + '_regionsorter.txt'
with open(fileout, 'w') as stdout:
    # Pass the arguments as a list so that paths containing spaces or quotes
    # reach coverageBed unchanged (no shell-style re-splitting).
    command = ['coverageBed', '-abam', unique_mapper_bam,
               '-b', setup_folder + os.path.sep + 'repnames.bed']
    p = subprocess.Popen(command, stdout=stdout)
    # Wait for coverageBed to finish before the file is closed and re-read.
    p.communicate()
counts = {}
sumofrepeatreads = 0
# The context manager guarantees the handle is closed again (the previous
# version left it open for the rest of the run).
with open(fileout, 'r') as filein:
    for line in filein:
        line = line.split('\t')
        # Column 4 (index 3) is looked up in repeat_key, column 5 (index 4)
        # is the integer added to that repeat's count.
        region_key = str(repeat_key[line[3]])
        region_reads = int(line[4])
        counts[region_key] = counts.get(region_key, 0) + region_reads
        sumofrepeatreads += region_reads
print ('Identified ' + str(sumofrepeatreads) + ' unique reads that mapped to repeats.')
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
################################################################################
# Paired-end branch: align both FASTQ files against every repeat pseudogenome
# with bowtie, then merge the two outputs per repeat into a sorted, de-duplicated
# list of read identifiers under <outputfolder>/sorted_bowtie.
if paired_end == 'TRUE':
    folder_pair1 = outputfolder + os.path.sep + 'pair1_bowtie'
    folder_pair2 = outputfolder + os.path.sep + 'pair2_bowtie'
    if not os.path.exists(folder_pair1):
        os.mkdir(folder_pair1)
    if not os.path.exists(folder_pair2):
        os.mkdir(folder_pair2)
    ############################################################################
    print ("Processing repeat psuedogenomes...")
    ps = []      # running bowtie processes for read 1
    psb = []     # running bowtie processes for read 2
    ticker = 0   # number of bowtie processes started in the current batch
    for metagenome in repeat_list:
        metagenomepath = setup_folder + os.path.sep + metagenome
        file1 = folder_pair1 + os.path.sep + metagenome + '.bowtie'
        file2 = folder_pair2 + os.path.sep + metagenome + '.bowtie'
        # The child inherits the file descriptor, so the parent may close its
        # own handle as soon as the process has been started.
        with open(file1, 'w') as stdout:
            # Only the option string is shell-split; paths are passed verbatim
            # so that file names with spaces keep working.
            command = ['bowtie'] + shlex.split(b_opt) + [metagenomepath, fastqfile_1]
            p = subprocess.Popen(command, stdout=stdout)
        with open(file2, 'w') as stdout:
            command = ['bowtie'] + shlex.split(b_opt) + [metagenomepath, fastqfile_2]
            pp = subprocess.Popen(command, stdout=stdout)
        ps.append(p)
        psb.append(pp)
        # Two processes are started per repeat, so the counter moves in steps
        # of two.  Testing with ">=" instead of "==" makes sure the limit is
        # also hit for odd values of cpus (including 1); with "==" it was
        # skipped and every bowtie job was launched at once.
        ticker += 2
        if ticker >= int(cpus):
            for p in ps + psb:
                p.communicate()
            ticker = 0
            ps = []
            psb = []
    # Drain the last, partially filled batch.  Both lists must be waited for:
    # previously only `ps` was, so read-2 alignments could still be running
    # while their output files were being merged below.
    for p in ps + psb:
        p.communicate()

    ############################################################################
    # combine the output from both read pairs:
    print ('sorting and combining the output for both read pairs...')
    sorted_bowtie = outputfolder + os.path.sep + 'sorted_bowtie'
    if not os.path.exists(sorted_bowtie):
        os.mkdir(sorted_bowtie)
    for metagenome in repeat_list:
        file1 = folder_pair1 + os.path.sep + metagenome + '.bowtie'
        file2 = folder_pair2 + os.path.sep + metagenome + '.bowtie'
        fileout = sorted_bowtie + os.path.sep + metagenome + '.bowtie'
        with open(fileout, 'w') as stdout:
            # Equivalent of:
            #   cat file1 file2 | cut -f1 -d" " | cut -f1 -d/ | sort | uniq
            # After each stage is wired up, the parent closes its copy of the
            # upstream pipe so that only the consumer holds it open and the
            # producer can see a broken pipe if the consumer exits early.
            p1 = subprocess.Popen(['cat', file1, file2], stdout=subprocess.PIPE)
            p2 = subprocess.Popen(['cut', '-f1', "-d "], stdin=p1.stdout, stdout=subprocess.PIPE)
            p1.stdout.close()
            p3 = subprocess.Popen(['cut', '-f1', "-d/"], stdin=p2.stdout, stdout=subprocess.PIPE)
            p2.stdout.close()
            p4 = subprocess.Popen(['sort'], stdin=p3.stdout, stdout=subprocess.PIPE)
            p3.stdout.close()
            p5 = subprocess.Popen(['uniq'], stdin=p4.stdout, stdout=stdout)
            p4.stdout.close()
            p5.communicate()
            # Reap the upstream stages so no zombie processes pile up while
            # looping over a large repeat list.
            p1.wait()
            p2.wait()
            p3.wait()
            p4.wait()
    print ('completed ...')
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
################################################################################
# Single-end branch: align the FASTQ file against every repeat pseudogenome
# with bowtie, then reduce each output to a sorted, de-duplicated list of read
# identifiers under <outputfolder>/sorted_bowtie.
if paired_end == 'FALSE':
    folder_pair1 = outputfolder + os.path.sep + 'pair1_bowtie'
    if not os.path.exists(folder_pair1):
        os.mkdir(folder_pair1)
    ############################################################################
    ps = []      # running bowtie processes of the current batch
    ticker = 0   # number of bowtie processes started in the current batch
    print ("Processing repeat psuedogenomes...")
    for metagenome in repeat_list:
        metagenomepath = setup_folder + os.path.sep + metagenome
        file1 = folder_pair1 + os.path.sep + metagenome + '.bowtie'
        # The child inherits the file descriptor, so the parent may close its
        # own handle as soon as the process has been started.
        with open(file1, 'w') as stdout:
            # Only the option string is shell-split; paths are passed verbatim
            # so that file names with spaces keep working.
            command = ['bowtie'] + shlex.split(b_opt) + [metagenomepath, fastqfile_1]
            p = subprocess.Popen(command, stdout=stdout)
        ps.append(p)
        ticker += 1
        # Once `cpus` jobs are running, wait for the whole batch to finish
        # before starting more.
        if ticker == cpus:
            for p in ps:
                p.communicate()
            ticker = 0
            ps = []
    # Drain the last, partially filled batch.
    for p in ps:
        p.communicate()

    ############################################################################
    # sort and de-duplicate the read identifiers of each repeat:
    print ('Sorting and combining the output for both read pairs....')
    sorted_bowtie = outputfolder + os.path.sep + 'sorted_bowtie'
    if not os.path.exists(sorted_bowtie):
        os.mkdir(sorted_bowtie)
    for metagenome in repeat_list:
        file1 = folder_pair1 + os.path.sep + metagenome + '.bowtie'
        fileout = sorted_bowtie + os.path.sep + metagenome + '.bowtie'
        with open(fileout, 'w') as stdout:
            # Equivalent of:
            #   cat file1 | cut -f1 | cut -f1 -d/ | sort | uniq
            # After each stage is wired up, the parent closes its copy of the
            # upstream pipe so that only the consumer holds it open and the
            # producer can see a broken pipe if the consumer exits early.
            p1 = subprocess.Popen(['cat', file1], stdout=subprocess.PIPE)
            p2 = subprocess.Popen(['cut', '-f1'], stdin=p1.stdout, stdout=subprocess.PIPE)
            p1.stdout.close()
            p3 = subprocess.Popen(['cut', '-f1', "-d/"], stdin=p2.stdout, stdout=subprocess.PIPE)
            p2.stdout.close()
            p4 = subprocess.Popen(['sort'], stdin=p3.stdout, stdout=subprocess.PIPE)
            p3.stdout.close()
            p5 = subprocess.Popen(['uniq'], stdin=p4.stdout, stdout=stdout)
            p4.stdout.close()
            p5.communicate()
            # Reap the upstream stages so no zombie processes pile up while
            # looping over a large repeat list.
            p1.wait()
            p2.wait()
            p3.wait()
            p4.wait()
    print ('completed ...')
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
253 |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
254 ################################################################################ |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
# Build, for every read id, a signature string made of the numeric keys of all
# repeats the read aligned to (each key followed by ','), then count how many
# reads share each signature.
print ('Writing and processing intermediate files...')
sorted_bowtie = outputfolder + os.path.sep + 'sorted_bowtie'
readid = {}
sumofrepeatreads = 0
# One pass over the per-repeat alignment files is sufficient: a read id seen
# for the first time starts from the empty string, which yields exactly the
# signatures a separate initialisation pass would, without reading every
# file twice.
for rep in repeat_list:
    for data in import_text(sorted_bowtie + os.path.sep + rep + '.bowtie', '\t'):
        # data[0] is used as the read identifier (first tab-separated field).
        readid[data[0]] = readid.get(data[0], '') + str(repeat_key[rep]) + str(',')
# Tally one count per read under its repeat-combination signature.
for subfamilies in readid.values():
    if subfamilies not in counts:
        counts[subfamilies] = 0
    counts[subfamilies] += 1
    sumofrepeatreads += 1
# The per-read table is no longer needed once the tallies exist.
del readid
print ('Identified ' + str(sumofrepeatreads) + ' reads that mapped to repeats for unique and multimappers.')

################################################################################
print ("Conducting final calculations...")
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
276 # build a converter from comma-separated numeric repeat labels to a combined string of repnames, each prefixed by the path separator (os.path.sep) |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
def convert(x, key_map=None):
    """Translate a comma-separated string of numeric repeat labels into names.

    Each label is converted with ``int`` and looked up in ``key_map``; every
    resulting name is prefixed with ``os.path.sep`` and the pieces are
    concatenated (e.g. ``'3,7,'`` becomes ``<sep>nameA<sep>nameB``).

    The result is stored in the module-level ``repname`` variable, which
    existing callers read after the call, and is also returned so new code
    does not have to depend on the global.

    Args:
        x: string of integer labels separated by commas; leading and
            trailing commas are ignored.
        key_map: optional mapping from integer label to repeat name.
            Defaults to the module-level ``rev_repeat_key``.

    Returns:
        The combined name string (the same value assigned to ``repname``).

    Raises:
        ValueError: if a label is not an integer (including an empty ``x``).
        KeyError: if a label is missing from the mapping.
    """
    global repname
    if key_map is None:
        key_map = rev_repeat_key
    labels = x.strip(',').split(',')
    repname = "".join(os.path.sep + key_map[int(label)] for label in labels)
    return repname
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
# Building the total and fractional counts for repeat element enrichment.
# Each key of `counts` is a comma-terminated list of numeric repeat labels.
# Every listed repeat receives the full read count in `reptotalcounts` and an
# equal share of it (count / number of repeats in the key) in
# `fractionalcounts`.
for signature, count in counts.items():
    labels = signature.strip(',').split(',')
    share = float(count) / float(len(labels))
    for label in labels:
        name = rev_repeat_key[int(label)]
        reptotalcounts[name] += int(count)
        fractionalcounts[name] += share
# Building the categorized table of repeat element enrichment: one entry per
# distinct combination of repeats, keyed by the combined name from convert().
repcounts = {}
repcounts['other'] = 0
for key in counts.keys():
    # convert() publishes its result through the module-level `repname`.
    convert(key)
    repcounts[repname] = counts[key]
# Building the total counts for class and family enrichment.
for key in reptotalcounts.keys():
    classtotalcounts[repeatclass[key]] += reptotalcounts[key]
    familytotalcounts[repeatfamily[key]] += reptotalcounts[key]
# Building the unique counts table: reads whose signature names exactly one
# repeat. convert() prefixes each name with os.path.sep, so the lookup key
# must use the same separator rather than a hard-coded "/".
repcounts2 = {}
for rep in repeat_list:
    repcounts2[rep] = repcounts.get(os.path.sep + rep, 0)
# Building the fractional counts for class and family enrichment.
for key in fractionalcounts.keys():
    classfractionalcounts[repeatclass[key]] += fractionalcounts[key]
    familyfractionalcounts[repeatfamily[key]] += fractionalcounts[key]
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
324 |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
325 ################################################################################ |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
print ('Writing final output and removing intermediate files...')
# Every result file lives in the output folder and shares this prefix.
output_base = outputfolder + os.path.sep + outputfile_prefix
# Each file is written inside a `with` block so its handle is closed even if a
# write fails part-way through.
# The total-count and unique-count tables are only produced when the
# all-count method was requested.
if allcountmethod == "TRUE":
    with open(output_base + '_total_counts.txt', 'w') as fout:
        for key in reptotalcounts.keys():
            fout.write(str(key) + '\t' + repeatclass[key] + '\t' + repeatfamily[key] + '\t' + str(reptotalcounts[key]) + '\n')
    with open(output_base + '_class_total_counts.txt', 'w') as fout:
        for key in classtotalcounts.keys():
            fout.write(str(key) + '\t' + str(classtotalcounts[key]) + '\n')
    with open(output_base + '_family_total_counts.txt', 'w') as fout:
        for key in familytotalcounts.keys():
            fout.write(str(key) + '\t' + str(familytotalcounts[key]) + '\n')
    with open(output_base + '_unique_counts.txt', 'w') as fout:
        for key in repcounts2.keys():
            fout.write(str(key) + '\t' + repeatclass[key] + '\t' + repeatfamily[key] + '\t' + str(repcounts2[key]) + '\n')
# The fractional-count tables are written in every mode.
with open(output_base + '_class_fraction_counts.txt', 'w') as fout:
    for key in classfractionalcounts.keys():
        fout.write(str(key) + '\t' + str(classfractionalcounts[key]) + '\n')
with open(output_base + '_family_fraction_counts.txt', 'w') as fout:
    for key in familyfractionalcounts.keys():
        fout.write(str(key) + '\t' + str(familyfractionalcounts[key]) + '\n')
with open(output_base + '_fraction_counts.txt', 'w') as fout:
    for key in fractionalcounts.keys():
        # Per-repeat fractional counts are truncated to an integer on output.
        fout.write(str(key) + '\t' + repeatclass[key] + '\t' + repeatfamily[key] + '\t' + str(int(fractionalcounts[key])) + '\n')
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
370 |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
371 ################################################################################ |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
# Clean up the large intermediate artefacts left in the output folder.
regionsorter_file = outputfolder + os.path.sep + outputfile_prefix + '_regionsorter.txt'
if os.path.exists(regionsorter_file):
    os.remove(regionsorter_file)
# The three bowtie working directories are removed the same way, if present.
for bowtie_dir_name in ('pair1_bowtie', 'pair2_bowtie', 'sorted_bowtie'):
    bowtie_dir = outputfolder + os.path.sep + bowtie_dir_name
    if os.path.exists(bowtie_dir):
        shutil.rmtree(bowtie_dir)

print ("... Done")