Mercurial > repos > drosofff > repenrich
annotate RepEnrich_setup.py @ 6:77807fc5d609 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit e9282183531bacbb4bbe2d6e53258f6e0ad0bc34
author | drosofff |
---|---|
date | Wed, 31 May 2017 13:09:15 -0400 |
parents | 1435d142041b |
children |
rev | line source |
---|---|
0
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
1 #!/usr/bin/env python |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
2 import argparse |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
3 import csv |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
4 import os |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
5 import shlex |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
6 import subprocess |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
7 import sys |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
8 from Bio import SeqIO |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
9 from Bio.Seq import Seq |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
10 from Bio.SeqRecord import SeqRecord |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
11 from Bio.Alphabet import IUPAC |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
12 |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
# ------------------------------------------------------------------------------
# Command-line interface: three positional paths plus four optional settings.
# ------------------------------------------------------------------------------
parser = argparse.ArgumentParser(
    prog='getargs_genome_maker.py',
    description=(
        'Part I: Prepartion of repetive element psuedogenomes and repetive '
        'element bamfiles. This script prepares the annotation used by '
        'downstream applications to analyze for repetitive element '
        'enrichment. For this script to run properly bowtie must be loaded. '
        'The repeat element psuedogenomes are prepared in order to analyze '
        'reads that map to multiple locations of the genome. The repeat '
        'element bamfiles are prepared in order to use a region sorter to '
        'analyze reads that map to a single location of the genome.You will '
        '1) annotation_file: The repetitive element annotation file '
        'downloaded from RepeatMasker.org database for your organism of '
        'interest. 2) genomefasta: Your genome of interest in fasta format, '
        '3)setup_folder: a folder to contain repeat element setup files '
        'command-line usage EXAMPLE: python master_setup.py '
        '/users/nneretti/data/annotation/mm9/mm9_repeatmasker.txt '
        '/users/nneretti/data/annotation/mm9/mm9.fa '
        '/users/nneretti/data/annotation/mm9/setup_folder'
    ),
)
parser.add_argument('--version', action='version', version='%(prog)s 0.1')

# Positional arguments ("store" is argparse's default action).
parser.add_argument(
    'annotation_file',
    metavar='annotation_file',
    help=(
        'List annotation file. The annotation file contains the repeat '
        'masker annotation for the genome of interest and may be downloaded '
        'at RepeatMasker.org Example /data/annotation/mm9/mm9.fa.out'
    ),
)
parser.add_argument(
    'genomefasta',
    metavar='genomefasta',
    help=(
        'File name and path for genome of interest in fasta format. '
        'Example /data/annotation/mm9/mm9.fa'
    ),
)
parser.add_argument(
    'setup_folder',
    metavar='setup_folder',
    help=(
        'List folder to contain bamfiles for repeats and repeat element '
        'psuedogenomes. Example /data/annotation/mm9/setup'
    ),
)

# Optional arguments.  The string defaults of the two integer options are
# converted by argparse through type=int, exactly like user-supplied values.
parser.add_argument(
    '--nfragmentsfile1',
    dest='nfragmentsfile1',
    metavar='nfragmentsfile1',
    default='./repnames_nfragments.txt',
    help=(
        'Output location of a description file that saves the number of '
        'fragments processed per repname. Default ./repnames_nfragments.txt'
    ),
)
parser.add_argument(
    '--gaplength',
    dest='gaplength',
    metavar='gaplength',
    type=int,
    default='200',
    help=(
        'Length of the spacer used to build repeat psuedogeneomes. '
        'Default 200'
    ),
)
parser.add_argument(
    '--flankinglength',
    dest='flankinglength',
    metavar='flankinglength',
    type=int,
    default='25',
    help=(
        'Length of the flanking region adjacent to the repeat element that '
        'is used to build repeat psuedogeneomes. The flanking length should '
        'be set according to the length of your reads. Default 25'
    ),
)
parser.add_argument(
    '--is_bed',
    dest='is_bed',
    metavar='is_bed',
    default='FALSE',
    help=(
        'Is the annotation file a bed file. This is also a compatible '
        'format. The file needs to be a tab seperated bed with optional '
        'fields. Ex. format chr\tstart\tend\tName_element\tclass\tfamily. '
        'The class and family should identical to name_element if not '
        'applicable. Default FALSE change to TRUE'
    ),
)
args = parser.parse_args()

# Copy the parsed values into the short module-level names used below.
gapl, flankingl = args.gaplength, args.flankinglength
annotation_file, genomefasta = args.annotation_file, args.genomefasta
setup_folder = args.setup_folder
nfragmentsfile1 = args.nfragmentsfile1
is_bed = args.is_bed
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
32 |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
33 ################################################################################ |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
34 # check that the programs we need are available |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
# Probe for the bowtie executable by launching "bowtie --version".  Starting a
# program that cannot be found raises OSError, which is reported and re-raised
# so the script stops here.  Only bowtie is probed, and its exit status is not
# inspected.
try:
    # One os.devnull handle absorbs both output streams; the with-block
    # closes it as soon as the probe returns instead of leaking two handles.
    with open(os.devnull, 'wb') as devnull:
        subprocess.call(shlex.split("bowtie --version"),
                        stdout=devnull, stderr=devnull)
except OSError:
    print("Error: Bowtie or BEDTools not loaded")
    raise
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
40 |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
41 ################################################################################ |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
42 # Define a text importer |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
# Raise the csv module's per-field size limit to sys.maxsize.
csv.field_size_limit(sys.maxsize)


def import_text(filename, separator):
    """Yield the non-empty rows of a delimited text file.

    The file is parsed with ``csv.reader`` using ``separator`` as the
    delimiter and ``skipinitialspace=True`` (whitespace directly after a
    delimiter is ignored).  Rows that the reader returns as empty lists are
    skipped.

    Args:
        filename: Path of the file to read; resolved with os.path.realpath.
        separator: One-character field delimiter passed to csv.reader.

    Yields:
        One list of field strings per non-empty row.

    The file handle is closed when the generator is exhausted or closed,
    rather than being left for the garbage collector.
    """
    with open(os.path.realpath(filename)) as handle:
        reader = csv.reader(handle, delimiter=separator,
                            skipinitialspace=True)
        for row in reader:
            if row:
                yield row
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
49 # Make a setup folder |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
# Create the output directory together with any missing parents.  Creating
# first and tolerating "already there" removes the window between a separate
# existence check and makedirs() in which the directory could appear.
try:
    os.makedirs(setup_folder)
except OSError:
    # Swallow the error only when the directory really exists; a path that
    # is a regular file, or a permission problem, is reported immediately.
    if not os.path.isdir(setup_folder):
        raise
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
52 |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
53 ################################################################################ |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
54 # load genome into dictionary |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
# Load every FASTA record, then keep only plain strings and their lengths.
# This section defines:
#   genome[name]    -> sequence as str
#   lgenome[name]   -> len(genome[name])
#   idxgenome[name] -> position of the record in allchrs (0-based)
#   allchrs         -> list of record names in the order SeqIO.to_dict gave
print("loading genome...")
g = SeqIO.to_dict(SeqIO.parse(genomefasta, "fasta"))

print("Precomputing length of all chromosomes...")
idxgenome = {}
lgenome = {}
genome = {}
# Take a list snapshot of the names.  On Python 3 dict.keys() is a view that
# holds a reference to the dict, so keeping the view would keep every record
# alive even after "del g".
allchrs = list(g)
for k, chrom in enumerate(allchrs):
    # pop() drops each record as soon as its sequence has been copied, so the
    # record objects and the string copies are not all held at the same time.
    genome[chrom] = str(g.pop(chrom).seq)
    lgenome[chrom] = len(genome[chrom])
    idxgenome[chrom] = k
del g
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
71 |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
72 ################################################################################ |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
73 # Build a bedfile of repeatcoordinates to use by RepEnrich region_sorter |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
def _clean_repname(raw_name):
    """Return raw_name with each "(", ")" and "/" replaced by "_"."""
    return raw_name.replace("(", "_").replace(")", "_").replace("/", "_")


def _repeat_records(path, bed_format):
    """Yield (chromosome, start, end, repeat_name) for each annotation record.

    Args:
        path: Annotation file to read.
        bed_format: True to read tab-separated columns 0-3 as chromosome,
            start, end and name; False to read a space-delimited file through
            import_text(), taking columns 4, 5, 6 and 9.

    Yields:
        Tuples of (str, int, int, str); the name has passed _clean_repname.
    """
    if bed_format:
        with open(os.path.realpath(path), 'r') as handle:
            for raw in handle:
                # A blank line (e.g. at end of file) has no columns to read.
                if not raw.strip():
                    continue
                fields = raw.strip('\n').split('\t')
                yield (fields[0], int(fields[1]), int(fields[2]),
                       _clean_repname(fields[3]))
    else:
        for rownum, fields in enumerate(import_text(path, ' ')):
            # The first three rows produced by import_text are not records
            # (presumably the RepeatMasker header).
            # NOTE(review): import_text already drops empty rows - confirm
            # that skipping three rows does not discard the first record.
            if rownum > 2:
                yield (fields[4], int(fields[5]), int(fields[6]),
                       _clean_repname(fields[9]))


# Decide the input format up front.  Any spelling of TRUE/FALSE is accepted;
# other values are rejected with a clear message before any output is written
# (previously they skipped both branches and failed later with a NameError).
_bed_choice = str(is_bed).upper()
if _bed_choice not in ("TRUE", "FALSE"):
    raise ValueError("--is_bed must be TRUE or FALSE, got %r" % (is_bed,))

# Results of this section:
#   repeat_elements     -> repeat names in order of first appearance
#   rep_chr[name]       -> chromosome of every occurrence
#   rep_start[name]     -> start coordinate of every occurrence
#   rep_end[name]       -> end coordinate of every occurrence
# The three per-name lists are index-aligned.
repeat_elements = []
rep_chr = {}
rep_start = {}
rep_end = {}

# Every record is also written as one 4-column line of repnames.bed.
_repnames_bed = os.path.realpath(setup_folder + os.path.sep + 'repnames.bed')
with open(_repnames_bed, 'w') as fout:
    for repchr, repstart, repend, repname in _repeat_records(
            annotation_file, _bed_choice == "TRUE"):
        fout.write('%s\t%d\t%d\t%s\n' % (repchr, repstart, repend, repname))
        # rep_chr's keys are exactly the names seen so far, so one dict
        # lookup replaces the per-record scan of the repeat_elements list.
        if repname in rep_chr:
            rep_chr[repname].append(repchr)
            rep_start[repname].append(repstart)
            rep_end[repname].append(repend)
        else:
            repeat_elements.append(repname)
            rep_chr[repname] = [repchr]
            rep_start[repname] = [repstart]
            rep_end[repname] = [repend]
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
# Sort the repeat names so that the index assigned to each one below is
# reproducible between runs.
repeat_elements = sorted(repeat_elements)
print("Writing a key for all repeats...")
# Write the repeat key: one line per repeat name, formatted as
# "<repeat name>\t<index>", where the index is the position of the name in
# the sorted list (starting at 0).
key_path = os.path.realpath(setup_folder + os.path.sep + 'repgenomes_key.txt')
# The context manager closes the file even if a write fails.
with open(key_path, 'w') as fout:
    for x, repeat in enumerate(repeat_elements):
        fout.write(str(repeat) + '\t' + str(x) + '\n')
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
################################################################################
# Generate the spacer for pseudogenomes: a run of `gapl` "N" characters that is
# inserted in front of every fragment when the fragments are concatenated.
# String repetition builds it in one step (and yields "" for gapl <= 0, exactly
# like the loop over range(gapl) it replaces).
spacer = "N" * gapl
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
152 |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
# Save the number of fragments recorded for each repeat name, one line per
# repeat formatted as "<fragment count>\t<repeat name>".
print("Saving number of fragments processed per repname to " + nfragmentsfile1)
# The context manager closes the file even if a write fails.
with open(os.path.realpath(nfragmentsfile1), "w") as fout1:
    for repname, fragment_chrs in rep_chr.items():
        # rep_chr holds one chromosome entry per fragment, so its length is
        # the fragment count for this repeat.
        fout1.write(str(len(fragment_chrs)) + "\t" + repname + '\n')
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
161 |
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
# Generate the metagenomes and save them to FASTA files.
# For every repeat name: concatenate all of its genomic fragments (each one
# extended by `flankingl` bases on both sides and preceded by `spacer`), write
# the result to <setup_folder>/<newname>.fa and index it with bowtie-build.
nrepgenomes = len(rep_chr)
for k, repname in enumerate(rep_chr, 1):
    # Characters that are awkward in file names are replaced by underscores.
    newname = repname.replace("(", "_").replace(")", "_").replace("/", "_")
    progress = " (" + str(k) + " of " + str(nrepgenomes) + ")"
    print("processing repgenome " + newname + ".fa" + progress)
    rep_chr_current = rep_chr[repname]
    rep_start_current = rep_start[repname]
    rep_end_current = rep_end[repname]
    print("-------> " + str(len(rep_chr_current)) + " fragments")

    # Collect the pieces in a list and join them once: appending to a growing
    # string would recopy the whole metagenome for every fragment.
    pieces = []
    for chrom, start, end in zip(rep_chr_current, rep_start_current,
                                 rep_end_current):
        try:
            chrom_seq = genome[chrom]
            chrom_len = lgenome[chrom]
        except KeyError:
            # The annotation names a sequence that is absent from the genome:
            # report it and skip this fragment.
            print("Unrecognised Chromosome: " + chrom)
            continue
        # Extend the fragment by the flanking length, clamped to the
        # chromosome boundaries; `rend` is inclusive, hence the +1 below.
        rstart = max(start - flankingl, 0)
        rend = min(end + flankingl, chrom_len - 1)
        pieces.append(spacer)
        pieces.append(chrom_seq[rstart:(rend + 1)])
    metagenome = "".join(pieces)

    # Convert metagenome to SeqRecord object (required by SeqIO.write)
    # NOTE(review): the record id is the literal string "repname", not the
    # repeat's name, so every FASTA header reads ">repname". Left unchanged
    # because downstream steps may rely on it - confirm before changing.
    record = SeqRecord(Seq(metagenome, IUPAC.unambiguous_dna),
                       id="repname", name="", description="")
    print("saving repgenome " + newname + ".fa" + progress)
    fastafilename = os.path.realpath(setup_folder + os.path.sep + newname + ".fa")
    SeqIO.write(record, fastafilename, "fasta")

    print("indexing repgenome " + newname + ".fa" + progress)
    # Pass the arguments as a list so that paths containing spaces or quotes
    # reach bowtie-build intact (no shell-style re-splitting).
    command = ['bowtie-build', '-f', fastafilename,
               setup_folder + os.path.sep + newname]
    # NOTE(review): the exit status of bowtie-build is not checked, as in the
    # original code; a failed indexing run goes unnoticed here.
    subprocess.call(command)

print("... Done")
1435d142041b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff
changeset
|
194 |