annotate RepEnrich_setup.py @ 5:ea5bba2c569f draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit 5f5581487dc6efb3e23e52a5ca11bb097afc72e9
author drosofff
date Tue, 30 May 2017 20:16:05 -0400
parents 1435d142041b
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
1 #!/usr/bin/env python
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
2 import argparse
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
3 import csv
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
4 import os
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
5 import shlex
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
6 import subprocess
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
7 import sys
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
8 from Bio import SeqIO
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
9 from Bio.Seq import Seq
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
10 from Bio.SeqRecord import SeqRecord
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
11 from Bio.Alphabet import IUPAC
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
12
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
# Command-line interface.
# The description and help strings are user-facing runtime text and are kept
# verbatim (including their original spelling).
parser = argparse.ArgumentParser(description='Part I: Prepartion of repetive element psuedogenomes and repetive element bamfiles. This script prepares the annotation used by downstream applications to analyze for repetitive element enrichment. For this script to run properly bowtie must be loaded. The repeat element psuedogenomes are prepared in order to analyze reads that map to multiple locations of the genome. The repeat element bamfiles are prepared in order to use a region sorter to analyze reads that map to a single location of the genome.You will 1) annotation_file: The repetitive element annotation file downloaded from RepeatMasker.org database for your organism of interest. 2) genomefasta: Your genome of interest in fasta format, 3)setup_folder: a folder to contain repeat element setup files command-line usage EXAMPLE: python master_setup.py /users/nneretti/data/annotation/mm9/mm9_repeatmasker.txt /users/nneretti/data/annotation/mm9/mm9.fa /users/nneretti/data/annotation/mm9/setup_folder', prog='getargs_genome_maker.py')
parser.add_argument('--version', action='version', version='%(prog)s 0.1')
parser.add_argument('annotation_file', action='store', metavar='annotation_file', help='List annotation file. The annotation file contains the repeat masker annotation for the genome of interest and may be downloaded at RepeatMasker.org Example /data/annotation/mm9/mm9.fa.out')
parser.add_argument('genomefasta', action='store', metavar='genomefasta', help='File name and path for genome of interest in fasta format. Example /data/annotation/mm9/mm9.fa')
parser.add_argument('setup_folder', action='store', metavar='setup_folder', help='List folder to contain bamfiles for repeats and repeat element psuedogenomes. Example /data/annotation/mm9/setup')
parser.add_argument('--nfragmentsfile1', action='store', dest='nfragmentsfile1', metavar='nfragmentsfile1', default='./repnames_nfragments.txt', help='Output location of a description file that saves the number of fragments processed per repname. Default ./repnames_nfragments.txt')
# Numeric defaults are given as ints directly; argparse would convert the
# string form through type=int anyway, so the parsed values are unchanged.
parser.add_argument('--gaplength', action='store', dest='gaplength', metavar='gaplength', default=200, type=int, help='Length of the spacer used to build repeat psuedogeneomes. Default 200')
parser.add_argument('--flankinglength', action='store', dest='flankinglength', metavar='flankinglength', default=25, type=int, help='Length of the flanking region adjacent to the repeat element that is used to build repeat psuedogeneomes. The flanking length should be set according to the length of your reads. Default 25')
# Only the exact strings "FALSE" and "TRUE" are handled further down the
# script, so any other value is rejected here with a usage error instead of
# failing later on an undefined variable.
parser.add_argument('--is_bed', action='store', dest='is_bed', metavar='is_bed', default='FALSE', choices=['FALSE', 'TRUE'], help='Is the annotation file a bed file. This is also a compatible format. The file needs to be a tab seperated bed with optional fields. Ex. format chr\tstart\tend\tName_element\tclass\tfamily. The class and family should identical to name_element if not applicable. Default FALSE change to TRUE')
args = parser.parse_args()

# parameters and paths specified in args_parse
gapl = args.gaplength
flankingl = args.flankinglength
annotation_file = args.annotation_file
genomefasta = args.genomefasta
setup_folder = args.setup_folder
nfragmentsfile1 = args.nfragmentsfile1
is_bed = args.is_bed
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
32
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
33 ################################################################################
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
34 # check that the programs we need are available
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
# Probe for the bowtie executable by asking for its version.
# Only a failure to launch the program (OSError) is detected; the exit status
# returned by subprocess.call is not inspected.
try:
    # One shared handle for both streams, closed as soon as the probe is done.
    with open(os.devnull, 'wb') as devnull:
        subprocess.call(shlex.split("bowtie --version"), stdout=devnull, stderr=devnull)
except OSError:
    print ("Error: Bowtie or BEDTools not loaded")
    raise
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
40
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
41 ################################################################################
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
42 # Define a text importer
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
# Raise the csv module's per-field size cap to sys.maxsize.
csv.field_size_limit(sys.maxsize)


def import_text(filename, separator):
    """Yield the non-empty rows of a delimited text file.

    The file is parsed with csv.reader using ``separator`` as the delimiter
    and ``skipinitialspace=True``, so whitespace directly following a
    delimiter is ignored. Rows that parse to an empty list (blank lines) are
    skipped.

    Args:
        filename: Path of the file to read; resolved with os.path.realpath.
        separator: Single-character field delimiter.

    Yields:
        Each non-empty row as a list of strings.
    """
    # The with-block closes the file once the generator is exhausted or
    # closed, instead of leaving the handle to the garbage collector.
    with open(os.path.realpath(filename)) as handle:
        for line in csv.reader(handle, delimiter=separator,
                               skipinitialspace=True):
            if line:
                yield line
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
# Create the setup folder, including any missing parent directories, unless
# something already exists at that path.
setup_folder_present = os.path.exists(setup_folder)
if not setup_folder_present:
    os.makedirs(setup_folder)
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
52
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
53 ################################################################################
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
54 # load genome into dictionary
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
print ("loading genome...")
g = SeqIO.to_dict(SeqIO.parse(genomefasta, "fasta"))

print ("Precomputing length of all chromosomes...")
# genome:    sequence name -> sequence as a plain string
# lgenome:   sequence name -> sequence length
# idxgenome: sequence name -> position of the name in allchrs
idxgenome = {}
lgenome = {}
genome = {}
# Snapshot the names into a list. A live keys() view would hold a reference
# to `g` (so `del g` below would not release it on Python 3) and would not
# allow records to be removed while iterating.
allchrs = list(g)
for chrom_index, chrom_name in enumerate(allchrs):
    # Pop each record as soon as its sequence has been copied to a string so
    # the parsed record can be freed instead of living alongside the copy.
    sequence = str(g.pop(chrom_name).seq)
    genome[chrom_name] = sequence
    lgenome[chrom_name] = len(sequence)
    idxgenome[chrom_name] = chrom_index
# The original loop left this counter bound at module level; keep it defined.
k = len(allchrs)
del g
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
71
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
72 ################################################################################
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
73 # Build a bedfile of repeatcoordinates to use by RepEnrich region_sorter
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
74 if is_bed == "FALSE":
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
75 repeat_elements= []
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
76 fout = open(os.path.realpath(setup_folder + os.path.sep + 'repnames.bed'), 'w')
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
77 fin = import_text(annotation_file, ' ')
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
78 x = 0
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
79 rep_chr = {}
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
80 rep_start = {}
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
81 rep_end = {}
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
82 x = 0
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
83 for line in fin:
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
84 if x>2:
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
85 line9 = line[9].replace("(","_").replace(")","_").replace("/","_")
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
86 repname = line9
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
87 if not repname in repeat_elements:
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
88 repeat_elements.append(repname)
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
89 repchr = line[4]
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
90 repstart = int(line[5])
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
91 repend = int(line[6])
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
92 # print >> fout, str(repchr) + '\t'+str(repstart)+ '\t'+str(repend)+ '\t'+str(repname)
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
93 fout.write(str(repchr) + '\t'+str(repstart)+ '\t'+str(repend)+ '\t'+str(repname)+ '\n')
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
94 # if rep_chr.has_key(repname):
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
95 if repname in rep_chr:
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
96 rep_chr[repname].append(repchr)
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
97 rep_start[repname].append(int(repstart))
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
98 rep_end[repname].append(int(repend))
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
99 else:
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
100 rep_chr[repname] = [repchr]
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
101 rep_start[repname] = [int(repstart)]
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
102 rep_end[repname] = [int(repend)]
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
103 x +=1
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
104 if is_bed == "TRUE":
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
105 repeat_elements= []
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
106 fout = open(os.path.realpath(setup_folder + os.path.sep + 'repnames.bed'), 'w')
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
107 fin = open(os.path.realpath(annotation_file), 'r')
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
108 x =0
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
109 rep_chr = {}
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
110 rep_start = {}
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
111 rep_end = {}
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
112 x =0
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
113 for line in fin:
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
114 line=line.strip('\n')
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
115 line=line.split('\t')
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
116 line3 = line[3].replace("(","_").replace(")","_").replace("/","_")
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
117 repname = line3
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
118 if not repname in repeat_elements:
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
119 repeat_elements.append(repname)
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
120 repchr = line[0]
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
121 repstart = int(line[1])
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
122 repend = int(line[2])
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
123 # print >> fout, str(repchr) + '\t'+str(repstart)+ '\t'+str(repend)+ '\t'+str(repname)
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
124 fout.write(str(repchr) + '\t'+str(repstart)+ '\t'+str(repend)+ '\t'+str(repname) + '\n')
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
125 # if rep_chr.has_key(repname):
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
126 if repname in rep_chr:
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
127 rep_chr[repname].append(repchr)
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
128 rep_start[repname].append(int(repstart))
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
129 rep_end[repname].append(int(repend))
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
130 else:
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
131 rep_chr[repname] = [repchr]
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
132 rep_start[repname] = [int(repstart)]
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
133 rep_end[repname] = [int(repend)]
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
134
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
# Release the two handles left open by the annotation-parsing code above.
fin.close()
fout.close()
repeat_elements = sorted(repeat_elements)
print("Writing a key for all repeats...")
# Write the repeat key: one "<repeat name>\t<index>" line per repeat, where
# the index is the repeat's 0-based position in the sorted list.
key_path = os.path.realpath(setup_folder + os.path.sep + 'repgenomes_key.txt')
# The context manager guarantees the file is closed even if a write fails.
with open(key_path, 'w') as fout:
    for x, repeat in enumerate(repeat_elements):
        fout.write(str(repeat) + '\t' + str(x) + '\n')
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
147 ################################################################################
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
# generate spacer for pseudogenomes: a run of gapl "N" characters.
# String repetition builds it in one step instead of appending one
# character per loop iteration; a non-positive gapl still yields "".
spacer = "N" * gapl
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
152
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
# save file with number of fragments processed per repname
print("Saving number of fragments processed per repname to " + nfragmentsfile1)
# One "<fragment count>\t<repname>" line per repeat name, in the iteration
# order of rep_chr. The context manager closes the file even if a write
# raises part-way through.
with open(os.path.realpath(nfragmentsfile1), "w") as fout1:
    for repname in rep_chr:
        fout1.write(str(len(rep_chr[repname])) + "\t" + repname + '\n')
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
161
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
# generate metagenomes and save them to FASTA files
#
# For every repeat name: concatenate all of its genomic fragments (each
# extended by flankingl bases on both sides, clipped to the chromosome, and
# preceded by the spacer), write the result to <setup_folder>/<newname>.fa
# and build a bowtie index with the same base name.
nrepgenomes = len(rep_chr)
for k, repname in enumerate(rep_chr, 1):
    # Replace characters that are awkward in file names.
    newname = repname.replace("(", "_").replace(")", "_").replace("/", "_")
    progress = " (" + str(k) + " of " + str(nrepgenomes) + ")"
    print("processing repgenome " + newname + ".fa" + progress)
    rep_chr_current = rep_chr[repname]
    rep_start_current = rep_start[repname]
    rep_end_current = rep_end[repname]
    print("-------> " + str(len(rep_chr_current)) + " fragments")

    # Collect the pieces and join once at the end: repeated string
    # concatenation is quadratic for repeats with many fragments.
    pieces = []
    # The three lists are filled in lockstep, so zip pairs up the
    # chromosome, start and end of each fragment.
    for chrom, fragstart, fragend in zip(rep_chr_current,
                                         rep_start_current,
                                         rep_end_current):
        # Only the two dictionary lookups can raise KeyError; keep the
        # try body limited to them.
        try:
            chrom_seq = genome[chrom]
            chrom_len = lgenome[chrom]
        except KeyError:
            # Fragment lies on a chromosome absent from the genome: report
            # it and skip the fragment.
            print("Unrecognised Chromosome: " + chrom)
            continue
        # Extend by the flanking length, clipped to the chromosome bounds.
        rstart = max(fragstart - flankingl, 0)
        rend = min(fragend + flankingl, chrom_len - 1)
        pieces.append(spacer)
        pieces.append(chrom_seq[rstart:(rend + 1)])
    metagenome = "".join(pieces)

    # Convert metagenome to SeqRecord object (required by SeqIO.write)
    # NOTE(review): the record id is the literal string "repname", not the
    # repname variable, so every FASTA file gets the same header. Kept
    # unchanged because the consumers of these files are not visible here
    # -- confirm whether this is intended.
    record = SeqRecord(Seq(metagenome, IUPAC.unambiguous_dna),
                       id="repname", name="", description="")
    print("saving repgenome " + newname + ".fa" + progress)
    fastafilename = os.path.realpath(setup_folder + os.path.sep + newname + ".fa")
    SeqIO.write(record, fastafilename, "fasta")

    print("indexing repgenome " + newname + ".fa" + progress)
    # Pass the arguments as a list so that paths containing spaces or quote
    # characters reach bowtie-build intact (no string re-splitting).
    command = ['bowtie-build', '-f', fastafilename,
               setup_folder + os.path.sep + newname]
    returncode = subprocess.call(command)
    if returncode != 0:
        # Report the failure but keep going, so the remaining repeats are
        # still processed.
        sys.stderr.write("bowtie-build failed for " + newname + ".fa"
                         + " (exit status " + str(returncode) + ")\n")

print("... Done")
1435d142041b planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/repenrich commit d5ebd581fa3a22ca61ce07a31c01bb70610fbcf5
drosofff
parents:
diff changeset
194