annotate antismash.py @ 0:6a37d0a4510a default tip

initial uploaded
author bjoern-gruening
date Thu, 15 Mar 2012 05:23:03 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1 #!/usr/bin/env python
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2 ## Copyright (c) 2010 Marnix H. Medema
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3 ## University of Groningen
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4 ## Department of Microbial Physiology / Groningen Bioinformatics Centre
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5 ## License: GNU General Public License v3 or later
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6 ## A copy of GNU GPL v3 should have been included in this software package in LICENSE.txt.
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
8 ##Functions necessary for this script
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
9
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
10 import linecache, cPickle
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
11
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
12 DEBUG = True
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
13
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
14
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
def invalidoptions(argument):
    """Report invalid command-line options, close the log and exit.

    A non-empty *argument* is echoed to stderr, a usage hint is printed to
    stdout, the problem is recorded in the module-level ``logfile`` and the
    process terminates with exit status 1.

    NOTE(review): the original indentation was not available; the usage
    hint, log write and exit are assumed to run unconditionally -- confirm
    against the pristine file.
    """
    # Imported locally because the module's visible import line does not
    # bring ``sys`` into scope.
    import sys

    if len(argument) > 0:
        sys.stderr.write("Invalid options input:\n")
        sys.stderr.write("%s\n" % (argument,))
    # Single-argument print() behaves identically on Python 2 and 3.
    print("From the command line, input antismash --help for more information.")
    logfile.write("Invalid options input: " + argument + "\n")
    logfile.close()
    sys.exit(1)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
23
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
def sortdictkeysbyvalues(dict):
    """Return the keys of *dict* ordered by ascending value.

    Entries with equal values are ordered by key.
    """
    by_value = sorted((val, name) for name, val in dict.items())
    return [pair[1] for pair in by_value]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
28
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
def sortdictkeysbyvaluesrev(dict):
    """Return the keys of *dict* ordered by descending value.

    Entries with equal values are ordered by descending key.
    """
    by_value = sorted((val, name) for name, val in dict.items())
    return [pair[1] for pair in reversed(by_value)]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
34
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
def sortdictkeysbyvaluesrevv(dict):
    """Return the values of *dict* in descending order.

    The (value, key) pairs are sorted together, exactly as in the
    key-returning variants, and only the values are returned.
    """
    by_value = sorted((val, name) for name, val in dict.items())
    return [pair[0] for pair in reversed(by_value)]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
40
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
def get_sequence(fasta):
    """Return the sequence of a single-record FASTA file as one string.

    Leading empty lines are skipped, the first remaining line is taken as
    the header, and all following lines are right-stripped and joined.
    Only the sequence is returned; the header is used for validation.

    If the header contains no '>' or a '>' occurs in the sequence (i.e.
    more than one record), an error is written to stderr and to the
    module-level ``logfile``, and the process exits with status 1.  An
    empty or all-blank file takes the same error path instead of raising
    IndexError.
    """
    # Imported locally because the module's visible import line does not
    # bring ``sys`` into scope.
    import sys

    # Context manager guarantees the handle is closed.
    with open(fasta, 'r') as handle:
        content = handle.readlines()

    # Skip leading blank lines by index instead of repeated list slicing.
    first = 0
    while first < len(content) and content[first] in ("", "\n"):
        first += 1
    if first < len(content):
        header = content[first]
    else:
        # Nothing but blank lines: fall through to the format error below.
        header = ""
    seq = "".join([line.rstrip() for line in content[first + 1:]])

    if ">" not in header or ">" in seq:
        sys.stderr.write("FASTA file not properly formatted; should be single sequence starting with '>' and sequence name.\n")
        logfile.write("FASTA file not properly formatted; should started with '>' and sequence name on first line.\n")
        logfile.close()
        sys.exit(1)
    return seq
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
65
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
def complement(seq):
    """Return the base-wise complement of *seq* as a list of characters.

    Upper- and lower-case a/c/g/t/n are complemented with their case
    preserved; any other symbol is emitted as lower-case 'n'.  The order of
    *seq* is kept (no reversal is performed here).
    """
    pairs = {'a': 't', 'c': 'g', 'g': 'c', 't': 'a', 'n': 'n',
             'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A', 'N': 'N'}
    # One dictionary lookup per base; unknown symbols default to 'n'.
    return [pairs.get(base, 'n') for base in seq]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
77
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
def reverse_complement(seq):
    """Return the reverse complement of *seq* as a string.

    The sequence is reversed and then passed through ``complement``, so
    symbols that ``complement`` does not recognise come back as 'n'.
    """
    bases = list(seq)
    bases.reverse()
    # Join once instead of growing a string one character at a time.
    return "".join(complement(bases))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
86
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
def fastaseqlengths(proteins):
    """Map each sequence name to the length of its sequence.

    ``proteins[0]`` holds the names and ``proteins[1]`` the sequences, which
    are matched up by index.
    """
    names, seqs = proteins[0], proteins[1]
    return dict((name, len(seqs[idx])) for idx, name in enumerate(names))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
99
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
100 # Function that reads the fasta file into a dictionary
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
def fastadict(fasta):
    """Read a (multi-)FASTA file into a ``{name: sequence}`` dictionary.

    Carriage returns are treated as line breaks and every space is replaced
    by "_" before the text is split on whitespace.  A token starting with
    ">" opens a new record whose name is that token without the ">",
    truncated to 67 characters; every other token is appended to the
    sequence of the current record.

    A record is stored only once it has at least one sequence token, and a
    later record with the same (truncated) name replaces the earlier one.
    NOTE(review): this assumes the dictionary assignment sat inside the
    sequence branch of the original loop -- the indentation was not
    available; confirm against the pristine file.

    Raises:
        ValueError: if sequence data appears before the first ">" header
            (previously this surfaced as an unbound-local error).
    """
    # Context manager guarantees the handle is closed.
    with open(fasta, "r") as handle:
        text = handle.read()
    tokens = text.replace("\r", "\n").strip().replace(" ", "_").split()

    chunks_by_name = {}
    name = None
    chunks = []
    for token in tokens:
        if token[0] == ">":
            # Header: start a fresh record; it is stored once data follows.
            name = token[1:68]
            chunks = []
        elif name is None:
            raise ValueError("FASTA file %s contains sequence data before the first '>' header" % (fasta,))
        else:
            chunks.append(token)
            chunks_by_name[name] = chunks
    # Join each record once rather than concatenating per line.
    return dict((key, "".join(parts)) for key, parts in chunks_by_name.items())
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
119
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
120 # Function that extracts all sequence names from the fasta dictionary
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
def lnames(fastadict):
    """Return the sequence names of a FASTA dictionary in sorted order.

    The (name, sequence) items are sorted and the names taken from them, so
    the result lines up index-for-index with ``lseqs`` on the same mapping.
    """
    ordered = sorted(fastadict.items())
    return [entry[0] for entry in ordered]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
125
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
126 # Function that extracts all sequences from the fasta dictionary
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
def lseqs(fastadict):
    """Return the sequences of a FASTA dictionary ordered by sequence name.

    The (name, sequence) items are sorted and the sequences taken from them,
    so the result lines up index-for-index with ``lnames`` on the same
    mapping.
    """
    ordered = sorted(fastadict.items())
    return [entry[1] for entry in ordered]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
131
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
def extractpositions(refmusclefile,newmusclefile,positions,refsequencename,querysequencename):
    """Return the query residues aligned to selected reference residues.

    The alignment in *newmusclefile* is read with ``fastadict``.  Walking
    along the aligned reference sequence, every non-gap character (anything
    but "-") is counted starting from 0; whenever that count is in
    *positions*, the alignment column is recorded.  The characters of the
    aligned query sequence in those columns are returned as a list, in
    column order.

    *refmusclefile* is kept only for interface compatibility: the previous
    implementation parsed that file but never used the result, so it is no
    longer read.

    Raises:
        ValueError: if *refsequencename* or *querysequencename* is not a
            record name in *newmusclefile*.
        IndexError: if the query sequence is shorter than a recorded column.
    """
    muscle_dict = fastadict(newmusclefile)

    # Look the sequences up directly; ValueError is kept as the error type
    # that the former list.index() lookup raised for an unknown name.
    try:
        refseq = muscle_dict[refsequencename]
    except KeyError:
        raise ValueError("%r is not in alignment %s" % (refsequencename, newmusclefile))

    # Translate ungapped reference positions into alignment columns.
    columns = []
    residue_nr = 0
    for column, symbol in enumerate(refseq):
        if symbol != "-":
            if residue_nr in positions:
                columns.append(column)
            residue_nr += 1

    try:
        query_seq = muscle_dict[querysequencename]
    except KeyError:
        raise ValueError("%r is not in alignment %s" % (querysequencename, newmusclefile))

    # Extract positions from query sequence
    return [query_seq[column] for column in columns]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
162
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
163 def parsegenes(genes):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
164 genedict = {}
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
165 genelist = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
166 joinlist = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
167 joindict = {}
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
168 accessiondict = {}
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
169 error = "n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
170 errorlocations = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
171 genenr = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
172 for i in genes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
173 if " gene " in i:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
174 i = i.split(" gene ")[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
175 elif "FT gene " in i:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
176 i = i.split("FT gene ")[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
177 join = "no"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
178 genenr += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
179 #Find gene location info for each gene
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
180 if "complement" in i.split("\n")[0].lower() and i.split("\n")[0][-1] == ")":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
181 location = i.split("\n")[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
182 elif "complement" in i.split("\n")[0].lower() and i.split("\n")[0][-1] != ")":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
183 location = i.split(" /")[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
184 while ")" not in location.replace(" ","")[-3:]:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
185 location = location.rpartition("\n")[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
186 location = location.replace("\n","")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
187 location = location.replace(" ","")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
188 elif "join" in i.split("\n")[0].lower() and i.split("\n")[0][-1] == ")":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
189 location = i.split("\n")[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
190 elif "join" in i.split("\n")[0].lower() and i.split("\n")[0][-1] != ")":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
191 location = i.split("/")[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
192 while ")" not in location.replace(" ","")[-3:]:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
193 location = location.rpartition("\n")[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
194 location = location.replace("\n","")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
195 location = location.replace(" ","")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
196 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
197 location = i.split("\n")[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
198 original_location = location
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
199 #location info found in gbk/embl file, now extract start and end positions
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
200 if location.count("(") != location.count(")"):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
201 error = "y"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
202 errorlocations.append(original_location)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
203 continue
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
204 if "join(complement" in location.lower():
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
205 location = location.lower()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
206 join = "yes"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
207 location2 = location.partition("join(")[2][:-1].replace("<","").replace(">","")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
208 if ("complement(" in location2[0:12] and location2[-1] != ")") or ")," in location2:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
209 error = "y"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
210 errorlocations.append(original_location)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
211 continue
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
212 elif ("complement(" in location2[0:12] and location2[-1] == ")" and location2[12:-2].count(")") == 0 and location2[12:-2].count("(") == 0):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
213 location2 = location2.partition("complement(")[2][:-1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
214 start = location2.split(",")[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
215 start = start.split("..")[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
216 start = start.replace("<","")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
217 end = location2.split(",")[-1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
218 if ".." in end:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
219 end = end.split("..")[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
220 end = end.replace(">","")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
221 joinedparts = location2.split(",")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
222 joinedparts2 = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
223 for j in joinedparts:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
224 newjoinedpart = j.replace("<","")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
225 newjoinedpart = newjoinedpart.replace(">","")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
226 joinedparts2.append(newjoinedpart)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
227 strand = "-"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
228 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
229 error = "y"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
230 errorlocations.append(original_location)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
231 continue
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
232 elif "complement" in location.lower():
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
233 location = location.lower()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
234 location = location.partition("complement(")[2][:-1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
235 if "join(" in location.lower():
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
236 join = "yes"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
237 location = location.lower()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
238 location2 = location.partition("join(")[2][:-1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
239 start = location2.split(",")[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
240 start = start.split("..")[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
241 start = start.replace("<","")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
242 end = location2.split(",")[-1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
243 if ".." in end:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
244 end = end.split("..")[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
245 end = end.replace(">","")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
246 joinedparts = location2.split(",")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
247 joinedparts2 = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
248 for j in joinedparts:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
249 newjoinedpart = j.replace("<","")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
250 newjoinedpart = newjoinedpart.replace(">","")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
251 joinedparts2.append(newjoinedpart)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
252 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
253 start = location.split("..")[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
254 start = start.replace("<","")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
255 if ".." in location:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
256 end = location.split("..")[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
257 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
258 end = location
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
259 end = end.replace(">","")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
260 strand = "-"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
261 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
262 if "join(" in location.lower():
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
263 join = "yes"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
264 location = location.lower()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
265 location2 = location.partition("join(")[2][:-1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
266 start = location2.split(",")[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
267 start = start.split("..")[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
268 start = start.replace("<","")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
269 end = location2.split(",")[-1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
270 if ".." in end:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
271 end = end.split("..")[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
272 end = end.replace(">","")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
273 joinedparts = location2.split(",")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
274 joinedparts2 = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
275 for j in joinedparts:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
276 newjoinedpart = j.replace("<","")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
277 newjoinedpart = newjoinedpart.replace(">","")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
278 joinedparts2.append(newjoinedpart)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
279 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
280 start = location.split("..")[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
281 start = start.replace("<","")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
282 if ".." in location:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
283 end = location.split("..")[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
284 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
285 end = location
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
286 end = end.replace(">","")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
287 strand = "+"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
288 try:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
289 if int(start) > int(end):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
290 start2 = end
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
291 end2 = start
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
292 start = start2
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
293 end = end2
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
294 except ValueError:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
295 error = "y"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
296 errorlocations.append(original_location)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
297 continue
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
298 #Correct for alternative codon start positions
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
299 if "codon_start=" in i.lower():
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
300 temp = i.lower().split("codon_start=")[1].split()[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
301 if '"' in temp:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
302 # temp ist "1" oder "2", dies kommt aus biopython
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
303 temp = temp[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
304 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
305 # ohne anfuhrungszeichen ... 1 oder 2
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
306 temp = temp[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
307 codonstart = temp
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
308 if strand == "+":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
309 start = str(int(start) + (int(codonstart) - 1))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
310 elif strand == "-":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
311 end = str(int(end) - (int(codonstart) - 1))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
312 #Find gene name for each gene, preferably locus_tag, than gene, than protein_ID
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
313 a = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
314 b = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
315 genename = ""
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
316 nrlines = len(i.split("\n"))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
317 while b == 0:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
318 line = i.split("\n")[a]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
319 if "protein_id=" in line:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
320 genename = (line.split("protein_id=")[1][1:-1]).replace(" ","_")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
321 genename = genename.replace("\\","_")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
322 genename = genename.replace("/","_")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
323 b += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
324 elif "protein_id=" in line.lower():
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
325 genename = (line.lower().split("protein_id=")[1][1:-1]).replace(" ","_")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
326 genename = genename.replace("\\","_")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
327 genename = genename.replace("/","_")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
328 b += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
329 elif a == (nrlines - 1):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
330 genename = ""
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
331 b += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
332 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
333 a += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
334 if len(genename) > 1:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
335 accnr = genename
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
336 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
337 accnr = "no_accession_number_found"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
338 a = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
339 b = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
340 nrlines = len(i.split("\n"))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
341 while b == 0:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
342 line = i.split("\n")[a]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
343 if "gene=" in line:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
344 genename = (line.split("gene=")[1][1:-1]).replace(" ","_")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
345 genename = genename.replace("\\","_")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
346 genename = genename.replace("/","_")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
347 b += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
348 elif "gene=" in line.lower():
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
349 genename = (line.lower().split("gene=")[1][1:-1]).replace(" ","_")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
350 genename = genename.replace("\\","_")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
351 genename = genename.replace("/","_")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
352 b += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
353 elif a == (nrlines - 1):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
354 b += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
355 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
356 a += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
357 a = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
358 b = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
359 nrlines = len(i.split("\n"))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
360 while b == 0:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
361 line = i.split("\n")[a]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
362 if "locus_tag=" in line:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
363 genename = (line.split("locus_tag=")[1][1:-1]).replace(" ","_")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
364 genename = genename.replace("\\","_")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
365 genename = genename.replace("/","_")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
366 b += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
367 elif "locus_tag=" in line.lower():
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
368 genename = (line.lower().split("locus_tag=")[1][1:-1]).replace(" ","_")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
369 genename = genename.replace("\\","_")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
370 genename = genename.replace("/","_")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
371 b += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
372 elif a == (nrlines - 1):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
373 if genename == "":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
374 genename = "prot_ID_" + str(genenr)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
375 b += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
376 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
377 a += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
378 #Find sequence for each gene
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
379 a = 0 ###Not all gbks contain protein sequences as translations, therefore sequences from gene clusters are now extracted from the database at a later stage if sequence is not in gbk
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
380 b = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
381 sequence = ""
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
382 while b < 2:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
383 line = i.split("\n")[a]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
384 if "translation=" in line:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
385 sequence = line.split("translation=")[1][1:]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
386 b += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
387 a += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
388 if line.count('"') > 1:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
389 sequence = line.split("translation=")[1][1:-1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
390 b = 2
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
391 elif "translation=" in line.lower():
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
392 sequence = line.lower().split("translation=")[1][1:]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
393 b += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
394 a += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
395 if line.count('"') > 1:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
396 sequence = line.lower().split("translation=")[1][1:-1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
397 b = 2
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
398 elif a == (nrlines - 2) or a == (nrlines - 1):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
399 sequence = ""
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
400 b = 2
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
401 elif b == 1:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
402 if '"' in line:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
403 seqline = line.replace(" ","")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
404 seqline = seqline.split('"')[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
405 sequence = sequence + seqline
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
406 b += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
407 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
408 seqline = line.replace(" ","")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
409 sequence = sequence + seqline
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
410 a += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
411 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
412 a += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
413 sequence = sequence.upper()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
414 #Quality-check sequence
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
415 forbiddencharacters = ["'",'"','=',';',':','[',']','>','<','|','\\',"/",'*','-','_','.',',','?',')','(','^','#','!','`','~','+','{','}','@','$','%','&']
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
416 for z in forbiddencharacters:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
417 if z in sequence:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
418 sequence = ""
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
419 #Find annotation for each gene
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
420 a = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
421 b = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
422 while b == 0:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
423 line = i.split("\n")[a]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
424 if "product=" in line:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
425 annotation = line.split("product=")[1][1:]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
426 annotation = annotation.replace(" ","_")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
427 if annotation[-1] == '"':
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
428 annotation = annotation[:-1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
429 b += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
430 elif "product=" in line.lower():
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
431 annotation = line.lower().split("product=")[1][1:]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
432 annotation = annotation.replace(" ","_")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
433 if annotation[-1] == '"':
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
434 annotation = annotation[:-1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
435 b += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
436 elif a == (nrlines - 1):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
437 annotation = "not_annotated"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
438 b += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
439 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
440 a += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
441 accessiondict[genename] = accnr
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
442 if join == "yes":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
443 joinlist.append(genename)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
444 joindict[genename] = joinedparts2
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
445 #Save data to dictionary
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
446 if len(genename) > 1:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
447 genedict[genename] = [start,end,strand,annotation,sequence]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
448 genelist.append(genename)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
449 if error == "y":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
450 errorinfo = "\n".join(errorlocations)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
451 print >> sys.stderr, "Exit: locations in GBK/EMBL file not properly formatted:\n" + errorinfo
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
452 logfile.write("Exit: GBK file not properly formatted, no sequence found or no CDS annotation found.\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
453 logfile.close()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
454 sys.exit(1)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
455 return [genelist, genedict, joinlist, joindict, accessiondict]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
456
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
def cleandnaseq(dnaseq):
    """Normalise a raw nucleotide string taken from a GBK/EMBL sequence section.

    Spaces, tabs, newlines, digits and slashes are removed; uracil is
    rewritten as thymine (case preserved); every IUPAC ambiguity code
    r/y/w/s/m/k/h/b/v/d, in either case, becomes a lowercase "n".
    All other characters are returned untouched.
    """
    # Layout residue: whitespace, position counters and the "//" terminator.
    for residue in " \t\n0123456789/":
        dnaseq = dnaseq.replace(residue, "")
    # RNA -> DNA alphabet, keeping the case of the original letter.
    dnaseq = dnaseq.replace("u", "t").replace("U", "T")
    # Ambiguity codes collapse to lowercase "n" regardless of input case.
    for ambiguity in "rywsmkhbvd":
        dnaseq = dnaseq.replace(ambiguity, "n")
        dnaseq = dnaseq.replace(ambiguity.upper(), "n")
    return dnaseq
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
495
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
def _joined_part_bounds(part):
    """Return (start, end) as ints for one element of a joined location.

    A part is either "start..end" or a single position; in the latter case
    the same number is used for both start and end.
    """
    pieces = part.split("..")
    if ".." in part:
        return int(pieces[0]), int(pieces[1])
    return int(pieces[0]), int(part)


def extractprotfasta(genelist,genedict,dnaseq,rc_dnaseq,joinlist,joindict,accessiondict):
    """Collect a FASTA-style name and a protein sequence for every gene.

    Parameters:
      genelist      -- gene names, in the order they should be emitted
      genedict      -- gene name -> [start, end, strand, annotation, sequence]
      dnaseq        -- forward-strand nucleotide sequence
      rc_dnaseq     -- reverse complement of dnaseq
      joinlist      -- names of genes whose location is split into parts
      joindict      -- gene name -> list of "start..end" (or single position) parts
      accessiondict -- passed through to the result unchanged

    Returns [names, seqs, genelist, genedict, accessiondict], where names[k]
    is "input|c1|<start>-<end>|<strand>|<genename>|<annotation>" and seqs[k]
    is the matching protein sequence.

    A stored sequence longer than 5 characters is used directly; otherwise the
    gene is cut out of dnaseq / rc_dnaseq and passed to translate(), which is
    expected to be defined elsewhere in this file.
    """
    names = []
    seqs = []
    rc_length = len(rc_dnaseq)
    for genename in genelist:
        gene = genedict[genename]
        if len(gene[4]) > 5:
            # A usable translation came with the annotation; take it as-is.
            protseq = gene[4]
        else:
            # No usable translation: extract the coding DNA and translate it.
            start = int(gene[0])
            end = int(gene[1])
            strand = gene[2]
            if strand == "+":
                if genename in joinlist:
                    parts = []
                    for part in joindict[genename]:
                        partstart, partend = _joined_part_bounds(part)
                        parts.append(dnaseq[(partstart - 1):partend])
                    geneseq = "".join(parts)
                else:
                    geneseq = dnaseq[(start - 1):end]
                protseq = translate(geneseq)
            elif strand == "-":
                if genename in joinlist:
                    parts = []
                    # Walk the parts back-to-front WITHOUT reversing the list
                    # stored in joindict: an in-place reverse would corrupt the
                    # caller's data and flip the order again if the same gene
                    # name shows up twice in genelist.
                    for part in reversed(joindict[genename]):
                        partstart, partend = _joined_part_bounds(part)
                        parts.append(rc_dnaseq[(rc_length - partend):(rc_length - partstart + 1)])
                    geneseq = "".join(parts)
                else:
                    geneseq = rc_dnaseq[(rc_length - end):(rc_length - start + 1)]
                protseq = translate(geneseq)
            # NOTE(review): a strand other than "+"/"-" leaves protseq at its
            # previous value (or unbound for the first gene) -- confirm that
            # parsegenes can never produce such a strand.
        name = "input" + "|" + "c1" + "|" + gene[0] + "-" + gene[1] + "|" + gene[2] + "|" + genename + "|" + gene[3]
        seqs.append(protseq)
        names.append(name)
    proteins = [names,seqs,genelist,genedict,accessiondict]
    return proteins
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
545
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
def gbk2proteins(gbkfile):
    """Read a GenBank file and return its proteins, accession and DNA length.

    Parameters:
      gbkfile -- path of the GenBank file to read

    Returns [proteins, accession, dnaseqlength]:
      proteins     -- the list produced by extractprotfasta()
      accession    -- first LOCUS identifier of at least 4 characters that
                      also contains >= 3 digits and >= 1 ASCII letter,
                      otherwise ""
      dnaseqlength -- length of the cleaned nucleotide sequence

    Calls sys.exit(1) when the file has no CDS feature or no ORIGIN section,
    or when fewer than half of the sequence characters are A/C/G/T.
    A ValueError raised by parsegenes() is reported on stderr and re-raised.

    Relies on names defined elsewhere in this file: sys, logfile,
    ascii_letters, cleandnaseq, reverse_complement, parsegenes and
    extractprotfasta.
    """
    # Read everything up front and release the handle straight away.
    handle = open(gbkfile,"r")
    try:
        filetext = handle.read()
    finally:
        handle.close()
    filetext = filetext.replace("\r","\n")
    # NOTE(review): the " CDS " and "LOCUS " markers are matched literally, so
    # detection depends on the exact spacing of the input -- confirm against
    # real GenBank files.
    if " CDS " not in filetext or "\nORIGIN" not in filetext:
        sys.stderr.write("Exit: GBK file not properly formatted, no sequence found or no CDS annotation found.\n")
        logfile.write("Exit: GBK file not properly formatted, no sequence found or no CDS annotation found.\n")
        logfile.close()
        sys.exit(1)
    origin_split = filetext.split("\nORIGIN")
    cdspart = origin_split[0]
    #Extract DNA sequence and calculate reverse complement of it
    dnaseq = cleandnaseq(origin_split[1])
    # A sequence that is less than half plain A/C/G/T is taken to be protein.
    nr_plain_bases = sum([dnaseq.count(base) for base in "ACGTacgt"])
    if nr_plain_bases < (0.5 * len(dnaseq)):
        sys.stderr.write("Protein GBK/EMBL file provided. Please provide nucleotide GBK/EMBL file.\n")
        sys.exit(1)
    dnaseqlength = len(dnaseq)
    rc_dnaseq = reverse_complement(dnaseq)
    #Extract genes
    genes = cdspart.split(" CDS ")[1:]
    try:
        genesdetails = parsegenes(genes)
    except ValueError as e:
        sys.stderr.write("Could not parse genes from GBK/EMBL file. Please check if your GBK/EMBL file is valid.\n")
        # Report the detail BEFORE re-raising; statements placed after a bare
        # raise can never run.
        sys.stderr.write("Error was: %s\n" % e)
        raise
    genelist = genesdetails[0]
    genedict = genesdetails[1]
    joinlist = genesdetails[2]
    joindict = genesdetails[3]
    accessiondict = genesdetails[4]
    #Locate all genes on DNA sequence and translate to protein sequence
    proteins = extractprotfasta(genelist,genedict,dnaseq,rc_dnaseq,joinlist,joindict,accessiondict)
    # Only the first record (text before the first "//" line) is searched.
    accession = ""
    for line in filetext.split("\n//")[0].split("\n"):
        if "LOCUS " in line:
            # Whitespace-split so any amount of padding after LOCUS is fine.
            tokens = line.split("LOCUS ")[1].split()
            if tokens and len(tokens[0]) >= 4:
                accession = tokens[0]
                break
    #Test if accession number is probably real GenBank/RefSeq acc nr
    nrletters = len([char for char in accession if char in ascii_letters])
    nrnumbers = len([char for char in accession if char in "0123456789"])
    if nrnumbers < 3 or nrletters < 1:
        accession = ""
    return [proteins,accession,dnaseqlength]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
612
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
def embl2proteins(emblfile,sequence):
    """Extract protein records, accession and DNA length from an EMBL file.

    emblfile -- path of the EMBL flat file to read.
    sequence -- only its length is consulted, in the initial sanity check;
                the DNA actually used is always the one read from the
                file's sequence section.

    Returns the list [proteins, accession, dnaseqlength], where proteins is
    whatever extractprotfasta() returns, accession is the first plausible
    "AC" identifier found (or "" if none) and dnaseqlength is the length of
    the cleaned DNA sequence.

    Terminates the process with sys.exit(1) when the file lacks CDS
    annotation or a sequence, when the sequence does not look like
    nucleotides, or when parsegenes() raises ValueError.
    """
    # NOTE(review): the EMBL marker literals below ("FT CDS ", "\nSQ ",
    # "SQ ", "AC ") are reproduced as they appear in this copy of the file.
    # EMBL flat files normally pad these tags with several spaces - confirm
    # the literals against a real input file.
    with open(emblfile,"r") as handle:
        # 'with' guarantees the handle is closed even if read() fails
        filetext = handle.read().replace("\r","\n")
    if "FT CDS " not in filetext or ("\nSQ" not in filetext and len(sequence) < 1):
        message = "Exit: EMBL file not properly formatted, no sequence found or no CDS annotation found.\n"
        logfile.write(message)
        # the extra newline mirrors what the print statement used to append
        sys.stderr.write(message + "\n")
        logfile.close()
        sys.exit(1)
    cdspart = filetext.split("\nSQ ")[0]
    #Extract DNA sequence and calculate reverse complement of it
    seqpart = filetext.split("\nSQ ")[1]
    # the first line of the sequence section is its header, not sequence data
    dnaseq = cleandnaseq("".join(seqpart.split("\n")[1:]))
    nucleotides = sum(dnaseq.count(base) for base in "ACGTacgt")
    if nucleotides < (0.5 * len(dnaseq)):
        sys.stderr.write("Protein GBK/EMBL file provided. Please provide nucleotide GBK/EMBL file.\n")
        sys.exit(1)
    dnaseqlength = len(dnaseq)
    rc_dnaseq = reverse_complement(dnaseq)
    #Extract genes
    genes = cdspart.split("FT CDS ")[1:]
    try:
        genesdetails = parsegenes(genes)
    except ValueError as e:
        sys.stderr.write("Could not parse genes from GBK/EMBL file. Please check if your GBK/EMBL file is valid.\n")
        sys.stderr.write("Error was: %s\n" % e)
        sys.exit(1)
    genelist = genesdetails[0]
    genedict = genesdetails[1]
    joinlist = genesdetails[2]
    joindict = genesdetails[3]
    accessiondict = genesdetails[4]
    #Locate all genes on DNA sequence and translate to protein sequence
    proteins = extractprotfasta(genelist,genedict,dnaseq,rc_dnaseq,joinlist,joindict,accessiondict)
    #Take the first "AC" entry of at least 4 characters as the accession
    accession = ""
    for line in filetext.split("SQ ")[0].split("\n"):
        if accession == "" and "AC " in line:
            candidate = line.split("AC ")[1].replace(" ","").split(";")[0]
            if len(candidate) >= 4:
                accession = candidate
    #Test if accession number is probably real GenBank/RefSeq acc nr
    #(at least three digits and at least one letter)
    nrletters = sum(1 for char in accession if char in ascii_letters)
    nrnumbers = sum(1 for char in accession if char in "0123456789")
    if nrnumbers < 3 or nrletters < 1:
        accession = ""
    return [proteins,accession,dnaseqlength]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
683
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
#Translation table standard genetic code; according to http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi
#Keys are upper-case codons; translate() upper-cases each codon before lookup.
_STANDARD_CODON_TABLE = {
    'TTT': 'F', 'TCT': 'S', 'TAT': 'Y', 'TGT': 'C',
    'TTC': 'F', 'TCC': 'S', 'TAC': 'Y', 'TGC': 'C',
    'TTA': 'L', 'TCA': 'S', 'TAA': '*', 'TGA': '*',
    'TTG': 'L', 'TCG': 'S', 'TAG': '*', 'TGG': 'W',
    'CTT': 'L', 'CCT': 'P', 'CAT': 'H', 'CGT': 'R',
    'CTC': 'L', 'CCC': 'P', 'CAC': 'H', 'CGC': 'R',
    'CTA': 'L', 'CCA': 'P', 'CAA': 'Q', 'CGA': 'R',
    'CTG': 'L', 'CCG': 'P', 'CAG': 'Q', 'CGG': 'R',
    'ATT': 'I', 'ACT': 'T', 'AAT': 'N', 'AGT': 'S',
    'ATC': 'I', 'ACC': 'T', 'AAC': 'N', 'AGC': 'S',
    'ATA': 'I', 'ACA': 'T', 'AAA': 'K', 'AGA': 'R',
    'ATG': 'M', 'ACG': 'T', 'AAG': 'K', 'AGG': 'R',
    'GTT': 'V', 'GCT': 'A', 'GAT': 'D', 'GGT': 'G',
    'GTC': 'V', 'GCC': 'A', 'GAC': 'D', 'GGC': 'G',
    'GTA': 'V', 'GCA': 'A', 'GAA': 'E', 'GGA': 'G',
    'GTG': 'V', 'GCG': 'A', 'GAG': 'E', 'GGG': 'G',
}


def translate(sequence):
    """Translate a DNA string into a protein string.

    The sequence is read in consecutive, non-overlapping codons starting at
    position 0; one or two trailing bases that do not fill a codon are
    ignored.

    * The first codon is always reported as "M", whatever its bases are.
    * Any other codon that is not a key of the codon table (for example one
      containing "N" or another ambiguity code) is reported as "X".
    * Codons are matched case-insensitively, so mixed-case codons such as
      "Atg" are translated instead of being reported as "X".
    * Stop codons are reported as "*"; a single "*" at the very end of the
      result is removed.

    Returns the protein sequence as a string ("" for fewer than 3 bases).
    """
    residues = []
    for offset in range(0, len(sequence) - 2, 3):
        if offset == 0:
            # start codon: always methionine, alternative starts included
            residues.append("M")
        else:
            codon = sequence[offset:offset + 3].upper()
            residues.append(_STANDARD_CODON_TABLE.get(codon, "X"))
    protseq = "".join(residues)
    if protseq.endswith("*"):
        protseq = protseq[:-1]
    return protseq
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
744
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
def writefasta(names,seqs,file):
    """Write sequences to a FASTA file.

    names -- sequence of record names; one ">name" header line is written
             per entry.
    seqs  -- sequence of sequence strings, indexed in parallel with names.
    file  -- path of the output file; it is opened for writing, so an
             existing file is overwritten.

    IOError, OSError and NotImplementedError are not propagated: a message
    is written to stderr and to the global logfile instead. Other errors
    (e.g. IndexError when seqs is shorter than names) still propagate; the
    output file is closed in every case.
    """
    try:
        with open(file,"w") as out_file:
            for index, name in enumerate(names):
                out_file.write(">%s\n%s\n" % (name, seqs[index]))
    except (IOError,OSError,NotImplementedError):
        sys.stderr.write("FASTA file not created.\n")
        logfile.write("FASTA file not created.\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
762
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
def parsehmmoutput(cutoff,file):
    """Collect accessions and scores of hits above a score cutoff.

    cutoff -- minimum score; only hits scoring strictly above it are kept.
    file   -- path of the fixed-width hit table to parse.

    The third line of the file is used as a ruler: the width of the name
    column is the position of the first character that follows the first
    run of two or more "-" characters. For every non-empty line that does
    not start with "#", the name column is split on "|" and its fifth field
    is taken as the accession; the score is read from the six characters
    starting 76 columns after the name column.

    Returns [proteins, scores], two parallel lists.
    Raises IndexError or ValueError on lines that do not follow this layout.
    """
    protlines = []
    measuringline = ""
    # Single pass over the file: pick up the ruler line (3rd line) and the
    # hit lines together. The ruler is read directly rather than through
    # linecache, whose cache could serve a stale line when the same path is
    # rewritten during one run.
    with open(file,"r") as handle:
        for linenr, line in enumerate(handle):
            if linenr == 2:
                measuringline = line
            if len(line) > 1 and not line.startswith('#'):
                protlines.append(line.strip())
    #Measure the width of the name column on the ruler line
    namewidth = 0
    dashes = 0
    for char in measuringline:
        namewidth += 1
        if "-" in char:
            dashes += 1
        elif dashes > 1:
            break
    proteins = []
    scores = []
    for protline in protlines:
        accession = protline[0:namewidth].split("|")[4]
        score = float(protline[(namewidth+76):(namewidth+82)].replace(" ",""))
        if score > cutoff and len(accession) > 1:
            proteins.append(accession)
            scores.append(score)
    return [proteins,scores]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
798
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
def sortonsecondvalueoflist(first,second):
    """cmp-style comparator ordering two records by their second element.

    Both second elements are converted with int() before being compared, so
    numeric strings and integers order consistently regardless of which
    argument they arrive in.

    Returns 1 if first sorts after second, -1 if it sorts before, 0 if the
    two values are equal.
    """
    f = int(first[1])
    s = int(second[1])
    return (f > s) - (f < s)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
809
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
def hmmlengths(hmmfile):
    """Read the model lengths from a file of HMM profiles.

    hmmfile -- path of a text file holding one or more profiles, each
               terminated by "//" and carrying a "NAME" and a "LENG" line.

    Returns a dict mapping each profile name to its length as an int.
    Raises IndexError if a record lacks a NAME or LENG tag and ValueError if
    the LENG value is not an integer.
    """
    hmmlengthsdict = {}
    with open(hmmfile,"r") as handle:
        filetext = handle.read().replace("\r","\n")
    # whatever follows the last "//" terminator is not a record
    for record in filetext.split("//")[:-1]:
        # strip() drops any padding between the tag and its value, so the key
        # is the bare profile name
        name = record.split("NAME ")[1].split("\n", 1)[0].strip()
        length = record.split("LENG ")[1].split("\n", 1)[0]
        hmmlengthsdict[name] = int(length)
    return hmmlengthsdict
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
830
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
def hmmscanparse(hmmscanoutputfile,hmmlengthsdict):
    """Parse per-domain hits from a domain-scan report into a dictionary.

    hmmscanoutputfile -- path of the text report; it is split into one
                         section per "Query: " header.
    hmmlengthsdict    -- dict mapping domain name to model length, as
                         produced by hmmlengths().

    Returns a dict mapping each query name to a list of
    [domainname, start, end, evalue, score] records sorted by start.
    start and end are ints. For records read straight from the report,
    evalue is the string found in the file and score is a float; for
    records produced by merging two fragments both are strings.

    Per query the hits are post-processed in three steps:
      1. of two hits overlapping by more than 20 positions, only the higher
         scoring one is kept (on an exact score tie the later one is
         dropped);
      2. consecutive hits of the same domain are merged when at least one is
         shorter than 75% of the model and together they are shorter than
         150% of it;
      3. hits covering 60% of the model length or less are removed - this
         step is only applied when more than one hit is left.

    Raises IndexError/ValueError on reports that do not have the expected
    layout and KeyError for domains missing from hmmlengthsdict.
    """
    domaindict = {}
    with open(hmmscanoutputfile,"r") as handle:
        filetext = handle.read().replace("\r","\n")
    for output in filetext.split("Query: ")[1:]:
        # The query name is the first whitespace-delimited word of the header
        # line; split() also copes with padding between "Query:" and the name.
        headerwords = output.split("\n", 1)[0].split()
        protname = headerwords[0] if headerwords else ""
        domainresults = output.split("Domain annotation for each model:\n")[1]
        domainresults = domainresults.split("\n\nInternal pipeline statistics summary:")[0]
        domainlist = []
        #Find all domains
        for domain in domainresults.split(">> "):
            tokens = domain.split('\n')
            domainname = tokens[0].split(" ", 1)[0]
            # rows start after the name line and two table-header lines; the
            # last two lines of each section are skipped
            for row in tokens[3:-2]:
                tabs = [tab for tab in row.split(" ") if tab != '']
                start = int(tabs[12])
                end = int(tabs[13])
                evalue = tabs[5]
                score = float(tabs[2])
                domainlist.append([domainname,start,end,evalue,score])
        # stable sort on the (integer) start coordinate
        domainlist.sort(key=lambda hit: hit[1])
        #Purify domain list to remove overlapping domains, only keeping those with the highest scores
        if len(domainlist) > 1:
            maxoverlap = 20
            kept = [domainlist[0]]
            for hit in domainlist[1:]:
                if hit[1] < (kept[-1][2] - maxoverlap):
                    if hit[4] > kept[-1][4]:
                        kept[-1] = hit
                    # lower or equal score: the overlapping hit is dropped
                else:
                    kept.append(hit)
            domainlist = kept
        #Merge domain fragments which are really one domain
        if len(domainlist) > 1:
            merged = [domainlist[0]]
            for hit in domainlist[1:]:
                previous = merged[-1]
                alilength1 = int(previous[2]) - int(previous[1])
                alilength2 = int(hit[2]) - int(hit[1])
                domainlength = hmmlengthsdict[hit[0]]
                if hit[0] == previous[0] and (alilength1 < (0.75 * domainlength) or alilength2 < (0.75 * domainlength)) and (alilength1 + alilength2) < (1.5 * domainlength):
                    # combined record: e-values multiplied, scores added, both
                    # stored as strings
                    evalue = str(float(previous[3]) * float(hit[3]))
                    score = str(float(previous[4]) + float(hit[4]))
                    merged[-1] = [hit[0],previous[1],hit[2],evalue,score]
                else:
                    merged.append(hit)
            domainlist = merged
        #Remove incomplete domains (covering less than 60% of total domain hmm length)
        # NOTE(review): a lone remaining hit is never length-filtered because
        # of the "> 1" test - confirm this is intended.
        if len(domainlist) > 1:
            domainlist = [hit for hit in domainlist
                          if (int(hit[2]) - int(hit[1])) > (0.6 * hmmlengthsdict[hit[0]])]
        #Save domainlist to domaindict
        domaindict[protname] = domainlist
    return domaindict
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
908
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
909 def blastparse(blasttext,minseqcoverage,minpercidentity,seqlengths,geneclustergenes):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
910 blastdict = {}
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
911 querylist = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
912 hitclusters = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
913 blastlines = blasttext.split("\n")[:-1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
914 #Filter for best blast hits (of one query on each subject)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
915 query_subject_combinations = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
916 blastlines2 = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
917 for i in blastlines:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
918 tabs = i.split("\t")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
919 query = tabs[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
920 subject = tabs[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
921 query_subject_combination = query + "_" + subject
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
922 if query_subject_combination in query_subject_combinations:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
923 pass
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
924 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
925 query_subject_combinations.append(query_subject_combination)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
926 blastlines2.append(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
927 blastlines = blastlines2
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
928 #Filters blastlines to get rid of hits that do not meet criteria
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
929 blastlines2 = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
930 for i in blastlines:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
931 tabs = i.split("\t")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
932 perc_ident = int(tabs[2].split(".",1)[0])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
933 alignmentlength = float(tabs[3])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
934 evalue = str(tabs[10])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
935 blastscore = int(tabs[11].split(".",1)[0])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
936 if seqlengths.has_key(query):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
937 perc_coverage = (float(tabs[3]) / seqlengths[query]) * 100
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
938 if perc_ident > minpercidentity and (perc_coverage > minseqcoverage or alignmentlength > 40):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
939 blastlines2.append(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
940 blastlines = blastlines2
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
941 #Goes through the blastlines. For each query, creates a querydict and hitlist, and adds these to the blastdict when finding the next query
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
942 firstquery = "y"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
943 for i in blastlines:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
944 tabs = i.split("\t")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
945 query = tabs[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
946
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
947 second_column_split = tabs[1].split("|")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
948
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
949 subject = second_column_split[4]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
950 if subject == "no_locus_tag":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
951 subject = second_column_split[6]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
952 if subject in geneclustergenes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
953 subject = "h_" + subject
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
954 if len(second_column_split) > 6:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
955 locustag = second_column_split[6]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
956 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
957 locustag = ""
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
958 subject_genecluster = second_column_split[0] + "_" + second_column_split[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
959 subject_start = (second_column_split[2]).split("-")[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
960 subject_end = (second_column_split[2]).split("-")[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
961 subject_strand = second_column_split[3]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
962 subject_annotation = second_column_split[5]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
963 perc_ident = int(tabs[2].split(".")[0])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
964 alignmentlength = float(tabs[3])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
965 evalue = str(tabs[10])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
966 blastscore = int(tabs[11].split(".", 1)[0])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
967 if seqlengths.has_key(query):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
968 perc_coverage = (float(tabs[3]) / seqlengths[query]) * 100
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
969 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
970 seqlength = len(seqdict[query.split("|")[4]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
971 perc_coverage = (float(tabs[3]) / seqlength) * 100
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
972 if firstquery == "y": #Only until the first blastline with good hit
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
973 firstquery = "n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
974 querylist.append(query)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
975 subjectlist = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
976 querydict = {}
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
977 subjectlist.append(subject)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
978 querydict[subject] = [subject_genecluster,subject_start,subject_end,subject_strand,subject_annotation,perc_ident,blastscore,perc_coverage,evalue,locustag]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
979 if subject_genecluster not in hitclusters:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
980 hitclusters.append(subject_genecluster)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
981 last_query = query
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
982 elif i == blastlines[-1]: #Only for the last blastline
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
983 if query not in querylist:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
984 subjectlist.append(subject)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
985 querydict[subject] = [subject_genecluster,subject_start,subject_end,subject_strand,subject_annotation,perc_ident,blastscore,perc_coverage,evalue,locustag]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
986 blastdict[query] = [subjectlist,querydict]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
987 querylist.append(query)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
988 if subject_genecluster not in hitclusters:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
989 hitclusters.append(subject_genecluster)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
990 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
991 subjectlist.append(subject)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
992 querydict[subject] = [subject_genecluster,subject_start,subject_end,subject_strand,subject_annotation,perc_ident,blastscore,perc_coverage,evalue,locustag]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
993 blastdict[query] = [subjectlist,querydict]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
994 else: #For all but the first and last blastlines
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
995 if query not in querylist:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
996 blastdict[last_query] = [subjectlist,querydict]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
997 querylist.append(query)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
998 subjectlist = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
999 querydict = {}
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1000 subjectlist.append(subject)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1001 querydict[subject] = [subject_genecluster,subject_start,subject_end,subject_strand,subject_annotation,perc_ident,blastscore,perc_coverage,evalue,locustag]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1002 if subject_genecluster not in hitclusters:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1003 hitclusters.append(subject_genecluster)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1004 last_query = query
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1005 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1006 subjectlist.append(subject)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1007 querydict[subject] = [subject_genecluster,subject_start,subject_end,subject_strand,subject_annotation,perc_ident,blastscore,perc_coverage,evalue,locustag]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1008 if subject_genecluster not in hitclusters:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1009 hitclusters.append(subject_genecluster)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1010 return [blastdict,querylist,hitclusters]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1011
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
def getdircontents():
    """Return the names of the entries in the current working directory.

    This is a thin wrapper around ``os.listdir('.')``; the result is a list
    of names in arbitrary order.
    """
    return os.listdir('.')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1029
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
def _gene_arrow_points(start, end, strand, base, height):
    """Return the closed outline of a gene arrow as a list of (x, y) tuples.

    The outline spans x from the smaller to the larger of ``start``/``end``
    and y from ``base - height`` to ``base``. A "+" arrow points towards the
    larger x value, a "-" arrow towards the smaller one.

    Raises ValueError if ``strand`` is neither "+" nor "-".
    """
    if strand not in ("+", "-"):
        raise ValueError("strand must be '+' or '-', got %r" % (strand,))
    halfheight = height/2
    if start > end:
        start, end = end, start
    top = base - height
    middle = base - halfheight
    if (end - start) < halfheight:
        # Gene is too short for a shaft: draw only a triangular arrow head.
        if strand == "+":
            return [(start, base), (end, middle), (start, top), (start, base)]
        return [(start, middle), (end, top), (end, base), (start, middle)]
    if strand == "+":
        # Rectangular shaft followed by a head occupying the last halfheight.
        arrowstart = end - halfheight
        return [(start, base), (arrowstart, base), (end, middle),
                (arrowstart, top), (start, top), (start, base)]
    # "-" strand: head occupies the first halfheight, shaft follows.
    arrowstart = start + halfheight
    return [(start, middle), (arrowstart, top), (end, top),
            (end, base), (arrowstart, base), (start, middle)]


def _gene_arrow(start,end,strand,color,base,height):
    """Create the polygon element that depicts one gene as an arrow.

    ``start`` and ``end`` are the horizontal extents (given in either order),
    ``strand`` is "+" or "-", ``color`` is used as the fill, and ``base`` and
    ``height`` give the vertical placement and size. The polygon is drawn
    with a black stroke of width 1.

    Raises ValueError if ``strand`` is neither "+" nor "-".
    """
    # Validate and compute the outline first so bad input fails before any
    # drawing object is created.
    points = _gene_arrow_points(start, end, strand, base, height)
    oh = ShapeBuilder()
    pg = oh.createPolygon(points=oh.convertTupleArrayToPoints(points),
                          strokewidth=1, stroke='black', fill=color)
    return pg
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1073
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
def _gene_label(start,end,name,y,screenwidth):
    """Build the italic text element that labels a gene.

    The label shows ``name`` at the horizontal midpoint of ``start`` and
    ``end``. The incoming ``y`` value is not used: the vertical position is
    always recomputed from fixed constants. ``screenwidth`` is not used.
    """
    label_style = StyleBuilder()
    label_style.setFontFamily(fontfamily="Verdana")
    label_style.setFontStyle(fontstyle='italic')
    label_style.setFontSize('10px')
    label_style.setFilling('#600000')
    # Horizontal midpoint of the gene.
    centre_x = (start + end)/2
    # Fixed vertical position: base line 35 plus half of the height 10.
    base, height = 35, 10
    y = base + height/2
    label = text(name, centre_x, y)
    label.set_style(label_style.getStyle())
    return label
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1091
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
def relativepositions(starts,ends,largestclustersize):
    """Convert absolute gene coordinates into horizontal drawing coordinates.

    Every position is shifted so that the first entry of ``starts`` becomes 0
    and is then scaled so that ``largestclustersize`` corresponds to 75% of
    the module-level ``screenwidth``. Results are truncated to integers.

    Returns a two-element list ``[rel_starts, rel_ends]``.
    """
    # The first start site is taken as the left edge of the drawing.
    leftboundary = int(starts[0])

    def to_screen(position):
        fraction = float(float(int(position) - leftboundary) / largestclustersize)
        return int(fraction * screenwidth * 0.75)

    scaled_starts = [to_screen(site) for site in starts]
    scaled_ends = [to_screen(site) for site in ends]
    return [scaled_starts, scaled_ends]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1107
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
def startendsitescheck(starts,ends):
    """Make sure every start site is not larger than its end site.

    ``starts`` and ``ends`` are parallel sequences; values are compared as
    integers but returned unchanged. Wherever a start is numerically larger
    than the matching end, the two values are exchanged.

    Returns a two-element list ``[starts, ends]`` with the corrected values.
    """
    lower = []
    upper = []
    for index, first in enumerate(starts):
        if int(first) > int(ends[index]):
            # Pair is reversed: swap the roles of the two values.
            low, high = ends[index], first
        else:
            low, high = first, ends[index]
        lower.append(low)
        upper.append(high)
    return [lower, upper]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1124
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
def RadialGradient(startcolor,stopcolor,gradientname):
    """Build a ``defs`` element holding a two-stop radial gradient.

    The gradient gets the id ``gradientname`` and runs from ``startcolor``
    at offset 0% to ``stopcolor`` at offset 100%, both fully opaque.
    """
    container = defs()
    gradient = radialGradient()
    gradient.set_id(gradientname)
    for offset, colour in (("0%", startcolor), ("100%", stopcolor)):
        colour_stop = stop(offset=offset)
        colour_stop.set_stop_color(colour)
        colour_stop.set_stop_opacity(1)
        gradient.addElement(colour_stop)
    container.addElement(gradient)
    return container
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1139
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
def LinearGradient(startcolor,stopcolor,gradientname):
    """Build a ``defs`` element holding a two-stop linear gradient.

    The gradient gets the id ``gradientname`` and runs from ``startcolor``
    at offset 0% to ``stopcolor`` at offset 100%, both fully opaque.
    """
    container = defs()
    gradient = linearGradient()
    gradient.set_id(gradientname)
    for offset, colour in (("0%", startcolor), ("100%", stopcolor)):
        colour_stop = stop(offset=offset)
        colour_stop.set_stop_color(colour)
        colour_stop.set_stop_opacity(1)
        gradient.addElement(colour_stop)
    container.addElement(gradient)
    return container
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1154
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
def generate_rgbscheme(nr):
    """Return a list of at least ``nr`` colour strings.

    The smallest supported palette size that is >= ``nr`` is selected
    (1, 2, 4, 8, 12, 18, 24, 32, 48 or 64). Palettes of size 1, 2 and 4 use
    named colours; larger ones are "rgb(r,g,b)" strings sampled on a grid.
    Palettes up to 32 colours are returned in a fixed hand-picked order,
    larger ones are shuffled with ``random.shuffle``. When ``nr`` exceeds 64
    the 64-colour palette is repeated (at least 20 times, and often enough to
    cover ``nr``) before shuffling, so colours repeat in that case.
    """
    # Palette size -> number of sample points on the red, green, blue axes.
    lengthsdict = {1:[1,1,1],2:[1,1,2],4:[1,2,2],8:[2,2,2],12:[2,2,3],
                   18:[2,3,3],24:[3,3,3],32:[3,3,4],48:[3,4,4],64:[4,4,4]}
    # Palettes small enough to use plain colour names.
    named_schemes = {1: ["red"],
                     2: ["red", "green"],
                     4: ["red", "green", "blue", "yellow"]}
    # Fixed orderings of the generated grid colours. Each list must be a
    # permutation without repeats, otherwise colours are lost or duplicated.
    fixed_orders = {
        8: [4,1,2,5,6,7,3,0],
        12: [6,3,5,9,7,2,11,4,8,1,10,0],
        # Index 17 was missing, which yielded only 17 colours for 18 requested.
        18: [9,6,2,14,15,8,12,10,3,5,7,11,4,1,16,13,0,17],
        24: [15,12,9,6,5,0,21,1,16,14,8,17,2,23,22,3,13,7,10,4,18,20,19,11],
        # Index 20 used to appear twice; the second one is now the unused 0.
        32: [21,19,27,6,8,1,14,7,20,13,9,30,4,23,18,12,5,29,24,17,11,31,2,28,
             22,15,26,3,0,16,10,25],
    }

    # Pick the smallest supported palette that is large enough.
    closestnr = None
    for size in sorted(lengthsdict):
        if size >= nr:
            closestnr = size
            break
    toohigh = closestnr is None
    if toohigh:
        closestnr = 64

    if closestnr in named_schemes:
        return list(named_schemes[closestnr])

    x, y, z = lengthsdict[closestnr]
    # Floor division keeps the channel values integral on every interpreter.
    # NOTE(review): all three axes start at the offset derived from z; this is
    # kept as-is so that existing palettes do not change.
    offset = (255 // z) // 2
    xpoints = [offset + step * (255 // x) for step in range(x)]
    ypoints = [offset + step * (255 // y) for step in range(y)]
    zpoints = [offset + step * (255 // z) for step in range(z)]
    colorlist = ["rgb(%s,%s,%s)" % (i, j, k)
                 for i in xpoints for j in ypoints for k in zpoints]

    if toohigh:
        # Repeat the palette often enough to cover nr (never fewer than 20x).
        needed = -(-int(nr) // len(colorlist))
        colorlist = colorlist * max(20, needed)
    # The grids for 24 and 32 contain three surplus colours; drop them.
    if closestnr == 24:
        colorlist = colorlist[:15] + colorlist[18:]
    if closestnr == 32:
        colorlist = colorlist[:21] + colorlist[24:]

    if closestnr in fixed_orders:
        return [colorlist[i] for i in fixed_orders[closestnr]]
    # Palettes of 48 colours and more are put in random order.
    random.shuffle(colorlist)
    return colorlist
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1233
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1234 def geneclustersvg(genes,rel_starts,rel_ends,strands,geneposdict,pksnrpsprots,pksnrpsdomains,qclusternr):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1235 nrgenes = len(genes)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1236 #Define relative start and end positions for plotting
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1237 s = svg(x = 0, y = 0, width = (screenwidth * 0.75), height = (259 + 99 * len(pksnrpsprots)))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1238 viewbox = "0 -30 " + str(screenwidth * 0.8) + " " + str(185 + 70 * len(pksnrpsprots))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1239 s.set_viewBox(viewbox)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1240 s.set_preserveAspectRatio("none")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1241
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1242 #Add line behind gene arrows
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1243 oh = ShapeBuilder()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1244 group = g()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1245 group.addElement(oh.createLine(10,60,10 + (screenwidth * 0.75),60, strokewidth = 2, stroke = "grey"))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1246 s.addElement(group)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1247 #Add gene arrows
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1248 a = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1249 y = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1250 for x in range(nrgenes):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1251 group = g()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1252 #group.addElement(_gene_label(rel_starts[a],rel_ends[a],genes[a],y,screenwidth))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1253 group.addElement(_gene_arrow(10 + rel_starts[a],10 + rel_ends[a],strands[a],colors[a],65,10))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1254 #Can be used for domains
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1255 # group.addElement(oh.createRect(rel_starts[a],45,(rel_ends[a]-rel_starts[a]),10, strokewidth = 2, stroke = "black", fill="#237845"))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1256 group.set_id("a" + str(qclusternr) + "_00%s"%x)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1257 s.addElement(group)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1258 if y == 0:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1259 y = 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1260 elif y == 1:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1261 y = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1262 a += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1263 #Add domain depictions
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1264 oh = ShapeBuilder()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1265 group = g()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1266 #Determine longest protein to decide on scaling
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1267 longestprot = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1268 protlengthdict = {}
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1269 for i in pksnrpsprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1270 protlength = (geneposdict[i][1] - geneposdict[i][0]) / 3
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1271 protlengthdict[i] = protlength
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1272 if protlength > longestprot:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1273 longestprot = protlength
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1274 z = 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1275 w = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1276 ksnr = 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1277 atnr = 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1278 dhnr = 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1279 krnr = 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1280 ernr = 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1281 acpnr = 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1282 cnr = 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1283 enr = 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1284 anr = 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1285 pcpnr = 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1286 tenr = 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1287 othernr = 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1288 for i in pksnrpsprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1289 domains = pksnrpsdomains[i][0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1290 domainsdict = pksnrpsdomains[i][1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1291 protlength = protlengthdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1292 group.addElement(oh.createLine(10,(125 + z * 60 ),10 + ((float(protlength) / float(longestprot)) * (screenwidth * 0.75)),(125 + z * 60 ), strokewidth = 1, stroke = "grey"))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1293 s.addElement(group)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1294 try:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1295 aa2pixelratio = longestprot * 0.75 / screenwidth
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1296 except:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1297 aa2pixelratio = 0.1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1298 #print 'logestprot', longestprot
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1299 #print 'scrennwidth', screenwidth
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1300 #print aa2pixelratio
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1301 myStyle = StyleBuilder()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1302 myStyle.setFontFamily(fontfamily="MS Reference Sans Serif")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1303 myStyle.setFontWeight(fontweight='bold')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1304 myStyle.setFontSize('12px')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1305 for j in domains:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1306 startpos = domainsdict[j][0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1307 endpos = domainsdict[j][1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1308 if "PKS_KS" in j:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1309 c = LinearGradient("#08B208","#81F781","KS_domain"+str(qclusternr) + "_" + str(ksnr))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1310 d = LinearGradient("#81F781","#08B208","KS_line"+str(qclusternr) + "_" + str(ksnr))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1311 e = oh.createRect(str(10 + startpos / aa2pixelratio),str((125 + z * 60 ) - 8),str((endpos-startpos) / aa2pixelratio),15,8,strokewidth=1,stroke='url(#KS_line' + str(qclusternr) + "_" + str(ksnr) + ")",fill="url(#KS_domain" + str(qclusternr) + "_" + str(ksnr) + ")")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1312 f = text("KS",((-4 + startpos / aa2pixelratio) + 0.5 * ((endpos-startpos) / aa2pixelratio)), ((125 + z * 60 ) + 4),fill='#0A2A0A')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1313 if ((endpos-startpos) / aa2pixelratio) < 100 and ((endpos-startpos) / aa2pixelratio) >= 20:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1314 myStyle.setFontSize('8px')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1315 f = text("KS",((startpos / aa2pixelratio) + 0.5 * ((endpos-startpos) / aa2pixelratio)), ((125 + z * 60 ) + 3),fill='#3B0B0B')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1316 elif ((endpos-startpos) / aa2pixelratio) < 20:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1317 f = "notext"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1318 if f != "notext":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1319 f.set_style(myStyle.getStyle())
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1320 myStyle.setFontSize('12px')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1321 group = g()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1322 group.addElement(c)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1323 group.addElement(d)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1324 group.addElement(e)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1325 if f != "notext":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1326 group.addElement(f)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1327 group.set_id("b" + str(qclusternr) + "_00%s"%w)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1328 s.addElement(group)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1329 ksnr += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1330 elif "PKS_AT" in j:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1331 c = LinearGradient("#DC0404","#F78181","AT_domain"+str(qclusternr) + "_" + str(atnr))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1332 d = LinearGradient("#F78181","#DC0404","AT_line"+str(qclusternr) + "_" + str(atnr))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1333 e = oh.createRect(str(10 + startpos / aa2pixelratio),str((125 + z * 60 ) - 8),str((endpos-startpos) / aa2pixelratio),15,8,strokewidth=1,stroke='url(#AT_line' + str(qclusternr) + "_" + str(atnr) + ")",fill="url(#AT_domain" + str(qclusternr) + "_" + str(atnr) + ")")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1334 f = text("AT",((-4 + startpos / aa2pixelratio) + 0.5 * ((endpos-startpos) / aa2pixelratio)), ((125 + z * 60 ) + 4),fill='#2A1B0A')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1335 if ((endpos-startpos) / aa2pixelratio) < 100 and ((endpos-startpos) / aa2pixelratio) >= 20:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1336 myStyle.setFontSize('8px')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1337 f = text("AT",((startpos / aa2pixelratio) + 0.5 * ((endpos-startpos) / aa2pixelratio)), ((125 + z * 60 ) + 3),fill='#2A1B0A')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1338 elif ((endpos-startpos) / aa2pixelratio) < 20:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1339 f = "notext"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1340 if f != "notext":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1341 f.set_style(myStyle.getStyle())
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1342 myStyle.setFontSize('12px')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1343 group = g()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1344 group.addElement(c)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1345 group.addElement(d)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1346 group.addElement(e)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1347 if f != "notext":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1348 group.addElement(f)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1349 group.set_id("b" + str(qclusternr) + "_00%s"%w)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1350 s.addElement(group)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1351 atnr += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1352 elif "PKS_DH" in j:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1353 c = LinearGradient("#B45F04","#F7BE81","DH_domain"+str(qclusternr) + "_" + str(dhnr))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1354 d = LinearGradient("#F7BE81","#B45F04","DH_line"+str(qclusternr) + "_" + str(dhnr))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1355 e = oh.createRect(str(10 + startpos / aa2pixelratio),str((125 + z * 60 ) - 8),str((endpos-startpos) / aa2pixelratio),15,8,strokewidth=1,stroke='url(#DH_line' + str(qclusternr) + "_" + str(dhnr) + ")",fill="url(#DH_domain" + str(qclusternr) + "_" + str(dhnr) + ")")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1356 f = text("DH",((-4 + startpos / aa2pixelratio) + 0.5 * ((endpos-startpos) / aa2pixelratio)), ((125 + z * 60 ) + 4),fill='#3B0B0B')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1357 if ((endpos-startpos) / aa2pixelratio) < 100 and ((endpos-startpos) / aa2pixelratio) >= 20:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1358 myStyle.setFontSize('8px')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1359 f = text("DH",((startpos / aa2pixelratio) + 0.5 * ((endpos-startpos) / aa2pixelratio)), ((125 + z * 60 ) + 3),fill='#3B0B0B')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1360 elif ((endpos-startpos) / aa2pixelratio) < 20:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1361 f = "notext"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1362 if f != "notext":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1363 f.set_style(myStyle.getStyle())
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1364 myStyle.setFontSize('12px')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1365 group = g()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1366 group.addElement(c)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1367 group.addElement(d)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1368 group.addElement(e)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1369 if f != "notext":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1370 group.addElement(f)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1371 group.set_id("b" + str(qclusternr) + "_00%s"%w)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1372 s.addElement(group)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1373 dhnr += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1374 elif "PKS_KR" in j:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1375 c = LinearGradient("#089E4B","#81F781","KR_domain"+str(qclusternr) + "_" + str(krnr))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1376 d = LinearGradient("#81F781","#089E4B","KR_line"+str(qclusternr) + "_" + str(krnr))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1377 e = oh.createRect(str(10 + startpos / aa2pixelratio),str((125 + z * 60 ) - 8),str((endpos-startpos) / aa2pixelratio),15,8,strokewidth=1,stroke='url(#KR_line' + str(qclusternr) + "_" + str(krnr) + ")",fill="url(#KR_domain" + str(qclusternr) + "_" + str(krnr) + ")")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1378 f = text("KR",((-4 + startpos / aa2pixelratio) + 0.5 * ((endpos-startpos) / aa2pixelratio)), ((125 + z * 60 ) + 4),fill='#0A2A1B')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1379 if ((endpos-startpos) / aa2pixelratio) < 100 and ((endpos-startpos) / aa2pixelratio) >= 20:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1380 myStyle.setFontSize('8px')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1381 f = text("KR",((startpos / aa2pixelratio) + 0.5 * ((endpos-startpos) / aa2pixelratio)), ((125 + z * 60 ) + 3),fill='#0A2A1B')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1382 elif ((endpos-startpos) / aa2pixelratio) < 20:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1383 f = "notext"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1384 if f != "notext":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1385 f.set_style(myStyle.getStyle())
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1386 myStyle.setFontSize('12px')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1387 group = g()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1388 group.addElement(c)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1389 group.addElement(d)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1390 group.addElement(e)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1391 if f != "notext":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1392 group.addElement(f)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1393 group.set_id("b" + str(qclusternr) + "_00%s"%w)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1394 s.addElement(group)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1395 krnr += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1396 elif "PKS_ER" in j:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1397 c = LinearGradient("#089E85","#81F7F3","ER_domain"+str(qclusternr) + "_" + str(ernr))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1398 d = LinearGradient("#81F7F3","#089E85","ER_line"+str(qclusternr) + "_" + str(ernr))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1399 e = oh.createRect(str(10 + startpos / aa2pixelratio),str((125 + z * 60 ) - 8),str((endpos-startpos) / aa2pixelratio),15,8,strokewidth=1,stroke='url(#ER_line' + str(qclusternr) + "_" + str(ernr) + ")",fill="url(#ER_domain" + str(qclusternr) + "_" + str(ernr) + ")")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1400 f = text("ER",((-4 + startpos / aa2pixelratio) + 0.5 * ((endpos-startpos) / aa2pixelratio)), ((125 + z * 60 ) + 4),fill='#0A2A29')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1401 if ((endpos-startpos) / aa2pixelratio) < 100 and ((endpos-startpos) / aa2pixelratio) >= 20:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1402 myStyle.setFontSize('8px')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1403 f = text("ER",((startpos / aa2pixelratio) + 0.5 * ((endpos-startpos) / aa2pixelratio)), ((125 + z * 60 ) + 3),fill='#0A2A29')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1404 elif ((endpos-startpos) / aa2pixelratio) < 20:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1405 f = "notext"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1406 if f != "notext":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1407 f.set_style(myStyle.getStyle())
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1408 myStyle.setFontSize('12px')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1409 group = g()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1410 group.addElement(c)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1411 group.addElement(d)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1412 group.addElement(e)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1413 if f != "notext":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1414 group.addElement(f)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1415 group.set_id("b" + str(qclusternr) + "_00%s"%w)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1416 s.addElement(group)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1417 ernr += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1418 elif "ACP" in j:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1419 c = LinearGradient("#084BC6","#81BEF7","ACP_domain"+str(qclusternr) + "_" + str(acpnr))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1420 d = LinearGradient("#81BEF7","#084BC6","ACP_line"+str(qclusternr) + "_" + str(acpnr))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1421 e = oh.createRect(str(10 + startpos / aa2pixelratio),str((125 + z * 60 ) - 8),str((endpos-startpos) / aa2pixelratio),15,8,strokewidth=1,stroke='url(#ACP_line' + str(qclusternr) + "_" + str(acpnr) + ")",fill="url(#ACP_domain" + str(qclusternr) + "_" + str(acpnr) + ")")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1422 f = text("ACP",((startpos / aa2pixelratio) + 0.5 * ((endpos-startpos) / aa2pixelratio)), ((125 + z * 60 ) + 4),fill='#0A1B2A')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1423 if ((endpos-startpos) / aa2pixelratio) < 100 and ((endpos-startpos) / aa2pixelratio) >= 20:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1424 myStyle.setFontSize('8px')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1425 f = text("ACP",((-2 + startpos / aa2pixelratio) + 0.5 * ((endpos-startpos) / aa2pixelratio)), ((125 + z * 60 ) + 3),fill='#0A1B2A')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1426 elif ((endpos-startpos) / aa2pixelratio) < 20:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1427 f = "notext"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1428 if f != "notext":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1429 f.set_style(myStyle.getStyle())
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1430 myStyle.setFontSize('12px')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1431 group = g()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1432 group.addElement(c)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1433 group.addElement(d)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1434 group.addElement(e)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1435 if f != "notext":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1436 group.addElement(f)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1437 group.set_id("b" + str(qclusternr) + "_00%s"%w)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1438 s.addElement(group)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1439 acpnr += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1440 elif ("C" in j or "Heterocyclization" in j ) and "ACP" not in j and "PCP" not in j and "NRPS-COM" not in j and "CAL" not in j:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1441 c = LinearGradient("#393989","#8181F7","C_domain"+str(qclusternr) + "_" + str(cnr))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1442 d = LinearGradient("#8181F7","#393989","C_line"+str(qclusternr) + "_" + str(cnr))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1443 e = oh.createRect(str(10 + startpos / aa2pixelratio),str((125 + z * 60 ) - 8),str((endpos-startpos) / aa2pixelratio),15,8,strokewidth=1,stroke='url(#C_line' + str(qclusternr) + "_" + str(cnr) + ")",fill="url(#C_domain" + str(qclusternr) + "_" + str(cnr) + ")")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1444 f = text("C",((startpos / aa2pixelratio) + 0.5 * ((endpos-startpos) / aa2pixelratio)), ((125 + z * 60 ) + 4),fill='#0A0A2A')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1445 if ((endpos-startpos) / aa2pixelratio) < 100 and ((endpos-startpos) / aa2pixelratio) >= 20:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1446 myStyle.setFontSize('8px')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1447 f = text("C",((startpos / aa2pixelratio) + 0.5 * ((endpos-startpos) / aa2pixelratio)), ((125 + z * 60 ) + 3),fill='#0A0A2A')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1448 elif ((endpos-startpos) / aa2pixelratio) < 20:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1449 f = "notext"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1450 if f != "notext":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1451 f.set_style(myStyle.getStyle())
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1452 myStyle.setFontSize('12px')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1453 group = g()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1454 group.addElement(c)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1455 group.addElement(d)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1456 group.addElement(e)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1457 if f != "notext":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1458 group.addElement(f)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1459 group.set_id("b" + str(qclusternr) + "_00%s"%w)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1460 s.addElement(group)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1461 cnr += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1462 elif "Epimerization" in j and "ER" not in j and "TE" not in j:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1463 c = LinearGradient("#393989","#8181F7","E_domain"+str(qclusternr) + "_" + str(enr))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1464 d = LinearGradient("#8181F7","#393989","E_line"+str(qclusternr) + "_" + str(enr))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1465 e = oh.createRect(str(10 + startpos / aa2pixelratio),str((125 + z * 60 ) - 8),str((endpos-startpos) / aa2pixelratio),15,8,strokewidth=1,stroke='url(#E_line' + str(qclusternr) + "_" + str(enr) + ")",fill="url(#E_domain" + str(qclusternr) + "_" + str(enr) + ")")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1466 f = text("E",((startpos / aa2pixelratio) + 0.5 * ((endpos-startpos) / aa2pixelratio)), ((125 + z * 60 ) + 4),fill='#0A0A2A')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1467 if ((endpos-startpos) / aa2pixelratio) < 100 and ((endpos-startpos) / aa2pixelratio) >= 20:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1468 myStyle.setFontSize('8px')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1469 f = text("E",((startpos / aa2pixelratio) + 0.5 * ((endpos-startpos) / aa2pixelratio)), ((125 + z * 60 ) + 3),fill='#0A0A2A')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1470 elif ((endpos-startpos) / aa2pixelratio) < 20:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1471 f = "notext"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1472 if f != "notext":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1473 f.set_style(myStyle.getStyle())
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1474 myStyle.setFontSize('12px')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1475 group = g()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1476 group.addElement(c)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1477 group.addElement(d)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1478 group.addElement(e)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1479 if f != "notext":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1480 group.addElement(f)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1481 group.set_id("b" + str(qclusternr) + "_00%s"%w)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1482 s.addElement(group)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1483 enr += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1484 elif ("AMP" in j or "A-OX" in j):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1485 c = LinearGradient("#56157F","#BE81F7","A_domain"+str(qclusternr) + "_" + str(anr))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1486 d = LinearGradient("#BE81F7","#56157F","A_line"+str(qclusternr) + "_" + str(anr))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1487 e = oh.createRect(str(10 + startpos / aa2pixelratio),str((125 + z * 60 ) - 8),str((endpos-startpos) / aa2pixelratio),15,8,strokewidth=1,stroke='url(#A_line' + str(qclusternr) + "_" + str(anr) + ")",fill="url(#A_domain" + str(qclusternr) + "_" + str(anr) + ")")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1488 f = text("A",((startpos / aa2pixelratio) + 0.5 * ((endpos-startpos) / aa2pixelratio)), ((125 + z * 60 ) + 4),fill='#1B0A2A')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1489 if ((endpos-startpos) / aa2pixelratio) < 100 and ((endpos-startpos) / aa2pixelratio) >= 20:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1490 myStyle.setFontSize('8px')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1491 f = text("A",((startpos / aa2pixelratio) + 0.5 * ((endpos-startpos) / aa2pixelratio)), ((125 + z * 60 ) + 3),fill='#1B0A2A')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1492 elif ((endpos-startpos) / aa2pixelratio) < 20:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1493 f = "notext"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1494 if f != "notext":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1495 f.set_style(myStyle.getStyle())
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1496 myStyle.setFontSize('12px')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1497 group = g()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1498 group.addElement(c)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1499 group.addElement(d)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1500 group.addElement(e)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1501 if f != "notext":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1502 group.addElement(f)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1503 group.set_id("b" + str(qclusternr) + "_00%s"%w)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1504 s.addElement(group)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1505 anr += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1506 elif "PCP" in j:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1507 c = LinearGradient("#084BC6","#81BEF7","PCP_domain"+str(qclusternr) + "_" + str(pcpnr))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1508 d = LinearGradient("#81BEF7","#084BC6","PCP_line"+str(qclusternr) + "_" + str(pcpnr))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1509 e = oh.createRect(str(10 + startpos / aa2pixelratio),str((125 + z * 60 ) - 8),str((endpos-startpos) / aa2pixelratio),15,8,strokewidth=1,stroke='url(#PCP_line' + str(qclusternr) + "_" + str(pcpnr) + ")",fill="url(#PCP_domain" + str(qclusternr) + "_" + str(pcpnr) + ")")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1510 f = text("PCP",((startpos / aa2pixelratio) + 0.5 * ((endpos-startpos) / aa2pixelratio)), ((125 + z * 60 ) + 4),fill='#0A1B2A')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1511 if ((endpos-startpos) / aa2pixelratio) < 100 and ((endpos-startpos) / aa2pixelratio) >= 20:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1512 myStyle.setFontSize('8px')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1513 f = text("PCP",((-2 + startpos / aa2pixelratio) + 0.5 * ((endpos-startpos) / aa2pixelratio)), ((125 + z * 60 ) + 3),fill='#0A1B2A')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1514 elif ((endpos-startpos) / aa2pixelratio) < 20:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1515 f = "notext"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1516 if f != "notext":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1517 f.set_style(myStyle.getStyle())
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1518 myStyle.setFontSize('12px')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1519 group = g()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1520 group.addElement(c)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1521 group.addElement(d)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1522 group.addElement(e)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1523 if f != "notext":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1524 group.addElement(f)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1525 group.set_id("b" + str(qclusternr) + "_00%s"%w)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1526 s.addElement(group)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1527 pcpnr += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1528 elif "Thioesterase" in j or "TD" in j:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1529 c = LinearGradient("#750072","#F5A9F2","TE_domain"+str(qclusternr) + "_" + str(tenr))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1530 d = LinearGradient("#F5A9F2","#750072","TE_line"+str(qclusternr) + "_" + str(tenr))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1531 e = oh.createRect(str(10 + startpos / aa2pixelratio),str((125 + z * 60 ) - 8),str((endpos-startpos) / aa2pixelratio),15,8,strokewidth=1,stroke='url(#TE_line' + str(qclusternr) + "_" + str(tenr) + ")",fill="url(#TE_domain" + str(qclusternr) + "_" + str(tenr) + ")")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1532 if "Thioesterase" in j:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1533 f = text("TE",((startpos / aa2pixelratio) + 0.5 * ((endpos-startpos) / aa2pixelratio)), ((125 + z * 60 ) + 4),fill='#2A0A29')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1534 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1535 f = text("TD",((startpos / aa2pixelratio) + 0.5 * ((endpos-startpos) / aa2pixelratio)), ((125 + z * 60 ) + 4),fill='#2A0A29')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1536 if ((endpos-startpos) / aa2pixelratio) < 100 and ((endpos-startpos) / aa2pixelratio) >= 20:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1537 myStyle.setFontSize('8px')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1538 if "Thioesterase" in j:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1539 f = text("TE",((startpos / aa2pixelratio) + 0.5 * ((endpos-startpos) / aa2pixelratio)), ((125 + z * 60 ) + 3),fill='#2A0A29')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1540 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1541 f = text("TD",((startpos / aa2pixelratio) + 0.5 * ((endpos-startpos) / aa2pixelratio)), ((125 + z * 60 ) + 4),fill='#2A0A29')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1542 elif ((endpos-startpos) / aa2pixelratio) < 20:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1543 f = "notext"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1544 if f != "notext":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1545 f.set_style(myStyle.getStyle())
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1546 myStyle.setFontSize('12px')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1547 group = g()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1548 group.addElement(c)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1549 group.addElement(d)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1550 group.addElement(e)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1551 if f != "notext":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1552 group.addElement(f)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1553 group.set_id("b" + str(qclusternr) + "_00%s"%w)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1554 s.addElement(group)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1555 tenr += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1556 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1557 c = LinearGradient("#929292","#DBDBDB","other_domain"+str(qclusternr) + "_" + str(othernr))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1558 d = LinearGradient("#DBDBDB","#929292","other_line"+str(qclusternr) + "_" + str(othernr))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1559 e = oh.createRect(str(10 + startpos / aa2pixelratio),str((125 + z * 60 ) - 8),str((endpos-startpos) / aa2pixelratio),15,8,strokewidth=1,stroke='url(#other_line' + str(qclusternr) + "_" + str(othernr) + ")",fill="url(#other_domain" + str(qclusternr) + "_" + str(othernr) + ")")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1560 domname = (((((((((j.replace("0","")).replace("1","")).replace("2","")).replace("3","")).replace("4","")).replace("5","")).replace("6","")).replace("7","")).replace("8","")).replace("9","")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1561 if len(domname) == 1:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1562 f = text(domname,((startpos / aa2pixelratio) + 0.5 * ((endpos-startpos) / aa2pixelratio)), ((125 + z * 60 ) + 4),fill='#0B0B0B')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1563 elif len(domname) == 2:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1564 f = text(domname,((-4 + startpos / aa2pixelratio) + 0.5 * ((endpos-startpos) / aa2pixelratio)), ((125 + z * 60 ) + 4),fill='#0B0B0B')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1565 elif len(domname) == 3:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1566 f = text(domname,((-12 + startpos / aa2pixelratio) + 0.5 * ((endpos-startpos) / aa2pixelratio)), ((125 + z * 60 ) + 4),fill='#0B0B0B')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1567 if len(domname) > 3 or ((endpos-startpos) / aa2pixelratio) < 100:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1568 myStyle.setFontSize('8px')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1569 f = text(domname,((-16 + startpos / aa2pixelratio) + 0.5 * ((endpos-startpos) / aa2pixelratio)), ((125 + z * 60 ) + 3),fill='#0B0B0B')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1570 if len(domname) > 4 and ((endpos-startpos) / aa2pixelratio) < 100:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1571 myStyle.setFontSize('6px')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1572 f = text(domname,((-16 + startpos / aa2pixelratio) + 0.5 * ((endpos-startpos) / aa2pixelratio)), ((125 + z * 60 ) + 3),fill='#0B0B0B')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1573 if ((endpos-startpos) / aa2pixelratio) < 60:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1574 f = "notext"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1575 if f != "notext":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1576 f.set_style(myStyle.getStyle())
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1577 myStyle.setFontSize('12px')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1578 group = g()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1579 group.addElement(c)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1580 group.addElement(d)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1581 group.addElement(e)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1582 if f != "notext":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1583 group.addElement(f)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1584 group.set_id("b" + str(qclusternr) + "_00%s"%w)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1585 s.addElement(group)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1586 othernr += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1587 w += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1588 z += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1589 s.addElement(group)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1590 return s
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1591
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
def calculate_colorgroups(queryclusternumber,hitclusternumbers,queryclusterdata,internalhomologygroupsdict):
    """Build the gene -> color-index mapping for one query cluster and its hits.

    Parameters:
      queryclusternumber -- key of the query cluster in ``queryclusterdata``
          and ``internalhomologygroupsdict``.
      hitclusternumbers -- iterable of hit cluster keys to take into account.
      queryclusterdata -- mapping; ``queryclusterdata[queryclusternumber][1]``
          is the per-hit-cluster data, in which ``[hitnr][0][hitnr]`` holds the
          color groups of that hit (an iterable of gene-name collections).
      internalhomologygroupsdict -- mapping from query cluster number to an
          iterable of groups of query gene names.

    Returns:
      A tuple ``(colorschemedict, rgbcolorscheme)``.  ``colorschemedict`` maps
      gene names to an integer color index; genes that end up in the same
      merged group share an index.  ``rgbcolorscheme`` is the list returned by
      ``generate_rgbscheme`` for the number of merged groups, with "#FFFFFF"
      appended as its last element.
    """
    #Extract data
    hitclusterdata = queryclusterdata[queryclusternumber][1]
    # NOTE(review): the query gene list is always read from the entry keyed 1,
    # independent of hitclusternumbers -- presumably every entry carries the
    # same query data; confirm against the code that builds queryclusterdata.
    queryclustergenes = hitclusterdata[1][3]
    colorgroupslist = [hitclusterdata[hitclusternumber][0][hitclusternumber]
                       for hitclusternumber in hitclusternumbers]
    #Merge, per internal homology group, every color group that contains one
    #of its genes into a single duplicate-free, order-preserving meta group
    metacolorgroups = []
    for internalgroup in internalhomologygroupsdict[queryclusternumber]:
        metagroup = []
        for gene in internalgroup:
            for colorgroups in colorgroupslist:
                for colorgroup in colorgroups:
                    if gene in colorgroup:
                        for member in colorgroup:
                            if member not in metagroup:
                                metagroup.append(member)
        #Singletons have nothing to share a color with; skip exact duplicates
        if len(metagroup) > 1 and metagroup not in metacolorgroups:
            metacolorgroups.append(metagroup)
    #Generate RGB scheme
    rgbcolorscheme = generate_rgbscheme(len(metacolorgroups))
    rgbcolorscheme.append("#FFFFFF")
    #Create colorschemedict in which all genes that are hits of the same query gene get the same color
    colorschemedict = {}
    z = 0
    for querygene in queryclustergenes:
        #z is never present in colorschemedict when an iteration starts, so
        #"z was handed out during this iteration" is the same condition as the
        #former "z in colorschemedict.values()" scan
        color_used = False
        for metagroup in metacolorgroups:
            if querygene in metagroup:
                for member in metagroup:
                    #A gene keeps the first color index it was given
                    if member not in colorschemedict:
                        colorschemedict[member] = z
                        color_used = True
        if color_used:
            z += 1
    return colorschemedict,rgbcolorscheme
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1638
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1639 def clusterblastresults(queryclusternumber,hitclusternumbers,queryclusterdata,colorschemedict,rgbcolorscheme):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1640 #print "Generating svg for cluster",queryclusternumber
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1641 #Extract data and generate color scheme
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1642 nrhitclusters = queryclusterdata[queryclusternumber][0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1643 hitclusterdata = queryclusterdata[queryclusternumber][1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1644 queryclustergenes = hitclusterdata[1][3]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1645 queryclustergenesdetails = hitclusterdata[1][4]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1646 colorgroupsdict = {}
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1647 colorgroupslengthlist = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1648 colorgroupslist = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1649 for hitclusternumber in hitclusternumbers:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1650 colorgroups = hitclusterdata[hitclusternumber][0][hitclusternumber]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1651 colorgroupsdict[hitclusternumber] = colorgroups
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1652 colorgroupslengthlist.append(len(colorgroups))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1653 colorgroupslist.append(colorgroups)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1654 #Find out whether hit gene cluster needs to be inverted compared to query gene cluster
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1655 strandsbalancedict = {}
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1656 for m in hitclusternumbers:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1657 hitclustergenesdetails = hitclusterdata[m][2]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1658 strandsbalance = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1659 for i in queryclustergenes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1660 refstrand = queryclustergenesdetails[i][2]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1661 for j in colorgroupsdict[m]:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1662 if i in j:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1663 for k in j:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1664 if k in hitclusterdata[m][1] and hitclustergenesdetails[k][2] == refstrand:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1665 strandsbalance += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1666 elif k in hitclusterdata[m][1] and hitclusterdata[m][2][k][2] != refstrand:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1667 strandsbalance = strandsbalance - 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1668 strandsbalancedict[m] = strandsbalance
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1669 #Generate coordinates for SVG figure
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1670 qnrgenes = len(queryclustergenes)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1671 qstarts =[]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1672 qends = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1673 qstrands =[]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1674 qcolors = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1675 for i in queryclustergenes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1676 qgenedata = queryclustergenesdetails[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1677 if qgenedata[0] > qgenedata[1]:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1678 qstarts.append(qgenedata[0])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1679 qends.append(qgenedata[1])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1680 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1681 qstarts.append(qgenedata[1])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1682 qends.append(qgenedata[0])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1683 qstrands.append(qgenedata[2])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1684 if colorschemedict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1685 qcolors.append(colorschemedict[i])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1686 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1687 qcolors.append("white")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1688 qstarts_ends = startendsitescheck(qstarts,qends)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1689 qstarts = qstarts_ends[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1690 qends = qstarts_ends[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1691 hdata = {}
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1692 for m in hitclusternumbers:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1693 hitclustergenes = hitclusterdata[m][1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1694 hitclustergenesdetails = hitclusterdata[m][2]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1695 hnrgenes = len(hitclustergenes)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1696 hstarts =[]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1697 hends = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1698 hstrands =[]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1699 hcolors = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1700 for i in hitclustergenes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1701 hgenedata = hitclustergenesdetails[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1702 if hgenedata[0] > hgenedata[1]:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1703 hstarts.append(hgenedata[0])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1704 hends.append(hgenedata[1])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1705 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1706 hstarts.append(hgenedata[1])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1707 hends.append(hgenedata[0])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1708 hstrands.append(hgenedata[2])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1709 if colorschemedict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1710 hcolors.append(colorschemedict[i])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1711 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1712 hcolors.append("white")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1713 #Invert gene cluster if needed
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1714 if strandsbalancedict[m] < 0:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1715 hstarts2 = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1716 hends2 = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1717 hstrands2 = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1718 for i in hstarts:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1719 hstarts2.append(str(100000000 - int(i)))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1720 hstarts = hstarts2
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1721 hstarts.reverse()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1722 for i in hends:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1723 hends2.append(str(100000000 - int(i)))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1724 hends = hends2
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1725 hends.reverse()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1726 for i in hstrands:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1727 if i == "+":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1728 hstrands2.append("-")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1729 elif i == "-":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1730 hstrands2.append("+")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1731 hstrands = hstrands2
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1732 hstrands.reverse()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1733 hcolors.reverse()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1734 hstarts_ends = startendsitescheck(hstarts,hends)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1735 hstarts = hstarts_ends[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1736 hends = hstarts_ends[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1737 hdata[m] = [hstarts,hends,hstrands,hcolors]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1738 #Find cluster size of largest cluster of query & all hit clusters assessed
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1739 clustersizes = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1740 for m in hitclusternumbers:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1741 hclustersize = int(hdata[m][1][-1]) - int(hdata[m][0][0])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1742 clustersizes.append(hclustersize)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1743 qclustersize = int(qends[-1]) - int(qstarts[0])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1744 clustersizes.append(qclustersize)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1745 largestclustersize = max(clustersizes)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1746 smallestclustersize = min(clustersizes)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1747 #Find relative positions
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1748 qrelpositions = relativepositions(qstarts,qends,largestclustersize)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1749 qrel_starts = qrelpositions[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1750 qrel_ends = qrelpositions[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1751 qdata = [qrel_starts,qrel_ends,qstrands,qcolors]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1752 hdata2 = {}
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1753 qdata2 = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1754 for m in hitclusternumbers:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1755 hclustersize = int(hdata[m][1][-1]) - int(hdata[m][0][0])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1756 hrelpositions = relativepositions(hdata[m][0],hdata[m][1],largestclustersize)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1757 hrel_starts = hrelpositions[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1758 hrel_ends = hrelpositions[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1759 #Center-align smallest gene cluster
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1760 if largestclustersize == hclustersize:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1761 qrel_ends2 = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1762 qrel_starts2 = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1763 for i in qrel_starts:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1764 qrel_starts2.append(int(i) + int(float(float((largestclustersize - qclustersize) / 2) / largestclustersize) * screenwidth * 0.75))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1765 for i in qrel_ends:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1766 qrel_ends2.append(int(i) + int(float(float((largestclustersize - qclustersize) / 2) / largestclustersize) * screenwidth * 0.75))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1767 qrel_ends = qrel_ends2
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1768 qrel_starts = qrel_starts2
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1769 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1770 hrel_ends2 = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1771 hrel_starts2 = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1772 for i in hrel_starts:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1773 hrel_starts2.append(int(i) + int(float(float((largestclustersize - hclustersize) / 2) / largestclustersize) * screenwidth * 0.75))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1774 for i in hrel_ends:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1775 hrel_ends2.append(int(i) + int(float(float((largestclustersize - hclustersize) / 2) / largestclustersize) * screenwidth * 0.75))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1776 hrel_ends = hrel_ends2
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1777 hrel_starts = hrel_starts2
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1778 hdata2[m] = [hrel_starts,hrel_ends,hdata[m][2],hdata[m][3]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1779 qdata2 = [qrel_starts,qrel_ends,qdata[2],qdata[3]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1780 hdata = hdata2
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1781 qdata = qdata2
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1782 s = svg(x = 0, y = 0, width = (screenwidth * 0.75), height = (270 + len(hitclusternumbers) * 50))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1783 viewbox = "0 0 " + str(screenwidth * 0.8) + " " + str(180 + len(hitclusternumbers) * 50)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1784 s.set_viewBox(viewbox)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1785 s.set_preserveAspectRatio("none")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1786 #Add line behind query gene cluster gene arrows
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1787 oh = ShapeBuilder()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1788 group = g()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1789 group.addElement(oh.createLine(10,35,10 + (screenwidth * 0.75),35, strokewidth = 1, stroke = "grey"))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1790 s.addElement(group)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1791 #Add query gene cluster gene arrows
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1792 a = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1793 y = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1794 for x in range(qnrgenes):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1795 group = g()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1796 #group.addElement(_gene_label(rel_starts[a],rel_ends[a],genes[a],y,screenwidth))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1797 if qcolors[a] == "white":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1798 group.addElement(_gene_arrow(10 + qrel_starts[a],10 + qrel_ends[a],qstrands[a],rgbcolorscheme[-1],40,10))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1799 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1800 group.addElement(_gene_arrow(10 + qrel_starts[a],10 + qrel_ends[a],qstrands[a],rgbcolorscheme[qcolors[a]],40,10))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1801 #Can be used for domains
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1802 #group.addElement(oh.createRect(rel_starts[a],45,(rel_ends[a]-rel_starts[a]),10, strokewidth = 2, stroke = "black", fill="#237845"))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1803 if len(hitclusternumbers) == 1:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1804 group.set_id("q" + str(queryclusternumber) + "_" + str(hitclusternumbers[0]) + "_" + "%s"%x)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1805 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1806 group.set_id("all_" + str(queryclusternumber) + "_0_" + "%s"%x)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1807 s.addElement(group)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1808 if y == 0:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1809 y = 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1810 elif y == 1:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1811 y = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1812 a += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1813 for m in hitclusternumbers:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1814 #Add line behind hit gene cluster gene arrows
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1815 group.addElement(oh.createLine(10,35 + 50 * (hitclusternumbers.index(m) + 1),10 + (screenwidth * 0.75),35 + 50 * (hitclusternumbers.index(m) + 1), strokewidth = 1, stroke = "grey"))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1816 s.addElement(group)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1817 #Add hit gene cluster gene arrows
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1818 hitclustergenes = hitclusterdata[m][1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1819 hnrgenes = len(hitclustergenes)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1820 hrel_starts = hdata[m][0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1821 hrel_ends = hdata[m][1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1822 hstrands = hdata[m][2]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1823 hcolors = hdata[m][3]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1824 a = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1825 y = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1826 for x in range(hnrgenes):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1827 group = g()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1828 #group.addElement(_gene_label(rel_starts[a],rel_ends[a],genes[a],y,screenwidth))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1829 if hcolors[a] == "white":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1830 group.addElement(_gene_arrow(10 + hrel_starts[a],10 + hrel_ends[a],hstrands[a],rgbcolorscheme[-1],40 + 50 * (hitclusternumbers.index(m) + 1),10))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1831 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1832 group.addElement(_gene_arrow(10 + hrel_starts[a],10 + hrel_ends[a],hstrands[a],rgbcolorscheme[hcolors[a]],40 + 50 * (hitclusternumbers.index(m) + 1),10))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1833 #Can be used for domains
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1834 # group.addElement(oh.createRect(rel_starts[a],45,(rel_ends[a]-rel_starts[a]),10, strokewidth = 2, stroke = "black", fill="#237845"))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1835 if len(hitclusternumbers) == 1:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1836 group.set_id("h" + str(queryclusternumber) + "_" + str(m) + "_" + "%s"%x)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1837 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1838 group.set_id("all_" + str(queryclusternumber) + "_" + str(m) + "_" + "%s"%x)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1839 s.addElement(group)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1840 if y == 0:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1841 y = 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1842 elif y == 1:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1843 y = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1844 a += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1845 return [s,[qdata,hdata,strandsbalancedict]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1846
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
def runblast(query):
    """Run blastp for one query FASTA file against the gene cluster protein database.

    query -- path of the query FASTA file. The tabular result (-outfmt 6) is
             written to the part of `query` before its first "." plus ".out",
             exactly as before, so callers can keep deriving the name the
             same way.

    The database path is built from the module-level `antismash_path`.
    Returns None.
    """
    # The arguments are passed as a list instead of one shell string, so paths
    # containing spaces or shell metacharacters no longer break the command.
    command = [
        "blastp",
        "-db", antismash_path + "clusterblast/geneclusterprots.fasta",
        "-query", query,
        "-outfmt", "6",
        "-max_target_seqs", "1000",
        "-evalue", "1e-05",
        "-out", query.split(".")[0] + ".out",
    ]
    try:
        subprocess.call(command)
    except OSError as err:
        # os.system() only printed a shell error when blastp could not be
        # started; stay best-effort and report the failure instead of raising.
        sys.stderr.write("Could not run blastp: %s\n" % err)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1850
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
def _smcog_read_fasta(path):
    """Read a FASTA file and return (names, seqs) with one joined sequence per header."""
    with open(path, "r") as handle:
        text = handle.read()
    names = []
    seqs = []
    for line in text.replace("\r", "\n").split("\n"):
        if not line:
            # blank lines (e.g. produced by CRLF line endings) carry no data
            continue
        if line[0] == ">":
            names.append(line[1:])
            seqs.append("")
        elif seqs:
            # sequence data before the first header is ignored
            seqs[-1] += line
    return names, seqs


def _smcog_conserved_bounds(seqs):
    """Return (first, last) slice bounds of the conserved core of aligned sequences."""
    lenseqs = len(seqs[0])
    nrseqs = len(seqs)
    # Same residue alphabet as before (there is no "O" entry); residues that
    # are not listed are now counted as well instead of raising KeyError.
    alphabet = "ABCDEFGHIJKLMNPQRSTUVWXYZ-"
    columns = [dict.fromkeys(alphabet, 0) for _ in range(lenseqs)]
    for seq in seqs:
        for pos, residue in enumerate(seq):
            columns[pos][residue] = columns[pos].get(residue, 0) + 1
    # Floor division keeps the Python 2 integer semantics of "nrseqs / 3".
    threshold = nrseqs // 3

    def is_shared(column):
        # presumably the keys ordered by descending count -- helper is defined
        # elsewhere in this file
        ranked = sortdictkeysbyvaluesrev(column)
        # NOTE(review): the count compared is that of the second-ranked key
        # (ranked[1]), kept as in the original -- confirm whether ranked[0]
        # was intended.
        return ranked[0] != "-" and column[ranked[1]] > threshold

    firstsharedaa = 0
    for pos, column in enumerate(columns):
        if is_shared(column):
            firstsharedaa = pos
            break
    lastsharedaa = lenseqs
    for offset, column in enumerate(reversed(columns)):
        if is_shared(column):
            lastsharedaa = lenseqs - offset
            break
    return firstsharedaa, lastsharedaa


def _smcog_run_with_timeout(command, timeout=300, poll_interval=2):
    """Run a shell command, kill it after `timeout` seconds, return its combined output."""
    proc = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT)
    started = time.time()
    # poll() is None while the process runs. Any exit code ends the wait; the
    # original only accepted 0 and so waited the full timeout after a failure.
    while proc.poll() is None:
        if (time.time() - started) > timeout:
            if sys.platform == "win32":
                subprocess.Popen("taskkill /F /T /PID %i" % proc.pid, shell=True,
                                 stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
            else:
                os.kill(proc.pid, signal.SIGKILL)
            break
        time.sleep(poll_interval)
    out, _ = proc.communicate()
    if out is None:
        out = ""
    if not isinstance(out, str):
        out = out.decode("utf-8", "replace")
    return out


def smcog_analysis(inputgenes,inputnr,accessiondict,seqdict,smcogdict,smcogsoutputfolder):
    """Build and render a tree image for every input gene that has an smCOG hit.

    inputgenes         -- iterable of gene tags; each tag is a key of
                          `seqdict` and `smcogdict`
    inputnr            -- number used in the names of all temporary files
                          (input<nr>.fasta, muscle<nr>.fasta, ...)
    accessiondict      -- not used any more; kept so the signature is unchanged
    seqdict            -- maps tag -> sequence
    smcogdict          -- maps tag -> list of hits; the text before ":" in the
                          first field of the first hit names the smCOG
    smcogsoutputfolder -- folder, relative to the parent directory, that
                          receives the PNG files

    Per gene: write the query FASTA, profile-align it to
    <smcog>_muscle.fasta with muscle, trim the alignment to its conserved
    core, build a tree with FastTree and convert/render it with TreeGraph.
    Returns None.
    """
    suffix = str(inputnr)
    fastafile = "input" + suffix + ".fasta"
    musclefile = "muscle" + suffix + ".fasta"
    trimmedfile = "trimmed_alignment" + suffix + ".fasta"
    nwkfile = "tree" + suffix + ".nwk"
    xtgfile = "tree" + suffix + ".xtg"
    # Everything that is not Windows takes the POSIX commands. The original
    # compared against 'linux2' only, which left the command strings unbound
    # on other platform names.
    on_windows = sys.platform == "win32"
    for tag in inputgenes:
        #create input.fasta file with single query sequence to be used as input for MSA
        writefasta([tag],[seqdict[tag]],fastafile)
        if len(smcogdict[tag]) == 0:
            continue
        smcog = (smcogdict[tag][0][0]).split(":")[0]
        #Align to multiple sequence alignment, output as fasta file
        os.system("muscle -quiet -profile -in1 " + str(smcog).lower() + "_muscle.fasta -in2 " + fastafile + " -out " + musclefile)
        #Trim alignment: remove all positions before the first and after the last conserved position
        names, seqs = _smcog_read_fasta(musclefile)
        if not seqs:
            # an empty alignment cannot be trimmed or turned into a tree
            continue
        firstsharedaa, lastsharedaa = _smcog_conserved_bounds(seqs)
        seqs = [seq[firstsharedaa:lastsharedaa] for seq in seqs]
        writefasta(names,seqs,trimmedfile)
        #Draw phylogenetic tree with fasttree 2.1.1
        if on_windows:
            fasttree = "fasttree"
        else:
            fasttree = "./FastTree"
        os.system(fasttree + " -quiet -fastest -noml " + trimmedfile + " > " + nwkfile)
        #Convert tree to XTG and draw PNG image using TreeGraph
        output = _smcog_run_with_timeout("java -Djava.awt.headless=true -jar TreeGraph.jar -convert " + nwkfile + " -xtg " + xtgfile)
        if "exception" in output or "Exception" in output:
            continue
        pngfile = tag.split(".")[0] + ".png"
        output = _smcog_run_with_timeout("java -Djava.awt.headless=true -jar TreeGraph.jar -image " + xtgfile + " " + pngfile)
        if "exception" in output or "Exception" in output:
            continue
        # Copy the image to the output folder, then remove the temporary files.
        # NOTE(review): cleanup is done only after a successful render, which
        # follows the statement order of the original -- confirm the nesting.
        if on_windows:
            os.system('copy/y ' + pngfile + ' "..\\' + smcogsoutputfolder + '" > nul')
            remove = "del "
        else:
            os.system('cp ' + pngfile + ' "../' + smcogsoutputfolder + '" > /dev/null')
            remove = "rm "
        os.system(remove + pngfile)
        os.system(remove + xtgfile)
        os.system(remove + trimmedfile)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
1994
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
def depict_smile(genecluster,structuresfolder):
    """Render genecluster<N>.smi to PNG images with indigo-depict and copy them away.

    genecluster      -- cluster number used in the file names
                        genecluster<N>.smi / .png / _icon.png
    structuresfolder -- destination folder, relative to the parent directory

    Returns "success" when genecluster<N>.png is listed by getdircontents()
    after rendering, otherwise "failed".
    """
    base = "genecluster" + str(genecluster)
    smifile = base + ".smi"
    pngfile = base + ".png"
    iconfile = base + "_icon.png"
    # Everything that is not Windows takes the POSIX commands. The original
    # compared against 'linux2' only and left the command strings unbound on
    # any other platform name.
    on_windows = sys.platform == "win32"
    if on_windows:
        indigo = "indigo-depict"
    else:
        indigo = "./indigo-depict"
    os.system(indigo + " " + smifile + " " + iconfile + " -query -w 200 -h 150")
    os.system(indigo + " " + smifile + " " + pngfile + " -query")
    if pngfile not in getdircontents():
        return "failed"
    if on_windows:
        structuresfolder = structuresfolder.replace("/","\\")
        os.system("copy/y " + pngfile + " ..\\" + structuresfolder + ' > nul')
        os.system("copy/y " + iconfile + " ..\\" + structuresfolder + ' > nul')
        os.system("del " + pngfile)
        os.system("del " + iconfile)
        os.system("del " + smifile)
    else:
        os.system("cp " + pngfile + " ../" + structuresfolder)
        os.system("cp " + iconfile + " ../" + structuresfolder)
        os.system("rm " + pngfile)
        os.system("rm " + iconfile)
        # NOTE(review): the original built an "rm ....smi" command here but
        # never executed it, so the .smi file is kept on POSIX (unlike on
        # Windows). Left unchanged -- confirm which behaviour is intended.
    return "success"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2032
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2033 ##Core script
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2034 import os
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2035 from os import system
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2036 import sys
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2037 import multiprocessing
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2038 import time
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2039 from multiprocessing import Process, freeze_support
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2040 import random
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2041 import string
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2042 import itertools
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2043 from pysvg.filter import *
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2044 from pysvg.gradient import *
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2045 from pysvg.linking import *
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2046 from pysvg.script import *
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2047 from pysvg.shape import *
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2048 from pysvg.structure import *
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2049 from pysvg.style import *
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2050 from pysvg.text import *
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2051 from pysvg.builders import *
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2052 from string import ascii_letters
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2053 from pyExcelerator import *
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2054 from pyExcelerator.Workbook import *
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2055 import signal
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2056 import subprocess
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2057 starttime = time.time()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2058
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2059 os.environ['NRPS2BASEDIR'] = os.path.join(os.getcwd(), 'NRPSPredictor2')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2060
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
def split_quoted_args(argv):
    """Return a copy of `argv` in which arguments holding several quotes are split up.

    An argument containing more than one double quote is split on single
    spaces. A fragment that starts with a quote gets a closing quote appended
    (even when it already ends with one); otherwise a fragment that ends with
    a quote gets an opening quote prepended. All other arguments are passed
    through unchanged.

    Empty fragments, which consecutive spaces produce, are dropped; indexing
    them used to raise IndexError.
    """
    fixed = []
    for arg in argv:
        if arg.count('"') > 1:
            for piece in arg.split(' '):
                if not piece:
                    continue
                if piece[0] == '"':
                    piece = piece + '"'
                elif piece[-1] == '"':
                    piece = '"' + piece
                fixed.append(piece)
        else:
            fixed.append(arg)
    return fixed

#Fix sys.argv input
options = split_quoted_args(sys.argv)
sys.argv = options
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2075 #Redirect stdout and stderr if GUI-executed
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2076 if "--gui" in sys.argv and len(sys.argv) < (sys.argv.index("--gui") + 2):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2077 print >> sys.stderr, "Invalid options input: --gui without n or y"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2078 print "From the command line, input antismash --help for more information."
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2079 logfile = open("antismash.log","w")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2080 logfile.write("Invalid options input: --gui without n or y\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2081 logfile.close()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2082 sys.exit(1)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2083 if "--gui" in sys.argv and sys.argv[sys.argv.index("--gui") + 1] == "y":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2084 stdoutfile = open("stdout.txt","w")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2085 sys.stdout = stdoutfile
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2086 sys.stderr = stdoutfile
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2087
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2088 if __name__ == '__main__':
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2089 import shutil
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2090 hmmsearch_path = 'hmmsearch'
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2091 hmmscan_path = 'hmmscan'
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2092 antismash_path = '/home/galaxy/bin/antismash-1.1.0/'
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2093 hmms_path = antismash_path + '/hmms/'
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2094 shutil.copytree(antismash_path + '/NRPSPredictor2/', './NRPSPredictor2/')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2095 shutil.copytree(antismash_path + '/Minowa/', './Minowa/')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2096 shutil.copytree(antismash_path + '/pkssignatures/', './pkssignatures/')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2097 shutil.copytree(antismash_path + '/kr_analysis/', './kr_analysis/')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2098 shutil.copytree(antismash_path + '/docking_analysis/', './docking_analysis/')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2099 shutil.copytree(antismash_path + '/NRPeditor/', './NRPeditor/')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2100 shutil.copy(antismash_path + '/search_form.html', './')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2101 shutil.copy(antismash_path + '/empty.xhtml', './')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2102 shutil.copytree(antismash_path + '/vis/', './vis/')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2103 shutil.copytree(antismash_path + '/smcogtree/', './smcogtree/')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2104
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2105 # add freeze support
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2106 freeze_support()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2107
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2108 #Open logfile
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2109 logfile = open("antismash.log","w")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2110
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2111 #Identify screen width
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2112 if sys.platform == ('win32'):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2113 import ctypes
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2114 user32 = ctypes.windll.user32
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2115 screenwidth = user32.GetSystemMetrics(0)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2116 if sys.platform == ('linux2'):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2117 screenwidth = 1024
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2118 # res = os.popen("xrandr | grep \* | cut -d' ' -f4") ###FOR SERVER USE###
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2119 # res = res.read() ###FOR SERVER USE###
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2120 # screenwidth = int(res.split("x")[0]) ###FOR SERVER USE###
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2121 if screenwidth < 1024:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2122 screenwidth = 1024
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2123 #temporary for testing
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2124 screenwidth = 1024
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2125
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2126
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2127 #Reads input
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2128 inputinstructions = "antiSMASH 1.1.0 arguments:\n\nUsage: antismash <query fasta/embl/gbk file> [options]\n\nOptions (x is an integer number, list x,y,z is a list of integer numbers separated by commas):\n\n--gtransl <x> : GenBank translation table used for Glimmer (only for FASTA inputs, default: 1)\n1. The Standard Code\n2. The Vertebrate Mitochondrial Code\n3. The Yeast Mitochondrial Code\n4. The Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code\n5. The Invertebrate Mitochondrial Code\n6. The Ciliate, Dasycladacean and Hexamita Nuclear Code\n9. The Echinoderm and Flatworm Mitochondrial Code\n10. The Euplotid Nuclear Code\n11. The Bacterial, Archaeal and Plant Plastid Code\n12. The Alternative Yeast Nuclear Code\n13. The Ascidian Mitochondrial Code\n14. The Alternative Flatworm Mitochondrial Code\n15. Blepharisma Nuclear Code\n16. Chlorophycean Mitochondrial Code\n21. Trematode Mitochondrial Code\n22. Scenedesmus Obliquus Mitochondrial Code\n23. 
Thraustochytrium Mitochondrial Code\n--genomeconf <l/c> : Genome configuration used for Glimmer: linear / circular (only for FASTA inputs, default: l)\n--minglength <x> : Glimmer minimal gene length (range 30-120, only for FASTA inputs, default: 90)\n--taxon <p/e> : Taxonomy: prokaryotic / eukaryotic (default: p)\n--cores <x> : Number of parallel CPUs to use for threading (default: all)\n--clusterblast <y/n> : Include ClusterBlast gene cluster comparison analysis (default:y)\n--smcogs <y/n> : Include smCOG analysis for functional prediction of genes (default:y)\n--fullblast <y/n> : Include genome-wide BLAST analysis (default:n)\n--fullhmm <y/n> : Include genome-wide PFAM HMM analysis (default:n)\n--blastdbpath <path> : Specify folder containing CLUSEAN blast database (default:clusean/db)\n--pfamdbpath <path> : Specify folder containing PFAM database (default:clusean/db)\n--geneclustertypes <x,y,z> : Gene cluster types to scan for (default:1):\n1 = all\n2 = type I polyketide synthases\n3 = type II polyketide synthases\n4 = type III polyketide synthases\n5 = nonribosomal peptide synthetases\n6 = terpene synthases\n7 = lantibiotics\n8 = bacteriocins\n9 = beta-lactams\n10 = aminoglycosides / aminocyclitols\n11 = aminocoumarins\n12 = siderophores\n13 = ectoines\n14 = butyrolactones\n15 = indoles\n16 = nucleosides\n17 = phosphoglycolipids\n18 = melanins\n19 = others\n--help : this help screen\n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2129 #Check input file format
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2130 if len(sys.argv) < 2 or len(sys.argv[1]) < 1:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2131 print >> sys.stderr, "Please supply valid name for input file."
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2132 print "Usage: antismash <query fasta/embl/gbk file> [options]"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2133 print "From the command line, input antismash --help for more information."
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2134 logfile.write("Input format error. Please supply valid name for infile.\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2135 logfile.write("Usage: antismash <query fasta/embl/gbk file> [options]\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2136 logfile.write("From the command line, input antismash --help for more information.\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2137 logfile.close()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2138 sys.exit(1)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2139 if sys.argv[1] != "--help":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2140 if len(sys.argv[1].split(".")) < 2 or (sys.argv[1].split(".")[-1] != "embl" and sys.argv[1].split(".")[-1] != "EMBL" and sys.argv[1].split(".")[-1] != "emb" and sys.argv[1].split(".")[-1] != "EMB" and sys.argv[1].split(".")[-1] != "genbank" and sys.argv[1].split(".")[-1] != "GENBANK" and sys.argv[1].split(".")[-1] != "gbk" and sys.argv[1].split(".")[-1] != "GBK" and sys.argv[1].split(".")[-1] != "gb" and sys.argv[1].split(".")[-1] != "GB" and sys.argv[1].split(".")[-1] != "fasta" and sys.argv[1].split(".")[-1] != "FASTA" and sys.argv[1].split(".")[-1] != "fas" and sys.argv[1].split(".")[-1] != "FAS" and sys.argv[1].split(".")[-1] != "fa" and sys.argv[1].split(".")[-1] != "FA"):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2141 print >> sys.stderr, "No EMBL/GBK/FASTA file submitted as input. Please supply a valid file with .embl / .gbk / .fasta extension. "
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2142 print "Usage: antismash <query fasta/embl/gbk file> [options]"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2143 print "From the command line, input antismash --help for more information."
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2144 logfile.write("Input format error. Please supply a valid file with .embl / .gbk / .fasta extension.\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2145 logfile.write("Usage: antismash <query fasta/embl/gbk file> [options]\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2146 logfile.write("From the command line, input antismash --help for more information.\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2147 logfile.close()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2148 sys.exit(1)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2149 #Define input filename and make fixes if necessary
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2150 infile = sys.argv[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2151 try:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2152 testfile = open(infile,"r").read()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2153 except(IOError):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2154 print >> sys.stderr, "Please supply valid name for input file."
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2155 print "Usage: antismash <query fasta/embl/gbk file> [options]"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2156 print "From the command line, input antismash --help for more information."
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2157 logfile = open("antismash.log","w")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2158 logfile.write("Input format error. Please supply valid name for infile.\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2159 logfile.write("Usage: antismash <query fasta/embl/gbk file> [options]\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2160 logfile.write("From the command line, input antismash --help for more information.\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2161 logfile.close()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2162 sys.exit(1)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2163 #Parse absolute paths if found
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2164 absolutepath = "n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2165 if "/" in infile or "\\" in infile:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2166 absolutepath = "y"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2167 lastpos1 = infile.rfind("\\")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2168 lastpos2 = infile.rfind("/")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2169 lastpos = max([lastpos1,lastpos2])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2170 originpath = infile[:(lastpos + 1)]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2171 infile = infile[(lastpos + 1):]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2172 if sys.platform == ('win32'):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2173 copycommand = 'copy/y "' + originpath + infile + '" ' + infile + ' > nul'
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2174 os.system(copycommand)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2175 if sys.platform == ('linux2'):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2176 copycommand = 'cp ' + originpath + infile + " . > /dev/null"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2177 os.system(copycommand)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2178 #genomename = ".".join(infile.split(".")[:-1])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2179 #for i in genomename:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2180 # if i in '!"#$%&()*+,./:;=>?@[]^`{|}' or i in "'":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2181 # genomename = genomename.replace(i,"")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2182 # if "/" in genomename:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2183 # genomename = genomename.rpartition("/")[2]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2184 # if "\\" in genomename:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2185 # genomename = genomename.rpartition("\\")[2]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2186 genomename = os.path.splitext(os.path.basename(infile))[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2187 if sys.platform == ('linux2'):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2188 if genomename != infile.split(".")[-2]:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2189 oldinfile = infile.replace("(","\\(").replace(")","\\)").replace("*","\\*").replace("&","\\&").replace("!","\\!").replace("$","\\$").replace("{","\\{").replace("}","\\}").replace("|","\\|").replace("`","\\`").replace("'","\\'").replace('"','\\"').replace('?','\\?')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2190 infile = genomename + "." + infile.split(".")[-1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2191 if "/" in genomename:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2192 genomename = genomename.rpartition("/")[2]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2193 if "\\" in genomename:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2194 genomename = genomename.rpartition("\\")[2]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2195 os.system("cp " + oldinfile + " " + infile)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2196 #Define outputfolder
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2197 if absolutepath == "y":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2198 if sys.platform == ('win32'):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2199 dir1 = os.popen("dir/w/ad " + originpath)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2200 dir2 = os.popen("dir/w/ad")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2201 dir1 = dir1.read()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2202 dir2 = dir2.read()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2203 if sys.platform == ('linux2'):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2204 dir1 = os.popen("ls")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2205 dir2 = os.popen("ls " + originpath)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2206 dir1 = dir1.read()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2207 dir2 = dir2.read()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2208 parts = dir1.split(" ") + dir2.split(" ")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2209 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2210 if sys.platform == ('win32'):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2211 dir = os.popen("dir/w/ad")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2212 dir = dir.read()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2213 if sys.platform == ('linux2'):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2214 dir = os.popen("ls")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2215 dir = dir.read()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2216 parts = dir.split(" ")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2217 parts2 = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2218 for i in parts:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2219 partparts = i.split("\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2220 for i in partparts:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2221 i = i.replace("[","")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2222 i = i.replace("]","")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2223 parts2.append(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2224 parts = parts2
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2225 oldgenomename = genomename
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2226 if genomename in parts:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2227 genomename = genomename + "_" + str(0)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2228 while genomename in parts:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2229 finalpart = genomename.split("_")[-1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2230 allnumbers = "y"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2231 for i in finalpart:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2232 if i not in ["0","1","2","3","4","5","6","7","8","9"]:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2233 allnumbers = "n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2234 if allnumbers == "y" and int(finalpart) in range(0,1000):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2235 newgenomename = ""
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2236 for i in genomename.split("_")[:-1]:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2237 newgenomename = newgenomename + "_" + i
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2238 newgenomename = newgenomename + "_" + str(int(finalpart) + 1)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2239 genomename = newgenomename[1:]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2240 genomename = genomename.replace("__","_")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2241 #Output results folder name for output checking by GUI
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2242 resultslocfile = open("resultsfolder.txt","w")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2243 resultslocfile.write(os.getcwd() + os.sep + genomename)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2244 resultslocfile.close()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2245 #Implement defaults
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2246 glimmertransl_table = str(1)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2247 genomeconf = "l"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2248 minglength = str(90)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2249 cores = "all"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2250 taxon = "p"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2251 clusterblast = "y"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2252 smcogs = "y"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2253 fullblast = "n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2254 fullhmm = "n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2255 if sys.platform == ('win32'):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2256 blastdbpath = '"' + os.getcwd() + "/clusean/db" + '"'
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2257 if sys.platform == ('linux2'):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2258 blastdbpath = os.getcwd() + "/clusean/db"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2259 if sys.platform == ('win32'):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2260 pfamdbpath = '"' + os.getcwd() + "/clusean/db/" + '"'
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2261 if sys.platform == ('linux2'):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2262 pfamdbpath = os.getcwd() + "/clusean/db/"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2263 geneclustertypes = [1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2264 #Read user-specified options which may override defaults
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2265 if len(sys.argv) > 2 or sys.argv[1] == "--help":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2266 options = sys.argv
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2267 if "--" in options[-1] and sys.argv[1] != "--help":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2268 invalidoptions(options[-1])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2269 #identify option identifiers
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2270 identifiers = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2271 for i in options:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2272 if "--" in i:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2273 if i not in identifiers:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2274 identifiers.append(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2275 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2276 invalidoptions("No '--' in given options or option given twice.")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2277 for i in identifiers:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2278 if i != "--help":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2279 value = options[options.index(i) + 1].strip()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2280 if i == "--gtransl":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2281 for k in value:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2282 if k not in ["0","1","2","3","4","5","6","7","8","9"]:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2283 invalidoptions(i + "input is no number")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2284 if int(value) in range(1,24) and int(value) != 7 and int(value) != 8 and int(value) != 17 and int(value) != 18 and int(value) != 19 and int(value) != 20:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2285 glimmertransl_table = value
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2286 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2287 invalidoptions(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2288 elif i == "--genomeconf":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2289 if value == "l" or value == "c":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2290 genomeconf = value
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2291 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2292 invalidoptions(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2293 elif i == "--minglength":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2294 for k in value:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2295 if k not in ["0","1","2","3","4","5","6","7","8","9"]:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2296 invalidoptions(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2297 if int(value) in range(30,91):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2298 minglength = value
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2299 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2300 print >> sys.stderr, "Invalid options input: minimal gene length should be a number between 30-90."
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2301 logfile = open("antismash.log","w")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2302 logfile.write("Invalid options input: minimal gene length should be a number between 30-90.\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2303 logfile.close()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2304 sys.exit(1)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2305 elif i == "--cores":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2306 for k in value:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2307 if k not in ["0","1","2","3","4","5","6","7","8","9"]:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2308 invalidoptions(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2309 if int(value) in range(1,1000):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2310 cores = int(value)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2311 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2312 invalidoptions(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2313 elif i == "--taxon":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2314 if value == "p" or value == "e":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2315 taxon = value
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2316 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2317 invalidoptions(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2318 elif i == "--clusterblast":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2319 if value == "y" or value == "n":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2320 clusterblast = value
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2321 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2322 invalidoptions(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2323 elif i == "--smcogs":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2324 if value == "y" or value == "n":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2325 smcogs = value
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2326 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2327 invalidoptions(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2328 elif i == "--fullblast":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2329 if value == "y" or value == "n":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2330 fullblast = value
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2331 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2332 invalidoptions(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2333 elif i == "--fullhmm":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2334 if value == "y" or value == "n":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2335 fullhmm = value
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2336 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2337 invalidoptions(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2338 elif i == "--glimmer_prediction":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2339 glimmer_prediction_path = value
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2340 elif i == "--blastdbpath":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2341 if sys.platform == ('win32'):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2342 if options[options.index(i) + 1][0] != '"':
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2343 value = '"' + options[options.index(i) + 1] + '"'
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2344 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2345 value = options[options.index(i) + 1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2346 if ":\\" in value:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2347 blastdbpath = value
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2348 elif "\\" in value or "/" in value:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2349 if value[0] == "\\" or value[0] == "/":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2350 blastdbpath = os.getcwd() + value
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2351 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2352 blastdbpath = os.getcwd() + "\\" + value
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2353 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2354 blastdbpath = os.getcwd() + "\\" + value
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2355 if sys.platform == ('linux2'):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2356 value = options[options.index(i) + 1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2357 if "\\" in value or "/" in value:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2358 value = value.replace("\\","/")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2359 if value[0] == "/":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2360 blastdbpath = value
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2361 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2362 blastdbpath = os.getcwd() + "/" + value
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2363 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2364 blastdbpath = os.getcwd() + "/" + value
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2365 elif i == "--pfamdbpath":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2366 if sys.platform == ('win32'):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2367 if options[options.index(i) + 1][0] != '"':
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2368 value = '"' + options[options.index(i) + 1] + '"'
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2369 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2370 value = options[options.index(i) + 1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2371 if ":\\" in value:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2372 pfamdbpath = value
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2373 elif "\\" in value or "/" in value:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2374 if value[0] == "\\" or value[0] == "/":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2375 pfamdbpath = os.getcwd() + value
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2376 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2377 pfamdbpath = os.getcwd() + "\\" + value
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2378 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2379 pfamdbpath = os.getcwd() + "\\" + value
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2380 if sys.platform == ('linux2'):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2381 value = options[options.index(i) + 1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2382 if "\\" in value or "/" in value:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2383 value = value.replace("\\","/")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2384 if value[0] == "/":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2385 pfamdbpath = value
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2386 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2387 pfamdbpath = os.getcwd() + "/" + value
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2388 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2389 pfamdbpath = os.getcwd() + "/" + value
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2390 elif i == "--geneclustertypes":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2391 if "," not in value and value not in ["1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19"]:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2392 invalidoptions(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2393 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2394 types = value.split(",")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2395 types2 = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2396 if "1" in types:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2397 types2 = [1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2398 for j in types:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2399 if int(j) not in range(1,20):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2400 invalidoptions(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2401 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2402 types2.append(int(j))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2403 geneclustertypes = types2
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2404 elif i == "--help":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2405 print inputinstructions
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2406 sys.exit()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2407 elif i == "--gui":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2408 pass
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2409 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2410 invalidoptions(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2411
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
# Determine number of CPUs used.
# The machine is probed once; if the probe raises IOError, OSError or
# NotImplementedError a single CPU is assumed.
try:
    nrcpus = multiprocessing.cpu_count()
except (IOError, OSError, NotImplementedError):
    nrcpus = 1
# cores == "all" keeps every detected CPU; any other value only ever lowers
# the count, it never raises it above what was detected.
if cores != "all" and cores < nrcpus:
    nrcpus = cores
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2425
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
# Create directory structure needed for file storage.
# All folder names are bound first (they are plain string concatenations on
# genomename), then the directories are created in the same order as before:
# the genome folder, then each output folder, with nrpspks/ ahead of the
# sub-folders that live inside it.
hmmoutputfolder = genomename + "/hmmoutput/"
nrpspksoutputfolder = genomename + "/nrpspks/"
nrpspredictoroutputfolder = nrpspksoutputfolder + "nrpspredictor/"
minowanrpsoutputfolder = nrpspksoutputfolder + "minowanrpspred/"
minowapksoutputfolder = nrpspksoutputfolder + "minowapkspred/"
minowacaloutputfolder = nrpspksoutputfolder + "minowacalpred/"
pkssignatureoutputfolder = nrpspksoutputfolder + "pkssignatures/"
kranalysisoutputfolder = nrpspksoutputfolder + "kr_analysis/"
clusterblastoutputfolder = genomename + "/clusterblast/"
smcogsoutputfolder = genomename + "/smcogs/"
substrspecsfolder = genomename + "/substrspecs/"
structuresfolder = genomename + "/structures/"
svgfolder = genomename + "/svg/"
searchgtrfolder = genomename + "/searchgtr/"
htmlfolder = genomename + "/html/"
imagesfolder = genomename + "/images/"
for outputdir in (
    genomename,
    hmmoutputfolder,
    nrpspksoutputfolder,
    nrpspredictoroutputfolder,
    minowanrpsoutputfolder,
    minowapksoutputfolder,
    minowacaloutputfolder,
    pkssignatureoutputfolder,
    kranalysisoutputfolder,
    clusterblastoutputfolder,
    smcogsoutputfolder,
    substrspecsfolder,
    structuresfolder,
    svgfolder,
    searchgtrfolder,
    htmlfolder,
    imagesfolder,
):
    # Best effort: any IOError/OSError raised by mkdir is ignored and the
    # remaining folders are still attempted.
    try:
        os.mkdir(outputdir)
    except (IOError, OSError):
        pass
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2511
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
#If input is unannotated GBK/EMBL file, convert to FASTA and use that as input
# The input is read once and reused for every check below, instead of being
# re-opened (and never closed) for each test.
# NOTE(review): confirm the exact spacing inside the two CDS marker strings
# against the GenBank/EMBL feature-table layout this script expects.
with open(infile,"r") as inputhandle:
    inputtext = inputhandle.read()
if " CDS " not in inputtext and "FT CDS " not in inputtext:
    inputextension = infile.split(".")[-1]
    # Per format: label used in the messages, marker that precedes the
    # sequence section, and the text that terminates the FASTA header line.
    if inputextension in ("embl","EMBL","emb","EMB"):
        inputformat, seqmarker, headerend = "EMBL", "\nSQ", "|\n"
    elif inputextension in ("gbk","GBK","gb","GB","genbank","GENBANK"):
        inputformat, seqmarker, headerend = "GBK", "\nORIGIN", "\n"
    else:
        # Any other extension is left untouched by this step.
        inputformat = None
    if inputformat is not None:
        filetext = inputtext
        if seqmarker not in filetext:
            errormessage = "Exit: " + inputformat + " file not properly formatted, no sequence found."
            print >> sys.stderr, errormessage
            logfile = open("antismash.log","w")
            logfile.write(errormessage + "\n")
            logfile.close()
            sys.exit(1)
        dnaseq = cleandnaseq(filetext.split(seqmarker)[1])
        sequence = dnaseq
        # Reject the input when fewer than half of the characters are A/C/G/T
        # (either case); the message reports it as a protein file.
        nucleotidecount = sum([sequence.count(base) for base in "ACGTacgt"])
        if nucleotidecount < (0.5 * len(sequence)):
            print >> sys.stderr, "Protein " + inputformat + " file provided. Please provide nucleotide " + inputformat + " file."
            sys.exit(1)
        fastaname = infile.rpartition(".")[0] + ".fasta"
        fastafile = open(fastaname,"w")
        try:
            fastafile.write(">" + infile.rpartition(".")[0] + headerend)
            fastafile.write(sequence)
        finally:
            fastafile.close()
        # Continue with the FASTA file *name*. The EMBL branch used to rebind
        # infile to the closed file object, which breaks the string operations
        # (infile.split / infile.rpartition) performed on infile afterwards.
        infile = fastaname
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2552 #If input is unannotated fasta file, predict genes with Glimmer and create EMBL file. If input is EMBL or GBK file, read input embl/gbk and create input fasta file, read input protein info into memory
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2553 annotated = "y"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2554 if infile.split(".")[-1] == "fasta" or infile.split(".")[-1] == "FASTA" or infile.split(".")[-1] == "FAS" or infile.split(".")[-1] == "fas" or infile.split(".")[-1] == "FA" or infile.split(".")[-1] == "fa":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2555 annotated = "n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2556 #Check input file formatting
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2557 sequence = get_sequence(infile)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2558 if (sequence.count('A') + sequence.count('a') + sequence.count('C') + sequence.count('c') + sequence.count('G') + sequence.count('g') + sequence.count('T') + sequence.count('t')) < (0.5 * len(sequence)):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2559 print >> sys.stderr, "Protein FASTA file provided. Please provide nucleotide FASTA file."
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2560 sys.exit(1)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2561 nucleotides = ["A","a","C","c","G","g","T","t","N","n"]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2562 badsequence = "n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2563 sequence_name = open(infile,"r").read().partition(">")[2].partition("\n")[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2564 for i in sequence:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2565 if i not in nucleotides:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2566 badsequence = "y"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2567 if badsequence == "y":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2568 cleaned_sequence = cleandnaseq(sequence)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2569 badsequence = "n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2570 for i in cleaned_sequence:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2571 if i not in nucleotides:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2572 badsequence = "y"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2573 if badsequence == "n":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2574 writefasta([sequence_name],[cleaned_sequence],infile.rpartition(".")[0] + "_f.fasta")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2575 infile = infile.rpartition(".")[0] + "_f.fasta"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2576 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2577 print >>sys.stderr, "Incorrect file formatting. Please submit a properly formatted single-sequence FASTA file."
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2578 logfile = open("antismash.log","w")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2579 logfile.write("Incorrect file formatting. Please submit a properly formatted single-sequence FASTA file.\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2580 logfile.close()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2581 sys.exit(1)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2582 revseq = reverse_complement(sequence)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2583 seqlength = len(sequence)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2584
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
#Print Glimmer notification
# The notification goes to antismash.log only; which message is written
# depends on taxon ("p" or "e"); any other value writes nothing.
logfile = open("antismash.log","w")
if taxon == "p":
    logfile.write("Running Glimmer 3.02 to predict genes in unannotated prokaryotic genome...\n")
elif taxon == "e":
    logfile.write("Running GlimmerHMM 3.0.1 to predict genes in unannotated eukaryotic genome...\n")
# logfile is left open here (as before), so flush it before the log is read
# back; otherwise the message just written may still sit in the write buffer
# and loginfo would not contain it.
logfile.flush()
loghandle = open("antismash.log","r")
try:
    loginfo = loghandle.read()
finally:
    loghandle.close()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2598 #Copying file and changing to folder to prepare for Glimmer3 prediction
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2599 os.mkdir( os.path.join(os.getcwd(), genomename, "geneprediction"))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2600 if sys.platform == ('win32'):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2601 os.system("copy/y " + infile + " geneprediction > nul")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2602 if sys.platform == ('linux2'):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2603 os.system("cp " + infile + " geneprediction > /dev/null")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2604
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2605 os.chdir( os.path.join(os.getcwd(), genomename, "geneprediction"))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2606 fastafile = '../../'+infile
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2607
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2608 #Find DNA sequence length
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2609 seq = get_sequence(fastafile)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2610 dnaseqlength = len(seq)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2611 #Run Glimmer for prokaryotic sequences, GlimmerHMM for eukaryotic sequences
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2612 if taxon == "p":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2613 """
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2614 GlimmerPrediction, not needed since we can predict it in galaxy on our own
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2615 if genomeconf == "l":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2616 if "--gui" in sys.argv and sys.argv[sys.argv.index("--gui") + 1] == "y":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2617 os.popen("tigr-glimmer long-orfs -l -n -t 1.15 --trans_table " + glimmertransl_table + " " + fastafile + " " + fastafile.rpartition(".")[0] + ".longorfs")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2618 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2619 os.system("tigr-glimmer long-orfs -l -n -t 1.15 --trans_table " + glimmertransl_table + " " + fastafile + " " + fastafile.rpartition(".")[0] + ".longorfs")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2620 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2621 if "--gui" in sys.argv and sys.argv[sys.argv.index("--gui") + 1] == "y":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2622 os.popen("tigr-glimmer long-orfs -n -t 1.15 --trans_table " + glimmertransl_table + " " + fastafile + " " + fastafile.rpartition(".")[0] + ".longorfs")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2623 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2624 os.system("tigr-glimmer long-orfs -n -t 1.15 --trans_table " + glimmertransl_table + " " + fastafile + " " + fastafile.rpartition(".")[0] + ".longorfs")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2625 if "--gui" in sys.argv and sys.argv[sys.argv.index("--gui") + 1] == "y":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2626 os.popen("tigr-glimmer extract -t " + fastafile + " " + fastafile.rpartition(".")[0] + ".longorfs > " + fastafile.rpartition(".")[0] + ".train")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2627 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2628 os.system("tigr-glimmer extract -t " + fastafile + " " + fastafile.rpartition(".")[0] + ".longorfs > " + fastafile.rpartition(".")[0] + ".train")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2629 if "--gui" in sys.argv and sys.argv[sys.argv.index("--gui") + 1] == "y":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2630 os.popen("tigr-glimmer build-icm -r " + fastafile.rpartition(".")[0] + ".icm < " + fastafile.rpartition(".")[0] + ".train")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2631 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2632 os.system("tigr-glimmer build-icm -r " + fastafile.rpartition(".")[0] + ".icm < " + fastafile.rpartition(".")[0] + ".train")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2633 if genomeconf == "l":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2634 if "--gui" in sys.argv and sys.argv[sys.argv.index("--gui") + 1] == "y":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2635 os.popen("tigr-glimmer glimmer3 -l -o50 -g" + minglength + " -q3000 -t30 --trans_table " + glimmertransl_table + " " + fastafile + " " + fastafile.rpartition(".")[0] + ".icm " + fastafile.rpartition(".")[0])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2636 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2637 os.system("tigr-glimmer glimmer3 -l -o50 -g" + minglength + " -q3000 -t30 --trans_table " + glimmertransl_table + " " + fastafile + " " + fastafile.rpartition(".")[0] + ".icm " + fastafile.rpartition(".")[0])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2638 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2639 if "--gui" in sys.argv and sys.argv[sys.argv.index("--gui") + 1] == "y":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2640 os.popen("tigr-glimmer glimmer3 -o50 -g" + minglength + " -q3000 -t30 --trans_table " + glimmertransl_table + " " + fastafile + " " + fastafile.rpartition(".")[0] + ".icm " + fastafile.rpartition(".")[0])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2641 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2642 os.system("tigr-glimmer glimmer3 -o50 -g" + minglength + " -q3000 -t30 --trans_table " + glimmertransl_table + " " + fastafile + " " + fastafile.rpartition(".")[0] + ".icm " + fastafile.rpartition(".")[0])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2643 #Convert glimmer predictions into EMBL with sequence
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2644 glfile = fastafile.rpartition(".")[0] + ".predict"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2645
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2646 End of the Glimmer prediction
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2647 """
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2648 glfile = glimmer_prediction_path
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2649 emblfile = fastafile.rpartition(".")[0] + ".embl"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2650 try:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2651 file = open(glfile,"r")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2652 filetext = file.read()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2653 except:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2654 print >> sys.stderr, "Glimmer gene prediction failed. Please check the format of your input FASTA file. Error 11."
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2655 logfile = open("antismash.log","w")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2656 logfile.write("Glimmer gene prediction failed. Please check the format of your input FASTA file. Error 11.\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2657 logfile.close()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2658 sys.exit(1)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2659 if "orf" not in filetext:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2660 print >> sys.stderr, "Glimmer gene prediction failed: no genes found."
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2661 logfile = open("antismash.log","w")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2662 logfile.write("Glimmer gene prediction failed: no genes found.\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2663 logfile.close()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2664 sys.exit(1)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2665 filetext = filetext.replace("\r","\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2666 lines = filetext.split("\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2667 lines = lines[1:-1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2668 orfnames = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2669 starts = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2670 ends = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2671 strands = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2672 starts2 = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2673 ends2 = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2674 firstline = "y"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2675 for i in lines:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2676 columns = i.split(" ")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2677 columns2 = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2678 for i in columns:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2679 if i != "":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2680 columns2.append(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2681 columns = columns2
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2682 if len(columns) > 3:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2683 frame = columns[3][0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2684 strands.append(frame)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2685 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2686 frame = ""
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2687 if firstline == "y" and frame == "+" and len(columns) > 3:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2688 orfname = str(columns[0])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2689 orfnames.append(orfname)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2690 if genomeconf == "c" and (int(columns[1]) > int(columns[2])) and (int(columns[1]) > (0.5 * dnaseqlength)):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2691 gstart = (int(columns[2]) % 3) + 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2692 if gstart == 3:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2693 gstart = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2694 starts.append(str(gstart))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2695 ends.append(columns[2])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2696 starts.append(columns[1])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2697 ends.append(str(dnaseqlength))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2698 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2699 starts.append(columns[1])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2700 ends.append(columns[2])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2701 firstline = "n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2702 elif firstline == "y" and frame == "-" and len(columns) > 3:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2703 orfname = str(columns[0])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2704 orfnames.append(orfname)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2705 if genomeconf == "c" and (int(columns[1]) > int(columns[2])) and (int(columns[1]) > (0.5 * dnaseqlength)):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2706 gstart = (int(columns[2]) % 3) + 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2707 if gstart == 3:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2708 gstart = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2709 starts.append("complement(" + str(gstart))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2710 ends.append(columns[2] + ")")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2711 starts.append("complement(" + columns[1])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2712 ends.append(str(dnaseqlength) + ")")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2713 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2714 complstart = "complement(" + str(columns[1])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2715 starts.append(complstart)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2716 complend = str(columns[2]) + ")"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2717 ends.append(str(complend))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2718 firstline = "n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2719 elif frame == "+" and len(columns) > 3:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2720 orfname = str(columns[0])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2721 orfnames.append(orfname)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2722 starts.append(columns[1])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2723 ends.append(columns[2])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2724 elif frame == "-" and len(columns) > 3:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2725 orfname = str(columns[0])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2726 orfnames.append(orfname)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2727 complstart = "complement(" + str(columns[1])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2728 starts.append(complstart)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2729 complend = str(columns[2]) + ")"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2730 ends.append(str(complend))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2731 if len(orfnames) == 0:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2732 print >> sys.stderr, "Glimmer gene prediction failed. Please check the format of your input FASTA file. Error 10."
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2733 logfile = open("antismash.log","w")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2734 logfile.write("Glimmer gene prediction failed. Please check the format of your input FASTA file. Error 10.\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2735 logfile.close()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2736 sys.exit(1)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2737 out_file = open(emblfile,"w")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2738 a = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2739 #print "Writing EMBL file with Glimmer-predicted genes..."
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2740 logfile = open("antismash.log","w")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2741 logfile.write(loginfo)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2742 logfile.write("Writing EMBL file with Glimmer-predicted genes...\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2743 #logfile.close()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2744 loginfo = open("antismash.log","r").read()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2745 #logfile.close()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2746 if taxon == "p":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2747 out_file.write("ID A01; SV 1; linear; DNA; STD; PRO; " + str(dnaseqlength) + " BP.\nXX\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2748 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2749 out_file.write("ID A01; SV 1; linear; DNA; STD; FUN; " + str(dnaseqlength) + " BP.\nXX\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2750 out_file.write("AC A01;\nXX\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2751 out_file.write("DE " + genomename + ";\nXX\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2752 out_file.write("KW none;\nXX\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2753 out_file.write("OS unknown;\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2754 if taxon == "p":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2755 out_file.write("OC Eubacteria;\nXX\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2756 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2757 out_file.write("OC Fungi;\nXX\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2758 out_file.write("RN [1]\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2759 out_file.write("RT ;\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2760 out_file.write("RL Unknown.\nXX\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2761 out_file.write("FH Key Location/Qualifiers\nFH\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2762 out_file.write("FT source 1.." + str(dnaseqlength) + "\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2763 for i in orfnames:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2764 out_file.write("FT gene ")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2765 out_file.write(starts[a])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2766 out_file.write("..")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2767 out_file.write(ends[a])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2768 out_file.write("\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2769 out_file.write('FT /gene="' + i + '"\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2770 out_file.write("FT CDS ")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2771 out_file.write(starts[a])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2772 out_file.write("..")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2773 out_file.write(ends[a])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2774 out_file.write("\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2775 out_file.write('FT /gene="' + i + '"\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2776 a += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2777 elif taxon == "e":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2778 """
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2779 GlimmerHMM is executed externally (in Galaxy); its predictions are provided through glimmer_prediction_path
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2780
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2781 if "--gui" in sys.argv and sys.argv[sys.argv.index("--gui") + 1] == "y":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2782 os.popen("glimmerhmm " + fastafile + " train_crypto -o " + fastafile.rpartition(".")[0] + ".predict -g")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2783 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2784 os.system("glimmerhmm " + fastafile + " train_crypto -o " + fastafile.rpartition(".")[0] + ".predict -g")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2785 """
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2786 #Convert glimmerhmm predictions into EMBL with sequence
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2787 #glfile = fastafile.rpartition(".")[0] + ".predict"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2788 glfile = glimmer_prediction_path
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2789 emblfile = fastafile.rpartition(".")[0] + ".embl"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2790 try:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2791 file = open(glfile,"r")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2792 filetext = file.read().replace("\r","")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2793 except:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2794 print >> sys.stderr, "GlimmerHMM gene prediction failed. Please check the format of your input FASTA file. Error 9."
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2795 logfile = open("antismash.log","w")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2796 logfile.write("GlimmerHMM gene prediction failed. Please check the format of your input FASTA file. Error 9.\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2797 logfile.close()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2798 sys.exit(1)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2799 if "CDS" not in filetext:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2800 print >> sys.stderr, "GlimmerHMM gene prediction failed: no genes found."
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2801 logfile = open("antismash.log","w")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2802 logfile.write("GlimmerHMM gene prediction failed: no genes found.\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2803 logfile.close()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2804 sys.exit(1)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2805 filetext = filetext.replace("\r","\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2806 lines = filetext.split("\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2807 lines = lines[2:-1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2808 orfnames = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2809 positions = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2810 firstline = "y"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2811 x = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2812 orfnr = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2813 starts = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2814 ends = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2815 for i in lines:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2816 columns = i.split("\t")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2817 if len(columns) > 1:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2818 if x == 0:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2819 strand = columns[6]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2820 if "mRNA" not in i:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2821 starts.append(columns[3])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2822 ends.append(columns[4])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2823 elif x == (len(lines) - 1) or "mRNA" in lines[x + 1]:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2824 strand = columns[6]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2825 starts.append(columns[3])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2826 ends.append(columns[4])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2827 orfnr += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2828 if len(str(orfnr)) == 1:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2829 orfname = "orf0000" + str(orfnr)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2830 elif len(str(orfnr)) == 2:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2831 orfname = "orf000" + str(orfnr)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2832 elif len(str(orfnr)) == 3:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2833 orfname = "orf00" + str(orfnr)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2834 elif len(str(orfnr)) == 4:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2835 orfname = "orf0" + str(orfnr)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2836 elif len(str(orfnr)) == 5:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2837 orfname = "orf" + str(orfnr)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2838 orfnames.append(orfname)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2839 if strand == "+":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2840 if len(starts) == 1:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2841 pos = starts[0] + ".." + ends[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2842 positions.append(pos)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2843 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2844 pos = "join("
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2845 y = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2846 for i in starts:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2847 pos = pos + i + ".." + ends[y]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2848 if i != starts[-1]:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2849 pos = pos + ","
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2850 y += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2851 pos = pos + ")"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2852 positions.append(pos)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2853 elif strand == "-":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2854 if len(starts) == 1:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2855 pos = "complement(" + starts[0] + ".." + ends[0] + ")"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2856 positions.append(pos)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2857 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2858 pos = "complement(join("
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2859 y = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2860 for i in starts:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2861 pos = pos + i + ".." + ends[y]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2862 if i != starts[-1]:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2863 pos = pos + ","
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2864 y += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2865 pos = pos + "))"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2866 positions.append(pos)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2867 starts = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2868 ends = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2869 elif "mRNA" not in i:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2870 starts.append(columns[3])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2871 ends.append(columns[4])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2872 x += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2873 if len(orfnames) == 0:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2874 print >> sys.stderr, "GlimmerHMM gene prediction failed. Please check the format of your input FASTA file. Error: 12"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2875 logfile = open("antismash.log","w")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2876 logfile.write("GlimmerHMM gene prediction failed. Please check the format of your input FASTA file. Error 12\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2877 logfile.close()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2878 sys.exit(1)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2879 out_file = open(emblfile,"w")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2880 a = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2881 #print "Writing EMBL file with GlimmerHMM-predicted genes..."
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2882 logfile = open("antismash.log","w")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2883 logfile.write(loginfo)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2884 logfile.write("Writing EMBL file with GlimmerHMM-predicted genes...\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2885 #logfile.close()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2886 loginfo = open("antismash.log","r").read()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2887 #logfile.close()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2888 out_file.write("ID A01; SV 1; linear; DNA; STD; FUN; " + str(dnaseqlength) + " BP.\nXX\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2889 out_file.write("AC A01;\nXX\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2890 out_file.write("DE " + genomename + ";\nXX\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2891 out_file.write("KW none;\nXX\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2892 out_file.write("OS unknown;\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2893 out_file.write("OC Fungi;\nXX\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2894 out_file.write("RN [1]\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2895 out_file.write("RT ;\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2896 out_file.write("RL Unknown.\nXX\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2897 out_file.write("FH Key Location/Qualifiers\nFH\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2898 out_file.write("FT source 1.." + str(dnaseqlength) + "\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2899 for i in orfnames:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2900 out_file.write("FT gene ")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2901 out_file.write(positions[a])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2902 out_file.write("\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2903 out_file.write('FT /gene="' + i + '"\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2904 out_file.write("FT CDS ")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2905 out_file.write(positions[a])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2906 out_file.write("\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2907 out_file.write('FT /gene="' + i + '"\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2908 a += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2909 out_file.write("XX\nSQ Sequence " + str(dnaseqlength) + " BP; " + str(seq.count("a") + seq.count("A")) + " A; " + str(seq.count("c") + seq.count("C")) + " C; " + str(seq.count("g") + seq.count("G")) + " G; " + str(seq.count("t") + seq.count("T")) + " T; " + str(dnaseqlength - (seq.count("a") + seq.count("A") + seq.count("c") + seq.count("C") + seq.count("g") + seq.count("G") + seq.count("t") + seq.count("T"))) + " other;\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2910 seq2 = seq
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2911 out_file.write(" ")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2912 grouplen=10
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2913 textlen = len(seq)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2914 end = textlen - (textlen % grouplen)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2915 repeated_iterator = [iter(itertools.islice(seq, 0, end))] * grouplen
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2916 parts = list(itertools.imap(lambda *chars: ''.join(chars),*repeated_iterator))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2917 if dnaseqlength%grouplen != 0:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2918 parts.append(seq[-1 * (dnaseqlength%grouplen):])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2919 w = 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2920 for l in parts:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2921 out_file.write(l + " ")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2922 if w == len(parts):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2923 if w%6 == 0 and dnaseqlength%60 != 0:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2924 out_file.write((" " * (10 - dnaseqlength%grouplen) + " " * (10 - len(str(dnaseqlength)))) + str(dnaseqlength) + "\n//")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2925 elif dnaseqlength%60 == 0:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2926 out_file.write((" " * (10 - len(str(dnaseqlength)))) + str(dnaseqlength) + "\n//")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2927 elif w%6 == 5 and dnaseqlength%grouplen == 0:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2928 out_file.write((" " + " " * (10 - len(str(dnaseqlength)))) + str(dnaseqlength) + "\n//")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2929 elif dnaseqlength%grouplen != 0:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2930 out_file.write(" " * (10 - dnaseqlength%grouplen) + " " * (6 - len(parts)%6) + " " * (6 - len(parts)%6) + (" " * (10 - len(str(dnaseqlength)))) + str(dnaseqlength) + "\n//")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2931 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2932 out_file.write(" " * (6 - len(parts)%6) + " " * (5 - len(parts)%6) + (" " * (10 - len(str(dnaseqlength)))) + str(dnaseqlength) + "\n//")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2933 elif w%6 == 0:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2934 out_file.write((" " * (10 - len(str(w * 10)))) + str(w * 10) + "\n ")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2935 w += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2936 out_file.close()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2937 os.chdir("../../")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2938 infile = emblfile[6:]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2939 emblfile = emblfile[6:]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2940 if taxon == "p":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2941 glimmeroutputfolder = genomename + "/glimmer/"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2942 elif taxon == "e":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2943 glimmeroutputfolder = genomename + "/glimmerhmm/"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2944 try:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2945 os.mkdir(glimmeroutputfolder)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2946 except(IOError,OSError):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2947 pass
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2948 proteins = embl2proteins(infile,sequence)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2949 genomic_accnr = proteins[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2950 dnaseqlength = proteins[2]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2951 proteins = proteins[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2952 writefasta(proteins[0],proteins[1],genomename + "/genome_proteins.fasta")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2953 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2954 #print "Reading embl/gbk file and creating input FASTA file for gene cluster detection..."
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2955 logfile.write("Reading embl/gbk file and creating input FASTA file for gene cluster detection...\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2956 if infile.split(".")[-1] == "embl" or infile.split(".")[-1] == "EMBL" or infile.split(".")[-1] == "emb" or infile.split(".")[-1] == "EMB":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2957 sequence = ""
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2958 proteins = embl2proteins(infile,sequence)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2959 genomic_accnr = proteins[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2960 dnaseqlength = proteins[2]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2961 proteins = proteins[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2962 writefasta(proteins[0],proteins[1],genomename + "/genome_proteins.fasta")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2963 elif infile.split(".")[-1] == "gbk" or infile.split(".")[-1] == "GBK" or infile.split(".")[-1] == "gb" or infile.split(".")[-1] == "GB" or infile.split(".")[-1] == "genbank" or infile.split(".")[-1] == "GENBANK":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2964 proteins = gbk2proteins(infile)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2965 genomic_accnr = proteins[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2966 dnaseqlength = proteins[2]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2967 proteins = proteins[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2968 writefasta(proteins[0],proteins[1],genomename + "/genome_proteins.fasta")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2969 accessiondict = proteins[4]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2970 seqdict = {}
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2971 fullnamedict = {}
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2972 strandsdict = {}
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2973 z = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2974 for i in proteins[0]:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2975 name = i.split("|")[4]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2976 seq = proteins[1][z]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2977 seqdict[name] = seq
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2978 strand = i.split("|")[3]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2979 strandsdict[name] = strand
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2980 fullnamedict[name] = i
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2981 z += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2982
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2983 elapsed = (time.time() - starttime)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2984 #print "2968Time since start: " + str(elapsed)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2985
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2986 #Run hmmsearch on proteins from input file and parse output
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2987 #print "Performing HMM search on proteins for detection of signature genes..."
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2988 logfile.write("Performing HMM search on proteins for detection of signature genes...\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2989 hmmslist = ["AMP-binding.hmm","BLS.hmm","CAS.hmm","Chal_sti_synt_C.hmm","Chal_sti_synt_N.hmm","Condensation.hmm","ene_KS.hmm","hyb_KS.hmm","itr_KS.hmm","mod_KS.hmm","tra_KS.hmm","LANC_like.hmm","ATd.hmm","PKS_AT.hmm","PKS_KS.hmm","PP-binding.hmm","t2clf.hmm","t2ks.hmm","t2ks2.hmm","Terpene_synth.hmm","Terpene_synth_C.hmm","strH_like.hmm","neoL_like.hmm","DOIS.hmm","valA_like.hmm","spcFG_like.hmm","spcDK_like_cou.hmm","spcDK_like_glyc.hmm","strK_like1.hmm","strK_like2.hmm","bt1fas.hmm","ft1fas.hmm","t2fas.hmm","hglD.hmm","hglE.hmm","fabH.hmm","AfsA.hmm","IucA_IucC.hmm","ectoine_synt.hmm","phytoene_synt.hmm","Lant_dehyd_N.hmm","Lant_dehyd_C.hmm","Antimicrobial18.hmm","Gallidermin.hmm","L_biotic_typeA.hmm","LE-DUF.hmm","LE-LAC481.hmm","LE-LanBC.hmm","LE-MER+2PEP.hmm","MA-2PEPA.hmm","MA-DUF.hmm","MA-EPI.hmm","MA-LAC481.hmm","MA-NIS+EPI.hmm","MA-NIS.hmm","indsynth.hmm","A-OX.hmm","LmbU.hmm","MoeO5.hmm","LipM.hmm","LipU.hmm","LipV.hmm","ToyB.hmm","TunD.hmm","melC.hmm","strepbact.hmm","goadsporin_like.hmm","Antimicrobial14.hmm","Bacteriocin_IId.hmm","BacteriocIIc_cy.hmm","Bacteriocin_II.hmm","Lactococcin.hmm","Antimicrobial17.hmm","Lactococcin_972.hmm","Bacteriocin_IIc.hmm","LcnG-beta.hmm","Bacteriocin_IIi.hmm","Subtilosin_A.hmm","Cloacin.hmm","Neocarzinostat.hmm","Linocin_M18.hmm","TIGR03603.hmm","TIGR03604.hmm","TIGR03605.hmm","TIGR03731.hmm","TIGR03651.hmm","TIGR03678.hmm","TIGR03693.hmm","TIGR03798.hmm","TIGR03882.hmm","TIGR03601.hmm","TIGR03602.hmm","tabtoxin.hmm","cycdipepsynth.hmm","cyanobactin_synth.hmm","fom1.hmm","bcpB.hmm","frbD.hmm","mitE.hmm",'Lycopene_cycl.hmm','terpene_cyclase.hmm','NapT7.hmm','fung_ggpps.hmm','fung_ggpps2.hmm','dmat.hmm','trichodiene_synth.hmm','novK.hmm','novJ.hmm','novI.hmm','novH.hmm','pur6.hmm','pur10.hmm','nikJ.hmm','nikO.hmm','mvnA.hmm','thiostrepton.hmm','NAD_binding_4.hmm','vlmB.hmm','salQ.hmm','prnB.hmm']
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2990 for i in hmmslist:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2991 hmmsearch = hmmsearch_path + " " + "--cpu " + str(nrcpus) + " -o " + genomename + "/hmmoutput/" + i.split(".")[0] + "_output.txt" + " --noali --tblout " + genomename + "/hmmoutput/" + i.split(".")[0] + ".txt " + hmms_path + i + " " + genomename + "/genome_proteins.fasta"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2992 os.system(hmmsearch)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2993 #print "Parsing HMM outputs..."
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2994 logfile.write("Parsing HMM outputs...\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2995 detecteddomainsdict = {}
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2996 #Extract type I PKS proteins, KS cut-off: 50; AT cut-off: 50; exclude those sequences that score higher on the type I FAS (bt1fas/ft1fas), hglD/hglE-like or fabH HMMs than on the PKS KS HMM
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2997 t1pksprots = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2998 transatpksprots = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
2999 if 1 in geneclustertypes or 2 in geneclustertypes or 3 in geneclustertypes or 4 in geneclustertypes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3000 ks = parsehmmoutput(50,hmmoutputfolder + "PKS_KS.txt")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3001 at = parsehmmoutput(50,hmmoutputfolder + "PKS_AT.txt")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3002 ft1fasks = parsehmmoutput(50,hmmoutputfolder + "ft1fas.txt")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3003 bt1fasks = parsehmmoutput(50,hmmoutputfolder + "bt1fas.txt")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3004 hgleks = parsehmmoutput(50,hmmoutputfolder + "hglE.txt")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3005 hgldks = parsehmmoutput(50,hmmoutputfolder + "hglD.txt")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3006 fabhks = parsehmmoutput(50,hmmoutputfolder + "fabH.txt")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3007 pksksprots = ks[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3008 pksatprots = at[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3009 pksatscores = at[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3010 pksksscores = ks[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3011 bt1fasprots = bt1fasks[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3012 bt1fasscores = bt1fasks[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3013 ft1fasprots = ft1fasks[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3014 ft1fasscores = ft1fasks[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3015 hgleprots = hgleks[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3016 hglescores = hgleks[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3017 hgldprots = hgldks[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3018 hgldscores = hgldks[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3019 fabhprots = fabhks[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3020 fabhscores = fabhks[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3021 for i in pksksprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3022 exclude = "n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3023 score = pksksscores[pksksprots.index(i)]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3024 if i in bt1fasprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3025 bt1fasscore = bt1fasscores[bt1fasprots.index(i)]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3026 if float(score) < float(bt1fasscore):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3027 exclude = "y"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3028 if i in ft1fasprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3029 ft1fasscore = ft1fasscores[ft1fasprots.index(i)]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3030 if float(score) < float(ft1fasscore):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3031 exclude = "y"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3032 if i in hgldprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3033 hgldscore = hgldscores[hgldprots.index(i)]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3034 if float(score) < float(hgldscore):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3035 exclude = "y"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3036 if i in hgleprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3037 hglescore = hglescores[hgleprots.index(i)]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3038 if float(score) < float(hglescore):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3039 exclude = "y"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3040 if i in fabhprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3041 fabhscore = fabhscores[fabhprots.index(i)]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3042 if float(score) < float(fabhscore):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3043 exclude = "y"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3044 if i in pksatprots and exclude == "n":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3045 t1pksprots.append(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3046 if detecteddomainsdict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3047 detdomlist = detecteddomainsdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3048 detdomlist.append(["PKS ketosynthase domain",pksksscores[pksksprots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3049 detdomlist.append(["PKS acyltransferase domain",pksatscores[pksatprots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3050 detecteddomainsdict[i] = detdomlist
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3051 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3052 detecteddomainsdict[i] = [["PKS ketosynthase domain",pksksscores[pksksprots.index(i)]],["PKS acyltransferase domain",pksatscores[pksatprots.index(i)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3053 #Extract trans-AT PKSs: proteins with KS hits that were not classified as type I PKS above, and with both trans-AT specific ATd hits (cut-off: 65) and tra_KS hits (cut-off: 50)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3054 atd = parsehmmoutput(65,hmmoutputfolder + "ATd.txt")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3055 traks = parsehmmoutput(50,hmmoutputfolder + "tra_KS.txt")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3056 traksprots = traks[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3057 atdprots = atd[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3058 atdscores = atd[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3059 for i in pksksprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3060 if i in atdprots and i in traksprots and i not in t1pksprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3061 transatpksprots.append(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3062 if detecteddomainsdict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3063 detdomlist = detecteddomainsdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3064 detdomlist.append(["PKS ketosynthase domain",pksksscores[pksksprots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3065 detdomlist.append(["Trans-AT PKS AT-docking domain",atdscores[atdprots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3066 detecteddomainsdict[i] = detdomlist
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3067 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3068 detecteddomainsdict[i] = [["PKS ketosynthase domain",pksksscores[pksksprots.index(i)]],["Trans-AT PKS AT-docking domain",atdscores[atdprots.index(i)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3069 #Extract type II PKS & CLF proteins, KS-cut-off: 50, t2KS/clf score > modKS,eneKS,hybKS,itrKS,traKS,t1fas,t2fas,hgle scores (t2KS is also checked against fabH)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3070 t2pksprots = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3071 if 1 in geneclustertypes or 2 in geneclustertypes or 3 in geneclustertypes or 4 in geneclustertypes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3072 t2ks = parsehmmoutput(50,hmmoutputfolder + "t2ks.txt")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3073 t2ks2 = parsehmmoutput(450,hmmoutputfolder + "t2ks2.txt")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3074 t2clf = parsehmmoutput(50,hmmoutputfolder + "t2clf.txt")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3075 eneks = parsehmmoutput(50,hmmoutputfolder + "ene_KS.txt")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3076 hybks = parsehmmoutput(50,hmmoutputfolder + "hyb_KS.txt")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3077 modks = parsehmmoutput(50,hmmoutputfolder + "mod_KS.txt")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3078 itrks = parsehmmoutput(50,hmmoutputfolder + "itr_KS.txt")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3079 traks = parsehmmoutput(50,hmmoutputfolder + "tra_KS.txt")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3080 t2fasks = parsehmmoutput(50,hmmoutputfolder + "t2fas.txt")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3081 ft1fasks = parsehmmoutput(50,hmmoutputfolder + "ft1fas.txt")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3082 bt1fasks = parsehmmoutput(50,hmmoutputfolder + "bt1fas.txt")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3083 hgleks = parsehmmoutput(50,hmmoutputfolder + "hglE.txt")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3084 hgldks = parsehmmoutput(50,hmmoutputfolder + "hglD.txt")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3085 fabhks = parsehmmoutput(50,hmmoutputfolder + "fabH.txt")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3086 t2ksprots = t2ks[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3087 t2ks2prots = t2ks2[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3088 t2clfprots = t2clf[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3089 eneksprots = eneks[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3090 hybksprots = hybks[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3091 modksprots = modks[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3092 itrksprots = itrks[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3093 traksprots = traks[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3094 t2fasprots = t2fasks[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3095 t2ksscores = t2ks[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3096 t2ks2scores = t2ks2[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3097 t2clfscores = t2clf[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3098 eneksscores = eneks[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3099 hybksscores = hybks[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3100 modksscores = modks[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3101 itrksscores = itrks[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3102 traksscores = traks[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3103 t2fasscores = t2fasks[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3104 bt1fasprots = bt1fasks[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3105 bt1fasscores = bt1fasks[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3106 ft1fasprots = ft1fasks[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3107 ft1fasscores = ft1fasks[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3108 hgleprots = hgleks[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3109 hglescores = hgleks[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3110 hgldprots = hgldks[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3111 hgldscores = hgldks[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3112 fabhprots = fabhks[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3113 fabhscores = fabhks[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3114 for i in t2ksprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3115 type2 = "y"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3116 score = t2ksscores[t2ksprots.index(i)]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3117 if i in eneksprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3118 enescore = eneksscores[eneksprots.index(i)]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3119 if float(enescore) > float(score):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3120 type2 = "n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3121 if i in hybksprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3122 hybscore = hybksscores[hybksprots.index(i)]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3123 if float(hybscore) > float(score):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3124 type2 = "n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3125 if i in modksprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3126 modscore = modksscores[modksprots.index(i)]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3127 if float(modscore) > float(score):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3128 type2 = "n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3129 if i in itrksprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3130 itrscore = itrksscores[itrksprots.index(i)]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3131 if float(itrscore) > float(score):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3132 type2 = "n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3133 if i in traksprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3134 trascore = traksscores[traksprots.index(i)]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3135 if float(trascore) > float(score):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3136 type2 = "n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3137 if i in bt1fasprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3138 bt1fasscore = bt1fasscores[bt1fasprots.index(i)]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3139 if float(bt1fasscore) > float(score):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3140 type2 = "n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3141 if i in ft1fasprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3142 ft1fasscore = ft1fasscores[ft1fasprots.index(i)]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3143 if float(ft1fasscore) > float(score):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3144 type2 = "n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3145 if i in t2fasprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3146 t2fasscore = t2fasscores[t2fasprots.index(i)]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3147 if float(t2fasscore) > float(score):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3148 type2 = "n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3149 if i in hgleprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3150 hglescore = hglescores[hgleprots.index(i)]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3151 if float(hglescore) > float(score):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3152 type2 = "n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3153 if i in fabhprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3154 fabhscore = fabhscores[fabhprots.index(i)]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3155 if float(fabhscore) > float(score):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3156 type2 = "n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3157 if type2 == "y" and i not in t2pksprots and i not in t1pksprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3158 t2pksprots.append(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3159 if detecteddomainsdict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3160 detdomlist = detecteddomainsdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3161 detdomlist.append(["Type II ketosynthase",t2ksscores[t2ksprots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3162 detecteddomainsdict[i] = detdomlist
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3163 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3164 detecteddomainsdict[i] = [["Type II ketosynthase",t2ksscores[t2ksprots.index(i)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3165 for i in t2clfprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3166 type2 = "y"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3167 score = t2clfscores[t2clfprots.index(i)]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3168 if i in eneksprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3169 enescore = eneksscores[eneksprots.index(i)]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3170 if float(enescore) > float(score):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3171 type2 = "n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3172 if i in hybksprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3173 hybscore = hybksscores[hybksprots.index(i)]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3174 if float(hybscore) > float(score):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3175 type2 = "n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3176 if i in modksprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3177 modscore = modksscores[modksprots.index(i)]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3178 if float(modscore) > float(score):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3179 type2 = "n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3180 if i in itrksprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3181 itrscore = itrksscores[itrksprots.index(i)]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3182 if float(itrscore) > float(score):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3183 type2 = "n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3184 if i in traksprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3185 trascore = traksscores[traksprots.index(i)]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3186 if float(trascore) > float(score):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3187 type2 = "n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3188 if i in bt1fasprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3189 bt1fasscore = bt1fasscores[bt1fasprots.index(i)]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3190 if float(bt1fasscore) > float(score):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3191 type2 = "n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3192 if i in ft1fasprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3193 ft1fasscore = ft1fasscores[ft1fasprots.index(i)]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3194 if float(ft1fasscore) > float(score):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3195 type2 = "n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3196 if i in t2fasprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3197 t2fasscore = t2fasscores[t2fasprots.index(i)]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3198 if float(t2fasscore) > float(score):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3199 type2 = "n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3200 if i in hgleprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3201 hglescore = hglescores[hgleprots.index(i)]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3202 if float(hglescore) > float(score):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3203 type2 = "n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3204 if i in fabhprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3205 fabhscore = fabhscores[fabhprots.index(i)]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3206 if float(fabhscore) > float(score):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3207 type2 = "n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3208 if type2 == "y" and i not in t2pksprots and i not in t1pksprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3209 t2pksprots.append(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3210 if detecteddomainsdict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3211 detdomlist = detecteddomainsdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3212 detdomlist.append(["Chain length factor",t2clfscores[t2clfprots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3213 detecteddomainsdict[i] = detdomlist
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3214 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3215 detecteddomainsdict[i] = [["Chain length factor",t2clfscores[t2clfprots.index(i)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3216 for i in t2ks2prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3217 type2 = "y"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3218 score = t2ks2scores[t2ks2prots.index(i)]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3219 if i in eneksprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3220 enescore = eneksscores[eneksprots.index(i)]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3221 if float(enescore) > float(score):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3222 type2 = "n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3223 if i in hybksprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3224 hybscore = hybksscores[hybksprots.index(i)]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3225 if float(hybscore) > float(score):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3226 type2 = "n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3227 if i in modksprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3228 modscore = modksscores[modksprots.index(i)]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3229 if float(modscore) > float(score):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3230 type2 = "n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3231 if i in itrksprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3232 itrscore = itrksscores[itrksprots.index(i)]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3233 if float(itrscore) > float(score):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3234 type2 = "n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3235 if i in traksprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3236 trascore = traksscores[traksprots.index(i)]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3237 if float(trascore) > float(score):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3238 type2 = "n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3239 if i in bt1fasprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3240 bt1fasscore = bt1fasscores[bt1fasprots.index(i)]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3241 if float(bt1fasscore) > float(score):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3242 type2 = "n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3243 if i in ft1fasprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3244 ft1fasscore = ft1fasscores[ft1fasprots.index(i)]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3245 if float(ft1fasscore) > float(score):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3246 type2 = "n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3247 if i in t2fasprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3248 t2fasscore = t2fasscores[t2fasprots.index(i)]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3249 if float(t2fasscore) > float(score):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3250 type2 = "n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3251 if i in hgleprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3252 hglescore = hglescores[hgleprots.index(i)]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3253 if float(hglescore) > float(score):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3254 type2 = "n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3255 if i in fabhprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3256 fabhscore = fabhscores[fabhprots.index(i)]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3257 if float(fabhscore) > float(score):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3258 type2 = "n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3259 if type2 == "y" and i not in t2pksprots and i not in t1pksprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3260 t2pksprots.append(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3261 if detecteddomainsdict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3262 detdomlist = detecteddomainsdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3263 detdomlist.append(["Type II ketosynthase, model 2",t2ks2scores[t2ks2prots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3264 detecteddomainsdict[i] = detdomlist
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3265 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3266 detecteddomainsdict[i] = [["Type II ketosynthase, model 2",t2ks2scores[t2ks2prots.index(i)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3267 #Extract type III PKS proteins
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3268 t3pksprots = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3269 if 1 in geneclustertypes or 2 in geneclustertypes or 3 in geneclustertypes or 4 in geneclustertypes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3270 t3n = parsehmmoutput(63,hmmoutputfolder + "Chal_sti_synt_N.txt")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3271 t3c = parsehmmoutput(35,hmmoutputfolder + "Chal_sti_synt_C.txt")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3272 t3nprots = t3n[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3273 t3nscores = t3n[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3274 t3cprots = t3c[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3275 t3cscores = t3c[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3276 for i in t3cprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3277 if i not in t3pksprots and i not in t1pksprots and i not in t2pksprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3278 t3pksprots.append(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3279 if detecteddomainsdict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3280 detdomlist = detecteddomainsdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3281 detdomlist.append(["Chalcone/stilbene synthase,C-terminus",t3cscores[t3cprots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3282 detecteddomainsdict[i] = detdomlist
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3283 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3284 detecteddomainsdict[i] = [["Chalcone/stilbene synthase,C-terminus",t3cscores[t3cprots.index(i)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3285 for i in t3nprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3286 if i not in t3pksprots and i not in t1pksprots and i not in t2pksprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3287 t3pksprots.append(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3288 if detecteddomainsdict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3289 detdomlist = detecteddomainsdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3290 detdomlist.append(["Chalcone/stilbene synthase,N-terminus",t3nscores[t3nprots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3291 detecteddomainsdict[i] = detdomlist
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3292 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3293 detecteddomainsdict[i] = [["Chalcone/stilbene synthase,N-terminus",t3nscores[t3nprots.index(i)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3294 #Extract 'type IV' hglE-like PKS proteins, cut-off:50; only if not already scored as type 1-3 PKS, and not if FAS HMM has higher score
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3295 t4pksprots = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3296 if 1 in geneclustertypes or 2 in geneclustertypes or 3 in geneclustertypes or 4 in geneclustertypes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3297 t2fasks = parsehmmoutput(50,hmmoutputfolder + "t2fas.txt")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3298 t2fasprots = t2fasks[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3299 t2fasscores = t2fasks[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3300 for i in hgleprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3301 type4 = "y"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3302 score = hglescores[hgleprots.index(i)]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3303 if i in bt1fasprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3304 bt1fasscore = bt1fasscores[bt1fasprots.index(i)]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3305 if float(bt1fasscore) > float(score):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3306 type4 = "n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3307 if i in ft1fasprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3308 ft1fasscore = ft1fasscores[ft1fasprots.index(i)]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3309 if float(ft1fasscore) > float(score):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3310 type4 = "n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3311 if i in t2fasprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3312 t2fasscore = t2fasscores[t2fasprots.index(i)]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3313 if float(t2fasscore) > float(score):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3314 type4 = "n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3315 if i in fabhprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3316 fabhscore = fabhscores[fabhprots.index(i)]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3317 if float(fabhscore) > float(score):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3318 type4 = "n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3319 if i not in t1pksprots and i not in t2pksprots and i not in t3pksprots and i not in transatpksprots and type4 == "y":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3320 t4pksprots.append(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3321 if detecteddomainsdict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3322 detdomlist = detecteddomainsdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3323 detdomlist.append(["Atypical PKS domain, HglE-like",hglescores[hgleprots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3324 detecteddomainsdict[i] = detdomlist
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3325 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3326 detecteddomainsdict[i] = [["Atypical PKS domain, HglE-like",hglescores[hgleprots.index(i)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3327 for i in hgldprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3328 type4 = "y"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3329 score = hgldscores[hgldprots.index(i)]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3330 if i in bt1fasprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3331 bt1fasscore = bt1fasscores[bt1fasprots.index(i)]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3332 if float(bt1fasscore) > float(score):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3333 type4 = "n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3334 if i in ft1fasprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3335 ft1fasscore = ft1fasscores[ft1fasprots.index(i)]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3336 if float(ft1fasscore) > float(score):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3337 type4 = "n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3338 if i in t2fasprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3339 t2fasscore = t2fasscores[t2fasprots.index(i)]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3340 if float(t2fasscore) > float(score):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3341 type4 = "n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3342 if i in fabhprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3343 fabhscore = fabhscores[fabhprots.index(i)]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3344 if float(fabhscore) > float(score):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3345 type4 = "n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3346 if i not in t1pksprots and i not in t2pksprots and i not in t3pksprots and i not in transatpksprots and type4 == "y" and i not in t4pksprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3347 t4pksprots.append(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3348 if detecteddomainsdict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3349 detdomlist = detecteddomainsdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3350 detdomlist.append(["Atypical PKS domain, HglD-like",hgldscores[hgldprots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3351 detecteddomainsdict[i] = detdomlist
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3352 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3353 detecteddomainsdict[i] = [["Atypical PKS domain, HglD-like",hgldscores[hgldprots.index(i)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3354 #Extract NRPS proteins (C cut-off: 20; A cut-off: 20): a protein must contain both a C and an A domain, or be a single-domain C, A or T protein located within 20 kb of another such single-domain protein or of a full NRPS
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3355 nrpsprots = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3356 if 1 in geneclustertypes or 5 in geneclustertypes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3357 cond = parsehmmoutput(20,hmmoutputfolder + "Condensation.txt")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3358 amp = parsehmmoutput(20,hmmoutputfolder + "AMP-binding.txt")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3359 ampox = parsehmmoutput(50,hmmoutputfolder + "A-OX.txt")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3360 ampoxprots = ampox[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3361 ampoxscores = ampox[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3362 for i in ampox[0]:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3363 if i not in amp:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3364 amp.append(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3365 cprots = cond[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3366 cscores = cond[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3367 aprots = amp[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3368 ascores = amp[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3369 nrpsprots = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3370 for i in cprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3371 if i in aprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3372 nrpsprots.append(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3373 if detecteddomainsdict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3374 detdomlist = detecteddomainsdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3375 detdomlist.append(["Condensation domain",cscores[cprots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3376 if i in aprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3377 detdomlist.append(["Adenylation domain",ascores[aprots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3378 elif i in ampoxprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3379 detdomlist.append(["Adenylation domain with integrated oxidase",ampoxscores[ampoxprots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3380 detecteddomainsdict[i] = detdomlist
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3381 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3382 if i in aprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3383 detecteddomainsdict[i] = [["Condensation domain",cscores[cprots.index(i)]],["Adenylation domain",ascores[aprots.index(i)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3384 elif i in ampoxprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3385 detecteddomainsdict[i] = [["Condensation domain",cscores[cprots.index(i)]],["Adenylation domain with integrated oxidase",ampoxscores[ampoxprots.index(i)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3386 for i in t1pksprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3387 if i in aprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3388 nrpsprots.append(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3389 if detecteddomainsdict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3390 detdomlist = detecteddomainsdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3391 if i in aprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3392 detdomlist.append(["Adenylation domain",ascores[aprots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3393 elif i in ampoxprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3394 detdomlist.append(["Adenylation domain with integrated oxidase",ampoxscores[ampoxprots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3395 detecteddomainsdict[i] = detdomlist
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3396 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3397 if i in aprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3398 detecteddomainsdict[i] = [["Adenylation domain",ascores[aprots.index(i)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3399 elif i in ampoxprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3400 detecteddomainsdict[i] = [["Adenylation domain with integrated oxidase",ampoxscores[ampoxprots.index(i)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3401 single_aprots = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3402 single_cprots = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3403 single_pptprots = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3404 pptprots = parsehmmoutput(20,hmmoutputfolder + "PP-binding.txt")[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3405 for i in aprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3406 if i not in nrpsprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3407 single_aprots.append(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3408 for i in cprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3409 if i not in nrpsprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3410 single_cprots.append(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3411 for i in pptprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3412 if i not in nrpsprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3413 single_pptprots.append(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3414 genelist = proteins[2]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3415 genedict = proteins[3]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3416 single_aprots_positions = {}
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3417 single_cprots_positions = {}
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3418 single_pptprots_positions = {}
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3419 nrpsprots_positions = {}
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3420 for j in single_aprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3421 if j in genelist:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3422 protstart_abs = min([int(genedict[j][0]),int(genedict[j][1])])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3423 protend_abs = max([int(genedict[j][0]),int(genedict[j][1])])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3424 single_aprots_positions[j] = int((protend_abs + protstart_abs) / 2)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3425 for j in single_cprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3426 if j in genelist:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3427 protstart_abs = min([int(genedict[j][0]),int(genedict[j][1])])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3428 protend_abs = max([int(genedict[j][0]),int(genedict[j][1])])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3429 single_cprots_positions[j] = int((protend_abs + protstart_abs) / 2)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3430 for j in single_pptprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3431 if j in genelist:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3432 protstart_abs = min([int(genedict[j][0]),int(genedict[j][1])])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3433 protend_abs = max([int(genedict[j][0]),int(genedict[j][1])])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3434 single_pptprots_positions[j] = int((protend_abs + protstart_abs) / 2)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3435 for j in nrpsprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3436 if j in genelist:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3437 protstart_abs = min([int(genedict[j][0]),int(genedict[j][1])])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3438 protend_abs = max([int(genedict[j][0]),int(genedict[j][1])])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3439 nrpsprots_positions[j] = int((protend_abs + protstart_abs) / 2)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3440 nrpsprots2 = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3441 for i in nrpsprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3442 nrpsprots2.append(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3443 for j in single_aprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3444 include = "n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3445 pos = single_aprots_positions[j]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3446 for i in single_cprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3447 pos2 = single_cprots_positions[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3448 if abs(pos - pos2) < 20000:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3449 include = "y"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3450 for i in nrpsprots2:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3451 pos2 = nrpsprots_positions[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3452 if abs(pos - pos2) < 20000:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3453 include = "y"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3454 if include == "y":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3455 nrpsprots.append(j)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3456 if detecteddomainsdict.has_key(j):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3457 detdomlist = detecteddomainsdict[j]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3458 if j in aprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3459 detdomlist.append(["Adenylation domain",ascores[aprots.index(j)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3460 elif j in ampoxprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3461 detdomlist.append(["Adenylation domain with integrated oxidase",ampoxscores[ampoxprots.index(j)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3462 detecteddomainsdict[j] = detdomlist
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3463 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3464 if j in aprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3465 detecteddomainsdict[j] = [["Adenylation domain",ascores[aprots.index(j)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3466 elif j in ampoxprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3467 detecteddomainsdict[j] = [["Adenylation domain with integrated oxidase",ampoxscores[ampoxprots.index(j)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3468 for j in single_cprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3469 include = "n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3470 pos = single_cprots_positions[j]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3471 for i in single_aprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3472 pos2 = single_aprots_positions[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3473 if abs(pos - pos2) < 20000:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3474 include = "y"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3475 for i in nrpsprots2:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3476 pos2 = nrpsprots_positions[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3477 if abs(pos - pos2) < 20000:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3478 include = "y"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3479 if include == "y":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3480 nrpsprots.append(j)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3481 if detecteddomainsdict.has_key(j):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3482 detdomlist = detecteddomainsdict[j]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3483 detdomlist.append(["Condensation domain",cscores[cprots.index(j)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3484 detecteddomainsdict[j] = detdomlist
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3485 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3486 detecteddomainsdict[j] = [["Condensation domain",cscores[cprots.index(j)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3487 #Extract Terpene synthase proteins, various cut-offs
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3488 terpeneprots = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3489 if 1 in geneclustertypes or 6 in geneclustertypes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3490 terpene = parsehmmoutput(23,hmmoutputfolder + "Terpene_synth_C.txt")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3491 terpeneprots = terpene[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3492 terpenescores = terpene[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3493 for i in terpeneprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3494 if detecteddomainsdict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3495 detdomlist = detecteddomainsdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3496 detdomlist.append(["Terpene synthase, C-terminus",terpenescores[terpeneprots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3497 detecteddomainsdict[i] = detdomlist
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3498 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3499 detecteddomainsdict[i] = [["Terpene synthase, C-terminus",terpenescores[terpeneprots.index(i)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3500 if 1 in geneclustertypes or 6 in geneclustertypes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3501 physqualdata = parsehmmoutput(20,hmmoutputfolder + "phytoene_synt.txt")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3502 physqualprots = physqualdata[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3503 physqualscores = physqualdata[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3504 for i in physqualprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3505 if i not in terpeneprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3506 terpeneprots.append(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3507 if detecteddomainsdict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3508 detdomlist = detecteddomainsdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3509 detdomlist.append(["Phytoene/squalene synthase",physqualscores[physqualprots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3510 detecteddomainsdict[i] = detdomlist
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3511 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3512 detecteddomainsdict[i] = [["Phytoene/squalene synthase",physqualscores[physqualprots.index(i)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3513 if 1 in geneclustertypes or 6 in geneclustertypes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3514 lycopenedata = parsehmmoutput(80,hmmoutputfolder + "Lycopene_cycl.txt")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3515 lycopeneprots = lycopenedata[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3516 lycopenescores = lycopenedata[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3517 for i in lycopeneprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3518 if i not in terpeneprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3519 terpeneprots.append(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3520 if detecteddomainsdict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3521 detdomlist = detecteddomainsdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3522 detdomlist.append(["Lycopene cyclase",lycopenescores[lycopeneprots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3523 detecteddomainsdict[i] = detdomlist
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3524 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3525 detecteddomainsdict[i] = [["Lycopene cyclase",lycopenescores[lycopeneprots.index(i)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3526 if 1 in geneclustertypes or 6 in geneclustertypes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3527 terpene_cyclasesdata = parsehmmoutput(50,hmmoutputfolder + "terpene_cyclase.txt")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3528 terpene_cyclases = terpene_cyclasesdata[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3529 terpene_cyclases_scores = terpene_cyclasesdata[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3530 for i in terpene_cyclases:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3531 if i not in terpeneprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3532 terpeneprots.append(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3533 if detecteddomainsdict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3534 detdomlist = detecteddomainsdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3535 detdomlist.append(["Terpene cyclase",terpene_cyclases_scores[terpene_cyclases.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3536 detecteddomainsdict[i] = detdomlist
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3537 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3538 detecteddomainsdict[i] = [["Terpene cyclase",terpene_cyclases_scores[terpene_cyclases.index(i)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3539 if 1 in geneclustertypes or 6 in geneclustertypes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3540 NapT7 = parsehmmoutput(250,hmmoutputfolder + "NapT7.txt")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3541 NapT7prots = NapT7[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3542 NapT7scores = NapT7[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3543 for i in NapT7prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3544 if i not in terpeneprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3545 terpeneprots.append(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3546 if detecteddomainsdict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3547 detdomlist = detecteddomainsdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3548 detdomlist.append(["NapT7",NapT7scores[NapT7prots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3549 detecteddomainsdict[i] = detdomlist
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3550 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3551 detecteddomainsdict[i] = [["NapT7",NapT7scores[NapT7prots.index(i)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3552 if 1 in geneclustertypes or 6 in geneclustertypes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3553 fung_ggpps = parsehmmoutput(420,hmmoutputfolder + "fung_ggpps.txt")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3554 fung_ggppsprots = fung_ggpps[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3555 fung_ggppsscores = fung_ggpps[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3556 for i in fung_ggppsprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3557 if i not in terpeneprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3558 terpeneprots.append(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3559 if detecteddomainsdict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3560 detdomlist = detecteddomainsdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3561 detdomlist.append(["Fungal geranylgeranyl pyrophosphate synthase, model 1",fung_ggppsscores[fung_ggppsprots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3562 detecteddomainsdict[i] = detdomlist
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3563 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3564 detecteddomainsdict[i] = [["Fungal geranylgeranyl pyrophosphate synthase, model 1",fung_ggppsscores[fung_ggppsprots.index(i)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3565 if 1 in geneclustertypes or 6 in geneclustertypes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3566 fung_ggpps2 = parsehmmoutput(312,hmmoutputfolder + "fung_ggpps2.txt")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3567 fung_ggpps2prots = fung_ggpps2[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3568 fung_ggpps2scores = fung_ggpps2[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3569 for i in fung_ggpps2prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3570 if i not in terpeneprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3571 terpeneprots.append(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3572 if detecteddomainsdict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3573 detdomlist = detecteddomainsdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3574 detdomlist.append(["Fungal geranylgeranyl pyrophosphate synthase, model 2",fung_ggpps2scores[fung_ggpps2prots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3575 detecteddomainsdict[i] = detdomlist
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3576 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3577 detecteddomainsdict[i] = [["Fungal geranylgeranyl pyrophosphate synthase, model 2",fung_ggpps2scores[fung_ggpps2prots.index(i)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3578 if 1 in geneclustertypes or 6 in geneclustertypes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3579 dmat = parsehmmoutput(200,hmmoutputfolder + "dmat.txt")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3580 dmatprots = dmat[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3581 dmatscores = dmat[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3582 for i in dmatprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3583 if i not in terpeneprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3584 terpeneprots.append(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3585 if detecteddomainsdict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3586 detdomlist = detecteddomainsdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3587 detdomlist.append(["Dimethylallyl tryptophan synthase",dmatscores[dmatprots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3588 detecteddomainsdict[i] = detdomlist
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3589 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3590 detecteddomainsdict[i] = [["Dimethylallyl tryptophan synthase",dmatscores[dmatprots.index(i)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3591 if 1 in geneclustertypes or 6 in geneclustertypes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3592 trichodiene_synth = parsehmmoutput(150,hmmoutputfolder + "trichodiene_synth.txt")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3593 trichodiene_synthprots = trichodiene_synth[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3594 trichodiene_synthscores = trichodiene_synth[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3595 for i in trichodiene_synthprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3596 if i not in terpeneprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3597 terpeneprots.append(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3598 if detecteddomainsdict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3599 detdomlist = detecteddomainsdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3600 detdomlist.append(["Trichodiene synthase",trichodiene_synthscores[trichodiene_synthprots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3601 detecteddomainsdict[i] = detdomlist
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3602 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3603 detecteddomainsdict[i] = [["Trichodiene synthase",trichodiene_synthscores[trichodiene_synthprots.index(i)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3604 #Extract lantibiotic proteins, LanC cut-off: 80, Lant_dehN & Lant_dehC combination cut-off: 20 each
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3605 lantprots = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3606 if 1 in geneclustertypes or 7 in geneclustertypes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3607 lantc = parsehmmoutput(80,hmmoutputfolder + "LANC_like.txt")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3608 lancprots = lantc[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3609 lancscores = lantc[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3610 landehn = parsehmmoutput(20,hmmoutputfolder + "Lant_dehyd_N.txt")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3611 landehnprots = landehn[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3612 landehnscores = landehn[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3613 landehc = parsehmmoutput(20,hmmoutputfolder + "Lant_dehyd_C.txt")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3614 landehcprots = landehc[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3615 landehcscores = landehc[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3616 lanti1 = parsehmmoutput(20,hmmoutputfolder + "Antimicrobial18.txt")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3617 lanti1prots = lanti1[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3618 lanti1scores = lanti1[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3619 lanti2 = parsehmmoutput(20,hmmoutputfolder + "Gallidermin.txt")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3620 lanti2prots = lanti2[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3621 lanti2scores = lanti2[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3622 lanti3 = parsehmmoutput(20,hmmoutputfolder + "L_biotic_typeA.txt")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3623 lanti3prots = lanti3[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3624 lanti3scores = lanti3[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3625 lanti4 = parsehmmoutput(20,hmmoutputfolder + "LE-DUF.txt")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3626 lanti4prots = lanti4[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3627 lanti4scores = lanti4[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3628 lanti5 = parsehmmoutput(20,hmmoutputfolder + "LE-LAC481.txt")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3629 lanti5prots = lanti5[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3630 lanti5scores = lanti5[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3631 lanti6 = parsehmmoutput(20,hmmoutputfolder + "LE-LanBC.txt")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3632 lanti6prots = lanti6[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3633 lanti6scores = lanti6[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3634 lanti7 = parsehmmoutput(20,hmmoutputfolder + "LE-MER+2PEP.txt")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3635 lanti7prots = lanti7[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3636 lanti7scores = lanti7[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3637 lanti8 = parsehmmoutput(20,hmmoutputfolder + "MA-2PEPA.txt")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3638 lanti8prots = lanti8[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3639 lanti8scores = lanti8[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3640 lanti9 = parsehmmoutput(20,hmmoutputfolder + "MA-DUF.txt")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3641 lanti9prots = lanti9[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3642 lanti9scores = lanti9[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3643 lanti10 = parsehmmoutput(20,hmmoutputfolder + "MA-EPI.txt")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3644 lanti10prots = lanti10[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3645 lanti10scores = lanti10[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3646 lanti11 = parsehmmoutput(20,hmmoutputfolder + "MA-LAC481.txt")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3647 lanti11prots = lanti11[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3648 lanti11scores = lanti11[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3649 lanti12 = parsehmmoutput(20,hmmoutputfolder + "MA-NIS+EPI.txt")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3650 lanti12prots = lanti12[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3651 lanti12scores = lanti12[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3652 lanti13 = parsehmmoutput(20,hmmoutputfolder + "MA-NIS.txt")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3653 lanti13prots = lanti13[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3654 lanti13scores = lanti13[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3655 lanti14 = parsehmmoutput(18,hmmoutputfolder + "TIGR03731.txt")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3656 lanti14prots = lanti14[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3657 lanti14scores = lanti14[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3658 lantiprots = lanti1prots + lanti2prots + lanti3prots + lanti4prots + lanti5prots + lanti6prots + lanti7prots + lanti8prots + lanti9prots + lanti10prots + lanti11prots + lanti12prots + lanti13prots + lanti14prots
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3659 lantiprots2 = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3660 for i in lantiprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3661 if i not in lantiprots2:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3662 lantiprots2.append(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3663 lantiprots = lantiprots2
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3664 for i in lancprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3665 lantprots.append(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3666 if detecteddomainsdict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3667 detdomlist = detecteddomainsdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3668 detdomlist.append(["LanC lanthionine synthase domain",lancscores[lancprots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3669 detecteddomainsdict[i] = detdomlist
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3670 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3671 detecteddomainsdict[i] = [["LanC lanthionine synthase domain",lancscores[lancprots.index(i)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3672 for i in landehnprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3673 if i in landehcprots and i not in lantprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3674 lantprots.append(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3675 if detecteddomainsdict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3676 detdomlist = detecteddomainsdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3677 detdomlist.append(["Lantibiotic dehydratase, N-terminus",landehnscores[landehnprots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3678 detdomlist.append(["Lantibiotic dehydratase, C-terminus",landehcscores[landehcprots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3679 detecteddomainsdict[i] = detdomlist
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3680 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3681 detecteddomainsdict[i] = [["Lantibiotic dehydratase, N-terminus",landehnscores[landehnprots.index(i)]],["Lantibiotic dehydratase, C-terminus",landehcscores[landehcprots.index(i)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3682 for i in lantiprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3683 if i not in lantprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3684 lantprots.append(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3685 if detecteddomainsdict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3686 detdomlist = detecteddomainsdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3687 if i in lanti1prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3688 detdomlist.append(["Antimicrobial18 domain",lanti1scores[lanti1prots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3689 detecteddomainsdict[i] = detdomlist
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3690 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3691 if i in lanti1prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3692 detecteddomainsdict[i] = [["Antimicrobial18 domain",lanti1scores[lanti1prots.index(i)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3693 if detecteddomainsdict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3694 detdomlist = detecteddomainsdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3695 if i in lanti2prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3696 detdomlist.append(["Gallidermin domain",lanti2scores[lanti2prots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3697 detecteddomainsdict[i] = detdomlist
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3698 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3699 if i in lanti2prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3700 detecteddomainsdict[i] = [["Gallidermin domain",lanti2scores[lanti2prots.index(i)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3701 if detecteddomainsdict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3702 detdomlist = detecteddomainsdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3703 if i in lanti3prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3704 detdomlist.append(["L_biotic_typeA domain",lanti3scores[lanti3prots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3705 detecteddomainsdict[i] = detdomlist
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3706 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3707 if i in lanti3prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3708 detecteddomainsdict[i] = [["L_biotic_typeA domain",lanti3scores[lanti3prots.index(i)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3709 if detecteddomainsdict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3710 detdomlist = detecteddomainsdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3711 if i in lanti4prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3712 detdomlist.append(["LE-DUF domain",lanti4scores[lanti4prots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3713 detecteddomainsdict[i] = detdomlist
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3714 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3715 if i in lanti4prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3716 detecteddomainsdict[i] = [["LE-DUF domain",lanti4scores[lanti4prots.index(i)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3717 if detecteddomainsdict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3718 detdomlist = detecteddomainsdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3719 if i in lanti5prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3720 detdomlist.append(["LE-LAC481 domain",lanti5scores[lanti5prots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3721 detecteddomainsdict[i] = detdomlist
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3722 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3723 if i in lanti5prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3724 detecteddomainsdict[i] = [["LE-LAC481 domain",lanti5scores[lanti5prots.index(i)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3725 if detecteddomainsdict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3726 detdomlist = detecteddomainsdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3727 if i in lanti6prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3728 detdomlist.append(["LE-LanBC domain",lanti6scores[lanti6prots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3729 detecteddomainsdict[i] = detdomlist
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3730 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3731 if i in lanti6prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3732 detecteddomainsdict[i] = [["LE-LanBC domain",lanti6scores[lanti6prots.index(i)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3733 if detecteddomainsdict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3734 detdomlist = detecteddomainsdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3735 if i in lanti7prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3736 detdomlist.append(["LE-MER+2PEP domain",lanti7scores[lanti7prots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3737 detecteddomainsdict[i] = detdomlist
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3738 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3739 if i in lanti7prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3740 detecteddomainsdict[i] = [["LE-MER+2PEP domain",lanti7scores[lanti7prots.index(i)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3741 if detecteddomainsdict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3742 detdomlist = detecteddomainsdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3743 if i in lanti8prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3744 detdomlist.append(["MA-2PEPA domain",lanti8scores[lanti8prots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3745 detecteddomainsdict[i] = detdomlist
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3746 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3747 if i in lanti8prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3748 detecteddomainsdict[i] = [["MA-2PEPA domain",lanti8scores[lanti8prots.index(i)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3749 if detecteddomainsdict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3750 detdomlist = detecteddomainsdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3751 if i in lanti9prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3752 detdomlist.append(["MA-DUF domain",lanti9scores[lanti9prots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3753 detecteddomainsdict[i] = detdomlist
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3754 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3755 if i in lanti9prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3756 detecteddomainsdict[i] = [["MA-DUF domain",lanti9scores[lanti9prots.index(i)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3757 if detecteddomainsdict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3758 detdomlist = detecteddomainsdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3759 if i in lanti10prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3760 detdomlist.append(["MA-EPI domain",lanti10scores[lanti10prots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3761 detecteddomainsdict[i] = detdomlist
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3762 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3763 if i in lanti10prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3764 detecteddomainsdict[i] = [["MA-EPI domain",lanti10scores[lanti10prots.index(i)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3765 if detecteddomainsdict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3766 detdomlist = detecteddomainsdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3767 if i in lanti11prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3768 detdomlist.append(["MA-LAC481 domain",lanti11scores[lanti11prots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3769 detecteddomainsdict[i] = detdomlist
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3770 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3771 if i in lanti11prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3772 detecteddomainsdict[i] = [["MA-LAC481 domain",lanti11scores[lanti11prots.index(i)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3773 if detecteddomainsdict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3774 detdomlist = detecteddomainsdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3775 if i in lanti12prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3776 detdomlist.append(["MA-NIS+EPI domain",lanti12scores[lanti12prots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3777 detecteddomainsdict[i] = detdomlist
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3778 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3779 if i in lanti12prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3780 detecteddomainsdict[i] = [["MA-NIS+EPI domain",lanti12scores[lanti12prots.index(i)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3781 if detecteddomainsdict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3782 detdomlist = detecteddomainsdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3783 if i in lanti13prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3784 detdomlist.append(["MA-NIS domain",lanti13scores[lanti13prots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3785 detecteddomainsdict[i] = detdomlist
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3786 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3787 if i in lanti13prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3788 detecteddomainsdict[i] = [["MA-NIS domain",lanti13scores[lanti13prots.index(i)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3789 if detecteddomainsdict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3790 detdomlist = detecteddomainsdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3791 if i in lanti14prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3792 detdomlist.append(["TIGR03731: lantibiotic, gallidermin/nisin family",lanti14scores[lanti14prots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3793 detecteddomainsdict[i] = detdomlist
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3794 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3795 if i in lanti14prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3796 detecteddomainsdict[i] = [["TIGR03731: lantibiotic, gallidermin/nisin family",lanti14scores[lanti14prots.index(i)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3797 #Bacteriocin proteins, various cut-offs
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3798 bcinprots = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3799 if 1 in geneclustertypes or 8 in geneclustertypes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3800 bcin1prots = parsehmmoutput(50,hmmoutputfolder + "strepbact.txt")[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3801 bcin2prots = parsehmmoutput(90,hmmoutputfolder + "Antimicrobial14.txt")[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3802 bcin3prots = parsehmmoutput(23,hmmoutputfolder + "Bacteriocin_IId.txt")[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3803 bcin4prots = parsehmmoutput(92,hmmoutputfolder + "BacteriocIIc_cy.txt")[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3804 bcin5prots = parsehmmoutput(40,hmmoutputfolder + "Bacteriocin_II.txt")[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3805 bcin6prots = parsehmmoutput(24,hmmoutputfolder + "Lactococcin.txt")[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3806 bcin7prots = parsehmmoutput(31,hmmoutputfolder + "Antimicrobial17.txt")[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3807 bcin8prots = parsehmmoutput(25,hmmoutputfolder + "Lactococcin_972.txt")[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3808 bcin9prots = parsehmmoutput(27,hmmoutputfolder + "Bacteriocin_IIc.txt")[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3809 bcin10prots = parsehmmoutput(78,hmmoutputfolder + "LcnG-beta.txt")[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3810 bcin11prots = parsehmmoutput(56,hmmoutputfolder + "Bacteriocin_IIi.txt")[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3811 bcin12prots = parsehmmoutput(98,hmmoutputfolder + "Subtilosin_A.txt")[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3812 bcin13prots = parsehmmoutput(27,hmmoutputfolder + "Cloacin.txt")[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3813 bcin14prots = parsehmmoutput(25,hmmoutputfolder + "Linocin_M18.txt")[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3814 bcin15prots = parsehmmoutput(150,hmmoutputfolder + "TIGR03603.txt")[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3815 bcin16prots = parsehmmoutput(440,hmmoutputfolder + "TIGR03604.txt")[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3816 bcin17prots = parsehmmoutput(200,hmmoutputfolder + "TIGR03605.txt")[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3817 bcin18prots = parsehmmoutput(18,hmmoutputfolder + "TIGR03651.txt")[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3818 bcin19prots = parsehmmoutput(35,hmmoutputfolder + "TIGR03678.txt")[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3819 bcin20prots = parsehmmoutput(400,hmmoutputfolder + "TIGR03693.txt")[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3820 bcin21prots = parsehmmoutput(16,hmmoutputfolder + "TIGR03798.txt")[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3821 bcin22prots = parsehmmoutput(150,hmmoutputfolder + "TIGR03882.txt")[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3822 bcin23prots = parsehmmoutput(50,hmmoutputfolder + "TIGR03601.txt")[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3823 bcin24prots = parsehmmoutput(50,hmmoutputfolder + "TIGR03602.txt")[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3824 bcin25prots = parsehmmoutput(20,hmmoutputfolder + "mvnA.txt")[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3825 bcin26prots = parsehmmoutput(20,hmmoutputfolder + "thiostrepton.txt")[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3826 bcin1scores = parsehmmoutput(50,hmmoutputfolder + "strepbact.txt")[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3827 bcin2scores = parsehmmoutput(90,hmmoutputfolder + "Antimicrobial14.txt")[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3828 bcin3scores = parsehmmoutput(23,hmmoutputfolder + "Bacteriocin_IId.txt")[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3829 bcin4scores = parsehmmoutput(92,hmmoutputfolder + "BacteriocIIc_cy.txt")[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3830 bcin5scores = parsehmmoutput(40,hmmoutputfolder + "Bacteriocin_II.txt")[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3831 bcin6scores = parsehmmoutput(24,hmmoutputfolder + "Lactococcin.txt")[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3832 bcin7scores = parsehmmoutput(31,hmmoutputfolder + "Antimicrobial17.txt")[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3833 bcin8scores = parsehmmoutput(25,hmmoutputfolder + "Lactococcin_972.txt")[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3834 bcin9scores = parsehmmoutput(27,hmmoutputfolder + "Bacteriocin_IIc.txt")[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3835 bcin10scores = parsehmmoutput(78,hmmoutputfolder + "LcnG-beta.txt")[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3836 bcin11scores = parsehmmoutput(56,hmmoutputfolder + "Bacteriocin_IIi.txt")[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3837 bcin12scores = parsehmmoutput(98,hmmoutputfolder + "Subtilosin_A.txt")[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3838 bcin13scores = parsehmmoutput(27,hmmoutputfolder + "Cloacin.txt")[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3839 bcin14scores = parsehmmoutput(25,hmmoutputfolder + "Linocin_M18.txt")[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3840 bcin15scores = parsehmmoutput(150,hmmoutputfolder + "TIGR03603.txt")[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3841 bcin16scores = parsehmmoutput(440,hmmoutputfolder + "TIGR03604.txt")[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3842 bcin17scores = parsehmmoutput(200,hmmoutputfolder + "TIGR03605.txt")[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3843 bcin18scores = parsehmmoutput(18,hmmoutputfolder + "TIGR03651.txt")[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3844 bcin19scores = parsehmmoutput(35,hmmoutputfolder + "TIGR03678.txt")[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3845 bcin20scores = parsehmmoutput(400,hmmoutputfolder + "TIGR03693.txt")[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3846 bcin21scores = parsehmmoutput(16,hmmoutputfolder + "TIGR03798.txt")[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3847 bcin22scores = parsehmmoutput(150,hmmoutputfolder + "TIGR03882.txt")[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3848 bcin23scores = parsehmmoutput(50,hmmoutputfolder + "TIGR03601.txt")[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3849 bcin24scores = parsehmmoutput(50,hmmoutputfolder + "TIGR03602.txt")[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3850 bcin25scores = parsehmmoutput(20,hmmoutputfolder + "mvnA.txt")[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3851 bcin26scores = parsehmmoutput(20,hmmoutputfolder + "thiostrepton.txt")[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3852 bcinprots = bcin1prots + bcin2prots + bcin3prots + bcin4prots + bcin5prots + bcin6prots + bcin7prots + bcin8prots + bcin9prots + bcin10prots + bcin11prots + bcin12prots + bcin13prots + bcin14prots + bcin15prots + bcin16prots + bcin17prots + bcin18prots + bcin19prots + bcin20prots + bcin21prots + bcin22prots + bcin23prots + bcin24prots + bcin25prots + bcin26prots
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3853 bcinprots2 = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3854 for i in bcinprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3855 if detecteddomainsdict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3856 detdomlist = detecteddomainsdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3857 if i in bcin1prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3858 detdomlist.append(["Putative Streptomyces bacteriocin",bcin1scores[bcin1prots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3859 detecteddomainsdict[i] = detdomlist
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3860 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3861 if i in bcin1prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3862 detecteddomainsdict[i] = [["Putative Streptomyces bacteriocin",bcin1scores[bcin1prots.index(i)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3863 if detecteddomainsdict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3864 detdomlist = detecteddomainsdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3865 if i in bcin2prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3866 detdomlist.append(["Antimicrobial14 domain",bcin2scores[bcin2prots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3867 detecteddomainsdict[i] = detdomlist
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3868 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3869 if i in bcin2prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3870 detecteddomainsdict[i] = [["Antimicrobial14 domain",bcin2scores[bcin2prots.index(i)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3871 if detecteddomainsdict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3872 detdomlist = detecteddomainsdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3873 if i in bcin3prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3874 detdomlist.append(["Bacteriocin_IId domain",bcin3scores[bcin3prots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3875 detecteddomainsdict[i] = detdomlist
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3876 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3877 if i in bcin3prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3878 detecteddomainsdict[i] = [["Bacteriocin_IId domain",bcin3scores[bcin3prots.index(i)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3879 if detecteddomainsdict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3880 detdomlist = detecteddomainsdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3881 if i in bcin4prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3882 detdomlist.append(["BacteriocIIc_cy domain",bcin4scores[bcin4prots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3883 detecteddomainsdict[i] = detdomlist
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3884 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3885 if i in bcin4prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3886 detecteddomainsdict[i] = [["BacteriocIIc_cy domain",bcin4scores[bcin4prots.index(i)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3887 if detecteddomainsdict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3888 detdomlist = detecteddomainsdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3889 if i in bcin5prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3890 detdomlist.append(["Bacteriocin_II domain",bcin5scores[bcin5prots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3891 detecteddomainsdict[i] = detdomlist
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3892 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3893 if i in bcin5prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3894 detecteddomainsdict[i] = [["Bacteriocin_II domain",bcin5scores[bcin5prots.index(i)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3895 if detecteddomainsdict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3896 detdomlist = detecteddomainsdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3897 if i in bcin6prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3898 detdomlist.append(["Lactococcin",bcin6scores[bcin6prots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3899 detecteddomainsdict[i] = detdomlist
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3900 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3901 if i in bcin6prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3902 detecteddomainsdict[i] = [["Lactococcin",bcin6scores[bcin6prots.index(i)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3903 if detecteddomainsdict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3904 detdomlist = detecteddomainsdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3905 if i in bcin7prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3906 detdomlist.append(["Antimicrobial17 domain",bcin7scores[bcin7prots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3907 detecteddomainsdict[i] = detdomlist
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3908 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3909 if i in bcin7prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3910 detecteddomainsdict[i] = [["Antimicrobial17 domain",bcin7scores[bcin7prots.index(i)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3911 if detecteddomainsdict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3912 detdomlist = detecteddomainsdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3913 if i in bcin8prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3914 detdomlist.append(["Lactococcin_972 domain",bcin8scores[bcin8prots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3915 detecteddomainsdict[i] = detdomlist
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3916 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3917 if i in bcin8prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3918 detecteddomainsdict[i] = [["Lactococcin_972 domain",bcin8scores[bcin8prots.index(i)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3919 if detecteddomainsdict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3920 detdomlist = detecteddomainsdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3921 if i in bcin9prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3922 detdomlist.append(["Bacteriocin_IIc domain",bcin9scores[bcin9prots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3923 detecteddomainsdict[i] = detdomlist
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3924 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3925 if i in bcin9prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3926 detecteddomainsdict[i] = [["Bacteriocin_IIc domain",bcin9scores[bcin9prots.index(i)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3927 if detecteddomainsdict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3928 detdomlist = detecteddomainsdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3929 if i in bcin10prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3930 detdomlist.append(["LcnG-beta domain",bcin10scores[bcin10prots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3931 detecteddomainsdict[i] = detdomlist
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3932 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3933 if i in bcin10prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3934 detecteddomainsdict[i] = [["LcnG-beta domain",bcin10scores[bcin10prots.index(i)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3935 if detecteddomainsdict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3936 detdomlist = detecteddomainsdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3937 if i in bcin11prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3938 detdomlist.append(["Bacteriocin_IIi domain",bcin11scores[bcin11prots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3939 detecteddomainsdict[i] = detdomlist
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3940 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3941 if i in bcin11prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3942 detecteddomainsdict[i] = [["Bacteriocin_IIi domain",bcin11scores[bcin11prots.index(i)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3943 if detecteddomainsdict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3944 detdomlist = detecteddomainsdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3945 if i in bcin12prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3946 detdomlist.append(["Subtilosin_A domain",bcin12scores[bcin12prots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3947 detecteddomainsdict[i] = detdomlist
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3948 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3949 if i in bcin12prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3950 detecteddomainsdict[i] = [["Subtilosin_A domain",bcin12scores[bcin12prots.index(i)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3951 if detecteddomainsdict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3952 detdomlist = detecteddomainsdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3953 if i in bcin13prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3954 detdomlist.append(["Cloacin domain",bcin13scores[bcin13prots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3955 detecteddomainsdict[i] = detdomlist
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3956 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3957 if i in bcin13prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3958 detecteddomainsdict[i] = [["Cloacin domain",bcin13scores[bcin13prots.index(i)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3959 if detecteddomainsdict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3960 detdomlist = detecteddomainsdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3961 if i in bcin14prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3962 detdomlist.append(["Linocin_M18 domain",bcin14scores[bcin14prots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3963 detecteddomainsdict[i] = detdomlist
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3964 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3965 if i in bcin14prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3966 detecteddomainsdict[i] = [["Linocin_M18 domain",bcin14scores[bcin14prots.index(i)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3967 if detecteddomainsdict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3968 detdomlist = detecteddomainsdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3969 if i in bcin15prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3970 detdomlist.append(["TIGR03603: bacteriocin biosynthesis cyclodehydratase",bcin15scores[bcin15prots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3971 detecteddomainsdict[i] = detdomlist
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3972 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3973 if i in bcin15prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3974 detecteddomainsdict[i] = [["TIGR03603: bacteriocin biosynthesis cyclodehydratase",bcin15scores[bcin15prots.index(i)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3975 if detecteddomainsdict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3976 detdomlist = detecteddomainsdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3977 if i in bcin16prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3978 detdomlist.append(["TGIR03604: bacteriocin biosynthesis docking scaffold",bcin16scores[bcin16prots.index(i)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3979 detecteddomainsdict[i] = detdomlist
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3980 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3981 if i in bcin16prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
3982 detecteddomainsdict[i] = [["TGIR03604: bacteriocin biosynthesis docking scaffold",bcin16scores[bcin16prots.index(i)]]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
#Annotate protein i with a [label, score] entry for each of the bacteriocin
#models 17-26 whose protein list contains it. Entries are appended in this
#fixed model order, and the score is taken from the position of i in that
#model's protein list.
#The first label is spelled "TIGR03605" here; it used to read "TGIR03605",
#unlike the other TIGRFAMs labels in this table.
for bcin_prots, bcin_scores, bcin_label in (
        (bcin17prots, bcin17scores, "TIGR03605: SagB-type dehydrogenase"),
        (bcin18prots, bcin18scores, "TIGR03651: bacteriocin, circularin A/uberolysin family"),
        (bcin19prots, bcin19scores, "TIGR03678: bacteriocin, microcyclamide/patellamide family"),
        (bcin20prots, bcin20scores, "TIGR03693: thiazole-containing bacteriocin maturation protein"),
        (bcin21prots, bcin21scores, "TIGR03798: bacteriocin propeptide"),
        (bcin22prots, bcin22scores, "TIGR03882: bacteriocin biosynthesis cyclodehydratase"),
        (bcin23prots, bcin23scores, "TIGR03601: bacteriocin, BA_2677 family"),
        (bcin24prots, bcin24scores, "TIGR03602: bacteriocin protoxin, streptolysin S family"),
        (bcin25prots, bcin25scores, "Bacteriocin, microviridin family"),
        (bcin26prots, bcin26scores, "Thiopeptide, thiostrepton-like")):
    if i in bcin_prots:
        #Build the entry first so nothing is stored if the score lookup fails
        bcin_hit = [bcin_label, bcin_scores[bcin_prots.index(i)]]
        #Append to the existing entry list for i, or start a new one
        detecteddomainsdict.setdefault(i, []).append(bcin_hit)
#Collect every protein ID only once in bcinprots2
if i not in bcinprots2:
    bcinprots2.append(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4065 bcinprots = bcinprots2
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
#Extract beta-lactam synthetase proteins; cut-offs passed to parsehmmoutput:
#250 for BLS and CAS, 500 for tabtoxin
lactamprots = []
if 1 in geneclustertypes or 9 in geneclustertypes:
    #BLS hits: every ID is added to lactamprots without a membership check
    bls = parsehmmoutput(250, hmmoutputfolder + "BLS.txt")
    blsprots, blsscores = bls[0], bls[1]
    for i in blsprots:
        lactamprots.append(i)
        domain_hit = ["Beta-lactam synthase", blsscores[blsprots.index(i)]]
        detecteddomainsdict.setdefault(i, []).append(domain_hit)
    #CAS hits: only IDs not yet collected are added to lactamprots
    cas = parsehmmoutput(250, hmmoutputfolder + "CAS.txt")
    casprots, casscores = cas[0], cas[1]
    for i in casprots:
        if i not in lactamprots:
            lactamprots.append(i)
        domain_hit = ["Clavulanic acid synthase-like", casscores[casprots.index(i)]]
        detecteddomainsdict.setdefault(i, []).append(domain_hit)
    #Tabtoxin hits: same handling as CAS
    tabtoxin = parsehmmoutput(500, hmmoutputfolder + "tabtoxin.txt")
    tabtoxinprots, tabtoxinscores = tabtoxin[0], tabtoxin[1]
    for i in tabtoxinprots:
        if i not in lactamprots:
            lactamprots.append(i)
        domain_hit = ["Tabtoxin synthase-like", tabtoxinscores[tabtoxinprots.index(i)]]
        detecteddomainsdict.setdefault(i, []).append(domain_hit)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
#Extract aminoglycoside / aminocyclitol biosynthesis clusters, clusters taken from Flatt & Mahmud et al. 2007
#Each model below is parsed with its own cut-off. All of its protein IDs are
#appended to amglyccyclprots (no membership check) and a [label, score] entry
#is recorded for the protein in detecteddomainsdict.
amglyccyclprots = []
if 1 in geneclustertypes or 10 in geneclustertypes:
    strH = parsehmmoutput(200, hmmoutputfolder + "strH_like.txt")
    strhprots, strhscores = strH[0], strH[1]
    for i in strhprots:
        amglyccyclprots.append(i)
        domain_hit = ["StrH-like glycosyltransferase", strhscores[strhprots.index(i)]]
        detecteddomainsdict.setdefault(i, []).append(domain_hit)
    strK1 = parsehmmoutput(800, hmmoutputfolder + "strK_like1.txt")
    strk1prots, strk1scores = strK1[0], strK1[1]
    for i in strk1prots:
        amglyccyclprots.append(i)
        domain_hit = ["StrK-like phosphatase", strk1scores[strk1prots.index(i)]]
        detecteddomainsdict.setdefault(i, []).append(domain_hit)
    strK2 = parsehmmoutput(650, hmmoutputfolder + "strK_like2.txt")
    strk2prots, strk2scores = strK2[0], strK2[1]
    for i in strk2prots:
        amglyccyclprots.append(i)
        domain_hit = ["StrK-like phosphatase, model 2", strk2scores[strk2prots.index(i)]]
        detecteddomainsdict.setdefault(i, []).append(domain_hit)
    neoL = parsehmmoutput(50, hmmoutputfolder + "neoL_like.txt")
    neolprots, neolscores = neoL[0], neoL[1]
    for i in neolprots:
        amglyccyclprots.append(i)
        domain_hit = ["NeoL-like deacetylase", neolscores[neolprots.index(i)]]
        detecteddomainsdict.setdefault(i, []).append(domain_hit)
    DOIS = parsehmmoutput(500, hmmoutputfolder + "DOIS.txt")
    doisprots, doisscores = DOIS[0], DOIS[1]
    for i in doisprots:
        amglyccyclprots.append(i)
        domain_hit = ["2-deoxy-scyllo-inosose synthase", doisscores[doisprots.index(i)]]
        detecteddomainsdict.setdefault(i, []).append(domain_hit)
    valA = parsehmmoutput(600, hmmoutputfolder + "valA_like.txt")
    valaprots, valascores = valA[0], valA[1]
    for i in valaprots:
        amglyccyclprots.append(i)
        domain_hit = ["2-epi-5-epi-valiolone synthase, ValA-like", valascores[valaprots.index(i)]]
        detecteddomainsdict.setdefault(i, []).append(domain_hit)
    spcFG = parsehmmoutput(200, hmmoutputfolder + "spcFG_like.txt")
    spcfgprots, spcfgscores = spcFG[0], spcFG[1]
    for i in spcfgprots:
        amglyccyclprots.append(i)
        domain_hit = ["SpcF/SpcG-like glycosyltransferase", spcfgscores[spcfgprots.index(i)]]
        detecteddomainsdict.setdefault(i, []).append(domain_hit)
    spcDK_glyc = parsehmmoutput(600, hmmoutputfolder + "spcDK_like_glyc.txt")
    spcdkglycprots, spcdkglycscores = spcDK_glyc[0], spcDK_glyc[1]
    for i in spcdkglycprots:
        amglyccyclprots.append(i)
        domain_hit = ["SpcD/SpcK-like thymidylyltransferase", spcdkglycscores[spcdkglycprots.index(i)]]
        detecteddomainsdict.setdefault(i, []).append(domain_hit)
    salQ = parsehmmoutput(480, hmmoutputfolder + "salQ.txt")
    salqprots, salqscores = salQ[0], salQ[1]
    for i in salqprots:
        amglyccyclprots.append(i)
        domain_hit = ["2-epi-5-epi-valiolone synthase, SalQ-like", salqscores[salqprots.index(i)]]
        detecteddomainsdict.setdefault(i, []).append(domain_hit)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
#Extract aminocoumarin biosynthesis clusters
#Each model below is parsed with its own cut-off. All of its protein IDs are
#appended to aminocoumarinprots (no membership check) and a [label, score]
#entry is recorded for the protein in detecteddomainsdict.
aminocoumarinprots = []
if 1 in geneclustertypes or 11 in geneclustertypes:
    novK = parsehmmoutput(200, hmmoutputfolder + "novK.txt")
    novkprots, novkscores = novK[0], novK[1]
    for i in novkprots:
        aminocoumarinprots.append(i)
        domain_hit = ["NovK-like reductase", novkscores[novkprots.index(i)]]
        detecteddomainsdict.setdefault(i, []).append(domain_hit)
    novJ = parsehmmoutput(350, hmmoutputfolder + "novJ.txt")
    novjprots, novjscores = novJ[0], novJ[1]
    for i in novjprots:
        aminocoumarinprots.append(i)
        domain_hit = ["NovJ-like reductase", novjscores[novjprots.index(i)]]
        detecteddomainsdict.setdefault(i, []).append(domain_hit)
    novI = parsehmmoutput(600, hmmoutputfolder + "novI.txt")
    noviprots, noviscores = novI[0], novI[1]
    for i in noviprots:
        aminocoumarinprots.append(i)
        domain_hit = ["NovI-like cytochrome P450", noviscores[noviprots.index(i)]]
        detecteddomainsdict.setdefault(i, []).append(domain_hit)
    novH = parsehmmoutput(750, hmmoutputfolder + "novH.txt")
    novhprots, novhscores = novH[0], novH[1]
    for i in novhprots:
        aminocoumarinprots.append(i)
        domain_hit = ["NovH-like protein", novhscores[novhprots.index(i)]]
        detecteddomainsdict.setdefault(i, []).append(domain_hit)
    spcDK_like_cou = parsehmmoutput(600, hmmoutputfolder + "spcDK_like_cou.txt")
    spcDK_like_cou_prots, spcDK_like_cou_scores = spcDK_like_cou[0], spcDK_like_cou[1]
    for i in spcDK_like_cou_prots:
        aminocoumarinprots.append(i)
        domain_hit = ["SpcD/SpcK-like thymidylyltransferase, aminocoumarins group", spcDK_like_cou_scores[spcDK_like_cou_prots.index(i)]]
        detecteddomainsdict.setdefault(i, []).append(domain_hit)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
#Extract siderophores biosynthesis proteins, IucA/C and AlcB
#NOTE(review): only the IucA_IucC result file (cut-off 30) is parsed in this
#section; no AlcB model is read here - confirm whether the heading is stale.
siderophoreprots = []
if 1 in geneclustertypes or 12 in geneclustertypes:
    siderophore = parsehmmoutput(30, hmmoutputfolder + "IucA_IucC.txt")
    #siderophoreprots becomes the parsed protein list itself, not a copy
    siderophoreprots, siderophorescores = siderophore[0], siderophore[1]
    for i in siderophoreprots:
        domain_hit = ["IucA-IucC domain", siderophorescores[siderophoreprots.index(i)]]
        #Append to the existing entry list for i, or start a new one
        detecteddomainsdict.setdefault(i, []).append(domain_hit)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
#Extract ectoine biosynthesis proteins
ectprots = []
# Runs only when geneclustertypes contains 1 or 13.
if 1 in geneclustertypes or 13 in geneclustertypes:
    # Element 0 is used as the protein list, element 1 as the matching scores;
    # the first argument (35) is presumably the score cutoff -- confirm.
    ect = parsehmmoutput(35, hmmoutputfolder + "ectoine_synt.txt")
    ectprots = ect[0]
    ectscores = ect[1]
    # Pair hits with scores positionally instead of scores[prots.index(i)],
    # which was quadratic and mis-scored repeated identifiers.
    for prot, score in zip(ectprots, ectscores):
        detecteddomainsdict.setdefault(prot, []).append(
            ["Ectoine synthase", score])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
#Extract butyrolactone biosynthesis proteins
butyrprots = []
# Runs only when geneclustertypes contains 1 or 14.
if 1 in geneclustertypes or 14 in geneclustertypes:
    # Element 0 is used as the protein list, element 1 as the matching scores;
    # the first argument (25) is presumably the score cutoff -- confirm.
    butyr = parsehmmoutput(25, hmmoutputfolder + "AfsA.txt")
    butyrprots = butyr[0]
    butyrscores = butyr[1]
    # Pair hits with scores positionally instead of scores[prots.index(i)],
    # which was quadratic and mis-scored repeated identifiers.
    for prot, score in zip(butyrprots, butyrscores):
        detecteddomainsdict.setdefault(prot, []).append(
            ["AfsA butyrolactone synthesis domain", score])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
#Extract indole biosynthesis proteins
indoleprots = []
# Runs only when geneclustertypes contains 1 or 15.
if 1 in geneclustertypes or 15 in geneclustertypes:
    # Element 0 is used as the protein list, element 1 as the matching scores;
    # the first argument (100) is presumably the score cutoff -- confirm.
    indole = parsehmmoutput(100, hmmoutputfolder + "indsynth.txt")
    indoleprots = indole[0]
    indolescores = indole[1]
    # Pair hits with scores positionally instead of scores[prots.index(i)],
    # which was quadratic and mis-scored repeated identifiers.
    for prot, score in zip(indoleprots, indolescores):
        detecteddomainsdict.setdefault(prot, []).append(
            ["StaD-like chromopyrrolic acid synthase domain", score])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
#Extract nucleoside antibiotic biosynthesis proteins
nucleoprots = []
# Runs only when geneclustertypes contains 1 or 16.
if 1 in geneclustertypes or 16 in geneclustertypes:
    # Parse every nucleoside-related HMM output up front. Element 0 of each
    # result is used as the protein list and element 1 as the matching scores;
    # the numeric first argument is presumably the score cutoff -- confirm.
    # All result names are kept because later script code may read them.
    lipm = parsehmmoutput(50, hmmoutputfolder + "LipM.txt")
    lipmprots = lipm[0]
    lipmscores = lipm[1]
    lipu = parsehmmoutput(30, hmmoutputfolder + "LipU.txt")
    lipuprots = lipu[0]
    lipuscores = lipu[1]
    lipv = parsehmmoutput(375, hmmoutputfolder + "LipV.txt")
    lipvprots = lipv[0]
    lipvscores = lipv[1]
    toyb = parsehmmoutput(175, hmmoutputfolder + "ToyB.txt")
    toybprots = toyb[0]
    toybscores = toyb[1]
    tund = parsehmmoutput(200, hmmoutputfolder + "TunD.txt")
    tundprots = tund[0]
    tundscores = tund[1]
    pur6 = parsehmmoutput(200, hmmoutputfolder + "pur6.txt")
    pur6prots = pur6[0]
    pur6scores = pur6[1]
    pur10 = parsehmmoutput(600, hmmoutputfolder + "pur10.txt")
    pur10prots = pur10[0]
    pur10scores = pur10[1]
    nikj = parsehmmoutput(200, hmmoutputfolder + "nikJ.txt")
    nikjprots = nikj[0]
    nikjscores = nikj[1]
    niko = parsehmmoutput(400, hmmoutputfolder + "nikO.txt")
    nikoprots = niko[0]
    nikoscores = niko[1]
    # One table-driven loop replaces nine copy-pasted loops; the order of the
    # rows matches the order in which the original loops ran.
    nucleoside_hits = (
        ("LipM-like nucleotidyltransferase", lipmprots, lipmscores),
        ("LipU-like protein", lipuprots, lipuscores),
        ("LipV-like dehydrogenase", lipvprots, lipvscores),
        ("ToyB-like synthase", toybprots, toybscores),
        ("TunD-like putative N-acetylglucosamine transferase", tundprots, tundscores),
        ("Pur6-like synthetase", pur6prots, pur6scores),
        ("Pur10-like oxidoreductase", pur10prots, pur10scores),
        ("NikJ-like protein", nikjprots, nikjscores),
        ("NikO-like enolpyruvyl transferase", nikoprots, nikoscores),
    )
    for domainname, prots, scores in nucleoside_hits:
        # zip() pairs each hit with its own score; scores[prots.index(i)] was
        # quadratic and mis-scored repeated identifiers.
        for prot, score in zip(prots, scores):
            if prot not in nucleoprots:
                nucleoprots.append(prot)
            # NOTE(review): indentation is lost in this listing; the domain is
            # recorded for every hit (sibling of the membership check above),
            # not only for newly added proteins -- confirm against the file.
            detecteddomainsdict.setdefault(prot, []).append([domainname, score])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
#Extract phosphoglycolipid biosynthesis proteins
phosphoprots = []
# Runs only when geneclustertypes contains 1 or 17.
if 1 in geneclustertypes or 17 in geneclustertypes:
    # Element 0 is used as the protein list, element 1 as the matching scores;
    # the first argument (65) is presumably the score cutoff -- confirm.
    phosphogl = parsehmmoutput(65, hmmoutputfolder + "MoeO5.txt")
    phosphoprots = phosphogl[0]
    phosphoscores = phosphogl[1]
    # Pair hits with scores positionally instead of scores[prots.index(i)],
    # which was quadratic and mis-scored repeated identifiers.
    for prot, score in zip(phosphoprots, phosphoscores):
        detecteddomainsdict.setdefault(prot, []).append(
            ["MoeO5-like prenyl-3-phosphoglycerate synthase", score])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
#Extract melanin biosynthesis proteins
melaninprots = []
# Runs only when geneclustertypes contains 1 or 18.
if 1 in geneclustertypes or 18 in geneclustertypes:
    # Element 0 is used as the protein list, element 1 as the matching scores;
    # the first argument (40) is presumably the score cutoff -- confirm.
    melanin = parsehmmoutput(40, hmmoutputfolder + "melC.txt")
    melaninprots = melanin[0]
    melaninscores = melanin[1]
    # Pair hits with scores positionally instead of scores[prots.index(i)],
    # which was quadratic and mis-scored repeated identifiers.
    for prot, score in zip(melaninprots, melaninscores):
        detecteddomainsdict.setdefault(prot, []).append(
            ["MelC-like melanin synthase", score])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4455 #Extract other putative secondary metabolite biosynthesis proteins
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4456 otherprots = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4457 amp_t_prots = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4458 if 1 in geneclustertypes or 19 in geneclustertypes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
# Parse the HMM outputs needed to find proteins that combine an adenylation
# domain with a PP-binding or NAD-binding-4 domain. Element 0 of each result
# is used as the protein list and element 1 as the matching scores.
pptb = parsehmmoutput(20, hmmoutputfolder + "PP-binding.txt")
pptbprots = pptb[0]
pptbscores = pptb[1]
cond = parsehmmoutput(20, hmmoutputfolder + "Condensation.txt")
amp = parsehmmoutput(20, hmmoutputfolder + "AMP-binding.txt")
ampprots = amp[0]
ampscores = amp[1]
ampox = parsehmmoutput(50, hmmoutputfolder + "A-OX.txt")
ampoxprots = ampox[0]
ampoxscores = ampox[1]
nad4 = parsehmmoutput(40, hmmoutputfolder + "NAD_binding_4.txt")
nad4prots = nad4[0]
nad4scores = nad4[1]
cprots = cond[0]
# Work on a COPY of the AMP-binding hits. The original bound aprots to amp[0]
# itself, so appending A-OX-only proteins also lengthened ampprots without
# lengthening ampscores; ampscores[ampprots.index(i)] then raised IndexError
# for those proteins and the "integrated oxidase" branch was unreachable.
aprots = list(amp[0])
for prot in ampox[0]:
    if prot not in aprots:
        aprots.append(prot)
# Proteins with both a condensation and an adenylation hit.
nrpsprots2 = [prot for prot in cprots if prot in aprots]
tprots = pptb[0]
# NOTE(review): indentation is lost in this listing; the domain bookkeeping is
# taken to be nested inside the filter condition of each loop -- confirm.
for prot, ppscore in zip(tprots, pptbscores):
    if prot in aprots and prot not in nrpsprots2 and prot not in aminocoumarinprots:
        otherprots.append(prot)
        amp_t_prots.append(prot)
        # prot is in aprots, hence in ampprots or ampoxprots, so exactly one
        # adenylation entry follows the PP-binding entry.
        domains = detecteddomainsdict.setdefault(prot, [])
        domains.append(["PP-binding domain", ppscore])
        if prot in ampprots:
            domains.append(["Adenylation domain", ampscores[ampprots.index(prot)]])
        elif prot in ampoxprots:
            domains.append(["Adenylation domain with integrated oxidase", ampoxscores[ampoxprots.index(prot)]])
for prot, nadscore in zip(nad4prots, nad4scores):
    if prot in aprots and prot not in aminocoumarinprots:
        # As in the original, no duplicate check is made before appending.
        otherprots.append(prot)
        amp_t_prots.append(prot)
        domains = detecteddomainsdict.setdefault(prot, [])
        domains.append(["NAD-binding domain 4", nadscore])
        if prot in ampprots:
            domains.append(["Adenylation domain", ampscores[ampprots.index(prot)]])
        elif prot in ampoxprots:
            domains.append(["Adenylation domain with integrated oxidase", ampoxscores[ampoxprots.index(prot)]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
# LmbU-like proteins: element 0 of the parse result is used as the protein
# list, element 1 as the matching scores.
lmbu = parsehmmoutput(50, hmmoutputfolder + "LmbU.txt")
lmbuprots = lmbu[0]
lmbuscores = lmbu[1]
# zip() pairs each hit with its own score; scores[prots.index(i)] was
# quadratic and mis-scored repeated identifiers.
for prot, score in zip(lmbuprots, lmbuscores):
    if prot not in otherprots:
        otherprots.append(prot)
    # NOTE(review): indentation is lost in this listing; the domain is recorded
    # for every hit, not only for newly added proteins -- confirm.
    detecteddomainsdict.setdefault(prot, []).append(["LmbU-like protein", score])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
# Goadsporin-like proteins: element 0 of the parse result is used as the
# protein list, element 1 as the matching scores.
goadsporin = parsehmmoutput(500, hmmoutputfolder + "goadsporin_like.txt")
goadsporinprots = goadsporin[0]
goadsporinscores = goadsporin[1]
# zip() pairs each hit with its own score; scores[prots.index(i)] was
# quadratic and mis-scored repeated identifiers.
for prot, score in zip(goadsporinprots, goadsporinscores):
    if prot not in otherprots:
        otherprots.append(prot)
    # NOTE(review): indentation is lost in this listing; the domain is recorded
    # for every hit, not only for newly added proteins -- confirm.
    detecteddomainsdict.setdefault(prot, []).append(["Goadsporin-like protein", score])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
# Neocarzinostatin-like proteins: element 0 of the parse result is used as the
# protein list, element 1 as the matching scores.
neocarzinostat = parsehmmoutput(28, hmmoutputfolder + "Neocarzinostat.txt")
neocarzinostatprots = neocarzinostat[0]
neocarzinostatscores = neocarzinostat[1]
# zip() pairs each hit with its own score; scores[prots.index(i)] was
# quadratic and mis-scored repeated identifiers.
for prot, score in zip(neocarzinostatprots, neocarzinostatscores):
    if prot not in otherprots:
        otherprots.append(prot)
    # NOTE(review): indentation is lost in this listing; the domain is recorded
    # for every hit, not only for newly added proteins -- confirm.
    detecteddomainsdict.setdefault(prot, []).append(["Neocarzinostatin-like protein", score])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
# Register the hits parsed from cyanobactin_synth.txt as "other" core proteins.
# NOTE(review): the first parsehmmoutput() argument (80) looks like a score
# cutoff and the result is used as [protein names, scores] -- confirm against
# parsehmmoutput.
cyanobactin = parsehmmoutput(80, hmmoutputfolder + "cyanobactin_synth.txt")
cyanobactinprots = cyanobactin[0]
cyanobactinscores = cyanobactin[1]
for hitprot in cyanobactinprots:
    if hitprot not in otherprots:
        otherprots.append(hitprot)
    # index() finds the first occurrence, so a name listed twice reuses its first score.
    hitscore = cyanobactinscores[cyanobactinprots.index(hitprot)]
    # Extend the protein's domain list, creating it the first time the protein is seen.
    detecteddomainsdict.setdefault(hitprot, []).append(["Cyanobactin protease", hitscore])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
# Register the hits parsed from cycdipepsynth.txt as "other" core proteins.
# NOTE(review): the first parsehmmoutput() argument (110) looks like a score
# cutoff and the result is used as [protein names, scores] -- confirm against
# parsehmmoutput.
cycdipeptide = parsehmmoutput(110, hmmoutputfolder + "cycdipepsynth.txt")
cycdipeptideprots = cycdipeptide[0]
cycdipeptidescores = cycdipeptide[1]
for hitprot in cycdipeptideprots:
    if hitprot not in otherprots:
        otherprots.append(hitprot)
    # index() finds the first occurrence, so a name listed twice reuses its first score.
    hitscore = cycdipeptidescores[cycdipeptideprots.index(hitprot)]
    # Extend the protein's domain list, creating it the first time the protein is seen.
    detecteddomainsdict.setdefault(hitprot, []).append(["Cyclodipeptide synthase", hitscore])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
# Register the hits parsed from fom1.txt as "other" core proteins.
# NOTE(review): the first parsehmmoutput() argument (750) looks like a score
# cutoff and the result is used as [protein names, scores] -- confirm against
# parsehmmoutput.
fom1 = parsehmmoutput(750, hmmoutputfolder + "fom1.txt")
fom1prots = fom1[0]
fom1scores = fom1[1]
for hitprot in fom1prots:
    if hitprot not in otherprots:
        otherprots.append(hitprot)
    # index() finds the first occurrence, so a name listed twice reuses its first score.
    hitscore = fom1scores[fom1prots.index(hitprot)]
    # Extend the protein's domain list, creating it the first time the protein is seen.
    detecteddomainsdict.setdefault(hitprot, []).append(["Fom1-like phosphomutase", hitscore])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
# Register the hits parsed from bcpB.txt as "other" core proteins.
# NOTE(review): the first parsehmmoutput() argument (400) looks like a score
# cutoff and the result is used as [protein names, scores] -- confirm against
# parsehmmoutput.
bcpb = parsehmmoutput(400, hmmoutputfolder + "bcpB.txt")
bcpbprots = bcpb[0]
bcpbscores = bcpb[1]
for hitprot in bcpbprots:
    if hitprot not in otherprots:
        otherprots.append(hitprot)
    # index() finds the first occurrence, so a name listed twice reuses its first score.
    hitscore = bcpbscores[bcpbprots.index(hitprot)]
    # Extend the protein's domain list, creating it the first time the protein is seen.
    detecteddomainsdict.setdefault(hitprot, []).append(["BcpB-like phosphomutase", hitscore])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
# Register the hits parsed from frbD.txt as "other" core proteins.
# NOTE(review): the first parsehmmoutput() argument (350) looks like a score
# cutoff and the result is used as [protein names, scores] -- confirm against
# parsehmmoutput.
frbd = parsehmmoutput(350, hmmoutputfolder + "frbD.txt")
frbdprots = frbd[0]
frbdscores = frbd[1]
for hitprot in frbdprots:
    if hitprot not in otherprots:
        otherprots.append(hitprot)
    # index() finds the first occurrence, so a name listed twice reuses its first score.
    hitscore = frbdscores[frbdprots.index(hitprot)]
    # Extend the protein's domain list, creating it the first time the protein is seen.
    detecteddomainsdict.setdefault(hitprot, []).append(["FrbD-like phosphomutase", hitscore])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
# Register the hits parsed from mitE.txt as "other" core proteins.
# NOTE(review): the first parsehmmoutput() argument (400) looks like a score
# cutoff and the result is used as [protein names, scores] -- confirm against
# parsehmmoutput.
mite = parsehmmoutput(400, hmmoutputfolder + "mitE.txt")
miteprots = mite[0]
mitescores = mite[1]
for hitprot in miteprots:
    if hitprot not in otherprots:
        otherprots.append(hitprot)
    # index() finds the first occurrence, so a name listed twice reuses its first score.
    hitscore = mitescores[miteprots.index(hitprot)]
    # Extend the protein's domain list, creating it the first time the protein is seen.
    detecteddomainsdict.setdefault(hitprot, []).append(["MitE-like CoA-ligase", hitscore])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
# Register the hits parsed from vlmB.txt as "other" core proteins.
# NOTE(review): the first parsehmmoutput() argument (250) looks like a score
# cutoff and the result is used as [protein names, scores] -- confirm against
# parsehmmoutput.
vlmb = parsehmmoutput(250, hmmoutputfolder + "vlmB.txt")
vlmbprots = vlmb[0]
vlmbscores = vlmb[1]
for hitprot in vlmbprots:
    if hitprot not in otherprots:
        otherprots.append(hitprot)
    # index() finds the first occurrence, so a name listed twice reuses its first score.
    hitscore = vlmbscores[vlmbprots.index(hitprot)]
    # Extend the protein's domain list, creating it the first time the protein is seen.
    detecteddomainsdict.setdefault(hitprot, []).append(["Valanimycin biosynthesis VlmB domain", hitscore])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
# Register the hits parsed from prnB.txt as "other" core proteins.
# NOTE(review): the first parsehmmoutput() argument (200) looks like a score
# cutoff and the result is used as [protein names, scores] -- confirm against
# parsehmmoutput.
prnb = parsehmmoutput(200, hmmoutputfolder + "prnB.txt")
prnbprots = prnb[0]
prnbscores = prnb[1]
for hitprot in prnbprots:
    if hitprot not in otherprots:
        otherprots.append(hitprot)
    # index() finds the first occurrence, so a name listed twice reuses its first score.
    hitscore = prnbscores[prnbprots.index(hitprot)]
    # Extend the protein's domain list, creating it the first time the protein is seen.
    detecteddomainsdict.setdefault(hitprot, []).append(["Pyrrolnitrin biosynthesis PrnB domain", hitscore])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
# Empty the protein lists of classes that were not requested. A class is kept
# when its own type code or type code 1 is present in geneclustertypes.
# NOTE(review): type code 1 presumably means "all cluster types" -- confirm
# where geneclustertypes is built.
if not (5 in geneclustertypes or 1 in geneclustertypes):
    nrpsprots = []
if not (4 in geneclustertypes or 1 in geneclustertypes):
    t3pksprots = []
if not (3 in geneclustertypes or 1 in geneclustertypes):
    t2pksprots = []
if not (2 in geneclustertypes or 1 in geneclustertypes):
    # Type code 2 governs three lists at once.
    t1pksprots = []
    t4pksprots = []
    transatpksprots = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
#Assemble all core sec met proteins
# Merge the per-class protein lists into one duplicate-free list, visiting the
# classes in a fixed order, then sort the result.
allsecmetprots = []
for protgroup in (t1pksprots, transatpksprots, t2pksprots, t3pksprots,
                  t4pksprots, nrpsprots, terpeneprots, lantprots, bcinprots,
                  lactamprots, amglyccyclprots, siderophoreprots, ectprots,
                  butyrprots, indoleprots, nucleoprots, phosphoprots,
                  melaninprots, aminocoumarinprots, otherprots):
    for protname in protgroup:
        if protname not in allsecmetprots:
            allsecmetprots.append(protname)
allsecmetprots.sort()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4721
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
if not allsecmetprots:
    # No core protein was found: note it in the log file, report it on stderr
    # and terminate with exit status 1.
    logfile.write("No secondary metabolite biosynthesis gene clusters detected in this nucleotide file.\n")
    logfile.close()
    print >> sys.stderr, "No secondary metabolite biosynthesis gene clusters detected in this nucleotide file."
    sys.exit(1)

# Seconds passed since starttime.
elapsed = time.time() - starttime
#print "4713Time since start: " + str(elapsed)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4730
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
#Extract approximate gene clusters based on hmmsearch results, create list of core PKS / NRPS genes for further analysis (use less strict parameters for this than in gene cluster detection to include all PKS/NRPS domains)
#Create nucleotide fasta files with sec met gene clusters
logfile.write("Extracting gene clusters from gbk/embl file using detected signature genes...\n")
# Output files for the cluster proteins and the cluster table.
fastafile = open(genomename + "/clusterblast/geneclusterprots.fasta", "w")
txtfile = open(genomename + "/clusterblast/geneclusters.txt", "w")
# Workbook whose sheet '0' gets a bold header row.
# NOTE(review): Workbook/Font/XFStyle presumably come from the spreadsheet
# library imported at the top of the file -- not visible from here.
wb = Workbook()
font1 = Font()
style1 = XFStyle()
style1.font = font1
font1.bold = True
ws0 = wb.add_sheet('0')
headercells = ["Input accession number", "Input name", "Gene cluster type", "Gene cluster genes"]
if clusterblast == "y":
    # Extra column only when clusterblast was requested.
    headercells.append("Compound with gene cluster of highest homology")
for colnr, headertext in enumerate(headercells):
    ws0.write(0, colnr, headertext, style1)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
protcodes = allsecmetprots
nuccode = genomename
# Read the whole input file and normalise carriage returns to newlines.
gbkfile = open(infile, "r")
output = gbkfile.read()
output = output.replace("\r", "\n")
#Extract description of nucleotide from gbk/embl file
# The file type is recognised by a substring test on the file name. nucname
# stays unset when neither branch matches.
# NOTE(review): presumably the file name was validated earlier -- confirm.
if any(ext in infile for ext in (".gbk", ".GBK", ".gb", ".GB", ".genbank", ".GENBANK")):
    try:
        # Description = text between the first DEFINITION tag and the first ACCESSION tag.
        nucname1 = output.split("ACCESSION ")[0]
        nucname2 = nucname1.split("DEFINITION ")[1]
        nucname3 = nucname2.replace("\n", "")
        # Collapse runs of spaces left by wrapped lines. The test must look for a
        # double space: testing for a single space never terminates once the
        # description contains one.
        while "  " in nucname3:
            nucname3 = nucname3.replace("  ", " ")
        nucname = nucname3
    except IndexError:
        # No DEFINITION tag found: fall back to a placeholder name.
        nucname = "input_nucleotide"
elif any(ext in infile for ext in (".embl", ".EMBL", ".emb", ".EMB")):
    try:
        # Description = rest of the line after the first DE tag.
        nucname1 = output.split("DE ")[1]
        nucname2 = nucname1.split("\n")[0]
        nucname3 = nucname2.replace("\n", "")
        # Same double-space collapse as in the GenBank branch.
        while "  " in nucname3:
            nucname3 = nucname3.replace("  ", " ")
        nucname = nucname3
    except IndexError:
        # No DE tag found: fall back to a placeholder name.
        nucname = "input_nucleotide"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
# Gene names and per-gene records come from the parsed input (proteins[2] / proteins[3]).
genelist = proteins[2]
genedict = proteins[3]
#Save all locations of query proteins on the nucleotide in a list
protstartlocations = []
protendlocations = []
for protcode in protcodes:
    if protcode not in genelist:
        continue
    # The two stored coordinates may be in either order; the smaller one is
    # taken as start and the larger one as end.
    firstcoord = int(genedict[protcode][0])
    secondcoord = int(genedict[protcode][1])
    protstart_abs = min(firstcoord, secondcoord)
    protend_abs = max(firstcoord, secondcoord)
    protstartlocations.append(protstart_abs)
    protendlocations.append(protend_abs)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
#Identify clusters of genes based on protein locations on the nucleotide
# Walk the core genes in order of start position. A gene that starts more than
# 20000 positions after the end of the previous gene closes the running cluster
# and opens a new one; otherwise it joins the running cluster. Every gene,
# including the last one, is judged by this same rule, and all boundaries are
# stored as strings.
clusterstarts = []
clusterends = []
# NOTE(review): starts and ends are sorted independently, so index a pairs a
# start with an end only if genes are not nested inside each other -- confirm
# this holds for the inputs.
protstartlocations.sort()
protendlocations.sort()
nrlocations = len(protstartlocations)
for a, i in enumerate(protstartlocations):
    if a == 0:
        # The first gene always opens the first cluster.
        clusterstarts.append(str(i))
    elif i > protendlocations[a - 1] + 20000:
        # Gap too large: close the running cluster at the previous gene's end.
        clusterends.append(str(protendlocations[a - 1]))
        clusterstarts.append(str(i))
    if a == nrlocations - 1:
        # The last gene closes whichever cluster is still open.
        clusterends.append(str(protendlocations[a]))
lastendlocation = i
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
#Extend clusters with 20kb on each side of the identified core genes
# Starts are moved 20000 to the left but never below 0; ends are moved 20000 to
# the right without an upper bound. Both lists hold ints afterwards.
clusterstarts2 = [max(int(corestart) - 20000, 0) for corestart in clusterstarts]
clusterstarts = clusterstarts2
clusterends2 = [int(coreend) + 20000 for coreend in clusterends]
clusterends = clusterends2
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4829 #For each genbank secondary metabolite gene cluster: extract all proteins and write to fasta,
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4830 a = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4831 clusterinfo = {}
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4832 geneclusters = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4833 geneclustergenes = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4834 allcoregenes = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4835 for i in clusterstarts:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4836 cstart = int(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4837 cend = int(clusterends[a])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4838 a += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4839 clusternr = a
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4840 geneclusters.append(clusternr)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4841 coregenes = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4842 clustergenes = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4843 #For each gene in nucleotide, check if it is inside this cluster; if, so append info to list of clustergenes
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4844 if a == 1:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4845 for i in genelist:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4846 geneinfo = genedict[i][:-1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4847 geneinfo.append(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4848 genedict[i] = geneinfo
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4849 for i in genelist:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4850 geneinfo = genedict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4851 genestart = int(geneinfo[0])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4852 geneend = int(geneinfo[1])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4853 if (genestart > cstart and genestart < cend) or (geneend > cstart and geneend < cend):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4854 clustergenes.append(geneinfo)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4855 #Determine type of cluster
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4856 type = "other"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4857 z = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4858 for k in clustergenes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4859 i = k[4]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4860 if i in t1pksprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4861 if z == 0:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4862 type = "t1pks"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4863 elif "t1pks" not in type:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4864 type = type + "-t1pks"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4865 z = 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4866 if i in transatpksprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4867 if z == 0:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4868 type = "transatpks"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4869 elif "transatpks" not in type:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4870 type = type + "-transatpks"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4871 z = 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4872 if i in t2pksprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4873 if z == 0:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4874 type = "t2pks"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4875 elif "t2pks" not in type:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4876 type = type + "-t2pks"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4877 z = 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4878 if i in t3pksprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4879 if z == 0:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4880 type = "t3pks"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4881 elif "t3pks" not in type:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4882 type = type + "-t3pks"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4883 z = 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4884 if i in t4pksprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4885 if z == 0:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4886 type = "t1pks"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4887 elif "t1pks" not in type:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4888 type = type + "-t1pks"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4889 z = 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4890 if i in nrpsprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4891 if z == 0:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4892 type = "nrps"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4893 elif "nrps" not in type:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4894 type = type + "-nrps"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4895 z = 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4896 if i in terpeneprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4897 if z == 0:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4898 type= "terpene"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4899 elif "terpene" not in type:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4900 type = type + "-terpene"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4901 z = 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4902 if i in lantprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4903 if z == 0:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4904 type= "lant"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4905 elif "lant" not in type:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4906 type = type + "-lant"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4907 z = 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4908 if i in bcinprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4909 if z == 0:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4910 type= "bcin"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4911 elif "bcin" not in type:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4912 type = type + "-bcin"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4913 z = 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4914 if i in lactamprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4915 if z == 0:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4916 type = "blactam"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4917 elif "blactam" not in type:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4918 type = type + "-blactam"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4919 z = 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4920 if i in amglyccyclprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4921 if z == 0:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4922 type = "amglyccycl"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4923 elif "amglyccycl" not in type:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4924 type = type + "-amglyccycl"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4925 z = 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4926 if i in siderophoreprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4927 if z == 0:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4928 type = "siderophore"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4929 elif "siderophore" not in type:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4930 type = type + "-siderophore"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4931 z = 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4932 if i in ectprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4933 if z == 0:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4934 type = "ectoine"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4935 elif "ectoine" not in type:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4936 type = type + "-ectoine"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4937 z = 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4938 if i in indoleprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4939 if z == 0:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4940 type = "indole"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4941 elif "indole" not in type:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4942 type = type + "-indole"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4943 z = 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4944 if i in nucleoprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4945 if z == 0:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4946 type = "nucleoside"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4947 elif "nucleoside" not in type:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4948 type = type + "-nucleoside"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4949 z = 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4950 if i in phosphoprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4951 if z == 0:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4952 type = "phosphoglycolipid"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4953 elif "phosphoglycolipid" not in type:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4954 type = type + "-phosphoglycolipid"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4955 z = 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4956 if i in butyrprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4957 if z == 0:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4958 type = "butyrolactone"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4959 elif "butyrolactone" not in type:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4960 type = type + "-butyrolactone"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4961 z = 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4962 if i in melaninprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4963 if z == 0:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4964 type = "melanin"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4965 elif "melanin" not in type:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4966 type = type + "-melanin"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4967 z = 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4968 if i in aminocoumarinprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4969 if z == 0:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4970 type = "aminocoumarin"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4971 elif "aminocoumarin" not in type:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4972 type = type + "-aminocoumarin"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4973 z = 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4974 if "other-" in type[:6]:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4975 type = type[6:]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4976 #Shorten gene cluster if type is among typically short gene cluster types
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4977 if cend > dnaseqlength:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4978 cend = dnaseqlength
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4979 if type == "t3pks" or type == "t2pks":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4980 if cstart != 0:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4981 cstart = cstart + 5000
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4982 if cend != dnaseqlength:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4983 cend = cend - 5000
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4984 clustergenes2 = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4985 for i in clustergenes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4986 start = int(i[0])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4987 end = int(i[1])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4988 if (start > cstart and start < cend) or (end > cstart and end < cend):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4989 clustergenes2.append(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4990 clustergenes = clustergenes2
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4991 if type == "bcin" or type == "siderophore" or type == "lant" or type == "terpene":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4992 if cstart != 0:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4993 cstart = cstart + 10000
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4994 if cend != dnaseqlength:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4995 cend = cend - 10000
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4996 clustergenes2 = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4997 for i in clustergenes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4998 start = int(i[0])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
4999 end = int(i[1])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5000 if (start > cstart and start < cend) or (end > cstart and end < cend):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5001 clustergenes2.append(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5002 clustergenes = clustergenes2
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5003 if type == "butyrolactone" or type == "melanin" or type == "ectoine":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5004 if cstart != 0:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5005 cstart = cstart + 17000
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5006 if cend != dnaseqlength:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5007 cend = cend - 17000
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5008 clustergenes2 = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5009 for i in clustergenes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5010 start = int(i[0])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5011 end = int(i[1])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5012 if (start > cstart and start < cend) or (end > cstart and end < cend):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5013 clustergenes2.append(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5014 clustergenes = clustergenes2
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5015 #For all clustergenes, write info to fasta
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5016 for i in clustergenes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5017 start = str(i[0])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5018 end = str(i[1])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5019 strand = i[2]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5020 seq = seqdict[i[4]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5021 ann = i[3].replace(" ","_")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5022 accession = i[4]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5023 name = nuccode + "|c" + str(a) + "|" + start + "-" + end + "|" + strand + "|" + accession + "|" + ann
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5024 fastafile.write(">" + name + "\n" + seq + "\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5025 if accession not in geneclustergenes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5026 geneclustergenes.append(accession)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5027 #Write gene cluster info to separate txt file
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5028 txtfile.write(nuccode + "\t" + nucname + "\t" + "c" + str(a) + "\t" + type + "\t")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5029 ws0.write(a,0,genomic_accnr)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5030 try:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5031 ws0.write(a,1,nucname)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5032 except:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5033 ws0.write(a,1,"Name to long to be contained in Excel cell; see txt file in downloadable zip archive.")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5034 ws0.write(a,2,type)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5035 xlsgenesfield = ""
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5036 for i in clustergenes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5037 txtfile.write(i[4] + ";")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5038 xlsgenesfield = xlsgenesfield + i[4] + ";"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5039 txtfile.write("\t")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5040 for i in clustergenes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5041 txtfile.write(accessiondict[i[4]] + ";")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5042 xlsgenesfield = xlsgenesfield[:-1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5043 try:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5044 ws0.write(a,3,xlsgenesfield)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5045 except:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5046 ws0.write(a,3,"Too many genes to be contained in Excel cell; see txt file in downloadable zip archive.")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5047 txtfile.write("\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5048 #Write gene cluster info to clusterinfo dictionary
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5049 for i in clustergenes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5050 if i[4] in allsecmetprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5051 coregenes.append(i[4])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5052 allcoregenes.append(i[4])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5053 clusterinfo[clusternr] = [type,cstart,cend,coregenes,clustergenes]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5054 #Close fasta and txt files (the xls worksheet ws0 is not closed here)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5055 fastafile.close()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5056 txtfile.close()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5057
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5058 #Analysis of core PKS/NRPS genes (separate py), detect subgroups and predict specificities and final products
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5059 #Make list of PKS / NRPS gene clusters to be analysed
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5060 #print "Analysing core PKS/NRPS genes..."
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5061 logfile.write("Analysing core PKS/NRPS genes...\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5062 pksnrpsgeneclusters = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5063 pksnrpscoregenes = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5064 for i in geneclusters:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5065 if "t1pks" in clusterinfo[i][0] or "t4pks" in clusterinfo[i][0] or "transatpks" in clusterinfo[i][0] or "nrps" in clusterinfo[i][0]:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5066 pksnrpsgeneclusters.append(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5067 for i in t1pksprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5068 pksnrpscoregenes.append(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5069 for i in transatpksprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5070 pksnrpscoregenes.append(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5071 for i in t4pksprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5072 pksnrpscoregenes.append(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5073 for i in nrpsprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5074 pksnrpscoregenes.append(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5075 for i in amp_t_prots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5076 pksnrpscoregenes.append(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5077 pksnrpsgenestartdict = {}
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5078 for i in pksnrpscoregenes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5079 start = int(genedict[i][0])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5080 pksnrpsgenestartdict[i] = start
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5081 pksnrpscoregenes = sortdictkeysbyvalues(pksnrpsgenestartdict)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5082 nrpsnames = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5083 nrpsseqs = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5084 pksnrpsnames = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5085 pksnrpsseqs = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5086 pksnames = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5087 pksseqs = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5088 calnames = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5089 calseqs = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5090 krnames = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5091 krseqs = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5092 nrpspkstypedict = {}
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5093 domaindict = {}
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5094 if len(pksnrpscoregenes) > 0:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5095 #Write PKS / NRPS core genes to FASTA file
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5096 for i in pksnrpscoregenes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5097 name = i
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5098 seq = seqdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5099 pksnrpsnames.append(name)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5100 pksnrpsseqs.append(seq)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5101 writefasta(pksnrpsnames,pksnrpsseqs,genomename + "/nrpspks_proteins.fasta")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5102 #Analyse for abMotifs
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5103 hmmsearch = hmmscan_path + " --cpu " + str(nrcpus) + " -E 0.1 -o " + genomename + "/nrpspks/abmotifshmm_output.txt" + " --noali --tblout " + genomename + "/nrpspks/abmotifshmm.txt "+ hmms_path +"abmotifs.hmm " + genomename + "/nrpspks_proteins.fasta"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5104 os.system(hmmsearch)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5105 mhmmlengthsdict = hmmlengths(hmms_path+"abmotifs.hmm")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5106 motifdict = hmmscanparse(genomename + "/nrpspks/abmotifshmm_output.txt",mhmmlengthsdict)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5107 #Analyse for C/A/PCP/E/KS/AT/ATd/DH/KR/ER/ACP/TE/TD/COM/Docking/MT/CAL domains
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5108 hmmsearch = hmmscan_path + " --cut_tc --cpu " + str(nrcpus) + " -o " + genomename + "/nrpspks/nrpspkshmm_output.txt" + " --noali --tblout " + genomename + "/nrpspks/nrpspkshmm.txt "+ hmms_path +"nrpspksdomains.hmm " + genomename + "/nrpspks_proteins.fasta"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5109 os.system(hmmsearch)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5110 hmmlengthsdict = hmmlengths(hmms_path+"nrpspksdomains.hmm")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5111 domaindict = hmmscanparse(genomename + "/nrpspks/nrpspkshmm_output.txt",hmmlengthsdict)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5112 nrpspksdomainsfile = open(genomename + "/nrpspks/nrpspksdomains.txt","w")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5113 #Analyse KS domains & PKS/NRPS protein domain composition to detect NRPS/PKS types
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5114 kshmmsearch = hmmscan_path + " --cut_tc --cpu " + str(nrcpus) + " -o " + genomename + "/nrpspks/kshmm_output.txt" + " --noali --tblout " + genomename + "/nrpspks/kshmm.txt " + hmms_path + "ksdomains.hmm " + genomename + "/nrpspks_proteins.fasta"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5115 os.system(kshmmsearch)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5116 kshmmlengthsdict = hmmlengths(hmms_path+"ksdomains.hmm")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5117 ksdomaindict = hmmscanparse(genomename + "/nrpspks/kshmm_output.txt",kshmmlengthsdict)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5118 for k in pksnrpscoregenes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5119 #structure of domaindict: domaindict[genename] = [[name,start,end,evalue,score],[name,start,end,evalue,score], etc.]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5120 domainlist = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5121 nrKSdomains = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5122 for i in domaindict[k]:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5123 domainlist.append(i[0])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5124 if i[0] == "PKS_KS":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5125 nrKSdomains += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5126 modKSscore = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5127 traKSscore = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5128 eneKSscore = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5129 iterKSscore = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5130 for i in ksdomaindict[k]:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5131 if i[0] == "Trans-AT-KS":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5132 traKSscore += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5133 if i[0] == "Modular-KS":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5134 modKSscore += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5135 if i[0] == "Enediyne-KS":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5136 eneKSscore += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5137 if i[0] == "Iterative-KS":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5138 iterKSscore += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5139 for i in domaindict[k]:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5140 if "Cglyc" in domainlist and "Epimerization" in domainlist and "AMP-binding" in domainlist and "PKS_KS" not in domainlist and "PKS_AT" not in domainlist:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5141 type = "Glycopeptide NRPS"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5142 elif ("Condensation_LCL" in domainlist or "Condensation_DCL" in domainlist or "Condensation_Starter" in domainlist or "Cglyc" in domainlist or "Condensation_Dual" in domainlist) and "AMP-binding" in domainlist and "PKS_KS" not in domainlist and "PKS_AT" not in domainlist:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5143 type = "NRPS"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5144 elif ("Condensation_LCL" in domainlist or "Condensation_DCL" in domainlist or "Condensation_Starter" in domainlist or "Cglyc" in domainlist or "Condensation_Dual" in domainlist) or "AMP-binding" in domainlist and ("PKS_KS" in domainlist or "PKS_AT" in domainlist):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5145 type = "Hybrid PKS-NRPS"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5146 elif ("Condensation_LCL" not in domainlist and "Condensation_DCL" not in domainlist and "Condensation_Starter" not in domainlist and "Cglyc" not in domainlist and "Condensation_Dual" not in domainlist and "AMP-binding" not in domainlist) and "PKS_KS" in domainlist and "PKS_AT" not in domainlist and "Trans-AT_docking" in domainlist and traKSscore > modKSscore and traKSscore > iterKSscore and traKSscore > eneKSscore:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5147 type = "Type I Trans-AT PKS"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5148 elif ("Condensation_LCL" not in domainlist and "Condensation_DCL" not in domainlist and "Condensation_Starter" not in domainlist and "Cglyc" not in domainlist and "Condensation_Dual" not in domainlist and "AMP-binding" not in domainlist) and "PKS_KS" in domainlist and "PKS_AT" in domainlist and iterKSscore > modKSscore and iterKSscore > traKSscore and iterKSscore > eneKSscore and nrKSdomains < 3:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5149 type = "Type I Iterative PKS"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5150 elif ("Condensation_LCL" not in domainlist and "Condensation_DCL" not in domainlist and "Condensation_Starter" not in domainlist and "Cglyc" not in domainlist and "Condensation_Dual" not in domainlist and "AMP-binding" not in domainlist) and "PKS_KS" in domainlist and "PKS_AT" in domainlist and eneKSscore > modKSscore and eneKSscore > traKSscore and eneKSscore > iterKSscore and nrKSdomains < 3:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5151 type = "Type I Enediyne PKS"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5152 elif ("Condensation_LCL" not in domainlist and "Condensation_DCL" not in domainlist and "Condensation_Starter" not in domainlist and "Cglyc" not in domainlist and "Condensation_Dual" not in domainlist and "AMP-binding" not in domainlist) and "PKS_KS" in domainlist and "PKS_AT" in domainlist and ((modKSscore > eneKSscore and modKSscore > traKSscore and modKSscore > iterKSscore) or nrKSdomains > 3):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5153 type = "Type I Modular PKS"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5154 elif ("Condensation_LCL" not in domainlist and "Condensation_DCL" not in domainlist and "Condensation_Starter" not in domainlist and "Cglyc" not in domainlist and "Condensation_Dual" not in domainlist and "AMP-binding" not in domainlist) and "PKS_KS" in domainlist and "PKS_AT" in domainlist:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5155 type = "PKS-like protein"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5156 elif ("Condensation_LCL" in domainlist or "Condensation_DCL" in domainlist or "Condensation_Starter" in domainlist or "Cglyc" in domainlist or "Condensation_Dual" in domainlist or "AMP-binding" in domainlist) and "PKS_KS" not in domainlist and "PKS_AT" not in domainlist:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5157 type = "NRPS-like protein"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5158 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5159 type = "PKS/NRPS-like protein"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5160 nrpspkstypedict[k] = type
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
# Write one record per PKS/NRPS core gene to the domains report: the gene
# name, its predicted type, a column header, one tab-separated row per domain
# hit, a motif marker line, one row per motif hit, and two newlines.
# The report is closed once every gene has been written.
hit_row_format = "%s\t%s\t%s\t%s\t%s\n"
for gene in pksnrpscoregenes:
    # Look both hit lists up before writing anything for this gene.
    domain_hits = domaindict[gene]
    motif_hits = motifdict[gene]
    nrpspksdomainsfile.write(">> " + gene + "\n")
    nrpspksdomainsfile.write(">> " + nrpspkstypedict[gene] + "\n")
    nrpspksdomainsfile.write("name\tstart\tend\te-value\tscore\n")
    for hit in domain_hits:
        nrpspksdomainsfile.write(hit_row_format % (hit[0], hit[1], hit[2], hit[3], hit[4]))
    nrpspksdomainsfile.write("** Motifs: **\n")
    for hit in motif_hits:
        nrpspksdomainsfile.write(hit_row_format % (hit[0], hit[1], hit[2], hit[3], hit[4]))
    nrpspksdomainsfile.write("\n\n")
nrpspksdomainsfile.close()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5177
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
elapsed = (time.time() - starttime)

# Predict NRPS A domain specificities with NRPSPredictor2.
logfile.write("Predicting NRPS A domain substrate specificities by NRPSPredictor\n")
# Collect every AMP-binding / A-OX domain of each core gene. Each sequence is
# sliced from the domain start to 120 residues past the reported domain end
# and named <gene>_A<n>, where n counts the A domains within that gene.
# NOTE(review): the original comment called the extra 120 residues
# "N-terminal", but the code extends the END coordinate -- confirm intent.
for gene in pksnrpscoregenes:
    nr = 0
    for domain in domaindict[gene]:
        if domain[0] in ("AMP-binding", "A-OX"):
            nr += 1
            start = int(domain[1])
            end = int(domain[2]) + 120
            seq = seqdict[gene][start:end]
            name = gene + "_A" + str(nr)
            nrpsnames.append(name)
            nrpsseqs.append(seq)
if nrpsnames:
    writefasta(nrpsnames,nrpsseqs,"NRPSPredictor2/nrpsseqs.fasta")
    # sys.platform is 'linux2' or 'linux3' on older Python 2 builds and
    # 'linux' on Python 3, so match on the prefix instead of one exact value.
    on_windows = sys.platform == 'win32'
    on_linux = sys.platform.startswith('linux')
    os.chdir("NRPSPredictor2/")
    try:
        # Get NRPSPredictor2 code predictions; this also writes the signature
        # file (input.sig) that the NRPSPredictor2 SVMs take as input.
        if on_windows:
            nrpspred2codecommand = 'nrpscodepred nrpsseqs.fasta input.sig nrpscodes.txt > nul'
        elif on_linux:
            nrpspred2codecommand = 'python nrpscodepred.py nrpsseqs.fasta input.sig nrpscodes.txt > /dev/null'
        os.system(nrpspred2codecommand)
        # Run NRPSPredictor2 SVM
        currentdir = os.getcwd()
        if on_windows:
            nrpspred2command = 'java -Ddatadir="' + currentdir + '\\data" -cp build/NRPSpredictor2.jar;lib/java-getopt-1.0.13.jar;lib/Utilities.jar;lib/libsvm.jar org.roettig.NRPSpredictor2.NRPSpredictor2 -i input.sig -r ..\\' + nrpspredictoroutputfolder + 'nrpspredictor2.out -s 1'
        elif on_linux:
            nrpspred2command = './NRPSpredictor2.sh -i input.sig -r ../' + nrpspredictoroutputfolder + 'nrpspredictor2.out -s 1'
        # os.popen() returns as soon as the child is started. Read its output
        # to EOF and close the pipe explicitly so the predictor has finished
        # before its result file is used, instead of relying on the pipe
        # object being garbage-collected right away.
        predictor_pipe = os.popen(nrpspred2command)
        try:
            predictor_pipe.read()
        finally:
            predictor_pipe.close()
        # Copy NRPSPredictor results
        if on_windows:
            copycommand = 'copy/y nrpscodes.txt ..\\' + nrpspredictoroutputfolder.replace("/","\\") + ' > nul'
        elif on_linux:
            copycommand = 'cp nrpscodes.txt ../' + nrpspredictoroutputfolder + " > /dev/null"
        os.system(copycommand)
    finally:
        # Always return to the parent directory, even if a step above raised.
        os.chdir("..")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
elapsed = (time.time() - starttime)
# The section below, up to the next timing checkpoint, takes about 500 s,
# presumably mainly because of inefficient minowa_A code.
# Minowa method: extract each AMP-binding / A-OX domain and run Minowa_A.
if nrpsnames:
    # The trailing newline was missing here, unlike every other log message.
    logfile.write("Predicting NRPS A domain substrate specificities by Minowa et al. method\n")
    # Unlike the NRPSPredictor2 input, these sequences span exactly the
    # reported domain coordinates (no 120-residue extension).
    nrpsnames2 = []
    nrpsseqs2 = []
    for gene in pksnrpscoregenes:
        nr = 0
        for domain in domaindict[gene]:
            if domain[0] in ("AMP-binding", "A-OX"):
                nr += 1
                start = int(domain[1])
                end = int(domain[2])
                seq = seqdict[gene][start:end]
                name = gene + "_A" + str(nr)
                nrpsnames2.append(name)
                nrpsseqs2.append(seq)
    writefasta(nrpsnames2,nrpsseqs2,minowanrpsoutputfolder + "nrpsseqs.fasta")
    # sys.platform is 'linux2' or 'linux3' on older Python 2 builds and
    # 'linux' on Python 3, so match on the prefix instead of one exact value.
    if sys.platform == 'win32':
        minowanrpscommand = "minowa_A ../" + minowanrpsoutputfolder + "nrpsseqs.fasta ../" + minowanrpsoutputfolder + "nrpspredoutput.txt"
    elif sys.platform.startswith('linux'):
        minowanrpscommand = "python minowa_A.py ../" + minowanrpsoutputfolder + "nrpsseqs.fasta ../" + minowanrpsoutputfolder + "nrpspredoutput.txt"
    # The command's "../" paths are relative to the Minowa/ directory.
    os.chdir("Minowa/")
    try:
        os.system(minowanrpscommand)
    finally:
        # Always return to the parent directory, even if the call raised.
        os.chdir("..")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5250
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
elapsed = (time.time() - starttime)
# Predict PKS AT domain specificities with the PKS signature analysis and the
# Minowa et al. method.
# Collect the sequence of every PKS_AT domain of each core gene, named
# <gene>_AT<n>, where n counts the AT domains within that gene.
for gene in pksnrpscoregenes:
    nr = 0
    for domain in domaindict[gene]:
        if domain[0] == "PKS_AT":
            nr += 1
            start = int(domain[1])
            end = int(domain[2])
            seq = seqdict[gene][start:end]
            name = gene + "_AT" + str(nr)
            pksnames.append(name)
            pksseqs.append(seq)
if pksnames:
    # Both predictors get their own copy of the same FASTA file.
    writefasta(pksnames,pksseqs,pkssignatureoutputfolder + "pksseqs.fasta")
    writefasta(pksnames,pksseqs,minowapksoutputfolder + "pksseqs.fasta")
    # sys.platform is 'linux2' or 'linux3' on older Python 2 builds and
    # 'linux' on Python 3, so match on the prefix instead of one exact value.
    on_windows = sys.platform == 'win32'
    on_linux = sys.platform.startswith('linux')
    # Run PKS signature analysis
    elapsed = (time.time() - starttime)
    print("Predicting PKS AT domain substrate specificities by Yadav et al. PKS signature sequences")
    logfile.write("Predicting PKS AT domain substrate specificities by Yadav et al. PKS signature sequences\n")
    if on_windows:
        pkspredcommand = "PKS_analysis ../" + pkssignatureoutputfolder + "pksseqs.fasta ../" + pkssignatureoutputfolder + "pkspredoutput.txt"
    elif on_linux:
        pkspredcommand = "python PKS_analysis.py ../" + pkssignatureoutputfolder + "pksseqs.fasta ../" + pkssignatureoutputfolder + "pkspredoutput.txt"
    # The command's "../" paths are relative to the pkssignatures/ directory.
    os.chdir("pkssignatures/")
    try:
        os.system(pkspredcommand)
    finally:
        # Always return to the parent directory, even if the call raised.
        os.chdir("..")
    # Minowa method: run Minowa_AT
    elapsed = (time.time() - starttime)
    print("Predicting PKS AT domain substrate specificities by Minowa et al. method")
    logfile.write("Predicting PKS AT domain substrate specificities by Minowa et al. method\n")
    if on_windows:
        minowapkscommand = "minowa_AT ../" + minowapksoutputfolder + "pksseqs.fasta ../" + minowapksoutputfolder + "pkspredoutput.txt"
    elif on_linux:
        minowapkscommand = "python minowa_AT.py ../" + minowapksoutputfolder + "pksseqs.fasta ../" + minowapksoutputfolder + "pkspredoutput.txt"
    os.chdir("Minowa/")
    try:
        os.system(minowapkscommand)
    finally:
        os.chdir("..")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5293
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
# Predict PKS CAL domain specificities with Minowa et al. method
elapsed = (time.time() - starttime)
print("Predicting CAL domain substrate specificities by Minowa et al. method")
logfile.write("Predicting CAL domain substrate specificities by Minowa et al. method\n")
# Collect the sequence of every CAL_domain hit of each core gene, named
# <gene>_CAL<n>, where n counts the CAL domains within that gene.
for gene in pksnrpscoregenes:
    nr = 0
    for domain in domaindict[gene]:
        if domain[0] == "CAL_domain":
            nr += 1
            start = int(domain[1])
            end = int(domain[2])
            seq = seqdict[gene][start:end]
            name = gene + "_CAL" + str(nr)
            calnames.append(name)
            calseqs.append(seq)
if calnames:
    writefasta(calnames,calseqs,minowacaloutputfolder + "calseqs.fasta")
    # sys.platform is 'linux2' or 'linux3' on older Python 2 builds and
    # 'linux' on Python 3, so match on the prefix instead of one exact value.
    if sys.platform == 'win32':
        minowacalcommand = "minowa_CAL ../" + minowacaloutputfolder + "calseqs.fasta ../" + minowacaloutputfolder + "calpredoutput.txt"
    elif sys.platform.startswith('linux'):
        minowacalcommand = "python minowa_CAL.py ../" + minowacaloutputfolder + "calseqs.fasta ../" + minowacaloutputfolder + "calpredoutput.txt"
    # The command's "../" paths are relative to the Minowa/ directory.
    os.chdir("Minowa/")
    try:
        os.system(minowacalcommand)
    finally:
        # Always return to the parent directory, even if the call raised.
        os.chdir("..")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5320
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
elapsed = (time.time() - starttime)
# Predict PKS KR domain stereochemistry using pattern as published in ClustScan
print("Predicting PKS KR activity and stereochemistry using KR fingerprints from Starcevic et al.")
logfile.write("Predicting PKS KR activity and stereochemistry using KR fingerprints from Starcevic et al.\n")
# Collect the sequence of every PKS_KR domain of each core gene, named
# <gene>_KR<n>, where n counts the KR domains within that gene.
for gene in pksnrpscoregenes:
    nr = 0
    for domain in domaindict[gene]:
        if domain[0] == "PKS_KR":
            nr += 1
            start = int(domain[1])
            end = int(domain[2])
            seq = seqdict[gene][start:end]
            name = gene + "_KR" + str(nr)
            krnames.append(name)
            krseqs.append(seq)
if krnames:
    writefasta(krnames,krseqs,kranalysisoutputfolder + "krseqs.fasta")
    # sys.platform is 'linux2' or 'linux3' on older Python 2 builds and
    # 'linux' on Python 3, so match on the prefix instead of one exact value.
    if sys.platform == 'win32':
        kranalysiscommand = "kr_analysis ../" + kranalysisoutputfolder + "krseqs.fasta ../" + kranalysisoutputfolder + "krpredoutput.txt"
    elif sys.platform.startswith('linux'):
        kranalysiscommand = "python kr_analysis.py ../" + kranalysisoutputfolder + "krseqs.fasta ../" + kranalysisoutputfolder + "krpredoutput.txt"
    # The command's "../" paths are relative to the kr_analysis/ directory.
    os.chdir("kr_analysis/")
    try:
        os.system(kranalysiscommand)
    finally:
        # Always return to the parent directory, even if the call raised.
        os.chdir("..")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5347
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
# Read and parse all substrate specificity prediction output files
def _read_with_unix_newlines(path):
    """Return the text of *path* with every carriage return turned into a newline.

    The handle is closed before returning; the original code left each
    prediction file open by rebinding its name to the file's contents.
    """
    handle = open(path, "r")
    try:
        return handle.read().replace("\r", "\n")
    finally:
        handle.close()

# Result tables, keyed by the domain name found in the prediction files.
# They stay empty when no NRPS A domains were collected.
minowa_nrps_preds = {}
minowa_nrps_preds_details = {}
nrps_svm_preds = {}
nrps_svm_preds_details = {}
nrps_code_preds = {}
nrps_code_preds_details = {}
# Translation of some Minowa substrate names to the codes used downstream.
substratetransdict2 = {'pipecolate':'pip','fOHOrn':'orn','beta-Lys':'blys','5NhOrn':'orn','OHOrn':'orn','Aad':'Aaa','bOHTyr':'bht'}
if nrpsnames:
    # Minowa output: records are separated by a line holding two backslashes.
    # The text before the first separator is discarded.
    minowa_a_file = _read_with_unix_newlines(minowanrpsoutputfolder + "nrpspredoutput.txt")
    parts = minowa_a_file.split("\\\\\n")[1:]
    for part in parts:
        partlines = part.split("\n")
        acc = partlines[0]
        # The first tab-separated field of the record's third line is the top hit.
        tophit = partlines[2].split("\t")[0]
        tophit = substratetransdict2.get(tophit, tophit)
        minowa_nrps_preds[acc] = tophit.lower()
        minowa_nrps_preds_details[acc] = "<b>Minowa HMM method A-domain<br>Substrate specificity prediction top hits:</b><br>\n" + partlines[1] + "<br>\n" + partlines[2] + "<br>\n" + partlines[3] + "<br>\n" + partlines[4] + "<br><br>\n\n"
    # NRPSPredictor2 SVM output: tab-separated; the first line and the final
    # (trailing) element of the split are skipped.
    nrpspredictorfile1 = _read_with_unix_newlines(nrpspredictoroutputfolder + "nrpspredictor2.out")
    lines = nrpspredictorfile1.split("\n")[1:-1]
    for line in lines:
        tabs = line.split("\t")
        nrps_svm_preds[tabs[0]] = tabs[6]
        nrps_svm_preds_details[tabs[0]] = "<b> NRPSPredictor2 SVM prediction details:</b><br>\n8 Angstrom 34 AA code:<br>\n" + tabs[1] + "<br>\nPredicted physicochemical class:<br>\n" + tabs[3] + "<br>\nLarge clusters prediction:<br>\n" + tabs[4] + "<br>\nSmall clusters prediction:<br>\n" + tabs[5] + "<br>\nSingle AA prediction:<br>\n" + tabs[6] + "<br><br>\n\n"
    # NRPSPredictor2 code output: one "<name>\t<prediction>" pair per line;
    # the final (trailing) element of the split is skipped.
    nrpspredictorfile2 = _read_with_unix_newlines(nrpspredictoroutputfolder + "nrpscodes.txt")
    lines = nrpspredictorfile2.split("\n")[:-1]
    for line in lines:
        tabs = line.split("\t")
        nrps_code_preds[tabs[0]] = tabs[1]
        nrps_code_preds_details[tabs[0]] = "<b> NRPSPredictor2 Stachelhaus code prediction:</b><br>\n" + tabs[1] + "<br><br>\n\n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
# Result tables for the PKS AT-domain predictions; they start out empty.
minowa_pks_preds_details = dict()
minowa_pks_preds = dict()
pks_code_preds = dict()
pks_code_preds_details = dict()
# Translation of full PKS substrate names to the short codes used downstream.
# NOTE(review): the original one-line literal listed 'Ethylmalonyl-CoA' twice
# ('emal' first, 'Ethyl_mal' later). The later entry wins at runtime, so only
# that effective value is kept here -- confirm which code is intended.
substratetransdict = {
    'Malonyl-CoA': 'mal',
    'Methylmalonyl-CoA': 'mmal',
    'Methoxymalonyl-CoA': 'mxmal',
    'Ethylmalonyl-CoA': 'Ethyl_mal',
    'Isobutyryl-CoA': 'isobut',
    '2-Methylbutyryl-CoA': '2metbut',
    'trans-1,2-CPDA': 'trans-1,2-CPDA',
    'Acetyl-CoA': 'Acetyl-CoA',
    'Benzoyl-_CoA': 'benz',
    'Propionyl-CoA': 'prop',
    '3-Methylbutyryl-CoA': '3metbut',
    'CE-Malonyl-CoA': 'cemal',
    '2-Rhyd-Malonyl-CoA': '2Rhydmal',
    'CHC-CoA': 'CHC-CoA',
    'inactive': 'inactive',
}
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5390 if len(pksnames) > 0:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5391 minowa_at_file = open(minowapksoutputfolder + "pkspredoutput.txt","r")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5392 minowa_at_file = minowa_at_file.read()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5393 minowa_at_file = minowa_at_file.replace("\r","\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5394 parts = minowa_at_file.split("\\\\\n")[1:]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5395 for i in parts:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5396 partlines = i.split("\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5397 acc = partlines[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5398 if substratetransdict.has_key(partlines[2].split("\t")[0]):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5399 tophit = substratetransdict[partlines[2].split("\t")[0]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5400 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5401 tophit = "pk"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5402 minowa_pks_preds[acc] = tophit
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5403 minowa_pks_preds_details[acc] = "<b>Minowa HMM method AT-domain<br>Substrate specificity prediction top hits:</b><br>\n" + partlines[1] + "<br>\n" + partlines[2] + "<br>\n" + partlines[3] + "<br>\n" + partlines[4] + "<br><br>\n\n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5404 pkssignaturefile = open(pkssignatureoutputfolder + "pkspredoutput.txt","r")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5405 pkssignaturefile = pkssignaturefile.read()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5406 pkssignaturefile = pkssignaturefile.replace("\r","\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5407 parts = pkssignaturefile.split("//\n")[1:]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5408 for i in parts:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5409 partlines = i.split("\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5410 partlines2 = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5411 for j in partlines:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5412 if j != "":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5413 partlines2.append(j)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5414 partlines = partlines2
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5415 acc = partlines[0].split("\t")[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5416 if len(partlines) > 2:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5417 tophit = (partlines[1].split("\t")[0]).split("__")[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5418 pks_code_preds[acc] = tophit
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5419 codes = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5420 prots = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5421 scores = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5422 for i in partlines[1:4]:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5423 codes.append(i.split("\t")[0])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5424 prot = i.split("\t")[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5425 prot = prot.replace("_AT"," (AT")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5426 prot = prot.replace("__","): ")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5427 prots.append(prot)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5428 scores.append(i.split("\t")[2])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5429 if len(prots) >= 3:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5430 pks_code_preds_details[acc] = "<b>PKS Active Site Signature method<br>AT-domain substrate specificity prediction top hits:</b><br>\nCode:" + partlines[0].split("\t")[1] + "<br>\n" + codes[0] + " - " + prots[0] + " : (" + scores[0] + "% identity)<br>\n" + codes[1] + " - " + prots[1] + " : (" + scores[1] + "% identity)<br>\n" + codes[2] + " - " + prots[2] + " : (" + scores[2] + "% identity)<br><br>\n\n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5431 elif len(prots) == 2:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5432 pks_code_preds_details[acc] = "<b>PKS Active Site Signature method<br>AT-domain substrate specificity prediction top hits:</b><br>\nCode:" + partlines[0].split("\t")[1] + "<br>\n" + codes[0] + " - " + prots[0] + " : (" + scores[0] + "% identity)<br>\n" + codes[1] + " - " + prots[1] + " : (" + scores[1] + "% identity)<br><br>\n\n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5433 elif len(prots) == 1:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5434 pks_code_preds_details[acc] = "<b>PKS Active Site Signature method<br>AT-domain substrate specificity prediction top hits:</b><br>\nCode:" + partlines[0].split("\t")[1] + "<br>\n" + codes[0] + " - " + prots[0] + " : (" + scores[0] + "% identity)<br><br>\n\n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5435 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5436 pks_code_preds[acc] = "N/A"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5437 pks_code_preds_details[acc] = "<b>PKS Active Site Signature method<br>No AT-domain substrate specificity prediction hits above 40% identity.<br>\n\n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
# Parse the Minowa HMM predictions for CAL domains.
# minowa_cal_preds maps a domain accession to its top hit;
# minowa_cal_preds_details maps it to an HTML fragment listing the hits.
minowa_cal_preds = {}
minowa_cal_preds_details = {}
if len(calnames) > 0:
    # Read the whole file and close the handle right away (the original
    # rebound the handle name to the text and never closed the file).
    with open(minowacaloutputfolder + "calpredoutput.txt", "r") as cal_handle:
        minowa_cal_file = cal_handle.read()
    minowa_cal_file = minowa_cal_file.replace("\r", "\n")
    # Records are separated by a line consisting of two backslashes; the
    # text before the first separator is discarded.
    parts = minowa_cal_file.split("\\\\\n")[1:]
    for part in parts:
        partlines = part.split("\n")
        acc = partlines[0]
        # First tab-separated field of the third line is taken as the top hit.
        tophit = partlines[2].split("\t")[0]
        minowa_cal_preds[acc] = tophit
        minowa_cal_preds_details[acc] = "<b>Minowa HMM method<br>CAL-domain substrate specificity prediction top hits:</b><br>\n" + partlines[1] + "<br>\n" + partlines[2] + "<br>\n" + partlines[3] + "<br>\n" + partlines[4] + "<br><br>\n\n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
# Parse the KR-domain predictions. Each line of the result file is
# tab-separated: the first field is the key, the second is stored in
# kr_activity_preds and the third in kr_stereo_preds.
kr_activity_preds = {}
kr_stereo_preds = {}
if len(krnames) > 0:
    # Read the whole file and close the handle right away (the original
    # rebound the handle name to the text and never closed the file).
    with open(kranalysisoutputfolder + "krpredoutput.txt", "r") as kr_handle:
        krfile = kr_handle.read()
    krfile = krfile.replace("\r", "\n")
    # The final element of the split (whatever follows the last newline)
    # is dropped.
    krlines = krfile.split("\n")[:-1]
    for krline in krlines:
        tabs = krline.split("\t")
        kr_activity_preds[tabs[0]] = tabs[1]
        kr_stereo_preds[tabs[0]] = tabs[2]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5462
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
#Combine substrate specificity predictions into consensus prediction
# consensuspreds maps "<gene>_AT<n>", "<gene>_A<n>" and "<gene>_CAL<n>" to a
# single prediction, where <n> counts the domains of that kind along the gene.
consensuspreds = {}
# Only predictions present in this list are accepted as a consensus; anything
# else is replaced by the generic "pk" / "nrp" placeholder below.
available_smiles_parts = ['GLY','ALA','VAL','LEU','ILE','MET','PRO','PHE','TRP','SER','THR','ASN','GLN','TYR','CYS','LYS','ARG','HIS','ASP','GLU','MPRO','ORN','PGLY','DAB','BALA','AEO','DHA','PIP','BMT','gly','ala','val','leu','ile','met','pro','phe','trp','ser','thr','asn','gln','tyr','cys','lys','arg','his','asp','glu','aaa','mpro','dhb','2hiva','orn','pgly','dab','bala','aeo','4mha','pico','phg','dha','scy','pip','bmt','adds','aad','abu','hiv','dhpg','bht','3-me-glu','4pPro','ala-b','ala-d','dht','Sal','tcl','lys-b','hpg','hyv-d','iva','vol','mal','mmal','mxmal','emal','nrp','pk','Gly','Ala','Val','Leu','Ile','Met','Pro','Phe','Trp','Ser','Thr','Asn','Gln','Tyr','Cys','Lys','Arg','His','Asp','Glu','Mpro','23Dhb','34Dhb','2Hiva','Orn','Pgly','Dab','Bala','Aeo','4Mha','Pico','Aaa','Dha','Scy','Pip','Bmt','Adds','DHpg','DHB','nrp','pk']
for gene in pksnrpscoregenes:
    # Per-gene running counters for A, AT and CAL domains.
    nra = 0
    nrat = 0
    nrcal = 0
    for domain in domaindict[gene]:
        domaintype = domain[0]
        if domaintype == "PKS_AT":
            nrat += 1
            domainkey = gene + "_AT" + str(nrat)
            preds = [minowa_pks_preds[domainkey], pks_code_preds[domainkey]]
            # Default when the two methods disagree.
            consensus = "pk"
            for pred in preds:
                # A prediction made by more than one method is the consensus,
                # provided it is one of the accepted names.
                if preds.count(pred) > 1:
                    consensus = pred if pred in available_smiles_parts else "pk"
            consensuspreds[domainkey] = consensus
        elif domaintype in ("AMP-binding", "A-OX"):
            nra += 1
            domainkey = gene + "_A" + str(nra)
            preds = [minowa_nrps_preds[domainkey], nrps_svm_preds[domainkey], nrps_code_preds[domainkey]]
            # Default when no two of the three methods agree.
            consensus = "nrp"
            for pred in preds:
                if preds.count(pred) > 1:
                    consensus = pred if pred in available_smiles_parts else "nrp"
            consensuspreds[domainkey] = consensus
        elif domaintype == "CAL_domain":
            nrcal += 1
            domainkey = gene + "_CAL" + str(nrcal)
            # Only one method exists for CAL domains, so its top hit is used
            # directly when it is an accepted name.
            calpred = minowa_cal_preds[domainkey]
            consensuspreds[domainkey] = calpred if calpred in available_smiles_parts else "pk"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5510
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
#Write all prediction details to HTML files for each gene to be used as pop-up window
# domainnamesdict maps each gene to the list of its domain names (the first
# element of every entry in domaindict[gene]), in order.
domainnamesdict = {}
for gene in pksnrpscoregenes:
    domainnamesdict[gene] = [domain[0] for domain in domaindict[gene]]

# Shared opening and closing markup of every pop-up page. The original
# repeated these literals for each domain kind.
popup_html_head = '<html>\n<head>\n<title>Prediction details</title>\n<STYLE type="text/css">\nbody{\n text-align:left;\n background-color:white;\n font-family: Tahoma, sans-serif;\n font-size: 0.8em;\n color: #810E15;\n}\n</STYLE>\n</head>\n<body>'
popup_html_tail = '\n</body>\n</html>'

for gene in pksnrpscoregenes:
    genedomainnames = domainnamesdict[gene]
    if "PKS_AT" in genedomainnames or "AMP-binding" in genedomainnames or "A-OX" in genedomainnames or "CAL_domain" in genedomainnames:
        # Per-gene running counters; they reproduce the numbering used for
        # the keys of the prediction dicts.
        nrat = 0
        nra = 0
        nrcal = 0
        # Not used in this section; kept because it was assigned here in the
        # original and other parts of the script may read it (unverified).
        nrkr = 0
        for domain in domaindict[gene]:
            domaintype = domain[0]
            if domaintype == "PKS_AT":
                nrat += 1
                domainname = gene + "_AT" + str(nrat)
                # "with" closes the file even if a lookup or write fails.
                with open(substrspecsfolder + domainname + ".html", "w") as htmloutfile:
                    htmloutfile.write(popup_html_head)
                    htmloutfile.write(minowa_pks_preds_details[domainname])
                    htmloutfile.write(pks_code_preds_details[domainname])
                    htmloutfile.write("<b><i>Consensus Predictions: " + consensuspreds[domainname] + "</b></i>")
                    htmloutfile.write(popup_html_tail)
            elif domaintype == "AMP-binding" or domaintype == "A-OX":
                nra += 1
                domainname = gene + "_A" + str(nra)
                with open(substrspecsfolder + domainname + ".html", "w") as htmloutfile:
                    htmloutfile.write(popup_html_head)
                    htmloutfile.write(nrps_svm_preds_details[domainname])
                    htmloutfile.write(nrps_code_preds_details[domainname])
                    htmloutfile.write(minowa_nrps_preds_details[domainname])
                    htmloutfile.write("<b><i>Consensus Prediction: '" + consensuspreds[domainname] + "'</b></i>")
                    htmloutfile.write(popup_html_tail)
            elif domaintype == "CAL_domain":
                nrcal += 1
                domainname = gene + "_CAL" + str(nrcal)
                # CAL pages carry only the Minowa details, no consensus line.
                with open(substrspecsfolder + domainname + ".html", "w") as htmloutfile:
                    htmloutfile.write(popup_html_head)
                    htmloutfile.write(minowa_cal_preds_details[domainname])
                    htmloutfile.write(popup_html_tail)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5556
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5557 elapsed = (time.time() - starttime)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5558 #print "5541Time since start: " + str(elapsed)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5559 #Predict biosynthetic gene order in gene cluster using starter domains, thioesterase domains, gene order and docking domains
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5560 compound_pred_dict = {}
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5561 dockingdomainanalysis = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5562 nrpspksclusters = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5563 a = 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5564 for i in geneclusters:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5565 genecluster = i
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5566 clustercoregenes = clusterinfo[i][3]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5567 clusterpksnrpsgenes = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5568 for j in clustercoregenes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5569 if j in pksnrpscoregenes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5570 clusterpksnrpsgenes.append(j)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5571 if len(clusterpksnrpsgenes) > 0:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5572 nrpspksclusters.append(genecluster)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5573 pksgenes = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5574 clusterpksgenes = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5575 nrpsgenes = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5576 clusternrpsgenes = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5577 hybridgenes = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5578 clusterhybridgenes = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5579 for j in clusterpksnrpsgenes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5580 k = nrpspkstypedict[j]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5581 if "PKS" in k and "NRPS" not in k:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5582 pksgenes += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5583 clusterpksgenes.append(j)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5584 elif "PKS" not in k and "NRPS" in k:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5585 nrpsgenes += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5586 clusternrpsgenes.append(j)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5587 elif "PKS/NRPS" in k:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5588 if ("PKS_KS" in domainnamesdict[j] or "PKS_AT" in domainnamesdict[j]) and ("AMP-binding" not in domainnamesdict[j] and "A-OX" not in domainnamesdict[j] and "Condensation" not in domainnamesdict[j]):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5589 pksgenes += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5590 clusterpksgenes.append(j)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5591 elif ("PKS_KS" not in domainnamesdict[j] and "PKS_AT" not in domainnamesdict[j]) and ("AMP-binding" in domainnamesdict[j] or "A-OX" in domainnamesdict[j] or "Condensation" in domainnamesdict[j]):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5592 nrpsgenes += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5593 clusternrpsgenes.append(j)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5594 elif "PKS" in k and "NRPS" in k:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5595 hybridgenes += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5596 clusterhybridgenes.append(j)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5597 #If more than three PKS genes, use dock_dom_analysis if possible to identify order
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5598 dock_dom_analysis = "failed"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5599 if pksgenes > 3 and nrpsgenes == 0 and hybridgenes == 0:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5600 #print "Predicting PKS gene order by docking domain sequence analysis"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5601 logfile.write("Predicting PKS gene order by docking domain sequence analysis")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5602 dockhtmlfile = open(htmlfolder + "docking_analysis" + str(genecluster) + ".html","w")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5603 #Find first and last genes based on starter module and TE / TD
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5604 startergene = ""
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5605 endinggene = ""
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5606 for k in clusterpksgenes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5607 if "Thioesterase" in domainnamesdict[k] or "TD" in domainnamesdict[k]:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5608 if endinggene == "":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5609 endinggene = k
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5610 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5611 endinggene = ""
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5612 if len(domainnamesdict[k]) >=2 and "PKS_AT" == domainnamesdict[k][0] and "ACP" == domainnamesdict[k][1]:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5613 if startergene == "":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5614 startergene = k
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5615 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5616 startergene = ""
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5617 if startergene == "":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5618 for k in clusterpksgenes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5619 if len(domainnamesdict[k]) >=3 and "PKS_KS" == domainnamesdict[k][0] and "PKS_AT" == domainnamesdict[k][1] and "ACP" == domainnamesdict[k][2]:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5620 if startergene == "":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5621 startergene = k
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5622 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5623 startergene = ""
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5624 break
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5625 #Extract N-terminal 50 residues of each non-starting protein, scan for docking domains using hmmsearch, parse output to locate interacting residues
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5626 ntermintresdict = {}
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5627 ntermnames = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5628 ntermseqs = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5629 for k in clusterpksgenes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5630 if k != startergene:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5631 ntermnames.append(k)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5632 seq = seqdict[k]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5633 ntermseqs.append(seq[:50])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5634 ntermfasta = "docking_analysis/input.fasta"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5635 z = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5636 for k in ntermnames:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5637 writefasta([ntermnames[z]],[ntermseqs[z]],ntermfasta)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5638 os.chdir("docking_analysis")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5639 os.system("muscle -profile -quiet -in1 nterm.fasta -in2 input.fasta -out muscle.fasta")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5640 intresidues = extractpositions("nterm.fasta","muscle.fasta",[2,15],"EryAIII_5_6_ref",ntermnames[z])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5641 ntermintresdict[ntermnames[z]] = intresidues
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5642 os.chdir("..")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5643 z += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5644 #Extract C-terminal 100 residues of each non-ending protein, scan for docking domains using hmmsearch, parse output to locate interacting residues
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5645 ctermintresdict = {}
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5646 ctermnames = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5647 ctermseqs = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5648 for k in clusterpksgenes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5649 if k != endinggene:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5650 ctermnames.append(k)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5651 seq = seqdict[k]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5652 ctermseqs.append(seq[-100:])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5653 ctermfasta = "docking_analysis/input.fasta"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5654 z = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5655 for k in ctermnames:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5656 writefasta([ctermnames[z]],[ctermseqs[z]],ctermfasta)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5657 os.chdir("docking_analysis")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5658 os.system("muscle -profile -quiet -in1 cterm.fasta -in2 input.fasta -out muscle.fasta")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5659 intresidues = extractpositions("cterm.fasta","muscle.fasta",[55,64],"EryAII_ref",ctermnames[z])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5660 ctermintresdict[ctermnames[z]] = intresidues
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5661 os.chdir("..")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5662 z += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5663 #If docking domains found in all, check for optimal order using interacting residues
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5664 genes_to_order = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5665 z = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5666 for k in clusterpksgenes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5667 if k == startergene or k == endinggene:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5668 pass
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5669 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5670 genes_to_order.append(k)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5671 z += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5672 possible_orders = list(itertools.permutations(genes_to_order,len(genes_to_order)))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5673 hydrophobic = ["A","V","I","L","F","W","Y","M"]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5674 positivecharge = ["H","K","R"]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5675 negativecharge = ["D","E"]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5676 other = ["C","G","P","S","T","N","Q","X","U"]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5677 possible_orders_scoredict = {}
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5678 for k in possible_orders:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5679 score = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5680 interactions = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5681 z = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5682 for l in k[:-1]:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5683 interactions.append([l,k[z + 1]])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5684 z += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5685 for l in interactions:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5686 res1a = ctermintresdict[l[0]][0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5687 res1b = ntermintresdict[l[1]][0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5688 res2a = ctermintresdict[l[0]][1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5689 res2b = ntermintresdict[l[1]][1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5690 if (res1a in hydrophobic and res1b in hydrophobic) or (res1a in positivecharge and res1b in negativecharge) or (res1a in negativecharge and res1b in positivecharge):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5691 score += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5692 if (res1a in positivecharge and res1b in positivecharge) or (res1a in negativecharge and res1b in negativecharge):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5693 score = score - 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5694 if (res2a in hydrophobic and res2b in hydrophobic) or (res2a in positivecharge and res2b in negativecharge) or (res2a in negativecharge and res2b in positivecharge):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5695 score += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5696 if (res2a in positivecharge and res2b in positivecharge) or (res2a in negativecharge and res2b in negativecharge):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5697 score = score - 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5698 possible_orders_scoredict[k] = score
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5699 ranked_orders = sortdictkeysbyvaluesrev(possible_orders_scoredict)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5700 ranked_orders_part = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5701 ranked_orders2 = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5702 a = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5703 ranked_orders_len = len(ranked_orders) - 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5704 for i in ranked_orders:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5705 if a == 0:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5706 score = possible_orders_scoredict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5707 ranked_orders_part.append(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5708 elif a == ranked_orders_len:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5709 ranked_orders_part.append(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5710 ranked_orders2 = ranked_orders2 + ranked_orders_part
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5711 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5712 if possible_orders_scoredict[i] == score:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5713 ranked_orders_part.append(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5714 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5715 ranked_orders_part.reverse()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5716 ranked_orders2 = ranked_orders2 + ranked_orders_part
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5717 score = possible_orders_scoredict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5718 ranked_orders_part = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5719 ranked_orders_part.append(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5720 a += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5721 ranked_orders = ranked_orders2[:1000]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5722 geneorders = ranked_orders
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5723 geneorders2 = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5724 for l in geneorders:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5725 geneorder = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5726 if startergene != "":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5727 geneorder.append(startergene)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5728 [ geneorder.append(m) for m in l ]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5729 #for m in l:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5730 # geneorder.append(m)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5731 if endinggene != "":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5732 geneorder.append(endinggene)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5733 geneorders2.append(geneorder)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5734 geneorders = geneorders2
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5735 if len(ranked_orders) == 1000:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5736 dockhtmlfile.write('<html>\n<head>\n<LINK href="style.css" rel="stylesheet" type="text/css">\n</head>\n<body>\nDocking domain analysis. Score for 1000 highest scoring gene orders:<br><br><table border=1>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5737 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5738 dockhtmlfile.write('<html>\n<head>\n<LINK href="style.css" rel="stylesheet" type="text/css">\n</head>\n<body>\nDocking domain analysis. Scores for all possible gene orders:<br><br><table border=1>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5739 dockhtmlfile.write('<tr><td><b>Gene order</b></td><td><b>Score</b></td></tr>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5740 for l in geneorders:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5741 string = "<tr><td>"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5742 for m in l:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5743 string = string + m + ","
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5744 if startergene != "" and endinggene != "":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5745 string = string[:-1] + "</td><td>" + str(possible_orders_scoredict[tuple(l[1:-1])])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5746 elif startergene == "" and endinggene != "":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5747 string = string[:-1] + "</td><td>" + str(possible_orders_scoredict[tuple(l[:-1])])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5748 elif startergene != "" and endinggene == "":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5749 string = string[:-1] + "</td><td>" + str(possible_orders_scoredict[tuple(l[1:])])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5750 elif startergene == "" and endinggene == "":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5751 string = string[:-1] + "</td><td>" + str(possible_orders_scoredict[tuple(l)])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5752 dockhtmlfile.write(string + "</td></tr>\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5753 dockhtmlfile.write('\n</table></body></html>')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5754 dockhtmlfile.close()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5755 #print "Predicting PKS gene order by docking domain sequence analysis succeeded."
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5756 #Write html outfile with docking domain analysis output
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5757 #
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5758 logfile.write("Predicting PKS gene order by docking domain sequence analysis succeeded.")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5759 dockingdomainanalysis.append(genecluster)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5760 #If NRPS genes, mixed NRPS/PKS genes, PKS genes without detected docking domains, or clusters with 1-3 PKS genes, assume colinearity
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5761 direction = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5762 for k in clusterpksnrpsgenes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5763 if strandsdict[k] == "+":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5764 direction += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5765 elif strandsdict[k] == "-":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5766 direction = direction - 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5767 if direction < 0:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5768 clusterpksnrpsgenes.reverse()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5769 if "Thioesterase" in domainnamesdict[clusterpksnrpsgenes[0]] or "TD" in domainnamesdict[clusterpksnrpsgenes[0]]:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5770 clusterpksnrpsgenes.reverse()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5771 geneorder = clusterpksnrpsgenes
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5772 #Generate substrates order from predicted gene order and consensus predictions
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5773 prediction = ""
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5774 for k in geneorder:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5775 domains = domainnamesdict[k]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5776 nra = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5777 nrat = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5778 nrcal = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5779 for l in domains:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5780 if "PKS_AT" in l:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5781 nrat += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5782 prediction = prediction + consensuspreds[k + "_AT" + str(nrat)] + " "
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5783 if "AMP-binding" in l or "A-OX" in l:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5784 nra += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5785 prediction = prediction + consensuspreds[k + "_A" + str(nra)] + " "
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5786 if "CAL_domain" in l:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5787 nrcal += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5788 prediction = prediction + consensuspreds[k + "_CAL" + str(nrcal)] + " "
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5789 prediction = prediction[:-1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5790 compound_pred_dict[genecluster] = prediction
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5791 a += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5792
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
#Combine predictions into a prediction of the final chemical structure and generate images.
#For each gene cluster that has a monomer prediction of more than one residue, the "main"
#executable in the NRPeditor directory is run on the residue list, the SMILES string is cut
#out of its output, written to genecluster<nr>.smi and passed to depict_smile.
#Clusters of type "ectoine" get a fixed SMILES string instead.
#Clusters for which depict_smile returns "failed" are collected in failedstructures.
os.chdir("NRPeditor")
failedstructures = []
for i in geneclusters:
    genecluster = i
    if compound_pred_dict.has_key(genecluster):
        residues = compound_pred_dict[genecluster]
        residuelist = residues.split(" ")
        nrresidues = len(residuelist)
        if nrresidues > 1:
            #Python 2 reports Linux as 'linux2' or, on some older builds running on
            #3.x kernels, 'linux3'; testing the prefix covers both.
            on_linux = sys.platform.startswith('linux')
            #NOTE(review): no command is defined for platforms other than win32/Linux.
            if sys.platform == 'win32':
                structcommand = 'main input 100 4000 1000 AA DDV DIM ' + str(nrresidues + 1) + ' "'
            elif on_linux:
                structcommand = './main input 100 4000 1000 AA DDV DIM ' + str(nrresidues + 1) + ' "'
            #Residues are space-separated inside the quoted argument, terminated by TE
            structcommand = structcommand + " ".join(residuelist) + ' TE"'
            smilespipe = os.popen(structcommand)
            try:
                smilesinfo = smilespipe.read()
            finally:
                smilespipe.close()
            smiles_string = (smilesinfo.split("core peptide: ")[1]).split("\ntermintype")[0]
            if on_linux:
                #str.replace returns a new string; the original code discarded the result,
                #so the [X] -> [*:X] rewrite never took effect.
                smiles_string = smiles_string.replace("[X]","[*:X]")
                #Number every X placeholder consecutively, starting at 1
                smiles_chars = []
                a = 1
                for k in smiles_string:
                    if k == "X":
                        smiles_chars.append(str(a))
                        a += 1
                    else:
                        smiles_chars.append(k)
                smiles_string = "".join(smiles_chars)
            smilesfile = open("genecluster" + str(genecluster) + ".smi","w")
            try:
                smilesfile.write(smiles_string)
            finally:
                smilesfile.close()
            depictstatus = depict_smile(genecluster,structuresfolder)
            if depictstatus == "failed":
                failedstructures.append(genecluster)
    elif clusterinfo[genecluster][0] == "ectoine":
        smiles_string = "CC1=NCCC(N1)C(=O)O"
        smilesfile = open("genecluster" + str(genecluster) + ".smi","w")
        try:
            smilesfile.write(smiles_string)
        finally:
            smilesfile.close()
        depictstatus = depict_smile(genecluster,structuresfolder)
        if depictstatus == "failed":
            failedstructures.append(genecluster)
        elif genecluster in failedstructures:
            del failedstructures[failedstructures.index(genecluster)]
        compound_pred_dict[genecluster] = "ectoine "
os.chdir("..")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5841
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5842 elapsed = (time.time() - starttime)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5843 #print "5826 Time since start: " + str(elapsed)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5844 #ClusterBlast
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5845 if clusterblast == "y":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5846 #Load gene cluster database into memory
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5847 #print "ClusterBlast: Loading gene clusters database into memory..."
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5848 logfile.write("ClusterBlast: Loading gene clusters database into memory...\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5849
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5850 os.chdir(genomename + "/clusterblast")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5851 #file = open( os.path.join(antismash_path, "clusterblast/geneclusters.txt") ,"r")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5852 #filetext = file.read()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5853 #lines = filetext.split("\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
#Load the ClusterBlast gene cluster table into the clusters dictionary.
#clusters maps "<accession>_<clusternr>" to [clusterprots, clusterdescription, clustertype, clustertags].
#If clusterblast/geneclusters.bin exists it is unpickled; otherwise geneclusters.txt is parsed
#and the result is pickled to geneclusters.bin for later runs.
clusters = {}
bin_path = os.path.join(antismash_path, "clusterblast/geneclusters.bin")
if os.path.exists( bin_path ):
    #The cache is written with a binary pickle protocol (-1), so it must be read in binary mode.
    #NOTE(review): unpickling runs code embedded in the file; the cache location must stay trusted.
    cachefile = open(bin_path, 'rb')
    try:
        clusters = cPickle.load(cachefile)
    finally:
        cachefile.close()
else:
    tablefile = open( os.path.join(antismash_path, "clusterblast/geneclusters.txt") ,"r")
    try:
        for line in tablefile:
            line = line.strip()
            if not line:
                #Skip blank lines instead of failing on the missing columns
                continue
            tabs = line.split("\t")
            accession = tabs[0]
            clusterdescription = tabs[1]
            clusternr = tabs[2]
            clustertype = tabs[3]
            clustername = accession + "_" + clusternr
            clustertags = tabs[4].split(";")
            clusterprots = tabs[5].split(";")
            clusters[clustername] = [clusterprots,clusterdescription,clustertype,clustertags]
    finally:
        tablefile.close()
    #Binary mode keeps the binary pickle intact on platforms that translate newlines in text mode
    cachefile = open(bin_path, 'wb')
    try:
        cPickle.dump(clusters, cachefile, -1)
    finally:
        cachefile.close()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5873 #Load gene cluster database proteins info into memory
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5874 #print "ClusterBlast: Loading gene cluster database proteins into memory..."
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5875 logfile.write("ClusterBlast: Loading gene cluster database proteins into memory...\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5876 #file = open( os.path.join(antismash_path, "clusterblast/geneclusterprots.fasta") ,"r")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5877 #filetext = file.read()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5878 #filetext = filetext.replace("\r","\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5879 #lines = filetext.split("\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
#Load per-protein information of the ClusterBlast database from the FASTA header lines.
#Header fields are separated by "|": 0 and 1 form the cluster name, 2 is the location,
#3 the strand, 4 the locus tag, 5 the annotation and 6 the protein identifier.
#If clusterblast/geneclusterprots.fasta.bin exists the five dictionaries are unpickled from it;
#otherwise the FASTA file is parsed and the dictionaries are pickled for later runs.
proteingeneclusters = {}
proteinlocations = {}
proteinstrands = {}
proteinannotations = {}
proteintags = {}
bin_path = os.path.join(antismash_path, "clusterblast/geneclusterprots.fasta.bin")
if os.path.exists( bin_path ):
    #The cache is written with a binary pickle protocol (-1), so it must be read in binary mode.
    #NOTE(review): unpickling runs code embedded in the file; the cache location must stay trusted.
    cachefile = open(bin_path, 'rb')
    try:
        (proteingeneclusters, proteinlocations, proteinstrands, proteinannotations, proteintags) = cPickle.load(cachefile)
    finally:
        cachefile.close()
else:
    fastafile = open( os.path.join(antismash_path, "clusterblast/geneclusterprots.fasta") ,"r")
    try:
        for line in fastafile:
            line = line.replace('\n', '')
            if line.startswith(">"):
                tabs = line.split("|")
                protein = tabs[6]
                locustag = tabs[4]
                #Locus tags that also occur in accessiondict get an "h_" prefix.
                #NOTE(review): this decision is stored in the cache below, so later runs that
                #load the cache reuse it without consulting their own accessiondict.
                if accessiondict.has_key(locustag):
                    locustag = "h_" + locustag
                proteintags[protein] = locustag
                clustername = tabs[0] + "_" + tabs[1]
                proteingeneclusters[protein] = clustername
                location = tabs[2]
                proteinlocations[protein] = location
                strand = tabs[3]
                proteinstrands[protein] = strand
                annotation = tabs[5]
                proteinannotations[protein] = annotation
    finally:
        fastafile.close()
    #Binary mode keeps the binary pickle intact on platforms that translate newlines in text mode
    cachefile = open(bin_path, 'wb')
    try:
        cPickle.dump([proteingeneclusters, proteinlocations, proteinstrands, proteinannotations, proteintags], cachefile, -1)
    finally:
        cachefile.close()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5908 #Run BLAST on gene cluster proteins of each cluster on itself to find internal homologs, store groups of homologs - including singles - in a dictionary as a list of lists accordingly
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5909 #print "Finding internal homologs in each gene cluster.."
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5910 logfile.write("Finding internal homologs in each gene cluster..\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5911 internalhomologygroupsdict = {}
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5912 for i in geneclusters:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5913 clusternumber = i
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5914 #Create input fasta files for BLAST search
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5915 queryclusterprotslist = clusterinfo[i][4]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5916 queryclusterprots = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5917 for i in queryclusterprotslist:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5918 queryclusterprots.append(i[4])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5919 queryclusternames = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5920 queryclusterseqs = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5921 for i in queryclusterprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5922 seq = seqdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5923 name = fullnamedict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5924 queryclusterseqs.append(seq)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5925 queryclusternames.append(name)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5926 writefasta(queryclusternames,queryclusterseqs,"internal_input.fasta")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5927 #Run and parse BLAST search
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5928 makeblastdbcommand = "makeblastdb -in internal_input.fasta -out internal_input.fasta -dbtype prot"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5929 blastsearch = "blastp -db internal_input.fasta -query internal_input.fasta -outfmt 6 -max_target_seqs 1000 -evalue 1e-05 -out internal_input.out"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5930 if "--gui" in sys.argv and sys.argv[sys.argv.index("--gui") + 1] == "y":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5931 os.popen(makeblastdbcommand)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5932 os.popen(blastsearch)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5933 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5934 os.system(makeblastdbcommand)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5935 os.system(blastsearch)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5936 #print "5920 makeblastdb finised"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5937 blastoutput = open("internal_input.out","r").read()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5938 minseqcoverage = 25
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5939 minpercidentity = 30
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5940 seqlengths = fastaseqlengths(proteins)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5941 iblastinfo = blastparse(blastoutput,minseqcoverage,minpercidentity,seqlengths,geneclustergenes)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5942 iblastdict = iblastinfo[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5943 iquerylist = iblastinfo[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5944 #find and store internal homologs
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5945 groups = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5946 for j in queryclusternames:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5947 jsplit = j.split("|")[4]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5948 if iblastdict.has_key(j):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5949 hits = iblastdict[j][0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5950 group = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5951 for k in hits:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5952 if k[:2] == "h_":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5953 group.append(k[2:])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5954 elif k.count("|") > 4:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5955 group.append(k.split("|")[4])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5956 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5957 group.append(k)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5958 if jsplit not in group:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5959 group.append( jsplit )
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5960 x = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5961 for l in groups:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5962 for m in group:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5963 if m in l:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5964 del groups[x]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5965 [group.append(n) for n in l if n not in group]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5966 #for n in l:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5967 # if n not in group:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5968 # group.append(n)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5969 break
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5970 x += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5971 group.sort()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5972 groups.append(group)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5973 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5974 groups.append([ jsplit ])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5975 internalhomologygroupsdict[clusternumber] = groups
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5976
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5977 #Run BLAST on gene cluster proteins of each cluster and parse output
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5978 #print "5961 Running NCBI BLAST+ gene cluster searches.."
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
5979 logfile.write("Running NCBI BLAST+ gene cluster searches..\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
# For every query gene cluster: BLAST its proteins against the gene cluster
# database, score and rank the hit clusters, and write the report to
# "cluster<clusternumber>.txt" in the current working directory.
for clusternumber in geneclusters:
    # Create input fasta files for the BLAST search
    queryclusterprots = [protein[4] for protein in clusterinfo[clusternumber][4]]
    queryclusterseqs = [seqdict[prot] for prot in queryclusterprots]
    queryclusternames = [fullnamedict[prot] for prot in queryclusterprots]

    # One fasta file per CPU; the last file also receives the remainder of
    # the integer division (and everything when nrcpus == 1).
    equalpartsizes = len(queryclusternames) // nrcpus
    for cpu in range(nrcpus):
        start = cpu * equalpartsizes
        end = None if cpu == nrcpus - 1 else start + equalpartsizes
        writefasta(queryclusternames[start:end], queryclusterseqs[start:end],
                   "input" + str(cpu) + ".fasta")

    # Run one BLAST process per input file. join() blocks until the process
    # has exited, so no is_alive() polling or fixed sleep is required.
    processes = [Process(target=runblast, args=["input" + str(cpu) + ".fasta"])
                 for cpu in range(nrcpus)]
    for process in processes:
        process.start()
    for process in processes:
        process.join()

    # Concatenate the per-CPU outputs, closing each file after reading it
    outputparts = []
    for cpu in range(nrcpus):
        with open("input" + str(cpu) + ".out", "r") as partfile:
            outputparts.append(partfile.read())
    blastoutput = "".join(outputparts)

    # Store the raw output one directory up. NOTE: the file is opened in "w"
    # mode on every iteration, so it only holds the most recent cluster.
    os.chdir("..")
    with open("./clusterblastoutput.txt", "w") as blastoutputfile:
        blastoutputfile.write(blastoutput)
    os.chdir("clusterblast")

    logfile.write(" Blast search finished. Parsing results...\n")
    minseqcoverage = 25
    minpercidentity = 30
    seqlengths = fastaseqlengths(proteins)
    blastinfo = blastparse(blastoutput, minseqcoverage, minpercidentity, seqlengths, geneclustergenes)
    blastdict = blastinfo[0]
    # Remove queries without hits
    querylist = [query for query in blastinfo[1] if query in blastdict]
    hitclusters = blastinfo[2]

    # Position of each query in querylist (first occurrence, matching what
    # list.index() returns) so the scoring loop does not rescan the list.
    queryindices = {}
    for index, query in enumerate(querylist):
        queryindices.setdefault(query, index)

    # Score BLAST output on all gene clusters
    # Rank gene cluster hits based on 1) number of protein hits covering >25% sequence length or at least 100aa alignment, with >30% identity and 2) cumulative blast score
    logfile.write(" Scoring Blast outputs on database of gene clusters...\n")
    hitclusterdict = {}
    hitclusterdata = {}
    for hitcluster in hitclusters:
        hitclusterdatalist = []
        nrhits = float(0)
        nrcoregenehits = float(0)
        cumblastscore = float(0)
        hitpositions = []     # [query index, hit gene index] pairs
        hitposcorelist = []   # parallel to hitpositions: 1 if the query is a core gene
        for query in querylist:
            hitdetails = blastdict[query][1]
            querynrhits = 0
            querycumblastscore = float(0)
            for hit in blastdict[query][0]:
                hitinfo = hitdetails[hit]
                if hitinfo[0] != hitcluster:
                    continue
                position = [queryindices[query], clusters[hitcluster][0].index(hitinfo[9])]
                if position in hitpositions:
                    continue
                querynrhits += 1
                # Scale the BLAST score so it only occupies the decimals
                querycumblastscore += float(hitinfo[6]) / 1000000
                hitclusterdatalist.append([query, hit, hitinfo[5], hitinfo[6], hitinfo[7], hitinfo[8]])
                hitpositions.append(position)
            if querynrhits:
                nrhits += 1
                if query.split("|")[4] in allcoregenes:
                    nrcoregenehits += 0.1
                    corescore = 1
                else:
                    corescore = 0
                hitposcorelist.extend([corescore] * querynrhits)
                cumblastscore += querycumblastscore
        if hitclusterdatalist:
            hitclusterdata[hitcluster] = hitclusterdatalist

        # Find groups of hits: hit gene positions per query gene position
        hitgroupsdict = {}
        for querypos, hitpos in hitpositions:
            hitgroupsdict.setdefault(querypos, []).append(hitpos)

        # Calculate synteny score; give a score only if there is more than one
        # hit (otherwise no synteny is possible), and only once for every
        # query gene and every hit gene
        synteny_score = 0
        query_givenscores_querydict = {}
        query_givenscores_hitdict = {}
        if nrhits > 1:
            for z in range(1, len(hitpositions)):
                p = hitpositions[z - 1]
                q = hitpositions[z]
                # Tandem: this hit gene is also hit by the next entry's query gene
                tandem = p[1] in hitgroupsdict[q[0]]
                if (query_givenscores_querydict.get(p[0], 0) == 0
                        and query_givenscores_hitdict.get(p[1], 0) == 0
                        and not tandem):
                    if abs(p[0] - q[0]) < 2 and abs(p[0] - q[0]) == abs(p[1] - q[1]):
                        synteny_score += 1
                        if hitposcorelist[z - 1] == 1 or hitposcorelist[z] == 1:
                            synteny_score += 1
                        query_givenscores_querydict[p[0]] = 1
                        query_givenscores_hitdict[p[1]] = 1
                else:
                    query_givenscores_querydict[p[0]] = 0
                    query_givenscores_hitdict[p[1]] = 0

        # Give bonus to gene clusters with >0 core gene hits
        corebonus = 3 if nrcoregenehits > 0 else 0
        # Sorting score is based on number of hits (discrete values) &
        # cumulative blast score (behind comma values)
        sortingscore = nrhits + synteny_score + corebonus + nrcoregenehits + cumblastscore
        hitclusterdict[hitcluster] = sortingscore

    # Sort gene clusters
    rankedclusters = sortdictkeysbyvaluesrev(hitclusterdict)
    rankedclustervalues = sortdictkeysbyvaluesrevv(hitclusterdict)

    # Output for each hit: table of genes and locations of input cluster,
    # table of genes and locations of hit cluster, table of hits between the clusters
    logfile.write(" Writing output file...\n")
    with open("cluster" + str(clusternumber) + ".txt", "w") as out_file:
        out_file.write("ClusterBlast scores for " + infile)
        out_file.write("\n\nTable of genes, locations, strands and annotations of query cluster:\n")
        for prot in queryclusterprots:
            protinfo = proteins[3][prot]
            out_file.write("%s\t%s\t%s\t%s\t%s\t\n" % (prot, protinfo[0], protinfo[1], protinfo[2], protinfo[3]))

        out_file.write("\n\nSignificant hits: \n")
        topclusters = rankedclusters[:100]
        for rank, hitcluster in enumerate(topclusters):
            out_file.write("%s. %s\t%s\n" % (rank + 1, hitcluster, clusters[hitcluster][1]))
        out_file.write("\n\n")

        out_file.write("Details:")
        for rank, hitcluster in enumerate(topclusters):
            rankedscore = rankedclustervalues[rank]
            # Split the score numerically rather than via str(float): the
            # text form drops trailing zeros (a score of 300 became "3") and
            # comparing the resulting string with 0 was always true.
            wholescore = int(rankedscore)
            if wholescore <= 0:
                continue
            hitcount = str(wholescore)
            scoretext = str(int(round((rankedscore - wholescore) * 1000000)))
            out_file.write("\n\n>>\n\n%s. %s\nSource: %s\nType: %s\nNumber of proteins with BLAST hits to this cluster: %s\nCumulative BLAST score: %s\n\nTable of genes, locations, strands and annotations of subject cluster:\n" % (rank + 1, hitcluster, clusters[hitcluster][1], clusters[hitcluster][2], hitcount, scoretext))
            for subject in clusters[hitcluster][0]:
                # Only proteins with location, annotation and strand data are listed
                if subject in proteinlocations and subject in proteinannotations and subject in proteinstrands:
                    if proteintags[subject] == "no_locus_tag":
                        out_file.write(subject)
                    else:
                        out_file.write(proteintags[subject])
                    location = proteinlocations[subject].split("-")
                    out_file.write("\t%s\t%s\t%s\t%s\t%s\n" % (subject, location[0], location[1], proteinstrands[subject], proteinannotations[subject]))

            out_file.write("\nTable of Blast hits (query gene, subject gene, %identity, blast score, %coverage, e-value):\n")
            if hitcluster in hitclusterdata:
                for row in hitclusterdata[hitcluster]:
                    # First column: only the fifth "|"-separated field of the query name
                    fields = [row[0].split("|")[4]] + ["%s" % field for field in row[1:]]
                    out_file.write("\t".join(fields) + "\t\n")
            else:
                # No hit data stored for this cluster: leave the table empty
                out_file.write("\n")
            out_file.write("\n")

# Leave the clusterblast directory once all clusters have been processed
os.chdir("..")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6242
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6243 elapsed = (time.time() - starttime)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6244 #print "Time since start: " + str(elapsed)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6245 #smCOG analysis
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6246 smcogtreedict = {}
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6247 if smcogs == "y":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6248 #print "Performing smCOG analysis"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6249 logfile.write("Performing smCOG analysis\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6250 hmmsearch = hmmscan_path + " --cpu " + str(nrcpus) + " -E 1E-6 -o " + "./smcogs/smcogshmm_output.txt" + " --noali --tblout " + "./smcogs/smcogshmm.txt "+ hmms_path +"smcogs.hmm " + "./clusterblast/geneclusterprots.fasta"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6251 #print hmmsearch
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6252 os.system(hmmsearch)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6253 #print 'finised'
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6254 smcoghmmlengthsdict = hmmlengths(hmms_path+"smcogs.hmm")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6255 smcogdict = hmmscanparse("./smcogs/smcogshmm_output.txt", smcoghmmlengthsdict)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6256 smcogdict2 = {}
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6257 for i in smcogdict.keys():
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6258 newkey = i.split("|")[4]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6259 smcogdict2[newkey] = smcogdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6260 smcogdict = smcogdict2
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6261 #Write output
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6262 #os.chdir(genomename)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6263 os.chdir("smcogs")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6264 smcogfile = open("smcogs.txt","w")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6265 for k in geneclustergenes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6266 if k not in pksnrpscoregenes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6267 l = smcogdict[k]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6268 smcogfile.write(">> " + k + "\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6269 smcogfile.write("name\tstart\tend\te-value\tscore\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6270 smcogfile.write("** smCOG hits **\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6271 for i in l:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6272 smcogfile.write(str(i[0]) + "\t" + str(i[1]) + "\t" + str(i[2]) + "\t" + str(i[3]) + "\t" + str(i[4]) + "\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6273 smcogfile.write("\n\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6274 smcogfile.close()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6275 os.chdir("..")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6276 os.chdir("..")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6277 #smCOG phylogenetic tree construction
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6278 #print "Calculating and drawing phylogenetic trees of cluster genes with smCOG members"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6279 logfile.write("Calculating and drawing phylogenetic trees of cluster genes with smCOG members")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6280 os.chdir("smcogtree")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6281 smcoganalysisgenes = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6282 #for k in geneclustergenes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6283 # if k not in pksnrpscoregenes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6284 # smcoganalysisgenes.append(k)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6285 [smcoganalysisgenes.append(k) for k in geneclustergenes if k not in pksnrpscoregenes]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6286 smcogsets = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6287 equalpartsizes = int(len(smcoganalysisgenes)/nrcpus)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6288 for i in range(nrcpus):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6289 if i == 0:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6290 geneslist = smcoganalysisgenes[:equalpartsizes]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6291 elif i == (nrcpus - 1):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6292 geneslist = smcoganalysisgenes[(i*equalpartsizes):]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6293 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6294 geneslist = smcoganalysisgenes[(i*equalpartsizes):((i+1)*equalpartsizes)]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6295 smcogsets.append(geneslist)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6296 processes = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6297 processnames = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6298 z = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6299 for k in smcogsets:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6300 processes.append(Process(target=smcog_analysis, args=[k,z,accessiondict,seqdict,smcogdict,smcogsoutputfolder]))
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6301 z += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6302 for k in processes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6303 k.start()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6304 time.sleep(1)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6305 while True:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6306 processrunning = "n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6307 for k in processes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6308 if k.is_alive():
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6309 processrunning = "y"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6310 if processrunning == "y":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6311 time.sleep(5)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6312 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6313 break
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6314 for k in processes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6315 k.join()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6316 os.chdir("..")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6317 currentpath = os.getcwd()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6318 os.chdir(smcogsoutputfolder)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6319 dircontents = getdircontents()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6320 for k in dircontents:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6321 #POTENTIAL PERFORMANCE gain: for k in glob.glob('*.png'):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6322 if ".png" in k:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6323 tag = k.split(".png")[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6324 smcogtreedict[tag] = tag + ".png"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6325 os.chdir(currentpath)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6326
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6327
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6328 ##Visualization
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6329 #Read in ClusterBlast data
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6330 #Read in PubMed / PubChem links of database gene clusters
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6331 if clusterblast == "y":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6332 if genomename in os.getcwd():
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6333 os.chdir('..')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6334 pubmed_dict = {}
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6335 pubchem_dict = {}
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6336 known_compound_dict = {}
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6337 #pubfile = open(antismash_path + "pubmed_pubchem_links.txt","r")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6338 #pubfile = pubfile.read()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6339 #publines = pubfile.split("\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6340 #for i in publines:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6341 bin_path = os.path.join(antismash_path, "pubmed_pubchem_links.bin")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6342 if os.path.exists( bin_path ):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6343 (pubmed_dict, pubchem_dict, known_compound_dict) = cPickle.load( open(bin_path) )
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6344 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6345 for line in open(antismash_path + "pubmed_pubchem_links.txt","r"):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6346 line = line.replace('\n', '')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6347 tabs = line.split("\t")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6348 acc = tabs[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6349 if tabs[1] != "":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6350 pubmed_dict[acc] = tabs[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6351 if tabs[2] != "":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6352 pubchem_dict[acc] = tabs[2]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6353 if tabs[3] != "":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6354 known_compound_dict[acc] = tabs[3]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6355 cPickle.dump([pubmed_dict, pubchem_dict, known_compound_dict], open(bin_path, 'w'), -1)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6356 #print "Writing visualization SVGs and XHTML"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6357 logfile.write("Writing visualization SVGs and XHTML\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6358 queryclusterdata = {}
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6359 nrhitgeneclusters = {}
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6360 cblastclusternr = 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6361 #print os.getcwd()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6362 if clusterblast == "y":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6363 for x in geneclusters:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6364 clusterblastfile = open(clusterblastoutputfolder + "cluster" + str(x) + ".txt","r")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6365 #print clusterblastfile
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6366 clusterblastfile = clusterblastfile.read()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6367 clusterblastfile = clusterblastfile.replace("\r","\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6368 toptenhitclusters = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6369 #Identify top ten hits for visualization
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6370 hitlines = ((clusterblastfile.split("Significant hits: \n")[1]).split("\nDetails:")[0]).split("\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6371 #print '\n\n#######hitlines\n', hitlines
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6372 a = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6373 cb_accessiondict = {}
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6374 b = 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6375 for i in hitlines:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6376 if " " in i:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6377 cb_accessiondict[b] = (i.split("\t")[0]).split(" ")[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6378 if genomic_accnr == "" or genomic_accnr not in i:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6379 b += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6380 if a < 10:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6381 if len(i) < 80:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6382 toptenhitclusters.append(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6383 elif len(i) >= 80:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6384 j = i[0:77] + "..."
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6385 toptenhitclusters.append(j)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6386 a += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6387 #print clusterblastfile
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6388 details = (clusterblastfile.split("\nDetails:")[1]).split(">>")[1:]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6389 #print details
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6390 nrhitclusters = len(toptenhitclusters)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6391 #Save query gene cluster data
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6392 querylines = ((clusterblastfile.split("Table of genes, locations, strands and annotations of query cluster:\n")[1]).split("\n\n\nSignificant hits:")[0]).split("\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6393 queryclustergenes = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6394 queryclustergenesdetails = {}
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6395 for i in querylines:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6396 tabs = i.split("\t")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6397 queryclustergenes.append(tabs[0])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6398 queryclustergenesdetails[tabs[0]] = [tabs[1],tabs[2],tabs[3],tabs[4]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6399 #For every gene cluster, store hit genes and details
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6400 colorgroupsdict = {}
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6401 hitclusterdata = {}
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6402 hitclusternr = 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6403 compound_found = "n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6404 nrhitgeneclusters[x] = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6405 for i in details:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6406 hitclustergenes = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6407 hitclustergenesdetails = {}
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6408 #Only calculate for first ten hit gene clusters
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6409 if genomic_accnr == "" or genomic_accnr not in i:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6410 if hitclusternr <= 10:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6411 nrhitgeneclusters[x] = hitclusternr
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6412 accession = cb_accessiondict[hitclusternr]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6413 hitclustergeneslines = ((i.split("Table of genes, locations, strands and annotations of subject cluster:\n")[1]).split("\n\nTable of Blast hits ")[0]).split("\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6414 #print '***********\n', i, '\n'
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6415 #print hitclustergeneslines
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6416 for j in hitclustergeneslines:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6417 tabs = j.split("\t")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6418 hitclustergenes.append(tabs[0])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6419 hitclustergenesdetails[tabs[0]] = [tabs[2],tabs[3],tabs[4],tabs[5],tabs[1]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6420
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6421 blasthitslines = ((i.split("%coverage, e-value):\n")[1]).split("\n\n")[0]).split("\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6422 querygeneswithhits = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6423 coregeneswithhits = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6424
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6425
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6426 blasthitdict = {}
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6427 blastdetailsdict = {}
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6428 querygenes = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6429 revblasthitdict = {}
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6430 hitgenes = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6431
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6432
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6433 for k in blasthitslines:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6434 tabs = k.split("\t")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6435 if tabs[0] not in querygeneswithhits:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6436 querygeneswithhits.append(tabs[0])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6437 if tabs[0] in allcoregenes and tabs[0] not in coregeneswithhits:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6438 coregeneswithhits.append(tabs[0])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6439
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6440
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6441 if blasthitdict.has_key(tabs[0]):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6442 hits = blasthitdict[tabs[0]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6443 hits.append(tabs[1])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6444 blasthitdict[tabs[0]] = hits
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6445 if revblasthitdict.has_key(tabs[1]):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6446 revhits = revblasthitdict[tabs[1]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6447 revhits.append(tabs[0])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6448 revblasthitdict[tabs[1]] = revhits
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6449 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6450 revblasthitdict[tabs[1]] = [tabs[0]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6451 blastdetailsdict[tabs[0] + "_|_|_" + tabs[1]] = [tabs[5],tabs[3]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6452 if tabs[0] not in querygenes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6453 querygenes.append(tabs[0])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6454 hitgenes.append(tabs[1])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6455 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6456 blasthitdict[tabs[0]] = [tabs[1]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6457 if revblasthitdict.has_key(tabs[1]):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6458 revhits = revblasthitdict[tabs[1]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6459 revhits.append(tabs[0])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6460 revblasthitdict[tabs[1]] = revhits
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6461 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6462 revblasthitdict[tabs[1]] = [tabs[0]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6463 blastdetailsdict[tabs[0] + "_|_|_" + tabs[1]] = [tabs[5],tabs[3]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6464 if tabs[0] not in querygenes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6465 querygenes.append(tabs[0])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6466 hitgenes.append(tabs[1])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6467
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6468
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6469
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6470 for k in known_compound_dict.keys():
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6471 if k in i and compound_found == "n" and len(querygeneswithhits) > 2 and len(coregeneswithhits) > 0:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6472 ws0.write(x,4,known_compound_dict[k])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6473 compound_found = "y"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6474 """blasthitdict = {}
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6475 blastdetailsdict = {}
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6476 querygenes = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6477 revblasthitdict = {}
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6478 hitgenes = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6479 for i in blasthitslines:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6480 tabs = i.split("\t")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6481 if blasthitdict.has_key(tabs[0]):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6482 hits = blasthitdict[tabs[0]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6483 hits.append(tabs[1])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6484 blasthitdict[tabs[0]] = hits
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6485 if revblasthitdict.has_key(tabs[1]):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6486 revhits = revblasthitdict[tabs[1]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6487 revhits.append(tabs[0])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6488 revblasthitdict[tabs[1]] = revhits
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6489 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6490 revblasthitdict[tabs[1]] = [tabs[0]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6491 blastdetailsdict[tabs[0] + "_|_|_" + tabs[1]] = [tabs[5],tabs[3]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6492 if tabs[0] not in querygenes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6493 querygenes.append(tabs[0])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6494 hitgenes.append(tabs[1])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6495 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6496 blasthitdict[tabs[0]] = [tabs[1]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6497 if revblasthitdict.has_key(tabs[1]):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6498 revhits = revblasthitdict[tabs[1]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6499 revhits.append(tabs[0])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6500 revblasthitdict[tabs[1]] = revhits
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6501 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6502 revblasthitdict[tabs[1]] = [tabs[0]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6503 blastdetailsdict[tabs[0] + "_|_|_" + tabs[1]] = [tabs[5],tabs[3]]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6504 if tabs[0] not in querygenes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6505 querygenes.append(tabs[0])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6506 hitgenes.append(tabs[1])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6507 """
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6508 #Make groups of genes for coloring
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6509 colorgroups = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6510 internalgroups = internalhomologygroupsdict[x]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6511 for i in internalgroups:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6512 querygenes_and_hits = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6513 for j in i:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6514 #Make list of query gene and its hits
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6515 additionalhits = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6516 #For each hit, check if it was also hit by another gene; if so, only add it to the group if this hit had the highest blast score
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6517 otherscores = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6518 queryscore = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6519 if blasthitdict.has_key(j):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6520 for k in blasthitdict[j]:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6521 for l in blastdetailsdict.keys():
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6522 if k in l and j in l:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6523 queryscore = blastdetailsdict[l][1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6524 elif k in l and j not in l:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6525 otherscores.append(blastdetailsdict[l][1])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6526 allscores = otherscores + [queryscore]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6527 if queryscore == max(allscores):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6528 additionalhits.append(k)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6529 #Add additional hits to the querygenes_and_hits list that will form a colorgroup
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6530 querygenes_and_hits = querygenes_and_hits + additionalhits
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6531 if j not in querygenes_and_hits:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6532 querygenes_and_hits.append(j)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6533 if len(querygenes_and_hits) > 0:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6534 colorgroups.append(querygenes_and_hits)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6535 colorgroupsdict[hitclusternr] = colorgroups
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6536 hitclusterdata[hitclusternr] = [colorgroupsdict,hitclustergenes,hitclustergenesdetails,queryclustergenes,queryclustergenesdetails,toptenhitclusters,accession]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6537 hitclusternr += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6538 elif hitclusternr > 10 and hitclusternr <= 50:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6539 blasthitslines = ((i.split("%coverage, e-value):\n")[1]).split("\n\n")[0]).split("\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6540 querygeneswithhits = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6541 coregeneswithhits = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6542 for k in blasthitslines:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6543 tabs = k.split("\t")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6544 if tabs[0] not in querygeneswithhits:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6545 querygeneswithhits.append( tabs[0] )
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6546 if tabs[0] in allcoregenes and tabs[0] not in coregeneswithhits:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6547 coregeneswithhits.append(tabs[0])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6548 for k in known_compound_dict.keys():
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6549 if k in i and compound_found == "n" and len(querygeneswithhits) > 2 and len(coregeneswithhits) > 0:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6550 ws0.write(x,4,known_compound_dict[k])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6551 compound_found = "y"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6552 hitclusternr += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6553 queryclusterdata[cblastclusternr] = [nrhitclusters,hitclusterdata]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6554 cblastclusternr += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6555 wb.save(genomename + "/" + genomename + ".geneclusters.xls")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
#Gather and store data on each gene cluster
# smCOG ids used further down to sort cluster genes into the `gtrs` and
# `transporters` lists.
gtrcoglist = ['SMCOG1045','SMCOG1062','SMCOG1102']
transportercoglist = [
    'SMCOG1000','SMCOG1005','SMCOG1011','SMCOG1020','SMCOG1029','SMCOG1033',
    'SMCOG1035','SMCOG1044','SMCOG1065','SMCOG1067','SMCOG1069','SMCOG1074',
    'SMCOG1085','SMCOG1096','SMCOG1106','SMCOG1118','SMCOG1131','SMCOG1166',
    'SMCOG1169','SMCOG1184','SMCOG1202','SMCOG1205','SMCOG1214','SMCOG1234',
    'SMCOG1243','SMCOG1245','SMCOG1252','SMCOG1254','SMCOG1288',
]
qgeneclusterdata = {}
if smcogs == "y":
    # Collapse smcogdict so that every gene with at least one hit maps to a
    # single string taken from its first hit: the part before ":" when the
    # name has the form "<id>:<description>", otherwise the whole name.
    # Descriptions are collected separately, keyed by the id part.
    smcogdict2 = {}
    smcogdescriptions = {}
    for i in smcogdict.keys():
        _smcog_hits = smcogdict[i]
        if len(_smcog_hits) == 0:
            # Genes without any hit are left out of the collapsed dict.
            continue
        if len(_smcog_hits[0]) > 0 and ":" in _smcog_hits[0][0]:
            _smcog_parts = (_smcog_hits[0][0]).split(":")
            smcogdict2[i] = _smcog_parts[0]
            smcogdescriptions[_smcog_parts[0]] = _smcog_parts[1]
        else:
            smcogdict2[i] = _smcog_hits[0][0]
    # From here on smcogdict maps gene -> string instead of gene -> hit list.
    smcogdict = smcogdict2
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
# Build one 20-field record per query gene cluster and store it in
# qgeneclusterdata[genecluster]. Field order (index):
#   0 clustertype, 1 clustersize, 2 clustergenes, 3 annotations, 4 starts,
#   5 ends, 6 strands, 7 pksnrpsprots, 8 pksnrpsprotsnames, 9 pksnrpsdomains,
#   10 substrspecnrpspredictordict, 11 substrspecminowadict,
#   12 substrspecpkssigdict, 13 substrspecconsensusdict, 14 gtrs,
#   15 transporters, 16 colors, 17 hitgeneclusterdata, 18 structpred,
#   19 krpredictionsdict
for genecluster in geneclusters:
    # Element 4 of every entry in clusterinfo[genecluster][4] is the gene
    # identifier; it is the key used for proteins[3] below.
    clustergenes2 = [i[4] for i in clusterinfo[genecluster][4]]
    clustergenes = clustergenes2
    clusternr = 1  # not read inside this loop; kept in case later code uses it
    clustertype = clusterinfo[genecluster][0]
    annotations = {}
    colors = []
    starts = []
    ends = []
    strands = []
    pksnrpsprots = []
    gtrs = []
    transporters = []
    for j in clustergenes:
        # proteins[3][j] supplies start [0], end [1], strand [2], annotation [3].
        annotations[j] = proteins[3][j][3]
        starts.append(int(proteins[3][j][0]))
        ends.append(int(proteins[3][j][1]))
        strands.append(proteins[3][j][2])
        if j in allcoregenes:
            colors.append("#810E15")
        else:
            colors.append("grey")
        if j in pksnrpscoregenes:
            pksnrpsprots.append(j)
        if smcogs == "y":
            if j in smcogdict and len(smcogdict[j]) > 0:
                # smcogdict was collapsed earlier in the script to map each
                # gene to a single smCOG id string, so the whole value has to
                # be compared; indexing it with [0] would only test the first
                # character and never match an id.
                if smcogdict[j] in gtrcoglist:
                    gtrs.append(j)
                if smcogdict[j] in transportercoglist:
                    transporters.append(j)
    clustersize = max(ends) - min(starts)
    if clusterblast == "n":
        # Without ClusterBlast there are no hit clusters for any query cluster.
        nrhitgeneclusters = {}
        for i in geneclusters:
            nrhitgeneclusters[i] = 0
    # NOTE(review): when clusterblast != "n", nrhitgeneclusters must already
    # have been filled in by earlier code - confirm.
    hitgeneclusters = range(1,(nrhitgeneclusters[genecluster] + 1))
    hitgeneclusterdata = {}
    hitgeneclusterdata[genecluster] = [hitgeneclusters]
    pksnrpsprotsnames = nrpspkstypedict
    pksnrpsdomains = {}
    domlist = []
    domsdetails = {}
    substrspecnrpspredictordict = {}
    substrspecminowadict = {}
    substrspecpkssigdict = {}
    substrspecconsensusdict = {}
    krpredictionsdict = {}
    for i in pksnrpsprots:
        domlist = []
        domsdetails = {}
        doms = domaindict[i]
        for j in doms:
            # Give each domain a unique name within the protein by appending
            # the first unused counter: <name>1, <name>2, ...
            nr = 1
            while j[0] + str(nr) in domlist:
                nr += 1
            domname = j[0] + str(nr)
            domlist.append(domname)
            domsdetails[domname] = [j[1],j[2]]
            # Copy the per-domain predictions into this cluster's dicts; the
            # prediction tables are keyed "<protein>_<abbreviation><nr>".
            if "AMP-binding" in domname or "A-OX" in domname:
                domname2 = i + "_" + "A" + str(nr)
                substrspecminowadict[domname2] = minowa_nrps_preds[domname2]
                substrspecnrpspredictordict[domname2] = [nrps_code_preds[domname2],nrps_svm_preds[domname2]]
                substrspecconsensusdict[domname2] = consensuspreds[domname2]
            if "PKS_AT" in domname:
                domname2 = i + "_" + "AT" + str(nr)
                substrspecminowadict[domname2] = minowa_pks_preds[domname2]
                substrspecpkssigdict[domname2] = pks_code_preds[domname2]
                substrspecconsensusdict[domname2] = consensuspreds[domname2]
            if "CAL_domain" in domname:
                domname2 = i + "_" + "CAL" + str(nr)
                substrspecminowadict[domname2] = minowa_cal_preds[domname2]
                substrspecconsensusdict[domname2] = consensuspreds[domname2]
            if "PKS_KR" in domname:
                domname2 = i + "_" + "KR" + str(nr)
                krpredictionsdict[domname2] = [kr_activity_preds[domname2],kr_stereo_preds[domname2]]
        pksnrpsdomains[i] = [domlist,domsdetails]
    if genecluster in compound_pred_dict:
        structpred = compound_pred_dict[genecluster]
    else:
        structpred = "N/A"
    qgeneclusterdata[genecluster] = [clustertype,clustersize,clustergenes,annotations,starts,ends,strands,pksnrpsprots,pksnrpsprotsnames,pksnrpsdomains,substrspecnrpspredictordict,substrspecminowadict,substrspecpkssigdict,substrspecconsensusdict,gtrs,transporters,colors,hitgeneclusterdata,structpred,krpredictionsdict]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6659
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
#Create genecluster svg for each gene cluster
# geneposdict accumulates gene -> [start, end] across all clusters and is
# handed to geneclustersvg() for every cluster.
geneposdict = {}
for qclusternr in geneclusters:
    data = qgeneclusterdata[qclusternr]
    # Unpack the 20-field cluster record in one step. Not every field is used
    # in this loop, but all names are rebound as before.
    (clustertype, clustersize, genes, annotations, starts, ends, strands,
     pksnrpsprots, pksnrpsprotsnames, pksnrpsdomains,
     substrspecnrpspredictordict, substrspecminowadict, substrspecpkssigdict,
     substrspecconsensusdict, gtrs, transporters, colors, hitgeneclusterdata,
     structpred, krpredictionsdict) = data[:20]
    relpositions = relativepositions(starts,ends,clustersize)
    rel_starts = relpositions[0]
    rel_ends = relpositions[1]
    # starts/ends are parallel to genes, so walk them with a shared index.
    y = 0
    for i in genes:
        geneposdict[i] = [starts[y],ends[y]]
        y += 1
    s = geneclustersvg(genes,rel_starts,rel_ends,strands,geneposdict,pksnrpsprots,pksnrpsdomains,qclusternr)
    outfile = open(svgfolder + "genecluster" + str(qclusternr) + ".svg","w")
    try:
        outfile.write(s.getXML())
    finally:
        # Close the handle even if serialising or writing the SVG fails.
        outfile.close()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
#Create ClusterBlast svg
if clusterblast == "y":
    # Position data returned by clusterblastresults(), keyed
    # "<query>_<hit>" for pairwise drawings and "<query>_all" for the
    # combined drawing.
    clusterblastpositiondata = {}
    #Create alignment svg for each pair of hit&query
    for i in geneclusters:
        # Hit clusters are numbered 1..N, N being queryclusterdata[i][0].
        hitclusters = range(1, queryclusterdata[i][0] + 1)
        #Create svgs for pairwise gene cluster alignment
        colorschemedict,rgbcolorscheme = calculate_colorgroups(i,hitclusters,queryclusterdata,internalhomologygroupsdict)
        for k in hitclusters:
            cresults = clusterblastresults(i,[k],queryclusterdata,colorschemedict,rgbcolorscheme)
            s = cresults[0]
            clusterblastpositiondata[str(i) + "_"+str(k)] = cresults[1]
            outfile = open(svgfolder + "clusterblast" + str(i) + "_" + str(k) + ".svg","w")
            try:
                outfile.write(s.getXML())
            finally:
                # Close the handle even if serialising or writing fails.
                outfile.close()
        #Create svgs for multiple gene cluster alignment
        cresults = clusterblastresults(i,hitclusters,queryclusterdata,colorschemedict,rgbcolorscheme)
        s = cresults[0]
        clusterblastpositiondata[str(i) + "_all"] = cresults[1]
        outfile = open(svgfolder + "clusterblast" + str(i) + "_all.svg","w")
        try:
            outfile.write(s.getXML())
        finally:
            outfile.close()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6718
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
#Load the SEARCHGTR search form template and split it around the
#FASTASEQUENCE placeholder
_formtemplatefile = open("search_form.html","r")
try:
    formtemplate = _formtemplatefile.read()
finally:
    # The template is read once; release the handle right away.
    _formtemplatefile.close()
# Turn carriage returns into newlines before splitting.
formtemplate = formtemplate.replace("\r","\n")
formtemplateparts = formtemplate.split("FASTASEQUENCE")
#Load the skeleton for the HTML file with gene cluster info in hidden div
#tags; it is cut into fragments at every "<SPLIT HERE>" marker
htmlfile = open("empty.xhtml","r")
try:
    html = htmlfile.read()
finally:
    htmlfile.close()
html = html.replace("\r","\n")
htmlparts = html.split("<SPLIT HERE>")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
# Open the result page; it stays open because later sections keep writing
# to it.
htmloutfile = open(genomename + "/display.xhtml","w")
htmloutfile.write(htmlparts[0])
#Add lines to load all svgs up front
for qclusternr in geneclusters:
    _loadline = ' loadsvg(' + str(qclusternr) + ');\n'
    htmloutfile.write(_loadline)
if clusterblast == "y":
    cblastclusters = [1,2,3,4,5,6,7,8,9,10]
    for qclusternr in geneclusters:
        # One loadcblastsvg(query, hit) call per hit cluster, hits numbered
        # from 1.
        nrhitclusters = queryclusterdata[qclusternr][0]
        for j in range(nrhitclusters):
            _loadline = ' loadcblastsvg(' + str(qclusternr) + ',' + str(j+1) + ');\n'
            htmloutfile.write(_loadline)
#For each gene cluster, add hidden div tags for gene names, add hidden div tags for NRPS/PKS domains, add hidden div tags for ClusterBLAST depictions
htmloutfile.write(htmlparts[1])
for qclusternr in geneclusters:
    data = qgeneclusterdata[qclusternr]
    pksnrpsprots = data[7]
    pksnrpsprotsnames = data[8]
    pksnrpsdomains = data[9]
    # `a` numbers the domain divs of this cluster consecutively across all of
    # its PKS/NRPS proteins.
    # NOTE(review): assumes the counter advances once per domain (inner
    # loop); the original indentation was not recoverable - confirm.
    a = 0
    for i in pksnrpsprots:
        for j in pksnrpsdomains[i][0]:
            _hideline = ' $("#b' + str(qclusternr) + '_00' + str(a) + '_div").hide();\n'
            htmloutfile.write(_hideline)
            a += 1
htmloutfile.write(htmlparts[2])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
#Add top menu
# Cluster type -> image number passed to the over2()/out2() handlers.
gifdict = {
    "t1pks":"16","t2pks":"17","t3pks":"18","t4pks":"20","nrps":"10",
    "amglyccycl":"1","bcin":"2","blactam":"3","butyrolactone":"4",
    "ectoine":"5","terpene":"19","indole":"7","lant":"8","melanin":"9",
    "nucleoside":"12","other":"13","phosphoglycolipid":"14",
    "siderophore":"15",
}
htmloutfile.write('<img border="0" align="top" src="images/empty.png" name="img0_" />\n')
menubutton_nr = 1
nrclustercolumns = 1
for i in geneclusters:
    # Known types use their own number, types containing "-" use "6", and
    # anything else falls back to "13".
    _menu_clustertype = qgeneclusterdata[i][0]
    typenr = "13"
    if _menu_clustertype in gifdict:
        typenr = gifdict[_menu_clustertype]
    elif "-" in _menu_clustertype:
        typenr = "6"
    _menu_nr = str(i)
    _menu_button = (
        '<a href="javascript:displaycluster(' + _menu_nr
        + ')"><img align="top" border="0" src="images/img' + _menu_nr
        + '_1.png" name="img' + _menu_nr
        + '_" onmouseover="over(' + _menu_nr + '),over2(0,' + typenr
        + ')" onmouseout="out(' + _menu_nr + '),out2(0,' + typenr
        + ')"/></a>\n'
    )
    htmloutfile.write(_menu_button)
    # Break the button row after the 22nd and the 49th button and count the
    # extra row in nrclustercolumns.
    if menubutton_nr in (22, 49):
        htmloutfile.write('<br/>')
        nrclustercolumns += 1
    menubutton_nr += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6770
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6771 #Add gene cluster description
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6772 htmloutfile.write(htmlparts[3])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6773 extrapixelsdict = {}
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6774 for qclusternr in geneclusters:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6775 data = qgeneclusterdata[qclusternr]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6776 clustertype = data[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6777 clustersize = data[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6778 genes = data[2]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6779 annotations = data[3]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6780 starts = data[4]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6781 ends = data[5]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6782 strands = data[6]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6783 pksnrpsprots = data[7]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6784 pksnrpsprotsnames = data[8]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6785 pksnrpsdomains = data[9]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6786 substrspecnrpspredictordict = data[10]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6787 substrspecminowadict = data[11]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6788 substrspecpkssigdict = data[12]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6789 substrspecconsensusdict = data[13]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6790 gtrs = data[14]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6791 transporters = data[15]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6792 colors = data[16]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6793 hitgeneclusterdata = data[17]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6794 structpred = data[18]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6795 krpredictionsdict = data[19]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6796 relpositions = relativepositions(starts,ends,clustersize)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6797 rel_starts = relpositions[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6798 rel_ends = relpositions[1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6799 #Create genes overview pop-up HTMLs
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6800 genepopupoutfile = open(htmlfolder + "geneclustergenes" + str(qclusternr) + '.html',"w")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6801 genepopupoutfile.write('<html>\n<head>\n<LINK href="style.css" rel="stylesheet" type="text/css">\n</head>\n<body>\nOverview of gene cluster genes:<br><br><table border=1>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6802 genepopupoutfile.write('<tr><td><b>Gene</b></td><td><b>Annotation</b></td><td><b>Start position</b></td><td><b>End position</b></td><td><b>Strand</b></td></tr>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6803 for i in genes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6804 genepopupoutfile.write('<tr><td>' + i + '</td><td>' + annotations[i].replace("_"," ") + '</td><td>' + str(starts[genes.index(i)]) + '</td><td>' + str(ends[genes.index(i)]) + '</td><td>' + strands[genes.index(i)] + '</td></tr>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6805 genepopupoutfile.write('\n</table><br><br><br>Biosynthetic gene cluster signature gene domains detected: <br><br>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6806 genepopupoutfile.write('<table border=1><tr><td><b>Gene</b></td><td><b>Detected domains</b></td><td><b>Bit scores</b></td>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6807 for i in genes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6808 if i in allcoregenes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6809 detected_doms = detecteddomainsdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6810 for j in detected_doms:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6811 genepopupoutfile.write('<tr><td>' + i + '</td><td>' + str(j[0]) + '</td><td>' + str(j[1]) + '</td>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6812 genepopupoutfile.write('\n</table><br><br><br>')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6813 genepopupoutfile.write('\n</body>\n</html>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6814 genepopupoutfile.close()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6815 #Add gene cluster description on top
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6816 if qclusternr == 1:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6817 htmloutfile.write('<div id="genecluster'+ str(qclusternr) + '">')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6818 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6819 htmloutfile.write('\n\n<div id="genecluster'+ str(qclusternr) + '" style="display:none">')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6820 #Add menu bars 1 & 2
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6821 htmloutfile.write('<div id="bartext1" style="color:#FFFFFF; font-size:1em; position:absolute; z-index:2; top:' + str(113 + nrclustercolumns * 28) + 'px; left:30px;"><b>Gene cluster description</b></div>')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6822 htmloutfile.write('<div id="bartext2" style="color:#FFFFFF; font-size:1em; position:absolute; z-index:2; top:' + str(263 + nrclustercolumns * 28) + 'px; left:30px;"><b>PKS/NRPS domain annotation</b></div>')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6823 htmloutfile.write('<div id="descrbar1" style="position:absolute; z-index:1; top:' + str(110 + nrclustercolumns * 28) + 'px;"><img src="images/bar.png" height="25" width="' + str(int(0.75 * screenwidth)) + '"/></div>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6824 htmloutfile.write('<div class="help" id="help1" style="position:absolute; z-index:1; top:' + str(112 + nrclustercolumns * 28) + 'px; left:' + str(int(screenwidth * 0.75) - 20) + 'px;"><a href="http://antismash.secondarymetabolites.org/help.html#panel1" target="_blank"><img border="0" src="images/help.png"/></a></div>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6825 htmloutfile.write('<div id="descrbar2" style="position:absolute; z-index:1; top:' + str(260 + nrclustercolumns * 28) + 'px;"><img src="images/bar.png" height="25" width="' + str(int(0.75 * screenwidth)) + '"/></div>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6826 htmloutfile.write('<div class="help" id="help2" style="position:absolute; z-index:1; top:' + str(262 + nrclustercolumns * 28) + 'px; left:' + str(int(screenwidth * 0.75) - 20) + 'px;"><a href="http://antismash.secondarymetabolites.org/help.html#panel2" target="_blank"><img border="0" src="images/help.png"/></a></div>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6827 if screenwidth < 1280:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6828 htmloutfile.write('<div class="clusterdescr" style="font-size:0.7em; position:absolute; top:' + str(125 + nrclustercolumns * 28) + 'px; left:' + str(12) + 'px;">\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6829 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6830 htmloutfile.write('<div class="clusterdescr" style="font-size:0.8em; position:absolute; top:' + str(120 + nrclustercolumns * 28) + 'px; left:' + str(12) + 'px;">\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6831 htmloutfile.write("<br/>Gene Cluster " + str(qclusternr) + ". Type = " + clustertype + ". Location: "+ str(starts[0]) + " - " + str(ends[-1]) + " nt. Click on genes for more information.")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6832 if len(genomic_accnr) > 4:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6833 htmloutfile.write('&nbsp;&nbsp;<a href="http://www.ncbi.nlm.nih.gov/nuccore/' + genomic_accnr + '" target="_blank">GBK</a>')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6834 #Genes overview pop-up.
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6835 if len(clustertype) > 20:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6836 htmloutfile.write('<br/>')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6837 htmloutfile.write('&nbsp;&nbsp;&nbsp;&nbsp;<a href="html/geneclustergenes' + str(qclusternr) + '.html" onclick=\'window.open("html/geneclustergenes' + str(qclusternr) + '.html","popup","width=800,height=800,scrollbars=yes,resizable=yes,toolbar=0,directories=0,location=0,menubar=0,status=0,left=0,top=0"); return false\'>Genes and detection info overview</a>')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6838 htmloutfile.write("</div>\n\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6839 htmloutfile.write('<div id="display' + str(qclusternr) + '">\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6840 if nrclustercolumns > 1:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6841 spacers = nrclustercolumns - 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6842 for i in range(spacers):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6843 htmloutfile.write('<img src="images/spacer.png"/>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6844 htmloutfile.write('</div>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6845 #Add gene pop-ups
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6846 a = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6847 for i in genes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6848 htmloutfile.write('<div id="a' + str(qclusternr) + '_00' + str(a) + '_div" class="hidden popup" style="position:absolute; z-index:2; top:' + str(185 + nrclustercolumns * 28) + 'px; left:' + str(int(((rel_starts[a] + rel_ends[a])/2)*0.875)) + 'px;">\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6849 htmloutfile.write(annotations[i].replace("_"," ").replace("&","&amp;") + "\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6850 if smcogs == "y":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6851 if smcogdict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6852 smcog = smcogdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6853 htmloutfile.write("<br/>smCOG: " + smcog + " (" + smcogdescriptions[smcog].replace("_"," ").replace("&","&amp;") + ")\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6854 if smcog in gtrcoglist:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6855 formfileloc = searchgtrfolder + i + ".html"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6856 formfile = open(formfileloc,"w")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6857 specificformtemplate = formtemplateparts[0].replace("GlycTr",i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6858 formfile.write(specificformtemplate)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6859 formfile.write(i + "\n" + seqdict[i])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6860 formfile.write(formtemplateparts[1])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6861 formfile.close()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6862 htmloutfile.write("<br/><a href=\"searchgtr/" + i + ".html\" target=\"_blank\"> Run SEARCHGTr on this gene </a>\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6863 if smcog in transportercoglist:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6864 link = "http://blast.jcvi.org/er-blast/index.cgi?project=transporter;program=blastp;sequence=sequence%0A" + seqdict[i]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6865 htmloutfile.write("<br/><a href=\"" + link + "\" target=\"_blank\"> TransportDB BLAST on this gene </a>\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6866 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6867 htmloutfile.write("<br/>smCOG: -\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6868 link = "http://blast.ncbi.nlm.nih.gov/Blast.cgi?PAGE=Proteins&amp;PROGRAM=blastp&amp;BLAST_PROGRAMS=blastp&amp;QUERY=" + seqdict[i] + "&amp;LINK_LOC=protein&amp;PAGE_TYPE=BlastSearch"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6869 htmloutfile.write("<br/>Location: " + str(starts[a]) + "-" + str(ends[a]) + "\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6870 htmloutfile.write("<br/><a href=\"" + link + "\" target=\"_blank\"> NCBI BlastP on this gene </a><br/>\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6871 browse_start = starts[a] - 10000
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6872 browse_end = ends[a] + 10000
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6873 if browse_start < 0:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6874 browse_start = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6875 if browse_end > dnaseqlength:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6876 browse_end = dnaseqlength
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6877 if genomic_accnr != "none" and genomic_accnr != "":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6878 htmloutfile.write('<a href="http://www.ncbi.nlm.nih.gov/projects/sviewer/?Db=gene&amp;DbFrom=protein&amp;Cmd=Link&amp;noslider=1&amp;id=' + genomic_accnr + '&amp;from=' + str(browse_start) + '&amp;to=' + str(browse_end) + '" target=\"_blank\">View genomic context</a><br/>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6879 if smcogs == "y":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6880 if smcogtreedict.has_key(i.rpartition(".")[0]):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6881 htmloutfile.write('<a href="smcogs/' + smcogtreedict[i.rpartition(".")[0]] + '" onclick=\'window.open("smcogs/' + smcogtreedict[i.rpartition(".")[0]] + '","popup","width=1280,height=1500,resizable=yes,scrollbars=yes,toolbar=0,directories=0,location=0,menubar=0,status=0,left=0,top=0"); return false\'>View smCOG seed phylogenetic tree with this gene</a>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6882 elif smcogtreedict.has_key(i):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6883 htmloutfile.write('<a href="smcogs/' + smcogtreedict[i] + '" onclick=\'window.open("smcogs/' + smcogtreedict[i] + '","popup","width=1280,height=1500,resizable=yes,scrollbars=yes,toolbar=0,directories=0,location=0,menubar=0,status=0,left=0,top=0"); return false\'>View smCOG seed phylogenetic tree with this gene</a>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6884 htmloutfile.write("</div>\n\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6885 htmloutfile.write('<div id="a' + str(qclusternr) + '_00' + str(a) + '_divtext" class="hidden genenames" style="position:absolute; top:' + str(162 + nrclustercolumns * 28) + 'px; left:' + str(float((rel_starts[a]+rel_ends[a])/2)*0.9375) + 'px;">\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6886 htmloutfile.write(i)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6887 htmloutfile.write("</div>\n\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6888 a += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6889 #Early calculation of nr of domains to be able to fit structure prediction information of large NRPSs/PKSs
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6890 pksnrpsdomainnr = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6891 krdomainnr = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6892 adomainnr = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6893 for i in pksnrpsprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6894 doms = pksnrpsdomains[i][0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6895 first = "no"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6896 nra = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6897 nrat = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6898 nrkr = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6899 nrcal = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6900 for j in doms:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6901 if "AMP-binding" in j or "A-OX" in j:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6902 j = "A"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6903 nra += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6904 adomainnr += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6905 z = nra
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6906 if "KR" in j:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6907 j = "KR"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6908 nrkr += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6909 krdomainnr += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6910 z = nrkr
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6911 if "AT" in j and "docking" not in j:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6912 j = "AT"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6913 nrat += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6914 pksnrpsdomainnr += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6915 z = nrat
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6916 if "CAL" in j:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6917 j = "CAL"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6918 nrcal += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6919 pksnrpsdomainnr += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6920 z = nrcal
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6921 pixels = adomainnr * 50 + pksnrpsdomainnr * 40 + krdomainnr * 30 + (len(pksnrpsprots) * 16) + 375
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6922 extrapixels = pixels - (676 + len(pksnrpsprots) * 99)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6923 if extrapixels < 0:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6924 extrapixels = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6925 extrapixelsdict[qclusternr] = extrapixels
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6926 #Add picture of predicted chemical structure
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6927 htmloutfile.write('<div id="verticalbar1" style="position:absolute; left:' + str(int(screenwidth * 0.75) + 12) + 'px; top:' + str(106 + nrclustercolumns * 28) + 'px;"><img src="images/linefill.png" height="' + str(1126 + len(pksnrpsprots) * 99 + extrapixels) + '" width="2"/></div>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6928 htmloutfile.write('<div id="verticalbar2" style="position:absolute; left:' + str(int(screenwidth * 0.98)) + 'px; top:0px;"><img src="images/linefill.png" height="' + str(1288 + len(pksnrpsprots) * 99 + nrclustercolumns * 28 + extrapixels) + '" width="2"/></div>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6929 htmloutfile.write('<div id="horizbar1" style="position:absolute; left:0px; top:' + str(92 + nrclustercolumns * 28) + 'px;"><img src="images/linefill.png" height="2" width="' + str(screenwidth * 0.98) + '"/></div>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6930 htmloutfile.write('<div id="horizbar2" style="position:absolute; left:0px; top:82px;"><img src="images/linefill.png" height="2" width="' + str(screenwidth * 0.98) + '"/></div>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6931 htmloutfile.write('<div id="horizbar3" style="position:absolute; left:0px; top:' + str(1223 + len(pksnrpsprots) * 99 + nrclustercolumns * 28 + extrapixels) + 'px;"><img src="images/linefill.png" height="2" width="' + str(screenwidth * 0.98) + '"/></div>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6932 if screenwidth < 1280:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6933 htmloutfile.write('<div id="bartext4" style="color:#FFFFFF; font-size:0.8em; position:absolute; z-index:2; top:' + str(114 + nrclustercolumns * 28) + 'px; left:' + str(int(screenwidth * 0.75) + 30) + 'px;"><b>Predicted core structure</b></div>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6934 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6935 htmloutfile.write('<div id="bartext4" style="color:#FFFFFF; font-size:1em; position:absolute; z-index:2; top:' + str(113 + nrclustercolumns * 28) + 'px; left:' + str(int(screenwidth * 0.75) + 30) + 'px;"><b>Predicted core structure</b></div>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6936 htmloutfile.write('<div class="title" style="position:absolute; top:' + str(110 + nrclustercolumns * 28) + 'px; left:' + str(screenwidth * 0.75 + 20) + 'px;">\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6937 htmloutfile.write('<div id="descrbar4" style="right:25px; position:absolute; z-index:1; top:0px; left:0px;"><img src="images/bar.png" height="25" width="' + str(int(0.21 * screenwidth)) + '"/></div>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6938 htmloutfile.write('<div class="help" id="help4" style="position:absolute; z-index:1; top:2px; left:' + str(int(screenwidth * 0.2) - 20) + 'px;"><a href="http://antismash.secondarymetabolites.org/help.html#sidepanel1" target="_blank"><img border="0" src="images/help.png"/></a></div>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6939 if qclusternr in failedstructures:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6940 htmloutfile.write('<br/><br/><img src="images/nostructure_icon.png" border="1" width="' + str(int(screenwidth * 0.19)) + '" height="200" />\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6941 elif " " in structpred:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6942 htmloutfile.write('<br/><br/><a href="structures/genecluster' + str(qclusternr) + '.png" onclick=\'window.open("structures/genecluster' + str(qclusternr) + '.png","popup","width=600,height=300,scrollbars=yes,resizable=yes,toolbar=0,directories=0,location=0,menubar=0,status=0,left=0,top=0"); return false\'><img src="structures/genecluster' + str(qclusternr) + '_icon.png" border="1" width="' + str(int(screenwidth * 0.19)) + '" height="200" /></a>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6943 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6944 htmloutfile.write('<br/><br/><img src="images/nostructure_icon.png" border="1" width="' + str(int(screenwidth * 0.19)) + '" height="200" />\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6945 htmloutfile.write('<div class="clusterdescr" style="font-size:0.8em;">\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6946 htmloutfile.write("Monomers prediction: " + structpred + "<br/>\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6947 if qclusternr in dockingdomainanalysis:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6948 htmloutfile.write('<a href="html/docking_analysis' + str(qclusternr) + '.html" onclick=\'window.open("html/docking_analysis' + str(qclusternr) + '.html","popup","width=600,height=1200,scrollbars=yes,resizable=yes,toolbar=0,directories=0,location=0,menubar=0,status=0,left=0,top=0"); return false\'>Docking domain analysis results.</a><br/>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6949 nrpsfound = "no"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6950 pksnrpsdomainnr = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6951 adomainnr = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6952 krdomainnr = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6953 for i in pksnrpsprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6954 doms = pksnrpsdomains[i][0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6955 first = "no"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6956 nra = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6957 nrat = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6958 nrkr = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6959 nrcal = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6960 for j in doms:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6961 if "AMP-binding" in j or "A-OX" in j:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6962 j = "A"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6963 nra += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6964 adomainnr += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6965 z = nra
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6966 if "KR" in j:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6967 j = "KR"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6968 nrkr += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6969 krdomainnr += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6970 z = nrkr
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6971 if "AT" in j and "docking" not in j:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6972 j = "AT"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6973 nrat += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6974 pksnrpsdomainnr += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6975 z = nrat
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6976 if "CAL" in j:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6977 j = "CAL"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6978 nrcal += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6979 pksnrpsdomainnr += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6980 z = nrcal
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6981 prediction = "no"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6982 domname = str(i) + "_" + str(j) + str(z)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6983 if domname in substrspecnrpspredictordict.keys():
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6984 nrpsfound = "yes"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6985 prediction = "yes"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6986 if substrspecnrpspredictordict[domname][0] == "nrp":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6987 if first == "no":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6988 first = "yes"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6989 htmloutfile.write(i + ':<br/>')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6990 htmloutfile.write('<font size="1">&nbsp;&nbsp;NRPSPredictor code prediction, '+ str(j) + str(z) + ': ?</font><br/>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6991 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6992 if first == "no":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6993 first = "yes"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6994 htmloutfile.write(i + ':<br/>')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6995 htmloutfile.write('<font size="1">&nbsp;&nbsp;NRPSPredictor code prediction, '+ str(j) + str(z) + ': ' + substrspecnrpspredictordict[domname][0] + '</font><br/>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6996 if substrspecnrpspredictordict[domname][1] == "nrp":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6997 if first == "no":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6998 first = "yes"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
6999 htmloutfile.write(i + ':<br/>')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7000 htmloutfile.write('<font size="1">&nbsp;&nbsp;NRPSPredictor SVM prediction, '+ str(j) + str(z) + ': ?</font><br/>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7001 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7002 if first == "no":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7003 first = "yes"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7004 htmloutfile.write(i + ':<br/>')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7005 htmloutfile.write('<font size="1">&nbsp;&nbsp;NRPSPredictor SVM prediction, '+ str(j) + str(z) + ': ' + substrspecnrpspredictordict[domname][1] + '</font><br/>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7006 if domname in substrspecminowadict.keys():
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7007 prediction = "yes"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7008 if substrspecminowadict[domname] == "nrp" or substrspecminowadict[domname] == "pk":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7009 if first == "no":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7010 first = "yes"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7011 htmloutfile.write(i + ':<br/>')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7012 htmloutfile.write('<font size="1">&nbsp;&nbsp;Minowa prediction, '+ str(j) + str(z) + ': ?</font><br/>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7013 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7014 if first == "no":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7015 first = "yes"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7016 htmloutfile.write(i + ':<br/>')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7017 htmloutfile.write('<font size="1">&nbsp;&nbsp;Minowa prediction, '+ str(j) + str(z) + ': ' + substrspecminowadict[domname] + '</font><br/>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7018 if domname in substrspecpkssigdict.keys():
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7019 prediction = "yes"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7020 if substrspecpkssigdict[domname] == "pk":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7021 if first == "no":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7022 first = "yes"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7023 htmloutfile.write(i + ':<br/>')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7024 htmloutfile.write('<font size="1">&nbsp;&nbsp;PKS code prediction, '+ str(j) + str(z) + ': ?</font><br/>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7025 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7026 if first == "no":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7027 first = "yes"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7028 htmloutfile.write(i + ':<br/>')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7029 htmloutfile.write('<font size="1">&nbsp;&nbsp;PKS code prediction, '+ str(j) + str(z) + ': ' + substrspecpkssigdict[domname] + '</font><br/>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7030 if domname in krpredictionsdict.keys():
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7031 if first == "no":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7032 first = "yes"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7033 htmloutfile.write(i + ':<br/>')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7034 htmloutfile.write('<font size="1">&nbsp;&nbsp;KR activity, '+ str(j) + str(z) + ': ' + krpredictionsdict[domname][0] + "</font><br/>\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7035 htmloutfile.write('<font size="1">&nbsp;&nbsp;KR stereochemistry, '+ str(j) + str(z) + ': ' + krpredictionsdict[domname][1] + "</font><br/>\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7036 #Add link to prediction details pop-up
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7037 if prediction == "yes":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7038 htmloutfile.write('<font size="1">&nbsp;&nbsp;&nbsp;&nbsp;<a href="substrspecs/' + domname + '.html" onclick=\'window.open("substrspecs/' + domname + '.html","popup","width=500,height=400,scrollbars=yes,resizable=no,toolbar=0,directories=0,location=0,menubar=0,status=0,left=0,top=0"); return false\'>Prediction details</a></font><br/>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7039 if nrpsfound == "yes":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7040 htmloutfile.write('<br/><a href="http://bioinfo.lifl.fr/norine/form2.jsp" target="_blank">Perform Norine peptide search</a>')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7041 htmloutfile.write('</div>')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7042 if screenwidth < 1280:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7043 htmloutfile.write('<div id="bartext5" style="color:#FFFFFF; font-size:0.8em; position:absolute; z-index:2; top:' + str(624 + adomainnr * 50 + pksnrpsdomainnr * 40 + krdomainnr * 30 + (len(pksnrpsprots) * 16)) + 'px; left:10px;"><b>File outputs</b></div>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7044 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7045 htmloutfile.write('<div id="bartext5" style="color:#FFFFFF; font-size:1em; position:absolute; z-index:2; top:' + str(623 + adomainnr * 50 + pksnrpsdomainnr * 40 + krdomainnr * 30 + (len(pksnrpsprots) * 16)) + 'px; left:10px;"><b>Downloadable output files</b></div>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7046 htmloutfile.write('<div id="descrbar5" style="right:25px; position:absolute; z-index:1; top:' + str(620 + adomainnr * 50 + pksnrpsdomainnr * 40 + krdomainnr * 30 + (len(pksnrpsprots) * 16)) + 'px; left:0px;"><img src="images/bar.png" height="25" width="' + str(int(0.21 * screenwidth)) + '"/></div>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7047 htmloutfile.write('<div class="help" id="help5" style="position:absolute; z-index:1; top:' + str(622 + adomainnr * 50 + pksnrpsdomainnr * 40 + krdomainnr * 30 + (len(pksnrpsprots) * 16)) + 'px; left:' + str(int(screenwidth * 0.2) - 20) + 'px;"><a href="http://antismash.secondarymetabolites.org/help.html#sidepanel2" target="_blank"><img border="0" src="images/help.png"/></a></div>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7048 htmloutfile.write('<div class="text" id="outputinfo" style="font-size:0.8em; right:25px; position:absolute; z-index:1; top:' + str(655 + adomainnr * 50 + pksnrpsdomainnr * 40 + krdomainnr * 30 + (len(pksnrpsprots) * 16)) + 'px; left:0px;">')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7049 if fullhmm == "y" or fullblast == "y":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7050 htmloutfile.write('<a href="' + oldgenomename + '.final.embl" target="_blank">Open EMBL summary file</a><br/><br/>')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7051 #htmloutfile.write('<a href="' + genomename + '.final.csv" target="_blank">Download CSV summary file</a><br/><br/>')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7052 if fullhmm == "y":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7053 htmloutfile.write('<a href="' + oldgenomename + '.cluster_prediction.png" onclick=\'window.open("' + oldgenomename + '.cluster_prediction.png","popup","width=1024,height=1400,scrollbars=0,resizable=0,toolbar=0,directories=0,location=0,menubar=0,status=0,left=0,top=0"); return false\'>Sec. met. enriched genome regions</a><br/><br/>')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7054 htmloutfile.write('<a href="' + genomename + '.geneclusters.xls" target="_blank">Open XLS overview table</a><br/><br/>')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7055 htmloutfile.write('</div>')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7056 htmloutfile.write("</div>\n\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7057 #Add descriptions of NRPS/PKS genes
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7058 htmloutfile.write('<div class="title" style="position:absolute; top:' + str(180) + 'px; left:' + str(12) + 'px;">\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7059 htmloutfile.write("</div>\n\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7060 z = 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7061 for i in pksnrpsprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7062 htmloutfile.write('<div class="text" style="position:absolute; top:' + str(228 + 84 * z + nrclustercolumns * 28) + 'px; left:' + str(12) + 'px;">\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7063 htmloutfile.write(i + " (" + pksnrpsprotsnames[i].lower() + ")")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7064 htmloutfile.write("</div>\n\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7065 z += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7066 #Add NRPS/PKS domain pop-ups
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7067 longestprot = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7068 protlengthdict = {}
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7069 for i in pksnrpsprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7070 protlength = (geneposdict[i][1] - geneposdict[i][0]) / 3
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7071 protlengthdict[i] = protlength
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7072 if protlength > longestprot:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7073 longestprot = protlength
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7074 try:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7075 aa2pixelratio = longestprot * 0.75 / screenwidth
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7076 except:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7077 aa2pixelratio = 0.1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7078 a = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7079 z = 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7080 for i in pksnrpsprots:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7081 domainsdict = pksnrpsdomains[i][1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7082 nra = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7083 nrat = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7084 nrkr = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7085 nrcal = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7086 for j in pksnrpsdomains[i][0]:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7087 startpos = domainsdict[j][0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7088 endpos = domainsdict[j][1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7089 htmloutfile.write('<div id="b' + str(qclusternr) + '_00' + str(a) + '_div" class="hidden popup" style="position:absolute; z-index:2; top:' + str(277 + 84 * z + nrclustercolumns * 28) + 'px; left:' + str( ( ( (endpos+startpos) / 2) / aa2pixelratio) * 0.9375 ) + 'px;">\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7090 htmloutfile.write("Domain " + j + " (" + i + ")")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7091 link = "http://blast.ncbi.nlm.nih.gov/Blast.cgi?PAGE=Proteins&amp;PROGRAM=blastp&amp;BLAST_PROGRAMS=blastp&amp;QUERY=" + seqdict[i][startpos:endpos] + "&amp;LINK_LOC=protein&amp;PAGE_TYPE=BlastSearch"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7092 htmloutfile.write("<br/>Location: " + str(startpos) + "-" + str(endpos) + " AA\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7093 domid = i + "_" + j
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7094 if "AMP-binding" in j or "A-OX" in j:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7095 j = "A"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7096 nra += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7097 y = nra
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7098 if "PKS_KR" in j:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7099 j = "KR"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7100 nrkr += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7101 y = nrkr
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7102 if "PKS_AT" in j:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7103 j = "AT"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7104 nrat += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7105 y = nrat
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7106 if "CAL_domain" in j:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7107 j = "CAL"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7108 nrcal += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7109 y = nrcal
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7110 prediction = "no"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7111 domid = str(i) + "_" + str(j) + str(y)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7112 if substrspecnrpspredictordict.has_key(domid) or substrspecminowadict.has_key(domid) or substrspecpkssigdict.has_key(domid):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7113 htmloutfile.write("<br/>Predicted substrate: " + substrspecconsensusdict[domid] + "\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7114 if substrspecnrpspredictordict.has_key(domid):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7115 htmloutfile.write("<br/>-NRPSPredictor code: " + substrspecnrpspredictordict[domid][0] + "\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7116 htmloutfile.write("<br/>-NRPSPredictor SVM: " + substrspecnrpspredictordict[domid][1] + "\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7117 if substrspecminowadict.has_key(domid):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7118 htmloutfile.write("<br/>-Minowa HMM: " + substrspecminowadict[domid] + "\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7119 if substrspecpkssigdict.has_key(domid):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7120 htmloutfile.write("<br/>-PKS code: " + substrspecpkssigdict[domid] + "\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7121 if krpredictionsdict.has_key(domid):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7122 htmloutfile.write("<br/>KR activity: " + krpredictionsdict[domid][0] + "\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7123 htmloutfile.write("<br/>KR stereochemistry: " + krpredictionsdict[domid][1] + "\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7124 htmloutfile.write("<br/><a href=\"" + link + "\" target=\"_blank\"> NCBI BlastP on this domain </a>\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7125 htmloutfile.write("</div>\n\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7126 a += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7127 z += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7128 htmloutfile.write('</div>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7129
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7130 if clusterblast == "y":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7131 #Write ClusterBlast divs with pictures and description pop-up tags
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7132 htmloutfile.write('<div id="clusterblastview" class="clusterdescr">\n\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7133 #Add menu bar 3
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7134 htmloutfile.write('<div id="bartext3" style="color:#FFFFFF; font-size:1em; position:absolute; z-index:2; top:3px; left:20px;"><b>Homologous gene clusters</b></div>')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7135 htmloutfile.write('<div id="descrbar3" style="position:absolute; z-index:1; top:0px;"><img src="images/bar.png" height="25" width="' + str(int(0.75*screenwidth)) + '"/></div>')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7136 htmloutfile.write('<div class="help" id="help3" style="position:absolute; z-index:1; top:2px; left:' + str(int(screenwidth * 0.75) - 30) + 'px;"><a href="http://antismash.secondarymetabolites.org/help.html#panel3" target="_blank"><img border="0" src="images/help.png"/></a></div>')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7137 for qclusternr in geneclusters:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7138 nrhitclusters = queryclusterdata[qclusternr][0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7139 hitclusterdata = queryclusterdata[qclusternr][1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7140 if qclusternr == 1:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7141 htmloutfile.write('<div id="qcluster' + str(qclusternr) + '">\n<br/><br/>\n<div align="left">\n<form name="clusterform' + str(qclusternr) + '">\n<select name="selection' + str(qclusternr) + '" onchange="javascript:navigate(this);">\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7142 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7143 htmloutfile.write('<div id="qcluster' + str(qclusternr) + '" style="display:none">\n<br/><br/>\n<div align="left">\n<form name="clusterform' + str(qclusternr) + '">\n<select name="selection' + str(qclusternr) + '" onchange="javascript:navigate(this);">\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7144 htmloutfile.write('<option value="">Select gene cluster alignment</option>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7145 for i in range(nrhitclusters):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7146 htmloutfile.write('<option value="javascript:displaycblastresults(' + str(qclusternr) + ',' + str(i+1) + ')">' + hitclusterdata[i+1][5][i].replace("&","&amp;") + '</option>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7147 htmloutfile.write('</select>\n</form>\n\n</div>')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7148 htmloutfile.write('<div style="position:absolute; top:33px; left:' + str(screenwidth*0.625) + 'px;"><img src="images/button.gif" name="button' + str(qclusternr) + '" onclick="javascript:displaybutton(' + str(qclusternr) + ');"/></div>')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7149 clustersizes = []
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7150 for i in range(nrhitclusters):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7151 hitclusterdata = queryclusterdata[qclusternr][1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7152 queryclustergenes = hitclusterdata[1][3]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7153 queryclustergenesdetails = hitclusterdata[1][4]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7154 hitclusternumber = i + 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7155 cluster_acc = hitclusterdata[hitclusternumber][6]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7156 hitclustergenes = hitclusterdata[hitclusternumber][1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7157 hitclustergenesdetails = hitclusterdata[hitclusternumber][2]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7158 relpositiondata = clusterblastpositiondata[str(qclusternr) + "_" + str(i+1)]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7159 qrel_starts = relpositiondata[0][0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7160 qrel_ends = relpositiondata[0][1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7161 hrel_starts = relpositiondata[1][hitclusternumber ][0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7162 hrel_ends = relpositiondata[1][hitclusternumber ][1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7163 strandsbalance = relpositiondata[2][hitclusternumber]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7164 if strandsbalance < 0:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7165 hitclustergenes.reverse()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7166 if qclusternr == 1 and (i+1) == 1:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7167 htmloutfile.write('<div id="hitcluster' + str(qclusternr) + '_' + str(i+1) + '">\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7168 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7169 htmloutfile.write('<div id="hitcluster' + str(qclusternr) + '_' + str(i+1) + '" style="display:none">\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7170 #Insert gene cluster descriptions
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7171 cdescription = hitclusterdata[i+1][5][i].replace("&","&amp;").replace("\t"," ").partition(" ")[2].partition(" ")[2].split(", whole")[0].split(", complete")[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7172 if len(nucname) < 80:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7173 qdescription = nucname
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7174 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7175 qdescription = nucname[0:77] + "..."
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7176 htmloutfile.write('<div id="descriptionquery" style="text-align:right; position:absolute; top:70px; right:50px; font-size:10px; font-style:italic">' + qdescription + '</div>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7177 htmloutfile.write('<div id="description' + str(qclusternr) + '" style="text-align:right; position:absolute; top:137px; right:50px; font-size:10px; font-style:italic">' + cdescription + '</div>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7178 #Insert pubmed/pubchem links
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7179 htmloutfile.write('<div id="pub_pics" style="position:absolute; top:60px; left:' + str(int(screenwidth * 0.0)) + 'px; font-size:10px"> Hit cluster cross-links: \n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7180 htmloutfile.write('&nbsp;&nbsp;<a href="http://www.ncbi.nlm.nih.gov/nuccore/' + cluster_acc.split(".")[0] + '" target="_blank"><img align="bottom" border="0" src="images/genbank.gif"/></a>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7181 present = "n"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7182 for j in pubmed_dict.keys():
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7183 if j in cluster_acc:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7184 present = "y"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7185 for j in pubchem_dict.keys():
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7186 if j in cluster_acc:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7187 present = "y"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7188 if present == "y":
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7189 for j in pubmed_dict.keys():
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7190 if j in cluster_acc:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7191 pubmedstring = pubmed_dict[j]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7192 htmloutfile.write('&nbsp;&nbsp;<a href="http://www.ncbi.nlm.nih.gov/pubmed/' + pubmedstring + '" target="_blank"><img align="bottom" border="0" src="images/pubmed.gif"/></a>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7193 for j in pubchem_dict.keys():
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7194 if j in cluster_acc:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7195 pubchemstring = pubchem_dict[j]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7196 if "," in pubchemstring:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7197 htmloutfile.write('&nbsp;&nbsp;<a href="http://www.ncbi.nlm.nih.gov/sites/entrez?db=pccompound&amp;term=' + pubchemstring + '" target="_blank"><img align="bottom" border="0" src="images/struct.gif"/></a>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7198 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7199 htmloutfile.write('&nbsp;&nbsp;<a href="http://pubchem.ncbi.nlm.nih.gov/summary/summary.cgi?cid=' + pubchemstring + '" target="_blank"><img align="bottom" border="0" src="images/struct.gif"/></a>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7200 htmloutfile.write('</div>\n\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7201 #Create gene pop-ups
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7202 a = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7203 for j in queryclustergenes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7204 j_accession = accessiondict[j]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7205 htmloutfile.write('<div id="q' + str(qclusternr) + "_" + str(hitclusternumber) + "_" + str(a) + '_div" class="hidden popup" style="position:absolute; top:' + str(113) + 'px; left:' + str(int(float(qrel_starts[a])*0.875)) + 'px;">\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7206 htmloutfile.write(queryclustergenesdetails[j][3].replace("_"," ").replace("&","&amp;") + "\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7207 link = "http://blast.ncbi.nlm.nih.gov/Blast.cgi?PAGE=Proteins&amp;PROGRAM=blastp&amp;BLAST_PROGRAMS=blastp&amp;QUERY=" + j_accession + "&amp;LINK_LOC=protein&amp;PAGE_TYPE=BlastSearch"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7208 htmloutfile.write("<br/>Location: " + str(queryclustergenesdetails[j][0]) + "-" + str(queryclustergenesdetails[j][1]) + "\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7209 htmloutfile.write("<br/><a href=\"" + link + "\" target=\"_blank\"> NCBI BlastP on this gene </a>\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7210 htmloutfile.write("</div>\n\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7211 htmloutfile.write('<div id="q' + str(qclusternr) + "_" + str(hitclusternumber) + "_" + str(a) + '_divtext" class="hidden genenames" style="position:absolute; top:' + str(83) + 'px; left:' + str(int(float((float(qrel_starts[a])+float(qrel_ends[a]))/2)*0.9375)) + 'px;">\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7212 htmloutfile.write(j)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7213 htmloutfile.write("</div>\n\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7214 a+= 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7215 a = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7216 for j in hitclustergenes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7217 j_accession = hitclustergenesdetails[j][4]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7218 htmloutfile.write('<div id="h' + str(qclusternr) + "_" + str(hitclusternumber) + "_" + str(a) + '_div" class="hidden popup" style="position:absolute; top:' + str(183) + 'px; left:' + str(int(float(hrel_starts[a])*0.875)) + 'px;">\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7219 htmloutfile.write(hitclustergenesdetails[j][3].replace("_"," ").replace("&","&amp;") + "\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7220 link = "http://blast.ncbi.nlm.nih.gov/Blast.cgi?PAGE=Proteins&amp;PROGRAM=blastp&amp;BLAST_PROGRAMS=blastp&amp;QUERY=" + j_accession + "&amp;LINK_LOC=protein&amp;PAGE_TYPE=BlastSearch"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7221 htmloutfile.write("<br/>Location: " + str(hitclustergenesdetails[j][0]) + "-" + str(hitclustergenesdetails[j][1]) + "\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7222 htmloutfile.write("<br/><a href=\"" + link + "\" target=\"_blank\"> NCBI BlastP on this gene </a>\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7223 htmloutfile.write("</div>\n\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7224 htmloutfile.write('<div id="h' + str(qclusternr) + "_" + str(hitclusternumber) + "_" + str(a) + '_divtext" class="hidden genenames" style="position:absolute; top:' + str(153) + 'px; left:' + str(int(float((float(hrel_starts[a])+float(hrel_ends[a]))/2)*0.9375)) + 'px;">\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7225 htmloutfile.write(j)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7226 htmloutfile.write("</div>\n\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7227 a += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7228 htmloutfile.write('</div>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7229 #Find new relative positions for display of all gene clusters in one picture
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7230 relpositiondata = clusterblastpositiondata[str(qclusternr) + "_all"]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7231 qrel_starts = relpositiondata[0][0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7232 qrel_ends = relpositiondata[0][1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7233 htmloutfile.write('<div id="hitcluster' + str(qclusternr) + '_all" style="display:none">\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7234 if len(nucname) < 80:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7235 qdescription = nucname
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7236 else:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7237 qdescription = nucname[0:77] + "..."
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7238 htmloutfile.write('<div id="descriptionquery" style="text-align:right; position:absolute; top:60px; right:50px; font-size:10px; font-style:italic">' + qdescription + '</div>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7239 for i in range(nrhitclusters):
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7240 hitclusterdata = queryclusterdata[qclusternr][1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7241 queryclustergenes = hitclusterdata[1][3]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7242 queryclustergenesdetails = hitclusterdata[1][4]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7243 hitclusternumber = i + 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7244 hrel_starts = relpositiondata[1][hitclusternumber][0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7245 hrel_ends = relpositiondata[1][hitclusternumber][1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7246 cluster_acc = hitclusterdata[hitclusternumber][6]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7247 hitclustergenes = hitclusterdata[hitclusternumber][1]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7248 hitclustergenesdetails = hitclusterdata[hitclusternumber][2]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7249 strandsbalance = relpositiondata[2][hitclusternumber]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7250 cdescription = hitclusterdata[i+1][5][i].replace("&","&amp;").replace("\t"," ").partition(" ")[2].partition(" ")[2].split(", whole")[0].split(", complete")[0]
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7251 htmloutfile.write('<div id="description' + str(qclusternr) + '" style="text-align:right; position:absolute; top:' + str(60 + (57 * hitclusternumber)) + 'px; right:50px; font-size:10px; font-style:italic">' + cdescription + '</div>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7252 if hitclusternumber == 1:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7253 a = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7254 for j in queryclustergenes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7255 htmloutfile.write('<div id="all_' + str(qclusternr) + "_0_" + str(a) + '_div" class="hidden popup" style="position:absolute; top:' + str(100) + 'px; left:' + str(int(float(qrel_starts[a])*0.875)) + 'px; z-index:2;">\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7256 htmloutfile.write(queryclustergenesdetails[j][3].replace("_"," ").replace("&","&amp;") + "\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7257 link = "http://blast.ncbi.nlm.nih.gov/Blast.cgi?PAGE=Proteins&amp;PROGRAM=blastp&amp;BLAST_PROGRAMS=blastp&amp;QUERY=" + j + "&amp;LINK_LOC=protein&amp;PAGE_TYPE=BlastSearch"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7258 htmloutfile.write("<br/>Location: " + str(queryclustergenesdetails[j][0]) + "-" + str(queryclustergenesdetails[j][1]) + "\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7259 htmloutfile.write("<br/><a href=\"" + link + "\" target=\"_blank\"> NCBI BlastP on this gene </a>\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7260 htmloutfile.write("</div>\n\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7261 htmloutfile.write('<div id="all_' + str(qclusternr) + "_0_" + str(a) + '_divtext" class="hidden genenames" style="position:absolute; top:' + str(75) + 'px; left:' + str(int(float((float(qrel_starts[a])+float(qrel_ends[a]))/2)*0.9375)) + 'px;">\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7262 htmloutfile.write(j)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7263 htmloutfile.write("</div>\n\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7264 a+= 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7265 a = 0
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7266 for j in hitclustergenes:
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7267 htmloutfile.write('<div id="all_' + str(qclusternr) + "_" + str(hitclusternumber) + "_" + str(a) + '_div" class="hidden popup" style="position:absolute; top:' + str(100 + 57 * hitclusternumber) + 'px; left:' + str(int(float(hrel_starts[a])*0.875)) + 'px; z-index:2;">\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7268 htmloutfile.write(hitclustergenesdetails[j][3].replace("_"," ").replace("&","&amp;") + "\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7269 link = "http://blast.ncbi.nlm.nih.gov/Blast.cgi?PAGE=Proteins&amp;PROGRAM=blastp&amp;BLAST_PROGRAMS=blastp&amp;QUERY=" + j + "&amp;LINK_LOC=protein&amp;PAGE_TYPE=BlastSearch"
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7270 htmloutfile.write("<br/>Location: " + str(hitclustergenesdetails[j][0]) + "-" + str(hitclustergenesdetails[j][1]) + "\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7271 htmloutfile.write("<br/><a href=\"" + link + "\" target=\"_blank\"> NCBI BlastP on this gene </a>\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7272 htmloutfile.write("</div>\n\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7273 htmloutfile.write('<div id="all_' + str(qclusternr) + "_" + str(hitclusternumber) + "_" + str(a) + '_divtext" class="hidden genenames" style="position:absolute; top:' + str(75 + 56.75 * hitclusternumber) + 'px; left:' + str(int(float((float(hrel_starts[a])+float(hrel_ends[a]))/2)*0.9375)) + 'px;">\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7274 htmloutfile.write(j)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7275 htmloutfile.write("</div>\n\n")
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7276 a += 1
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7277 htmloutfile.write('</div>\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7278 htmloutfile.write('</div>\n\n')
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
# Close the ClusterBlast container <div> (only written when ClusterBlast ran).
if clusterblast == "y":
    htmloutfile.write('</div>\n')

# Fixed inner markup of every credits banner: institute logos plus citation.
# The final '</div>' closes the "creditsbar" <div> opened inside the loop.
credits_html = (
    '<div style="float:center; font-size:0.9em;">\n'
    '<div style="position:absolute; top:0px; left:30px;">\n'
    '<img src="images/ruglogo.gif" border="0"/>&nbsp;&nbsp;&nbsp;&nbsp;\n'
    '<img src="images/gbblogo.gif" border="0"/>&nbsp;&nbsp;&nbsp;&nbsp;\n'
    '<img src="images/tueblogo.gif" border="0"/>&nbsp;&nbsp;&nbsp;&nbsp;\n'
    '<img src="images/ucsflogo.gif" border="0"/>&nbsp;&nbsp;&nbsp;&nbsp;\n'
    '</div>\n'
    '<div style="position:absolute; top:0px; left:600px;">\n'
    'antiSMASH: Rapid identification, annotation and analysis of secondary metabolite biosynthesis gene clusters.\n'
    '<br/>Marnix H. Medema, Kai Blin, Peter Cimermancic, Victor de Jager, Piotr Zakrzewski, Michael A. Fischbach, Tilmann Weber, Rainer Breitling &amp; Eriko Takano\n'
    '<br/><i>Nucleic Acids Research</i> (2011), proposal submitted.\n'
    '</div>\n'
    '</div>\n'
    '</div>'
)

# One credits banner per gene cluster. Its vertical offset grows with the
# number of entries in field 7 of the cluster data, the number of cluster
# columns and the cluster's extra pixels. Every banner except the one for
# cluster 1 is emitted with "display:none".
for i in geneclusters:
    pks_nrps_proteins = qgeneclusterdata[i][7]
    banner_top = 1242 + int(len(pks_nrps_proteins) * 99) + nrclustercolumns * 28 + extrapixelsdict[i]
    display_rule = "" if i == 1 else "display:none; "
    htmloutfile.write(
        '<div id="creditsbar' + str(i) + '" class="banner" style="' + display_rule
        + 'position:absolute; width:' + str(int(0.98 * screenwidth))
        + 'px; align:\'left\'; height:75; top:' + str(banner_top)
        + 'px; left:0px; color:#810E15; z-index:-1;">'
    )
    htmloutfile.write(credits_html)

#Add final part of HTML file
htmloutfile.write(htmlparts[-1])
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
#Copy accessory files for HTML viewing
# Windows needs one "copy" per folder (vis, vis\html, vis\images); elsewhere a
# single recursive "cp" covers all of them.
# The original only assigned the commands for 'win32' and 'linux2', so any
# other sys.platform value raised NameError at the first os.system() call.
# Every non-Windows platform now takes the "cp" route.
# NOTE(review): genomename is interpolated unquoted into a shell command, so
# names containing spaces or shell metacharacters will not work.
if sys.platform == 'win32':
    copycommands = [
        "copy/y vis\\* " + genomename + " > nul",
        "copy/y vis\\html\\* " + genomename + "\\html > nul",
        "copy/y vis\\images\\* " + genomename + "\\images > nul",
    ]
else:
    copycommands = ["cp -r vis/* " + genomename + " > /dev/null"]
for accessory_copy in copycommands:
    os.system(accessory_copy)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7304
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
#Generate EMBL output
# Writes <genomename>/embl_lines.txt in three sections separated by ">>":
#   1. one tab-separated annotation line per gene in the gene clusters,
#   2. one misc_feature line per domain / motif hit,
#   3. one misc_feature line per gene cluster.
# NOTE(review): indentation was lost in the listing this code was recovered
# from. The substrate/activity prediction loops are placed inside the
# "gene has domains" check -- confirm against the original file.


def _embl_feature_line(strand, startpos, endpos, hit, kind, colour):
    """Return one misc_feature line for a domain or motif hit.

    hit is indexed as (name, start, end, E-value, bit score). The hit start
    and end are multiplied by 3 and offset from the gene start ("+" strand)
    or gene end ("-" strand) -- presumably amino-acid to nucleotide
    coordinates. Returns "" for any other strand value, so nothing is
    written for it.
    """
    if strand == "+":
        location = str(startpos + hit[1] * 3) + ".." + str(startpos + hit[2] * 3)
    elif strand == "-":
        location = "complement(" + str(endpos - hit[2] * 3) + ".." + str(endpos - hit[1] * 3) + ")"
    else:
        return ""
    # Both strands now share one layout. The original "-" strand domain line
    # lacked the space before "domain;" and the tab before "Bit score:".
    return ("misc_feature\t" + location + "\t" + str(hit[0]) + " " + kind
            + ";\tE-value: " + str(hit[3]) + "\tBit score: " + str(hit[4])
            + "\t/colour=" + str(colour) + "\n")


emblfile = open(genomename + "/embl_lines.txt", "w")
try:
    # Section 1: per-gene annotation line.
    for i in geneclustergenes:
        emblfile.write(i + "\t")
        if smcogs == "y" and i in smcogdict:
            emblfile.write("smCOG: " + smcogdict[i] + ":" + smcogdescriptions[smcogdict[i]] + "\t")
        if i in nrpspkstypedict:
            emblfile.write("NRPS/PKS type: " + nrpspkstypedict[i] + "\t")
        if i in domaindict:
            for j in domaindict[i]:
                emblfile.write(j[0] + " (" + str(j[1]) + "-" + str(j[2]) + "); E-value:" + str(j[3]) + "; Bit score: " + str(j[4]) + "\t")
            # (domain label, prediction method, predictions dict). Built here
            # rather than before the loop so the dicts are only touched when
            # the original code would have touched them.
            prediction_sources = (
                ("AT-domain", "Minowa", minowa_pks_preds),
                ("AT-domain", "PKS code", pks_code_preds),
                ("CAL-domain", "Minowa", minowa_cal_preds),
                ("A-domain", "Minowa", minowa_nrps_preds),
                ("A-domain", "Stachelhaus code", nrps_code_preds),
                ("A-domain", "NRPSPredictor2 SVM", nrps_svm_preds),
            )
            for domain_label, method, predictions in prediction_sources:
                # A prediction belongs to this gene when the gene name occurs
                # in its key; hits are numbered in dict iteration order.
                # NOTE(review): this is a substring test, so a gene whose name
                # is a prefix of another gene's name would also match.
                domain_nr = 0
                for k in predictions:
                    if i in k:
                        domain_nr += 1
                        emblfile.write(domain_label + " " + str(domain_nr) + " " + method + " substrate specificity prediction: " + predictions[k] + "\t")
            nrkr = 0
            for k in kr_activity_preds:
                if i in k:
                    nrkr += 1
                    # Fixed: the original numbered KR domains with the stale
                    # AT-domain counter (nrat) instead of nrkr.
                    emblfile.write("KR-domain " + str(nrkr) + " activity prediction: " + kr_activity_preds[k] + "\t")
                    emblfile.write("KR-domain " + str(nrkr) + " predicted stereochemistry group: " + kr_stereo_preds[k] + "\t")
        if i in motifdict:
            for m in motifdict[i]:
                emblfile.write("Motif " + str(m[0]) + " (" + str(m[1]) + "-" + str(m[2]) + "). E-value: " + str(m[3]) + "; Bit score: " + str(m[4]) + "\t")
        emblfile.write("\n")
    emblfile.write("\n\n>>\n\n")

    # Section 2: separate domain (colour 2) and motif (colour 6) entries.
    for i in geneclustergenes:
        strand = strandsdict[i]
        startpos = geneposdict[i][0]
        endpos = geneposdict[i][1]
        if i in domaindict:
            for j in domaindict[i]:
                emblfile.write(_embl_feature_line(strand, startpos, endpos, j, "domain", 2))
        if i in motifdict:
            for m in motifdict[i]:
                emblfile.write(_embl_feature_line(strand, startpos, endpos, m, "motif", 6))
    emblfile.write("\n\n>>\n\n")

    # Section 3: one feature per gene cluster (colour 13).
    for i in geneclusters:
        cstart = clusterinfo[i][1]
        if cstart == 0:
            # A start of 0 is written as 1 (presumably because the output
            # coordinates are 1-based -- confirm).
            cstart = 1
        cend = clusterinfo[i][2]
        emblfile.write("misc_feature\t" + str(cstart) + ".." + str(cend) + "\t" + clusterinfo[i][0] + " gene cluster\t/colour=13\n")
finally:
    # Close the file even if a lookup above raises.
    emblfile.close()
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7387
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
#Close open html file
htmloutfile.close()

#Run whole-genome BLAST / HMM CLUSEAN modules & ClusterFinder
# Copy the input file into the output folder, then run the CLUSEAN pipeline
# from inside that folder.
# The original only handled 'win32' and 'linux2': on any other sys.platform
# value "copycommand" was undefined (NameError) and the pipeline was never
# started. Every non-Windows platform now takes the POSIX route.
# NOTE(review): infile and genomename are interpolated unquoted into shell
# commands, so paths with spaces or shell metacharacters will not work.
if sys.platform == 'win32':
    copycommand = "copy " + infile + " " + genomename + ' > nul'
else:
    copycommand = "cp " + infile + " " + genomename
os.system(copycommand)

# Remember where we started: the original's os.chdir('..') only returns to
# the same place when genomename is a single-level relative directory.
startdir = os.getcwd()
os.chdir(genomename)
try:
    args = "--cpus %s " % nrcpus
    if fullblast == "n":
        args += "--without-blast "
    if fullhmm == "n":
        args += "--without-hmmer "
    if fullhmm == "y":
        args += '--pfamdbpath %s ' % pfamdbpath
    if fullblast == "y":
        args += '--blastdbpath %s ' % blastdbpath
    logfile.write("Running CLUSEAN pipeline modules.\n")
    if sys.platform == 'win32':
        os.system("python ..\\clusean\\scripts\\runPipeline.py %s" % args)
    else:
        os.system(antismash_path + "clusean/scripts/runPipeline.py %s" % args)
finally:
    # Always return to the starting directory, even if something above raises.
    os.chdir(startdir)
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
7415
6a37d0a4510a initial uploaded
bjoern-gruening
parents:
diff changeset
#Close log file
# Record the elapsed run time as the final log entry, then release the handle.
logfile.write("antiSMASH successfully finished in %s seconds.\n" % (elapsed,))
logfile.close()