comparison shm_csr.py @ 90:6809c63d9161 draft

"planemo upload commit fd64827ff6e63df008f6f50ddb8576ad2b1dbb26"
author rhpvorderman
date Tue, 25 Jan 2022 11:28:29 +0000
parents 729738462297
children 385dea3c6cb5
comparison
equal deleted inserted replaced
89:3c9d4d976c47 90:6809c63d9161
1 import argparse 1 import argparse
2 import logging 2 import logging
3 import sys 3 import sys
4 import os 4 import os
5 import re 5 import typing
6 from typing import Optional
6 7
7 from collections import defaultdict 8 from collections import defaultdict
9
10 REGION_FILTERS = ("leader", "FR1", "CDR1", "FR2", "CDR2")
11
12
13 class Mutation(typing.NamedTuple):
14 """Represent a mutation type as a tuple"""
15 frm: str # 'from' is a reserved python keyword.
16 where: int
17 to: str
18 frmAA: Optional[str] = None
19 whereAA: Optional[int] = None
20 toAA: Optional[str] = None
21 thing: Optional[str] = None # '(---)' or '(+-+)' etc. No idea
22
23 @classmethod
24 def from_string(cls, string: str):
25 # Complete mutation example: a88>g,I30>V(+ - +)
26 # Only nucleotide example: g303>t
27 # Including codon change:
28 # t169>g,Y57>D(- - -); Y57 tat 169-171 [ta 169-170]>D gac
29 # Including codon change (synonumous mutation):
30 # c114>t, Y38; Y38 tac 112-114 [tact 112-115]>Y tat
31 if ',' in string:
32 nucleotide_change, aa_change = string.split(',', maxsplit=1) # type: str, Optional[str]
33 else:
34 nucleotide_change = string
35 aa_change = None
36 frm_part, to = nucleotide_change.split('>', maxsplit=1)
37 frm = frm_part[0]
38 where = int(frm_part[1:])
39
40 if aa_change is None:
41 return cls(frm, where, to)
42
43 aa_change = aa_change.strip()
44 # The part after semicolon indicates the codon change. This part may
45 # not be present.
46 semi_colon_index = aa_change.find(";")
47 if semi_colon_index == -1:
48 codon_change = ""
49 else:
50 codon_change = aa_change[semi_colon_index:]
51 aa_change = aa_change[:semi_colon_index]
52 change_operator_index = aa_change.find(">")
53 if change_operator_index == -1:
54 # Synonymous change
55 frmAA_part = aa_change
56 toAA_part = ""
57 else:
58 frmAA_part, toAA_part = aa_change.split('>', maxsplit=1) # type: str, str
59 frmAA = frmAA_part[0]
60 whereAA = int(frmAA_part[1:])
61 if toAA_part:
62 brace_start = toAA_part.index('(')
63 toAA = toAA_part[:brace_start]
64 thing = toAA_part[brace_start:] + codon_change
65 else:
66 # Synonymous mutation
67 toAA = frmAA
68 thing = codon_change
69 return cls(frm, where, to, frmAA, whereAA, toAA, thing)
70
71
72 class Hotspot(typing.NamedTuple):
73 start: int
74 end: int
75 region: str
76
77 @classmethod
78 def from_string(cls, string):
79 # Example: aa,40-41(FR1)
80 sequence, rest = string.split(',') # type: str, str
81 brace_pos = rest.index('(')
82 numbers = rest[:brace_pos]
83 start, end = numbers.split('-')
84 region = rest[brace_pos + 1:-1] # Remove the braces
85 return cls(int(start), int(end), region)
86
8 87
9 def main(): 88 def main():
10 parser = argparse.ArgumentParser() 89 parser = argparse.ArgumentParser()
11 parser.add_argument("--input", help="The '7_V-REGION-mutation-and-AA-change-table' and '10_V-REGION-mutation-hotspots' merged together, with an added 'best_match' annotation") 90 parser.add_argument("--input", help="The '7_V-REGION-mutation-and-AA-change-table' and '10_V-REGION-mutation-hotspots' merged together, with an added 'best_match' annotation")
12 parser.add_argument("--genes", help="The genes available in the 'best_match' column") 91 parser.add_argument("--genes", help="The genes available in the 'best_match' column")
13 parser.add_argument("--empty_region_filter", help="Where does the sequence start?", choices=['leader', 'FR1', 'CDR1', 'FR2']) 92 parser.add_argument("--empty_region_filter", help="Where does the sequence start?", choices=REGION_FILTERS)
14 parser.add_argument("--output", help="Output file") 93 parser.add_argument("--output", help="Output file")
15 94
16 args = parser.parse_args() 95 args = parser.parse_args()
17 96
18 infile = args.input 97 infile = args.input
21 outfile = args.output 100 outfile = args.output
22 101
23 genedic = dict() 102 genedic = dict()
24 103
25 mutationdic = dict() 104 mutationdic = dict()
26 mutationMatcher = re.compile("^(.)(\d+).(.),?[ ]?(.)?(\d+)?.?(.)?(.?.?.?.?.?)?")
27 mutationMatcher = re.compile("^([actg])(\d+).([actg]),?[ ]?([A-Z])?(\d+)?.?([A-Z])?(.*)?")
28 mutationMatcher = re.compile("^([actg])(\d+).([actg]),?[ ]?([A-Z])?(\d+)?[>]?([A-Z;])?(.*)?")
29 mutationMatcher = re.compile(r"^([nactg])(\d+).([nactg]),?[ ]?([A-Z*])?(\d+)?[>]?([A-Z*;])?(.*)?")
30 NAMatchResult = (None, None, None, None, None, None, '') 105 NAMatchResult = (None, None, None, None, None, None, '')
31 geneMatchers = {gene: re.compile("^" + gene + ".*") for gene in genes}
32 linecount = 0 106 linecount = 0
33 107
34 IDIndex = 0 108 IDIndex = 0
35 best_matchIndex = 0 109 best_matchIndex = 0
36 fr1Index = 0 110 fr1Index = 0
40 fr3Index = 0 114 fr3Index = 0
41 first = True 115 first = True
42 IDlist = [] 116 IDlist = []
43 mutationList = [] 117 mutationList = []
44 mutationListByID = {} 118 mutationListByID = {}
45 cdr1LengthDic = {} 119 cdr1AALengthDic = {}
46 cdr2LengthDic = {} 120 cdr2AALengthDic = {}
47 121
48 fr1LengthDict = {} 122 LengthDic = {}
49 fr2LengthDict = {}
50 fr3LengthDict = {}
51 123
52 cdr1LengthIndex = 0 124 cdr1LengthIndex = 0
53 cdr2LengthIndex = 0 125 cdr2LengthIndex = 0
54
55 fr1SeqIndex = 0
56 fr2SeqIndex = 0
57 fr3SeqIndex = 0
58 126
59 tandem_sum_by_class = defaultdict(int) 127 tandem_sum_by_class = defaultdict(int)
60 expected_tandem_sum_by_class = defaultdict(float) 128 expected_tandem_sum_by_class = defaultdict(float)
61 129
62 with open(infile, 'r') as i: 130 with open(infile, 'r') as i:
68 fr1Index = linesplt.index("FR1.IMGT") 136 fr1Index = linesplt.index("FR1.IMGT")
69 cdr1Index = linesplt.index("CDR1.IMGT") 137 cdr1Index = linesplt.index("CDR1.IMGT")
70 fr2Index = linesplt.index("FR2.IMGT") 138 fr2Index = linesplt.index("FR2.IMGT")
71 cdr2Index = linesplt.index("CDR2.IMGT") 139 cdr2Index = linesplt.index("CDR2.IMGT")
72 fr3Index = linesplt.index("FR3.IMGT") 140 fr3Index = linesplt.index("FR3.IMGT")
73 cdr1LengthIndex = linesplt.index("CDR1.IMGT.length") 141 fr1LengthIndex = linesplt.index("FR1.IMGT.Nb.of.nucleotides")
74 cdr2LengthIndex = linesplt.index("CDR2.IMGT.length") 142 fr2LengthIndex = linesplt.index("FR2.IMGT.Nb.of.nucleotides")
75 fr1SeqIndex = linesplt.index("FR1.IMGT.seq") 143 fr3LengthIndex = linesplt.index("FR3.IMGT.Nb.of.nucleotides")
76 fr2SeqIndex = linesplt.index("FR2.IMGT.seq") 144 cdr1LengthIndex = linesplt.index("CDR1.IMGT.Nb.of.nucleotides")
77 fr3SeqIndex = linesplt.index("FR3.IMGT.seq") 145 cdr2LengthIndex = linesplt.index("CDR2.IMGT.Nb.of.nucleotides")
146 cdr1AALengthIndex = linesplt.index("CDR1.IMGT.length")
147 cdr2AALengthIndex = linesplt.index("CDR2.IMGT.length")
78 first = False 148 first = False
79 continue 149 continue
80 linecount += 1 150 linecount += 1
81 linesplt = line.split("\t") 151 linesplt = line.split("\t")
82 ID = linesplt[IDIndex] 152 ID = linesplt[IDIndex]
83 genedic[ID] = linesplt[best_matchIndex] 153 genedic[ID] = linesplt[best_matchIndex]
84 154
85 mutationdic[ID + "_FR1"] = [] 155 mutationdic[ID + "_FR1"] = []
86 if len(linesplt[fr1Index]) > 5 and empty_region_filter == "leader": 156 if len(linesplt[fr1Index]) > 5 and empty_region_filter == "leader":
87 mutationdic[ID + "_FR1"] = [mutationMatcher.match(x).groups() for x in linesplt[fr1Index].split("|") if x] 157 mutationdic[ID + "_FR1"] = [Mutation.from_string(x) for x in linesplt[fr1Index].split("|") if x]
88 158
89 mutationdic[ID + "_CDR1"] = [] 159 mutationdic[ID + "_CDR1"] = []
90 if len(linesplt[cdr1Index]) > 5 and empty_region_filter in ["leader", "FR1"]: 160 if len(linesplt[cdr1Index]) > 5 and empty_region_filter in ["leader", "FR1"]:
91 mutationdic[ID + "_CDR1"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr1Index].split("|") if x] 161 mutationdic[ID + "_CDR1"] = [Mutation.from_string(x) for x in linesplt[cdr1Index].split("|") if x]
92 162
93 mutationdic[ID + "_FR2"] = [] 163 mutationdic[ID + "_FR2"] = []
94 if len(linesplt[fr2Index]) > 5 and empty_region_filter in ["leader", "FR1", "CDR1"]: 164 if len(linesplt[fr2Index]) > 5 and empty_region_filter in ["leader", "FR1", "CDR1"]:
95 mutationdic[ID + "_FR2"] = [mutationMatcher.match(x).groups() for x in linesplt[fr2Index].split("|") if x] 165 mutationdic[ID + "_FR2"] = [Mutation.from_string(x) for x in linesplt[fr2Index].split("|") if x]
96 166
97 mutationdic[ID + "_CDR2"] = [] 167 mutationdic[ID + "_CDR2"] = []
98 if len(linesplt[cdr2Index]) > 5: 168 if len(linesplt[cdr2Index]) > 5:
99 mutationdic[ID + "_CDR2"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr2Index].split("|") if x] 169 mutationdic[ID + "_CDR2"] = [Mutation.from_string(x) for x in linesplt[cdr2Index].split("|") if x]
100 170
101 mutationdic[ID + "_FR2-CDR2"] = mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] 171 mutationdic[ID + "_FR2-CDR2"] = mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"]
102 172
103 mutationdic[ID + "_FR3"] = [] 173 mutationdic[ID + "_FR3"] = []
104 if len(linesplt[fr3Index]) > 5: 174 if len(linesplt[fr3Index]) > 5:
105 mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x] 175 mutationdic[ID + "_FR3"] = [Mutation.from_string(x) for x in linesplt[fr3Index].split("|") if x]
106 176
107 mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] 177 mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"]
108 mutationListByID[ID] = mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] 178 mutationListByID[ID] = mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"]
109 179
110 try: 180 fr1Length = int(linesplt[fr1LengthIndex])
111 cdr1Length = int(linesplt[cdr1LengthIndex]) 181 fr2Length = int(linesplt[fr2LengthIndex])
112 except: 182 fr3Length = int(linesplt[fr3LengthIndex])
113 cdr1Length = 0 183 cdr1Length = int(linesplt[cdr1LengthIndex])
114 184 cdr2Length = int(linesplt[cdr2LengthIndex])
115 try: 185 LengthDic[ID] = (fr1Length, cdr1Length, fr2Length, cdr2Length, fr3Length)
116 cdr2Length = int(linesplt[cdr2LengthIndex]) 186
117 except: 187 cdr1AALengthDic[ID] = int(linesplt[cdr1AALengthIndex])
118 cdr2Length = 0 188 cdr2AALengthDic[ID] = int(linesplt[cdr2AALengthIndex])
119
120 #print linesplt[fr2SeqIndex]
121 fr1Length = len(linesplt[fr1SeqIndex]) if empty_region_filter == "leader" else 0
122 fr2Length = len(linesplt[fr2SeqIndex]) if empty_region_filter in ["leader", "FR1", "CDR1"] else 0
123 fr3Length = len(linesplt[fr3SeqIndex])
124
125 cdr1LengthDic[ID] = cdr1Length
126 cdr2LengthDic[ID] = cdr2Length
127
128 fr1LengthDict[ID] = fr1Length
129 fr2LengthDict[ID] = fr2Length
130 fr3LengthDict[ID] = fr3Length
131 189
132 IDlist += [ID] 190 IDlist += [ID]
133 print("len(mutationdic) =", len(mutationdic)) 191 print("len(mutationdic) =", len(mutationdic))
134 192
135 with open(os.path.join(os.path.dirname(os.path.abspath(infile)), "mutationdict.txt"), 'w') as out_handle: 193 with open(os.path.join(os.path.dirname(os.path.abspath(infile)), "mutationdict.txt"), 'w') as out_handle:
153 mutations_by_id_dic[splt[0]] = int(splt[1]) 211 mutations_by_id_dic[splt[0]] = int(splt[1])
154 212
155 tandem_file = os.path.join(os.path.dirname(outfile), "tandems_by_id.txt") 213 tandem_file = os.path.join(os.path.dirname(outfile), "tandems_by_id.txt")
156 with open(tandem_file, 'w') as o: 214 with open(tandem_file, 'w') as o:
157 highest_tandem_length = 0 215 highest_tandem_length = 0
216 # LengthDic stores length as a tuple
217 # (fr1Length, cdr1Length, fr2Length, cdr2Length, fr3Length)
218 # To get the total length, we can sum(region_lengths)
219 # To get the total length for leader:
220 # sum(region_lengths[0:]) (Equivalent to everything)
221 # sum(region_lengths[1:]) Gets everything except FR1 etc.
222 # We determine the position to start summing below.
223 # This returns 0 for leader, 1 for FR1 etc.
224 length_start_pos = REGION_FILTERS.index(empty_region_filter)
158 225
159 o.write("Sequence.ID\tnumber_of_mutations\tnumber_of_tandems\tregion_length\texpected_tandems\tlongest_tandem\ttandems\n") 226 o.write("Sequence.ID\tnumber_of_mutations\tnumber_of_tandems\tregion_length\texpected_tandems\tlongest_tandem\ttandems\n")
160 for ID in IDlist: 227 for ID in IDlist:
161 mutations = mutationListByID[ID] 228 mutations = mutationListByID[ID]
229 region_length = sum(LengthDic[ID][length_start_pos:])
162 if len(mutations) == 0: 230 if len(mutations) == 0:
163 continue 231 continue
164 last_mut = max(mutations, key=lambda x: int(x[1])) 232 last_mut = max(mutations, key=lambda x: int(x[1]))
165 233
166 last_mut_pos = int(last_mut[1]) 234 last_mut_pos = int(last_mut[1])
192 260
193 if len(tandem_muts) > 0: 261 if len(tandem_muts) > 0:
194 if highest_tandem_length < len(tandem_muts): 262 if highest_tandem_length < len(tandem_muts):
195 highest_tandem_length = len(tandem_muts) 263 highest_tandem_length = len(tandem_muts)
196 264
197 region_length = fr1LengthDict[ID] + cdr1LengthDic[ID] + fr2LengthDict[ID] + cdr2LengthDic[ID] + fr3LengthDict[ID]
198 longest_tandem = max(tandem_muts, key=lambda x: x[1]) if len(tandem_muts) else (0, 0) 265 longest_tandem = max(tandem_muts, key=lambda x: x[1]) if len(tandem_muts) else (0, 0)
199 num_mutations = mutations_by_id_dic[ID] # len(mutations) 266 num_mutations = mutations_by_id_dic[ID] # len(mutations)
200 f_num_mutations = float(num_mutations) 267 f_num_mutations = float(num_mutations)
201 num_tandem_muts = len(tandem_muts) 268 num_tandem_muts = len(tandem_muts)
202 expected_tandem_muts = f_num_mutations * (f_num_mutations - 1.0) / float(region_length) 269 expected_tandem_muts = f_num_mutations * (f_num_mutations - 1.0) / float(region_length)
203 o.write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\n".format(ID, 270 # String format and round disagree slightly (see 3.605).
204 str(num_mutations), 271 # So round before formatting.
205 str(num_tandem_muts), 272 o.write(f"{ID}\t{num_mutations}\t{num_tandem_muts}\t{region_length}\t"
206 str(region_length), 273 f"{round(expected_tandem_muts, 2):.2f}\t"
207 str(round(expected_tandem_muts, 2)), 274 f"{longest_tandem[1]}\t{tandem_muts}\n")
208 str(longest_tandem[1]),
209 str(tandem_muts)))
210 gene = genedic[ID] 275 gene = genedic[ID]
211 if gene.find("unmatched") == -1: 276 if gene.find("unmatched") == -1:
212 tandem_sum_by_class[gene] += num_tandem_muts 277 tandem_sum_by_class[gene] += num_tandem_muts
213 expected_tandem_sum_by_class[gene] += expected_tandem_muts 278 expected_tandem_sum_by_class[gene] += expected_tandem_muts
214 279
299 absentAACDR2Dic[7] = list(range(59,62)) 364 absentAACDR2Dic[7] = list(range(59,62))
300 absentAACDR2Dic[8] = list(range(59,61)) 365 absentAACDR2Dic[8] = list(range(59,61))
301 absentAACDR2Dic[9] = [60] 366 absentAACDR2Dic[9] = [60]
302 367
303 absentAA = [len(IDlist)] * (AALength-1) 368 absentAA = [len(IDlist)] * (AALength-1)
304 for k, cdr1Length in cdr1LengthDic.items(): 369 for k, cdr1Length in cdr1AALengthDic.items():
305 for c in absentAACDR1Dic[cdr1Length]: 370 for c in absentAACDR1Dic[cdr1Length]:
306 absentAA[c] -= 1 371 absentAA[c] -= 1
307 372
308 for k, cdr2Length in cdr2LengthDic.items(): 373 for k, cdr2Length in cdr2AALengthDic.items():
309 for c in absentAACDR2Dic[cdr2Length]: 374 for c in absentAACDR2Dic[cdr2Length]:
310 absentAA[c] -= 1 375 absentAA[c] -= 1
311 376
312 377
313 aa_mutations_by_id_file = outfile[:outfile.rindex("/")] + "/absent_aa_id.txt" 378 aa_mutations_by_id_file = outfile[:outfile.rindex("/")] + "/absent_aa_id.txt"
314 with open(aa_mutations_by_id_file, 'w') as o: 379 with open(aa_mutations_by_id_file, 'w') as o:
315 o.write("ID\tcdr1length\tcdr2length\tbest_match\t" + "\t".join([str(x) for x in range(1,AALength)]) + "\n") 380 o.write("ID\tcdr1length\tcdr2length\tbest_match\t" + "\t".join([str(x) for x in range(1,AALength)]) + "\n")
316 for ID in IDlist: 381 for ID in IDlist:
317 absentAAbyID = [1] * (AALength-1) 382 absentAAbyID = [1] * (AALength-1)
318 cdr1Length = cdr1LengthDic[ID] 383 cdr1Length = cdr1AALengthDic[ID]
319 for c in absentAACDR1Dic[cdr1Length]: 384 for c in absentAACDR1Dic[cdr1Length]:
320 absentAAbyID[c] -= 1 385 absentAAbyID[c] -= 1
321 386
322 cdr2Length = cdr2LengthDic[ID] 387 cdr2Length = cdr2AALengthDic[ID]
323 for c in absentAACDR2Dic[cdr2Length]: 388 for c in absentAACDR2Dic[cdr2Length]:
324 absentAAbyID[c] -= 1 389 absentAAbyID[c] -= 1
325 o.write(ID + "\t" + str(cdr1Length) + "\t" + str(cdr2Length) + "\t" + genedic[ID] + "\t" + "\t".join([str(x) for x in absentAAbyID]) + "\n") 390 o.write(ID + "\t" + str(cdr1Length) + "\t" + str(cdr2Length) + "\t" + genedic[ID] + "\t" + "\t".join([str(x) for x in absentAAbyID]) + "\n")
326 391
327 if linecount == 0: 392 if linecount == 0:
331 o.write("WRCY (%)," + ("0,0,0\n" * len(genes))) 396 o.write("WRCY (%)," + ("0,0,0\n" * len(genes)))
332 o.write("WA (%)," + ("0,0,0\n" * len(genes))) 397 o.write("WA (%)," + ("0,0,0\n" * len(genes)))
333 o.write("TW (%)," + ("0,0,0\n" * len(genes))) 398 o.write("TW (%)," + ("0,0,0\n" * len(genes)))
334 sys.exit() 399 sys.exit()
335 400
336 hotspotMatcher = re.compile("[actg]+,(\d+)-(\d+)\((.*)\)")
337 RGYWCount = {} 401 RGYWCount = {}
338 WRCYCount = {} 402 WRCYCount = {}
339 WACount = {} 403 WACount = {}
340 TWCount = {} 404 TWCount = {}
341 405
356 first = False 420 first = False
357 continue 421 continue
358 linesplt = line.split("\t") 422 linesplt = line.split("\t")
359 gene = linesplt[best_matchIndex] 423 gene = linesplt[best_matchIndex]
360 ID = linesplt[IDIndex] 424 ID = linesplt[IDIndex]
361 RGYW = [(int(x), int(y), z) for (x, y, z) in 425 RGYW = [Hotspot.from_string(x) for x in linesplt[aggctatIndex].split("|") if x]
362 [hotspotMatcher.match(x).groups() for x in linesplt[aggctatIndex].split("|") if x]] 426 WRCY = [Hotspot.from_string(x) for x in linesplt[atagcctIndex].split("|") if x]
363 WRCY = [(int(x), int(y), z) for (x, y, z) in 427 WA = [Hotspot.from_string(x) for x in linesplt[ataIndex].split("|") if x]
364 [hotspotMatcher.match(x).groups() for x in linesplt[atagcctIndex].split("|") if x]] 428 TW = [Hotspot.from_string(x) for x in linesplt[tatIndex].split("|") if x]
365 WA = [(int(x), int(y), z) for (x, y, z) in
366 [hotspotMatcher.match(x).groups() for x in linesplt[ataIndex].split("|") if x]]
367 TW = [(int(x), int(y), z) for (x, y, z) in
368 [hotspotMatcher.match(x).groups() for x in linesplt[tatIndex].split("|") if x]]
369 RGYWCount[ID], WRCYCount[ID], WACount[ID], TWCount[ID] = 0, 0, 0, 0 429 RGYWCount[ID], WRCYCount[ID], WACount[ID], TWCount[ID] = 0, 0, 0, 0
370 430
371 with open(os.path.join(os.path.dirname(os.path.abspath(infile)), "RGYW.txt"), 'a') as out_handle: 431 with open(os.path.join(os.path.dirname(os.path.abspath(infile)), "RGYW.txt"), 'a') as out_handle:
372 for hotspot in RGYW: 432 for hotspot in RGYW:
373 out_handle.write("{0}\t{1}\n".format(ID, "\t".join([str(x) for x in hotspot]))) 433 out_handle.write("{0}\t{1}\n".format(ID, "\t".join([str(x) for x in hotspot])))
415 for start, end, region in motif_dic[motif]: 475 for start, end, region in motif_dic[motif]:
416 if start <= int(where) <= end: 476 if start <= int(where) <= end:
417 out_handle.write("{0}\n".format( 477 out_handle.write("{0}\n".format(
418 "\t".join([ 478 "\t".join([
419 ID, 479 ID,
420 where, 480 str(where),
421 region, 481 region,
422 frm, 482 frm,
423 to, 483 to,
424 str(AAwhere), 484 str(AAwhere),
425 str(AAfrm), 485 str(AAfrm),
481 with open(foutfile, 'w') as o: 541 with open(foutfile, 'w') as o:
482 for typ in arr: 542 for typ in arr:
483 o.write(typ + " (%)") 543 o.write(typ + " (%)")
484 curr = dic[typ] 544 curr = dic[typ]
485 for gene in genes: 545 for gene in genes:
486 geneMatcher = geneMatchers[gene] 546 if valuedic[gene + "_" + fname] == 0:
487 if valuedic[gene + "_" + fname] is 0:
488 o.write(",0,0,0") 547 o.write(",0,0,0")
489 else: 548 else:
490 x, y, z = get_xyz([curr[x] for x in [y for y, z in genedic.items() if geneMatcher.match(z)]], gene, func, fname) 549 x, y, z = get_xyz([curr[x] for x in [y for y, z in genedic.items() if z.startswith(gene)]], gene, func, fname)
491 o.write("," + x + "," + y + "," + z) 550 o.write("," + x + "," + y + "," + z)
492 x, y, z = get_xyz([y for x, y in curr.items() if not genedic[x].startswith("unmatched")], "total", func, fname) 551 x, y, z = get_xyz([y for x, y in curr.items() if not genedic[x].startswith("unmatched")], "total", func, fname)
493 #x, y, z = get_xyz([y for x, y in curr.iteritems()], "total", func, fname) 552 #x, y, z = get_xyz([y for x, y in curr.iteritems()], "total", func, fname)
494 o.write("," + x + "," + y + "," + z + "\n") 553 o.write("," + x + "," + y + "," + z + "\n")
495 554