Mercurial > repos > gianmarco_piccinno > cs_tool_project_rm
comparison CodonSwitchTool/functions.py @ 2:aad5e435e4dc draft default tip
Uploaded
author | gianmarco_piccinno |
---|---|
date | Tue, 21 May 2019 05:24:56 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
1:1c31d6d25429 | 2:aad5e435e4dc |
---|---|
1 import string | |
2 from syngenic import * | |
3 from Bio.Seq import Seq | |
4 from Bio.SeqFeature import SeqFeature, FeatureLocation | |
5 from pprint import pprint | |
6 | |
7 from itertools import izip | |
8 | |
9 import numpy as np | |
10 import pandas as pd | |
11 | |
def all_patterns(input_=None):
    """Collect every pattern and its reverse complement, plus their expanded forms.

    Args:
        input_: iterable of sequence objects exposing ``reverse_complement()``
            (presumably Biopython ``Seq`` instances -- confirm with callers).
            ``None`` (the default) is treated as an empty iterable.

    Returns:
        A tuple ``(patts, n_patts)``. ``patts`` holds ``str(pattern)`` followed
        by ``str(reverse_complement)`` for each input, in input order.
        ``n_patts`` holds ``pattern(x).plan_ambiguity()`` for the same objects
        in the same order.
    """
    # A None sentinel replaces the former mutable ``[]`` default argument.
    if input_ is None:
        input_ = ()

    patts = []
    n_patts = []

    for patt in input_:
        tmp_revc = patt.reverse_complement()
        # Forward strand first, then reverse complement, in both output lists.
        for strand_seq in (patt, tmp_revc):
            patts.append(str(strand_seq))
            n_patts.append(pattern(strand_seq).plan_ambiguity())

    return patts, n_patts
29 | |
def fake_from_real(path = None, id_ = None, name = None):
    """Build a shortened stand-in plasmid from a GenBank file.

    The result is the first 10 bases of the record, then for every ``CDS``
    feature the first 9 and the last 9 bases of that feature, then the last
    10 bases of the record. Each CDS stub is annotated with a ``gene``
    feature carrying the strand of the CDS it was taken from.

    Args:
        path: path of the GenBank file to read (a single record is expected,
            since ``SeqIO.read`` is used).
        id_: value assigned to the ``id`` attribute of the result.
        name: value assigned to the ``name`` attribute of the result.

    Returns:
        The assembled sequence object with ``id`` and ``name`` set.
    """
    # Local import: the visible file never imports SeqIO explicitly.
    from Bio import SeqIO

    # The context manager closes the handle, which was previously leaked.
    with open(path, "r") as handle:
        plasmid_seq = SeqIO.read(handle, "genbank")

    f_p = plasmid_seq.seq[:10]
    f_CDS = []
    for f in plasmid_seq.features:
        if f.type != "CDS":
            continue
        tmp_start = len(f_p)
        # Slicing the record (not record.seq) yields record slices here.
        tmp_cds = (plasmid_seq[f.location.start:f.location.start + 9]
                   + plasmid_seq[f.location.end - 9:f.location.end])
        tmp_end = tmp_start + len(tmp_cds)
        f_p += tmp_cds
        f_CDS.append(SeqFeature(FeatureLocation(tmp_start, tmp_end),
                                type="gene",
                                strand=f.location.strand))
    f_p += plasmid_seq.seq[-10:]

    # NOTE(review): ``f_p.features`` only exists once a record slice has been
    # added above, i.e. when at least one CDS was found; with no CDS this
    # loop is empty, so nothing is accessed.
    for feat in f_CDS:
        f_p.features.append(feat)
    f_p.id = id_
    f_p.name = name

    return f_p
54 | |
def punctuate_targets(f_patts, n_pl):
    """Map each targeted sequence position to the pattern symbols aligned on it.

    Args:
        f_patts: mapping from a pattern string to a list of hits. Each hit is
            indexable; ``hit[1]`` is used as the start (inclusive) and
            ``hit[2]`` as the end (exclusive) position. ``hit[0]`` is not read.
        n_pl: the sequence; only ``len(n_pl)`` is used, as the wrap-around
            point for hits whose end is smaller than their start.

    Returns:
        dict mapping a position (int) to the set of characters of the pattern
        keys that cover that position.

    Raises:
        IndexError: if a hit spans more positions than its pattern key has
            characters.
    """
    n_poss = {}
    max_len = len(n_pl)

    for key in f_patts:
        for el in f_patts[key]:
            start, end = el[1], el[2]
            if end < start:
                # The hit wraps past the end of the sequence. Build explicit
                # lists: range objects cannot be concatenated on Python 3.
                positions = list(range(start, max_len)) + list(range(0, end))
            else:
                positions = range(start, end)
            for offset, position in enumerate(positions):
                n_poss.setdefault(position, set()).add(key[offset])

    return n_poss
82 | |
83 | |
def print_seq(n_pl, ind_range = None):
    """Print a sequence in rows of nine symbols with their indices underneath.

    Each row is printed as a blank separator (``print("\\n")``), a line with
    the symbols and a line with the matching indices. Symbols are left-padded
    so that each one ends in the same column as its index.

    Args:
        n_pl: the sequence as a string (it is passed to ``re.split``).
        ind_range: optional ``[start, end]`` pair. When given, only
            ``n_pl[start:end]`` is printed and the indices run from ``start``
            to ``end - 1``. When ``None`` the whole sequence is printed.

    Returns:
        None. The output goes to stdout.
    """
    import re

    if ind_range is None:
        window = n_pl
        index = range(len(n_pl))
    else:
        window = n_pl[ind_range[0]:ind_range[1]]
        index = range(ind_range[0], ind_range[1])

    # One item per word character; runs of other characters stay together.
    # A list is built explicitly because filter() is lazy on Python 3.
    data = [piece for piece in re.split(r'(\w{1})', window) if piece]

    seq = [""]
    ind = [""]
    j = 0

    for i, symbol in enumerate(data):
        if i % 9 == 0 and i > 0:
            # The current row is full: open a new one and flush the old one.
            j += 1
            seq.append("")
            ind.append("")
            print("\n")
            print(seq[j - 1])
            print(ind[j - 1])

        label = str(index[i])
        # One separating space plus padding to the width of the index.
        seq[j] += " " * len(label) + symbol
        ind[j] += " " + label

    # Flush the last (possibly partial) row.
    print("\n")
    print(seq[j])
    print(ind[j])

    return None
156 | |
157 | |
def generalization(n_poss, n_pl, synonims_tables, reduced=False):
    """Build the set of plasmid variants that alter the targeted positions.

    Every targeted position is replaced by a regex character class:

    * inside a ``CDS``/``gene`` feature, the class lists the bases that the
      synonymous codons (from ``synonims_tables``) have at that codon
      position; positions where all synonyms agree are left unchanged;
    * outside such features, the class pairs the targeted base with its
      partner from the ``transversions`` table (A<->T, C<->G).

    The resulting pattern is expanded lazily with ``sre_yield.AllStrings``.

    Args:
        n_poss: dict mapping a position to the set of pattern symbols found
            there (the structure returned by ``punctuate_targets``).
            Positions whose set is exactly ``{"N"}`` are left untouched
            inside coding features.
        n_pl: plasmid object exposing ``features`` (each with ``type``,
            ``strand`` and ``location.start``/``location.end``) and ``seq``.
        synonims_tables: mapping with the keys ``"synonims"`` (used for
            strand +1) and ``"anti_synonims"`` (used for strand -1); each
            maps a codon, as sliced from ``n_pl.seq``, to an iterable of
            alternative codons.
        reduced: accepted for interface compatibility; both of its branches
            were identical in the previous implementation, so it has no effect.

    Returns:
        The ``sre_yield.AllStrings`` object enumerating all variants.
    """
    import re

    transversions = {"A": "[AT]",
                     "T": "[TA]",
                     "C": "[CG]",
                     "G": "[GC]"}
    plain_bases = set(transversions)

    new_poss = {}

    for pos in n_poss:
        observed = n_poss[pos]
        in_cds = False

        for feat in n_pl.features:
            start = feat.location.start
            inside = (pos >= start) and (pos < feat.location.end)
            if not (inside and feat.type in ["CDS", "gene"]):
                continue
            in_cds = True

            if observed == {"N"}:
                continue

            if feat.strand == +1:
                table = synonims_tables["synonims"]
            elif feat.strand == -1:
                table = synonims_tables["anti_synonims"]
            else:
                # Unknown strand: no table applies, leave the base as it is.
                continue

            # Offset of pos within its codon (0, 1 or 2), counted from the
            # feature start, and the codon that contains it.
            offset = (pos - start) % 3
            codon_start = pos - offset
            tmp_codon = n_pl.seq[codon_start:codon_start + 3]

            bases = set(codon[offset] for codon in table[tmp_codon])
            if len(bases) > 1:
                # Sorted so that the generated pattern is deterministic.
                new_poss[pos] = "[" + "".join(sorted(bases)) + "]"

        usable = observed & plain_bases
        if not in_cds and usable:
            # Choose among A/C/G/T only, so an ambiguity symbol in the set
            # can no longer cause a KeyError in the table lookup.
            new_poss[pos] = transversions[sorted(usable)[0]]

    # One item per word character of the sequence (a list on Python 2 and 3).
    n_seq = [piece for piece in re.split(r'(\w{1})', str(n_pl.seq)) if piece]

    new_plasmid_generalized = "".join(
        new_poss.get(pos, base) for pos, base in enumerate(n_seq))

    return sre_yield.AllStrings(new_plasmid_generalized)
307 | |
308 | |
def evaluate_plasmids(plasmids = None,
                      original_plasmid = None,
                      codon_usage_table = None,
                      n_patts = None,
                      f_patts = None):
    """Keep the candidate plasmids that are usable and describe each of them.

    A candidate is kept when it differs from the original sequence, every
    ``gene``/``cds`` feature translates to the same protein as in the
    original, and ``findpatterns`` reports no hit for any pattern.

    Args:
        plasmids: sized iterable of candidate sequences (``len()`` is used
            for the progress message).
        original_plasmid: reference plasmid exposing ``seq``, ``features``
            and ``extract(feature)``; it is also iterated position by
            position to locate the modified positions.
        codon_usage_table: object indexed as ``table.loc[codon]["Proportion"]``
            (presumably a pandas DataFrame indexed by codon -- confirm).
        n_patts: forwarded to ``plasmid.findpatterns``.
        f_patts: forwarded to ``plasmid.findpatterns``.

    Returns:
        dict with one ``"Plasmid_<i>"`` entry per kept candidate, where ``i``
        is its position in ``plasmids``, holding ``modified_positions``,
        ``codon_usage``, ``number_of_modification``, ``sequence``,
        ``mean_codon_usage`` and ``std_codon_usage``. The extra key
        ``"original_plasmids"`` holds ``original_plasmid``.
    """
    from syngenic import plasmid
    from Bio.Seq import Seq
    import numpy as np

    useful = {}

    coding_features = [feat for feat in original_plasmid.features
                       if feat.type.lower() in ["gene", "cds"]]

    for i, tmp_pl in enumerate(plasmids):

        if tmp_pl == original_plasmid.seq:
            continue

        candidate = plasmid(tmp_pl)
        identical_proteic_sequence = all(
            Seq(candidate.extract(feat)).translate()
            == Seq(original_plasmid.extract(feat)).translate()
            for feat in coding_features)
        if not identical_proteic_sequence:
            continue

        # Free of targets only when at least one pattern is reported and
        # every reported hit list is empty (same rule as before).
        hits = list(plasmid(tmp_pl).findpatterns(n_patts, f_patts).values())
        if not hits or not all(el == [] for el in hits):
            continue

        print("\t" + str(i) + "/" + str(len(plasmids)))

        # zip() replaces the Python 2-only itertools.izip.
        tmp = [j for j, (a1, a2) in enumerate(zip(tmp_pl, original_plasmid))
               if a1 != a2]

        entry = {"modified_positions": tmp,
                 "codon_usage": [],
                 "number_of_modification": len(tmp),
                 "sequence": tmp_pl}
        useful["Plasmid_" + str(i)] = entry

        for modified_position in tmp:
            for feat in coding_features:
                start = feat.location.start
                inside = (modified_position >= start) and (modified_position < feat.location.end)
                if not (inside and feat.type in ["CDS", "gene"]):
                    continue

                # Codon containing the modified base, framed on the feature
                # start. It is always read from the candidate: the former
                # third-position/+strand branch read it from the original
                # plasmid, scoring the unmodified codon.
                offset = (modified_position - start) % 3
                codon_start = modified_position - offset
                tmp_codon = tmp_pl[codon_start:codon_start + 3]
                if feat.strand != +1:
                    tmp_codon = str(Seq(tmp_codon).reverse_complement())
                entry["codon_usage"].append(
                    codon_usage_table.loc[tmp_codon]["Proportion"])

        entry["mean_codon_usage"] = np.mean(entry["codon_usage"])
        entry["std_codon_usage"] = np.std(entry["codon_usage"])

    useful["original_plasmids"] = original_plasmid

    return useful
383 | |
384 | |
385 | |
def rank_plasmids(original_useful_plasmids = None):
    """Rank the useful plasmids by codon usage and number of modifications.

    Args:
        original_useful_plasmids: dict as returned by ``evaluate_plasmids``;
            every key except ``"original_plasmids"`` must map to a dict
            holding ``mean_codon_usage``, ``std_codon_usage`` and
            ``number_of_modification``.

    Returns:
        pandas DataFrame with one row per plasmid and those three columns,
        sorted by highest mean codon usage, then lowest standard deviation,
        then fewest modifications. The frame is empty when there is no
        plasmid to rank.
    """
    ranking_columns = ["mean_codon_usage",
                       "std_codon_usage",
                       "number_of_modification"]

    tmp_useful_plasmids = {}
    for key in original_useful_plasmids:
        if key == "original_plasmids":
            continue
        entry = original_useful_plasmids[key]
        tmp_useful_plasmids[key] = {column: entry[column]
                                    for column in ranking_columns}

    dat_plasmids = pd.DataFrame(tmp_useful_plasmids).T

    if dat_plasmids.empty:
        # Nothing to sort; an empty frame has no ranking columns.
        return dat_plasmids

    # sort_values returns a new frame; the result was previously discarded,
    # so the caller got the rows back unsorted.
    return dat_plasmids.sort_values(ranking_columns,
                                    ascending=[False, True, True])
414 | |
415 | |
def print_color_seq(original = None,
                    others = None,
                    annotation_information = None,
                    tot = None,
                    ind_range = None,
                    patterns = None,
                    f_patterns = None,
                    patts = None,
                    max_row = 18):
    """Print an aligned, ANSI-coloured comparison of plasmids to stdout.

    The output is split into blocks of at most ``max_row`` columns (label
    column included). Each block stacks these rows: one row per target found,
    the aggregate target positions, the feature annotation, the CDS
    orientation, the original sequence, one row per alternative plasmid and
    the position index. After the blocks, the gene/CDS features and
    ``f_patterns`` are printed.

    Args:
        original: reference plasmid exposing ``seq``, ``features`` and
            ``len()``.
        others: iterable of keys (strings) of the plasmids to display.
        annotation_information: mapping ``key -> {"modified_positions": ...}``.
        tot: mapping ``key -> {"sequence": ...}``.
        ind_range: optional ``[start, end]`` window; defaults to the whole
            plasmid. The window is applied to every row.
        patterns: mapping ``position -> collection`` of pattern symbols;
            positions with more than one symbol are shown as ``+``, the
            others as ``T``.
        f_patterns: mapping ``pattern -> list of hits``; for each hit,
            ``hit[0]`` is the matched text and ``hit[1]``/``hit[2]`` its
            start/end (end smaller than start means the hit wraps around).
        patts: unused.
        max_row: number of columns per block, label column included.

    Returns:
        None.
    """
    import re

    red = "\033[1;31;40m"
    green = "\033[1;32;40m"
    reset = "\033[0m"

    def split_bases(text):
        # One item per word character (a real list on Python 2 and 3).
        return [piece for piece in re.split(r'(\w{1})', str(text)) if piece]

    seq_len = len(original.seq)
    coding_features = [f for f in original.features
                       if f.type.lower() in ["gene", "cds"]]

    if ind_range is None:
        ind_range = [0, len(original)]
    first, last = ind_range[0], ind_range[1]

    ################################################################
    # Single Targets
    ################################################################
    targets = {}
    for number, key in enumerate(list(f_patterns.keys())):
        if f_patterns[key] != []:
            targets["Target" + str(number)] = key

    rows = []
    for target_name in sorted(targets):
        cells = ["|"] * seq_len
        for hit in f_patterns[targets[target_name]]:
            matched, start, end = hit[0], hit[1], hit[2]
            if start < end:
                for l in range(start, end):
                    cells[l] = matched[l - start]
            else:
                # Wrapping hit: tail of the plasmid, then its beginning.
                for l in range(start, seq_len):
                    cells[l] = matched[l - start]
                for l in range(end):
                    cells[l] = matched[-end:][l]
        rows.append([target_name] + cells[first:last])

    ################################################################
    # Aggregate Targets
    ################################################################
    position_cells = []
    for k in range(len(original)):
        if k in patterns:
            position_cells.append("+" if len(patterns[k]) > 1 else "T")
        else:
            position_cells.append(" ")
    rows.append(["TargetPositions"] + position_cells[first:last])

    ################################################################
    # Annotation
    ################################################################
    annot_cells = []
    distance = 0
    for feat in coding_features:
        for _ in range(distance, feat.location.start):
            annot_cells.append("_")
        annot_cells.append("*")
        for _ in range(feat.location.end - feat.location.start - 2):
            annot_cells.append("_")
        distance = feat.location.end
        annot_cells.append("/")
    for _ in range(distance, len(original)):
        annot_cells.append("_")
    rows.append(["Annotation"] + annot_cells[first:last])

    ################################################################
    # CDS
    ################################################################
    original_cells = split_bases(original.seq)
    direction_cells = []
    distance = 0
    alternating = 0
    for feat in coding_features:
        for _ in range(distance, feat.location.start):
            direction_cells.append("_")
        for counter in range(feat.location.start, feat.location.end, 3):
            # Consecutive codons alternate between green and red.
            color = red if alternating % 2 == 1 else green
            # Bounded so a trailing partial codon cannot index past the end.
            for k in range(counter, min(counter + 3, len(original_cells))):
                original_cells[k] = color + original_cells[k] + reset
            alternating += 1
            if feat.strand == +1:
                direction_cells.extend(["-", "-", ">"])
            if feat.strand == -1:
                direction_cells.extend(["<", "-", "-"])
        distance = feat.location.end
    for _ in range(distance, len(original)):
        direction_cells.append("_")
    rows.append(["CDS_Orientation"] + direction_cells[first:last])
    rows.append(["Original"] + original_cells[first:last])

    ################################################################
    # Plasmids_ids
    ################################################################
    for s in others:
        cells = split_bases(tot[s]["sequence"])
        for k in set(annotation_information[s]["modified_positions"]):
            if 0 <= k < len(cells):
                cells[k] = green + cells[k] + reset
        rows.append([s] + cells[first:last])

    ################################################################
    # Index
    ################################################################
    rows.append(["Index"] + [str(i) for i in range(first, last)])

    ################################################################
    # Print the blocks
    ################################################################
    # Ceiling division over the index row (label included). The previous
    # formula produced no block at all for sequences shorter than max_row.
    n_blocks = (len(rows[-1]) + max_row - 1) // max_row

    for i in range(n_blocks):
        j = i * max_row
        block = []
        for row in rows:
            chunk = row[j:j + max_row]
            if i >= 1:
                # Later blocks repeat the row label in front.
                chunk = [row[0]] + chunk
            block.append(chunk)

        print("\n")

        index_chunk = block[-1]
        label_width = max(len(chunk[0]) for chunk in block)
        for f, chunk in enumerate(block):
            chunk[0] = chunk[0].ljust(label_width)
            if f < len(block) - 1:
                # Pad each cell by the width of the index printed below it.
                for l in range(1, len(chunk)):
                    pad = ""
                    if l < len(index_chunk):
                        pad = " " * len(str(index_chunk[l]))
                    chunk[l] = pad + chunk[l]
            print(" ".join(chunk))

    print("\n")
    print(coding_features)
    print("\n")
    print(f_patterns)

    return
664 | |
def print_to_pdf(original = None,
                 others = None,
                 annotation_information = None,
                 tot = None,
                 ind_range = None,
                 patterns = None,
                 f_patterns = None,
                 patts = None,
                 max_row = 9):
    """Write the plasmid comparison and its legend to two PDF files.

    ``comparison_syngenic_plasmids.pdf`` gets one table per block of at most
    ``max_row`` columns (label column included). Each table stacks these
    rows: one row per target found, the aggregate target positions, the
    feature annotation, the CDS orientation, the original sequence, one row
    per alternative plasmid and the position index. In the original
    sequence, codons are prefixed alternately with ``s`` and ``f``; modified
    bases of the alternative plasmids are suffixed with ``m``.

    ``./further_information.pdf`` lists the gene/CDS regions, the patterns
    with their hits and the identifiers given to the targets.

    Args:
        original: reference plasmid exposing ``seq``, ``features`` and
            ``len()``.
        others: iterable of keys (strings) of the plasmids to display.
        annotation_information: mapping ``key -> {"modified_positions": ...}``.
        tot: mapping ``key -> {"sequence": ...}``.
        ind_range: optional ``[start, end]`` window; defaults to the whole
            plasmid. The window is applied to every row.
        patterns: mapping ``position -> collection`` of pattern symbols;
            positions with more than one symbol are shown as ``+``, the
            others as ``T``.
        f_patterns: mapping ``pattern -> list of hits``; for each hit,
            ``hit[0]`` is the matched text and ``hit[1]``/``hit[2]`` its
            start/end (end smaller than start means the hit wraps around).
        patts: unused.
        max_row: number of columns per table, label column included.

    Returns:
        None.
    """
    import re

    from reportlab.lib import colors
    from reportlab.lib.pagesizes import letter
    from reportlab.platypus import SimpleDocTemplate, Table, TableStyle
    from reportlab.pdfgen import canvas

    def split_bases(text):
        # One item per word character (a real list on Python 2 and 3).
        return [piece for piece in re.split(r'(\w{1})', str(text)) if piece]

    seq_len = len(original.seq)
    coding_features = [f for f in original.features
                       if f.type.lower() in ["gene", "cds"]]

    if ind_range is None:
        ind_range = [0, len(original)]
    first, last = ind_range[0], ind_range[1]

    ################################################################
    # Single Targets
    ################################################################
    targets = {}
    for number, key in enumerate(list(f_patterns.keys())):
        if f_patterns[key] != []:
            targets["Target" + str(number)] = key

    rows = []
    for target_name in sorted(targets):
        cells = ["|"] * seq_len
        for hit in f_patterns[targets[target_name]]:
            matched, start, end = hit[0], hit[1], hit[2]
            if start < end:
                for l in range(start, end):
                    cells[l] = matched[l - start]
            else:
                # Wrapping hit: tail of the plasmid, then its beginning.
                for l in range(start, seq_len):
                    cells[l] = matched[l - start]
                for l in range(end):
                    cells[l] = matched[-end:][l]
        rows.append([target_name] + cells[first:last])

    ################################################################
    # Aggregate Targets
    ################################################################
    position_cells = []
    for k in range(len(original)):
        if k in patterns:
            position_cells.append("+" if len(patterns[k]) > 1 else "T")
        else:
            position_cells.append(" ")
    rows.append(["TargetPositions"] + position_cells[first:last])

    ################################################################
    # Annotation
    ################################################################
    annot_cells = []
    distance = 0
    for feat in coding_features:
        for _ in range(distance, feat.location.start):
            annot_cells.append("_")
        annot_cells.append("*")
        for _ in range(feat.location.end - feat.location.start - 2):
            annot_cells.append("_")
        distance = feat.location.end
        annot_cells.append("/")
    for _ in range(distance, len(original)):
        annot_cells.append("_")
    rows.append(["Annotation"] + annot_cells[first:last])

    ################################################################
    # CDS
    ################################################################
    original_cells = split_bases(original.seq)
    direction_cells = []
    distance = 0
    alternating = 0
    for feat in coding_features:
        for _ in range(distance, feat.location.start):
            direction_cells.append("_")
        for counter in range(feat.location.start, feat.location.end, 3):
            # Consecutive codons alternate between the 's' and 'f' markers.
            marker = 'f' if alternating % 2 == 1 else 's'
            # Bounded so a trailing partial codon cannot index past the end.
            for k in range(counter, min(counter + 3, len(original_cells))):
                original_cells[k] = marker + original_cells[k]
            alternating += 1
            if feat.strand == +1:
                direction_cells.extend(["-", "-", ">"])
            if feat.strand == -1:
                direction_cells.extend(["<", "-", "-"])
        distance = feat.location.end
    for _ in range(distance, len(original)):
        direction_cells.append("_")
    rows.append(["CDS_Orientation"] + direction_cells[first:last])
    rows.append(["Original"] + original_cells[first:last])

    ################################################################
    # Plasmids_ids
    ################################################################
    for s in others:
        cells = split_bases(tot[s]["sequence"])
        for k in set(annotation_information[s]["modified_positions"]):
            if 0 <= k < len(cells):
                cells[k] += "m"
        rows.append([s] + cells[first:last])

    ################################################################
    # Index
    ################################################################
    rows.append(["Index"] + [str(i) for i in range(first, last)])

    ################################################################
    # Create the pdf file
    ################################################################
    doc = SimpleDocTemplate("comparison_syngenic_plasmids.pdf", pagesize=letter,
                            rightMargin=30, leftMargin=30,
                            topMargin=30, bottomMargin=30)

    elements = []

    # Ceiling division over the index row (label included). The previous
    # formula produced no table at all for sequences shorter than max_row.
    n_blocks = (len(rows[-1]) + max_row - 1) // max_row

    for i in range(n_blocks):
        j = i * max_row
        block = []
        for row in rows:
            chunk = row[j:j + max_row]
            if i >= 1:
                # Later blocks repeat the row label in front.
                chunk = [row[0]] + chunk
            block.append(chunk)

        elements.append(Table(block, hAlign='LEFT'))
        # Empty one-row table used as a spacer between blocks.
        elements.append(Table([["", "", "", "", ""]]))

    doc.build(elements)

    ################################################################
    # Legend file
    ################################################################
    c = canvas.Canvas("./further_information.pdf")
    c.drawString(100, 750, "CDS regions:")
    upper_bound = 750
    for feat in original.features:
        if feat.type.lower() in ["gene", "cds"]:
            upper_bound -= 15
            if feat.location.strand == -1:
                sign = "-"
            else:
                sign = "+"
            c.drawString(115, upper_bound,
                         str("[") + str(feat.location.start) + ":"
                         + str(feat.location.end) + "]" + "(" + sign + ")")
    upper_bound -= 30
    c.drawString(100, upper_bound,
                 "Patterns and the corresponding targets on the plasmid sequence:")
    for f_pattern in f_patterns.keys():
        upper_bound -= 15
        c.drawString(115, upper_bound, f_pattern + ":")
        for val in f_patterns[f_pattern]:
            upper_bound -= 15
            c.drawString(130, upper_bound, str(val))
        upper_bound -= 5

    upper_bound -= 30
    c.drawString(100, upper_bound,
                 "Identifiers of the targets found in the plasmid sequence:")
    for target in targets.keys():
        upper_bound -= 15
        c.drawString(115, upper_bound, target + ": " + targets[target])

    c.save()

    return
948 | |
949 | |
def produce_random_targets(sequence):
    """Placeholder for a generator of random test targets (not implemented).

    The cases the function is meant to cover, once written:

    * a target spanning two contiguous CDS;
    * a target in a non-coding region;
    * a target in a coding region;
    * a target overlapping a CDS on the left;
    * a target overlapping a CDS on the right.

    Args:
        sequence: currently ignored.

    Returns:
        None.
    """
    return None