annotate CodonSwitchTool/functions.py @ 2:aad5e435e4dc draft default tip

Uploaded
author gianmarco_piccinno
date Tue, 21 May 2019 05:24:56 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
1 import string
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
2 from syngenic import *
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
3 from Bio.Seq import Seq
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
4 from Bio.SeqFeature import SeqFeature, FeatureLocation
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
5 from pprint import pprint
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
6
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
7 from itertools import izip
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
8
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
9 import numpy as np
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
10 import pandas as pd
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
11
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
def all_patterns(input_=None):
    """Collect every pattern and its reverse complement, as text and as plans.

    Parameters
    ----------
    input_ : iterable or None
        Pattern objects exposing ``reverse_complement()`` and convertible
        with ``str()``. ``None`` (the default) is treated as "no patterns".

    Returns
    -------
    tuple
        ``(patts, n_patts)`` where ``patts`` holds ``str(pattern)`` followed by
        ``str(reverse_complement)`` for each input pattern, and ``n_patts``
        holds the result of ``pattern(x).plan_ambiguity()`` for the same
        objects in the same order.
    """
    # A None sentinel replaces the former mutable ``[]`` default.
    if input_ is None:
        input_ = ()

    patts = []
    n_patts = []

    for tmp_patt in input_:
        tmp_revc = tmp_patt.reverse_complement()

        patts.append(str(tmp_patt))
        patts.append(str(tmp_revc))

        # NOTE(review): ``pattern`` is not defined in this block; presumably it
        # is provided by the module-level ``from syngenic import *``.
        n_patts.append(pattern(tmp_patt).plan_ambiguity())
        n_patts.append(pattern(tmp_revc).plan_ambiguity())

    return patts, n_patts
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
29
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
def fake_from_real(path=None, id_=None, name=None):
    """Build a shortened plasmid record from a GenBank file.

    The result is the first 10 bases of the record, then for every ``CDS``
    feature the first 9 and last 9 bases of that feature, then the last 10
    bases of the record. Each shortened CDS is annotated on the result as a
    ``gene`` feature carrying the strand of the original CDS.

    Parameters
    ----------
    path : str
        Path of the GenBank file to read.
    id_ : str or None
        Value assigned to the ``id`` attribute of the result.
    name : str or None
        Value assigned to the ``name`` attribute of the result.

    Returns
    -------
    The assembled record with ``features``, ``id`` and ``name`` set.
    """
    # NOTE(review): ``SeqIO`` is not imported explicitly in this file; it is
    # presumably supplied by ``from syngenic import *``.
    # The handle is closed deterministically instead of being leaked.
    with open(path, "r") as handle:
        plasmid_seq = SeqIO.read(handle, "genbank")

    f_p = plasmid_seq.seq[:10]
    f_CDS = []
    for f in plasmid_seq.features:
        if f.type != "CDS":
            continue
        # Coordinates of the shortened CDS inside the record being assembled.
        tmp_start = len(f_p)
        tmp_cds = (plasmid_seq[f.location.start:f.location.start + 9]
                   + plasmid_seq[f.location.end - 9:f.location.end])
        tmp_end = tmp_start + len(tmp_cds)
        f_p += tmp_cds
        # The strand is stored on the location: newer Biopython releases no
        # longer accept ``strand=`` on the SeqFeature constructor.
        f_CDS.append(SeqFeature(
            FeatureLocation(tmp_start, tmp_end, strand=f.location.strand),
            type="gene"))
    f_p += plasmid_seq.seq[-10:]

    # NOTE(review): with no CDS feature ``f_p`` is still a bare sequence slice
    # here, not a record — confirm callers always pass annotated plasmids.
    for feat in f_CDS:
        f_p.features.append(feat)
    f_p.id = id_
    f_p.name = name

    return f_p
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
54
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
def punctuate_targets(f_patts, n_pl):
    """Map each sequence position covered by a pattern hit to its pattern bases.

    Parameters
    ----------
    f_patts : dict
        Maps a pattern (indexable, e.g. a string) to an iterable of hits.
        Each hit is indexable, with the start position at index 1 and the
        end position (exclusive) at index 2.
    n_pl : sized
        The sequence searched; only its length is used, to wrap hits whose
        end is smaller than their start.

    Returns
    -------
    dict
        ``{position: set_of_pattern_characters}``; the character stored for a
        position is ``pattern[i]`` where ``i`` is the offset of that position
        inside the hit.
    """
    max_len = len(n_pl)
    n_poss = {}

    for key, hits in f_patts.items():
        for el in hits:
            start, end = el[1], el[2]
            if end < start:
                # The hit wraps past the end of the sequence: walk to the end
                # and continue from position 0. ``list()`` keeps this working
                # on Python 3, where two ranges cannot be concatenated.
                positions = list(range(start, max_len)) + list(range(0, end))
            else:
                positions = range(start, end)

            for offset, pos in enumerate(positions):
                n_poss.setdefault(pos, set()).add(key[offset])

    return n_poss
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
82
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
83
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
def print_seq(n_pl, ind_range=None):
    """Print a sequence in rows of nine symbols with their indices underneath.

    Each row is printed as a line holding only a newline, then the symbols,
    then the matching indices. Symbols are left-padded so that each one ends
    in the same column as its index.

    Parameters
    ----------
    n_pl : str
        The sequence to print.
    ind_range : sequence of two ints, optional
        ``(start, stop)``; when given only ``n_pl[start:stop]`` is printed and
        the indices shown run from ``start`` to ``stop - 1``. When ``None``
        the whole sequence is printed with indices starting at 0.

    Returns
    -------
    None
    """
    # Imported locally so the function does not depend on ``re`` leaking in
    # through a star import elsewhere in the module.
    import re

    if ind_range is None:
        fragment = n_pl
        index = range(len(n_pl))
    else:
        fragment = n_pl[ind_range[0]:ind_range[1]]
        index = range(ind_range[0], ind_range[1])

    # Splitting on a captured single word character yields one token per
    # symbol; a list (not a lazy ``filter``) is needed for Python 3.
    data = [token for token in re.split(r'(\w{1})', fragment) if token]

    seq_row = ""
    ind_row = ""
    for i, symbol in enumerate(data):
        if i > 0 and i % 9 == 0:
            # Nine symbols collected: flush the row and start a new one.
            print("\n")
            print(seq_row)
            print(ind_row)
            seq_row = ""
            ind_row = ""

        label = str(index[i])
        seq_row += " " + " " * (len(label) - 1) + symbol
        ind_row += " " + label

    # Flush the last (possibly partial or empty) row.
    print("\n")
    print(seq_row)
    print(ind_row)

    return None
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
156
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
157
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
def generalization(n_poss, n_pl, synonims_tables, reduced=False):
    """Turn a plasmid into a regex of allowed variants and enumerate them.

    For every targeted position in ``n_poss``:

    * if the position lies inside a ``CDS``/``gene`` feature, the base may be
      replaced by any base found at the same codon offset in the codons that
      ``synonims_tables`` lists for the codon at that position (table
      ``"synonims"`` for strand +1, ``"anti_synonims"`` for strand -1);
    * otherwise, if the position targets at least one of A/C/G/T, the base
      may be replaced according to the fixed A<->T / C<->G table below.

    Positions that are not rewritten keep their original symbol.

    Parameters
    ----------
    n_poss : dict
        ``{position: set_of_characters}``, as built by ``punctuate_targets``.
    n_pl : object
        Plasmid exposing ``seq`` and ``features``; each feature exposes
        ``type``, ``strand`` and ``location.start`` / ``location.end``.
    synonims_tables : dict
        Has the keys ``"synonims"`` and ``"anti_synonims"``, each mapping a
        codon to an iterable of three-letter codons.
    reduced : bool, optional
        Kept for backward compatibility. Both values behave the same (the
        previous implementation had two identical branches).

    Returns
    -------
    The ``sre_yield.AllStrings`` object built from the generalized sequence.

    Raises
    ------
    KeyError
        If a codon read from the plasmid is missing from the selected table.
    """
    # Imported locally so the function does not depend on ``re`` leaking in
    # through a star import elsewhere in the module.
    import re

    transversions = {"A": "[AT]",
                     "T": "[TA]",
                     "C": "[CG]",
                     "G": "[GC]"}
    canonical_bases = {"A", "T", "C", "G"}
    table_for_strand = {+1: "synonims", -1: "anti_synonims"}

    new_poss = {}

    for pos, targets in n_poss.items():
        in_cds = False

        for feat in n_pl.features:
            if feat.type not in ("CDS", "gene"):
                continue
            if not (feat.location.start <= pos < feat.location.end):
                continue
            in_cds = True

            if targets == {"N"}:
                # A pure wildcard target constrains nothing here.
                continue

            table_name = table_for_strand.get(feat.strand)
            if table_name is None:
                # Feature without a +1/-1 strand: nothing to look up.
                continue

            # Offset of ``pos`` inside its codon, counted from the feature
            # start, and the codon that contains it.
            offset = (pos - feat.location.start) % 3
            codon_start = pos - offset
            tmp_codon = n_pl.seq[codon_start:codon_start + 3]

            bases = set()
            for codon in synonims_tables[table_name][tmp_codon]:
                bases.add(codon[offset])

            if len(bases) > 1:
                # Sorted so the generated regex is deterministic.
                new_poss[pos] = "[" + "".join(sorted(bases)) + "]"

        if not in_cds:
            # Only concrete bases have an entry in ``transversions``; picking
            # from the intersection avoids a KeyError on ambiguity codes.
            concrete = set.intersection(targets, canonical_bases)
            if concrete:
                new_poss[pos] = transversions[sorted(concrete)[0]]

    # One token per symbol of the plasmid; a list (not a lazy ``filter``) is
    # needed for Python 3.
    n_seq = [token for token in re.split(r'(\w{1})', str(n_pl.seq)) if token]

    new_plasmid_generalized = "".join(
        new_poss.get(pos, symbol) for pos, symbol in enumerate(n_seq))

    # NOTE(review): ``sre_yield`` is not imported explicitly in this file; it
    # is presumably supplied by ``from syngenic import *``.
    t = sre_yield.AllStrings(new_plasmid_generalized)

    return t
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
307
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
308
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
309 def evaluate_plasmids(plasmids = None,
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
310 original_plasmid = None,
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
311 codon_usage_table = None,
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
312 n_patts = None,
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
313 f_patts = None):
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
314
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
315 from syngenic import plasmid
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
316 from Bio.Seq import Seq
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
317 from Bio.SeqFeature import SeqFeature, FeatureLocation
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
318 from itertools import izip
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
319 import numpy as np
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
320
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
321 useful = {}
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
322
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
323 i = 0
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
324
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
325 for tmp_pl in plasmids:
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
326
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
327 if tmp_pl != original_plasmid.seq:
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
328
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
329 identical_proteic_sequence = []
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
330
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
331 for feat in original_plasmid.features:
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
332 if feat.type.lower() in ["gene", "cds"]:
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
333 identical_proteic_sequence.append(Seq(plasmid(tmp_pl).extract(feat)).translate() == Seq(original_plasmid.extract(feat)).translate())
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
334 identical_proteic_sequence = all(identical_proteic_sequence)
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
335 if (identical_proteic_sequence == True) & (set([True if el ==[] else False for el in plasmid(tmp_pl).findpatterns(n_patts, f_patts).values()]) == {True}):
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
336 print("\t" + str(i) + "/" + str(len(plasmids)))
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
337 #print(tmp_pl)
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
338 tmp = [j for j,(a1,a2) in enumerate(izip(tmp_pl,original_plasmid)) if a1!=a2]
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
339 #print(tmp)
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
340 useful["Plasmid_" + str(i)] = {}
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
341 useful["Plasmid_" + str(i)]["modified_positions"] = tmp
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
342 useful["Plasmid_" + str(i)]["codon_usage"] = []
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
343 useful["Plasmid_" + str(i)]["number_of_modification"] = len(tmp)
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
344 useful["Plasmid_" + str(i)]["sequence"] = tmp_pl
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
345 for modified_position in tmp:
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
346 in_cds = False
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
347 for feat in original_plasmid.features:
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
348 if feat.type.lower() in ["gene", "cds"]:
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
349 if ((modified_position >= feat.location.start) & (modified_position < feat.location.end)) & (feat.type in ["CDS", "gene"]):
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
350 in_cds = True
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
351 if (modified_position - feat.location.start) % 3 == 0:
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
352 # first basis of a codon
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
353 if feat.strand == +1:
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
354 tmp_codon = tmp_pl[modified_position:modified_position+3]
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
355 else:
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
356 tmp_codon = str(Seq(tmp_pl[modified_position:modified_position+3]).reverse_complement())
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
357 useful["Plasmid_" + str(i)]["codon_usage"].append(codon_usage_table.loc[tmp_codon]["Proportion"])
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
358 elif (modified_position - feat.location.start) % 3 == 1:
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
359 # second basis of a codon
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
360 if feat.strand == +1:
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
361 tmp_codon = tmp_pl[modified_position-1:modified_position+2]
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
362 else:
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
363 tmp_codon = str(Seq(tmp_pl[modified_position-1:modified_position+2]).reverse_complement())
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
364 useful["Plasmid_" + str(i)]["codon_usage"].append(codon_usage_table.loc[tmp_codon]["Proportion"])
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
365 elif (modified_position - feat.location.start) % 3 == 2:
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
366 # third basis of a codon
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
367 if feat.strand == +1:
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
368 tmp_codon = original_plasmid.seq[modified_position-2:modified_position+1]
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
369 else:
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
370 tmp_codon = str(Seq(tmp_pl[modified_position-2:modified_position+1]).reverse_complement())
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
371 useful["Plasmid_" + str(i)]["codon_usage"].append(codon_usage_table.loc[tmp_codon]["Proportion"])
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
372
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
373 useful["Plasmid_" + str(i)]["mean_codon_usage"] = np.mean(useful["Plasmid_" + str(i)]["codon_usage"])
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
374 useful["Plasmid_" + str(i)]["std_codon_usage"] = np.std(useful["Plasmid_" + str(i)]["codon_usage"])
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
375 else:
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
376 next
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
377
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
378 i += 1
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
379
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
380 useful["original_plasmids"] = original_plasmid
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
381
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
382 return useful
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
383
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
384
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
385
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
def rank_plasmids(original_useful_plasmids = None):

    """
    Rank the candidate plasmids according to codon usage and number of
    modifications introduced.

    original_useful_plasmids: dict mapping a plasmid name to a dict that
        holds at least the keys "mean_codon_usage", "std_codon_usage" and
        "number_of_modification". The entry stored under the key
        "original_plasmids" is ignored. None is treated as an empty dict.

    Returns a pandas DataFrame with one row per plasmid (indexed by the
    plasmid name) and one column per metric, ordered by highest mean codon
    usage, then lowest standard deviation, then fewest modifications.
    An empty input yields an empty DataFrame with the three metric columns.
    """

    columns = ["mean_codon_usage", "std_codon_usage", "number_of_modification"]

    if original_useful_plasmids is None:
        original_useful_plasmids = {}

    # Keep only the three ranking metrics of every candidate plasmid.
    tmp_useful_plasmids = {}
    for key, info in original_useful_plasmids.items():
        if key == "original_plasmids":
            continue
        tmp_useful_plasmids[key] = dict((col, info[col]) for col in columns)

    if not tmp_useful_plasmids:
        # sort_values would raise KeyError on a frame without the columns.
        return pd.DataFrame(columns=columns)

    dat_plasmids = pd.DataFrame(tmp_useful_plasmids).T

    # sort_values returns a new frame; it must be returned, otherwise the
    # ranking is silently lost.
    return dat_plasmids.sort_values(columns, ascending=[False, True, True])
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
414
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
415
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
def print_color_seq(original = None,
                    others = None,
                    annotation_information = None,
                    tot = None,
                    ind_range = None,
                    patterns = None,
                    f_patterns = None,
                    patts = None,
                    max_row = 18):

    """
    Print to standard output a coloured, block-wise view of the original
    sequence together with the target hits, the gene/CDS annotation and the
    modified plasmids.

    Example of call:

    original = plasmids["original_plasmid"],
    others = def_pls,
    annotation_information = useful_plasmids,
    tot = plasmids,
    ind_range = None

    original: object supporting len(), with a sliceable `.seq` and a
        `.features` list; each feature is read through `.type`,
        `.location.start`, `.location.end` and `.strand`
        (presumably Biopython SeqFeature objects -- confirm with the caller).
    others: iterable with the names of the plasmids to display; each name is
        used as key of both `tot` and `annotation_information`.
    annotation_information: dict, annotation_information[name]["modified_positions"]
        is the collection of positions highlighted in that plasmid.
    tot: dict, tot[name]["sequence"] is the sequence of that plasmid.
    ind_range: [start, end] window of the sequences to display; None means
        the whole sequence.
        NOTE(review): codon and modification colouring index the windowed
        lists with absolute positions, so a window that does not start at 0
        looks unsupported -- confirm.
    patterns: dict keyed by position; positions whose value has more than
        one element are marked "+", the other keyed positions "T".
    f_patterns: dict mapping a target to its list of hits; each hit is
        indexed as hit[0] = matched text, hit[1] = start, hit[2] = end.
        A hit whose start is not smaller than its end is drawn as wrapping
        around the end of the sequence.
    patts: not used.
    max_row: number of cells (label included in the first block) per
        printed block.

    Returns None.
    """

    # Imported here because `re` is not among the explicit imports at the
    # top of the file.
    import re

    red = "\033[1;31;40m"
    green = "\033[1;32;40m"
    reset = "\033[0m"

    cds_features = [ft for ft in original.features if ft.type.lower() in ["gene", "cds"]]

    ################################################################
    # Single Targets
    ################################################################

    # Name every target that has at least one hit; the number in the name
    # is the position of the target among all the keys of f_patterns.
    targets = {}
    for l, t_key in enumerate(list(f_patterns.keys())):
        if f_patterns[t_key] != []:
            targets["Target" + str(l)] = t_key

    # One row per target: "|" everywhere except where a hit lies.
    tars = {}
    for tar in targets.keys():
        tars[tar] = ["|"] * len(original.seq)

        for tar1 in f_patterns[targets[tar]]:
            if tar1[1] < tar1[2]:
                for l in range(tar1[1], tar1[2]):
                    tars[tar][l] = tar1[0][l-tar1[1]]
            else:
                # Hit spanning the end of the sequence: first the part up
                # to the end, then the part restarting from position 0.
                for l in range(tar1[1], len(original.seq)):
                    tars[tar][l] = tar1[0][l-tar1[1]]
                for l in range(tar1[2]):
                    tars[tar][l] = tar1[0][-tar1[2]:][l]

    target_lists = [[key]+tars[key] for key in sorted(tars.keys())]

    ################################################################
    # Aggregate Targets
    ################################################################
    target_positions = ["TargetPositions"]
    for k in range(len(original)):
        if k in patterns.keys():
            if len(patterns[k]) > 1:
                target_positions.append("+")
            else:
                target_positions.append("T")
        else:
            target_positions.append(" ")

    ################################################################
    # Annotation
    ################################################################
    # "*" marks the first position of a gene/CDS, "/" the last one.
    annot = ["Annotation"]

    distance = 0
    for feat in cds_features:
        for space in range(distance, feat.location.start):
            annot.append("_")
        annot.append("*")
        for an_space in range(feat.location.end - feat.location.start-2):
            annot.append("_")
        distance = feat.location.end
        annot.append("/")
    for space in range(distance, len(original)):
        annot.append("_")

    ################################################################
    # CDS
    ################################################################

    if ind_range is None:
        ind_range = [0, len(original)]

    sequences = {}
    sequences["original"] = list(filter(None, re.split(r'(\w{1})', original.seq[ind_range[0]:ind_range[1]])))
    direction = ["CDS_Orientation"]
    distance = 0

    # Consecutive codons alternate between green and red.
    alternating = 0

    for feat in cds_features:
        for space in range(distance, feat.location.start):
            direction.append("_")
        for counter in range(feat.location.start, feat.location.end, 3):
            if alternating % 2 == 1:
                color = red
            else:
                color = green
            for offset in range(3):
                sequences["original"][counter+offset] = color + sequences["original"][counter+offset] + reset
            alternating += 1

            if feat.strand == +1:
                direction.extend(["-", "-", ">"])
            if feat.strand == -1:
                direction.extend(["<", "-", "-"])
        distance = feat.location.end
    for space in range(distance, len(original)):
        direction.append("_")

    ################################################################
    # Plasmids_ids
    ################################################################
    f = 0
    new_plasmids = []
    for s in others:
        new_plasmids.append([s] + list(filter(None, re.split(r'(\w{1})', tot[s]["sequence"][ind_range[0]:ind_range[1]]))))
        for k in range(len(original)):
            if k in annotation_information[s]["modified_positions"]:
                # k+1 because the first cell of the row is the plasmid name
                new_plasmids[f][k+1] = green + new_plasmids[f][k+1] + reset
        f += 1

    ################################################################
    # Index
    ################################################################

    index = ["Index"] + [str(i) for i in range(ind_range[0], ind_range[1])]

    ################################################################
    # Print the blocks
    ################################################################

    # Every row is a list whose first cell is the label of the row.
    # The index row must stay the last one: the other rows are aligned to it.
    rows = (target_lists
            + [target_positions, annot, direction, ["Original"] + sequences["original"]]
            + new_plasmids
            + [index])

    # A full row holds the label plus len(original.seq) cells, so it takes
    # len(original.seq) // max_row + 1 blocks of max_row cells to show it all.
    n_blocks = len(original.seq) // max_row + 1

    for i in range(n_blocks):
        j = i * max_row

        block = []
        for row in rows:
            cells = row[j:j+max_row]
            if i >= 1:
                # The first block already starts with the label.
                cells = [row[0]] + cells
            block.append(cells)

        print("\n")

        width = max(len(cells[0]) for cells in block)
        index_cells = block[-1]
        last = len(block) - 1
        for f, cells in enumerate(block):
            # Pad the labels to the same width.
            cells[0] += " " * (width - len(cells[0]))
            if f < last:
                # Shift every cell by the width of the index printed
                # below it.
                for l in range(1, len(cells)):
                    if l < len(index_cells):
                        cells[l] = " " * len(str(index_cells[l])) + cells[l]
            print(" ".join(cells))

    print("\n")
    print(cds_features)
    print("\n")
    print(f_patterns)

    return
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
664
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
def print_to_pdf(original = None,
                 others = None,
                 annotation_information = None,
                 tot = None,
                 ind_range = None,
                 patterns = None,
                 f_patterns = None,
                 patts = None,
                 max_row = 9):
    """Write two PDF reports comparing a plasmid with its modified variants.

    Two files are created in the current working directory:

    * ``comparison_syngenic_plasmids.pdf`` -- a column-by-column table with
      one row per target, the aggregated target positions, the gene/CDS
      annotation, the CDS orientation, the original sequence, every plasmid
      listed in ``others`` and a position index.  The table is cut into
      blocks of ``max_row`` cells so that it fits on the page.
    * ``./further_information.pdf`` -- a plain listing of the gene/CDS
      regions, the patterns with their hits and the target identifiers.

    Typical call (taken from the original docstring)::

        original = plasmids["original_plasmid"],
        others = def_pls,
        annotation_information = useful_plasmids,
        tot = plasmids,
        ind_range = None

    Parameters
    ----------
    original : record exposing ``seq``, ``features`` and ``len()``
        (presumably a Biopython ``SeqRecord`` -- confirm against the caller).
    others : iterable of plasmid identifiers; each one is used as a key of
        both ``tot`` and ``annotation_information``.
    annotation_information : mapping ``id -> {"modified_positions": ...}``.
    tot : mapping ``id -> {"sequence": ...}``.
    ind_range : ``[start, end]`` slice applied to the original sequence, the
        plasmid sequences and the index row.  ``None`` means the whole record.
    patterns : mapping ``position -> collection``; a position holding more
        than one entry is drawn as ``"+"``, otherwise as ``"T"``.
    f_patterns : mapping ``pattern -> list of hits``; every hit is indexed as
        ``hit[0]`` (matched text), ``hit[1]`` (start) and ``hit[2]`` (end).
    patts : unused, kept for backward compatibility.
    max_row : number of table cells per row inside one block (must be >= 1).

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If ``max_row`` is smaller than 1.
    """

    # reportlab is imported lazily so that the module can be imported
    # without it when no PDF is requested.
    from reportlab.lib.pagesizes import letter
    from reportlab.platypus import SimpleDocTemplate, Table

    if max_row < 1:
        raise ValueError("max_row must be a positive integer")

    if ind_range is None:
        ind_range = [0, len(original)]

    # NOTE(review): the CDS colouring and the "m" markers below index the
    # sliced sequences with absolute positions, so an ind_range that does
    # not start at 0 looks misaligned -- confirm with the callers.

    cds_features = _pdf_cds_features(original)

    # "Target<n>" -> pattern, where n is the position of the pattern in
    # f_patterns; patterns without any hit get no identifier.
    targets = {}
    for position, pattern in enumerate(f_patterns):
        if f_patterns[pattern] != []:
            targets["Target" + str(position)] = pattern

    original_symbols, direction = _pdf_cds_rows(original, cds_features,
                                                ind_range)

    # Same row order as the original data dictionary (keys 0..6).
    rows = _pdf_target_rows(original, f_patterns, targets)
    rows.append(_pdf_target_positions_row(original, patterns))
    rows.append(_pdf_annotation_row(original, cds_features))
    rows.append(direction)
    rows.append(["Original"] + original_symbols)
    rows.extend(_pdf_plasmid_rows(original, others, tot,
                                  annotation_information, ind_range))
    rows.append(["Index"] + [str(i) for i in range(ind_range[0],
                                                   ind_range[1])])

    doc = SimpleDocTemplate("comparison_syngenic_plasmids.pdf",
                            pagesize=letter,
                            rightMargin=30, leftMargin=30,
                            topMargin=30, bottomMargin=30)

    elements = []
    for block in _pdf_paginate(rows, max_row):
        elements.append(Table(block, hAlign='LEFT'))
        # Empty one-row table used as a vertical spacer between blocks.
        elements.append(Table([["", "", "", "", ""]]))

    doc.build(elements)

    _pdf_write_further_information(cds_features, f_patterns, targets)

    return


def _pdf_cds_features(record):
    """Return the features of ``record`` whose type is gene or CDS."""
    return [feat for feat in record.features
            if feat.type.lower() in ("gene", "cds")]


def _pdf_split_symbols(sequence):
    """Split ``sequence`` into a list of single word characters.

    Runs of non-word characters are kept as one element each, exactly as
    ``re.split(r'(\\w{1})', ...)`` followed by dropping empty strings does.
    """
    # Imported here because the module header does not import ``re`` itself
    # (it was only reachable through a star import).
    import re

    # str() accepts plain strings as well as sequence objects; list() is
    # needed because filter() is lazy on Python 3.
    return list(filter(None, re.split(r'(\w{1})', str(sequence))))


def _pdf_target_rows(original, f_patterns, targets):
    """Build one table row per target, labelled and sorted by identifier."""
    seq_length = len(original.seq)
    rows = []
    for label in sorted(targets):
        track = ["|"] * seq_length
        for hit in f_patterns[targets[label]]:
            matched, start, end = hit[0], hit[1], hit[2]
            if start < end:
                for pos in range(start, end):
                    track[pos] = matched[pos - start]
            else:
                # The hit wraps around the end of the sequence: fill the
                # tail first, then the leading ``end`` positions.
                for pos in range(start, seq_length):
                    track[pos] = matched[pos - start]
                wrapped = matched[-end:]
                for pos in range(end):
                    track[pos] = wrapped[pos]
        rows.append([label] + track)
    return rows


def _pdf_target_positions_row(original, patterns):
    """Build the row flagging positions hit by one ("T") or more ("+") targets."""
    row = ["TargetPositions"]
    for pos in range(len(original)):
        if pos in patterns:
            row.append("+" if len(patterns[pos]) > 1 else "T")
        else:
            row.append(" ")
    return row


def _pdf_annotation_row(original, features):
    """Build the row marking each gene/CDS with "*" at its start and "/" at its end."""
    row = ["Annotation"]
    cursor = 0
    for feat in features:
        start = feat.location.start
        end = feat.location.end
        row.extend("_" for _ in range(cursor, start))
        row.append("*")
        row.extend("_" for _ in range(end - start - 2))
        row.append("/")
        cursor = end
    row.extend("_" for _ in range(cursor, len(original)))
    return row


def _pdf_cds_rows(original, features, ind_range):
    """Return the codon-tagged original symbols and the CDS orientation row.

    Consecutive codons are prefixed alternately with 's' and 'f' so that
    neighbouring codons can be told apart; the orientation row receives
    "-->" for forward and "<--" for reverse features.
    """
    symbols = _pdf_split_symbols(original.seq[ind_range[0]:ind_range[1]])
    direction = ["CDS_Orientation"]
    cursor = 0
    codon_count = 0

    for feat in features:
        direction.extend("_" for _ in range(cursor, feat.location.start))
        # location.strand is what the rest of this report reads as well.
        strand = feat.location.strand
        for codon_start in range(feat.location.start, feat.location.end, 3):
            prefix = 'f' if codon_count % 2 == 1 else 's'
            for offset in range(3):
                symbols[codon_start + offset] = \
                    prefix + symbols[codon_start + offset]
            codon_count += 1

            if strand == +1:
                direction.extend(["-", "-", ">"])
            if strand == -1:
                direction.extend(["<", "-", "-"])
        cursor = feat.location.end

    direction.extend("_" for _ in range(cursor, len(original)))
    return symbols, direction


def _pdf_plasmid_rows(original, others, tot, annotation_information, ind_range):
    """Build one row per plasmid in ``others``, appending "m" to modified positions."""
    rows = []
    for name in others:
        row = [name] + _pdf_split_symbols(
            tot[name]["sequence"][ind_range[0]:ind_range[1]])
        modified = annotation_information[name]["modified_positions"]
        for pos in range(len(original)):
            if pos in modified:
                # +1 skips the label stored in the first cell.
                row[pos + 1] += "m"
        rows.append(row)
    return rows


def _pdf_paginate(rows, max_row):
    """Cut every row into consecutive chunks of ``max_row`` cells.

    The first block keeps the label that already sits in cell 0; every later
    block gets the label prepended again.  The number of blocks is the
    ceiling of the longest row length over ``max_row``, so the tail of the
    table is never dropped and short sequences still yield one block.
    """
    longest = max(len(row) for row in rows)
    n_blocks = (longest + max_row - 1) // max_row

    blocks = []
    for block_no in range(n_blocks):
        begin = block_no * max_row
        block = []
        for row in rows:
            chunk = row[begin:begin + max_row]
            if block_no >= 1:
                chunk = [row[0]] + chunk
            block.append(chunk)
        blocks.append(block)
    return blocks


def _pdf_write_further_information(features, f_patterns, targets):
    """Write ./further_information.pdf listing CDS regions, patterns and targets."""
    from reportlab.pdfgen import canvas

    c = canvas.Canvas("./further_information.pdf")
    c.drawString(100,750,"CDS regions:")
    upper_bound = 750
    # NOTE(review): upper_bound only ever decreases and no new page is
    # started, so long listings presumably run off the page -- confirm.
    for feat in features:
        upper_bound -= 15
        if feat.location.strand == -1:
            sign = "-"
        else:
            sign = "+"
        c.drawString(115,upper_bound, str("[") + str(feat.location.start)+ ":" + str(feat.location.end) + "]" + "(" + sign + ")")

    upper_bound -= 30
    c.drawString(100,upper_bound,"Patterns and the corresponding targets on the plasmid sequence:")
    for f_pattern in f_patterns:
        upper_bound -= 15
        c.drawString(115,upper_bound,f_pattern + ":")
        for val in f_patterns[f_pattern]:
            upper_bound -= 15
            c.drawString(130,upper_bound,str(val))
        upper_bound -= 5

    upper_bound -= 30
    c.drawString(100,upper_bound,"Identifiers of the targets found in the plasmid sequence:")
    for target in targets:
        upper_bound -= 15
        c.drawString(115,upper_bound,target + ": " + targets[target])

    c.save()
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
948
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
949
aad5e435e4dc Uploaded
gianmarco_piccinno
parents:
diff changeset
def produce_random_targets(sequence):
    """Placeholder for a generator of random targets (not implemented yet).

    The cases this function is meant to cover one day:

    * a target on two continuous CDS
    * a target in a non-coding region
    * a target in a coding region
    * a target overlapping a CDS on the left
    * a target overlapping a CDS on the right

    For now ``sequence`` is ignored and ``None`` is returned.
    """
    return None