annotate galaxy/functions.py @ 42:439b70949f8d draft

Uploaded
author gianmarco_piccinno
date Mon, 20 May 2019 16:44:00 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
42
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
1 import string
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
2 from syngenic import *
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
3 from Bio.Seq import Seq
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
4 from Bio.SeqFeature import SeqFeature, FeatureLocation
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
5 from pprint import pprint
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
6
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
7 from itertools import izip
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
8
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
9 import numpy as np
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
10 import pandas as pd
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
11
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
def all_patterns(input_=None):
    """Collect each pattern and its reverse complement, raw and expanded.

    Parameters:
        input_: iterable of pattern objects exposing ``reverse_complement()``
            (presumably Biopython ``Seq`` instances -- TODO confirm against
            the callers). ``None`` (the default) is treated as "no patterns".

    Returns:
        A ``(patts, n_patts)`` tuple of two parallel lists. For every input
        pattern, ``patts`` receives ``str(pattern)`` followed by
        ``str(reverse complement)``, and ``n_patts`` receives the matching
        ``pattern(...).plan_ambiguity()`` results in the same order.
    """
    # A None default replaces the former shared mutable ``[]`` default.
    if input_ is None:
        input_ = []

    patts = []
    n_patts = []

    for patt in input_:
        revc = patt.reverse_complement()

        # Forward strand first, reverse complement second, in both lists.
        for strand in (patt, revc):
            patts.append(str(strand))
            n_patts.append(pattern(strand).plan_ambiguity())

    return patts, n_patts
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
29
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
def fake_from_real(path=None, id_=None, name=None):
    """Build a shortened plasmid from a GenBank file.

    The result is assembled from the first 10 bases of the record, then, for
    every feature of type "CDS", the first 9 and the last 9 bases of that
    feature, and finally the last 10 bases of the record. Each CDS fragment
    is annotated in the result as a "gene" feature that keeps the strand of
    the original CDS.

    Parameters:
        path: path of the GenBank file to read.
        id_: value assigned to the ``id`` attribute of the result.
        name: value assigned to the ``name`` attribute of the result.

    Returns:
        The assembled object with ``features``, ``id`` and ``name`` set.
    """
    # Close the file deterministically instead of leaking the handle.
    with open(path, "r") as handle:
        plasmid_seq = SeqIO.read(handle, "genbank")

    f_p = plasmid_seq.seq[:10]
    f_CDS = []
    for f in plasmid_seq.features:
        if f.type != "CDS":
            continue
        # The fragment is appended at the current end of the fake plasmid,
        # so its coordinates there are [tmp_start, tmp_end).
        tmp_start = len(f_p)
        tmp_cds = (plasmid_seq[f.location.start:f.location.start + 9]
                   + plasmid_seq[f.location.end - 9:f.location.end])
        tmp_end = tmp_start + len(tmp_cds)
        f_p += tmp_cds
        f_CDS.append(SeqFeature(FeatureLocation(tmp_start, tmp_end),
                                type="gene",
                                strand=f.location.strand))
    f_p += plasmid_seq.seq[-10:]

    # NOTE(review): f_p starts as a slice of ``plasmid_seq.seq`` and is only
    # combined with record slices inside the CDS loop; when the file has no
    # CDS feature it presumably never becomes a record -- confirm that
    # callers always provide at least one CDS.
    for feat in f_CDS:
        f_p.features.append(feat)
    f_p.id = id_
    f_p.name = name

    return f_p
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
54
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
def punctuate_targets(f_patts, n_pl):
    """Map every plasmid position covered by a pattern hit to its symbols.

    Parameters:
        f_patts: mapping whose keys are pattern strings and whose values are
            iterables of hits. For each hit ``el``, ``el[1]`` is the start
            position and ``el[2]`` the exclusive end position; ``el[0]`` is
            not used here.
        n_pl: the plasmid; only ``len(n_pl)`` is used, as the wrap-around
            point for hits that span the origin.

    Returns:
        A dict mapping each covered position to the set of pattern
        characters that fall on that position.

    Raises:
        IndexError: if a hit covers more positions than its pattern has
            characters.
    """
    n_poss = {}
    max_len = len(n_pl)

    for key in f_patts:
        for el in f_patts[key]:
            start, end = el[1], el[2]
            if end < start:
                # The hit wraps around the origin of the circular sequence.
                # list() keeps the concatenation valid on Python 3, where
                # range objects do not support "+".
                positions = list(range(start, max_len)) + list(range(0, end))
            else:
                positions = range(start, end)

            # The i-th covered position carries the i-th pattern character.
            for offset, pos in enumerate(positions):
                n_poss.setdefault(pos, set()).add(key[offset])

    return n_poss
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
82
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
83
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
def print_seq(n_pl, ind_range=None):
    """Print a sequence in rows of nine symbols with aligned indices.

    The text is split so that every word character is its own token, while
    a run of non-word characters stays together as one token. Each row is
    printed as a blank separator (``print("\\n")``), a line with the tokens
    and a line with their indices; tokens are left-padded so that they line
    up with multi-digit indices.

    Parameters:
        n_pl: the sequence as a string.
        ind_range: optional ``(start, stop)`` pair. When given, only
            ``n_pl[start:stop]`` is printed and indices are numbered from
            ``start``; when ``None`` the whole string is printed, numbered
            from 0.

    Returns:
        None. The output goes to standard output.
    """
    row_width = 9

    if ind_range is None:
        fragment = n_pl
        positions = list(range(len(n_pl)))
    else:
        fragment = n_pl[ind_range[0]:ind_range[1]]
        positions = list(range(ind_range[0], ind_range[1]))

    # A list comprehension instead of filter() so len()/indexing also work
    # on Python 3, where filter() returns a lazy iterator.
    tokens = [tok for tok in re.split(r'(\w{1})', fragment) if tok]

    seq_row = ""
    ind_row = ""
    for i, token in enumerate(tokens):
        if i > 0 and i % row_width == 0:
            # The current row is full: flush it and start a new one.
            print("\n")
            print(seq_row)
            print(ind_row)
            seq_row = ""
            ind_row = ""

        label = str(positions[i])
        # One separating space plus padding so the token ends under the
        # last digit of its index.
        seq_row += " " * len(label) + token
        ind_row += " " + label

    # Flush the last (possibly partial or empty) row.
    print("\n")
    print(seq_row)
    print(ind_row)

    return None
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
156
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
157
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
def generalization(n_poss, n_pl, synonims_tables, reduced=False):
    """Turn the plasmid into a degenerate pattern and enumerate its variants.

    Every position listed in ``n_poss`` is examined:

    * If it lies inside a feature of type "CDS" or "gene" and its symbol set
      is not exactly ``{"N"}``, the codon containing it is looked up in
      ``synonims_tables`` ("synonims" for strand +1, "anti_synonims" for
      strand -1). When the listed codons show more than one base at that
      codon position, the position becomes a character class of those bases.
      The codon frame is counted from ``feat.location.start`` on both
      strands.
    * If it lies inside no such feature and its symbol set contains at least
      one of A/T/C/G, the position becomes the two-base class stored in the
      local ``transversions`` table for one of its non-"N" symbols.

    All other positions keep their original symbol.

    Parameters:
        n_poss: dict mapping positions to sets of symbols (presumably the
            output of ``punctuate_targets`` -- TODO confirm).
        n_pl: plasmid object; ``n_pl.features`` and ``n_pl.seq`` are read.
        synonims_tables: dict with the keys "synonims" and "anti_synonims",
            each mapping a codon to an iterable of codons.
        reduced: accepted for compatibility; it currently has no effect
            because both of its branches did exactly the same thing.

    Returns:
        ``sre_yield.AllStrings`` built from the degenerate pattern.
    """
    transversions = {"A": "[AT]",
                     "T": "[TA]",
                     "C": "[CG]",
                     "G": "[GC]"}
    plain_bases = {"A", "T", "C", "G"}
    coding_types = ("CDS", "gene")

    new_poss = {}

    for pos in n_poss:
        symbols = n_poss[pos]
        in_cds = False

        for feat in n_pl.features:
            if feat.type not in coding_types:
                continue
            if not (pos >= feat.location.start and pos < feat.location.end):
                continue
            in_cds = True
            # NOTE(review): the original also called n_pl.extract(feat)
            # around here and discarded the result; it was dropped as dead
            # code -- restore it if extract() has side effects.

            if symbols == {"N"}:
                continue

            # Index of pos inside its codon (0, 1 or 2); this replaces three
            # copies of the same lookup that differed only in this value.
            offset = (pos - feat.location.start) % 3
            codon_start = pos - offset
            tmp_codon = n_pl.seq[codon_start:codon_start + 3]

            if feat.strand == +1:
                table = synonims_tables["synonims"]
            elif feat.strand == -1:
                table = synonims_tables["anti_synonims"]
            else:
                # No usable strand: leave the position untouched.
                continue

            bases = set([codon[offset] for codon in table[tmp_codon]])
            if len(bases) > 1:
                # NOTE(review): the bases are joined in set iteration order,
                # which is not guaranteed to be stable across interpreter
                # runs; this only affects the order of enumeration.
                new_poss[pos] = "[" + "".join(bases) + "]"

        if not in_cds and symbols.intersection(plain_bases):
            new_poss[pos] = transversions[symbols.difference({"N"}).pop()]

    # A list comprehension instead of filter() so the tokens can be
    # enumerated on Python 3 as well.
    n_seq = [tok for tok in re.split(r'(\w{1})', str(n_pl.seq)) if tok]

    # Substitute the generalized positions; join once instead of growing a
    # string with repeated "+=".
    new_plasmid_generalized = "".join(
        [new_poss.get(pos, symbol) for pos, symbol in enumerate(n_seq)])

    return sre_yield.AllStrings(new_plasmid_generalized)
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
307
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
308
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
309 def evaluate_plasmids(plasmids = None,
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
310 original_plasmid = None,
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
311 codon_usage_table = None,
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
312 n_patts = None,
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
313 f_patts = None):
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
314
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
315 from syngenic import plasmid
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
316 from Bio.Seq import Seq
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
317 from Bio.SeqFeature import SeqFeature, FeatureLocation
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
318 from itertools import izip
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
319 import numpy as np
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
320
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
321 useful = {}
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
322
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
323 i = 0
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
324
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
325 for tmp_pl in plasmids:
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
326
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
327 if tmp_pl != original_plasmid.seq:
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
328
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
329 identical_proteic_sequence = []
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
330
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
331 for feat in original_plasmid.features:
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
332 if feat.type.lower() in ["gene", "cds"]:
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
333 identical_proteic_sequence.append(Seq(plasmid(tmp_pl).extract(feat)).translate() == Seq(original_plasmid.extract(feat)).translate())
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
334 identical_proteic_sequence = all(identical_proteic_sequence)
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
335 if (identical_proteic_sequence == True) & (set([True if el ==[] else False for el in plasmid(tmp_pl).findpatterns(n_patts, f_patts).values()]) == {True}):
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
336 print("\t" + str(i) + "/" + str(len(plasmids)))
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
337 #print(tmp_pl)
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
338 tmp = [j for j,(a1,a2) in enumerate(izip(tmp_pl,original_plasmid)) if a1!=a2]
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
339 #print(tmp)
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
340 useful["Plasmid_" + str(i)] = {}
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
341 useful["Plasmid_" + str(i)]["modified_positions"] = tmp
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
342 useful["Plasmid_" + str(i)]["codon_usage"] = []
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
343 useful["Plasmid_" + str(i)]["number_of_modification"] = len(tmp)
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
344 useful["Plasmid_" + str(i)]["sequence"] = tmp_pl
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
345 for modified_position in tmp:
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
346 in_cds = False
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
347 for feat in original_plasmid.features:
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
348 if feat.type.lower() in ["gene", "cds"]:
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
349 if ((modified_position >= feat.location.start) & (modified_position < feat.location.end)) & (feat.type in ["CDS", "gene"]):
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
350 in_cds = True
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
351 if (modified_position - feat.location.start) % 3 == 0:
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
352 # first basis of a codon
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
353 if feat.strand == +1:
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
354 tmp_codon = tmp_pl[modified_position:modified_position+3]
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
355 else:
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
356 tmp_codon = str(Seq(tmp_pl[modified_position:modified_position+3]).reverse_complement())
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
357 useful["Plasmid_" + str(i)]["codon_usage"].append(codon_usage_table.loc[tmp_codon]["Proportion"])
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
358 elif (modified_position - feat.location.start) % 3 == 1:
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
359 # second basis of a codon
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
360 if feat.strand == +1:
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
361 tmp_codon = tmp_pl[modified_position-1:modified_position+2]
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
362 else:
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
363 tmp_codon = str(Seq(tmp_pl[modified_position-1:modified_position+2]).reverse_complement())
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
364 useful["Plasmid_" + str(i)]["codon_usage"].append(codon_usage_table.loc[tmp_codon]["Proportion"])
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
365 elif (modified_position - feat.location.start) % 3 == 2:
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
366 # third basis of a codon
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
367 if feat.strand == +1:
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
368 tmp_codon = original_plasmid.seq[modified_position-2:modified_position+1]
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
369 else:
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
370 tmp_codon = str(Seq(tmp_pl[modified_position-2:modified_position+1]).reverse_complement())
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
371 useful["Plasmid_" + str(i)]["codon_usage"].append(codon_usage_table.loc[tmp_codon]["Proportion"])
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
372
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
373 useful["Plasmid_" + str(i)]["mean_codon_usage"] = np.mean(useful["Plasmid_" + str(i)]["codon_usage"])
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
374 useful["Plasmid_" + str(i)]["std_codon_usage"] = np.std(useful["Plasmid_" + str(i)]["codon_usage"])
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
375 else:
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
376 next
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
377
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
378 i += 1
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
379
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
380 useful["original_plasmids"] = original_plasmid
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
381
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
382 return useful
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
383
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
384
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
385
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
def rank_plasmids(original_useful_plasmids = None):
    """Rank candidate plasmids by codon usage and number of modifications.

    Parameters
    ----------
    original_useful_plasmids : dict or None
        Mapping of plasmid name to a dict holding at least the keys
        ``"mean_codon_usage"``, ``"std_codon_usage"`` and
        ``"number_of_modification"``.  The special key
        ``"original_plasmids"`` is ignored.

    Returns
    -------
    pandas.DataFrame
        One row per plasmid (index = plasmid name) with the three metrics as
        columns, ordered by highest mean codon usage first, then lowest
        standard deviation, then fewest modifications.  An empty frame with
        the three metric columns is returned when there is nothing to rank.

    Raises
    ------
    KeyError
        If a plasmid entry lacks one of the three metrics.
    """
    metrics = ["mean_codon_usage", "std_codon_usage", "number_of_modification"]

    if not original_useful_plasmids:
        return pd.DataFrame(columns=metrics)

    # Walk the keys in sorted order so the frame handed to sort_values is
    # built the same way on every run (the previous set-based iteration
    # depended on hash order).
    summary = {}
    for key in sorted(original_useful_plasmids, key=str):
        if key == "original_plasmids":
            continue
        entry = original_useful_plasmids[key]
        summary[key] = dict((name, entry[name]) for name in metrics)

    if not summary:
        return pd.DataFrame(columns=metrics)

    dat_plasmids = pd.DataFrame(summary).T

    # sort_values returns a new frame; its result must be returned, otherwise
    # the caller receives the plasmids unranked.
    return dat_plasmids.sort_values(metrics, ascending=[False, True, True])
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
414
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
415
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
def _color_seq_coding_features(record):
    """Return the features of *record* whose type is ``gene`` or ``CDS`` (any case)."""
    return [f for f in record.features if f.type.lower() in ("gene", "cds")]


def _color_seq_target_row(hits, seq_len):
    """Build one per-position row showing where *hits* fall on the sequence.

    Each hit is indexed as ``hit[0]`` (matched text), ``hit[1]`` (start) and
    ``hit[2]`` (end).  Positions not covered by any hit hold ``"|"``.
    """
    row = ["|"] * seq_len
    for hit in hits:
        matched, start, end = hit[0], hit[1], hit[2]
        if start < end:
            for pos in range(start, end):
                row[pos] = matched[pos - start]
        else:
            # start >= end: the head of the match is written up to the end of
            # the sequence and its last `end` characters at the beginning
            # (presumably a match spanning the origin of a circular plasmid).
            for pos in range(start, seq_len):
                row[pos] = matched[pos - start]
            tail = matched[-end:]
            for pos in range(end):
                row[pos] = tail[pos]
    return row


def _color_seq_print_block(block):
    """Pad the row labels of *block* to a common width and print every row.

    The last row of the block is the index row; every other row has each of
    its cells prefixed with as many spaces as the index label of that column
    is long.
    """
    label_width = max(len(cells[0]) for cells in block)
    index_row = block[-1]
    last = len(block) - 1
    for f, cells in enumerate(block):
        cells[0] = cells[0].ljust(label_width)
        if f < last:
            # NOTE(review): this pads by the full index-label width, which
            # makes data cells one character wider than the index cells;
            # kept as in the original output format -- confirm the intent.
            for col in range(1, len(cells)):
                if col < len(index_row):
                    cells[col] = " " * len(str(index_row[col])) + cells[col]
        print(" ".join(cells))


def print_color_seq(original = None,
                    others = None,
                    annotation_information = None,
                    tot = None,
                    ind_range = None,
                    patterns = None,
                    f_patterns = None,
                    patts = None,
                    max_row = 18):

    """Print an ANSI-coloured, row-aligned view of a plasmid and its variants.

    The output is printed in blocks of at most ``max_row`` cells per row (the
    row label counts as one cell in the first block).  Each block holds, in
    this order: one row per non-empty target, the aggregated target
    positions, the gene/CDS annotation, the CDS orientation, the original
    sequence (codons of gene/CDS features alternately bold green and bold
    red), one row per variant plasmid (modified positions in bold green) and
    the position index.  After the blocks, the gene/CDS features and
    ``f_patterns`` are printed.

    Parameters
    ----------
    original : object
        Must expose ``.seq`` (the sequence) and ``.features``; each feature
        needs ``.type``, ``.location.start``, ``.location.end``, ``.strand``.
    others : iterable
        Keys of the variant plasmids to display; each must be a string key of
        both ``tot`` and ``annotation_information``.
    annotation_information : dict
        ``annotation_information[key]["modified_positions"]`` lists the
        0-based positions that differ from the original.
    tot : dict
        ``tot[key]["sequence"]`` is the sequence of the variant plasmid.
    ind_range : sequence of two ints or None
        ``[start, end]`` window of positions to display; ``None`` shows the
        whole sequence.  Every row is clipped to this window.
    patterns : dict
        Maps a position to a collection; a position with more than one entry
        is shown as ``"+"``, with exactly one or none as ``"T"``, and a
        position absent from the mapping as a blank.
    f_patterns : dict
        Maps a target to its list of hits, each indexed as
        ``(matched_text, start, end)``.  Targets with an empty list are
        skipped.
    patts : object
        Unused; kept for interface compatibility.
    max_row : int
        Number of cells per printed row; must be at least 1.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If ``max_row`` is smaller than 1.

    Example call (from the original docstring)::

        original = plasmids["original_plasmid"],
        others = def_pls,
        annotation_information = useful_plasmids,
        tot = plasmids,
        ind_range = None
    """

    if max_row < 1:
        raise ValueError("max_row must be at least 1")

    green = "\033[1;32;40m"
    red = "\033[1;31;40m"
    reset = "\033[0m"

    seq_len = len(original.seq)
    coding = _color_seq_coding_features(original)

    ################################################################
    # Single Targets
    ################################################################

    # The numeric suffix is the position of the target among the keys of
    # f_patterns, counting the empty ones too.
    targets = {}
    for pos, key in enumerate(list(f_patterns.keys())):
        if f_patterns[key] != []:
            targets["Target" + str(pos)] = key

    target_lists = [[name] + _color_seq_target_row(f_patterns[targets[name]], seq_len)
                    for name in sorted(targets)]

    ################################################################
    # Aggregate Targets
    ################################################################
    target_positions = ["TargetPositions"]
    for k in range(seq_len):
        if k in patterns:
            target_positions.append("+" if len(patterns[k]) > 1 else "T")
        else:
            target_positions.append(" ")

    ################################################################
    # Annotation
    ################################################################
    annot = ["Annotation"]
    distance = 0
    for feat in coding:
        start, end = int(feat.location.start), int(feat.location.end)
        annot.extend("_" for _ in range(distance, start))
        # A feature is drawn as "*", filler, "/" so that it spans end - start cells.
        annot.append("*")
        annot.extend("_" for _ in range(end - start - 2))
        annot.append("/")
        distance = end
    annot.extend("_" for _ in range(distance, seq_len))

    ################################################################
    # CDS
    ################################################################

    # One list element per position.  Colouring is done on the full-length
    # list with absolute positions; the display window is applied afterwards
    # so that all rows stay aligned.
    letters = list(str(original.seq))
    direction = ["CDS_Orientation"]
    distance = 0
    alternating = 0

    for feat in coding:
        start, end = int(feat.location.start), int(feat.location.end)
        direction.extend("_" for _ in range(distance, start))
        for counter in range(start, end, 3):
            color = red if alternating % 2 == 1 else green
            # The bound keeps a last codon that runs past the end of the
            # sequence from raising IndexError.
            for pos in range(counter, min(counter + 3, seq_len)):
                letters[pos] = color + letters[pos] + reset
            alternating += 1

            if feat.strand == +1:
                direction.extend(["-", "-", ">"])
            if feat.strand == -1:
                direction.extend(["<", "-", "-"])
        distance = end
    direction.extend("_" for _ in range(distance, seq_len))

    ################################################################
    # Plasmids_ids
    ################################################################
    new_plasmids = []
    for s in others:
        variant = list(str(tot[s]["sequence"]))
        modified = set(annotation_information[s]["modified_positions"])
        for k in range(min(seq_len, len(variant))):
            if k in modified:
                variant[k] = green + variant[k] + reset
        new_plasmids.append([s] + variant)

    ################################################################
    # Index
    ################################################################

    if ind_range is None:
        lo, hi = 0, seq_len
    else:
        lo, hi, _ = slice(ind_range[0], ind_range[1]).indices(seq_len)
    hi = max(hi, lo)

    index = ["Index"] + [str(i) for i in range(lo, hi)]

    ################################################################
    # Assemble and print the blocks
    ################################################################

    full_rows = (target_lists
                 + [target_positions, annot, direction, ["Original"] + letters]
                 + new_plasmids)
    # Clip every row to the display window; cell 0 is the row label.
    rows = [row[:1] + row[1 + lo:1 + hi] for row in full_rows] + [index]

    # Each row holds (hi - lo) cells plus its label, and the label occupies
    # one cell of the first block, so one more block than the number of full
    # max_row-sized chunks is always needed.
    n_blocks = (hi - lo) // max_row + 1

    for i in range(n_blocks):
        j = i * max_row
        block = []
        for row in rows:
            chunk = row[j:j + max_row]
            if i >= 1:
                # Later blocks repeat the row label in front of their cells.
                chunk = [row[0]] + chunk
            block.append(chunk)
        print("\n")
        _color_seq_print_block(block)

    print("\n")
    print(coding)
    print("\n")
    print(f_patterns)

    return
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
664
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
def _coding_features(record):
    """Return the features of *record* whose type is "gene" or "cds" (case-insensitive)."""
    return [feat for feat in record.features
            if feat.type.lower() in ["gene", "cds"]]


def _split_bases(sequence):
    """Split *sequence* into a list of table cells.

    Every word character becomes its own one-character string; a run of
    non-word characters between two word characters is kept as one piece.
    The input is converted with ``str()`` first so that sequence objects and
    plain strings are handled alike.
    """
    import re  # local import: the module header visible here does not import re

    return [piece for piece in re.split(r'(\w{1})', str(sequence)) if piece]


def _chunk_row(row, start, width):
    """Return the cells ``row[start:start + width]`` preceded by the row label.

    ``row[0]`` is the label. When ``start`` is 0 the slice already begins
    with the label, so it is not prepended a second time.
    """
    chunk = row[start:start + width]
    if start == 0:
        return chunk
    return [row[0]] + chunk


def print_to_pdf(original = None,
                 others = None,
                 annotation_information = None,
                 tot = None,
                 ind_range = None,
                 patterns = None,
                 f_patterns = None,
                 patts = None,
                 max_row = 9):

    """
    Write two PDF reports comparing a plasmid with its modified variants.

    "comparison_syngenic_plasmids.pdf" holds a sequence of tables, one per
    block of columns, whose rows are: one row per non-empty target pattern,
    the aggregate target positions, the gene/CDS annotation, the CDS
    orientation, the original sequence, one row per plasmid in ``others``
    and the position index. "./further_information.pdf" lists the gene/CDS
    regions, the patterns with their matches and the target identifiers.
    Both files are written to the current working directory.

    Example arguments taken from the original docstring:

        original = plasmids["original_plasmid"],
        others = def_pls,
        annotation_information = useful_plasmids,
        tot = plasmids,
        ind_range = None

    Parameters
    ----------
    original : record exposing ``.seq``, ``.features`` and ``len()``
        (presumably a Biopython SeqRecord -- TODO confirm against callers).
    others : iterable of keys valid in both ``tot`` and
        ``annotation_information``.
    annotation_information : mapping; ``annotation_information[key]
        ["modified_positions"]`` is tested for membership of each position.
    tot : mapping; ``tot[key]["sequence"]`` is the sequence of that plasmid.
    ind_range : two-item sequence ``[start, end]`` used to slice the
        sequences and to build the index row; defaults to the whole record.
    patterns : mapping from position to a sized collection; a position with
        more than one entry is drawn as "+", with one entry as "T".
    f_patterns : mapping from pattern (a string) to a list of matches, each
        indexable as ``(matched_text, start, end)``.
    patts : unused; kept for backward compatibility.
    max_row : number of cells (label included) per table row chunk.

    Raises
    ------
    ValueError
        If ``max_row`` is smaller than 1.
    """

    from reportlab.lib.pagesizes import letter
    from reportlab.platypus import SimpleDocTemplate, Table
    from reportlab.pdfgen import canvas

    if max_row < 1:
        raise ValueError("max_row must be at least 1")

    seq_length = len(original.seq)
    coding = _coding_features(original)

    ################################################################
    # Single Targets
    ################################################################

    # The identifier number is the position of the pattern among all keys,
    # so patterns without matches leave gaps in the numbering.
    targets = {}
    for number, pattern in enumerate(list(f_patterns.keys())):
        if f_patterns[pattern] != []:
            targets["Target" + str(number)] = pattern

    tars = {}
    for name, pattern in targets.items():
        row = ["|"] * seq_length
        for match in f_patterns[pattern]:
            text, start, end = match[0], match[1], match[2]
            if start < end:
                for pos in range(start, end):
                    row[pos] = text[pos - start]
            else:
                # The match wraps around the origin: its head fills the end
                # of the sequence and its last `end` characters the beginning.
                for pos in range(start, seq_length):
                    row[pos] = text[pos - start]
                tail = text[-end:]
                for pos in range(end):
                    row[pos] = tail[pos]
        tars[name] = row

    # Plain string sort, as before ("Target10" sorts before "Target2").
    target_lists = [[name] + tars[name] for name in sorted(tars)]

    ################################################################
    # Aggregate Targets
    ################################################################
    target_positions = ["TargetPositions"]
    for pos in range(len(original)):
        if pos in patterns:
            target_positions.append("+" if len(patterns[pos]) > 1 else "T")
        else:
            target_positions.append(" ")

    ################################################################
    # Annotation
    ################################################################
    # Each feature is drawn as "*", filler "_" and a closing "/".
    annot = ["Annotation"]
    distance = 0
    for feat in coding:
        for _ in range(distance, feat.location.start):
            annot.append("_")
        annot.append("*")
        for _ in range(feat.location.end - feat.location.start - 2):
            annot.append("_")
        annot.append("/")
        distance = feat.location.end
    for _ in range(distance, len(original)):
        annot.append("_")

    ################################################################
    # CDS
    ################################################################

    if ind_range is None:
        ind_range = [0, len(original)]

    original_bases = _split_bases(original.seq[ind_range[0]:ind_range[1]])
    direction = ["CDS_Orientation"]
    distance = 0

    # Codons are tagged alternately with "s" and "f" (first codon "s"); the
    # counter runs across features.
    # NOTE(review): codon offsets are absolute positions while
    # original_bases is sliced by ind_range -- they only line up when
    # ind_range starts at 0; confirm with callers.
    alternating = 0

    for feat in coding:
        for _ in range(distance, feat.location.start):
            direction.append("_")
        for codon_start in range(feat.location.start, feat.location.end, 3):
            tag = 'f' if alternating % 2 == 1 else 's'
            for offset in range(3):
                original_bases[codon_start + offset] = (
                    tag + original_bases[codon_start + offset])
            alternating += 1

            # A strand other than +1/-1 adds no orientation cells.
            if feat.strand == +1:
                direction.extend(["-", "-", ">"])
            if feat.strand == -1:
                direction.extend(["<", "-", "-"])
        distance = feat.location.end
    for _ in range(distance, len(original)):
        direction.append("_")

    ################################################################
    # Plasmids_ids
    ################################################################
    new_plasmids = []
    for plasmid in others:
        row = [plasmid] + _split_bases(
            tot[plasmid]["sequence"][ind_range[0]:ind_range[1]])
        modified = annotation_information[plasmid]["modified_positions"]
        for pos in range(len(original)):
            if pos in modified:
                row[pos + 1] += "m"  # +1 skips the label cell
        new_plasmids.append(row)

    ################################################################
    # Index
    ################################################################

    index = ["Index"] + [str(i) for i in range(ind_range[0], ind_range[1])]

    ################################################################
    # Create the pdf file
    ################################################################

    original_row = ["Original"] + original_bases

    doc = SimpleDocTemplate("comparison_syngenic_plasmids.pdf",pagesize=letter,
                            rightMargin=30,leftMargin=30,
                            topMargin=30,bottomMargin=30)

    # A full row has one label cell plus one cell per base, so it needs
    # ceil((seq_length + 1) / max_row) == seq_length // max_row + 1 chunks.
    # The previous test on the chunk count itself dropped the final block
    # whenever seq_length // max_row was a multiple of max_row.
    n_blocks = seq_length // max_row + 1

    elements = []
    for block_number in range(n_blocks):
        start = block_number * max_row
        block = [_chunk_row(row, start, max_row) for row in target_lists]
        for row in (target_positions, annot, direction, original_row):
            block.append(_chunk_row(row, start, max_row))
        block.extend(_chunk_row(row, start, max_row) for row in new_plasmids)
        block.append(_chunk_row(index, start, max_row))

        elements.append(Table(block, hAlign='LEFT'))
        # Empty one-row table used as a spacer between blocks.
        elements.append(Table([["", "", "", "", ""]]))

    doc.build(elements)

    # NOTE(review): the text below is drawn on a single page with a steadily
    # decreasing y coordinate and no page break, so long listings can run
    # past the bottom of the page.
    c = canvas.Canvas("./further_information.pdf")
    c.drawString(100,750,"CDS regions:")
    upper_bound = 750
    for feat in coding:
        upper_bound -= 15
        if feat.location.strand == -1:
            sign = "-"
        else:
            sign = "+"
        c.drawString(115,upper_bound, str("[") + str(feat.location.start)+ ":" + str(feat.location.end) + "]" + "(" + sign + ")")

    upper_bound -= 30
    c.drawString(100,upper_bound,"Patterns and the corresponding targets on the plasmid sequence:")
    for f_pattern in f_patterns.keys():
        upper_bound -= 15
        c.drawString(115,upper_bound,f_pattern + ":")
        for val in f_patterns[f_pattern]:
            upper_bound -= 15
            c.drawString(130,upper_bound,str(val))
        upper_bound -= 5

    upper_bound -= 30
    c.drawString(100,upper_bound,"Identifiers of the targets found in the plasmid sequence:")
    for target in targets.keys():
        upper_bound -= 15
        c.drawString(115,upper_bound,target + ": " + targets[target])

    c.save()

    return
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
948
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
949
439b70949f8d Uploaded
gianmarco_piccinno
parents:
diff changeset
def produce_random_targets(sequence):
    """Placeholder for producing random targets on *sequence*.

    Nothing is implemented yet: the argument is ignored and None is
    returned. Cases the original notes plan to cover:

    * a target on two contiguous CDS
    * a target in a non-coding region
    * a target in a coding region
    * a target on an overlapping left
    * a target on an overlapping right
    """
    return None