Mercurial > repos > gianmarco_piccinno > project_rm
comparison galaxy/functions.py @ 42:439b70949f8d draft
Uploaded
| author | gianmarco_piccinno | 
|---|---|
| date | Mon, 20 May 2019 16:44:00 -0400 | 
| parents | |
| children | 
   comparison
  equal
  deleted
  inserted
  replaced
| 41:bd35b13fabfb | 42:439b70949f8d | 
|---|---|
| 1 import string | |
| 2 from syngenic import * | |
| 3 from Bio.Seq import Seq | |
| 4 from Bio.SeqFeature import SeqFeature, FeatureLocation | |
| 5 from pprint import pprint | |
| 6 | |
| 7 from itertools import izip | |
| 8 | |
| 9 import numpy as np | |
| 10 import pandas as pd | |
| 11 | |
def all_patterns(input_=None):
    """Collect every pattern together with its reverse complement.

    Parameters
    ----------
    input_ : iterable or None
        Pattern objects exposing ``reverse_complement()`` (the original
        inline comment suggests Biopython ``Seq`` objects -- TODO confirm).
        ``None`` (the default) is treated as "no patterns".

    Returns
    -------
    tuple(list, list)
        ``patts``: ``str()`` of each pattern followed by ``str()`` of its
        reverse complement, in input order.
        ``n_patts``: ``pattern(x).plan_ambiguity()`` for the same objects in
        the same order.  ``pattern`` is not defined in this module; it is
        presumably supplied by ``from syngenic import *``.
    """
    # A ``None`` default replaces the former shared mutable ``[]`` default.
    if input_ is None:
        input_ = []

    patts = []
    n_patts = []

    for patt in input_:
        # Forward strand first, reverse complement second.
        for strand in (patt, patt.reverse_complement()):
            patts.append(str(strand))
            n_patts.append(pattern(strand).plan_ambiguity())

    return patts, n_patts
| 29 | |
def fake_from_real(path=None, id_=None, name=None):
    """Build a shortened stand-in plasmid from a GenBank file.

    The result is assembled from the first 10 bases of the real plasmid, then
    for every feature of type ``"CDS"`` its first 9 and last 9 bases, and
    finally the last 10 bases of the real plasmid.  Each shortened CDS is
    re-annotated as a ``"gene"`` feature with the strand of the original.

    Parameters
    ----------
    path : str
        Path of the GenBank file (must contain exactly one record, as
        required by ``SeqIO.read``).
    id_, name :
        Values stored on the ``id`` and ``name`` attributes of the result.

    Returns
    -------
    The assembled object.  NOTE(review): ``f_p.features`` is only touched
    when at least one CDS was found; with no CDS the result stays whatever
    ``plasmid_seq.seq[:10] + plasmid_seq.seq[-10:]`` yields.
    """
    # ``SeqIO`` is not imported explicitly at module level; import it here so
    # the function does not depend on what ``from syngenic import *`` exports.
    from Bio import SeqIO

    # Close the file deterministically instead of leaking the handle.
    with open(path, "r") as handle:
        plasmid_seq = SeqIO.read(handle, "genbank")

    f_p = plasmid_seq.seq[:10]
    f_CDS = []
    for f in plasmid_seq.features:
        if f.type != "CDS":
            continue
        # The shortened CDS is placed right after what was assembled so far.
        tmp_start = len(f_p)
        tmp_cds = (plasmid_seq[f.location.start:f.location.start + 9]
                   + plasmid_seq[f.location.end - 9:f.location.end])
        tmp_end = tmp_start + len(tmp_cds)
        f_p += tmp_cds
        f_CDS.append(SeqFeature(FeatureLocation(tmp_start, tmp_end),
                                type="gene",
                                strand=f.location.strand))
    f_p += plasmid_seq.seq[-10:]

    for feat in f_CDS:
        f_p.features.append(feat)
    f_p.id = id_
    f_p.name = name

    return f_p
| 54 | |
def punctuate_targets(f_patts, n_pl):
    """Map every targeted plasmid position to the pattern letters covering it.

    Parameters
    ----------
    f_patts : dict
        Maps a pattern (indexable, e.g. a string) to a list of hits.  Only
        ``hit[1]`` (start) and ``hit[2]`` (end, exclusive) are read here.
    n_pl :
        The plasmid; only ``len(n_pl)`` is used, as the wrap-around point.

    Returns
    -------
    dict
        ``position -> set`` of the pattern letters (``pattern[i]`` for the
        i-th covered position) found at that position.

    Notes
    -----
    A hit whose end is smaller than its start is treated as wrapping around
    the origin: it covers ``start .. len(n_pl)-1`` followed by ``0 .. end-1``.
    """
    n_poss = {}
    max_len = len(n_pl)

    for key, hits in f_patts.items():
        for el in hits:
            start, end = el[1], el[2]
            if end < start:
                # Hit spans the origin of the circular sequence.
                positions = list(range(start, max_len)) + list(range(0, end))
            else:
                positions = range(start, end)
            for i, pos in enumerate(positions):
                # setdefault gives O(1) membership instead of scanning keys().
                n_poss.setdefault(pos, set()).add(key[i])

    return n_poss
| 82 | |
| 83 | |
def print_seq(n_pl, ind_range=None):
    """Print a sequence in rows of nine symbols with their indices below.

    Parameters
    ----------
    n_pl : str
        Sequence to print (it is handed to ``re.split``).
    ind_range : sequence of two ints or None
        ``[start, end]`` window to print; the printed indices then run from
        ``start`` to ``end - 1``.  ``None`` prints the whole sequence with
        indices starting at 0.

    Returns
    -------
    None.  Output goes to stdout: for every row a ``print("\\n")`` followed by
    the symbol line and the index line.  Each symbol is right-aligned under
    its index.
    """
    import re

    # Both cases share the same layout code; only the window differs.
    if ind_range is None:
        window = n_pl
        index = range(len(n_pl))
    else:
        window = n_pl[ind_range[0]:ind_range[1]]
        index = range(ind_range[0], ind_range[1])

    # A list (not ``filter``) so that it can be measured on Python 3 as well.
    data = [tok for tok in re.split(r'(\w{1})', window) if tok]

    seq_row = ""
    ind_row = ""
    for i, symbol in enumerate(data):
        if i > 0 and i % 9 == 0:
            # Row is full: flush it and start a new one.
            print("\n")
            print(seq_row)
            print(ind_row)
            seq_row = ""
            ind_row = ""
        label = str(index[i])
        # One separator blank plus padding so the symbol ends where the
        # (possibly multi-digit) index ends.
        seq_row += " " * len(label) + symbol
        ind_row += " " + label

    # Flush the last (possibly partial or empty) row.
    print("\n")
    print(seq_row)
    print(ind_row)

    return None
| 156 | |
| 157 | |
def generalization(n_poss, n_pl, synonims_tables, reduced=False):
    """Turn the plasmid sequence into a regex of allowed substitutions.

    For every targeted position in ``n_poss``:

    * inside a feature typed ``"CDS"`` or ``"gene"`` (exact match) the
      position is replaced by a character class of the bases that the
      synonymous codons listed in ``synonims_tables`` have at that codon
      position -- but only when more than one base is possible and the
      position is not targeted exclusively by ``"N"``;
    * outside any such feature, when the position is targeted by at least one
      of A/T/C/G, it is replaced by the two-letter class from the
      ``transversions`` table below.

    Parameters
    ----------
    n_poss : dict
        ``position -> set of letters`` (the shape produced by
        ``punctuate_targets``).
    n_pl :
        Plasmid object; ``n_pl.features`` and ``n_pl.seq`` are read.
    synonims_tables : dict
        Must provide ``"synonims"`` (used for strand ``+1``) and
        ``"anti_synonims"`` (used for strand ``-1``), each mapping a codon to
        an iterable of codons.
    reduced : bool
        Accepted for compatibility; both settings behaved identically in the
        original code and still do.

    Returns
    -------
    ``sre_yield.AllStrings`` built from the generalized pattern.
    ``sre_yield`` is not imported in this module; it is presumably supplied
    by ``from syngenic import *``.
    """
    import re

    transversions = {"A": "[AT]",
                     "T": "[TA]",
                     "C": "[CG]",
                     "G": "[GC]"}
    canonical = set(transversions)

    new_poss = {}

    for pos, letters in n_poss.items():
        in_cds = False
        for feat in n_pl.features:
            if feat.type not in ("CDS", "gene"):
                continue
            if not (feat.location.start <= pos < feat.location.end):
                continue
            in_cds = True

            if letters == {"N"}:
                continue

            # Position of ``pos`` inside its codon (0, 1 or 2) and the codon
            # that contains it, counted from the feature start.
            offset = (pos - feat.location.start) % 3
            codon_start = pos - offset
            tmp_codon = n_pl.seq[codon_start:codon_start + 3]

            if feat.strand == +1:
                table = synonims_tables["synonims"]
            elif feat.strand == -1:
                table = synonims_tables["anti_synonims"]
            else:
                # Unstranded feature: no table applies, nothing to propose.
                continue

            bases = set(codon[offset] for codon in table[tmp_codon])
            if len(bases) > 1:
                # Sorted only to make the generated pattern deterministic.
                new_poss[pos] = "[" + "".join(sorted(bases)) + "]"

        if not in_cds:
            # Pick among the canonical bases only, so an extra ambiguity
            # letter at this position can no longer cause a KeyError.
            candidates = letters & canonical
            if candidates:
                new_poss[pos] = transversions[sorted(candidates)[0]]

    n_seq = [tok for tok in re.split(r'(\w{1})', str(n_pl.seq)) if tok]

    new_plasmid_generalized = "".join(
        new_poss.get(pos, symbol) for pos, symbol in enumerate(n_seq))

    return sre_yield.AllStrings(new_plasmid_generalized)
| 307 | |
| 308 | |
def evaluate_plasmids(plasmids = None,
                      original_plasmid = None,
                      codon_usage_table = None,
                      n_patts = None,
                      f_patts = None):
    """Keep the candidate plasmids that are synonymous and target-free.

    A candidate sequence is kept when it differs from
    ``original_plasmid.seq``, when every gene/CDS feature translates to the
    same protein as in the original, and when
    ``plasmid(candidate).findpatterns(n_patts, f_patts)`` reports at least
    one pattern and no hit for any of them.

    Parameters
    ----------
    plasmids :
        Sized iterable of candidate sequences (``len(plasmids)`` is printed).
    original_plasmid :
        Reference plasmid; ``.seq``, ``.features`` and ``.extract(feat)`` are
        used, and it is iterated symbol by symbol.
    codon_usage_table :
        Looked up as ``codon_usage_table.loc[codon]["Proportion"]``
        (presumably a pandas DataFrame indexed by codon).
    n_patts, f_patts :
        Passed through unchanged to ``findpatterns``.

    Returns
    -------
    dict
        ``"Plasmid_<i>"`` (``i`` = position of the candidate in ``plasmids``)
        mapped to a dict with ``modified_positions``, ``codon_usage``,
        ``number_of_modification``, ``sequence``, ``mean_codon_usage`` and
        ``std_codon_usage``; plus ``"original_plasmids"`` mapped to
        ``original_plasmid``.
    """
    from syngenic import plasmid
    from Bio.Seq import Seq
    import numpy as np

    useful = {}

    i = 0

    for tmp_pl in plasmids:

        if tmp_pl != original_plasmid.seq:

            identical_proteic_sequence = all(
                [Seq(plasmid(tmp_pl).extract(feat)).translate()
                 == Seq(original_plasmid.extract(feat)).translate()
                 for feat in original_plasmid.features
                 if feat.type.lower() in ["gene", "cds"]])

            # True only when at least one pattern was searched and none hit.
            hits = plasmid(tmp_pl).findpatterns(n_patts, f_patts).values()
            target_free = len(hits) > 0 and all(el == [] for el in hits)

            if identical_proteic_sequence and target_free:
                print("\t" + str(i) + "/" + str(len(plasmids)))
                tmp = [j for j, (a1, a2)
                       in enumerate(zip(tmp_pl, original_plasmid)) if a1 != a2]

                entry = {"modified_positions": tmp,
                         "codon_usage": [],
                         "number_of_modification": len(tmp),
                         "sequence": tmp_pl}
                useful["Plasmid_" + str(i)] = entry

                for modified_position in tmp:
                    for feat in original_plasmid.features:
                        # Exact-case type match, as in the original test.
                        if feat.type not in ["CDS", "gene"]:
                            continue
                        if not (feat.location.start <= modified_position
                                < feat.location.end):
                            continue
                        # Codon of the *candidate* containing the change.
                        # The third-position/plus-strand case used to read
                        # the original plasmid instead, unlike every other
                        # case; it now reads the candidate as well.
                        offset = (modified_position - feat.location.start) % 3
                        codon_start = modified_position - offset
                        tmp_codon = tmp_pl[codon_start:codon_start + 3]
                        if feat.strand != +1:
                            tmp_codon = str(Seq(tmp_codon).reverse_complement())
                        entry["codon_usage"].append(
                            codon_usage_table.loc[tmp_codon]["Proportion"])

                entry["mean_codon_usage"] = np.mean(entry["codon_usage"])
                entry["std_codon_usage"] = np.std(entry["codon_usage"])

        # NOTE(review): the counter is advanced for every candidate, kept or
        # not, so the key suffix is the candidate's position in ``plasmids``.
        i += 1

    useful["original_plasmids"] = original_plasmid

    return useful
| 383 | |
| 384 | |
| 385 | |
def rank_plasmids(original_useful_plasmids = None):
    """Rank candidate plasmids by codon usage and number of modifications.

    Parameters
    ----------
    original_useful_plasmids : dict
        Mapping as returned by ``evaluate_plasmids``: one entry per candidate
        holding at least ``mean_codon_usage``, ``std_codon_usage`` and
        ``number_of_modification``.  The ``"original_plasmids"`` entry, if
        present, is ignored.

    Returns
    -------
    pandas.DataFrame
        One row per candidate with the three metrics as columns, ordered by
        highest mean codon usage, then lowest standard deviation, then fewest
        modifications.  An empty frame is returned when there is no
        candidate.
    """
    metrics = ("mean_codon_usage", "std_codon_usage", "number_of_modification")

    tmp_useful_plasmids = {}
    for key, info in original_useful_plasmids.items():
        if key == "original_plasmids":
            continue
        tmp_useful_plasmids[key] = dict((metric, info[metric])
                                        for metric in metrics)

    dat_plasmids = pd.DataFrame(tmp_useful_plasmids).T

    # Nothing to rank: the metric columns do not even exist in this case.
    if dat_plasmids.empty:
        return dat_plasmids

    # sort_values returns a new frame; the result must be kept, otherwise the
    # caller receives the candidates in arbitrary (unranked) order.
    return dat_plasmids.sort_values(list(metrics),
                                    ascending=[False, True, True])
| 414 | |
| 415 | |
def _split_bases(text):
    """Split *text* into the one-symbol cells used by the report rows."""
    import re
    return [tok for tok in re.split(r'(\w{1})', str(text)) if tok]


def _build_report_rows(original, others, annotation_information, tot,
                       ind_range, patterns, f_patterns,
                       mark_odd, mark_even, mark_modified):
    """Build the labelled rows shared by the terminal and the PDF report.

    Every row is a list whose first element is the row label and whose other
    elements are one cell per sequence position.  ``mark_odd``/``mark_even``
    decorate the cells of alternating codons of the original sequence and
    ``mark_modified`` decorates the modified positions of the candidates.

    Returns ``(targets, features, rows)`` where ``targets`` maps
    ``"Target<n>"`` to a pattern with at least one hit, ``features`` are the
    gene/CDS features and ``rows`` is ordered: one row per target, aggregate
    targets, annotation, CDS orientation, original sequence, one row per
    candidate, index.
    """
    features = [f for f in original.features
                if f.type.lower() in ["gene", "cds"]]
    length = len(original)
    seq_length = len(original.seq)

    # ---- Single targets: one row per pattern that has hits --------------
    targets = {}
    for number, patt in enumerate(f_patterns):
        if f_patterns[patt] != []:
            targets["Target" + str(number)] = patt

    target_rows = []
    for label in sorted(targets):
        cells = ["|"] * seq_length
        for hit in f_patterns[targets[label]]:
            matched, start, end = hit[0], hit[1], hit[2]
            if start < end:
                for pos in range(start, end):
                    cells[pos] = matched[pos - start]
            else:
                # Hit wraps around the origin: head at the end of the row,
                # tail at its beginning.
                for pos in range(start, seq_length):
                    cells[pos] = matched[pos - start]
                tail = matched[-end:]
                for pos in range(end):
                    cells[pos] = tail[pos]
        target_rows.append([label] + cells)

    # ---- Aggregate targets: "+" where several letters target a position -
    aggregate = ["TargetPositions"]
    for pos in range(length):
        if pos in patterns:
            aggregate.append("+" if len(patterns[pos]) > 1 else "T")
        else:
            aggregate.append(" ")

    # ---- Annotation: "*" opens a feature, "/" closes it ------------------
    annot = ["Annotation"]
    distance = 0
    for feat in features:
        annot.extend("_" for _ in range(distance, feat.location.start))
        annot.append("*")
        annot.extend("_" for _ in
                     range(feat.location.end - feat.location.start - 2))
        annot.append("/")
        distance = feat.location.end
    annot.extend("_" for _ in range(distance, length))

    # ---- CDS orientation and alternating codon marking -------------------
    original_cells = _split_bases(original.seq)
    direction = ["CDS_Orientation"]
    distance = 0
    codon_count = 0
    for feat in features:
        direction.extend("_" for _ in range(distance, feat.location.start))
        for first in range(feat.location.start, feat.location.end, 3):
            mark = mark_odd if codon_count % 2 == 1 else mark_even
            for pos in (first, first + 1, first + 2):
                original_cells[pos] = mark(original_cells[pos])
            codon_count += 1
            if feat.strand == +1:
                direction.extend(["-", "-", ">"])
            if feat.strand == -1:
                direction.extend(["<", "-", "-"])
        distance = feat.location.end
    direction.extend("_" for _ in range(distance, length))

    # ---- Candidate plasmids with their modified positions marked ---------
    plasmid_rows = []
    for ident in others:
        cells = _split_bases(tot[ident]["sequence"])
        modified = set(annotation_information[ident]["modified_positions"])
        for pos in range(length):
            if pos in modified:
                cells[pos] = mark_modified(cells[pos])
        plasmid_rows.append([ident] + cells)

    rows = (target_rows
            + [aggregate, annot, direction, ["Original"] + original_cells]
            + plasmid_rows)

    # ---- Window and index --------------------------------------------------
    if ind_range is None:
        first_index, last_index = 0, length
    else:
        first_index, last_index = ind_range[0], ind_range[1]
        # Apply the window to every row so that all rows stay in step with
        # the index row (formerly only the sequences were windowed).
        rows = [row[:1] + row[1:][first_index:last_index] for row in rows]

    rows.append(["Index"] + [str(i) for i in range(first_index, last_index)])

    return targets, features, rows


def _split_into_blocks(rows, max_row):
    """Cut *rows* into consecutive blocks of at most ``max_row`` columns.

    The first block keeps the label as its first column; every later block
    gets the label re-prepended in front of its ``max_row`` cells.  The number
    of blocks follows the length of the last (index) row, so short sequences
    yield one block and no trailing block is lost.
    """
    blocks = []
    for number, offset in enumerate(range(0, len(rows[-1]), max_row)):
        block = []
        for row in rows:
            piece = row[offset:offset + max_row]
            if number >= 1:
                piece = [row[0]] + piece
            block.append(piece)
        blocks.append(block)
    return blocks


def print_color_seq(original = None,
                    others = None,
                    annotation_information = None,
                    tot = None,
                    ind_range = None,
                    patterns = None,
                    f_patterns = None,
                    patts = None,
                    max_row = 18):
    """Print a coloured, blockwise comparison of plasmids to the terminal.

    Parameters
    ----------
    original :
        Reference plasmid (``.seq``, ``.features`` and ``len()`` are used).
    others : iterable
        Identifiers of the candidate plasmids to show.
    annotation_information : dict
        ``identifier -> {"modified_positions": [...]}``.
    tot : dict
        ``identifier -> {"sequence": ...}``.
    ind_range : sequence of two ints or None
        Window ``[start, end]`` to show; ``None`` shows everything.
    patterns : dict
        ``position -> letters`` targeting that position.
    f_patterns : dict
        ``pattern -> list of hits`` with ``hit[0]`` the matched text,
        ``hit[1]`` the start and ``hit[2]`` the end.
    patts :
        Unused.
    max_row : int
        Number of columns per printed block.

    Codons of the original sequence alternate between green and red (ANSI
    escape codes); modified positions of the candidates are green.  After the
    blocks, the gene/CDS features and ``f_patterns`` are printed.
    """
    def red(cell):
        return "\033[1;31;40m" + cell + "\033[0m"

    def green(cell):
        return "\033[1;32;40m" + cell + "\033[0m"

    targets, features, rows = _build_report_rows(
        original, others, annotation_information, tot, ind_range,
        patterns, f_patterns,
        mark_odd=red, mark_even=green, mark_modified=green)

    for block in _split_into_blocks(rows, max_row):
        print("\n")

        label_width = max(len(row[0]) for row in block)
        index_cells = block[-1]
        last = len(block) - 1
        for number, row in enumerate(block):
            row[0] = row[0] + " " * (label_width - len(row[0]))
            if number < last:
                # Prefix each cell with as many blanks as its index has
                # characters.  NOTE(review): this is one blank more than
                # print_seq uses; kept unchanged on purpose.
                for col in range(1, len(row)):
                    if col < len(index_cells):
                        row[col] = " " * len(str(index_cells[col])) + row[col]
            print(" ".join(row))

    print("\n")
    print(features)
    print("\n")
    print(f_patterns)

    return


def print_to_pdf(original = None,
                 others = None,
                 annotation_information = None,
                 tot = None,
                 ind_range = None,
                 patterns = None,
                 f_patterns = None,
                 patts = None,
                 max_row = 9):
    """Write the plasmid comparison and its legend to two PDF files.

    Parameters have the same meaning as for ``print_color_seq``.  Instead of
    colours, cells of the original sequence are prefixed with ``"s"`` and
    ``"f"`` for alternating codons and modified cells of the candidates get
    an ``"m"`` suffix.

    Side effects
    ------------
    * ``comparison_syngenic_plasmids.pdf``: one table per block of
      ``max_row`` columns, each followed by an empty spacer table.
    * ``./further_information.pdf``: the gene/CDS regions with their strand,
      every pattern with its hits, and the target identifiers.
    """
    from reportlab.lib.pagesizes import letter
    from reportlab.platypus import SimpleDocTemplate, Table
    from reportlab.pdfgen import canvas

    targets, features, rows = _build_report_rows(
        original, others, annotation_information, tot, ind_range,
        patterns, f_patterns,
        mark_odd=lambda cell: 'f' + cell,
        mark_even=lambda cell: 's' + cell,
        mark_modified=lambda cell: cell + "m")

    doc = SimpleDocTemplate("comparison_syngenic_plasmids.pdf", pagesize=letter,
                            rightMargin=30, leftMargin=30,
                            topMargin=30, bottomMargin=30)

    elements = []
    for block in _split_into_blocks(rows, max_row):
        elements.append(Table(block, hAlign='LEFT'))
        # Empty table used as vertical spacing between blocks.
        elements.append(Table([["", "", "", "", ""]]))

    doc.build(elements)

    c = canvas.Canvas("./further_information.pdf")
    c.drawString(100, 750, "CDS regions:")
    upper_bound = 750
    for feat in features:
        upper_bound -= 15
        sign = "-" if feat.location.strand == -1 else "+"
        c.drawString(115, upper_bound,
                     str("[") + str(feat.location.start) + ":"
                     + str(feat.location.end) + "]" + "(" + sign + ")")

    upper_bound -= 30
    c.drawString(100, upper_bound,
                 "Patterns and the corresponding targets on the plasmid sequence:")
    for f_pattern in f_patterns:
        upper_bound -= 15
        c.drawString(115, upper_bound, f_pattern + ":")
        for val in f_patterns[f_pattern]:
            upper_bound -= 15
            c.drawString(130, upper_bound, str(val))
        # Small extra gap after each pattern's list of hits.
        upper_bound -= 5

    upper_bound -= 30
    c.drawString(100, upper_bound,
                 "Identifiers of the targets found in the plasmid sequence:")
    for target in targets:
        upper_bound -= 15
        c.drawString(115, upper_bound, target + ": " + targets[target])

    c.save()

    return
| 948 | |
| 949 | |
def produce_random_targets(sequence):
    """Placeholder for a generator of test targets; currently does nothing.

    Planned scenarios, as listed by the original author:

    * a target on two contiguous CDS;
    * a target in a non-coding region;
    * a target in a coding region;
    * a target overlapping on the left;
    * a target overlapping on the right.

    ``sequence`` is not used yet and the function always returns ``None``.
    """
    return None
