| 42 | 1 import string | 
|  | 2 from syngenic import * | 
|  | 3 from Bio.Seq import Seq | 
|  | 4 from Bio.SeqFeature import SeqFeature, FeatureLocation | 
|  | 5 from pprint import pprint | 
|  | 6 | 
|  | 7 from itertools import izip | 
|  | 8 | 
|  | 9 import numpy as np | 
|  | 10 import pandas as pd | 
|  | 11 | 
def all_patterns(input_ = None):
    """Expand target patterns to both strands.

    For every pattern in ``input_`` the pattern itself and its reverse
    complement are recorded, in that order.

    Parameters
    ----------
    input_ : iterable or None
        Pattern objects exposing ``reverse_complement()`` and convertible
        with ``str()``.  ``None`` (the default) is treated as "no patterns".

    Returns
    -------
    tuple of (list, list)
        ``patts``   -- ``str()`` of each pattern and of its reverse complement.
        ``n_patts`` -- ``pattern(x).plan_ambiguity()`` for the same objects,
        in the same order as ``patts``.
    """
    # A ``None`` sentinel replaces the former mutable ``[]`` default so that
    # no list object is shared between calls.
    if input_ is None:
        input_ = []

    patts = []
    n_patts = []

    for patt in input_:
        # Forward orientation first, reverse complement second.
        for oriented in (patt, patt.reverse_complement()):
            patts.append(str(oriented))
            n_patts.append(pattern(oriented).plan_ambiguity())

    return patts, n_patts
|  | 29 | 
def fake_from_real(path = None, id_ = None, name = None):
    """Build a shortened "fake" plasmid from a GenBank file.

    The result is assembled from the first 10 bases of the record, then for
    every ``CDS`` feature the first 9 and the last 9 bases of that feature,
    and finally the last 10 bases of the record.  Each condensed CDS is
    re-annotated on the new sequence as a feature of type ``"gene"`` that
    keeps the strand of the source feature.

    Parameters
    ----------
    path : str
        Path of the GenBank file; it must contain exactly one record for
        ``SeqIO.read`` to succeed.
    id_ : object
        Value stored in the ``id`` attribute of the result.
    name : object
        Value stored in the ``name`` attribute of the result.

    Returns
    -------
    The assembled sequence object with ``id`` and ``name`` set.
    """
    # Read inside a ``with`` block so the file handle is closed even when
    # parsing fails (the handle used to be opened inline and never closed).
    with open(path, "r") as handle:
        plasmid_seq = SeqIO.read(handle, "genbank")

    f_p = plasmid_seq.seq[:10]
    f_CDS = []
    for f in plasmid_seq.features:
        if f.type == "CDS":
            # Coordinates of the condensed CDS on the sequence being built.
            tmp_start = len(f_p)
            tmp_cds = plasmid_seq[f.location.start:f.location.start+9] + plasmid_seq[f.location.end-9:f.location.end]
            tmp_end = tmp_start + len(tmp_cds)
            # NOTE(review): ``f_p`` starts as a slice of ``plasmid_seq.seq``
            # while ``tmp_cds`` is a slice of the record itself; the
            # ``features`` attribute used below presumably only exists once
            # a record slice has been added -- confirm for inputs without CDS.
            f_p += tmp_cds
            f_CDS.append(SeqFeature(FeatureLocation(tmp_start, tmp_end), type="gene", strand=f.location.strand))
    f_p += plasmid_seq.seq[-10:]

    for feat in f_CDS:
        f_p.features.append(feat)
    f_p.id = id_
    f_p.name = name

    return f_p
|  | 54 | 
def punctuate_targets(f_patts, n_pl):
    """Map every targeted position to the pattern symbols that cover it.

    Parameters
    ----------
    f_patts : dict
        Maps a pattern (an indexable string of symbols) to a list of hits.
        Only ``hit[1]`` (start, inclusive) and ``hit[2]`` (end, exclusive)
        are read here.  A hit whose end is smaller than its start is treated
        as wrapping around the end of the sequence.
    n_pl : sized object
        The sequence the hits refer to; only ``len(n_pl)`` is used, as the
        wrap-around point.

    Returns
    -------
    dict
        ``position -> set`` of the pattern symbols aligned to that position
        over all hits.
    """
    max_len = len(n_pl)
    n_poss = {}

    for key, hits in f_patts.items():
        for el in hits:
            start, end = el[1], el[2]
            if end < start:
                # Wrap-around hit: run to the end of the sequence and carry
                # on from position 0.  ``list()`` keeps the concatenation
                # valid on both Python 2 and Python 3.
                positions = list(range(start, max_len)) + list(range(0, end))
            else:
                positions = range(start, end)

            # The i-th covered position receives the i-th pattern symbol.
            for offset, position in enumerate(positions):
                n_poss.setdefault(position, set()).add(key[offset])

    return n_poss
|  | 82 | 
|  | 83 | 
def print_seq(n_pl, ind_range = None):
    """Print a sequence in rows of nine symbols with their indices below.

    Each row is printed as a line break followed by two lines: the symbols,
    and the matching position numbers.  Symbols are left-padded so that they
    line up with the last digit of multi-digit indices.

    Parameters
    ----------
    n_pl : str
        The sequence to display.
    ind_range : sequence of two ints or None
        ``[start, end]`` slice bounds; only ``n_pl[start:end]`` is shown and
        the printed indices start at ``start``.  ``None`` shows everything.

    Returns
    -------
    None
    """
    import re  # local import: this module has no visible ``import re``

    if ind_range is None:
        fragment = n_pl
        index = range(len(n_pl))
    else:
        fragment = n_pl[ind_range[0]:ind_range[1]]
        index = range(ind_range[0], ind_range[1])

    # Same tokenisation as before (word characters split one by one), but
    # built as a list so that it is indexable on Python 3 as well.
    data = [token for token in re.split(r'(\w{1})', fragment) if token]

    seq_row = ""
    ind_row = ""
    for i, symbol in enumerate(data):
        if i % 9 == 0 and i > 0:
            # A row of nine is complete: flush it and start the next one.
            print("\n")
            print(seq_row)
            print(ind_row)
            seq_row = ""
            ind_row = ""

        label = str(index[i])
        # Pad the symbol so it sits above the last digit of its index.
        seq_row += " " + " " * (len(label) - 1) + symbol
        ind_row += " " + label

    # Flush the last (possibly partial or empty) row.
    print("\n")
    print(seq_row)
    print(ind_row)

    return None
|  | 156 | 
|  | 157 | 
def generalization(n_poss, n_pl, synonims_tables, reduced=False):
    """Enumerate the sequence variants allowed at the targeted positions.

    Every targeted position is turned into a regular-expression character
    class, the untouched positions keep their original symbol, and the
    resulting pattern is handed to ``sre_yield.AllStrings``.

    * Position inside a feature of type ``"CDS"`` or ``"gene"``: the codon
      containing the position is looked up in ``synonims_tables`` and the
      class lists the bases the listed codons have at that codon position.
      Nothing is substituted when only one base is possible or when the
      position is covered only by ``"N"``.
    * Position outside every such feature that is covered by at least one of
      ``A``/``T``/``C``/``G``: the class comes from the fixed ``transversions``
      table below.

    Parameters
    ----------
    n_poss : dict
        ``position -> set`` of pattern symbols covering that position.
    n_pl : object
        Plasmid exposing ``features`` (each with ``type``, ``strand`` and
        ``location.start`` / ``location.end``) and ``seq``.
    synonims_tables : dict
        ``synonims_tables["synonims"][codon]`` (strand ``+1``) and
        ``synonims_tables["anti_synonims"][codon]`` (strand ``-1``) give the
        alternative codons for ``codon``.
    reduced : bool
        Accepted for backward compatibility; both settings currently produce
        the same result.

    Returns
    -------
    The ``sre_yield.AllStrings`` object built from the generalized pattern.
    """
    import re  # local import: this module has no visible ``import re``

    transversions = {"A": "[AT]",
                     "T": "[TA]",
                     "C": "[CG]",
                     "G": "[GC]"}
    plain_bases = {"A", "T", "C", "G"}

    new_poss = {}

    for pos, symbols in n_poss.items():
        in_cds = False
        for feat in n_pl.features:
            inside = (pos >= feat.location.start) and (pos < feat.location.end)
            if not (inside and feat.type in ["CDS", "gene"]):
                continue
            in_cds = True

            if symbols == {"N"}:
                # Fully ambiguous target: nothing to substitute here.
                continue

            if feat.strand == +1:
                table = synonims_tables["synonims"]
            elif feat.strand == -1:
                table = synonims_tables["anti_synonims"]
            else:
                # No table is defined for features without a strand.
                continue

            # 0, 1 or 2: index of ``pos`` within its codon, counted from the
            # feature start.
            frame = (pos - feat.location.start) % 3
            codon_start = pos - frame
            tmp_codon = n_pl.seq[codon_start:codon_start + 3]

            bases = set(codon[frame] for codon in table[tmp_codon])
            if len(bases) > 1:
                new_poss[pos] = "[" + "".join(bases) + "]"

        if (not in_cds) and (symbols & plain_bases):
            # NOTE(review): when several non-"N" symbols cover the position
            # an arbitrary one is used, and a symbol missing from
            # ``transversions`` raises KeyError -- behaviour kept as it was.
            new_poss[pos] = transversions[(symbols - {"N"}).pop()]

    # Same tokenisation as before, built as a list so it works on Python 3.
    n_seq = [token for token in re.split(r'(\w{1})', str(n_pl.seq)) if token]

    new_plasmid_generalized = "".join(
        new_poss.get(pos, symbol) for pos, symbol in enumerate(n_seq))

    return sre_yield.AllStrings(new_plasmid_generalized)
|  | 307 | 
|  | 308 | 
def evaluate_plasmids(plasmids = None,
                      original_plasmid = None,
                      codon_usage_table = None,
                      n_patts = None,
                      f_patts = None):
    """Keep the candidate plasmids that are safe to use and describe them.

    A candidate is kept when it differs from ``original_plasmid.seq``, every
    ``gene``/``cds`` feature of the original translates to the same protein
    on the candidate, and ``findpatterns`` reports an empty hit list for
    every pattern.

    Parameters
    ----------
    plasmids : iterable supporting ``len()``
        Candidate sequences.
    original_plasmid : object
        Reference plasmid exposing ``seq``, ``features`` and ``extract``.
    codon_usage_table : object
        Looked up as ``codon_usage_table.loc[codon]["Proportion"]``
        (presumably a pandas DataFrame indexed by codon -- confirm).
    n_patts, f_patts :
        Passed unchanged to ``plasmid(...).findpatterns``.

    Returns
    -------
    dict
        ``"Plasmid_<i>"`` (``i`` is the position of the candidate in
        ``plasmids``) maps to a dict with the keys ``modified_positions``,
        ``codon_usage``, ``number_of_modification``, ``sequence``,
        ``mean_codon_usage`` and ``std_codon_usage``.  The extra key
        ``"original_plasmids"`` holds ``original_plasmid``.
    """
    from syngenic import plasmid
    from Bio.Seq import Seq
    import numpy as np
    try:
        from itertools import izip
    except ImportError:  # Python 3: the builtin zip is already lazy
        izip = zip

    coding_feats = [feat for feat in original_plasmid.features
                    if feat.type.lower() in ["gene", "cds"]]
    # The reference proteins do not depend on the candidate: translate once.
    reference_proteins = [Seq(original_plasmid.extract(feat)).translate()
                          for feat in coding_feats]

    useful = {}

    for i, tmp_pl in enumerate(plasmids):

        if not tmp_pl != original_plasmid.seq:
            continue

        same_proteins = all(
            Seq(plasmid(tmp_pl).extract(feat)).translate() == protein
            for feat, protein in zip(coding_feats, reference_proteins))
        if not same_proteins:
            continue

        # Every pattern must report an empty hit list.  As before, a
        # candidate for which no pattern is reported at all is rejected.
        hits = plasmid(tmp_pl).findpatterns(n_patts, f_patts).values()
        if set(bool(el == []) for el in hits) != {True}:
            continue

        print("\t" + str(i) + "/" + str(len(plasmids)))

        # NOTE(review): the candidate is compared with ``original_plasmid``
        # itself, not with ``original_plasmid.seq`` -- presumably iterating
        # the plasmid yields its bases; confirm.
        tmp = [j for j, (a1, a2) in enumerate(izip(tmp_pl, original_plasmid)) if a1 != a2]

        codon_usage = []
        for modified_position in tmp:
            for feat in coding_feats:
                if feat.type not in ["CDS", "gene"]:
                    continue
                if not ((modified_position >= feat.location.start)
                        and (modified_position < feat.location.end)):
                    continue

                # Index (0, 1 or 2) of the modified base within its codon.
                # NOTE(review): the frame is counted from ``location.start``
                # for both strands -- confirm for minus-strand features.
                frame = (modified_position - feat.location.start) % 3
                codon_start = modified_position - frame

                # Always read the codon from the candidate.  The third-base
                # case on the forward strand used to read it from the
                # original plasmid, scoring the unmodified codon.
                tmp_codon = tmp_pl[codon_start:codon_start + 3]
                if not feat.strand == +1:
                    tmp_codon = str(Seq(tmp_codon).reverse_complement())

                codon_usage.append(codon_usage_table.loc[tmp_codon]["Proportion"])

        useful["Plasmid_" + str(i)] = {
            "modified_positions": tmp,
            "codon_usage": codon_usage,
            "number_of_modification": len(tmp),
            "sequence": tmp_pl,
            "mean_codon_usage": np.mean(codon_usage),
            "std_codon_usage": np.std(codon_usage),
        }

    useful["original_plasmids"] = original_plasmid

    return useful
|  | 383 | 
|  | 384 | 
|  | 385 | 
def rank_plasmids(original_useful_plasmids = None):
    """Rank candidate plasmids by codon usage and number of modifications.

    Parameters
    ----------
    original_useful_plasmids : dict
        Maps a plasmid name to a dict holding at least ``mean_codon_usage``,
        ``std_codon_usage`` and ``number_of_modification``.  The entry
        ``"original_plasmids"`` is ignored.

    Returns
    -------
    pandas.DataFrame
        One row per plasmid with the three metrics as columns, ordered by
        highest ``mean_codon_usage``, then lowest ``std_codon_usage``, then
        lowest ``number_of_modification``.  An empty frame is returned when
        there is no candidate.
    """
    metrics = ("mean_codon_usage", "std_codon_usage", "number_of_modification")

    tmp_useful_plasmids = {}
    for key, info in original_useful_plasmids.items():
        if key == "original_plasmids":
            continue
        tmp_useful_plasmids[key] = dict((metric, info[metric]) for metric in metrics)

    dat_plasmids = pd.DataFrame(tmp_useful_plasmids).T

    # Nothing to rank: sorting an empty frame by these columns would raise.
    if dat_plasmids.empty:
        return dat_plasmids

    # ``sort_values`` returns a new frame; the sorted result used to be
    # discarded, so the function returned the rows unsorted.
    return dat_plasmids.sort_values(
        ['mean_codon_usage', 'std_codon_usage', 'number_of_modification'],
        ascending=[False, True, True])
|  | 414 | 
|  | 415 | 
def print_color_seq(original = None,
                 others = None,
                 annotation_information = None,
                 tot = None,
                 ind_range = None,
                 patterns = None,
                 f_patterns = None,
                 patts = None,
                 max_row = 18):

    """
    Print an aligned, ANSI-coloured view of a plasmid and its variants.

    The output is written to stdout in blocks of ``max_row`` columns.  Each
    block stacks these rows: one row per target with hits, the aggregated
    target positions, the feature annotation, the CDS orientation, the
    original sequence, one row per variant plasmid and the position index.
    After the blocks, the gene/CDS features and ``f_patterns`` are printed.

    Typical call (from the original author's note):

    original = plasmids["original_plasmid"],
    others = def_pls,
    annotation_information = useful_plasmids,
    tot = plasmids,
    ind_range = None

    Parameters, as they are used in the body:

    original               -- object with ``seq``, ``features`` and ``len()``.
    others                 -- iterable of keys into ``tot`` and
                              ``annotation_information``.
    annotation_information -- ``annotation_information[key]["modified_positions"]``
                              lists the positions to highlight.
    tot                    -- ``tot[key]["sequence"]`` is the variant sequence.
    ind_range              -- ``[start, end]`` slice applied to the sequences
                              and the index row; ``None`` means the full length.
    patterns               -- dict keyed by position; more than one entry at
                              a position is shown as "+", otherwise "T".
    f_patterns             -- dict: pattern -> list of hits; each hit is
                              indexed as ``hit[0]`` (matched symbols),
                              ``hit[1]`` (start) and ``hit[2]`` (end).
    patts                  -- not used in this function.
    max_row                -- number of columns per printed block.

    Returns None.

    NOTE(review): the code indexes and sorts the result of ``dict.keys()``
    and indexes the result of ``filter()``, which requires both to be lists
    (Python 2 semantics).
    """

    ################################################################
    # Single Targets
    ################################################################

    # Name every pattern that has at least one hit "Target<n>", where n is
    # the position of the pattern in the key listing.
    targets = {}

    t_keys = f_patterns.keys()

    for l in range(len(t_keys)):
        if f_patterns[t_keys[l]] != []:
            targets["Target" + str(l)] = t_keys[l]

    # One row per target: "|" everywhere, overwritten by the matched symbols
    # at the positions covered by a hit.
    tars = {}

    for tar in targets.keys():
        tars[tar] = ["|" for i in range(len(original.seq))]

        for tar1 in f_patterns[targets[tar]]:
            if tar1[1] < tar1[2]:
                for l in range(tar1[1], tar1[2]):
                    tars[tar][l] = tar1[0][l-tar1[1]]
            else:
                # Hit whose end is not after its start: written from the
                # start up to the end of the sequence, then from position 0
                # using the last ``tar1[2]`` symbols of the match.
                for l in range(tar1[1], len(original.seq)):
                    tars[tar][l] = tar1[0][l-tar1[1]]
                for l in range(tar1[2]):
                    tars[tar][l] = tar1[0][-tar1[2]:][l]

    # Rows in alphabetical order of the target name, each prefixed by its label.
    kkk = tars.keys()
    kkk.sort()
    target_lists = [[key]+tars[key] for key in kkk]


    ################################################################
    # Aggregate Targets
    ################################################################
    # ``list += str`` appends the single character to the row.
    target_positions = ["TargetPositions"]
    for k in range(len(original)):
        if k in patterns.keys():
            if len(patterns[k]) > 1:
                target_positions += "+"#"T"
            else:
                target_positions += "T"
        else:
            target_positions += " "
    ################################################################
    # Annotation
    ################################################################
    # NOTE(review): ``direction`` is filled here but reassigned in the CDS
    # section below before it is read, so only ``annot`` survives.
    direction = []
    annot = ["Annotation"]

    # "_" outside features; each feature is drawn as "*", a run of "_" and "/".
    distance = 0
    for feat in [f for f in original.features if f.type.lower() in ["gene", "cds"]]:
        for space in range(distance, feat.location.start):
            direction.append("_")
            annot.append("_")
        annot.append("*")
        for an_space in range(feat.location.end - feat.location.start-2):
            annot.append("_")
        distance = feat.location.end
        annot.append("/")
    for space in range(distance, len(original)):
        direction.append("_")
        annot.append("_")

    ################################################################
    # CDS
    ################################################################

    if ind_range == None:
        ind_range = [0, len(original)]

    sequences = {}
    # One list element per symbol of the selected slice of the original.
    sequences["original"] = filter(None, re.split(r'(\w{1})', original.seq[ind_range[0]:ind_range[1]]))
    direction = ["CDS_Orientation"]
    distance = 0

    # Counts the codons coloured so far; its parity selects the colour.
    alternating = 0

    for feat in [f for f in original.features if f.type.lower() in ["gene", "cds"]]:
        for space in range(distance, feat.location.start):
            direction.append("_")
        if feat.type.lower() in ["gene", "cds"]:
            # Walk the feature codon by codon.
            # NOTE(review): ``counter`` is a position on the full sequence
            # but indexes the ``ind_range`` slice, and ``counter+2`` assumes
            # the feature length is a multiple of three -- confirm.
            for counter in range(feat.location.start, feat.location.end, 3):
                if alternating % 2 == 1:
                    # Odd codons: ANSI bold red on black.
                    sequences["original"][counter] = "\033[1;31;40m" + sequences["original"][counter] + "\033[0m"
                    sequences["original"][counter+1] = "\033[1;31;40m" + sequences["original"][counter+1] + "\033[0m"
                    sequences["original"][counter+2] = "\033[1;31;40m" + sequences["original"][counter+2] + "\033[0m"
                    alternating += 1

                    # "-->" for strand +1, "<--" for strand -1.
                    if feat.strand == +1:
                        direction.append("-")
                        direction.append("-")
                        direction.append(">")
                    if feat.strand == -1:
                        direction.append("<")
                        direction.append("-")
                        direction.append("-")

                else:
                    # Even codons: ANSI bold green on black.
                    sequences["original"][counter] = "\033[1;32;40m" + sequences["original"][counter] + "\033[0m"
                    sequences["original"][counter+1] = "\033[1;32;40m" + sequences["original"][counter+1] + "\033[0m"
                    sequences["original"][counter+2] = "\033[1;32;40m" + sequences["original"][counter+2] + "\033[0m"
                    alternating += 1

                    if feat.strand == +1:
                        direction.append("-")
                        direction.append("-")
                        direction.append(">")
                    if feat.strand == -1:
                        direction.append("<")
                        direction.append("-")
                        direction.append("-")
        distance = feat.location.end
    for space in range(distance, len(original)):
        direction.append("_")

    ################################################################
    # Plasmids_ids
    ################################################################
    # One row per variant: its key followed by its symbols, with the
    # modified positions wrapped in the bold-green escape sequence.
    f = 0
    new_plasmids = []
    for s in others:
        new_plasmids.append([s] + filter(None, re.split(r'(\w{1})', tot[s]["sequence"][ind_range[0]:ind_range[1]])))
        for k in range(len(original)):
            if k in annotation_information[s]["modified_positions"]:
                # ``k+1`` skips the label stored in column 0.
                new_plasmids[f][k+1] = "\033[1;32;40m" + new_plasmids[f][k+1] + "\033[0m"
        f += 1

    ################################################################
    # Index
    ################################################################

    index = ["Index"] + [str(i) for i in range(ind_range[0], ind_range[1])]

    ################################################################
    # Create the pdf file
    ################################################################
    # (Section title kept from the original; this function prints to stdout.)

    # Entries 0 and 5 are lists of rows; the other entries are single rows.
    # Every row starts with its label in column 0.
    data = {0:target_lists,
            1:target_positions,
            2:annot,
            3:direction,
            4:["Original"] + sequences["original"],
            5:new_plasmids,
            6:index}

    elements = []
    #max_row = 18
    blocks = {}

    # NOTE(review): the test below takes the number of full ``max_row``
    # chunks modulo ``max_row`` (not ``len(original.seq) % max_row``) to
    # decide whether an extra block is needed -- confirm this is intended.
    if len(range(max_row, len(original.seq)+1, max_row)) % max_row == 0:
        n_blocks = len(range(max_row, len(original.seq)+1, max_row))
    else:
        n_blocks = len(range(max_row, len(original.seq)+1, max_row)) + 1

    # ``j`` is the first column of the current block.
    j = 0

    for i in range(n_blocks):
        blocks[i] = []
        for l in range(7):
            if l in [0, 5]:
                for el in data[l]:
                    if len(el[j:]) > max_row:
                        if i >= 1:
                            # Later blocks: re-attach the row label.
                            blocks[i].append([el[0]] + el[j:j+max_row])
                        else:
                            # First block: the slice already starts with the label.
                            blocks[i].append(el[j:j+max_row])
                    else:
                        # Remainder of the row, prefixed with the label.
                        blocks[i].append([el[0]] + el[j:])
            else:
                if len(data[l][j:]) > max_row:
                    if i >= 1:
                        blocks[i].append([data[l][0]] + data[l][j:j+max_row])
                    else:
                        blocks[i].append(data[l][j:j+max_row])
                else:
                    blocks[i].append([data[l][0]] + data[l][j:])
        j += max_row
        print("\n")

        # Pad every label to the width of the longest label in the block.
        fff = []
        for f in range(len(blocks[i])):
            fff.append(len(blocks[i][f][0]))
        fff = max(fff)
        for f in range(len(blocks[i])):
            for r in range(fff-len(blocks[i][f][0])):
                blocks[i][f][0] += " "
            if f < len(blocks[i])-1:
                # Every row except the last one (the index row): left-pad
                # each cell with as many spaces as the index printed in the
                # same column has characters.
                for l in range(1,len(blocks[i][f])):
                    tmp = ""
                    if l < len(blocks[i][-1]):
                        for g in range(len(str(blocks[i][-1][l]))):
                            tmp += " "
                    blocks[i][f][l] = tmp + blocks[i][f][l]
                blocks[i][f] = " ".join(blocks[i][f])
            else:
                # The index row is joined with two spaces.
                blocks[i][f] = "  ".join(blocks[i][f])
            print(blocks[i][f])

    print("\n")
    print([f for f in original.features if f.type.lower() in ["gene", "cds"]])
    print("\n")
    print(f_patterns)

    return
|  | 664 | 
def _coding_features(record):
    """Return the features of *record* typed "gene" or "cds" (case-insensitive)."""
    return [feat for feat in record.features
            if feat.type.lower() in ("gene", "cds")]


def _split_bases(sequence):
    """Split *sequence* into a list of table cells.

    Every word character becomes its own cell; a run of non-word characters
    (e.g. alignment gaps) is kept together as a single cell. The input is
    coerced with ``str()`` first so that sequence objects are accepted as
    well as plain strings.
    """
    # Local import: `re` is not visibly imported at the top of this file.
    import re
    return [piece for piece in re.split(r'(\w{1})', str(sequence)) if piece]


def _block_row(row, block_index, max_row):
    """Return the cells of *row* that belong to table block *block_index*.

    ``row[0]`` is the row label. Block 0 starts at the label itself; every
    later block gets the label prepended so each table stays readable on
    its own. A row that is already exhausted yields just its label.
    """
    start = block_index * max_row
    cells = list(row[start:start + max_row])
    if block_index >= 1:
        cells = [row[0]] + cells
    return cells


def _draw_line(pdf, x, y, text, bottom=40, top=750):
    """Draw *text* at (*x*, *y*), starting a new page first if *y* < *bottom*.

    Returns the y coordinate that was actually used so the caller can keep
    counting down from it.
    """
    if y < bottom:
        pdf.showPage()
        y = top
    pdf.drawString(x, y, text)
    return y


def print_to_pdf(original = None,
                 others = None,
                 annotation_information = None,
                 tot = None,
                 ind_range = None,
                 patterns = None,
                 f_patterns = None,
                 patts = None,
                 max_row = 9):
    """Write a PDF comparison of a plasmid and its variants.

    Two files are created in the current working directory:

    * ``comparison_syngenic_plasmids.pdf`` -- a sequence of tables, each
      covering up to ``max_row`` columns, with one row per target track,
      the aggregate target positions, the gene/CDS annotation, the CDS
      orientation, the original sequence, every plasmid in ``others`` and
      the position index.
    * ``further_information.pdf`` -- plain-text listing of the gene/CDS
      regions, of ``f_patterns`` and of the target identifiers.

    Typical call (taken from the original docstring)::

        original = plasmids["original_plasmid"],
        others = def_pls,
        annotation_information = useful_plasmids,
        tot = plasmids,
        ind_range = None

    Parameters:
        original: record exposing ``.seq``, ``.features`` and ``len()``;
            presumably a Biopython SeqRecord -- confirm against callers.
        others: iterable of keys into ``tot`` and
            ``annotation_information``; one table row is written per key.
        annotation_information: mapping ``key -> {"modified_positions": ...}``;
            positions found there get an ``"m"`` suffix in the plasmid row.
        tot: mapping ``key -> {"sequence": ...}``.
        ind_range: ``[start, stop]`` slice applied to the sequences and the
            index row; defaults to the whole record.
            NOTE(review): feature coordinates and modified positions are
            used as absolute indices into the sliced rows, so a range that
            does not start at 0 presumably mis-aligns -- confirm.
        patterns: mapping ``position -> collection``; a position with more
            than one entry is marked ``"+"``, otherwise ``"T"``.
        f_patterns: mapping ``pattern -> list of (match, start, end)``
            entries; ``start >= end`` is treated as a match wrapping around
            the end of the sequence.
        patts: unused; kept for backward compatibility.
        max_row: number of cells taken from each row per table block.

    Returns:
        None.
    """

    from reportlab.lib.pagesizes import letter
    from reportlab.platypus import SimpleDocTemplate, Table
    from reportlab.pdfgen import canvas

    seq_len = len(original.seq)
    record_len = len(original)
    features = _coding_features(original)

    ################################################################
    # Single Targets
    ################################################################

    # Name every pattern that has at least one hit "Target<n>", where <n>
    # is the pattern's position in the iteration order of f_patterns.
    targets = {}
    for number, key in enumerate(f_patterns):
        if f_patterns[key] != []:
            targets["Target" + str(number)] = key

    tars = {}
    for label, key in targets.items():
        track = ["|"] * seq_len
        for hit in f_patterns[key]:
            matched, start, end = hit[0], hit[1], hit[2]
            if start < end:
                for pos in range(start, end):
                    track[pos] = matched[pos - start]
            else:
                # Match wraps around the origin: head of the match goes at
                # the end of the track, its last `end` characters at the
                # beginning.
                for pos in range(start, seq_len):
                    track[pos] = matched[pos - start]
                tail = matched[-end:]
                for pos in range(end):
                    track[pos] = tail[pos]
        tars[label] = track

    target_lists = [[label] + tars[label] for label in sorted(tars)]

    ################################################################
    # Aggregate Targets
    ################################################################
    target_positions = ["TargetPositions"]
    for k in range(record_len):
        if k in patterns:
            target_positions.append("+" if len(patterns[k]) > 1 else "T")
        else:
            target_positions.append(" ")

    ################################################################
    # Annotation
    ################################################################
    # "*" marks the first position of a feature, "/" the last one and "_"
    # everything else.
    annot = ["Annotation"]
    distance = 0
    for feat in features:
        for _ in range(distance, feat.location.start):
            annot.append("_")
        annot.append("*")
        for _ in range(feat.location.end - feat.location.start - 2):
            annot.append("_")
        distance = feat.location.end
        annot.append("/")
    for _ in range(distance, record_len):
        annot.append("_")

    ################################################################
    # CDS
    ################################################################

    if ind_range is None:
        ind_range = [0, record_len]

    sequences = {}
    sequences["original"] = _split_bases(original.seq[ind_range[0]:ind_range[1]])
    direction = ["CDS_Orientation"]
    distance = 0

    # Consecutive codons are tagged alternately with an "s" / "f" prefix;
    # the counter runs across features, it is not reset per feature.
    alternating = 0

    for feat in features:
        for _ in range(distance, feat.location.start):
            direction.append("_")
        strand = feat.location.strand
        for counter in range(feat.location.start, feat.location.end, 3):
            prefix = 'f' if alternating % 2 == 1 else 's'
            for pos in (counter, counter + 1, counter + 2):
                sequences["original"][pos] = prefix + sequences["original"][pos]
            alternating += 1

            # Features without a +1/-1 strand add nothing to this row.
            if strand == +1:
                direction.extend(["-", "-", ">"])
            if strand == -1:
                direction.extend(["<", "-", "-"])
        distance = feat.location.end
    for _ in range(distance, record_len):
        direction.append("_")

    ################################################################
    # Plasmids_ids
    ################################################################
    new_plasmids = []
    for s in others:
        plasmid_row = [s] + _split_bases(tot[s]["sequence"][ind_range[0]:ind_range[1]])
        for k in range(record_len):
            if k in annotation_information[s]["modified_positions"]:
                # +1 skips the row label in cell 0.
                plasmid_row[k + 1] += "m"
        new_plasmids.append(plasmid_row)

    ################################################################
    # Index
    ################################################################

    index = ["Index"] + [str(i) for i in range(ind_range[0], ind_range[1])]

    ################################################################
    # Create the pdf file
    ################################################################

    rows = (target_lists
            + [target_positions,
               annot,
               direction,
               ["Original"] + sequences["original"]]
            + new_plasmids
            + [index])

    doc = SimpleDocTemplate("comparison_syngenic_plasmids.pdf",pagesize=letter,
                        rightMargin=30,leftMargin=30,
                        topMargin=30,bottomMargin=30)

    elements = []

    # A full-length row holds the label plus one cell per base, i.e.
    # seq_len + 1 cells; this is ceil((seq_len + 1) / max_row), so the
    # trailing partial block is never dropped.
    n_blocks = (seq_len + max_row) // max_row

    for i in range(n_blocks):
        block = [_block_row(row, i, max_row) for row in rows]
        elements.append(Table(block, hAlign='LEFT'))
        # Empty one-row table used as a spacer between blocks.
        elements.append(Table([["", "", "", "", ""]]))

    doc.build(elements)

    ################################################################
    # Further information
    ################################################################

    c = canvas.Canvas("./further_information.pdf")
    c.drawString(100,750,"CDS regions:")
    upper_bound = 750
    for feat in features:
        upper_bound -= 15
        if feat.location.strand == -1:
            sign = "-"
        else:
            sign = "+"
        upper_bound = _draw_line(
            c, 115, upper_bound,
            "[" + str(feat.location.start) + ":" + str(feat.location.end) + "]" + "(" + sign + ")")

    upper_bound -= 30
    upper_bound = _draw_line(
        c, 100, upper_bound,
        "Patterns and the corresponding targets on the plasmid sequence:")
    for f_pattern in f_patterns:
        upper_bound -= 15
        upper_bound = _draw_line(c, 115, upper_bound, f_pattern + ":")
        for val in f_patterns[f_pattern]:
            upper_bound -= 15
            upper_bound = _draw_line(c, 130, upper_bound, str(val))
        upper_bound -= 5

    upper_bound -= 30
    upper_bound = _draw_line(
        c, 100, upper_bound,
        "Identifiers of the targets found in the plasmid sequence:")
    for target in targets:
        upper_bound -= 15
        upper_bound = _draw_line(c, 115, upper_bound, target + ": " + targets[target])

    c.save()


    return
|  | 948 | 
|  | 949 | 
def produce_random_targets(sequence):
    """Placeholder for a random-target generator; not implemented yet.

    Planned cases:

    * a target spanning two contiguous CDS
    * a target in a non-coding region
    * a target in a coding region
    * a target on an overlapping left
    * a target on an overlapping right

    ``sequence`` is currently ignored and the function returns ``None``.
    """
    return None