| 
41
 | 
     1 import string
 | 
| 
 | 
     2 from syngenic import *
 | 
| 
 | 
     3 from Bio.Seq import Seq
 | 
| 
 | 
     4 from Bio.SeqFeature import SeqFeature, FeatureLocation
 | 
| 
 | 
     5 from pprint import pprint
 | 
| 
 | 
     6 
 | 
| 
 | 
     7 from itertools import izip
 | 
| 
 | 
     8 
 | 
| 
 | 
     9 import numpy as np
 | 
| 
 | 
    10 import pandas as pd
 | 
| 
 | 
    11 
 | 
| 
 | 
    12 def all_patterns(input_ = []):
 | 
| 
 | 
    13 
 | 
| 
 | 
    14     patts = []
 | 
| 
 | 
    15     n_patts = []
 | 
| 
 | 
    16 
 | 
| 
 | 
    17     for patt in input_:
 | 
| 
 | 
    18         tmp_patt = patt#Seq(patt.rstrip(), IUPAC.ambiguous_dna)
 | 
| 
 | 
    19         tmp_revc = tmp_patt.reverse_complement()
 | 
| 
 | 
    20 
 | 
| 
 | 
    21         patts.append(str(tmp_patt))
 | 
| 
 | 
    22         patts.append(str(tmp_revc))
 | 
| 
 | 
    23 
 | 
| 
 | 
    24         n_patts.append(pattern(tmp_patt).plan_ambiguity())
 | 
| 
 | 
    25         n_patts.append(pattern(tmp_revc).plan_ambiguity())
 | 
| 
 | 
    26 
 | 
| 
 | 
    27 
 | 
| 
 | 
    28     return patts, n_patts
 | 
| 
 | 
    29 
 | 
| 
 | 
    30 def fake_from_real(path = None, id_ = None, name = None):
 | 
| 
 | 
    31 
 | 
| 
 | 
    32     plasmid_seq = SeqIO.read(open(path, "r"), "genbank")
 | 
| 
 | 
    33 
 | 
| 
 | 
    34     f_p = plasmid_seq.seq[:10]
 | 
| 
 | 
    35     f_CDS = []
 | 
| 
 | 
    36     for f in plasmid_seq.features:
 | 
| 
 | 
    37         if f.type == "CDS":
 | 
| 
 | 
    38             tmp_start = len(f_p)
 | 
| 
 | 
    39             tmp_cds = plasmid_seq[f.location.start:f.location.start+9] + plasmid_seq[f.location.end-9:f.location.end]
 | 
| 
 | 
    40             tmp_end = tmp_start + len(tmp_cds)
 | 
| 
 | 
    41             f_p += tmp_cds
 | 
| 
 | 
    42             f_CDS.append(SeqFeature(FeatureLocation(tmp_start, tmp_end), type="gene", strand=f.location.strand))
 | 
| 
 | 
    43             #f_p += plasmid_seq.seq[tmp_end:tmp_end+5]
 | 
| 
 | 
    44     f_p += plasmid_seq.seq[-10:]
 | 
| 
 | 
    45 
 | 
| 
 | 
    46     for feat in f_CDS:
 | 
| 
 | 
    47         f_p.features.append(feat)
 | 
| 
 | 
    48     f_p.id = id_
 | 
| 
 | 
    49     f_p.name = name
 | 
| 
 | 
    50 
 | 
| 
 | 
    51     #feature_seq_0 = f_CDS[0].extract(f_p)
 | 
| 
 | 
    52 
 | 
| 
 | 
    53     return f_p
 | 
| 
 | 
    54 
 | 
| 
 | 
    55 def punctuate_targets(f_patts, n_pl):
 | 
| 
 | 
    56 
 | 
| 
 | 
    57     n_poss = {}
 | 
| 
 | 
    58     max_len = len(n_pl)
 | 
| 
 | 
    59     for key in f_patts.keys():
 | 
| 
 | 
    60         for el in f_patts[key]:
 | 
| 
 | 
    61             if not el[2] < el[1]:
 | 
| 
 | 
    62                 tmp = range(el[1], el[2])
 | 
| 
 | 
    63                 for i in range(len(tmp)):
 | 
| 
 | 
    64                     if not tmp[i] in n_poss.keys():
 | 
| 
 | 
    65                         n_poss[tmp[i]] = [key[i]]
 | 
| 
 | 
    66                     else:
 | 
| 
 | 
    67                         n_poss[tmp[i]].append(key[i])
 | 
| 
 | 
    68             else:
 | 
| 
 | 
    69                 tmp = range(el[1], max_len) + range(0, el[2])
 | 
| 
 | 
    70                 for i in range(len(tmp)):
 | 
| 
 | 
    71                     if not tmp[i] in n_poss.keys():
 | 
| 
 | 
    72                         n_poss[tmp[i]] = [key[i]]
 | 
| 
 | 
    73                     else:
 | 
| 
 | 
    74                         n_poss[tmp[i]].append(key[i])
 | 
| 
 | 
    75 
 | 
| 
 | 
    76     for key in n_poss.keys():
 | 
| 
 | 
    77         n_poss[key] = set(n_poss[key])
 | 
| 
 | 
    78 
 | 
| 
 | 
    79     #print(n_poss)
 | 
| 
 | 
    80 
 | 
| 
 | 
    81     return n_poss
 | 
| 
 | 
    82 
 | 
| 
 | 
    83 
 | 
| 
 | 
    84 def print_seq(n_pl, ind_range = None):
 | 
| 
 | 
    85 
 | 
| 
 | 
    86     if ind_range == None:
 | 
| 
 | 
    87 
 | 
| 
 | 
    88         data = filter(None, re.split(r'(\w{1})', n_pl))
 | 
| 
 | 
    89         index = range(len(n_pl))
 | 
| 
 | 
    90 
 | 
| 
 | 
    91         seq = []
 | 
| 
 | 
    92         ind = []
 | 
| 
 | 
    93 
 | 
| 
 | 
    94         j = 0
 | 
| 
 | 
    95 
 | 
| 
 | 
    96         seq.append("")
 | 
| 
 | 
    97         ind.append("")
 | 
| 
 | 
    98 
 | 
| 
 | 
    99         for i in range(len(data)):
 | 
| 
 | 
   100 
 | 
| 
 | 
   101             if (i % 9 == 0) & (i > 0):
 | 
| 
 | 
   102                 j += 1
 | 
| 
 | 
   103                 seq.append("")
 | 
| 
 | 
   104                 ind.append("")
 | 
| 
 | 
   105                 print("\n")
 | 
| 
 | 
   106                 print(seq[j-1])
 | 
| 
 | 
   107                 print(ind[j-1])
 | 
| 
 | 
   108 
 | 
| 
 | 
   109 
 | 
| 
 | 
   110             seq[j] += " "
 | 
| 
 | 
   111             ind[j] += " "
 | 
| 
 | 
   112             for n in range(len(str(index[i]))-1):
 | 
| 
 | 
   113                 seq[j] += " "
 | 
| 
 | 
   114             seq[j] += data[i]
 | 
| 
 | 
   115             ind[j] += str(index[i])
 | 
| 
 | 
   116         print("\n")
 | 
| 
 | 
   117         print(seq[j])
 | 
| 
 | 
   118         print(ind[j])
 | 
| 
 | 
   119     else:
 | 
| 
 | 
   120         data = filter(None, re.split(r'(\w{1})', n_pl[ind_range[0]:ind_range[1]]))
 | 
| 
 | 
   121         index = range(ind_range[0], ind_range[1])
 | 
| 
 | 
   122 
 | 
| 
 | 
   123         seq = []
 | 
| 
 | 
   124         ind = []
 | 
| 
 | 
   125 
 | 
| 
 | 
   126         j = 0
 | 
| 
 | 
   127 
 | 
| 
 | 
   128         seq.append("")
 | 
| 
 | 
   129         ind.append("")
 | 
| 
 | 
   130 
 | 
| 
 | 
   131         for i in range(len(data)):
 | 
| 
 | 
   132 
 | 
| 
 | 
   133             if (i % 9 == 0) & (i > 0):
 | 
| 
 | 
   134                 j += 1
 | 
| 
 | 
   135                 seq.append("")
 | 
| 
 | 
   136                 ind.append("")
 | 
| 
 | 
   137                 print("\n")
 | 
| 
 | 
   138                 print(seq[j-1])
 | 
| 
 | 
   139                 print(ind[j-1])
 | 
| 
 | 
   140 
 | 
| 
 | 
   141 
 | 
| 
 | 
   142             seq[j] += " "
 | 
| 
 | 
   143             ind[j] += " "
 | 
| 
 | 
   144             for n in range(len(str(index[i]))-1):
 | 
| 
 | 
   145                 seq[j] += " "
 | 
| 
 | 
   146             seq[j] += data[i]
 | 
| 
 | 
   147             ind[j] += str(index[i])
 | 
| 
 | 
   148 
 | 
| 
 | 
   149         print("\n")
 | 
| 
 | 
   150         print(seq[j])
 | 
| 
 | 
   151         print(ind[j])
 | 
| 
 | 
   152 
 | 
| 
 | 
   153 
 | 
| 
 | 
   154 
 | 
| 
 | 
   155     return None
 | 
| 
 | 
   156 
 | 
| 
 | 
   157 
 | 
| 
 | 
   158 def generalization(n_poss, n_pl, synonims_tables, reduced=False):
 | 
| 
 | 
   159 
 | 
| 
 | 
   160 
 | 
| 
 | 
   161     transversions = {"A": "[AT]",
 | 
| 
 | 
   162                      "T": "[TA]",
 | 
| 
 | 
   163                      "C": "[CG]",
 | 
| 
 | 
   164                      "G": "[GC]"}
 | 
| 
 | 
   165 
 | 
| 
 | 
   166     count_codon_switch = 0
 | 
| 
 | 
   167     count_transversion = 0
 | 
| 
 | 
   168 
 | 
| 
 | 
   169     new_poss = {}
 | 
| 
 | 
   170 
 | 
| 
 | 
   171     for pos in n_poss.keys():
 | 
| 
 | 
   172         in_cds = False
 | 
| 
 | 
   173         for feat in n_pl.features:
 | 
| 
 | 
   174             if ((pos >= feat.location.start) & (pos < feat.location.end)) & (feat.type in ["CDS", "gene"]):
 | 
| 
 | 
   175                 in_cds = True
 | 
| 
 | 
   176                 count_codon_switch += 1
 | 
| 
 | 
   177                 tmp_count_transversion = 0
 | 
| 
 | 
   178                 #print("\n")
 | 
| 
 | 
   179                 #print("operate codon switch " + str(count_codon_switch))
 | 
| 
 | 
   180                 #
 | 
| 
 | 
   181                 #print("Real position: " + str(pos))
 | 
| 
 | 
   182                 #print(n_poss[pos])
 | 
| 
 | 
   183                 #print(feat.location)
 | 
| 
 | 
   184                 #print(pos - feat.location.start)
 | 
| 
 | 
   185                 #print((pos - feat.location.start)%3)
 | 
| 
 | 
   186 
 | 
| 
 | 
   187 
 | 
| 
 | 
   188                 if ((pos - feat.location.start) % 3 == 0) & (n_poss[pos] != {"N"}):
 | 
| 
 | 
   189                     # first basis of a codon
 | 
| 
 | 
   190                     #print("first basis of a codon")
 | 
| 
 | 
   191                     #print(n_pl.seq[pos:pos+3])
 | 
| 
 | 
   192 
 | 
| 
 | 
   193                     tmp_codon = n_pl.seq[pos:pos+3]
 | 
| 
 | 
   194                     bases = []
 | 
| 
 | 
   195                     if feat.strand == +1:
 | 
| 
 | 
   196                         # check the codon table
 | 
| 
 | 
   197                         for codon in synonims_tables["synonims"][tmp_codon]:
 | 
| 
 | 
   198                             bases.append(codon[0])
 | 
| 
 | 
   199                     elif feat.strand == -1:
 | 
| 
 | 
   200                         # check the anticodon table
 | 
| 
 | 
   201                         for codon in synonims_tables["anti_synonims"][tmp_codon]:
 | 
| 
 | 
   202                             bases.append(codon[0])
 | 
| 
 | 
   203                     if len(set(bases)) > 1:
 | 
| 
 | 
   204                         new_poss[pos] = "[" + "".join(list(set(bases))) + "]"
 | 
| 
 | 
   205 
 | 
| 
 | 
   206 
 | 
| 
 | 
   207                 elif ((pos - feat.location.start) % 3 == 1) & (n_poss[pos] != {"N"}):
 | 
| 
 | 
   208                     # second basis of a codon
 | 
| 
 | 
   209                     #print("second basis of a codon")
 | 
| 
 | 
   210                     #print(n_pl.seq[pos-1:pos+2])
 | 
| 
 | 
   211 
 | 
| 
 | 
   212                     tmp_codon = n_pl.seq[pos-1:pos+2]
 | 
| 
 | 
   213 
 | 
| 
 | 
   214                     bases = []
 | 
| 
 | 
   215                     if feat.strand == +1:
 | 
| 
 | 
   216                         # check the codon table
 | 
| 
 | 
   217                         for codon in synonims_tables["synonims"][tmp_codon]:
 | 
| 
 | 
   218                             bases.append(codon[1])
 | 
| 
 | 
   219                     elif feat.strand == -1:
 | 
| 
 | 
   220                         # check the anticodon table
 | 
| 
 | 
   221                         for codon in synonims_tables["anti_synonims"][tmp_codon]:
 | 
| 
 | 
   222                             bases.append(codon[1])
 | 
| 
 | 
   223                     if len(set(bases)) > 1:
 | 
| 
 | 
   224                         new_poss[pos] = "[" + "".join(list(set(bases))) + "]"
 | 
| 
 | 
   225 
 | 
| 
 | 
   226                 elif ((pos - feat.location.start) % 3 == 2) & (n_poss[pos] != {"N"}):
 | 
| 
 | 
   227                     # third basis of a codon
 | 
| 
 | 
   228                     #print("third basis of a codon")
 | 
| 
 | 
   229                     #print(n_pl.seq[pos-2:pos+1])
 | 
| 
 | 
   230 
 | 
| 
 | 
   231                     tmp_codon = n_pl.seq[pos-2:pos+1]
 | 
| 
 | 
   232 
 | 
| 
 | 
   233                     bases = []
 | 
| 
 | 
   234                     if feat.strand == +1:
 | 
| 
 | 
   235                         # check the codon table
 | 
| 
 | 
   236                         for codon in synonims_tables["synonims"][tmp_codon]:
 | 
| 
 | 
   237                             bases.append(codon[2])
 | 
| 
 | 
   238                     elif feat.strand == -1:
 | 
| 
 | 
   239                         # check the anticodon table
 | 
| 
 | 
   240                         for codon in synonims_tables["anti_synonims"][tmp_codon]:
 | 
| 
 | 
   241                             bases.append(codon[2])
 | 
| 
 | 
   242                     if len(set(bases)) > 1:
 | 
| 
 | 
   243                         new_poss[pos] = "[" + "".join(list(set(bases))) + "]"
 | 
| 
 | 
   244 
 | 
| 
 | 
   245                 tmp = n_pl.extract(feat)
 | 
| 
 | 
   246                 #print_seq(tmp, ind_range = [feat.location.start,feat.location.start])
 | 
| 
 | 
   247 
 | 
| 
 | 
   248         if (in_cds == False) & (set.intersection(n_poss[pos], {"A", "T", "C", "G"}) != set()):
 | 
| 
 | 
   249             # (set.union(n_poss[pos], {"A", "T", "C", "G"}) != {})
 | 
| 
 | 
   250             # set.union(n_poss[pos], {"A", "T", "C", "G"}) != {}
 | 
| 
 | 
   251             # n_poss[pos] != {"N"}
 | 
| 
 | 
   252 
 | 
| 
 | 
   253             if reduced == False:
 | 
| 
 | 
   254 
 | 
| 
 | 
   255                 count_transversion += 1
 | 
| 
 | 
   256                 #print("operate transversion " + str(count_transversion))
 | 
| 
 | 
   257 
 | 
| 
 | 
   258                 new_poss[pos] = transversions[set.difference(n_poss[pos], {"N"}).pop()]
 | 
| 
 | 
   259 
 | 
| 
 | 
   260             else:
 | 
| 
 | 
   261 
 | 
| 
 | 
   262                 count_transversion += 1
 | 
| 
 | 
   263                 #print("operate transversion " + str(count_transversion))
 | 
| 
 | 
   264 
 | 
| 
 | 
   265                 new_poss[pos] = transversions[set.difference(n_poss[pos], {"N"}).pop()]
 | 
| 
 | 
   266 
 | 
| 
 | 
   267                 #if tmp_count_transversion == 0:
 | 
| 
 | 
   268 
 | 
| 
 | 
   269                 #    count_transversion += 1
 | 
| 
 | 
   270                 #    tmp_count_transversion += 1
 | 
| 
 | 
   271                 #    print("operate transversion " + str(count_transversion))
 | 
| 
 | 
   272                 #
 | 
| 
 | 
   273                 #    new_poss[pos] = transversions[set.difference(n_poss[pos], {"N"}).pop()]
 | 
| 
 | 
   274 
 | 
| 
 | 
   275         #print(new_poss)
 | 
| 
 | 
   276 
 | 
| 
 | 
   277     n_seq = filter(None, re.split(r'(\w{1})', str(n_pl.seq)))
 | 
| 
 | 
   278     n_ind = range(len(n_seq))
 | 
| 
 | 
   279 
 | 
| 
 | 
   280     new_obj = {}
 | 
| 
 | 
   281 
 | 
| 
 | 
   282     for pos in n_ind:
 | 
| 
 | 
   283         if pos in new_poss.keys():
 | 
| 
 | 
   284             new_obj[pos] = new_poss[pos]
 | 
| 
 | 
   285         else:
 | 
| 
 | 
   286             new_obj[pos] = n_seq[pos]
 | 
| 
 | 
   287 
 | 
| 
 | 
   288     #pprint(new_obj)
 | 
| 
 | 
   289 
 | 
| 
 | 
   290 
 | 
| 
 | 
   291     new_plasmid_generalized = ""
 | 
| 
 | 
   292 
 | 
| 
 | 
   293 
 | 
| 
 | 
   294     for pos in n_ind:
 | 
| 
 | 
   295         new_plasmid_generalized += new_obj[pos]
 | 
| 
 | 
   296 
 | 
| 
 | 
   297     #print(new_plasmid_generalized)
 | 
| 
 | 
   298     #print(len(new_plasmid_generalized))
 | 
| 
 | 
   299 
 | 
| 
 | 
   300     t = sre_yield.AllStrings(new_plasmid_generalized)
 | 
| 
 | 
   301 
 | 
| 
 | 
   302     #print(len(t))
 | 
| 
 | 
   303 
 | 
| 
 | 
   304 
 | 
| 
 | 
   305 
 | 
| 
 | 
   306     return t
 | 
| 
 | 
   307 
 | 
| 
 | 
   308 
 | 
| 
 | 
   309 def evaluate_plasmids(plasmids = None,
 | 
| 
 | 
   310                       original_plasmid = None,
 | 
| 
 | 
   311                       codon_usage_table = None,
 | 
| 
 | 
   312                       n_patts = None,
 | 
| 
 | 
   313                       f_patts = None):
 | 
| 
 | 
   314 
 | 
| 
 | 
   315     from syngenic import plasmid
 | 
| 
 | 
   316     from Bio.Seq import Seq
 | 
| 
 | 
   317     from Bio.SeqFeature import SeqFeature, FeatureLocation
 | 
| 
 | 
   318     from itertools import izip
 | 
| 
 | 
   319     import numpy as np
 | 
| 
 | 
   320 
 | 
| 
 | 
   321     useful = {}
 | 
| 
 | 
   322 
 | 
| 
 | 
   323     i = 0
 | 
| 
 | 
   324 
 | 
| 
 | 
   325     for tmp_pl in plasmids:
 | 
| 
 | 
   326 
 | 
| 
 | 
   327         if tmp_pl != original_plasmid.seq:
 | 
| 
 | 
   328 
 | 
| 
 | 
   329             identical_proteic_sequence = []
 | 
| 
 | 
   330 
 | 
| 
 | 
   331             for feat in original_plasmid.features:
 | 
| 
 | 
   332                 if feat.type.lower() in ["gene", "cds"]:
 | 
| 
 | 
   333                     identical_proteic_sequence.append(Seq(plasmid(tmp_pl).extract(feat)).translate() == Seq(original_plasmid.extract(feat)).translate())
 | 
| 
 | 
   334             identical_proteic_sequence = all(identical_proteic_sequence)
 | 
| 
 | 
   335             if (identical_proteic_sequence == True) & (set([True if el ==[] else False for el in plasmid(tmp_pl).findpatterns(n_patts, f_patts).values()]) == {True}):
 | 
| 
 | 
   336                 print("\t" + str(i) + "/" + str(len(plasmids)))
 | 
| 
 | 
   337                 #print(tmp_pl)
 | 
| 
 | 
   338                 tmp = [j for j,(a1,a2)  in enumerate(izip(tmp_pl,original_plasmid)) if a1!=a2]
 | 
| 
 | 
   339                 #print(tmp)
 | 
| 
 | 
   340                 useful["Plasmid_" + str(i)] = {}
 | 
| 
 | 
   341                 useful["Plasmid_" + str(i)]["modified_positions"] = tmp
 | 
| 
 | 
   342                 useful["Plasmid_" + str(i)]["codon_usage"] = []
 | 
| 
 | 
   343                 useful["Plasmid_" + str(i)]["number_of_modification"] = len(tmp)
 | 
| 
 | 
   344                 useful["Plasmid_" + str(i)]["sequence"] = tmp_pl
 | 
| 
 | 
   345                 for modified_position in tmp:
 | 
| 
 | 
   346                     in_cds = False
 | 
| 
 | 
   347                     for feat in original_plasmid.features:
 | 
| 
 | 
   348                         if feat.type.lower() in ["gene", "cds"]:
 | 
| 
 | 
   349                             if ((modified_position >= feat.location.start) & (modified_position < feat.location.end)) & (feat.type in ["CDS", "gene"]):
 | 
| 
 | 
   350                                 in_cds = True
 | 
| 
 | 
   351                                 if (modified_position - feat.location.start) % 3 == 0:
 | 
| 
 | 
   352                                     # first basis of a codon
 | 
| 
 | 
   353                                     if feat.strand == +1:
 | 
| 
 | 
   354                                         tmp_codon = tmp_pl[modified_position:modified_position+3]
 | 
| 
 | 
   355                                     else:
 | 
| 
 | 
   356                                         tmp_codon = str(Seq(tmp_pl[modified_position:modified_position+3]).reverse_complement())
 | 
| 
 | 
   357                                     useful["Plasmid_" + str(i)]["codon_usage"].append(codon_usage_table.loc[tmp_codon]["Proportion"])
 | 
| 
 | 
   358                                 elif (modified_position - feat.location.start) % 3 == 1:
 | 
| 
 | 
   359                                     # second basis of a codon
 | 
| 
 | 
   360                                     if feat.strand == +1:
 | 
| 
 | 
   361                                         tmp_codon = tmp_pl[modified_position-1:modified_position+2]
 | 
| 
 | 
   362                                     else:
 | 
| 
 | 
   363                                         tmp_codon = str(Seq(tmp_pl[modified_position-1:modified_position+2]).reverse_complement())
 | 
| 
 | 
   364                                     useful["Plasmid_" + str(i)]["codon_usage"].append(codon_usage_table.loc[tmp_codon]["Proportion"])
 | 
| 
 | 
   365                                 elif (modified_position - feat.location.start) % 3 == 2:
 | 
| 
 | 
   366                                     # third basis of a codon
 | 
| 
 | 
   367                                     if feat.strand == +1:
 | 
| 
 | 
   368                                         tmp_codon = original_plasmid.seq[modified_position-2:modified_position+1]
 | 
| 
 | 
   369                                     else:
 | 
| 
 | 
   370                                         tmp_codon = str(Seq(tmp_pl[modified_position-2:modified_position+1]).reverse_complement())
 | 
| 
 | 
   371                                     useful["Plasmid_" + str(i)]["codon_usage"].append(codon_usage_table.loc[tmp_codon]["Proportion"])
 | 
| 
 | 
   372 
 | 
| 
 | 
   373                 useful["Plasmid_" + str(i)]["mean_codon_usage"] = np.mean(useful["Plasmid_" + str(i)]["codon_usage"])
 | 
| 
 | 
   374                 useful["Plasmid_" + str(i)]["std_codon_usage"] = np.std(useful["Plasmid_" + str(i)]["codon_usage"])
 | 
| 
 | 
   375             else:
 | 
| 
 | 
   376                 next
 | 
| 
 | 
   377 
 | 
| 
 | 
   378         i += 1
 | 
| 
 | 
   379 
 | 
| 
 | 
   380     useful["original_plasmids"] = original_plasmid
 | 
| 
 | 
   381 
 | 
| 
 | 
   382     return useful
 | 
| 
 | 
   383 
 | 
| 
 | 
   384 
 | 
| 
 | 
   385 
 | 
| 
 | 
   386 def rank_plasmids(original_useful_plasmids = None):
 | 
| 
 | 
   387 
 | 
| 
 | 
   388     # Rank according to codon usage and less number of modifications introduced
 | 
| 
 | 
   389 
 | 
| 
 | 
   390     tmp_useful_plasmids = {}
 | 
| 
 | 
   391 
 | 
| 
 | 
   392     #print(len(original_useful_plasmids.keys()))
 | 
| 
 | 
   393     tmp_keys = list(set.difference(set(original_useful_plasmids.keys()), {"original_plasmids"}))
 | 
| 
 | 
   394     #print(len(tmp_keys))
 | 
| 
 | 
   395     for key in tmp_keys:
 | 
| 
 | 
   396         #print(key)
 | 
| 
 | 
   397         #print(original_useful_plasmids[key])
 | 
| 
 | 
   398         tmp_useful_plasmids[key] = {"mean_codon_usage":original_useful_plasmids[key]["mean_codon_usage"],
 | 
| 
 | 
   399                                 "std_codon_usage":original_useful_plasmids[key]["std_codon_usage"],
 | 
| 
 | 
   400                                 "number_of_modification":original_useful_plasmids[key]["number_of_modification"]}
 | 
| 
 | 
   401 
 | 
| 
 | 
   402     dat_plasmids = pd.DataFrame(tmp_useful_plasmids).T
 | 
| 
 | 
   403 
 | 
| 
 | 
   404     dat_plasmids.shape
 | 
| 
 | 
   405 
 | 
| 
 | 
   406     dat_plasmids.head()
 | 
| 
 | 
   407 
 | 
| 
 | 
   408     dat_plasmids.sort_values(['mean_codon_usage', 'std_codon_usage', 'number_of_modification'], ascending=[False, True, True])
 | 
| 
 | 
   409 
 | 
| 
 | 
   410     dat_plasmids.index
 | 
| 
 | 
   411 
 | 
| 
 | 
   412     return dat_plasmids
 | 
| 
 | 
   413     #return tmp_useful_plasmids
 | 
| 
 | 
   414 
 | 
| 
 | 
   415 
 | 
| 
 | 
   416 def print_color_seq(original = None,
 | 
| 
 | 
   417                  others = None,
 | 
| 
 | 
   418                  annotation_information = None,
 | 
| 
 | 
   419                  tot = None,
 | 
| 
 | 
   420                  ind_range = None,
 | 
| 
 | 
   421                  patterns = None,
 | 
| 
 | 
   422                  f_patterns = None,
 | 
| 
 | 
   423                  patts = None,
 | 
| 
 | 
   424                  max_row = 18):
 | 
| 
 | 
   425 
 | 
| 
 | 
   426     """
 | 
| 
 | 
   427 
 | 
| 
 | 
   428     original = plasmids["original_plasmid"],
 | 
| 
 | 
   429     others = def_pls,
 | 
| 
 | 
   430     annotation_information = useful_plasmids,
 | 
| 
 | 
   431     tot = plasmids,
 | 
| 
 | 
   432     ind_range = None
 | 
| 
 | 
   433 
 | 
| 
 | 
   434     """
 | 
| 
 | 
   435 
 | 
| 
 | 
   436     ################################################################
 | 
| 
 | 
   437     # Single Targets
 | 
| 
 | 
   438     ################################################################
 | 
| 
 | 
   439 
 | 
| 
 | 
   440     targets = {}
 | 
| 
 | 
   441 
 | 
| 
 | 
   442     t_keys = f_patterns.keys()
 | 
| 
 | 
   443 
 | 
| 
 | 
   444     for l in range(len(t_keys)):
 | 
| 
 | 
   445         if f_patterns[t_keys[l]] != []:
 | 
| 
 | 
   446             targets["Target" + str(l)] = t_keys[l]
 | 
| 
 | 
   447 
 | 
| 
 | 
   448     #print(targets)
 | 
| 
 | 
   449     #print("\n")
 | 
| 
 | 
   450     tars = {}
 | 
| 
 | 
   451 
 | 
| 
 | 
   452     for tar in targets.keys():
 | 
| 
 | 
   453         #print(tar)
 | 
| 
 | 
   454         tars[tar] = ["|" for i in range(len(original.seq))]
 | 
| 
 | 
   455 
 | 
| 
 | 
   456         for tar1 in f_patterns[targets[tar]]:
 | 
| 
 | 
   457             #print(tar1)
 | 
| 
 | 
   458             if tar1[1] < tar1[2]:
 | 
| 
 | 
   459                 for l in range(tar1[1], tar1[2]):
 | 
| 
 | 
   460                     tars[tar][l] = tar1[0][l-tar1[1]]
 | 
| 
 | 
   461             else:
 | 
| 
 | 
   462                 for l in range(tar1[1], len(original.seq)):
 | 
| 
 | 
   463                     tars[tar][l] = tar1[0][l-tar1[1]]
 | 
| 
 | 
   464                 for l in range(tar1[2]):
 | 
| 
 | 
   465                     tars[tar][l] = tar1[0][-tar1[2]:][l]
 | 
| 
 | 
   466 
 | 
| 
 | 
   467     #print(tars)
 | 
| 
 | 
   468     kkk = tars.keys()
 | 
| 
 | 
   469     kkk.sort()
 | 
| 
 | 
   470     target_lists = [[key]+tars[key] for key in kkk]
 | 
| 
 | 
   471     #print(target_lists); print(len(target_lists[0]))
 | 
| 
 | 
   472 
 | 
| 
 | 
   473 
 | 
| 
 | 
   474     ################################################################
 | 
| 
 | 
   475     # Aggregate Targets
 | 
| 
 | 
   476     ################################################################
 | 
| 
 | 
   477     target_positions = ["TargetPositions"]
 | 
| 
 | 
   478     for k in range(len(original)):
 | 
| 
 | 
   479         if k in patterns.keys():
 | 
| 
 | 
   480             if len(patterns[k]) > 1:
 | 
| 
 | 
   481                 target_positions += "+"#"T"
 | 
| 
 | 
   482             else:
 | 
| 
 | 
   483                 target_positions += "T"
 | 
| 
 | 
   484         else:
 | 
| 
 | 
   485             target_positions += " "
 | 
| 
 | 
   486     #print(target_positions); print(len(target_positions))
 | 
| 
 | 
   487     ################################################################
 | 
| 
 | 
   488     # Annotation
 | 
| 
 | 
   489     ################################################################
 | 
| 
 | 
   490     direction = []
 | 
| 
 | 
   491     annot = ["Annotation"]
 | 
| 
 | 
   492 
 | 
| 
 | 
   493     distance = 0
 | 
| 
 | 
   494     for feat in [f for f in original.features if f.type.lower() in ["gene", "cds"]]:
 | 
| 
 | 
   495         for space in range(distance, feat.location.start):
 | 
| 
 | 
   496             direction.append("_")
 | 
| 
 | 
   497             annot.append("_")
 | 
| 
 | 
   498         annot.append("*")
 | 
| 
 | 
   499         for an_space in range(feat.location.end - feat.location.start-2):
 | 
| 
 | 
   500             annot.append("_")
 | 
| 
 | 
   501         distance = feat.location.end
 | 
| 
 | 
   502         annot.append("/")
 | 
| 
 | 
   503     for space in range(distance, len(original)):
 | 
| 
 | 
   504         direction.append("_")
 | 
| 
 | 
   505         annot.append("_")
 | 
| 
 | 
   506     #print(annot)
 | 
| 
 | 
   507 
 | 
| 
 | 
   508     ################################################################
 | 
| 
 | 
   509     # CDS
 | 
| 
 | 
   510     ################################################################
 | 
| 
 | 
   511 
 | 
| 
 | 
   512     if ind_range == None:
 | 
| 
 | 
   513         ind_range = [0, len(original)]
 | 
| 
 | 
   514 
 | 
| 
 | 
   515     sequences = {}
 | 
| 
 | 
   516     sequences["original"] = filter(None, re.split(r'(\w{1})', original.seq[ind_range[0]:ind_range[1]]))
 | 
| 
 | 
   517     direction = ["CDS_Orientation"]
 | 
| 
 | 
   518     distance = 0
 | 
| 
 | 
   519 
 | 
| 
 | 
   520     alternating = 0
 | 
| 
 | 
   521 
 | 
| 
 | 
   522     for feat in [f for f in original.features if f.type.lower() in ["gene", "cds"]]:
 | 
| 
 | 
   523         for space in range(distance, feat.location.start):
 | 
| 
 | 
   524             direction.append("_")
 | 
| 
 | 
   525         if feat.type.lower() in ["gene", "cds"]:
 | 
| 
 | 
   526             for counter in range(feat.location.start, feat.location.end, 3):
 | 
| 
 | 
   527                 if alternating % 2 == 1:
 | 
| 
 | 
   528                     sequences["original"][counter] = "\033[1;31;40m" + sequences["original"][counter] + "\033[0m"
 | 
| 
 | 
   529                     sequences["original"][counter+1] = "\033[1;31;40m" + sequences["original"][counter+1] + "\033[0m"
 | 
| 
 | 
   530                     sequences["original"][counter+2] = "\033[1;31;40m" + sequences["original"][counter+2] + "\033[0m"
 | 
| 
 | 
   531                     alternating += 1
 | 
| 
 | 
   532 
 | 
| 
 | 
   533                     if feat.strand == +1:
 | 
| 
 | 
   534                         direction.append("-")
 | 
| 
 | 
   535                         direction.append("-")
 | 
| 
 | 
   536                         direction.append(">")
 | 
| 
 | 
   537                     if feat.strand == -1:
 | 
| 
 | 
   538                         direction.append("<")
 | 
| 
 | 
   539                         direction.append("-")
 | 
| 
 | 
   540                         direction.append("-")
 | 
| 
 | 
   541 
 | 
| 
 | 
   542                 else:
 | 
| 
 | 
   543                     sequences["original"][counter] = "\033[1;32;40m" + sequences["original"][counter] + "\033[0m"
 | 
| 
 | 
   544                     sequences["original"][counter+1] = "\033[1;32;40m" + sequences["original"][counter+1] + "\033[0m"
 | 
| 
 | 
   545                     sequences["original"][counter+2] = "\033[1;32;40m" + sequences["original"][counter+2] + "\033[0m"
 | 
| 
 | 
   546                     alternating += 1
 | 
| 
 | 
   547 
 | 
| 
 | 
   548                     if feat.strand == +1:
 | 
| 
 | 
   549                         direction.append("-")
 | 
| 
 | 
   550                         direction.append("-")
 | 
| 
 | 
   551                         direction.append(">")
 | 
| 
 | 
   552                     if feat.strand == -1:
 | 
| 
 | 
   553                         direction.append("<")
 | 
| 
 | 
   554                         direction.append("-")
 | 
| 
 | 
   555                         direction.append("-")
 | 
| 
 | 
   556         distance = feat.location.end
 | 
| 
 | 
   557     for space in range(distance, len(original)):
 | 
| 
 | 
   558         direction.append("_")
 | 
| 
 | 
   559 
 | 
| 
 | 
   560     #print(direction); print(len(direction))
 | 
| 
 | 
   561     ################################################################
 | 
| 
 | 
   562     # Plasmids_ids
 | 
| 
 | 
   563     ################################################################
 | 
| 
 | 
   564     f = 0
 | 
| 
 | 
   565     new_plasmids = []
 | 
| 
 | 
   566     for s in others:
 | 
| 
 | 
   567         new_plasmids.append([s] + filter(None, re.split(r'(\w{1})', tot[s]["sequence"][ind_range[0]:ind_range[1]])))
 | 
| 
 | 
   568         for k in range(len(original)):
 | 
| 
 | 
   569             if k in annotation_information[s]["modified_positions"]:
 | 
| 
 | 
   570                 new_plasmids[f][k+1] = "\033[1;32;40m" + new_plasmids[f][k+1] + "\033[0m"
 | 
| 
 | 
   571         f += 1
 | 
| 
 | 
   572 
 | 
| 
 | 
   573     #print(new_plasmids)
 | 
| 
 | 
   574 
 | 
| 
 | 
   575     ################################################################
 | 
| 
 | 
   576     # Index
 | 
| 
 | 
   577     ################################################################
 | 
| 
 | 
   578 
 | 
| 
 | 
   579     index = ["Index"] + [str(i) for i in range(ind_range[0], ind_range[1])]
 | 
| 
 | 
   580 
 | 
| 
 | 
   581     ################################################################
 | 
| 
 | 
   582     # Create the pdf file
 | 
| 
 | 
   583     ################################################################
 | 
| 
 | 
   584 
 | 
| 
 | 
   585     #print(target_lists); print(len(target_lists[0]))
 | 
| 
 | 
   586     #print(target_positions); print(len(target_positions))
 | 
| 
 | 
   587     #print(annot); print(len(annot))
 | 
| 
 | 
   588     #print(direction); print(len(direction))
 | 
| 
 | 
   589     #print(new_plasmids); print(len(new_plasmids[0]))
 | 
| 
 | 
   590     #print(index)
 | 
| 
 | 
   591 
 | 
| 
 | 
   592     data = {0:target_lists,
 | 
| 
 | 
   593             1:target_positions,
 | 
| 
 | 
   594             2:annot,
 | 
| 
 | 
   595             3:direction,
 | 
| 
 | 
   596             4:["Original"] + sequences["original"],
 | 
| 
 | 
   597             5:new_plasmids,
 | 
| 
 | 
   598             6:index}
 | 
| 
 | 
   599 
 | 
| 
 | 
   600     elements = []
 | 
| 
 | 
   601     #max_row = 18
 | 
| 
 | 
   602     blocks = {}
 | 
| 
 | 
   603 
 | 
| 
 | 
   604     if len(range(max_row, len(original.seq)+1, max_row)) % max_row == 0:
 | 
| 
 | 
   605         n_blocks = len(range(max_row, len(original.seq)+1, max_row))
 | 
| 
 | 
   606     else:
 | 
| 
 | 
   607         n_blocks = len(range(max_row, len(original.seq)+1, max_row)) + 1
 | 
| 
 | 
   608 
 | 
| 
 | 
   609     j = 0
 | 
| 
 | 
   610 
 | 
| 
 | 
   611     for i in range(n_blocks):
 | 
| 
 | 
   612         blocks[i] = []
 | 
| 
 | 
   613         for l in range(7):
 | 
| 
 | 
   614             if l in [0, 5]:
 | 
| 
 | 
   615                 for el in data[l]:
 | 
| 
 | 
   616                     if len(el[j:]) > max_row:
 | 
| 
 | 
   617                         if i >= 1:
 | 
| 
 | 
   618                             blocks[i].append([el[0]] + el[j:j+max_row])
 | 
| 
 | 
   619                         else:
 | 
| 
 | 
   620                             blocks[i].append(el[j:j+max_row])
 | 
| 
 | 
   621                     else:
 | 
| 
 | 
   622                         blocks[i].append([el[0]] + el[j:])
 | 
| 
 | 
   623             else:
 | 
| 
 | 
   624                 if len(data[l][j:]) > max_row:
 | 
| 
 | 
   625                     if i >= 1:
 | 
| 
 | 
   626                         blocks[i].append([data[l][0]] + data[l][j:j+max_row])
 | 
| 
 | 
   627                     else:
 | 
| 
 | 
   628                         blocks[i].append(data[l][j:j+max_row])
 | 
| 
 | 
   629                 else:
 | 
| 
 | 
   630                     blocks[i].append([data[l][0]] + data[l][j:])
 | 
| 
 | 
   631         j += max_row
 | 
| 
 | 
   632         print("\n")
 | 
| 
 | 
   633         #print(blocks[i])
 | 
| 
 | 
   634 
 | 
| 
 | 
   635         fff = []
 | 
| 
 | 
   636         for f in range(len(blocks[i])):
 | 
| 
 | 
   637             fff.append(len(blocks[i][f][0]))
 | 
| 
 | 
   638         fff = max(fff)
 | 
| 
 | 
   639         for f in range(len(blocks[i])):
 | 
| 
 | 
   640             for r in range(fff-len(blocks[i][f][0])):
 | 
| 
 | 
   641                 blocks[i][f][0] += " "
 | 
| 
 | 
   642             if f < len(blocks[i])-1:
 | 
| 
 | 
   643                 for l in range(1,len(blocks[i][f])):
 | 
| 
 | 
   644                     tmp = ""
 | 
| 
 | 
   645                     #print(blocks[i][-1][l])
 | 
| 
 | 
   646                     if l < len(blocks[i][-1]):
 | 
| 
 | 
   647                         for g in range(len(str(blocks[i][-1][l]))):
 | 
| 
 | 
   648                             #print(g)
 | 
| 
 | 
   649                             tmp += " "
 | 
| 
 | 
   650                     blocks[i][f][l] = tmp + blocks[i][f][l]
 | 
| 
 | 
   651                     #print(blocks[i][f][l])
 | 
| 
 | 
   652                 blocks[i][f] = " ".join(blocks[i][f])
 | 
| 
 | 
   653             else:
 | 
| 
 | 
   654                 blocks[i][f] = "  ".join(blocks[i][f])
 | 
| 
 | 
   655             print(blocks[i][f])
 | 
| 
 | 
   656         #print(" ".join(blocks[i][-1]))
 | 
| 
 | 
   657 
 | 
| 
 | 
   658     print("\n")
 | 
| 
 | 
   659     print([f for f in original.features if f.type.lower() in ["gene", "cds"]])
 | 
| 
 | 
   660     print("\n")
 | 
| 
 | 
   661     print(f_patterns)
 | 
| 
 | 
   662 
 | 
| 
 | 
   663     return
 | 
| 
 | 
   664 
 | 
| 
 | 
   665 def print_to_pdf(original = None,
 | 
| 
 | 
   666                  others = None,
 | 
| 
 | 
   667                  annotation_information = None,
 | 
| 
 | 
   668                  tot = None,
 | 
| 
 | 
   669                  ind_range = None,
 | 
| 
 | 
   670                  patterns = None,
 | 
| 
 | 
   671                  f_patterns = None,
 | 
| 
 | 
   672                  patts = None,
 | 
| 
 | 
   673                  max_row = 9):
 | 
| 
 | 
   674 
 | 
| 
 | 
   675     """
 | 
| 
 | 
   676 
 | 
| 
 | 
   677     original = plasmids["original_plasmid"],
 | 
| 
 | 
   678     others = def_pls,
 | 
| 
 | 
   679     annotation_information = useful_plasmids,
 | 
| 
 | 
   680     tot = plasmids,
 | 
| 
 | 
   681     ind_range = None
 | 
| 
 | 
   682 
 | 
| 
 | 
   683     """
 | 
| 
 | 
   684 
 | 
| 
 | 
   685     from reportlab.lib import colors
 | 
| 
 | 
   686     from reportlab.lib.pagesizes import letter
 | 
| 
 | 
   687     from reportlab.platypus import SimpleDocTemplate, Table, TableStyle
 | 
| 
 | 
   688     from reportlab.pdfgen import canvas
 | 
| 
 | 
   689 
 | 
| 
 | 
   690     ################################################################
 | 
| 
 | 
   691     # Single Targets
 | 
| 
 | 
   692     ################################################################
 | 
| 
 | 
   693 
 | 
| 
 | 
   694     targets = {}
 | 
| 
 | 
   695 
 | 
| 
 | 
   696     t_keys = f_patterns.keys()
 | 
| 
 | 
   697 
 | 
| 
 | 
   698     for l in range(len(t_keys)):
 | 
| 
 | 
   699         if f_patterns[t_keys[l]] != []:
 | 
| 
 | 
   700             targets["Target" + str(l)] = t_keys[l]
 | 
| 
 | 
   701 
 | 
| 
 | 
   702     #print(targets)
 | 
| 
 | 
   703     #print("\n")
 | 
| 
 | 
   704     tars = {}
 | 
| 
 | 
   705 
 | 
| 
 | 
   706     for tar in targets.keys():
 | 
| 
 | 
   707         #print(tar)
 | 
| 
 | 
   708         tars[tar] = ["|" for i in range(len(original.seq))]
 | 
| 
 | 
   709 
 | 
| 
 | 
   710         for tar1 in f_patterns[targets[tar]]:
 | 
| 
 | 
   711             #print(tar1)
 | 
| 
 | 
   712             if tar1[1] < tar1[2]:
 | 
| 
 | 
   713                 for l in range(tar1[1], tar1[2]):
 | 
| 
 | 
   714                     tars[tar][l] = tar1[0][l-tar1[1]]
 | 
| 
 | 
   715             else:
 | 
| 
 | 
   716                 for l in range(tar1[1], len(original.seq)):
 | 
| 
 | 
   717                     tars[tar][l] = tar1[0][l-tar1[1]]
 | 
| 
 | 
   718                 for l in range(tar1[2]):
 | 
| 
 | 
   719                     tars[tar][l] = tar1[0][-tar1[2]:][l]
 | 
| 
 | 
   720 
 | 
| 
 | 
   721     #print(tars)
 | 
| 
 | 
   722     kkk = tars.keys()
 | 
| 
 | 
   723     kkk.sort()
 | 
| 
 | 
   724     target_lists = [[key]+tars[key] for key in kkk]
 | 
| 
 | 
   725     #print(target_lists); print(len(target_lists[0]))
 | 
| 
 | 
   726 
 | 
| 
 | 
   727 
 | 
| 
 | 
   728     ################################################################
 | 
| 
 | 
   729     # Aggregate Targets
 | 
| 
 | 
   730     ################################################################
 | 
| 
 | 
   731     target_positions = ["TargetPositions"]
 | 
| 
 | 
   732     for k in range(len(original)):
 | 
| 
 | 
   733         if k in patterns.keys():
 | 
| 
 | 
   734             if len(patterns[k]) > 1:
 | 
| 
 | 
   735                 target_positions += "+"#"T"
 | 
| 
 | 
   736             else:
 | 
| 
 | 
   737                 target_positions += "T"
 | 
| 
 | 
   738         else:
 | 
| 
 | 
   739             target_positions += " "
 | 
| 
 | 
   740     #print(target_positions); print(len(target_positions))
 | 
| 
 | 
   741     ################################################################
 | 
| 
 | 
   742     # Annotation
 | 
| 
 | 
   743     ################################################################
 | 
| 
 | 
   744     direction = []
 | 
| 
 | 
   745     annot = ["Annotation"]
 | 
| 
 | 
   746 
 | 
| 
 | 
   747     distance = 0
 | 
| 
 | 
   748     for feat in [f for f in original.features if f.type.lower() in ["gene", "cds"]]:
 | 
| 
 | 
   749         for space in range(distance, feat.location.start):
 | 
| 
 | 
   750             direction.append("_")
 | 
| 
 | 
   751             annot.append("_")
 | 
| 
 | 
   752         annot.append("*")
 | 
| 
 | 
   753         for an_space in range(feat.location.end - feat.location.start-2):
 | 
| 
 | 
   754             annot.append("_")
 | 
| 
 | 
   755         distance = feat.location.end
 | 
| 
 | 
   756         annot.append("/")
 | 
| 
 | 
   757     for space in range(distance, len(original)):
 | 
| 
 | 
   758         direction.append("_")
 | 
| 
 | 
   759         annot.append("_")
 | 
| 
 | 
   760     #print(annot)
 | 
| 
 | 
   761 
 | 
| 
 | 
   762     ################################################################
 | 
| 
 | 
   763     # CDS
 | 
| 
 | 
   764     ################################################################
 | 
| 
 | 
   765 
 | 
| 
 | 
   766     if ind_range == None:
 | 
| 
 | 
   767         ind_range = [0, len(original)]
 | 
| 
 | 
   768 
 | 
| 
 | 
   769     sequences = {}
 | 
| 
 | 
   770     sequences["original"] = filter(None, re.split(r'(\w{1})', original.seq[ind_range[0]:ind_range[1]]))
 | 
| 
 | 
   771     direction = ["CDS_Orientation"]
 | 
| 
 | 
   772     distance = 0
 | 
| 
 | 
   773 
 | 
| 
 | 
   774     alternating = 0
 | 
| 
 | 
   775 
 | 
| 
 | 
   776     for feat in [f for f in original.features if f.type.lower() in ["gene", "cds"]]:
 | 
| 
 | 
   777         for space in range(distance, feat.location.start):
 | 
| 
 | 
   778             direction.append("_")
 | 
| 
 | 
   779         if feat.type.lower() in ["gene", "cds"]:
 | 
| 
 | 
   780             for counter in range(feat.location.start, feat.location.end, 3):
 | 
| 
 | 
   781                 if alternating % 2 == 1:
 | 
| 
 | 
   782                     sequences["original"][counter] = 'f' + sequences["original"][counter]#'<font size=44>' + sequences["original"][counter] + '</font>'
 | 
| 
 | 
   783                     sequences["original"][counter+1] = 'f' + sequences["original"][counter+1]
 | 
| 
 | 
   784                     sequences["original"][counter+2] = 'f' + sequences["original"][counter+2]
 | 
| 
 | 
   785                     alternating += 1
 | 
| 
 | 
   786 
 | 
| 
 | 
   787                     if feat.strand == +1:
 | 
| 
 | 
   788                         direction.append("-")
 | 
| 
 | 
   789                         direction.append("-")
 | 
| 
 | 
   790                         direction.append(">")
 | 
| 
 | 
   791                     if feat.strand == -1:
 | 
| 
 | 
   792                         direction.append("<")
 | 
| 
 | 
   793                         direction.append("-")
 | 
| 
 | 
   794                         direction.append("-")
 | 
| 
 | 
   795 
 | 
| 
 | 
   796                 else:
 | 
| 
 | 
   797                     sequences["original"][counter] = 's' + sequences["original"][counter]
 | 
| 
 | 
   798                     sequences["original"][counter+1] = 's' + sequences["original"][counter+1]
 | 
| 
 | 
   799                     sequences["original"][counter+2] = 's' + sequences["original"][counter+2]
 | 
| 
 | 
   800                     alternating += 1
 | 
| 
 | 
   801 
 | 
| 
 | 
   802                     if feat.strand == +1:
 | 
| 
 | 
   803                         direction.append("-")
 | 
| 
 | 
   804                         direction.append("-")
 | 
| 
 | 
   805                         direction.append(">")
 | 
| 
 | 
   806                     if feat.strand == -1:
 | 
| 
 | 
   807                         direction.append("<")
 | 
| 
 | 
   808                         direction.append("-")
 | 
| 
 | 
   809                         direction.append("-")
 | 
| 
 | 
   810         distance = feat.location.end
 | 
| 
 | 
   811     for space in range(distance, len(original)):
 | 
| 
 | 
   812         direction.append("_")
 | 
| 
 | 
   813 
 | 
| 
 | 
   814     #print(direction); print(len(direction))
 | 
| 
 | 
   815     ################################################################
 | 
| 
 | 
   816     # Plasmids_ids
 | 
| 
 | 
   817     ################################################################
 | 
| 
 | 
   818     f = 0
 | 
| 
 | 
   819     new_plasmids = []
 | 
| 
 | 
   820     for s in others:
 | 
| 
 | 
   821         new_plasmids.append([s] + filter(None, re.split(r'(\w{1})', tot[s]["sequence"][ind_range[0]:ind_range[1]])))
 | 
| 
 | 
   822         for k in range(len(original)):
 | 
| 
 | 
   823             if k in annotation_information[s]["modified_positions"]:
 | 
| 
 | 
   824                 new_plasmids[f][k+1] += "m"
 | 
| 
 | 
   825         f += 1
 | 
| 
 | 
   826 
 | 
| 
 | 
   827     #print(new_plasmids)
 | 
| 
 | 
   828 
 | 
| 
 | 
   829     ################################################################
 | 
| 
 | 
   830     # Index
 | 
| 
 | 
   831     ################################################################
 | 
| 
 | 
   832 
 | 
| 
 | 
   833     index = ["Index"] + [str(i) for i in range(ind_range[0], ind_range[1])]
 | 
| 
 | 
   834 
 | 
| 
 | 
   835     ################################################################
 | 
| 
 | 
   836     # Create the pdf file
 | 
| 
 | 
   837     ################################################################
 | 
| 
 | 
   838 
 | 
| 
 | 
   839     #print(target_lists); print(len(target_lists[0]))
 | 
| 
 | 
   840     #print(target_positions); print(len(target_positions))
 | 
| 
 | 
   841     #print(annot); print(len(annot))
 | 
| 
 | 
   842     #print(direction); print(len(direction))
 | 
| 
 | 
   843     #print(new_plasmids); print(len(new_plasmids[0]))
 | 
| 
 | 
   844     #print(index)
 | 
| 
 | 
   845 
 | 
| 
 | 
   846     #colors = [('BACKGROUND',(0,0),(0,0),colors.palegreen),
 | 
| 
 | 
   847     #                           ('BACKGROUND',(1,1),(1,1),colors.palegreen),
 | 
| 
 | 
   848     #                           ('BACKGROUND',(2,2),(3,2),colors.palegreen)]
 | 
| 
 | 
   849 
 | 
| 
 | 
   850     data = {0:target_lists,
 | 
| 
 | 
   851             1:target_positions,
 | 
| 
 | 
   852             2:annot,
 | 
| 
 | 
   853             3:direction,
 | 
| 
 | 
   854             4:["Original"] + sequences["original"],
 | 
| 
 | 
   855             5:new_plasmids,
 | 
| 
 | 
   856             6:index}
 | 
| 
 | 
   857 
 | 
| 
 | 
   858     doc = SimpleDocTemplate("comparison_syngenic_plasmids.pdf",pagesize=letter,
 | 
| 
 | 
   859                         rightMargin=30,leftMargin=30,
 | 
| 
 | 
   860                         topMargin=30,bottomMargin=30)
 | 
| 
 | 
   861 
 | 
| 
 | 
   862     elements = []
 | 
| 
 | 
   863     #max_row = 18
 | 
| 
 | 
   864     blocks = {}
 | 
| 
 | 
   865 
 | 
| 
 | 
   866     if len(range(max_row, len(original.seq)+1, max_row)) % max_row == 0:
 | 
| 
 | 
   867         n_blocks = len(range(max_row, len(original.seq)+1, max_row))
 | 
| 
 | 
   868     else:
 | 
| 
 | 
   869         n_blocks = len(range(max_row, len(original.seq)+1, max_row)) + 1
 | 
| 
 | 
   870 
 | 
| 
 | 
   871     j = 0
 | 
| 
 | 
   872 
 | 
| 
 | 
   873     for i in range(n_blocks):
 | 
| 
 | 
   874         blocks[i] = []
 | 
| 
 | 
   875         for l in range(7):
 | 
| 
 | 
   876             if l in [0, 5]:
 | 
| 
 | 
   877                 for el in data[l]:
 | 
| 
 | 
   878                     if len(el[j:]) > max_row:
 | 
| 
 | 
   879                         if i >= 1:
 | 
| 
 | 
   880                             blocks[i].append([el[0]] + el[j:j+max_row])
 | 
| 
 | 
   881                         else:
 | 
| 
 | 
   882                             blocks[i].append(el[j:j+max_row])
 | 
| 
 | 
   883                     else:
 | 
| 
 | 
   884                         blocks[i].append([el[0]] + el[j:])
 | 
| 
 | 
   885             else:
 | 
| 
 | 
   886                 if len(data[l][j:]) > max_row:
 | 
| 
 | 
   887                     if i >= 1:
 | 
| 
 | 
   888                         blocks[i].append([data[l][0]] + data[l][j:j+max_row])
 | 
| 
 | 
   889                     else:
 | 
| 
 | 
   890                         blocks[i].append(data[l][j:j+max_row])
 | 
| 
 | 
   891                 else:
 | 
| 
 | 
   892                     blocks[i].append([data[l][0]] + data[l][j:])
 | 
| 
 | 
   893         j += max_row
 | 
| 
 | 
   894         #print("\n")
 | 
| 
 | 
   895         #print(blocks[i])
 | 
| 
 | 
   896 
 | 
| 
 | 
   897         elements.append(Table(blocks[i], hAlign='LEFT'))#,
 | 
| 
 | 
   898                         #style=[('BACKGROUND',(0,0),(0,0),colors.palegreen),
 | 
| 
 | 
   899                         #       ('BACKGROUND',(1,1),(1,1),colors.palegreen),
 | 
| 
 | 
   900                         #       ('TEXTCOLOR',(2,2),(3,2),colors.palegreen),
 | 
| 
 | 
   901                         #       ('BOX',(0,0),(0,0),2,colors.red)]))
 | 
| 
 | 
   902         elements.append(Table([["", "", "", "", ""]]))
 | 
| 
 | 
   903 
 | 
| 
 | 
   904     doc.build(elements)
 | 
| 
 | 
   905 
 | 
| 
 | 
   906 
 | 
| 
 | 
   907     #new_doc = SimpleDocTemplate("further_information.pdf",pagesize=letter,
 | 
| 
 | 
   908     #                            rightMargin=30,leftMargin=30,
 | 
| 
 | 
   909     #                            topMargin=30,bottomMargin=30)
 | 
| 
 | 
   910     #new_elements = []
 | 
| 
 | 
   911 
 | 
| 
 | 
   912     #new_elements.append([f for f in original.features if f.type.lower() in ["gene", "cds"]])
 | 
| 
 | 
   913     #new_elements.append(f_patterns)
 | 
| 
 | 
   914 
 | 
| 
 | 
   915     #doc.build(new_elements)
 | 
| 
 | 
   916 
 | 
| 
 | 
   917     c = canvas.Canvas("./further_information.pdf")
 | 
| 
 | 
   918     c.drawString(100,750,"CDS regions:")
 | 
| 
 | 
   919     upper_bound = 750
 | 
| 
 | 
   920     for feat in original.features:
 | 
| 
 | 
   921         if feat.type.lower() in ["gene", "cds"]:
 | 
| 
 | 
   922             upper_bound -= 15
 | 
| 
 | 
   923             if feat.location.strand == -1:
 | 
| 
 | 
   924                 sign = "-"
 | 
| 
 | 
   925             else:
 | 
| 
 | 
   926                 sign = "+"
 | 
| 
 | 
   927             c.drawString(115,upper_bound, str("[") + str(feat.location.start)+ ":" + str(feat.location.end) + "]" + "(" + sign + ")")
 | 
| 
 | 
   928     upper_bound -= 30
 | 
| 
 | 
   929     c.drawString(100,upper_bound,"Patterns and the corresponding targets on the plasmid sequence:")
 | 
| 
 | 
   930     for f_pattern in f_patterns.keys():
 | 
| 
 | 
   931         upper_bound -= 15
 | 
| 
 | 
   932         c.drawString(115,upper_bound,f_pattern + ":")
 | 
| 
 | 
   933         for val in f_patterns[f_pattern]:
 | 
| 
 | 
   934             upper_bound -= 15
 | 
| 
 | 
   935             c.drawString(130,upper_bound,str(val))
 | 
| 
 | 
   936         upper_bound -= 5
 | 
| 
 | 
   937 
 | 
| 
 | 
   938     upper_bound -= 30
 | 
| 
 | 
   939     c.drawString(100,upper_bound,"Identifiers of the targets found in the plasmid sequence:")
 | 
| 
 | 
   940     for target in targets.keys():
 | 
| 
 | 
   941         upper_bound -= 15
 | 
| 
 | 
   942         c.drawString(115,upper_bound,target + ": " + targets[target])
 | 
| 
 | 
   943 
 | 
| 
 | 
   944     c.save()
 | 
| 
 | 
   945 
 | 
| 
 | 
   946 
 | 
| 
 | 
   947     return
 | 
| 
 | 
   948 
 | 
| 
 | 
   949 
 | 
| 
 | 
   950 def produce_random_targets(sequence):
 | 
| 
 | 
   951 
 | 
| 
 | 
   952     # Produce a target on two continous CDS
 | 
| 
 | 
   953     # Produce a target in a non-coding region
 | 
| 
 | 
   954     # Produce a target in coding region
 | 
| 
 | 
   955     # Produce a target on a overlapping left
 | 
| 
 | 
   956     # Produce a target on a overlapping right
 | 
| 
 | 
   957 
 | 
| 
 | 
   958 
 | 
| 
 | 
   959 
 | 
| 
 | 
   960     return
 |