diff CodonSwitchTool/functions.py @ 2:aad5e435e4dc draft default tip

Uploaded
author gianmarco_piccinno
date Tue, 21 May 2019 05:24:56 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/CodonSwitchTool/functions.py	Tue May 21 05:24:56 2019 -0400
@@ -0,0 +1,960 @@
+import string
+from syngenic import *
+from Bio.Seq import Seq
+from Bio.SeqFeature import SeqFeature, FeatureLocation
+from pprint import pprint
+
+from itertools import izip
+
+import numpy as np
+import pandas as pd
+
+def all_patterns(input_ = []):
+    """Collect every pattern together with its reverse complement.
+
+    Each element of ``input_`` must provide ``reverse_complement()``.  For
+    every element two entries are produced, forward strand first, in two
+    parallel lists:
+
+    * ``patts``   -- ``str()`` of the sequence;
+    * ``n_patts`` -- ``pattern(sequence).plan_ambiguity()``.
+
+    Returns the tuple ``(patts, n_patts)``.
+    """
+    patts = []
+    n_patts = []
+
+    for forward in input_:
+        backward = forward.reverse_complement()
+        both_strands = (forward, backward)
+
+        patts.extend([str(strand) for strand in both_strands])
+        n_patts.extend([pattern(strand).plan_ambiguity() for strand in both_strands])
+
+    return patts, n_patts
+
+def fake_from_real(path = None, id_ = None, name = None):
+    """Build a shortened "fake" plasmid out of a GenBank record.
+
+    The record stored at ``path`` is read with ``SeqIO.read``.  The result is
+    assembled from:
+
+    1. the first 10 bases of the record;
+    2. for every feature of type "CDS", the first 9 and the last 9 positions
+       of that feature;
+    3. the last 10 bases of the record.
+
+    Every condensed CDS is registered on the result as a "gene" feature that
+    carries the strand of the CDS it was derived from.
+
+    path -- path of the GenBank file to read.
+    id_  -- value stored in the ``id`` attribute of the result.
+    name -- value stored in the ``name`` attribute of the result.
+
+    Returns the assembled object.
+    """
+
+    # Read inside a context manager so the file handle is always closed.
+    with open(path, "r") as handle:
+        plasmid_seq = SeqIO.read(handle, "genbank")
+
+    f_p = plasmid_seq.seq[:10]
+    f_CDS = []
+    for f in plasmid_seq.features:
+        if f.type == "CDS":
+            # The condensed CDS starts where the fake sequence currently ends.
+            tmp_start = len(f_p)
+            tmp_cds = plasmid_seq[f.location.start:f.location.start+9] + plasmid_seq[f.location.end-9:f.location.end]
+            tmp_end = tmp_start + len(tmp_cds)
+            # NOTE(review): tmp_cds is sliced from the record, not from .seq, so
+            # f_p presumably becomes a record-like object here -- TODO confirm.
+            f_p += tmp_cds
+            f_CDS.append(SeqFeature(FeatureLocation(tmp_start, tmp_end), type="gene", strand=f.location.strand))
+    f_p += plasmid_seq.seq[-10:]
+
+    # NOTE(review): when the record has no CDS, f_p is still the plain
+    # sequence slice and this loop does not run.
+    for feat in f_CDS:
+        f_p.features.append(feat)
+    f_p.id = id_
+    f_p.name = name
+
+    return f_p
+
+def punctuate_targets(f_patts, n_pl):
+    """Map every plasmid position covered by a target to the pattern letters found there.
+
+    f_patts -- dict: pattern -> list of matches; in every match item 1 is the
+               start and item 2 the (exclusive) end of the target.
+    n_pl    -- the plasmid; only its length is used, to unroll the matches
+               whose end lies before their start.
+
+    Returns a dict: position -> set of the pattern letters aligned with it.
+    """
+
+    plasmid_length = len(n_pl)
+    letters_at = {}
+
+    for patt, matches in f_patts.items():
+        for match in matches:
+            start, end = match[1], match[2]
+            if end < start:
+                # the target runs over the end of the sequence and resumes at 0
+                covered = list(range(start, plasmid_length)) + list(range(0, end))
+            else:
+                covered = list(range(start, end))
+
+            # the n-th covered position is aligned with the n-th pattern letter
+            for offset, position in enumerate(covered):
+                letters_at.setdefault(position, []).append(patt[offset])
+
+    # collapse the letters collected for each position into a set
+    for position in list(letters_at):
+        letters_at[position] = set(letters_at[position])
+
+    return letters_at
+
+
+def print_seq(n_pl, ind_range = None):
+    """Print a sequence in rows of nine symbols, each one above its index.
+
+    n_pl      -- the sequence to print.
+    ind_range -- optional ``[start, end]`` pair; when given only
+                 ``n_pl[start:end]`` is printed and the indices run from
+                 ``start`` to ``end - 1``, otherwise the whole sequence is
+                 printed with indices starting at 0.
+
+    Every row is preceded by a blank print; each symbol is padded so that it
+    ends in the same column as the last digit of its index.
+
+    Returns None.
+    """
+
+    splitter = r'(\w{1})'
+
+    if ind_range == None:
+        symbols = filter(None, re.split(splitter, n_pl))
+        positions = range(len(n_pl))
+    else:
+        symbols = filter(None, re.split(splitter, n_pl[ind_range[0]:ind_range[1]]))
+        positions = range(ind_range[0], ind_range[1])
+
+    seq_rows = [""]
+    ind_rows = [""]
+    row = 0
+
+    for k in range(len(symbols)):
+
+        if k > 0 and k % 9 == 0:
+            # the current row is complete: print it and open a new one
+            print("\n")
+            print(seq_rows[row])
+            print(ind_rows[row])
+            row += 1
+            seq_rows.append("")
+            ind_rows.append("")
+
+        label = str(positions[k])
+        seq_rows[row] += " " + " " * (len(label) - 1) + symbols[k]
+        ind_rows[row] += " " + label
+
+    # last (possibly incomplete) row
+    print("\n")
+    print(seq_rows[row])
+    print(ind_rows[row])
+
+    return None
+
+
+def generalization(n_poss, n_pl, synonims_tables, reduced=False):
+    """Relax every targeted position of the plasmid and enumerate the resulting variants.
+
+    n_poss          -- dict: position -> set of pattern letters found there.
+    n_pl            -- plasmid exposing ``features``, ``seq`` and ``extract``.
+    synonims_tables -- dict with the entries "synonims" and "anti_synonims",
+                       each mapping a codon to its alternative codons.
+    reduced         -- accepted, but both settings are handled identically.
+
+    A position inside a "CDS"/"gene" feature (and whose letter set is not
+    exactly {"N"}) is replaced by a character class made of the bases that
+    the alternative codons show at the same codon offset, provided more than
+    one distinct base occurs.  The "synonims" table is used for strand +1 and
+    the "anti_synonims" table for strand -1.
+
+    A position outside every such feature, whose letter set contains at least
+    one of A, T, C, G, is replaced by the fixed class [AT]/[TA]/[CG]/[GC] of
+    one of its non-"N" letters.
+
+    Returns ``sre_yield.AllStrings`` built on the relaxed sequence.
+    """
+
+    transversions = {"A": "[AT]",
+                     "T": "[TA]",
+                     "C": "[CG]",
+                     "G": "[GC]"}
+
+    # which table of alternative codons applies to which strand
+    table_for_strand = {+1: "synonims", -1: "anti_synonims"}
+
+    new_poss = {}
+
+    for pos in n_poss.keys():
+        in_cds = False
+
+        for feat in n_pl.features:
+            inside = feat.location.start <= pos < feat.location.end
+            if not (inside and feat.type in ["CDS", "gene"]):
+                continue
+
+            in_cds = True
+
+            if n_poss[pos] != {"N"}:
+                # offset of the position inside its codon: 0, 1 or 2
+                offset = (pos - feat.location.start) % 3
+                codon_start = pos - offset
+                tmp_codon = n_pl.seq[codon_start:codon_start+3]
+
+                table_name = table_for_strand.get(feat.strand)
+                bases = []
+                if table_name is not None:
+                    bases = [codon[offset] for codon in synonims_tables[table_name][tmp_codon]]
+
+                distinct = set(bases)
+                if len(distinct) > 1:
+                    new_poss[pos] = "[" + "".join(list(distinct)) + "]"
+
+            # the extracted feature itself is not used further
+            n_pl.extract(feat)
+
+        if not in_cds and set.intersection(n_poss[pos], {"A", "T", "C", "G"}) != set():
+            new_poss[pos] = transversions[set.difference(n_poss[pos], {"N"}).pop()]
+
+    n_seq = filter(None, re.split(r'(\w{1})', str(n_pl.seq)))
+
+    # relaxed positions take their character class, all others keep their symbol
+    pieces = [new_poss[i] if i in new_poss else n_seq[i] for i in range(len(n_seq))]
+    new_plasmid_generalized = "".join(pieces)
+
+    return sre_yield.AllStrings(new_plasmid_generalized)
+
+
+def evaluate_plasmids(plasmids = None,
+                      original_plasmid = None,
+                      codon_usage_table = None,
+                      n_patts = None,
+                      f_patts = None):
+    """Keep the candidate plasmids that are usable and describe their modifications.
+
+    plasmids          -- sized iterable of candidate sequences.
+    original_plasmid  -- the reference plasmid (``seq``, ``features`` and
+                         ``extract`` are used).
+    codon_usage_table -- table indexed by codon through ``.loc`` and offering
+                         a "Proportion" entry for every codon.
+    n_patts, f_patts  -- handed over unchanged to ``plasmid.findpatterns``.
+
+    A candidate is kept when it differs from the original sequence, every
+    "gene"/"cds" feature translates to the same protein as in the original,
+    and ``findpatterns`` reports only empty match lists (at least one).
+
+    Returns a dict with one entry "Plasmid_<n>" per kept candidate (``n`` is
+    the position of the candidate in ``plasmids``) holding
+    "modified_positions", "number_of_modification", "sequence",
+    "codon_usage" (the "Proportion" of the candidate codon around every
+    modified position lying in a "CDS"/"gene" feature), "mean_codon_usage"
+    and "std_codon_usage"; plus the entry "original_plasmids" holding
+    ``original_plasmid``.
+    """
+
+    from syngenic import plasmid
+    from Bio.Seq import Seq
+    from Bio.SeqFeature import SeqFeature, FeatureLocation
+    from itertools import izip
+    import numpy as np
+
+    useful = {}
+
+    for i, tmp_pl in enumerate(plasmids):
+
+        # the unmodified sequence is not a candidate
+        if not tmp_pl != original_plasmid.seq:
+            continue
+
+        identical_proteic_sequence = all([
+            Seq(plasmid(tmp_pl).extract(feat)).translate() == Seq(original_plasmid.extract(feat)).translate()
+            for feat in original_plasmid.features
+            if feat.type.lower() in ["gene", "cds"]])
+        if not identical_proteic_sequence:
+            continue
+
+        # every pattern must be reported with an empty list of matches
+        found = plasmid(tmp_pl).findpatterns(n_patts, f_patts).values()
+        if len(found) == 0 or not all([el == [] for el in found]):
+            continue
+
+        print("\t" + str(i) + "/" + str(len(plasmids)))
+        tmp = [j for j, (a1, a2) in enumerate(izip(tmp_pl, original_plasmid)) if a1 != a2]
+
+        entry = {"modified_positions": tmp,
+                 "codon_usage": [],
+                 "number_of_modification": len(tmp),
+                 "sequence": tmp_pl}
+        useful["Plasmid_" + str(i)] = entry
+
+        for modified_position in tmp:
+            for feat in original_plasmid.features:
+                if feat.type not in ["CDS", "gene"]:
+                    continue
+                if not (feat.location.start <= modified_position < feat.location.end):
+                    continue
+
+                # offset of the modified base inside its codon: 0, 1 or 2
+                offset = (modified_position - feat.location.start) % 3
+                codon_start = modified_position - offset
+                # The codon is always read from the candidate: the usage of
+                # the codon that was introduced is what has to be scored.
+                tmp_codon = tmp_pl[codon_start:codon_start+3]
+                if feat.strand != +1:
+                    tmp_codon = str(Seq(tmp_codon).reverse_complement())
+                entry["codon_usage"].append(codon_usage_table.loc[tmp_codon]["Proportion"])
+
+        entry["mean_codon_usage"] = np.mean(entry["codon_usage"])
+        entry["std_codon_usage"] = np.std(entry["codon_usage"])
+
+    useful["original_plasmids"] = original_plasmid
+
+    return useful
+
+
+
+def rank_plasmids(original_useful_plasmids = None):
+    """Rank the useful plasmids by codon usage and number of modifications.
+
+    original_useful_plasmids -- dict: plasmid identifier -> dict holding at
+        least "mean_codon_usage", "std_codon_usage" and
+        "number_of_modification".  The entry "original_plasmids" is ignored.
+
+    Returns a pandas DataFrame with one row per plasmid and the three
+    metrics as columns, ordered by decreasing mean codon usage, then by
+    increasing standard deviation, then by increasing number of
+    modifications.  An empty frame with the three columns is returned when
+    there is no plasmid to rank.
+    """
+
+    metrics = ["mean_codon_usage", "std_codon_usage", "number_of_modification"]
+
+    rows = {}
+    for key, info in (original_useful_plasmids or {}).items():
+        if key == "original_plasmids":
+            continue
+        rows[key] = {metric: info[metric] for metric in metrics}
+
+    if not rows:
+        # nothing to sort: sort_values would fail on the missing columns
+        return pd.DataFrame(columns=metrics)
+
+    dat_plasmids = pd.DataFrame(rows).T
+
+    # sort_values returns a new frame, so its result is what must be returned
+    return dat_plasmids.sort_values(metrics, ascending=[False, True, True])
+
+
+def print_color_seq(original = None,
+                 others = None,
+                 annotation_information = None,
+                 tot = None,
+                 ind_range = None,
+                 patterns = None,
+                 f_patterns = None,
+                 patts = None,
+                 max_row = 18):
+
+    """
+    Print an aligned, colored comparison of the original plasmid and its variants.
+
+    The output is made of blocks of at most ``max_row`` cells per row; every
+    block holds, from top to bottom: one row per target, the aggregated
+    target positions, the annotation row, the CDS orientation row, the
+    original sequence, one row per variant and the index row.  After the
+    blocks the gene/cds features and ``f_patterns`` are printed.
+
+    original               -- reference plasmid (``seq``, ``features`` and ``len()`` are used)
+    others                 -- identifiers of the variants to show
+    annotation_information -- dict: identifier -> dict with "modified_positions"
+    tot                    -- dict: identifier -> dict with "sequence"
+    ind_range              -- [start, end] of the region to show; whole plasmid when None
+    patterns               -- dict: position -> collection of letters targeted there
+    f_patterns             -- dict: pattern -> list of matches (text, start, end)
+    patts                  -- not used by this function
+    max_row                -- number of cells per printed row
+
+    Typical call, as noted by the author:
+
+    original = plasmids["original_plasmid"],
+    others = def_pls,
+    annotation_information = useful_plasmids,
+    tot = plasmids,
+    ind_range = None
+
+    Returns None.
+
+    """
+
+    ################################################################
+    # Single Targets
+    ################################################################
+
+    # "Target<n>" -> pattern, only for the patterns that have at least one match
+    targets = {}
+
+    # NOTE(review): indexing the result of keys() (and kkk.sort() below)
+    # relies on keys() returning a list, i.e. Python 2.
+    t_keys = f_patterns.keys()
+
+    for l in range(len(t_keys)):
+        if f_patterns[t_keys[l]] != []:
+            targets["Target" + str(l)] = t_keys[l]
+
+    #print(targets)
+    #print("\n")
+    # "Target<n>" -> one cell per plasmid position: "|" or the letter of the match
+    tars = {}
+
+    for tar in targets.keys():
+        #print(tar)
+        tars[tar] = ["|" for i in range(len(original.seq))]
+
+        for tar1 in f_patterns[targets[tar]]:
+            #print(tar1)
+            if tar1[1] < tar1[2]:
+                for l in range(tar1[1], tar1[2]):
+                    tars[tar][l] = tar1[0][l-tar1[1]]
+            else:
+                # match whose end is not after its start: fill up to the end of
+                # the sequence, then the first tar1[2] cells with the tail of the match
+                for l in range(tar1[1], len(original.seq)):
+                    tars[tar][l] = tar1[0][l-tar1[1]]
+                for l in range(tar1[2]):
+                    tars[tar][l] = tar1[0][-tar1[2]:][l]
+
+    #print(tars)
+    kkk = tars.keys()
+    kkk.sort()
+    # one row per target: label followed by the cells
+    target_lists = [[key]+tars[key] for key in kkk]
+    #print(target_lists); print(len(target_lists[0]))
+
+
+    ################################################################
+    # Aggregate Targets
+    ################################################################
+    # "+=" with a one-character string appends that character as one cell:
+    # "+" when more than one letter is targeted at the position, "T" for a
+    # single letter, " " for an untargeted position
+    target_positions = ["TargetPositions"]
+    for k in range(len(original)):
+        if k in patterns.keys():
+            if len(patterns[k]) > 1:
+                target_positions += "+"#"T"
+            else:
+                target_positions += "T"
+        else:
+            target_positions += " "
+    #print(target_positions); print(len(target_positions))
+    ################################################################
+    # Annotation
+    ################################################################
+    # NOTE(review): the list built in "direction" here is discarded, the name
+    # is rebound in the CDS section below.
+    direction = []
+    annot = ["Annotation"]
+
+    # annotation row: "*" on the first cell of a feature, "/" on its last
+    # cell, "_" everywhere else
+    distance = 0
+    for feat in [f for f in original.features if f.type.lower() in ["gene", "cds"]]:
+        for space in range(distance, feat.location.start):
+            direction.append("_")
+            annot.append("_")
+        annot.append("*")
+        for an_space in range(feat.location.end - feat.location.start-2):
+            annot.append("_")
+        distance = feat.location.end
+        annot.append("/")
+    for space in range(distance, len(original)):
+        direction.append("_")
+        annot.append("_")
+    #print(annot)
+
+    ################################################################
+    # CDS
+    ################################################################
+
+    if ind_range == None:
+        ind_range = [0, len(original)]
+
+    sequences = {}
+    # one cell per symbol of the selected region
+    sequences["original"] = filter(None, re.split(r'(\w{1})', original.seq[ind_range[0]:ind_range[1]]))
+    direction = ["CDS_Orientation"]
+    distance = 0
+
+    # counts the codons seen so far; its parity selects the color
+    alternating = 0
+
+    # NOTE(review): the cells are indexed with plasmid positions (counter)
+    # although the list starts at ind_range[0]; the two presumably agree only
+    # when ind_range[0] is 0 -- confirm.
+    for feat in [f for f in original.features if f.type.lower() in ["gene", "cds"]]:
+        for space in range(distance, feat.location.start):
+            direction.append("_")
+        if feat.type.lower() in ["gene", "cds"]:
+            for counter in range(feat.location.start, feat.location.end, 3):
+                if alternating % 2 == 1:
+                    # odd codon: ANSI attributes 1;31;40 (bold, red on black)
+                    sequences["original"][counter] = "\033[1;31;40m" + sequences["original"][counter] + "\033[0m"
+                    sequences["original"][counter+1] = "\033[1;31;40m" + sequences["original"][counter+1] + "\033[0m"
+                    sequences["original"][counter+2] = "\033[1;31;40m" + sequences["original"][counter+2] + "\033[0m"
+                    alternating += 1
+
+                    # orientation cells: "-->" for strand +1, "<--" for strand -1
+                    if feat.strand == +1:
+                        direction.append("-")
+                        direction.append("-")
+                        direction.append(">")
+                    if feat.strand == -1:
+                        direction.append("<")
+                        direction.append("-")
+                        direction.append("-")
+
+                else:
+                    # even codon: ANSI attributes 1;32;40 (bold, green on black)
+                    sequences["original"][counter] = "\033[1;32;40m" + sequences["original"][counter] + "\033[0m"
+                    sequences["original"][counter+1] = "\033[1;32;40m" + sequences["original"][counter+1] + "\033[0m"
+                    sequences["original"][counter+2] = "\033[1;32;40m" + sequences["original"][counter+2] + "\033[0m"
+                    alternating += 1
+
+                    if feat.strand == +1:
+                        direction.append("-")
+                        direction.append("-")
+                        direction.append(">")
+                    if feat.strand == -1:
+                        direction.append("<")
+                        direction.append("-")
+                        direction.append("-")
+        distance = feat.location.end
+    for space in range(distance, len(original)):
+        direction.append("_")
+
+    #print(direction); print(len(direction))
+    ################################################################
+    # Plasmids_ids
+    ################################################################
+    # one row per variant: identifier followed by one cell per symbol
+    f = 0
+    new_plasmids = []
+    for s in others:
+        new_plasmids.append([s] + filter(None, re.split(r'(\w{1})', tot[s]["sequence"][ind_range[0]:ind_range[1]])))
+        for k in range(len(original)):
+            if k in annotation_information[s]["modified_positions"]:
+                # k+1 because cell 0 of the row is the identifier
+                new_plasmids[f][k+1] = "\033[1;32;40m" + new_plasmids[f][k+1] + "\033[0m"
+        f += 1
+
+    #print(new_plasmids)
+
+    ################################################################
+    # Index
+    ################################################################
+
+    index = ["Index"] + [str(i) for i in range(ind_range[0], ind_range[1])]
+
+    ################################################################
+    # Create the pdf file
+    ################################################################
+
+    #print(target_lists); print(len(target_lists[0]))
+    #print(target_positions); print(len(target_positions))
+    #print(annot); print(len(annot))
+    #print(direction); print(len(direction))
+    #print(new_plasmids); print(len(new_plasmids[0]))
+    #print(index)
+
+    # entries 0 and 5 are lists of rows, all the others are a single row;
+    # cell 0 of every row is its label
+    data = {0:target_lists,
+            1:target_positions,
+            2:annot,
+            3:direction,
+            4:["Original"] + sequences["original"],
+            5:new_plasmids,
+            6:index}
+
+    elements = []
+    #max_row = 18
+    blocks = {}
+
+    # NOTE(review): the test is made on the number of full blocks, not on
+    # len(original.seq); for a sequence shorter than max_row it gives
+    # n_blocks == 0 and no block is printed -- confirm the intent.
+    if len(range(max_row, len(original.seq)+1, max_row)) % max_row == 0:
+        n_blocks = len(range(max_row, len(original.seq)+1, max_row))
+    else:
+        n_blocks = len(range(max_row, len(original.seq)+1, max_row)) + 1
+
+    # first cell of the current block in every row
+    j = 0
+
+    for i in range(n_blocks):
+        blocks[i] = []
+        # block 0 takes the first max_row cells of a row (label included);
+        # later blocks, and a row with at most max_row cells left, get the
+        # label prepended to their slice
+        for l in range(7):
+            if l in [0, 5]:
+                for el in data[l]:
+                    if len(el[j:]) > max_row:
+                        if i >= 1:
+                            blocks[i].append([el[0]] + el[j:j+max_row])
+                        else:
+                            blocks[i].append(el[j:j+max_row])
+                    else:
+                        blocks[i].append([el[0]] + el[j:])
+            else:
+                if len(data[l][j:]) > max_row:
+                    if i >= 1:
+                        blocks[i].append([data[l][0]] + data[l][j:j+max_row])
+                    else:
+                        blocks[i].append(data[l][j:j+max_row])
+                else:
+                    blocks[i].append([data[l][0]] + data[l][j:])
+        j += max_row
+        print("\n")
+        #print(blocks[i])
+
+        # width of the longest label of the block
+        fff = []
+        for f in range(len(blocks[i])):
+            fff.append(len(blocks[i][f][0]))
+        fff = max(fff)
+        for f in range(len(blocks[i])):
+            # pad the label to the common width
+            for r in range(fff-len(blocks[i][f][0])):
+                blocks[i][f][0] += " "
+            if f < len(blocks[i])-1:
+                # every cell is prefixed with as many blanks as the index cell
+                # of the same column (last row of the block) has characters
+                for l in range(1,len(blocks[i][f])):
+                    tmp = ""
+                    #print(blocks[i][-1][l])
+                    if l < len(blocks[i][-1]):
+                        for g in range(len(str(blocks[i][-1][l]))):
+                            #print(g)
+                            tmp += " "
+                    blocks[i][f][l] = tmp + blocks[i][f][l]
+                    #print(blocks[i][f][l])
+                blocks[i][f] = " ".join(blocks[i][f])
+            else:
+                # last row of the block (the index row): joined with two blanks
+                blocks[i][f] = "  ".join(blocks[i][f])
+            print(blocks[i][f])
+        #print(" ".join(blocks[i][-1]))
+
+    print("\n")
+    print([f for f in original.features if f.type.lower() in ["gene", "cds"]])
+    print("\n")
+    print(f_patterns)
+
+    return
+
+def print_to_pdf(original = None,
+                 others = None,
+                 annotation_information = None,
+                 tot = None,
+                 ind_range = None,
+                 patterns = None,
+                 f_patterns = None,
+                 patts = None,
+                 max_row = 9):
+
+    """
+    Write the comparison of the original plasmid and its variants to PDF files.
+
+    Two files are produced with reportlab:
+
+    * "comparison_syngenic_plasmids.pdf": tables of at most ``max_row`` cells
+      per row; every table holds one row per target, the aggregated target
+      positions, the annotation row, the CDS orientation row, the original
+      sequence, one row per variant and the index row;
+    * "./further_information.pdf": the gene/cds regions, the patterns with
+      their matches and the identifiers given to the targets.
+
+    original               -- reference plasmid (``seq``, ``features`` and ``len()`` are used)
+    others                 -- identifiers of the variants to show
+    annotation_information -- dict: identifier -> dict with "modified_positions"
+    tot                    -- dict: identifier -> dict with "sequence"
+    ind_range              -- [start, end] of the region to show; whole plasmid when None
+    patterns               -- dict: position -> collection of letters targeted there
+    f_patterns             -- dict: pattern -> list of matches (text, start, end)
+    patts                  -- not used by this function
+    max_row                -- number of cells per table row
+
+    Typical call, as noted by the author:
+
+    original = plasmids["original_plasmid"],
+    others = def_pls,
+    annotation_information = useful_plasmids,
+    tot = plasmids,
+    ind_range = None
+
+    Returns None.
+
+    """
+
+    # "colors" is referenced only by the commented-out styling below
+    from reportlab.lib import colors
+    from reportlab.lib.pagesizes import letter
+    from reportlab.platypus import SimpleDocTemplate, Table, TableStyle
+    from reportlab.pdfgen import canvas
+
+    ################################################################
+    # Single Targets
+    ################################################################
+
+    # "Target<n>" -> pattern, only for the patterns that have at least one match
+    targets = {}
+
+    # NOTE(review): indexing the result of keys() (and kkk.sort() below)
+    # relies on keys() returning a list, i.e. Python 2.
+    t_keys = f_patterns.keys()
+
+    for l in range(len(t_keys)):
+        if f_patterns[t_keys[l]] != []:
+            targets["Target" + str(l)] = t_keys[l]
+
+    #print(targets)
+    #print("\n")
+    # "Target<n>" -> one cell per plasmid position: "|" or the letter of the match
+    tars = {}
+
+    for tar in targets.keys():
+        #print(tar)
+        tars[tar] = ["|" for i in range(len(original.seq))]
+
+        for tar1 in f_patterns[targets[tar]]:
+            #print(tar1)
+            if tar1[1] < tar1[2]:
+                for l in range(tar1[1], tar1[2]):
+                    tars[tar][l] = tar1[0][l-tar1[1]]
+            else:
+                # match whose end is not after its start: fill up to the end of
+                # the sequence, then the first tar1[2] cells with the tail of the match
+                for l in range(tar1[1], len(original.seq)):
+                    tars[tar][l] = tar1[0][l-tar1[1]]
+                for l in range(tar1[2]):
+                    tars[tar][l] = tar1[0][-tar1[2]:][l]
+
+    #print(tars)
+    kkk = tars.keys()
+    kkk.sort()
+    # one row per target: label followed by the cells
+    target_lists = [[key]+tars[key] for key in kkk]
+    #print(target_lists); print(len(target_lists[0]))
+
+
+    ################################################################
+    # Aggregate Targets
+    ################################################################
+    # "+=" with a one-character string appends that character as one cell:
+    # "+" when more than one letter is targeted at the position, "T" for a
+    # single letter, " " for an untargeted position
+    target_positions = ["TargetPositions"]
+    for k in range(len(original)):
+        if k in patterns.keys():
+            if len(patterns[k]) > 1:
+                target_positions += "+"#"T"
+            else:
+                target_positions += "T"
+        else:
+            target_positions += " "
+    #print(target_positions); print(len(target_positions))
+    ################################################################
+    # Annotation
+    ################################################################
+    # NOTE(review): the list built in "direction" here is discarded, the name
+    # is rebound in the CDS section below.
+    direction = []
+    annot = ["Annotation"]
+
+    # annotation row: "*" on the first cell of a feature, "/" on its last
+    # cell, "_" everywhere else
+    distance = 0
+    for feat in [f for f in original.features if f.type.lower() in ["gene", "cds"]]:
+        for space in range(distance, feat.location.start):
+            direction.append("_")
+            annot.append("_")
+        annot.append("*")
+        for an_space in range(feat.location.end - feat.location.start-2):
+            annot.append("_")
+        distance = feat.location.end
+        annot.append("/")
+    for space in range(distance, len(original)):
+        direction.append("_")
+        annot.append("_")
+    #print(annot)
+
+    ################################################################
+    # CDS
+    ################################################################
+
+    if ind_range == None:
+        ind_range = [0, len(original)]
+
+    sequences = {}
+    # one cell per symbol of the selected region
+    sequences["original"] = filter(None, re.split(r'(\w{1})', original.seq[ind_range[0]:ind_range[1]]))
+    direction = ["CDS_Orientation"]
+    distance = 0
+
+    # counts the codons seen so far; its parity selects the marker
+    alternating = 0
+
+    # NOTE(review): the cells are indexed with plasmid positions (counter)
+    # although the list starts at ind_range[0]; the two presumably agree only
+    # when ind_range[0] is 0 -- confirm.
+    for feat in [f for f in original.features if f.type.lower() in ["gene", "cds"]]:
+        for space in range(distance, feat.location.start):
+            direction.append("_")
+        if feat.type.lower() in ["gene", "cds"]:
+            for counter in range(feat.location.start, feat.location.end, 3):
+                if alternating % 2 == 1:
+                    # odd codon: every base is prefixed with the marker 'f'
+                    sequences["original"][counter] = 'f' + sequences["original"][counter]#'<font size=44>' + sequences["original"][counter] + '</font>'
+                    sequences["original"][counter+1] = 'f' + sequences["original"][counter+1]
+                    sequences["original"][counter+2] = 'f' + sequences["original"][counter+2]
+                    alternating += 1
+
+                    # orientation cells: "-->" for strand +1, "<--" for strand -1
+                    if feat.strand == +1:
+                        direction.append("-")
+                        direction.append("-")
+                        direction.append(">")
+                    if feat.strand == -1:
+                        direction.append("<")
+                        direction.append("-")
+                        direction.append("-")
+
+                else:
+                    # even codon: every base is prefixed with the marker 's'
+                    sequences["original"][counter] = 's' + sequences["original"][counter]
+                    sequences["original"][counter+1] = 's' + sequences["original"][counter+1]
+                    sequences["original"][counter+2] = 's' + sequences["original"][counter+2]
+                    alternating += 1
+
+                    if feat.strand == +1:
+                        direction.append("-")
+                        direction.append("-")
+                        direction.append(">")
+                    if feat.strand == -1:
+                        direction.append("<")
+                        direction.append("-")
+                        direction.append("-")
+        distance = feat.location.end
+    for space in range(distance, len(original)):
+        direction.append("_")
+
+    #print(direction); print(len(direction))
+    ################################################################
+    # Plasmids_ids
+    ################################################################
+    # one row per variant: identifier followed by one cell per symbol
+    f = 0
+    new_plasmids = []
+    for s in others:
+        new_plasmids.append([s] + filter(None, re.split(r'(\w{1})', tot[s]["sequence"][ind_range[0]:ind_range[1]])))
+        for k in range(len(original)):
+            if k in annotation_information[s]["modified_positions"]:
+                # k+1 because cell 0 of the row is the identifier; a modified
+                # base gets the suffix "m"
+                new_plasmids[f][k+1] += "m"
+        f += 1
+
+    #print(new_plasmids)
+
+    ################################################################
+    # Index
+    ################################################################
+
+    index = ["Index"] + [str(i) for i in range(ind_range[0], ind_range[1])]
+
+    ################################################################
+    # Create the pdf file
+    ################################################################
+
+    #print(target_lists); print(len(target_lists[0]))
+    #print(target_positions); print(len(target_positions))
+    #print(annot); print(len(annot))
+    #print(direction); print(len(direction))
+    #print(new_plasmids); print(len(new_plasmids[0]))
+    #print(index)
+
+    #colors = [('BACKGROUND',(0,0),(0,0),colors.palegreen),
+    #                           ('BACKGROUND',(1,1),(1,1),colors.palegreen),
+    #                           ('BACKGROUND',(2,2),(3,2),colors.palegreen)]
+
+    # entries 0 and 5 are lists of rows, all the others are a single row;
+    # cell 0 of every row is its label
+    data = {0:target_lists,
+            1:target_positions,
+            2:annot,
+            3:direction,
+            4:["Original"] + sequences["original"],
+            5:new_plasmids,
+            6:index}
+
+    doc = SimpleDocTemplate("comparison_syngenic_plasmids.pdf",pagesize=letter,
+                        rightMargin=30,leftMargin=30,
+                        topMargin=30,bottomMargin=30)
+
+    elements = []
+    #max_row = 18
+    blocks = {}
+
+    # NOTE(review): the test is made on the number of full blocks, not on
+    # len(original.seq); for a sequence shorter than max_row it gives
+    # n_blocks == 0 and no table is produced -- confirm the intent.
+    if len(range(max_row, len(original.seq)+1, max_row)) % max_row == 0:
+        n_blocks = len(range(max_row, len(original.seq)+1, max_row))
+    else:
+        n_blocks = len(range(max_row, len(original.seq)+1, max_row)) + 1
+
+    # first cell of the current block in every row
+    j = 0
+
+    for i in range(n_blocks):
+        blocks[i] = []
+        # block 0 takes the first max_row cells of a row (label included);
+        # later blocks, and a row with at most max_row cells left, get the
+        # label prepended to their slice
+        for l in range(7):
+            if l in [0, 5]:
+                for el in data[l]:
+                    if len(el[j:]) > max_row:
+                        if i >= 1:
+                            blocks[i].append([el[0]] + el[j:j+max_row])
+                        else:
+                            blocks[i].append(el[j:j+max_row])
+                    else:
+                        blocks[i].append([el[0]] + el[j:])
+            else:
+                if len(data[l][j:]) > max_row:
+                    if i >= 1:
+                        blocks[i].append([data[l][0]] + data[l][j:j+max_row])
+                    else:
+                        blocks[i].append(data[l][j:j+max_row])
+                else:
+                    blocks[i].append([data[l][0]] + data[l][j:])
+        j += max_row
+        #print("\n")
+        #print(blocks[i])
+
+        # one table per block, followed by a table of empty cells
+        elements.append(Table(blocks[i], hAlign='LEFT'))#,
+                        #style=[('BACKGROUND',(0,0),(0,0),colors.palegreen),
+                        #       ('BACKGROUND',(1,1),(1,1),colors.palegreen),
+                        #       ('TEXTCOLOR',(2,2),(3,2),colors.palegreen),
+                        #       ('BOX',(0,0),(0,0),2,colors.red)]))
+        elements.append(Table([["", "", "", "", ""]]))
+
+    doc.build(elements)
+
+
+    #new_doc = SimpleDocTemplate("further_information.pdf",pagesize=letter,
+    #                            rightMargin=30,leftMargin=30,
+    #                            topMargin=30,bottomMargin=30)
+    #new_elements = []
+
+    #new_elements.append([f for f in original.features if f.type.lower() in ["gene", "cds"]])
+    #new_elements.append(f_patterns)
+
+    #doc.build(new_elements)
+
+    # Second file: plain text lines drawn one under the other; upper_bound is
+    # the vertical coordinate of the next line.
+    # NOTE(review): upper_bound only decreases and no new page is ever
+    # started, so a long listing presumably runs past the bottom of the
+    # page -- confirm.
+    c = canvas.Canvas("./further_information.pdf")
+    c.drawString(100,750,"CDS regions:")
+    upper_bound = 750
+    for feat in original.features:
+        if feat.type.lower() in ["gene", "cds"]:
+            upper_bound -= 15
+            if feat.location.strand == -1:
+                sign = "-"
+            else:
+                sign = "+"
+            # written as "[start:end](sign)"
+            c.drawString(115,upper_bound, str("[") + str(feat.location.start)+ ":" + str(feat.location.end) + "]" + "(" + sign + ")")
+    upper_bound -= 30
+    c.drawString(100,upper_bound,"Patterns and the corresponding targets on the plasmid sequence:")
+    for f_pattern in f_patterns.keys():
+        upper_bound -= 15
+        c.drawString(115,upper_bound,f_pattern + ":")
+        for val in f_patterns[f_pattern]:
+            upper_bound -= 15
+            c.drawString(130,upper_bound,str(val))
+        upper_bound -= 5
+
+    upper_bound -= 30
+    c.drawString(100,upper_bound,"Identifiers of the targets found in the plasmid sequence:")
+    for target in targets.keys():
+        upper_bound -= 15
+        c.drawString(115,upper_bound,target + ": " + targets[target])
+
+    c.save()
+
+
+    return
+
+
+def produce_random_targets(sequence):
+    """Placeholder: no target is produced yet and ``sequence`` is ignored.
+
+    Cases listed by the author as still to be implemented:
+
+    * a target on two contiguous CDS
+    * a target in a non-coding region
+    * a target in a coding region
+    * a target overlapping a CDS on the left
+    * a target overlapping a CDS on the right
+
+    Returns None.
+    """
+    return None