# Source: CodonSwitchTool/functions.py @ 2:aad5e435e4dc (draft, default, tip)
# Commit message: Uploaded
# Author: gianmarco_piccinno
# Date: Tue, 21 May 2019 05:24:56 -0400

import string
from syngenic import *
from Bio.Seq import Seq
from Bio.SeqFeature import SeqFeature, FeatureLocation
from pprint import pprint

from itertools import izip

import numpy as np
import pandas as pd

def all_patterns(input_ = None):
    """Collect each pattern and its reverse complement, as text and expanded.

    Parameters
    ----------
    input_ : iterable or None
        Pattern objects. Each one must provide ``reverse_complement()`` and
        be convertible with ``str()`` (presumably Biopython ``Seq`` objects;
        confirm against the caller). ``None`` is treated as "no patterns".

    Returns
    -------
    tuple (patts, n_patts)
        ``patts`` lists ``str(pattern)`` followed by ``str(reverse
        complement)`` for every input, in input order. ``n_patts`` holds, in
        the same order, the result of ``pattern(x).plan_ambiguity()`` for the
        same objects.
    """
    # A None default avoids sharing one mutable list between calls.
    if input_ is None:
        input_ = ()

    patts = []
    n_patts = []

    for patt in input_:
        # Forward strand first, reverse complement second.
        for strand in (patt, patt.reverse_complement()):
            patts.append(str(strand))
            n_patts.append(pattern(strand).plan_ambiguity())

    return patts, n_patts

def fake_from_real(path = None, id_ = None, name = None):
    """Build a shortened mock plasmid from a GenBank file.

    The result starts with the first 10 positions of the record's sequence,
    then, for every feature of type "CDS", the first 9 and the last 9
    positions of that feature's span, and finally the last 10 positions of
    the record's sequence. Each appended CDS stub is annotated with a new
    feature of type "gene" carrying the strand of the source CDS.

    Parameters
    ----------
    path : str
        Path of the GenBank file; it must contain exactly one record
        (``SeqIO.read`` is used).
    id_ : identifier assigned to the ``id`` attribute of the result.
    name : value assigned to the ``name`` attribute of the result.

    Returns
    -------
    The concatenated object with ``features``, ``id`` and ``name`` set.
    NOTE(review): the flanks are slices of ``.seq`` while the CDS stubs are
    slices of the record itself, so the concrete result type depends on
    Biopython's addition rules. With no CDS feature the result is a bare
    sequence and no feature is attached. Confirm this is intended.
    """
    from Bio import SeqIO

    # Parse inside a context manager so the file handle is always released.
    with open(path, "r") as handle:
        plasmid_seq = SeqIO.read(handle, "genbank")

    f_p = plasmid_seq.seq[:10]
    f_CDS = []
    for f in plasmid_seq.features:
        if f.type == "CDS":
            # The stub is appended at the current end of the mock plasmid.
            tmp_start = len(f_p)
            tmp_cds = plasmid_seq[f.location.start:f.location.start+9] + plasmid_seq[f.location.end-9:f.location.end]
            tmp_end = tmp_start + len(tmp_cds)
            f_p += tmp_cds
            f_CDS.append(SeqFeature(FeatureLocation(tmp_start, tmp_end), type="gene", strand=f.location.strand))
    f_p += plasmid_seq.seq[-10:]

    for feat in f_CDS:
        f_p.features.append(feat)
    f_p.id = id_
    f_p.name = name

    return f_p

def punctuate_targets(f_patts, n_pl):
    """Map every targeted position to the set of pattern symbols covering it.

    Parameters
    ----------
    f_patts : dict
        Maps a pattern (indexable, e.g. a string) to a list of hits. For each
        hit, element 1 is the start position and element 2 the end position
        (exclusive); element 0 is not used here.
    n_pl : sized object
        Only ``len(n_pl)`` is used: it is the length at which a hit whose end
        is smaller than its start wraps around to position 0.

    Returns
    -------
    dict
        ``{position: set of symbols}``, where the symbol contributed by a hit
        is the pattern character at the same offset inside the hit.
    """
    seq_len = len(n_pl)
    n_poss = {}

    for key, hits in f_patts.items():
        for el in hits:
            start, end = el[1], el[2]
            if end >= start:
                positions = list(range(start, end))
            else:
                # The hit runs over the origin: tail of the sequence first,
                # then its head.
                positions = list(range(start, seq_len)) + list(range(0, end))
            for offset, pos in enumerate(positions):
                n_poss.setdefault(pos, set()).add(key[offset])

    return n_poss


def print_seq(n_pl, ind_range = None):
    """Print a sequence in rows of nine symbols with the positions underneath.

    Parameters
    ----------
    n_pl : str
        Sequence to display.
    ind_range : sequence of two ints or None
        ``[start, end]`` slice of ``n_pl`` to display; positions are numbered
        from ``start``. ``None`` displays the whole sequence, numbered from 0.

    Returns
    -------
    None. Every row is written to standard output as a blank separator, the
    line of symbols and the line of positions. Symbols are left-padded so
    that each one ends in the same column as its position.
    """
    import re

    if ind_range is None:
        text = n_pl
        index = list(range(len(n_pl)))
    else:
        text = n_pl[ind_range[0]:ind_range[1]]
        index = list(range(ind_range[0], ind_range[1]))

    # One token per word character; a run of other characters stays one token.
    data = [tok for tok in re.split(r'(\w{1})', text) if tok]

    rows = []
    for first in range(0, len(data), 9):
        labels = [str(index[i]) for i in range(first, min(first + 9, len(data)))]
        symbols = data[first:first + 9]
        seq_row = "".join(" " * len(label) + symbol for label, symbol in zip(labels, symbols))
        ind_row = "".join(" " + label for label in labels)
        rows.append((seq_row, ind_row))

    # An empty sequence still produces one (empty) row.
    if not rows:
        rows.append(("", ""))

    for seq_row, ind_row in rows:
        print("\n")
        print(seq_row)
        print(ind_row)

    return None


def generalization(n_poss, n_pl, synonims_tables, reduced=False):
    """Turn the plasmid into a regular expression of allowed variants.

    Every targeted position is replaced, where an alternative exists, by a
    character class; all other positions keep their current symbol.

    * Position inside a feature of type "CDS" or "gene": the class holds the
      symbols found at the same codon offset in every codon listed for the
      current codon in ``synonims_tables["synonims"]`` (strand +1) or
      ``synonims_tables["anti_synonims"]`` (strand -1). It is only used when
      it offers more than one symbol, and positions whose symbol set is
      exactly ``{"N"}`` are left alone.
    * Position outside every such feature: if its symbol set contains one of
      A, C, G, T, the class is ``[AT]``/``[TA]`` or ``[CG]``/``[GC]`` for
      that base.

    Parameters
    ----------
    n_poss : dict
        ``{position: set of pattern symbols}``, e.g. as returned by
        ``punctuate_targets``.
    n_pl : object with ``features`` and ``seq``
        The plasmid. Features need ``type``, ``strand`` and
        ``location.start``/``location.end``.
    synonims_tables : dict
        Must provide the keys ``"synonims"`` and ``"anti_synonims"``, each
        mapping a codon (as sliced from ``n_pl.seq``) to an iterable of
        codons.
    reduced : bool
        Accepted for compatibility; it currently has no effect.

    Returns
    -------
    ``sre_yield.AllStrings`` built from the generalized sequence.

    NOTE(review): the codon frame is always counted from
    ``feat.location.start``, also for strand -1 features. Confirm this agrees
    with how ``anti_synonims`` is keyed.
    """
    import re

    transversions = {"A": "[AT]",
                     "T": "[TA]",
                     "C": "[CG]",
                     "G": "[GC]"}
    concrete_bases = {"A", "T", "C", "G"}

    new_poss = {}

    for pos, symbols in n_poss.items():
        in_cds = False
        for feat in n_pl.features:
            start = feat.location.start
            if not (start <= pos < feat.location.end and feat.type in ["CDS", "gene"]):
                continue
            in_cds = True

            if symbols == {"N"}:
                continue

            # Offset of the position inside its codon (0, 1 or 2) and the
            # codon that currently occupies that frame.
            frame = (pos - start) % 3
            codon_start = pos - frame
            tmp_codon = n_pl.seq[codon_start:codon_start + 3]

            if feat.strand == +1:
                table = synonims_tables["synonims"]
            elif feat.strand == -1:
                table = synonims_tables["anti_synonims"]
            else:
                # Without a strand there is no table to consult.
                continue

            bases = set(codon[frame] for codon in table[tmp_codon])
            if len(bases) > 1:
                new_poss[pos] = "[" + "".join(bases) + "]"

        if not in_cds:
            # Only concrete bases have a transversion; ambiguity codes that
            # share the position are ignored. min() keeps the choice
            # deterministic if several concrete bases are present.
            candidates = concrete_bases.intersection(symbols)
            if candidates:
                new_poss[pos] = transversions[min(candidates)]

    # One token per sequence symbol, indexed by position.
    n_seq = [tok for tok in re.split(r'(\w{1})', str(n_pl.seq)) if tok]

    new_plasmid_generalized = "".join(
        new_poss.get(pos, symbol) for pos, symbol in enumerate(n_seq))

    return sre_yield.AllStrings(new_plasmid_generalized)


def evaluate_plasmids(plasmids = None,
                      original_plasmid = None,
                      codon_usage_table = None,
                      n_patts = None,
                      f_patts = None):
    """Keep the candidate plasmids that are pattern-free and protein-identical.

    A candidate is kept when it differs from ``original_plasmid.seq``, every
    "gene"/"cds" feature (case-insensitive) translates to the same protein as
    in the original, and ``plasmid(candidate).findpatterns(n_patts, f_patts)``
    returns a non-empty mapping whose values are all empty lists.

    Parameters
    ----------
    plasmids : sized iterable of candidate sequences (indexable by slice).
    original_plasmid : object with ``seq``, ``features`` and ``extract``.
    codon_usage_table : object supporting ``.loc[codon]["Proportion"]``
        (presumably a pandas DataFrame indexed by codon; confirm).
    n_patts, f_patts : forwarded unchanged to ``findpatterns``.

    Returns
    -------
    dict
        ``"Plasmid_<i>"`` (``i`` is the candidate's position in ``plasmids``)
        maps to a dict with the keys ``modified_positions``, ``codon_usage``,
        ``number_of_modification``, ``sequence``, ``mean_codon_usage`` and
        ``std_codon_usage``. The extra key ``"original_plasmids"`` holds
        ``original_plasmid``.
    """
    from syngenic import plasmid
    from Bio.Seq import Seq
    import numpy as np
    try:
        from itertools import izip
    except ImportError:
        # Python 3: the built-in zip is already lazy.
        izip = zip

    useful = {}

    # Filled on first use so that the original is translated only once.
    coding_features = None
    reference_proteins = None

    for i, tmp_pl in enumerate(plasmids):

        differs = tmp_pl != original_plasmid.seq
        if not differs:
            continue

        if coding_features is None:
            coding_features = [feat for feat in original_plasmid.features
                               if feat.type.lower() in ["gene", "cds"]]
            reference_proteins = [Seq(original_plasmid.extract(feat)).translate()
                                  for feat in coding_features]

        candidate = plasmid(tmp_pl)

        identical_proteic_sequence = all(
            Seq(candidate.extract(feat)).translate() == reference
            for feat, reference in zip(coding_features, reference_proteins))
        if not identical_proteic_sequence:
            continue

        hits = list(candidate.findpatterns(n_patts, f_patts).values())
        if not (hits and all(el == [] for el in hits)):
            continue

        print("\t" + str(i) + "/" + str(len(plasmids)))

        tmp = [j for j, (a1, a2) in enumerate(izip(tmp_pl, original_plasmid)) if a1 != a2]

        codon_usage = []
        for modified_position in tmp:
            for feat in original_plasmid.features:
                if feat.type not in ["CDS", "gene"]:
                    continue
                if not (feat.location.start <= modified_position < feat.location.end):
                    continue
                # The codon of the *candidate* that contains the change.
                frame = (modified_position - feat.location.start) % 3
                codon_start = modified_position - frame
                tmp_codon = tmp_pl[codon_start:codon_start + 3]
                if feat.strand != +1:
                    tmp_codon = str(Seq(tmp_codon).reverse_complement())
                codon_usage.append(codon_usage_table.loc[tmp_codon]["Proportion"])

        useful["Plasmid_" + str(i)] = {
            "modified_positions": tmp,
            "codon_usage": codon_usage,
            "number_of_modification": len(tmp),
            "sequence": tmp_pl,
            "mean_codon_usage": np.mean(codon_usage),
            "std_codon_usage": np.std(codon_usage),
        }

    useful["original_plasmids"] = original_plasmid

    return useful



def rank_plasmids(original_useful_plasmids = None):
    """Rank candidate plasmids by codon usage and number of modifications.

    Parameters
    ----------
    original_useful_plasmids : dict or None
        Maps a plasmid identifier to a dict providing at least
        ``mean_codon_usage``, ``std_codon_usage`` and
        ``number_of_modification``. The entry ``"original_plasmids"`` is
        ignored. ``None`` is treated as an empty mapping.

    Returns
    -------
    pandas.DataFrame
        One row per plasmid with the three metrics as columns, ordered by
        highest mean codon usage, then lowest standard deviation, then
        fewest modifications. Empty when there is no candidate.
    """
    metrics = ["mean_codon_usage", "std_codon_usage", "number_of_modification"]

    candidates = original_useful_plasmids or {}
    tmp_useful_plasmids = {}
    for key, entry in candidates.items():
        if key == "original_plasmids":
            continue
        tmp_useful_plasmids[key] = dict((metric, entry[metric]) for metric in metrics)

    dat_plasmids = pd.DataFrame(tmp_useful_plasmids).T

    # An empty frame has no metric columns to sort on.
    if dat_plasmids.empty:
        return dat_plasmids

    # sort_values returns a new frame; that result is what gets returned.
    return dat_plasmids.sort_values(metrics, ascending=[False, True, True])


def _split_symbols(text):
    """Split text into single word characters; other runs stay one token."""
    import re
    return [tok for tok in re.split(r'(\w{1})', str(text)) if tok]


def _coding_features(record):
    """Return the features of record typed "gene" or "cds" (any case)."""
    return [f for f in record.features if f.type.lower() in ["gene", "cds"]]


def _comparison_rows(original, others, annotation_information, tot,
                     ind_range, patterns, f_patterns,
                     mark_codon, mark_modified):
    """Build the labelled rows shared by the terminal and the PDF report.

    ``mark_codon(symbol, codon_number)`` decorates a symbol of the original
    sequence that lies in a coding feature; ``mark_modified(symbol)``
    decorates a modified symbol of an alternative plasmid.

    Returns ``(targets, rows)``. ``targets`` maps ``"Target<n>"`` to the
    pattern with at least one hit, ``n`` being the pattern's rank in
    ``f_patterns``. ``rows`` is a list of lists whose first element is the
    row label: one row per target, then "TargetPositions", "Annotation",
    "CDS_Orientation", "Original", one row per entry of ``others`` and
    finally "Index".

    NOTE(review): feature coordinates and modified positions are used as
    direct indices into the rows sliced with ``ind_range``, so a range that
    does not start at 0 is not offset-corrected. Confirm before relying on
    a partial range.
    """
    seq_len = len(original.seq)

    # --- one row per pattern that hits the plasmid -----------------------
    targets = {}
    for number, patt in enumerate(list(f_patterns.keys())):
        if f_patterns[patt] != []:
            targets["Target" + str(number)] = patt

    target_lists = []
    for target_name in sorted(targets):
        track = ["|"] * seq_len
        for hit in f_patterns[targets[target_name]]:
            matched, start, end = hit[0], hit[1], hit[2]
            if start < end:
                for l in range(start, end):
                    track[l] = matched[l - start]
            else:
                # Hit running over the origin: tail first, then head.
                for l in range(start, seq_len):
                    track[l] = matched[l - start]
                head = matched[-end:]
                for l in range(end):
                    track[l] = head[l]
        target_lists.append([target_name] + track)

    # --- aggregate row: "T" one target symbol, "+" several ---------------
    target_positions = ["TargetPositions"]
    for k in range(len(original)):
        if k in patterns:
            target_positions.append("+" if len(patterns[k]) > 1 else "T")
        else:
            target_positions.append(" ")

    coding = _coding_features(original)

    # --- annotation row: "*" opens a coding feature, "/" closes it -------
    annot = ["Annotation"]
    distance = 0
    for feat in coding:
        annot.extend("_" for _ in range(distance, feat.location.start))
        annot.append("*")
        annot.extend("_" for _ in range(feat.location.end - feat.location.start - 2))
        distance = feat.location.end
        annot.append("/")
    annot.extend("_" for _ in range(distance, len(original)))

    if ind_range is None:
        ind_range = [0, len(original)]

    # --- original sequence with alternating codon marks + orientation ----
    original_row = _split_symbols(original.seq[ind_range[0]:ind_range[1]])
    direction = ["CDS_Orientation"]
    distance = 0
    codon_number = 0  # keeps counting across features
    for feat in coding:
        direction.extend("_" for _ in range(distance, feat.location.start))
        for counter in range(feat.location.start, feat.location.end, 3):
            for l in range(counter, counter + 3):
                original_row[l] = mark_codon(original_row[l], codon_number)
            codon_number += 1
            if feat.strand == +1:
                direction.extend(["-", "-", ">"])
            if feat.strand == -1:
                direction.extend(["<", "-", "-"])
        distance = feat.location.end
    direction.extend("_" for _ in range(distance, len(original)))

    # --- alternative plasmids with their modified positions marked -------
    new_plasmids = []
    for s in others:
        row = [s] + _split_symbols(tot[s]["sequence"][ind_range[0]:ind_range[1]])
        modified = set(annotation_information[s]["modified_positions"])
        for k in range(len(original)):
            if k in modified:
                row[k + 1] = mark_modified(row[k + 1])
        new_plasmids.append(row)

    index = ["Index"] + [str(i) for i in range(ind_range[0], ind_range[1])]

    rows = (target_lists
            + [target_positions, annot, direction, ["Original"] + original_row]
            + new_plasmids
            + [index])
    return targets, rows


def _row_blocks(rows, sequence_length, max_row):
    """Cut every row into consecutive chunks of max_row cells.

    Chunk 0 starts with the row label itself; every later chunk gets the
    label prepended. Rows carry ``sequence_length + 1`` cells (label
    included), hence ``sequence_length // max_row + 1`` blocks.
    """
    if max_row < 1:
        raise ValueError("max_row must be a positive integer")

    blocks = []
    for block_number in range(sequence_length // max_row + 1):
        start = block_number * max_row
        block = []
        for row in rows:
            chunk = row[start:start + max_row]
            if block_number >= 1:
                chunk = [row[0]] + chunk
            block.append(chunk)
        blocks.append(block)
    return blocks


def print_color_seq(original = None,
                 others = None,
                 annotation_information = None,
                 tot = None,
                 ind_range = None,
                 patterns = None,
                 f_patterns = None,
                 patts = None,
                 max_row = 18):

    """Print an aligned, ANSI-coloured comparison of plasmids to stdout.

    Parameters
    ----------
    original : object with ``seq``, ``features`` and ``len()``.
    others : iterable of identifiers, used as keys of ``tot`` and
        ``annotation_information``.
    annotation_information : dict; ``[id]["modified_positions"]`` lists the
        positions to highlight for that plasmid.
    tot : dict; ``[id]["sequence"]`` is the sequence of that plasmid.
    ind_range : ``[start, end]`` or None for the whole sequence.
    patterns : dict ``{position: collection of symbols}``.
    f_patterns : dict ``{pattern: list of hits}``; a hit is indexed as
        (matched text, start, end).
    patts : unused.
    max_row : number of cells per printed block (label included in the
        first block).

    Codons of coding features alternate between green and red, modified
    positions of the alternatives are green. After the blocks, the coding
    features and ``f_patterns`` are printed. Returns None.
    """
    red, green, reset = "\033[1;31;40m", "\033[1;32;40m", "\033[0m"

    def mark_codon(symbol, codon_number):
        colour = red if codon_number % 2 == 1 else green
        return colour + symbol + reset

    def mark_modified(symbol):
        return green + symbol + reset

    targets, rows = _comparison_rows(original, others, annotation_information,
                                     tot, ind_range, patterns, f_patterns,
                                     mark_codon, mark_modified)

    for block in _row_blocks(rows, len(original.seq), max_row):
        print("\n")

        label_width = max(len(row[0]) for row in block)
        # The last row is the index; its cell widths drive the alignment.
        index_row = block[-1]
        index_widths = [len(str(cell)) for cell in index_row]
        last = len(block) - 1

        for f, row in enumerate(block):
            row[0] += " " * (label_width - len(row[0]))
            if f < last:
                for l in range(1, len(row)):
                    pad = " " * index_widths[l] if l < len(index_row) else ""
                    row[l] = pad + row[l]
                print(" ".join(row))
            else:
                print("  ".join(row))

    print("\n")
    print(_coding_features(original))
    print("\n")
    print(f_patterns)

    return

def print_to_pdf(original = None,
                 others = None,
                 annotation_information = None,
                 tot = None,
                 ind_range = None,
                 patterns = None,
                 f_patterns = None,
                 patts = None,
                 max_row = 9):

    """Write the plasmid comparison and its legend to two PDF files.

    Takes the same arguments as ``print_color_seq``. The comparison table
    goes to ``comparison_syngenic_plasmids.pdf`` in blocks of ``max_row``
    cells. Symbols of coding features are prefixed with "s" or "f"
    (alternating per codon) and modified positions of the alternatives are
    suffixed with "m". ``./further_information.pdf`` lists the coding
    regions, the hits of every pattern and the target identifiers.

    Requires reportlab. Returns None.

    NOTE(review): the legend is drawn on a single page with a decreasing
    y coordinate and no page break, so long lists may run off the page.
    """
    from reportlab.lib.pagesizes import letter
    from reportlab.platypus import SimpleDocTemplate, Table
    from reportlab.pdfgen import canvas

    def mark_codon(symbol, codon_number):
        return ('f' if codon_number % 2 == 1 else 's') + symbol

    def mark_modified(symbol):
        return symbol + "m"

    targets, rows = _comparison_rows(original, others, annotation_information,
                                     tot, ind_range, patterns, f_patterns,
                                     mark_codon, mark_modified)

    doc = SimpleDocTemplate("comparison_syngenic_plasmids.pdf",pagesize=letter,
                        rightMargin=30,leftMargin=30,
                        topMargin=30,bottomMargin=30)

    elements = []
    for block in _row_blocks(rows, len(original.seq), max_row):
        elements.append(Table(block, hAlign='LEFT'))
        # Empty one-row table used as a spacer between blocks.
        elements.append(Table([["", "", "", "", ""]]))

    doc.build(elements)

    c = canvas.Canvas("./further_information.pdf")
    c.drawString(100,750,"CDS regions:")
    upper_bound = 750
    for feat in original.features:
        if feat.type.lower() in ["gene", "cds"]:
            upper_bound -= 15
            if feat.location.strand == -1:
                sign = "-"
            else:
                sign = "+"
            c.drawString(115,upper_bound, str("[") + str(feat.location.start)+ ":" + str(feat.location.end) + "]" + "(" + sign + ")")
    upper_bound -= 30
    c.drawString(100,upper_bound,"Patterns and the corresponding targets on the plasmid sequence:")
    for f_pattern in f_patterns.keys():
        upper_bound -= 15
        c.drawString(115,upper_bound,f_pattern + ":")
        for val in f_patterns[f_pattern]:
            upper_bound -= 15
            c.drawString(130,upper_bound,str(val))
        upper_bound -= 5

    upper_bound -= 30
    c.drawString(100,upper_bound,"Identifiers of the targets found in the plasmid sequence:")
    for target in targets.keys():
        upper_bound -= 15
        c.drawString(115,upper_bound,target + ": " + targets[target])

    c.save()


    return


def produce_random_targets(sequence):
    """Placeholder for a generator of random test targets (not implemented).

    Planned cases, none of which exists yet:

    * a target spanning two contiguous CDS
    * a target in a non-coding region
    * a target in a coding region
    * a target overlapping a CDS on the left
    * a target overlapping a CDS on the right

    ``sequence`` is currently ignored and the function returns None.
    """
    return None