Mercurial > repos > gianmarco_piccinno > cs_tool_project_rm
diff CodonSwitchTool/functions.py @ 2:aad5e435e4dc draft default tip
Uploaded
author | gianmarco_piccinno |
---|---|
date | Tue, 21 May 2019 05:24:56 -0400 |
parents | |
children |
line wrap: on
line diff
"""Utility functions of the CodonSwitchTool.

The functions below locate pattern targets on a plasmid, generalise the
plasmid sequence into a regular expression of allowed substitutions, evaluate
and rank the candidate plasmids, and report the result on the terminal or as
PDF files.

NOTE(review): ``pattern``, ``plasmid`` (and possibly other names) are expected
to come from ``from syngenic import *`` -- confirm against that module.
"""

import re
import string

from syngenic import *
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqFeature import SeqFeature, FeatureLocation
from pprint import pprint

try:
    from itertools import izip
except ImportError:
    # Python 3 has no izip; the builtin zip is already lazy.
    izip = zip

import numpy as np
import pandas as pd


# Same pattern the original code passed to re.split everywhere.
_SYMBOL_SPLITTER = re.compile(r'(\w{1})')

# Feature types (lower-cased) that are treated as coding regions.
_CODING_TYPES = ("gene", "cds")


def _split_symbols(text):
    """Split *text* into tokens: one per word character, empty pieces dropped.

    Runs of non-word characters are kept as a single token, exactly as
    ``filter(None, re.split(r'(\\w{1})', text))`` did on Python 2.
    """
    return [piece for piece in _SYMBOL_SPLITTER.split(str(text)) if piece]


def all_patterns(input_=None):
    """Return the patterns and their reverse complements.

    :param input_: iterable of objects offering ``reverse_complement()``
        (``None`` is treated as an empty iterable).
    :returns: ``(patts, n_patts)`` where ``patts`` holds ``str(p)`` and
        ``str(reverse complement)`` for every pattern, and ``n_patts`` holds
        ``pattern(...).plan_ambiguity()`` for the same objects in the same
        order.
    """
    patts = []
    n_patts = []

    for patt in (input_ if input_ is not None else []):
        revc = patt.reverse_complement()

        patts.append(str(patt))
        patts.append(str(revc))

        n_patts.append(pattern(patt).plan_ambiguity())
        n_patts.append(pattern(revc).plan_ambiguity())

    return patts, n_patts


def fake_from_real(path=None, id_=None, name=None):
    """Build a shortened plasmid from a GenBank file.

    The result is the first 10 bases, then for every ``CDS`` feature its first
    9 and last 9 bases, then the last 10 bases of the record.  Each shortened
    CDS is annotated as a ``gene`` feature on the result.

    :param path: path of the GenBank file to read.
    :param id_: value assigned to the ``id`` attribute of the result.
    :param name: value assigned to the ``name`` attribute of the result.
    :returns: the shortened sequence object.
    """
    # The handle is closed as soon as the record has been parsed.
    with open(path, "r") as handle:
        plasmid_seq = SeqIO.read(handle, "genbank")

    f_p = plasmid_seq.seq[:10]
    f_CDS = []
    for f in plasmid_seq.features:
        if f.type == "CDS":
            tmp_start = len(f_p)
            tmp_cds = (plasmid_seq[f.location.start:f.location.start + 9]
                       + plasmid_seq[f.location.end - 9:f.location.end])
            tmp_end = tmp_start + len(tmp_cds)
            f_p += tmp_cds
            f_CDS.append(SeqFeature(FeatureLocation(tmp_start, tmp_end),
                                    type="gene",
                                    strand=f.location.strand))
    f_p += plasmid_seq.seq[-10:]

    # NOTE(review): with no CDS feature f_p is still the bare sequence slice;
    # whether that object has ``features`` depends on its type -- confirm.
    for feat in f_CDS:
        f_p.features.append(feat)
    f_p.id = id_
    f_p.name = name

    return f_p


def punctuate_targets(f_patts, n_pl):
    """Map every plasmid position covered by a target to its pattern symbols.

    :param f_patts: dict ``pattern -> list of hits``; a hit is indexed as
        ``hit[1]`` (start) and ``hit[2]`` (end).  A hit whose end is smaller
        than its start wraps around the end of the plasmid.
    :param n_pl: the plasmid; only ``len(n_pl)`` is used.
    :returns: dict ``position -> set of pattern symbols`` found there.
    """
    n_poss = {}
    max_len = len(n_pl)

    for key in f_patts:
        for el in f_patts[key]:
            start, end = el[1], el[2]
            if end < start:
                # Target spanning the origin: tail of the plasmid, then head.
                positions = list(range(start, max_len)) + list(range(0, end))
            else:
                positions = list(range(start, end))
            for offset, pos in enumerate(positions):
                n_poss.setdefault(pos, set()).add(key[offset])

    return n_poss


def print_seq(n_pl, ind_range=None):
    """Print a sequence in rows of nine symbols with the indices underneath.

    :param n_pl: the sequence to print.
    :param ind_range: optional ``[start, stop]``; when given only that slice
        is printed and the indices start at ``start``.
    :returns: ``None``.
    """
    if ind_range is None:
        data = _split_symbols(n_pl)
        index = list(range(len(n_pl)))
    else:
        data = _split_symbols(n_pl[ind_range[0]:ind_range[1]])
        index = list(range(ind_range[0], ind_range[1]))

    seq = [""]
    ind = [""]
    j = 0

    for i, symbol in enumerate(data):
        if i % 9 == 0 and i > 0:
            # A row is complete: open the next one and print the finished one.
            j += 1
            seq.append("")
            ind.append("")
            print("\n")
            print(seq[j - 1])
            print(ind[j - 1])

        label = str(index[i])
        # Right-align the symbol over the last digit of its index.
        seq[j] += " " + " " * (len(label) - 1) + symbol
        ind[j] += " " + label

    print("\n")
    print(seq[j])
    print(ind[j])

    return None


def generalization(n_poss, n_pl, synonims_tables, reduced=False):
    """Turn the plasmid into a regular expression of allowed substitutions.

    For a target position inside a ``CDS``/``gene`` feature the bases found at
    the same codon offset in the synonymous codons are allowed; outside any
    such feature the base is allowed to swap within its A/T or C/G pair.

    :param n_poss: dict ``position -> set of pattern symbols`` (see
        :func:`punctuate_targets`).
    :param n_pl: the plasmid; ``features`` and ``seq`` are read.
    :param synonims_tables: dict with keys ``"synonims"`` and
        ``"anti_synonims"``, each mapping a codon to its alternatives.
    :param reduced: accepted for compatibility; it has no effect because both
        branches of the original implementation were identical.
    :returns: ``sre_yield.AllStrings`` over the generalised sequence.
    """
    # Imported here because this file never imports it at module level.
    import sre_yield

    transversions = {"A": "[AT]",
                     "T": "[TA]",
                     "C": "[CG]",
                     "G": "[GC]"}
    concrete_bases = {"A", "T", "C", "G"}

    new_poss = {}

    for pos in n_poss:
        in_cds = False
        for feat in n_pl.features:
            inside = (pos >= feat.location.start) and (pos < feat.location.end)
            if not (inside and feat.type in ("CDS", "gene")):
                continue
            in_cds = True

            if n_poss[pos] == {"N"}:
                continue

            # Offset of the position inside its codon (0, 1 or 2).
            offset = (pos - feat.location.start) % 3
            codon_start = pos - offset
            tmp_codon = n_pl.seq[codon_start:codon_start + 3]

            if feat.strand == +1:
                table = synonims_tables["synonims"]
            elif feat.strand == -1:
                table = synonims_tables["anti_synonims"]
            else:
                continue

            bases = set(codon[offset] for codon in table[tmp_codon])
            if len(bases) > 1:
                # Sorted so the character class does not depend on set order.
                new_poss[pos] = "[" + "".join(sorted(bases)) + "]"

        if not in_cds:
            candidates = n_poss[pos] & concrete_bases
            if candidates:
                # Only A/C/G/T have a substitution class; picking from the
                # intersection avoids a KeyError on other ambiguity symbols
                # and makes the choice deterministic.
                new_poss[pos] = transversions[sorted(candidates)[0]]

    n_seq = _split_symbols(n_pl.seq)
    new_plasmid_generalized = "".join(
        new_poss.get(pos, symbol) for pos, symbol in enumerate(n_seq))

    return sre_yield.AllStrings(new_plasmid_generalized)


def evaluate_plasmids(plasmids=None,
                      original_plasmid=None,
                      codon_usage_table=None,
                      n_patts=None,
                      f_patts=None):
    """Keep the candidate plasmids that are usable and describe them.

    A candidate is kept when it differs from the original, every
    ``gene``/``cds`` feature translates to the same protein as in the
    original, and ``findpatterns`` reports no hit for any pattern.

    :param plasmids: sized iterable of candidate sequences.
    :param original_plasmid: the original plasmid (``seq``, ``features`` and
        ``extract`` are used).
    :param codon_usage_table: table indexed by codon with a ``"Proportion"``
        column, accessed through ``.loc``.
    :param n_patts: forwarded to ``plasmid.findpatterns``.
    :param f_patts: forwarded to ``plasmid.findpatterns``.
    :returns: dict ``"Plasmid_<i>" -> description`` plus the key
        ``"original_plasmids"`` holding ``original_plasmid``.
    """
    from syngenic import plasmid

    useful = {}

    for i, tmp_pl in enumerate(plasmids):
        if tmp_pl == original_plasmid.seq:
            continue

        # Both checks are always evaluated, as in the original code.
        same_protein = all([
            Seq(plasmid(tmp_pl).extract(feat)).translate()
            == Seq(original_plasmid.extract(feat)).translate()
            for feat in original_plasmid.features
            if feat.type.lower() in _CODING_TYPES
        ])
        hits = list(plasmid(tmp_pl).findpatterns(n_patts, f_patts).values())
        pattern_free = bool(hits) and all(el == [] for el in hits)

        if not (same_protein and pattern_free):
            continue

        print("\t" + str(i) + "/" + str(len(plasmids)))
        modified = [j for j, (a1, a2)
                    in enumerate(izip(tmp_pl, original_plasmid)) if a1 != a2]

        codon_usage = []
        for modified_position in modified:
            for feat in original_plasmid.features:
                if feat.type not in ("CDS", "gene"):
                    continue
                if not ((modified_position >= feat.location.start)
                        and (modified_position < feat.location.end)):
                    continue
                offset = (modified_position - feat.location.start) % 3
                codon_start = modified_position - offset
                # Always read the codon from the candidate.  The original
                # read it from the unmodified plasmid for the third codon
                # position on the + strand, unlike every other branch.
                tmp_codon = tmp_pl[codon_start:codon_start + 3]
                if feat.strand != +1:
                    tmp_codon = str(Seq(tmp_codon).reverse_complement())
                codon_usage.append(
                    codon_usage_table.loc[tmp_codon]["Proportion"])

        useful["Plasmid_" + str(i)] = {
            "modified_positions": modified,
            "codon_usage": codon_usage,
            "number_of_modification": len(modified),
            "sequence": tmp_pl,
            "mean_codon_usage": np.mean(codon_usage),
            "std_codon_usage": np.std(codon_usage),
        }

    useful["original_plasmids"] = original_plasmid

    return useful


def rank_plasmids(original_useful_plasmids=None):
    """Rank the useful plasmids.

    :param original_useful_plasmids: dict as returned by
        :func:`evaluate_plasmids`.
    :returns: ``pandas.DataFrame`` with one row per plasmid and the columns
        ``mean_codon_usage``, ``std_codon_usage`` and
        ``number_of_modification``, sorted by highest mean codon usage, then
        lowest standard deviation, then fewest modifications.
    """
    rows = {}
    for key in original_useful_plasmids:
        if key == "original_plasmids":
            continue
        entry = original_useful_plasmids[key]
        rows[key] = {
            "mean_codon_usage": entry["mean_codon_usage"],
            "std_codon_usage": entry["std_codon_usage"],
            "number_of_modification": entry["number_of_modification"],
        }

    dat_plasmids = pd.DataFrame(rows).T
    if dat_plasmids.empty:
        # Nothing to rank (and no columns to sort by).
        return dat_plasmids

    # sort_values returns a new frame; the original discarded it and so
    # returned the plasmids unranked.
    return dat_plasmids.sort_values(
        ['mean_codon_usage', 'std_codon_usage', 'number_of_modification'],
        ascending=[False, True, True])


def _comparison_rows(original, others, annotation_information, tot,
                     ind_range, patterns, f_patterns,
                     mark_codon, mark_modified):
    """Build the labelled rows shared by the terminal and the PDF report.

    ``mark_codon(symbol, parity)`` decorates the bases of a codon (``parity``
    alternates 0/1 from codon to codon); ``mark_modified(cell)`` decorates a
    modified base of a candidate plasmid.

    Returns ``(targets, data)``: ``targets`` maps ``"Target<n>"`` to its
    pattern, and ``data`` maps 0..6 to the target rows, target positions,
    annotation, CDS orientation, original sequence, candidate plasmids and
    index.  Entries 0 and 5 are lists of rows, the others a single row.

    NOTE(review): only the sequence rows and the index honour ``ind_range``;
    the other rows always span the whole plasmid, as in the original code.
    """
    # --- single targets ---------------------------------------------------
    targets = {}
    for number, patt in enumerate(list(f_patterns.keys())):
        if f_patterns[patt] != []:
            targets["Target" + str(number)] = patt

    target_lists = []
    for label in sorted(targets):
        row = ["|"] * len(original.seq)
        for hit in f_patterns[targets[label]]:
            matched, start, end = hit[0], hit[1], hit[2]
            if start < end:
                for pos in range(start, end):
                    row[pos] = matched[pos - start]
            else:
                # Target spanning the origin of the plasmid.
                for pos in range(start, len(original.seq)):
                    row[pos] = matched[pos - start]
                for pos in range(end):
                    row[pos] = matched[-end:][pos]
        target_lists.append([label] + row)

    # --- aggregate targets ------------------------------------------------
    target_positions = ["TargetPositions"]
    for k in range(len(original)):
        if k in patterns:
            target_positions.append("+" if len(patterns[k]) > 1 else "T")
        else:
            target_positions.append(" ")

    # --- annotation -------------------------------------------------------
    coding = [f for f in original.features
              if f.type.lower() in _CODING_TYPES]

    annot = ["Annotation"]
    distance = 0
    for feat in coding:
        annot.extend("_" for _ in range(distance, feat.location.start))
        annot.append("*")
        annot.extend(
            "_" for _ in range(feat.location.end - feat.location.start - 2))
        distance = feat.location.end
        annot.append("/")
    annot.extend("_" for _ in range(distance, len(original)))

    # --- CDS orientation and codon marking ----------------------------------
    if ind_range is None:
        ind_range = [0, len(original)]

    original_row = _split_symbols(original.seq[ind_range[0]:ind_range[1]])
    direction = ["CDS_Orientation"]
    distance = 0
    alternating = 0
    for feat in coding:
        direction.extend("_" for _ in range(distance, feat.location.start))
        for counter in range(feat.location.start, feat.location.end, 3):
            for pos in (counter, counter + 1, counter + 2):
                original_row[pos] = mark_codon(original_row[pos],
                                               alternating % 2)
            alternating += 1
            if feat.strand == +1:
                direction.extend(["-", "-", ">"])
            if feat.strand == -1:
                direction.extend(["<", "-", "-"])
        distance = feat.location.end
    direction.extend("_" for _ in range(distance, len(original)))

    # --- candidate plasmids -------------------------------------------------
    new_plasmids = []
    for s in others:
        row = [s] + _split_symbols(
            tot[s]["sequence"][ind_range[0]:ind_range[1]])
        modified = set(annotation_information[s]["modified_positions"])
        for k in range(len(original)):
            if k in modified:
                row[k + 1] = mark_modified(row[k + 1])
        new_plasmids.append(row)

    # --- index --------------------------------------------------------------
    index = ["Index"] + [str(i) for i in range(ind_range[0], ind_range[1])]

    data = {0: target_lists,
            1: target_positions,
            2: annot,
            3: direction,
            4: ["Original"] + original_row,
            5: new_plasmids,
            6: index}

    return targets, data


def _split_into_blocks(data, seq_len, max_row):
    """Cut the rows of *data* into consecutive blocks of *max_row* cells.

    Every row of every block starts with the row label.  A full row holds the
    label plus ``seq_len`` cells, so ``seq_len // max_row + 1`` blocks are
    always needed.  The original condition produced no block at all for
    sequences shorter than *max_row*, dropped the last block when the count
    was a multiple of *max_row*, and repeated the label in a short first
    block.
    """
    blocks = []
    for i in range(seq_len // max_row + 1):
        start = i * max_row
        rows = []
        for l in range(7):
            group = data[l] if l in (0, 5) else [data[l]]
            for row in group:
                piece = list(row[start:start + max_row])
                # Block 0 already starts with the label stored in the row.
                rows.append(piece if i == 0 else [row[0]] + piece)
        blocks.append(rows)
    return blocks


def _ansi_codon(symbol, parity):
    """Colour a codon base for the terminal (odd codons red, even green)."""
    colour = "\033[1;31;40m" if parity == 1 else "\033[1;32;40m"
    return colour + symbol + "\033[0m"


def _ansi_modified(cell):
    """Colour a modified base of a candidate plasmid for the terminal."""
    return "\033[1;32;40m" + cell + "\033[0m"


def _pdf_codon(symbol, parity):
    """Tag a codon base for the PDF table ('f' odd codons, 's' even)."""
    return ("f" if parity == 1 else "s") + symbol


def _pdf_modified(cell):
    """Tag a modified base of a candidate plasmid for the PDF table."""
    return cell + "m"


def print_color_seq(original=None,
                    others=None,
                    annotation_information=None,
                    tot=None,
                    ind_range=None,
                    patterns=None,
                    f_patterns=None,
                    patts=None,
                    max_row=18):
    """Print the original and the candidate plasmids side by side in colour.

    :param original: the original plasmid.
    :param others: identifiers of the candidate plasmids to show.
    :param annotation_information: dict ``identifier -> description`` with a
        ``"modified_positions"`` entry.
    :param tot: dict ``identifier -> description`` with a ``"sequence"``
        entry.
    :param ind_range: optional ``[start, stop]`` of the sequence rows.
    :param patterns: dict ``position -> pattern symbols`` at that position.
    :param f_patterns: dict ``pattern -> list of (match, start, end)`` hits.
    :param patts: unused; kept for interface compatibility.
    :param max_row: number of cells per printed block.
    :returns: ``None``.
    """
    _targets, data = _comparison_rows(
        original, others, annotation_information, tot, ind_range,
        patterns, f_patterns, _ansi_codon, _ansi_modified)

    for rows in _split_into_blocks(data, len(original.seq), max_row):
        print("\n")

        width = max(len(row[0]) for row in rows)
        index_row = rows[-1]
        last = len(rows) - 1

        for f, row in enumerate(rows):
            # Pad the label so every row starts at the same column.
            row[0] = row[0] + " " * (width - len(row[0]))
            if f < last:
                # Indent each cell by the width of the index printed below.
                for l in range(1, len(row)):
                    pad = ""
                    if l < len(index_row):
                        pad = " " * len(str(index_row[l]))
                    row[l] = pad + row[l]
            print(" ".join(row))

    print("\n")
    print([f for f in original.features if f.type.lower() in _CODING_TYPES])
    print("\n")
    print(f_patterns)

    return


def print_to_pdf(original=None,
                 others=None,
                 annotation_information=None,
                 tot=None,
                 ind_range=None,
                 patterns=None,
                 f_patterns=None,
                 patts=None,
                 max_row=9):
    """Write the plasmid comparison and its legend to two PDF files.

    ``comparison_syngenic_plasmids.pdf`` receives one table per block of
    *max_row* cells; ``./further_information.pdf`` lists the coding regions,
    the patterns with their hits, and the target identifiers.

    The parameters are the same as for :func:`print_color_seq`.

    :returns: ``None``.
    """
    from reportlab.lib import colors
    from reportlab.lib.pagesizes import letter
    from reportlab.platypus import SimpleDocTemplate, Table, TableStyle
    from reportlab.pdfgen import canvas

    targets, data = _comparison_rows(
        original, others, annotation_information, tot, ind_range,
        patterns, f_patterns, _pdf_codon, _pdf_modified)

    doc = SimpleDocTemplate("comparison_syngenic_plasmids.pdf",
                            pagesize=letter,
                            rightMargin=30, leftMargin=30,
                            topMargin=30, bottomMargin=30)

    elements = []
    for rows in _split_into_blocks(data, len(original.seq), max_row):
        elements.append(Table(rows, hAlign='LEFT'))
        # Empty table used as a spacer between two blocks.
        elements.append(Table([["", "", "", "", ""]]))

    doc.build(elements)

    c = canvas.Canvas("./further_information.pdf")
    c.drawString(100, 750, "CDS regions:")
    upper_bound = 750
    for feat in original.features:
        if feat.type.lower() in _CODING_TYPES:
            upper_bound -= 15
            sign = "-" if feat.location.strand == -1 else "+"
            c.drawString(115, upper_bound,
                         str("[") + str(feat.location.start) + ":"
                         + str(feat.location.end) + "]" + "(" + sign + ")")

    upper_bound -= 30
    c.drawString(100, upper_bound,
                 "Patterns and the corresponding targets on the plasmid sequence:")
    for f_pattern in f_patterns:
        upper_bound -= 15
        c.drawString(115, upper_bound, f_pattern + ":")
        for val in f_patterns[f_pattern]:
            upper_bound -= 15
            c.drawString(130, upper_bound, str(val))
        upper_bound -= 5

    upper_bound -= 30
    c.drawString(100, upper_bound,
                 "Identifiers of the targets found in the plasmid sequence:")
    for target in targets:
        upper_bound -= 15
        c.drawString(115, upper_bound, target + ": " + targets[target])

    c.save()

    return


def produce_random_targets(sequence):
    """Placeholder: not implemented yet, always returns ``None``.

    Planned cases, taken from the original notes:

    * a target on two contiguous CDS
    * a target in a non-coding region
    * a target in a coding region
    * a target overlapping on the left
    * a target overlapping on the right
    """
    return