Mercurial > repos > gianmarco_piccinno > cs_tool_project_rm
view CodonSwitchTool/functions.py @ 2:aad5e435e4dc draft default tip
Uploaded
author | gianmarco_piccinno |
---|---|
date | Tue, 21 May 2019 05:24:56 -0400 |
parents | |
children |
line wrap: on
line source
"""Helper functions of the codon-switch tool.

The module offers utilities to expand restriction patterns, locate the
positions they hit on a plasmid, generalise those positions into a regular
expression of admissible variants, score/rank the variants and render a
comparison either on the terminal or as PDF files.

NOTE(review): ``pattern``, ``plasmid`` and ``sre_yield`` are used below but
never imported explicitly; presumably they arrive through
``from syngenic import *`` -- confirm against the ``syngenic`` package.
"""

# The original import order is kept on purpose: names bound after the
# star-import win over whatever ``syngenic`` exports.
import string
from syngenic import *
from Bio.Seq import Seq
from Bio.SeqFeature import SeqFeature, FeatureLocation
from pprint import pprint
try:
    from itertools import izip
except ImportError:  # Python 3: the built-in zip is already lazy
    izip = zip
import numpy as np
import pandas as pd
import re
from Bio import SeqIO


# Splits a string into single word characters; runs of non-word characters
# are kept together as one element (behaviour of the original expression).
_BASE_SPLITTER = re.compile(r'(\w{1})')

_CODING_TYPES = ["gene", "cds"]


def _split_bases(text):
    """Return the non-empty pieces of *text* split around each word char."""
    return [piece for piece in _BASE_SPLITTER.split(str(text)) if piece]


def _coding_features(record):
    """Return the features of *record* whose type is gene/CDS (any case)."""
    return [f for f in record.features if f.type.lower() in _CODING_TYPES]


def all_patterns(input_=None):
    """Expand every pattern with its reverse complement.

    Parameters
    ----------
    input_ : iterable, optional
        Objects offering ``reverse_complement()`` (the original code hints
        at Biopython ``Seq`` objects). ``None`` means "no patterns".

    Returns
    -------
    tuple(list, list)
        ``patts``: string form of each pattern followed by its reverse
        complement; ``n_patts``: ``pattern(...).plan_ambiguity()`` of the
        same objects, in the same order.
    """
    patts = []
    n_patts = []
    for patt in ([] if input_ is None else input_):
        revc = patt.reverse_complement()
        for variant in (patt, revc):
            patts.append(str(variant))
        for variant in (patt, revc):
            n_patts.append(pattern(variant).plan_ambiguity())
    return patts, n_patts


def fake_from_real(path=None, id_=None, name=None):
    """Build a small test plasmid out of a GenBank record.

    The result is the first 10 bases of the record, then for each ``CDS``
    feature its first 9 and last 9 bases, then the last 10 bases of the
    record. Every copied CDS is re-annotated as a ``gene`` feature.

    Parameters
    ----------
    path : str
        GenBank file holding exactly one record.
    id_, name :
        Values assigned to ``id`` and ``name`` of the returned object.

    NOTE(review): when the record has no CDS feature the accumulator stays
    a plain sequence slice (no ``features``); behaviour kept as found.
    """
    # ``with`` closes the handle, which the original left open.
    with open(path, "r") as handle:
        plasmid_seq = SeqIO.read(handle, "genbank")

    f_p = plasmid_seq.seq[:10]
    f_CDS = []
    for f in plasmid_seq.features:
        if f.type == "CDS":
            tmp_start = len(f_p)
            tmp_cds = (plasmid_seq[f.location.start:f.location.start+9]
                       + plasmid_seq[f.location.end-9:f.location.end])
            tmp_end = tmp_start + len(tmp_cds)
            f_p += tmp_cds
            f_CDS.append(SeqFeature(FeatureLocation(tmp_start, tmp_end),
                                    type="gene",
                                    strand=f.location.strand))
    f_p += plasmid_seq.seq[-10:]

    for feat in f_CDS:
        f_p.features.append(feat)
    f_p.id = id_
    f_p.name = name
    return f_p


def punctuate_targets(f_patts, n_pl):
    """Map every targeted position to the set of pattern letters hitting it.

    Parameters
    ----------
    f_patts : dict
        ``{pattern_string: [hit, ...]}`` where ``hit[1]`` and ``hit[2]``
        are the start and end of the hit. An end smaller than the start
        is treated as a hit wrapping around the end of the sequence.
    n_pl :
        Object whose ``len()`` is the plasmid length.

    Returns
    -------
    dict
        ``{position: set(letters)}``.
    """
    n_poss = {}
    max_len = len(n_pl)
    for key in f_patts:
        for el in f_patts[key]:
            if not el[2] < el[1]:
                positions = list(range(el[1], el[2]))
            else:
                # Wrap-around hit; lists are concatenated explicitly
                # because ``range + range`` only works on Python 2.
                positions = (list(range(el[1], max_len))
                             + list(range(0, el[2])))
            for i, pos in enumerate(positions):
                n_poss.setdefault(pos, set()).add(key[i])
    return n_poss


def print_seq(n_pl, ind_range=None):
    """Print a sequence in rows of nine symbols with indices underneath.

    Parameters
    ----------
    n_pl : str-like
        Sequence to print.
    ind_range : sequence of two ints, optional
        ``[start, end]`` slice to print; the whole sequence when ``None``.
    """
    if ind_range is None:
        data = _split_bases(n_pl)
        index = range(len(n_pl))
    else:
        data = _split_bases(n_pl[ind_range[0]:ind_range[1]])
        index = range(ind_range[0], ind_range[1])

    seq_line = ""
    ind_line = ""
    for i, symbol in enumerate(data):
        if i % 9 == 0 and i > 0:
            # A row is complete: flush it and start a new one.
            print("\n")
            print(seq_line)
            print(ind_line)
            seq_line = ""
            ind_line = ""
        label = str(index[i])
        # The symbol is right-aligned on the width of its index.
        seq_line += " " + " " * (len(label) - 1) + symbol
        ind_line += " " + label
    print("\n")
    print(seq_line)
    print(ind_line)
    return None


def generalization(n_poss, n_pl, synonims_tables, reduced=False):
    """Turn the plasmid into a regular expression of admissible variants.

    For a targeted position inside a ``CDS``/``gene`` feature the letter is
    replaced by the character class of the letters found, at the same
    codon offset, in the codons listed for the current codon in
    ``synonims_tables`` (``"synonims"`` for strand +1, ``"anti_synonims"``
    for strand -1). For a targeted position outside any such feature the
    letter is replaced by a two-letter class taken from a fixed table.

    Parameters
    ----------
    n_poss : dict
        ``{position: set(letters)}`` as produced by ``punctuate_targets``.
    n_pl :
        Plasmid exposing ``seq`` and ``features``.
    synonims_tables : dict
        Holds the ``"synonims"`` and ``"anti_synonims"`` lookups.
    reduced : bool
        Accepted for compatibility; both branches of the original code
        were identical, so the value has no effect.

    Returns
    -------
    ``sre_yield.AllStrings`` over the generalised expression.
    """
    transversions = {"A": "[AT]", "T": "[TA]", "C": "[CG]", "G": "[GC]"}
    new_poss = {}

    for pos in n_poss:
        in_cds = False
        for feat in n_pl.features:
            if feat.type not in ["CDS", "gene"]:
                continue
            if not (feat.location.start <= pos < feat.location.end):
                continue
            in_cds = True
            if n_poss[pos] == {"N"}:
                continue
            if feat.strand == +1:
                table_name = "synonims"
            elif feat.strand == -1:
                table_name = "anti_synonims"
            else:
                continue
            # Offset of the position inside its codon (0, 1 or 2).
            offset = (pos - feat.location.start) % 3
            tmp_codon = n_pl.seq[pos - offset:pos - offset + 3]
            bases = set(codon[offset]
                        for codon in synonims_tables[table_name][tmp_codon])
            if len(bases) > 1:
                # Sorted so the generated expression is reproducible.
                new_poss[pos] = "[" + "".join(sorted(bases)) + "]"

        if not in_cds:
            plain = set.intersection(n_poss[pos], {"A", "T", "C", "G"})
            if plain:
                # Pick deterministically among the plain bases; the
                # original popped an arbitrary element, which could also
                # be an ambiguity letter missing from the table.
                new_poss[pos] = transversions[sorted(plain)[0]]

    n_seq = _split_bases(n_pl.seq)
    new_plasmid_generalized = "".join(
        new_poss[pos] if pos in new_poss else n_seq[pos]
        for pos in range(len(n_seq)))
    return sre_yield.AllStrings(new_plasmid_generalized)


def evaluate_plasmids(plasmids=None,
                      original_plasmid=None,
                      codon_usage_table=None,
                      n_patts=None,
                      f_patts=None):
    """Keep the candidate plasmids that are usable and describe them.

    A candidate is kept when it differs from the original sequence, every
    gene/CDS feature translates to the same protein as in the original,
    and ``findpatterns`` reports an empty hit list for every pattern.

    Parameters
    ----------
    plasmids : sized iterable of str
        Candidate sequences.
    original_plasmid :
        Reference plasmid (``seq``, ``features``, ``extract``).
    codon_usage_table :
        Indexed by codon through ``.loc`` and exposing ``"Proportion"``.
    n_patts, f_patts :
        Forwarded to ``plasmid.findpatterns``.

    Returns
    -------
    dict
        ``"Plasmid_<i>"`` (``i`` = position of the candidate in
        *plasmids*) mapped to its modified positions, codon usage values,
        their mean and standard deviation, the number of modifications
        and the sequence; plus ``"original_plasmids"``.
    """
    from syngenic import plasmid

    useful = {}
    for i, tmp_pl in enumerate(plasmids):
        differs = tmp_pl != original_plasmid.seq
        if not differs:
            continue

        same_protein = all(
            Seq(plasmid(tmp_pl).extract(feat)).translate()
            == Seq(original_plasmid.extract(feat)).translate()
            for feat in original_plasmid.features
            if feat.type.lower() in _CODING_TYPES)
        if not same_protein:
            continue

        hits = list(plasmid(tmp_pl).findpatterns(n_patts, f_patts).values())
        # An empty report does not count as "pattern free" (as before).
        if not (hits and all(el == [] for el in hits)):
            continue

        print("\t" + str(i) + "/" + str(len(plasmids)))
        modified = [j for j, (a1, a2)
                    in enumerate(izip(tmp_pl, original_plasmid))
                    if a1 != a2]
        codon_usage = []
        for modified_position in modified:
            for feat in original_plasmid.features:
                if feat.type not in ["CDS", "gene"]:
                    continue
                if not (feat.location.start <= modified_position
                        < feat.location.end):
                    continue
                offset = (modified_position - feat.location.start) % 3
                first = modified_position - offset
                # Always read the codon from the candidate; the original
                # read it from the reference plasmid in one branch.
                tmp_codon = tmp_pl[first:first + 3]
                if feat.strand != +1:
                    tmp_codon = str(Seq(tmp_codon).reverse_complement())
                codon_usage.append(
                    codon_usage_table.loc[tmp_codon]["Proportion"])

        useful["Plasmid_" + str(i)] = {
            "modified_positions": modified,
            "codon_usage": codon_usage,
            "number_of_modification": len(modified),
            "sequence": tmp_pl,
            "mean_codon_usage": np.mean(codon_usage),
            "std_codon_usage": np.std(codon_usage),
        }

    useful["original_plasmids"] = original_plasmid
    return useful


def rank_plasmids(original_useful_plasmids=None):
    """Rank the plasmids returned by ``evaluate_plasmids``.

    Rows are ordered by decreasing mean codon usage, then increasing
    standard deviation, then increasing number of modifications.

    Returns
    -------
    pandas.DataFrame
        One row per plasmid with the three ranking columns; an empty
        frame with those columns when there is nothing to rank.
    """
    columns = ["mean_codon_usage", "std_codon_usage",
               "number_of_modification"]
    summary = {}
    for key in original_useful_plasmids:
        if key == "original_plasmids":
            continue
        summary[key] = dict(
            (column, original_useful_plasmids[key][column])
            for column in columns)
    if not summary:
        return pd.DataFrame(columns=columns)

    dat_plasmids = pd.DataFrame(summary).T
    # sort_values returns a new frame; the original dropped the result.
    return dat_plasmids.sort_values(columns, ascending=[False, True, True])


def _target_rows(original, f_patterns):
    """Return (targets, rows): target ids and one display row per target."""
    targets = {}
    for number, patt in enumerate(f_patterns):
        if f_patterns[patt] != []:
            targets["Target" + str(number)] = patt

    seq_len = len(original.seq)
    rows = []
    for label in sorted(targets):
        row = ["|"] * seq_len
        for hit in f_patterns[targets[label]]:
            site, start, end = hit[0], hit[1], hit[2]
            if start < end:
                for pos in range(start, end):
                    row[pos] = site[pos - start]
            else:
                # Hit wrapping around the end of the sequence.
                for pos in range(start, seq_len):
                    row[pos] = site[pos - start]
                tail = site[-end:]
                for pos in range(end):
                    row[pos] = tail[pos]
        rows.append([label] + row)
    return targets, rows


def _target_position_row(original, patterns):
    """Return the row flagging targeted positions ('T', or '+' if many)."""
    row = ["TargetPositions"]
    for pos in range(len(original)):
        if pos in patterns:
            row.append("+" if len(patterns[pos]) > 1 else "T")
        else:
            row.append(" ")
    return row


def _annotation_row(original, coding):
    """Return the row marking each coding feature as '*___/'."""
    row = ["Annotation"]
    covered = 0
    for feat in coding:
        row.extend("_" for _ in range(covered, feat.location.start))
        row.append("*")
        row.extend("_" for _ in range(
            feat.location.end - feat.location.start - 2))
        row.append("/")
        covered = feat.location.end
    row.extend("_" for _ in range(covered, len(original)))
    return row


def _orientation_row(original, coding, bases, mark_odd, mark_even):
    """Return the strand row and decorate codons of *bases* in place.

    NOTE(review): codons are addressed by absolute position although
    *bases* is sliced by ``ind_range``; kept as found.
    """
    row = ["CDS_Orientation"]
    covered = 0
    codon_count = 0
    for feat in coding:
        row.extend("_" for _ in range(covered, feat.location.start))
        for first in range(feat.location.start, feat.location.end, 3):
            mark = mark_odd if codon_count % 2 == 1 else mark_even
            for pos in (first, first + 1, first + 2):
                bases[pos] = mark(bases[pos])
            codon_count += 1
            if feat.strand == +1:
                row.extend(["-", "-", ">"])
            if feat.strand == -1:
                row.extend(["<", "-", "-"])
        covered = feat.location.end
    row.extend("_" for _ in range(covered, len(original)))
    return row


def _variant_rows(original, others, tot, annotation_information,
                  ind_range, mark_modified):
    """Return one row per variant with its modified positions decorated."""
    rows = []
    for name in others:
        row = [name] + _split_bases(
            tot[name]["sequence"][ind_range[0]:ind_range[1]])
        modified = set(annotation_information[name]["modified_positions"])
        for pos in range(len(original)):
            if pos in modified:
                row[pos + 1] = mark_modified(row[pos + 1])
        rows.append(row)
    return rows


def _comparison_rows(original, others, annotation_information, tot,
                     ind_range, patterns, f_patterns,
                     mark_odd, mark_even, mark_modified):
    """Return (targets, rows) shared by the terminal and PDF renderers.

    ``rows`` is flat and ordered: target rows, target positions,
    annotation, orientation, original sequence, variants, index.
    """
    targets, target_lists = _target_rows(original, f_patterns)
    target_positions = _target_position_row(original, patterns)
    coding = _coding_features(original)
    annot = _annotation_row(original, coding)

    if ind_range is None:
        ind_range = [0, len(original)]
    bases = _split_bases(original.seq[ind_range[0]:ind_range[1]])
    direction = _orientation_row(original, coding, bases,
                                 mark_odd, mark_even)
    new_plasmids = _variant_rows(original, others, tot,
                                 annotation_information, ind_range,
                                 mark_modified)
    index = ["Index"] + [str(i) for i in range(ind_range[0], ind_range[1])]

    rows = list(target_lists)
    rows.extend([target_positions, annot, direction, ["Original"] + bases])
    rows.extend(new_plasmids)
    rows.append(index)
    return targets, rows


def _count_blocks(original, max_row):
    """Return how many blocks of *max_row* cells a labelled row needs.

    A row holds the label plus one cell per base, so ``L // max_row + 1``
    blocks are always required. The original expression tested the
    quotient instead of the length and lost the last (or only) block.
    """
    return len(original.seq) // max_row + 1


def _block_rows(rows, block_index, max_row):
    """Return the slice of every row belonging to block *block_index*.

    The first block already starts with the label (cell 0 of the row);
    later blocks get the label prepended.
    """
    start = block_index * max_row
    block = []
    for row in rows:
        chunk = row[start:start + max_row]
        block.append(chunk if block_index == 0 else [row[0]] + chunk)
    return block


def _ansi(colour_code):
    """Return a function wrapping a string in a bold ANSI colour."""
    def mark(base):
        return "\033[1;" + colour_code + ";40m" + base + "\033[0m"
    return mark


def print_color_seq(original = None,
                    others = None,
                    annotation_information = None,
                    tot = None,
                    ind_range = None,
                    patterns = None,
                    f_patterns = None,
                    patts = None,
                    max_row = 18):
    """Print an ANSI-coloured comparison of the original and the variants.

    Codons of coding features alternate between green and red, modified
    positions of the variants are green. The comparison is printed in
    blocks of *max_row* cells, followed by the coding features and by
    *f_patterns*.

    Parameters
    ----------
    original : plasmids["original_plasmid"]
    others : def_pls (identifiers of the variants to show)
    annotation_information : useful_plasmids
    tot : plasmids
    ind_range : ``[start, end]`` or None for the whole sequence
    patterns : ``{position: letters}`` of targeted positions
    f_patterns : ``{pattern: [hit, ...]}``
    patts : unused, kept for interface compatibility
    max_row : number of cells per printed block
    """
    _, rows = _comparison_rows(
        original, others, annotation_information, tot, ind_range,
        patterns, f_patterns,
        mark_odd=_ansi("31"), mark_even=_ansi("32"),
        mark_modified=_ansi("32"))

    for block_index in range(_count_blocks(original, max_row)):
        block = _block_rows(rows, block_index, max_row)
        print("\n")
        label_width = max(len(row[0]) for row in block)
        index_row = block[-1]
        last = len(block) - 1
        for row_number, row in enumerate(block):
            row[0] = row[0].ljust(label_width)
            if row_number < last:
                # Every cell is indented by the width of the index
                # printed in the same column of the last row.
                for col in range(1, len(row)):
                    if col < len(index_row):
                        row[col] = " " * len(str(index_row[col])) + row[col]
            print(" ".join(row))

    print("\n")
    print(_coding_features(original))
    print("\n")
    print(f_patterns)
    return


def print_to_pdf(original = None,
                 others = None,
                 annotation_information = None,
                 tot = None,
                 ind_range = None,
                 patterns = None,
                 f_patterns = None,
                 patts = None,
                 max_row = 9):
    """Write the comparison and its legend to two PDF files.

    ``comparison_syngenic_plasmids.pdf`` holds one table per block of
    *max_row* cells; codons are prefixed alternately with ``s`` and
    ``f`` and modified positions carry an ``m`` suffix.
    ``./further_information.pdf`` lists the coding regions, the patterns
    with their hits and the target identifiers.

    Parameters
    ----------
    original : plasmids["original_plasmid"]
    others : def_pls (identifiers of the variants to show)
    annotation_information : useful_plasmids
    tot : plasmids
    ind_range : ``[start, end]`` or None for the whole sequence
    patterns : ``{position: letters}`` of targeted positions
    f_patterns : ``{pattern: [hit, ...]}``
    patts : unused, kept for interface compatibility
    max_row : number of cells per table
    """
    from reportlab.lib import colors
    from reportlab.lib.pagesizes import letter
    from reportlab.platypus import SimpleDocTemplate, Table, TableStyle
    from reportlab.pdfgen import canvas

    targets, rows = _comparison_rows(
        original, others, annotation_information, tot, ind_range,
        patterns, f_patterns,
        mark_odd=lambda base: 'f' + base,
        mark_even=lambda base: 's' + base,
        mark_modified=lambda base: base + "m")

    doc = SimpleDocTemplate("comparison_syngenic_plasmids.pdf",
                            pagesize=letter,
                            rightMargin=30, leftMargin=30,
                            topMargin=30, bottomMargin=30)
    elements = []
    for block_index in range(_count_blocks(original, max_row)):
        elements.append(Table(_block_rows(rows, block_index, max_row),
                              hAlign='LEFT'))
        # Empty table used as a spacer between two blocks.
        elements.append(Table([["", "", "", "", ""]]))
    doc.build(elements)

    c = canvas.Canvas("./further_information.pdf")
    c.drawString(100, 750, "CDS regions:")
    upper_bound = 750
    for feat in original.features:
        if feat.type.lower() in _CODING_TYPES:
            upper_bound -= 15
            sign = "-" if feat.location.strand == -1 else "+"
            c.drawString(115, upper_bound,
                         "[" + str(feat.location.start) + ":"
                         + str(feat.location.end) + "]" + "(" + sign + ")")

    upper_bound -= 30
    c.drawString(100, upper_bound,
                 "Patterns and the corresponding targets on the plasmid sequence:")
    for f_pattern in f_patterns:
        upper_bound -= 15
        c.drawString(115, upper_bound, f_pattern + ":")
        for val in f_patterns[f_pattern]:
            upper_bound -= 15
            c.drawString(130, upper_bound, str(val))
        upper_bound -= 5

    upper_bound -= 30
    c.drawString(100, upper_bound,
                 "Identifiers of the targets found in the plasmid sequence:")
    for target in targets:
        upper_bound -= 15
        c.drawString(115, upper_bound, target + ": " + targets[target])
    c.save()
    return


def produce_random_targets(sequence):
    """Placeholder: nothing is produced yet, ``None`` is returned.

    Planned cases (from the original notes):
    - a target on two contiguous CDS
    - a target in a non-coding region
    - a target in a coding region
    - a target on an overlapping left
    - a target on an overlapping right
    """
    return