Mercurial > repos > gianmarco_piccinno > project_rm
comparison CodonSwitchTool/codon_switch.py @ 49:640db7b6847b draft default tip
Uploaded
| author | gianmarco_piccinno |
|---|---|
| date | Mon, 20 May 2019 18:10:52 -0400 |
| parents | bd35b13fabfb |
| children |
comparison
equal
deleted
inserted
replaced
| 48:611cac5e3066 | 49:640db7b6847b |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 __author__= "Gianmarco Piccinno" | |
| 4 __version__ = "1.0.0" | |
| 5 | |
| 6 from syngenic import * | |
| 7 from functions import * | |
| 8 from Bio import * | |
| 9 import argparse as ap | |
| 10 | |
# Example invocation (note that -f/--n_plasmids and -m/--max_row are optional):
#
#   python codon_switch.py
#       -i ./pEPSA5_annotated.gb
#       -l genbank
#       -p ./patterns.txt
#       -g S_aureus_JE2.gbf
#       -q genbank
#       -c Bacterial
#       -o ./output


def build_parser():
    """Return the argparse parser describing the script's command line.

    All option values are kept as strings (or None when an optional flag
    is omitted); numeric conversion happens in ``main``.
    """
    parser = ap.ArgumentParser(
        description="", formatter_class=ap.RawTextHelpFormatter)

    parser.add_argument(
        '-i', '--input_plasmid', help='Input plasmid', required=True)
    parser.add_argument(
        '-l', '--plasmid_format',
        help='Format of the plasmid: {fasta, genbank}', required=True)
    parser.add_argument(
        '-p', '--input_patterns',
        help='Input patterns separated by new_line', required=True)
    parser.add_argument(
        '-g', '--input_genome', help='Input annotated genome', required=True)
    parser.add_argument(
        '-q', '--genome_format',
        help='Format of the annotated genome: {fasta, gbk}', required=True)
    parser.add_argument(
        '-c', '--codon_table',
        help='Codon table to be used {Bacterial}', required=True)
    parser.add_argument(
        '-m', '--max_row', help='Max row length when print', required=False)
    parser.add_argument(
        '-d', '--demonstration',
        help='Use demonstration simplification', required=False)
    parser.add_argument(
        '-f', '--n_plasmids',
        help='Number of plasmids to keep from the head of the ranking '
             '(default: all)',
        required=False)
    parser.add_argument(
        '-o', '--output_folder',
        help='Folder for writing the output file', required=True)
    return parser


def parse_optional_int(value, default=None):
    """Convert an optional command-line value to int.

    Returns ``default`` when ``value`` is None (flag not given), otherwise
    ``int(value)``. A non-numeric value raises ValueError, as ``int`` does.
    """
    if value is None:
        return default
    return int(value)


def main(argv=None):
    """Run the codon-switch pipeline.

    ``argv`` is the list of command-line tokens; None makes argparse read
    ``sys.argv[1:]``.

    Steps, in order: read the plasmid, read the patterns, locate the
    patterns on the plasmid, build the candidate plasmids, compute the
    genome codon usage, evaluate and rank the candidates, write the
    selected ones as ``<name>.fa`` files and render the comparison with
    ``print_color_seq`` and ``print_to_pdf``.
    """
    args = vars(build_parser().parse_args(argv))

    # Close the plasmid file as soon as it has been parsed.
    with open(args['input_plasmid'], "r") as plasmid_handle:
        pl = SeqIO.read(plasmid_handle, args['plasmid_format'])

    if args['demonstration'] == "demonstration":
        # Demonstration mode only works on the first 3000 positions.
        pl = pl[0:3000]
    pats = read_patterns(args['input_patterns'])

    print(type(pl))
    print(pl)
    print(pl.seq)
    print(pl.features)

    n_pl = plasmid(pl)
    print(n_pl)
    print(len(n_pl))
    print(n_pl.features)

    patts, n_patts = all_patterns(input_=pats)

    f_patts = n_pl.findpatterns(n_patts, patts)
    print(f_patts)
    print(pl.seq)
    print(len(pl.seq))

    n_poss = punctuate_targets(f_patts, n_pl)
    print(n_poss)

    print_seq(n_pl.seq)

    synonims_tables = synonims_(table_name=args['codon_table'])

    plasmids = generalization(n_poss, n_pl, synonims_tables)
    print(len(plasmids))

    #########################################################
    # Read the annotated genome and compute its codon usage
    #########################################################

    genome = annotated_genome(read_annotated_genome(
        data=args['input_genome'], type_=args['genome_format']))

    out_genome = genome.codon_usage(args['codon_table'])
    print(out_genome.keys())
    print(out_genome["Table"])

    print(out_genome["Table"].loc["GCA"]["Proportion"])
    print(type(out_genome["Table"].loc["GCA"]["Proportion"]))

    #########################################################
    # Evaluate and rank the candidate plasmids
    #########################################################

    useful_plasmids = evaluate_plasmids(plasmids=plasmids,
                                        original_plasmid=n_pl,
                                        codon_usage_table=out_genome["Table"],
                                        n_patts=n_patts,
                                        f_patts=patts)

    dat_plasmids = rank_plasmids(original_useful_plasmids=useful_plasmids)

    # -f is optional: a None upper bound keeps every ranked plasmid instead
    # of failing on int(None).
    n_keep = parse_optional_int(args['n_plasmids'])
    def_pls = dat_plasmids.index[:n_keep]

    # NOTE(review): --output_folder is required but never read; the FASTA
    # files are written to the current working directory. Confirm against
    # the calling wrapper before redirecting them.
    for to_save in def_pls:
        with open(to_save + ".fa", "w") as handle:
            handle.write(">" + to_save + "\n")
            handle.write(useful_plasmids[to_save]["sequence"])

    tmp_max_row = parse_optional_int(args['max_row'], 27)

    print_color_seq(original=n_pl,
                    others=def_pls,
                    annotation_information=useful_plasmids,
                    tot=useful_plasmids,
                    ind_range=None,
                    patterns=n_poss,
                    f_patterns=f_patts,
                    patts=patts,
                    max_row=tmp_max_row)

    print_to_pdf(original=n_pl,
                 others=def_pls,
                 annotation_information=useful_plasmids,
                 tot=useful_plasmids,
                 ind_range=None,
                 patterns=n_poss,
                 f_patterns=f_patts,
                 patts=patts,
                 max_row=tmp_max_row)


if __name__ == '__main__':
    main()
