| 42 | 1 #!/usr/bin/env python | 
|  | 2 | 
|  | 3 __author__= "Gianmarco Piccinno" | 
|  | 4 __version__ = "1.0.0" | 
|  | 5 | 
|  | 6 from syngenic import * | 
|  | 7 from functions import * | 
|  | 8 from Bio import * | 
|  | 9 import argparse as ap | 
|  | 10 | 
def build_parser():
    """Build the command-line parser for this script.

    Returns:
        An ``argparse.ArgumentParser``. ``--max_row`` and ``--n_plasmids`` are
        parsed as integers; ``--max_row`` defaults to 27 and ``--n_plasmids``
        defaults to ``None`` (no limit).
    """
    parser = ap.ArgumentParser(
        description="", formatter_class=ap.RawTextHelpFormatter)

    parser.add_argument(
        '-i', '--input_plasmid', help='Input plasmid', required=True)
    parser.add_argument(
        '-l', '--plasmid_format',
        help='Format of the plasmid: {fasta, genbank}', required=True)
    parser.add_argument(
        '-p', '--input_patterns',
        help='Input patterns separated by new_line', required=True)
    parser.add_argument(
        '-g', '--input_genome', help='Input annotated genome', required=True)
    parser.add_argument(
        '-q', '--genome_format',
        help='Format of the annotated genome: {fasta, gbk}', required=True)
    parser.add_argument(
        '-c', '--codon_table',
        help='Codon table to be used {Bacterial}', required=True)
    # 27 was the fallback the script applied when -m was not supplied.
    parser.add_argument(
        '-m', '--max_row', help='Max row length when print',
        required=False, type=int, default=27)
    # Only the literal value "demonstration" switches the truncation on.
    parser.add_argument(
        '-d', '--demonstration', help='Use demonstration simplication',
        required=False)
    # The help text used to be a copy of the -d one; None means "keep all".
    parser.add_argument(
        '-f', '--n_plasmids',
        help='Number of plasmids, taken from the start of the ranking, '
             'to save and display (default: all)',
        required=False, type=int, default=None)
    parser.add_argument(
        '-o', '--output_folder',
        help='Folder for writing the output file', required=True)
    return parser


def write_fasta_files(names, plasmids, output_folder):
    """Write one ``<name>.fa`` file per selected plasmid into *output_folder*.

    Args:
        names: iterable of plasmid names (strings) to write.
        plasmids: mapping ``name -> {"sequence": <str>, ...}``.
        output_folder: destination directory; created when it does not exist.

    Each file holds a ``>name`` header line followed by the sequence exactly
    as stored (no wrapping and no trailing newline is added).
    """
    import os

    # isdir/makedirs instead of exist_ok keeps this usable on old interpreters.
    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)

    for name in names:
        target = os.path.join(output_folder, name + ".fa")
        with open(target, "w") as handle:
            handle.write(">" + name + "\n")
            handle.write(plasmids[name]["sequence"])


def main(argv=None):
    """Run the whole pipeline from the command line.

    Example invocations:

        python codon_switch_v2.py
            -i ./pEPSA5_annotated.gb
            -l genbank
            -p ./patterns.txt
            -g S_aureus_JE2.gbf
            -q gbk -c Bacterial
            -o ./output

        python codon_switch_v2.py -i ./pEPSA5_annotated.gb -l genbank -p ./patterns.txt -g S_aureus_JE2.gbf -q genbank -c Bacterial -o ./output

    Args:
        argv: argument list to parse; ``None`` makes argparse read ``sys.argv``.

    The helpers called here (``plasmid``, ``all_patterns``, ``generalization``,
    ``evaluate_plasmids``, ...) come from the file-level star imports; their
    exact contracts are not visible from this script.
    """
    args = vars(build_parser().parse_args(argv))

    # Close the input file deterministically instead of leaking the handle.
    with open(args['input_plasmid'], "r") as plasmid_handle:
        pl = SeqIO.read(plasmid_handle, args['plasmid_format'])

    if args['demonstration'] == "demonstration":
        # Demonstration mode works on the first 3000 positions only.
        pl = pl[0:3000]
    pats = read_patterns(args['input_patterns'])

    print(type(pl))
    print(pl)
    print(pl.seq)
    print(pl.features)

    n_pl = plasmid(pl)
    print(n_pl)
    print(len(n_pl))
    print(n_pl.features)

    patts, n_patts = all_patterns(input_=pats)

    f_patts = n_pl.findpatterns(n_patts, patts)
    print(f_patts)
    print(pl.seq)
    print(len(pl.seq))

    n_poss = punctuate_targets(f_patts, n_pl)
    print(n_poss)

    print_seq(n_pl.seq)

    synonims_tables = synonims_(table_name=args['codon_table'])

    plasmids = generalization(n_poss, n_pl, synonims_tables)
    print(len(plasmids))

    #########################################################
    # Read the annotated genome and compute its codon usage
    #########################################################

    genome = annotated_genome(read_annotated_genome(
        data=args['input_genome'], type_=args['genome_format']))

    out_genome = genome.codon_usage(args['codon_table'])
    print(out_genome.keys())
    print(out_genome["Table"])

    print(out_genome["Table"].loc["GCA"]["Proportion"])
    print(type(out_genome["Table"].loc["GCA"]["Proportion"]))

    #########################################################
    # Evaluate the plasmids
    #########################################################

    useful_plasmids = evaluate_plasmids(plasmids=plasmids,
                                        original_plasmid=n_pl,
                                        codon_usage_table=out_genome["Table"],
                                        n_patts=n_patts,
                                        f_patts=patts)

    dat_plasmids = rank_plasmids(original_useful_plasmids=useful_plasmids)

    # A None bound keeps every entry, so omitting -f no longer crashes int().
    def_pls = dat_plasmids.index[:args['n_plasmids']]

    # Honour -o: the files used to land in the current working directory.
    write_fasta_files(def_pls, useful_plasmids, args['output_folder'])

    report_kwargs = dict(original=n_pl,
                         others=def_pls,
                         annotation_information=useful_plasmids,
                         tot=useful_plasmids,
                         ind_range=None,
                         patterns=n_poss,
                         f_patterns=f_patts,
                         patts=patts,
                         max_row=args['max_row'])

    print_color_seq(**report_kwargs)
    print_to_pdf(**report_kwargs)


if __name__ == '__main__':
    main()