#!/usr/bin/env python
"""Command-line driver that chains the project's plasmid-processing helpers.

The script parses its arguments, reads the input plasmid and the pattern
file, and then calls helpers star-imported from the project modules
``syngenic`` and ``functions``: it builds a ``plasmid`` object, looks the
patterns up in it, generates candidate plasmids, computes a codon-usage table
from the annotated genome, evaluates and ranks the candidates, writes the
selected ones as ``<name>.fa`` files and finally calls ``print_color_seq``
and ``print_to_pdf``.

Example invocations:

    python codon_switch_v2.py
    -i ./pEPSA5_annotated.gb
    -l genbank
    -p ./patterns.txt
    -g S_aureus_JE2.gbf
    -q gbk -c Bacterial
    -o ./output

    python codon_switch_v2.py -i ./pEPSA5_annotated.gb -l genbank -p ./patterns.txt -g S_aureus_JE2.gbf -q genbank -c Bacterial -o ./output
"""

__author__ = "Gianmarco Piccinno"
__version__ = "1.0.0"

# The star imports keep their original relative order because later ones can
# shadow names brought in by earlier ones.
from syngenic import *
from functions import *
from Bio import *
# ``from Bio import *`` does not load submodules on its own, so SeqIO is
# imported explicitly instead of relying on another module having loaded it.
from Bio import SeqIO
import argparse as ap

# Row length handed to the printing helpers when -m/--max_row is not given.
DEFAULT_MAX_ROW = 27


def _build_parser():
    """Return the argument parser describing the command-line interface."""
    parser = ap.ArgumentParser(
        description="", formatter_class=ap.RawTextHelpFormatter)

    parser.add_argument(
        '-i', '--input_plasmid', help='Input plasmid', required=True)
    parser.add_argument(
        '-l', '--plasmid_format',
        help='Format of the plasmid: {fasta, genbank}', required=True)
    parser.add_argument(
        '-p', '--input_patterns',
        help='Input patterns separated by new_line', required=True)
    parser.add_argument(
        '-g', '--input_genome', help='Input annotated genome', required=True)
    parser.add_argument(
        '-q', '--genome_format',
        help='Format of the annotated genome: {fasta, gbk}', required=True)
    parser.add_argument(
        '-c', '--codon_table',
        help='Codon table to be used {Bacterial}', required=True)
    # type=int lets argparse reject a non-numeric value with a usage error
    # instead of failing later with a traceback.
    parser.add_argument(
        '-m', '--max_row', help='Max row length when print',
        type=int, default=DEFAULT_MAX_ROW, required=False)
    # Only the literal value "demonstration" has an effect (see main()).
    parser.add_argument(
        '-d', '--demonstration', help='Use demonstration simplication',
        required=False)
    parser.add_argument(
        '-f', '--n_plasmids',
        help='Number of top-ranked plasmids to save (default: all)',
        type=int, default=None, required=False)
    # NOTE(review): this option is required but its value is never read by
    # this script; the FASTA files below are written to the current working
    # directory. Confirm the intended destination before wiring it in.
    parser.add_argument(
        '-o', '--output_folder',
        help='Folder for writing the output file', required=True)
    return parser


def main():
    """Parse the command line and run every step of the script in order.

    Intermediate objects are printed to standard output along the way, one
    ``<name>.fa`` file is written in the current working directory for each
    selected plasmid, and ``print_color_seq`` / ``print_to_pdf`` are called
    with the selected plasmids.
    """
    args = vars(_build_parser().parse_args())

    # Read the plasmid inside a context manager so the file is always closed.
    with open(args['input_plasmid'], "r") as handle:
        pl = SeqIO.read(handle, args['plasmid_format'])

    # Demonstration mode keeps only the first 3000 positions of the record.
    if args['demonstration'] == "demonstration":
        pl = pl[0:3000]
    pats = read_patterns(args['input_patterns'])

    print(type(pl))
    print(pl)
    print(pl.seq)
    print(pl.features)

    n_pl = plasmid(pl)
    print(n_pl)
    print(len(n_pl))
    print(n_pl.features)

    patts, n_patts = all_patterns(input_=pats)

    f_patts = n_pl.findpatterns(n_patts, patts)
    print(f_patts)
    print(pl.seq)
    print(len(pl.seq))

    n_poss = punctuate_targets(f_patts, n_pl)
    print(n_poss)

    print_seq(n_pl.seq)

    synonims_tables = synonims_(table_name=args['codon_table'])

    # NOTE(review): an earlier revision re-ran
    # generalization(..., reduced=True) when more than 5,000,000 candidates
    # were produced; that fallback is currently disabled.
    plasmids = generalization(n_poss, n_pl, synonims_tables)
    print(len(plasmids))

    #########################################################
    # Read the annotated genome and compute its codon usage
    #########################################################
    genome = annotated_genome(read_annotated_genome(
        data=args['input_genome'], type_=args['genome_format']))

    out_genome = genome.codon_usage(args['codon_table'])
    codon_table = out_genome["Table"]
    print(out_genome.keys())
    print(codon_table)
    print(codon_table.loc["GCA"]["Proportion"])
    print(type(codon_table.loc["GCA"]["Proportion"]))

    #########################################################
    # Evaluate and rank the candidate plasmids
    #########################################################
    # NOTE(review): the keyword ``f_patts`` receives ``patts`` (not the local
    # ``f_patts``); kept exactly as in the original -- confirm it is intended.
    useful_plasmids = evaluate_plasmids(plasmids=plasmids,
                                        original_plasmid=n_pl,
                                        codon_usage_table=codon_table,
                                        n_patts=n_patts,
                                        f_patts=patts)

    dat_plasmids = rank_plasmids(original_useful_plasmids=useful_plasmids)

    # A missing -f leaves n_plasmids as None, and slicing with [:None] keeps
    # every ranked entry instead of crashing on int(None).
    def_pls = dat_plasmids.index[:args['n_plasmids']]

    for to_save in def_pls:
        with open(to_save + ".fa", "w") as handle:
            handle.write(">" + to_save + "\n")
            handle.write(useful_plasmids[to_save]["sequence"])

    # Both reporting helpers take exactly the same keyword arguments.
    report_kwargs = dict(original=n_pl,
                         others=def_pls,
                         annotation_information=useful_plasmids,
                         tot=useful_plasmids,
                         ind_range=None,
                         patterns=n_poss,
                         f_patterns=f_patts,
                         patts=patts,
                         max_row=args['max_row'])

    print_color_seq(**report_kwargs)
    print_to_pdf(**report_kwargs)


if __name__ == '__main__':
    main()
|
58
|
|
59
|
|
60 #############################################################
|
|
61 #
|
|
62 #############################################################
|
|
63
|
|
64 #pl = fake_from_real(path = "./data/pEPSA5_annotated.gb", id_ = "Trial", name = "Fake_plasmid")
|
|
65 print(type(pl))
|
|
66 print(pl); print(pl.seq); print(pl.features)
|
|
67
|
|
68 #for feat in pl.features:
|
|
69 # print(str(feat.extract(pl)))
|
|
70 # print(str(pl[feat.location.start:feat.location.end]))
|
|
71 # print("\n")
|
|
72
|
|
73
|
|
74 n_pl = plasmid(pl)
|
|
75 print(n_pl); print(len(n_pl))
|
|
76 print(n_pl.features)
|
|
77
|
|
78
|
|
79 patts, n_patts = all_patterns(input_ = pats)
|
|
80
|
|
81
|
|
82 f_patts = n_pl.findpatterns(n_patts, patts)
|
|
83 print(f_patts)
|
|
84 print(pl.seq)
|
|
85 print(len(pl.seq))
|
|
86
|
|
87
|
|
88 n_poss = punctuate_targets(f_patts, n_pl)
|
|
89 print(n_poss)
|
|
90
|
|
91 print_seq(n_pl.seq)
|
|
92
|
|
93 synonims_tables = synonims_(table_name=args['codon_table'])
|
|
94
|
|
95 synonims_tables
|
|
96
|
|
97 plasmids = generalization(n_poss, n_pl, synonims_tables)
|
|
98
|
|
99 print(len(plasmids))
|
|
100
|
|
101 #plasmids
|
|
102
|
|
103 #if len(plasmids) > 5000000:
|
|
104 #redo generalization without considering internal bases
|
|
105 #in target sites that are not in CDS
|
|
106 #this means considering only the outer bases of the target
|
|
107 # plasmids = generalization(n_poss, n_pl, synonims_tables,
|
|
108 # reduced = True)
|
|
109
|
|
110 #########################################################
|
|
111 # Read plasmid and compute codon usage
|
|
112 #########################################################
|
|
113
|
|
114 genome = annotated_genome(read_annotated_genome(
|
|
115 data=args['input_genome'], type_=args['genome_format']))
|
|
116
|
|
117 out_genome = genome.codon_usage(args['codon_table'])
|
|
118 print(out_genome.keys())
|
|
119 print(out_genome["Table"])
|
|
120
|
|
121 print(out_genome["Table"].loc["GCA"]["Proportion"])
|
|
122 print(type(out_genome["Table"].loc["GCA"]["Proportion"]))
|
|
123
|
|
124
|
|
125 #########################################################
|
|
126 # Evaluate the plasmid
|
|
127 #########################################################
|
|
128
|
|
129 useful_plasmids = evaluate_plasmids(plasmids = plasmids,
|
|
130 original_plasmid = n_pl,
|
|
131 codon_usage_table = out_genome["Table"],
|
|
132 n_patts = n_patts,
|
|
133 f_patts = patts)
|
|
134
|
|
135 dat_plasmids = rank_plasmids(original_useful_plasmids = useful_plasmids)
|
|
136
|
|
137 def_pls = dat_plasmids.index[:int(args['n_plasmids'])]
|
|
138
|
|
139 for to_save in def_pls:
|
|
140 #print(to_save)
|
|
141 #print(useful_plasmids[to_save])
|
|
142 with open(to_save+".fa", "w") as handle:
|
|
143 handle.write(">"+to_save+"\n")
|
|
144 handle.write(useful_plasmids[to_save]["sequence"])
|
|
145
|
|
146
|
|
147
|
|
148 if args['max_row'] != None:
|
|
149 tmp_max_row = int(args['max_row'])
|
|
150 else:
|
|
151 tmp_max_row = 27
|
|
152
|
|
153 print_color_seq(original = n_pl,
|
|
154 others = def_pls,
|
|
155 annotation_information = useful_plasmids,
|
|
156 tot = useful_plasmids,
|
|
157 ind_range = None,
|
|
158 patterns = n_poss,
|
|
159 f_patterns = f_patts,
|
|
160 patts = patts,
|
|
161 max_row = tmp_max_row)
|
|
162
|
|
163
|
|
164 print_to_pdf(original = n_pl,
|
|
165 others = def_pls,
|
|
166 annotation_information = useful_plasmids,
|
|
167 tot = useful_plasmids,
|
|
168 ind_range = None,
|
|
169 patterns = n_poss,
|
|
170 f_patterns = f_patts,
|
|
171 patts = patts,
|
|
172 max_row = tmp_max_row)
|