Mercurial > repos > gianmarco_piccinno > cs_tool_project_rm
view codon_switch.py @ 0:5397da1ef896 draft
Uploaded
author | gianmarco_piccinno |
---|---|
date | Tue, 21 May 2019 05:05:15 -0400 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/env python
"""Command-line driver for the codon-switch workflow.

The script reads a plasmid and a list of patterns, looks the patterns up in
the plasmid, builds candidate plasmids with ``generalization``, computes the
codon usage of an annotated genome, evaluates and ranks the candidates, writes
the selected ones to ``<name>.fa`` files and finally calls ``print_color_seq``
and ``print_to_pdf``.

All domain helpers (``plasmid``, ``read_patterns``, ``all_patterns``,
``punctuate_targets``, ``synonims_``, ``generalization``, ``annotated_genome``,
``read_annotated_genome``, ``evaluate_plasmids``, ``rank_plasmids``,
``print_seq``, ``print_color_seq``, ``print_to_pdf``) come from the wildcard
imports of the project modules ``syngenic`` and ``functions``.

Example invocations:

    python codon_switch_v2.py -i ./pEPSA5_annotated.gb -l genbank -p ./patterns.txt -g S_aureus_JE2.gbf -q gbk -c Bacterial -o ./output
    python codon_switch_v2.py -i ./pEPSA5_annotated.gb -l genbank -p ./patterns.txt -g S_aureus_JE2.gbf -q genbank -c Bacterial -o ./output
"""

__author__ = "Gianmarco Piccinno"
__version__ = "1.0.0"

from syngenic import *
from functions import *
from Bio import *
# ``from Bio import *`` does not import submodules by itself, so ``SeqIO`` was
# only available as a side effect of the imports above. Import it explicitly.
from Bio import SeqIO
import argparse as ap


def _build_parser():
    """Return the ``argparse`` parser describing the command-line interface."""
    parser = ap.ArgumentParser(
        description="", formatter_class=ap.RawTextHelpFormatter)

    parser.add_argument(
        '-i', '--input_plasmid',
        help='Input plasmid', required=True)
    parser.add_argument(
        '-l', '--plasmid_format',
        help='Format of the plasmid: {fasta, genbank}', required=True)
    parser.add_argument(
        '-p', '--input_patterns',
        help='Input patterns separated by new_line', required=True)
    parser.add_argument(
        '-g', '--input_genome',
        help='Input annotated genome', required=True)
    parser.add_argument(
        '-q', '--genome_format',
        help='Format of the annotated genome: {fasta, gbk}', required=True)
    parser.add_argument(
        '-c', '--codon_table',
        help='Codon table to be used {Bacterial}', required=True)
    # type=int/default=27 replaces the manual "int() or 27" fallback and
    # rejects non-numeric values before any expensive work starts.
    parser.add_argument(
        '-m', '--max_row',
        help='Max row length when print', type=int, default=27,
        required=False)
    parser.add_argument(
        '-d', '--demonstration',
        help='Use demonstration simplication', required=False)
    # When omitted the value is None, which later selects every ranked
    # plasmid instead of crashing in int(None).
    parser.add_argument(
        '-f', '--n_plasmids',
        help='Number of top-ranked plasmids to save and print\n'
             '(default: all ranked plasmids)',
        type=int, default=None, required=False)
    # NOTE(review): this option is required but its value is never read
    # below; the FASTA files are written to the current working directory.
    # Confirm the intended behaviour before wiring it in.
    parser.add_argument(
        '-o', '--output_folder',
        help='Folder for writing the output file', required=True)
    return parser


def main():
    """Run the whole workflow using the arguments given on the command line."""
    args = vars(_build_parser().parse_args())

    # Read the plasmid; the context manager closes the file handle.
    with open(args['input_plasmid'], "r") as plasmid_handle:
        pl = SeqIO.read(plasmid_handle, args['plasmid_format'])

    # Demonstration mode is enabled by passing the literal value
    # "demonstration"; it keeps only the first 3000 positions of the record.
    if args['demonstration'] == "demonstration":
        pl = pl[0:3000]

    pats = read_patterns(args['input_patterns'])

    print(type(pl))
    print(pl)
    print(pl.seq)
    print(pl.features)

    n_pl = plasmid(pl)
    print(n_pl)
    print(len(n_pl))
    print(n_pl.features)

    patts, n_patts = all_patterns(input_=pats)
    f_patts = n_pl.findpatterns(n_patts, patts)
    print(f_patts)
    print(pl.seq)
    print(len(pl.seq))

    n_poss = punctuate_targets(f_patts, n_pl)
    print(n_poss)
    print_seq(n_pl.seq)

    synonims_tables = synonims_(table_name=args['codon_table'])

    plasmids = generalization(n_poss, n_pl, synonims_tables)
    print(len(plasmids))
    # Idea kept from the original author (not implemented): if the number of
    # candidates is very large (> 5000000), redo the generalization with
    # reduced=True, i.e. considering only the outer bases of target sites
    # that are not in a CDS.

    #########################################################
    # Read the annotated genome and compute codon usage
    #########################################################
    genome = annotated_genome(read_annotated_genome(
        data=args['input_genome'], type_=args['genome_format']))
    out_genome = genome.codon_usage(args['codon_table'])
    print(out_genome.keys())
    print(out_genome["Table"])
    print(out_genome["Table"].loc["GCA"]["Proportion"])
    print(type(out_genome["Table"].loc["GCA"]["Proportion"]))

    #########################################################
    # Evaluate the plasmid
    #########################################################
    # NOTE(review): ``f_patts`` receives ``patts`` here, not the local
    # ``f_patts`` computed above. Kept as in the original; confirm against
    # the signature of evaluate_plasmids.
    useful_plasmids = evaluate_plasmids(plasmids=plasmids,
                                        original_plasmid=n_pl,
                                        codon_usage_table=out_genome["Table"],
                                        n_patts=n_patts,
                                        f_patts=patts)

    dat_plasmids = rank_plasmids(original_useful_plasmids=useful_plasmids)

    # A None limit (flag omitted) turns the slice into [:None], i.e. all rows.
    def_pls = dat_plasmids.index[:args['n_plasmids']]

    # One FASTA file per selected plasmid, named after its identifier.
    for to_save in def_pls:
        with open(to_save + ".fa", "w") as handle:
            handle.write(">" + to_save + "\n")
            handle.write(useful_plasmids[to_save]["sequence"])

    tmp_max_row = args['max_row']

    print_color_seq(original=n_pl,
                    others=def_pls,
                    annotation_information=useful_plasmids,
                    tot=useful_plasmids,
                    ind_range=None,
                    patterns=n_poss,
                    f_patterns=f_patts,
                    patts=patts,
                    max_row=tmp_max_row)

    print_to_pdf(original=n_pl,
                 others=def_pls,
                 annotation_information=useful_plasmids,
                 tot=useful_plasmids,
                 ind_range=None,
                 patterns=n_poss,
                 f_patterns=f_patts,
                 patts=patts,
                 max_row=tmp_max_row)


if __name__ == '__main__':
    main()