#!/usr/bin/env python

__author__= "Gianmarco Piccinno"
__version__ = "1.0.0"

# NOTE: the order of the star imports is kept as-is, because later star
# imports can shadow names bound by earlier ones.
from syngenic import *
from functions import *
from Bio import *
# Explicit import: `from Bio import *` only exposes Bio.SeqIO if some other
# module has already imported it.
from Bio import SeqIO
import argparse as ap

    11 if __name__ == '__main__':
 | 
| 
 | 
    12 
 | 
| 
 | 
    13     parser = ap.ArgumentParser(description="", formatter_class=ap.RawTextHelpFormatter)
 | 
| 
 | 
    14 
 | 
| 
 | 
    15     parser.add_argument(
 | 
| 
 | 
    16         '-i', '--input_plasmid', help='Input plasmid', required=True)
 | 
| 
 | 
    17     parser.add_argument(
 | 
| 
 | 
    18         '-l', '--plasmid_format', help='Format of the plasmid: {fasta, genbank}', required=True)
 | 
| 
 | 
    19     parser.add_argument(
 | 
| 
 | 
    20         '-p', '--input_patterns', help='Input patterns separated by new_line', required=True)
 | 
| 
 | 
    21     parser.add_argument(
 | 
| 
 | 
    22         '-g', '--input_genome', help='Input annotated genome', required=True)
 | 
| 
 | 
    23     parser.add_argument(
 | 
| 
 | 
    24         '-q', '--genome_format', help='Format of the annotated genome: {fasta, gbk}', required=True)
 | 
| 
 | 
    25     parser.add_argument(
 | 
| 
 | 
    26         '-c', '--codon_table', help='Codon table to be used {Bacterial}', required=True)
 | 
| 
 | 
    27     parser.add_argument(
 | 
| 
 | 
    28         '-m', '--max_row', help='Max row length when print', required=False)
 | 
| 
 | 
    29     parser.add_argument(
 | 
| 
 | 
    30         '-d', '--demonstration', help='Use demonstration simplication', required=False)
 | 
| 
 | 
    31     parser.add_argument(
 | 
| 
 | 
    32         '-f', '--n_plasmids', help='Use demonstration simplication', required=False)
 | 
| 
 | 
    33     parser.add_argument(
 | 
| 
 | 
    34         '-o', '--output_folder', help='Folder for writing the output file', required=True)
 | 
| 
 | 
    35     args = vars(parser.parse_args())
 | 
| 
 | 
    36 
 | 
| 
 | 
    37     """
 | 
| 
 | 
    38 
 | 
| 
 | 
    39     python codon_switch_v2.py
 | 
| 
 | 
    40         -i ./pEPSA5_annotated.gb
 | 
| 
 | 
    41         -l genbank
 | 
| 
 | 
    42         -p ./patterns.txt
 | 
| 
 | 
    43         -g S_aureus_JE2.gbf
 | 
| 
 | 
    44         -q gbk -c Bacterial
 | 
| 
 | 
    45         -o ./output
 | 
| 
 | 
    46 
 | 
| 
 | 
    47         python codon_switch_v2.py -i ./pEPSA5_annotated.gb -l genbank -p ./patterns.txt -g S_aureus_JE2.gbf -q genbank -c Bacterial -o ./output
 | 
| 
 | 
    48 
 | 
| 
 | 
    49     """
 | 
| 
 | 
    50 
 | 
| 
 | 
    51 
 | 
| 
 | 
    52     pl = SeqIO.read(
 | 
| 
 | 
    53         open(args['input_plasmid'], "r"), args['plasmid_format'])
 | 
| 
 | 
    54 
 | 
| 
 | 
    55     if args['demonstration'] == "demonstration":
 | 
| 
 | 
    56         pl = pl[0:3000]
 | 
| 
 | 
    57     pats = read_patterns(args['input_patterns'])
 | 
| 
 | 
    58 
 | 
| 
 | 
    59 
 | 
| 
 | 
    60     #############################################################
 | 
| 
 | 
    61     #
 | 
| 
 | 
    62     #############################################################
 | 
| 
 | 
    63 
 | 
| 
 | 
    64     #pl = fake_from_real(path = "./data/pEPSA5_annotated.gb", id_ = "Trial", name = "Fake_plasmid")
 | 
| 
 | 
    65     print(type(pl))
 | 
| 
 | 
    66     print(pl); print(pl.seq); print(pl.features)
 | 
| 
 | 
    67 
 | 
| 
 | 
    68     #for feat in pl.features:
 | 
| 
 | 
    69     #    print(str(feat.extract(pl)))
 | 
| 
 | 
    70     #    print(str(pl[feat.location.start:feat.location.end]))
 | 
| 
 | 
    71     #    print("\n")
 | 
| 
 | 
    72 
 | 
| 
 | 
    73 
 | 
| 
 | 
    74     n_pl = plasmid(pl)
 | 
| 
 | 
    75     print(n_pl); print(len(n_pl))
 | 
| 
 | 
    76     print(n_pl.features)
 | 
| 
 | 
    77 
 | 
| 
 | 
    78 
 | 
| 
 | 
    79     patts, n_patts = all_patterns(input_ = pats)
 | 
| 
 | 
    80 
 | 
| 
 | 
    81 
 | 
| 
 | 
    82     f_patts = n_pl.findpatterns(n_patts, patts)
 | 
| 
 | 
    83     print(f_patts)
 | 
| 
 | 
    84     print(pl.seq)
 | 
| 
 | 
    85     print(len(pl.seq))
 | 
| 
 | 
    86 
 | 
| 
 | 
    87 
 | 
| 
 | 
    88     n_poss = punctuate_targets(f_patts, n_pl)
 | 
| 
 | 
    89     print(n_poss)
 | 
| 
 | 
    90 
 | 
| 
 | 
    91     print_seq(n_pl.seq)
 | 
| 
 | 
    92 
 | 
| 
 | 
    93     synonims_tables = synonims_(table_name=args['codon_table'])
 | 
| 
 | 
    94 
 | 
| 
 | 
    95     synonims_tables
 | 
| 
 | 
    96 
 | 
| 
 | 
    97     plasmids = generalization(n_poss, n_pl, synonims_tables)
 | 
| 
 | 
    98 
 | 
| 
 | 
    99     print(len(plasmids))
 | 
| 
 | 
   100 
 | 
| 
 | 
   101     #plasmids
 | 
| 
 | 
   102 
 | 
| 
 | 
   103     #if len(plasmids) > 5000000:
 | 
| 
 | 
   104         #redo generalization without considering internal bases
 | 
| 
 | 
   105         #in target sites that are not in CDS
 | 
| 
 | 
   106         #this means considering only the outer bases of the target
 | 
| 
 | 
   107     #    plasmids = generalization(n_poss, n_pl, synonims_tables,
 | 
| 
 | 
   108     #                              reduced = True)
 | 
| 
 | 
   109 
 | 
| 
 | 
   110     #########################################################
 | 
| 
 | 
   111     # Read plasmid and compute codon usage
 | 
| 
 | 
   112     #########################################################
 | 
| 
 | 
   113 
 | 
| 
 | 
   114     genome = annotated_genome(read_annotated_genome(
 | 
| 
 | 
   115             data=args['input_genome'], type_=args['genome_format']))
 | 
| 
 | 
   116 
 | 
| 
 | 
   117     out_genome = genome.codon_usage(args['codon_table'])
 | 
| 
 | 
   118     print(out_genome.keys())
 | 
| 
 | 
   119     print(out_genome["Table"])
 | 
| 
 | 
   120 
 | 
| 
 | 
   121     print(out_genome["Table"].loc["GCA"]["Proportion"])
 | 
| 
 | 
   122     print(type(out_genome["Table"].loc["GCA"]["Proportion"]))
 | 
| 
 | 
   123 
 | 
| 
 | 
   124 
 | 
| 
 | 
   125     #########################################################
 | 
| 
 | 
   126     # Evaluate the plasmid
 | 
| 
 | 
   127     #########################################################
 | 
| 
 | 
   128 
 | 
| 
 | 
   129     useful_plasmids = evaluate_plasmids(plasmids = plasmids,
 | 
| 
 | 
   130                                         original_plasmid = n_pl,
 | 
| 
 | 
   131                                         codon_usage_table = out_genome["Table"],
 | 
| 
 | 
   132                                         n_patts = n_patts,
 | 
| 
 | 
   133                                         f_patts = patts)
 | 
| 
 | 
   134 
 | 
| 
 | 
   135     dat_plasmids = rank_plasmids(original_useful_plasmids = useful_plasmids)
 | 
| 
 | 
   136 
 | 
| 
 | 
   137     def_pls = dat_plasmids.index[:int(args['n_plasmids'])]
 | 
| 
 | 
   138 
 | 
| 
 | 
   139     for to_save in def_pls:
 | 
| 
 | 
   140         #print(to_save)
 | 
| 
 | 
   141         #print(useful_plasmids[to_save])
 | 
| 
 | 
   142         with open(to_save+".fa", "w") as handle:
 | 
| 
 | 
   143             handle.write(">"+to_save+"\n")
 | 
| 
 | 
   144             handle.write(useful_plasmids[to_save]["sequence"])
 | 
| 
 | 
   145 
 | 
| 
 | 
   146 
 | 
| 
 | 
   147 
 | 
| 
 | 
   148     if args['max_row'] != None:
 | 
| 
 | 
   149         tmp_max_row = int(args['max_row'])
 | 
| 
 | 
   150     else:
 | 
| 
 | 
   151         tmp_max_row = 27
 | 
| 
 | 
   152 
 | 
| 
 | 
   153     print_color_seq(original = n_pl,
 | 
| 
 | 
   154                     others = def_pls,
 | 
| 
 | 
   155                     annotation_information = useful_plasmids,
 | 
| 
 | 
   156                     tot = useful_plasmids,
 | 
| 
 | 
   157                     ind_range = None,
 | 
| 
 | 
   158                     patterns = n_poss,
 | 
| 
 | 
   159                     f_patterns = f_patts,
 | 
| 
 | 
   160                     patts = patts,
 | 
| 
 | 
   161                     max_row = tmp_max_row)
 | 
| 
 | 
   162 
 | 
| 
 | 
   163 
 | 
| 
 | 
   164     print_to_pdf(original = n_pl,
 | 
| 
 | 
   165                  others = def_pls,
 | 
| 
 | 
   166                  annotation_information = useful_plasmids,
 | 
| 
 | 
   167                  tot = useful_plasmids,
 | 
| 
 | 
   168                  ind_range = None,
 | 
| 
 | 
   169                  patterns = n_poss,
 | 
| 
 | 
   170                  f_patterns = f_patts,
 | 
| 
 | 
   171                  patts = patts,
 | 
| 
 | 
   172                  max_row = tmp_max_row)
 |