| 
13
 | 
     1 #!/home/gianmarco/galaxy-python/python
 | 
| 
 | 
     2 
 | 
| 
 | 
     3 import Bio
 | 
| 
 | 
     4 from Bio import SeqIO
 | 
| 
 | 
     5 from Bio.Data import CodonTable
 | 
| 
 | 
     6 import re
 | 
| 
 | 
     7 import sys
 | 
| 
 | 
     8 import os
 | 
| 
 | 
     9 import pandas as pd
 | 
| 
 | 
    10 
 | 
| 
 | 
    11 def read_input(data = "example.fna"):
 | 
| 
 | 
    12 
 | 
| 
 | 
    13     seqs = ""
 | 
| 
 | 
    14     with open(data, "rU") as handle:
 | 
| 
 | 
    15         for record in SeqIO.parse(handle, "fasta"):
 | 
| 
 | 
    16             seqs = seqs + str(record.seq)
 | 
| 
 | 
    17 
 | 
| 
 | 
    18     return seqs
 | 
| 
 | 
    19 
 | 
| 
 | 
    20 def codon_usage(seqs, codonTable):
 | 
| 
 | 
    21 
 | 
| 
 | 
    22     codon_usage = {}
 | 
| 
 | 
    23     tmp = [x for x in re.split(r'(\w{3})', seqs) if x != ""]
 | 
| 
 | 
    24 
 | 
| 
 | 
    25     b_cod_table = CodonTable.unambiguous_dna_by_name[codonTable].forward_table
 | 
| 
 | 
    26 
 | 
| 
 | 
    27 
 | 
| 
 | 
    28     for cod in CodonTable.unambiguous_dna_by_name[codonTable].stop_codons:
 | 
| 
 | 
    29         b_cod_table[cod] = "_Stop"
 | 
| 
 | 
    30 
 | 
| 
 | 
    31     for cod in CodonTable.unambiguous_dna_by_name[codonTable].start_codons:
 | 
| 
 | 
    32             b_cod_table[cod + " Start"] = b_cod_table[cod]
 | 
| 
 | 
    33             b_cod_table.pop(cod)
 | 
| 
 | 
    34 
 | 
| 
 | 
    35     aas = set(b_cod_table.values())
 | 
| 
 | 
    36 
 | 
| 
 | 
    37 
 | 
| 
 | 
    38     for aa in aas:
 | 
| 
 | 
    39         codon_usage[aa] = {}
 | 
| 
 | 
    40         for codon in b_cod_table.keys():
 | 
| 
 | 
    41             if b_cod_table[codon] == aa:
 | 
| 
 | 
    42                 codon_usage[aa][codon] = tmp.count(codon.split(" ")[0])
 | 
| 
 | 
    43 
 | 
| 
 | 
    44 
 | 
| 
 | 
    45     tups = {(outerKey, innerKey): values for outerKey, innerDict in codon_usage.iteritems() for innerKey, values in innerDict.iteritems()}
 | 
| 
 | 
    46 
 | 
| 
 | 
    47     #aas_ = set(tups.keys())
 | 
| 
 | 
    48 
 | 
| 
 | 
    49     #stops_ = {el for el in aas_ if el[0] == "Stop"}
 | 
| 
 | 
    50     #aas_ = list(aas_.difference(stops_))
 | 
| 
 | 
    51     #stops_ = list(stops_)
 | 
| 
 | 
    52     #aas_.sort()
 | 
| 
 | 
    53     #stops_.sort()
 | 
| 
 | 
    54 
 | 
| 
 | 
    55     codon_usage_ = pd.DataFrame(pd.Series(tups), columns = ["Count"])
 | 
| 
 | 
    56     codon_usage_.index = codon_usage_.index.set_names(["AA", "Codon"])
 | 
| 
 | 
    57     #codon_usage_.index.reindex(pd.MultiIndex.from_tuples([aas_, stops_], names=('AA', 'Codon')), level=[0,1])
 | 
| 
 | 
    58 
 | 
| 
 | 
    59 
 | 
| 
 | 
    60     codon_usage_['Proportion'] = codon_usage_.groupby(level=0).transform(lambda x: (x / x.sum()).round(2))
 | 
| 
 | 
    61 
 | 
| 
 | 
    62     return {"Dictionary": codon_usage, "Tuples": tups, "Table": codon_usage_}
 | 
| 
 | 
    63 
 | 
| 
 | 
    64 
 | 
| 
 | 
    65 
 | 
| 
 | 
    66 if __name__ == '__main__':
 | 
| 
 | 
    67 
 | 
| 
 | 
    68     
 | 
| 
 | 
    69     seqs = read_input(data=sys.argv[1])
 | 
| 
 | 
    70     out = codon_usage(seqs,"Bacterial")
 | 
| 
 | 
    71 
 | 
| 
 | 
    72 
 | 
| 
 | 
    73     with open(sys.argv[2], "w") as outf:
 | 
| 
 | 
    74         out["Table"].to_csv(outf, sep="\t")
 | 
| 
 | 
    75     #sys.stdout.write(out['Table']) |