| 13 | 1 #!/home/gianmarco/galaxy-python/python | 
|  | 2 | 
|  | 3 import Bio | 
|  | 4 from Bio import SeqIO | 
|  | 5 from Bio.Data import CodonTable | 
|  | 6 import re | 
|  | 7 import sys | 
|  | 8 import os | 
|  | 9 import pandas as pd | 
|  | 10 | 
def read_input(data="example.fna"):
    """Read a FASTA file and return all of its sequences as one string.

    Parameters
    ----------
    data : str
        Path to the FASTA file to parse (defaults to "example.fna").

    Returns
    -------
    str
        The sequence of every record, concatenated in file order with no
        separator. An empty string is returned when the file holds no
        records.

    NOTE: because records are joined without a separator, a record whose
    length is not a multiple of three shifts the triplet frame of every
    record that follows it when the result is later split into codons.
    """
    # Plain "r" instead of "rU": the "U" flag was deprecated throughout
    # Python 3 and removed in 3.11, where open() rejects it with ValueError.
    # Text mode already translates newlines on Python 3.
    with open(data, "r") as handle:
        # join() builds the result in one pass instead of re-copying the
        # accumulated string for every record.
        return "".join(
            str(record.seq) for record in SeqIO.parse(handle, "fasta")
        )
|  | 19 | 
def codon_usage(seqs, codonTable):
    """Count codon occurrences in *seqs*, grouped by encoded amino acid.

    The sequence is scanned from its start for consecutive, non-overlapping
    runs of three word characters; each run is one codon. Leftover
    characters that do not form a full triplet are ignored. Matching
    against the codon table is exact (case-sensitive).

    Parameters
    ----------
    seqs : str
        Nucleotide sequence to analyse.
    codonTable : str
        Key into ``CodonTable.unambiguous_dna_by_name`` (the script entry
        point of this file passes "Bacterial").

    Returns
    -------
    dict
        "Dictionary": ``{amino_acid: {codon_label: count}}``.
        "Tuples":     ``{(amino_acid, codon_label): count}``.
        "Table":      DataFrame indexed by ("AA", "Codon") with a "Count"
                      column and a "Proportion" column (the codon's share
                      of its amino-acid group, rounded to two decimals;
                      NaN when the whole group has a zero count).

        Stop codons are grouped under the pseudo amino acid "_Stop".
        Start codons keep their amino acid but are labelled
        "<codon> Start".

    Raises
    ------
    KeyError
        If *codonTable* is not a known table name.
    """
    # Function-scope import so this block needs no change to the module's
    # import section.
    from collections import Counter

    table = CodonTable.unambiguous_dna_by_name[codonTable]

    # Work on a copy. forward_table belongs to Biopython's shared table
    # object: relabelling it in place would leak the extra keys to every
    # other user of the table and make a second call to this function fail
    # with KeyError, because the start codons would already be popped.
    codon_to_aa = dict(table.forward_table)

    for codon in table.stop_codons:
        codon_to_aa[codon] = "_Stop"

    # Re-key start codons as "<codon> Start" while keeping their amino acid.
    for codon in table.start_codons:
        codon_to_aa[codon + " Start"] = codon_to_aa.pop(codon)

    # Tally every in-frame triplet once instead of rescanning the full
    # codon list for each table entry.
    observed = Counter(re.findall(r"\w{3}", seqs))

    # Sorted construction gives a reproducible order; "_Stop" sorts after
    # the upper-case amino-acid letters, so it ends up last.
    usage = {aa: {} for aa in sorted(set(codon_to_aa.values()))}
    for label in sorted(codon_to_aa):
        # Strip the " Start" suffix to recover the bare codon for lookup.
        usage[codon_to_aa[label]][label] = observed[label.split(" ")[0]]

    tups = {
        (aa, label): count
        for aa, by_codon in usage.items()
        for label, count in by_codon.items()
    }

    usage_table = pd.DataFrame(pd.Series(tups), columns=["Count"])
    usage_table.index = usage_table.index.set_names(["AA", "Codon"])
    # Explicit sort so the row order does not depend on dict/set ordering.
    usage_table = usage_table.sort_index()

    # Share of each codon within its amino-acid group (index level 0).
    usage_table["Proportion"] = (
        usage_table["Count"]
        .groupby(level=0)
        .transform(lambda counts: (counts / counts.sum()).round(2))
    )

    return {"Dictionary": usage, "Tuples": tups, "Table": usage_table}
|  | 63 | 
|  | 64 | 
|  | 65 | 
if __name__ == '__main__':
    # Command line: <script> <input FASTA path> <output TSV path>
    # Fail with a readable usage message instead of a bare IndexError when
    # either path is missing.
    if len(sys.argv) < 3:
        sys.exit("usage: %s <input.fasta> <output.tsv>" % sys.argv[0])

    seqs = read_input(data=sys.argv[1])
    out = codon_usage(seqs, "Bacterial")

    # Write the per-codon count/proportion table as tab-separated text.
    with open(sys.argv[2], "w") as outf:
        out["Table"].to_csv(outf, sep="\t")