Mercurial > repos > gianmarco_piccinno > project_rm
comparison Project_RM/codon_usage.py @ 8:e130658cab16 draft
Uploaded
| author | gianmarco_piccinno |
|---|---|
| date | Sun, 09 Dec 2018 06:22:52 -0500 |
| parents | 791a47d9a777 |
| children |
comparison
equal
deleted
inserted
replaced
| 7:d9e845a8792f | 8:e130658cab16 |
|---|---|
| 1 #!/home/gianmarco/galaxy-python/python | |
| 2 | |
| 3 import Bio | |
| 4 from Bio import SeqIO | |
| 5 from Bio.Data import CodonTable | |
| 6 import re | |
| 7 import sys | |
| 8 import os | |
| 9 import pandas as pd | |
| 10 | |
def read_input(data = "example.fna"):
    """Concatenate every sequence of a FASTA file into one string.

    Parameters:
        data: path of the FASTA file to read.

    Returns:
        A single string made of all record sequences, joined in file
        order with no separator between records.
    """
    # Hand the path straight to SeqIO.parse instead of opening it with the
    # "rU" mode: that flag is deprecated and rejected by open() on recent
    # Python 3 releases. Joining once also avoids rebuilding the growing
    # string for every record.
    return "".join(str(record.seq) for record in SeqIO.parse(data, "fasta"))
| 19 | |
def codon_usage(seqs, codonTable):
    """Count codon occurrences in a sequence, grouped by amino acid.

    Parameters:
        seqs: sequence string; it is read as consecutive, non-overlapping
            runs of three word characters, starting at the first one.
        codonTable: key into ``CodonTable.unambiguous_dna_by_name``
            (the script entry point passes "Bacterial").

    Returns:
        A dict with three views of the same counts:
        "Dictionary": {amino acid: {codon label: count}},
        "Tuples": {(amino acid, codon label): count},
        "Table": DataFrame indexed by (AA, Codon) with a "Count" column and
            a "Proportion" column (count divided by the total of its amino
            acid group, rounded to 2 decimals).
        Stop codons are grouped under "_Stop"; start codons carry the
        label "<codon> Start".

    Raises:
        KeyError: if codonTable is not a known table name.
    """
    from collections import Counter

    ncbi_table = CodonTable.unambiguous_dna_by_name[codonTable]

    # Work on a copy: the table object is shared module-level state, and
    # editing its forward_table in place made a second call fail with
    # KeyError when the start codons were popped again.
    labelled = dict(ncbi_table.forward_table)

    for cod in ncbi_table.stop_codons:
        labelled[cod] = "_Stop"

    # Re-key start codons so they stay distinguishable in the output.
    for cod in ncbi_table.start_codons:
        labelled[cod + " Start"] = labelled.pop(cod)

    # Single pass over the sequence instead of one list.count per codon.
    observed = Counter(re.findall(r"\w{3}", seqs))

    usage = {}
    for label, aa in labelled.items():
        # Strip the " Start" suffix to get back the bare codon.
        usage.setdefault(aa, {})[label] = observed[label.split(" ")[0]]

    tups = {
        (aa, label): count
        for aa, per_codon in usage.items()
        for label, count in per_codon.items()
    }

    table = pd.DataFrame(pd.Series(tups), columns = ["Count"])
    table.index = table.index.set_names(["AA", "Codon"])

    # Select the column first so transform yields a Series, not a frame.
    table["Proportion"] = table["Count"].groupby(level=0).transform(
        lambda grp: (grp / grp.sum()).round(2)
    )

    return {"Dictionary": usage, "Tuples": tups, "Table": table}
| 63 | |
| 64 | |
| 65 | |
if __name__ == '__main__':

    # Usage: codon_usage.py <input FASTA> <output TSV>
    if len(sys.argv) < 3:
        sys.exit("usage: codon_usage.py <input.fasta> <output.tsv>")

    fasta_path, table_path = sys.argv[1], sys.argv[2]

    seqs = read_input(data=fasta_path)
    out = codon_usage(seqs, "Bacterial")

    # Write the per-codon table as tab-separated values.
    with open(table_path, "w") as outf:
        out["Table"].to_csv(outf, sep="\t")
