Mercurial > repos > fabio > new_project_rm
comparison codon_usage.py @ 0:5b61f1b564b3 draft
Uploaded
author | fabio |
---|---|
date | Tue, 11 Dec 2018 12:27:52 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:5b61f1b564b3 |
---|---|
1 #!/home/gianmarco/galaxy-python/python | |
2 | |
3 import Bio | |
4 from Bio import SeqIO | |
5 from Bio.Data import CodonTable | |
6 import re | |
7 import sys | |
8 import os | |
9 import pandas as pd | |
10 | |
def read_input(data="example.fna"):
    """Read a FASTA file and return all of its sequences as one string.

    Every record found by ``SeqIO.parse(..., "fasta")`` is converted with
    ``str(record.seq)`` and the results are concatenated in file order.
    No separator is inserted between records.

    Parameters
    ----------
    data : str
        Path of the FASTA file to read. Defaults to ``"example.fna"``.

    Returns
    -------
    str
        The concatenated sequences; an empty string if the file holds no
        records.
    """
    # Plain "r" instead of "rU": the "U" flag was deprecated in Python 3
    # and rejected with ValueError from Python 3.11 on; text mode already
    # applies universal-newline handling on Python 3.
    with open(data, "r") as handle:
        # join() builds the result in one pass instead of re-copying the
        # growing string for every record.
        return "".join(
            str(record.seq) for record in SeqIO.parse(handle, "fasta")
        )
19 | |
def codon_usage(seqs, codonTable):
    """Count codon occurrences in ``seqs`` and group them by amino acid.

    ``seqs`` is cut into consecutive, non-overlapping runs of three word
    characters (regex ``\\w{3}``), starting at the beginning of the string;
    leftover characters that do not fill a triplet are ignored. Each
    triplet is compared, as an exact case-sensitive string, with the codons
    of the selected Biopython codon table.

    Table layout used for the grouping:

    * forward-table codons are grouped under their amino-acid letter;
    * stop codons are grouped under the label ``"_Stop"``;
    * start codons are re-keyed as ``"<codon> Start"`` (still under the
      amino acid, or ``"_Stop"``, they mapped to) and are counted by their
      three-letter prefix.

    Parameters
    ----------
    seqs : str
        Nucleotide sequence(s) to analyse.
    codonTable : str
        Key into ``CodonTable.unambiguous_dna_by_name``
        (the script passes ``"Bacterial"``).

    Returns
    -------
    dict
        ``"Dictionary"``: ``{amino_acid: {codon_label: count}}``;
        ``"Tuples"``: ``{(amino_acid, codon_label): count}``;
        ``"Table"``: DataFrame indexed by ``("AA", "Codon")``, sorted by
        index, with a ``"Count"`` column and a ``"Proportion"`` column
        (count divided by the total of its amino-acid group, rounded to
        two decimals; NaN when the group total is zero).

    Raises
    ------
    KeyError
        If ``codonTable`` is not a known table name, or if a start codon is
        neither in the forward table nor among the stop codons.
    """
    # Function-scope import so the block does not rely on a new file-level
    # import.
    from collections import Counter

    table = CodonTable.unambiguous_dna_by_name[codonTable]

    # Work on a copy: the forward table belongs to Biopython's module-level
    # table object. Editing it in place would corrupt it for every other
    # user and make a second call fail once the start codons were popped.
    b_cod_table = dict(table.forward_table)

    for cod in table.stop_codons:
        b_cod_table[cod] = "_Stop"

    for cod in table.start_codons:
        b_cod_table[cod + " Start"] = b_cod_table.pop(cod)

    # One pass over the sequence; Counter returns 0 for absent codons.
    counts = Counter(re.findall(r'\w{3}', seqs))

    usage = {}
    for codon, aa in b_cod_table.items():
        # Strip the " Start" suffix to recover the three-letter codon.
        usage.setdefault(aa, {})[codon] = counts[codon.split(" ")[0]]

    # .items() works on both Python 2 and Python 3 (iteritems is Py2-only).
    tups = {
        (aa, codon): count
        for aa, per_codon in usage.items()
        for codon, count in per_codon.items()
    }

    codon_usage_ = pd.DataFrame(pd.Series(tups), columns=["Count"])
    codon_usage_.index = codon_usage_.index.set_names(["AA", "Codon"])
    # Sort so the row order does not depend on dict iteration order.
    codon_usage_ = codon_usage_.sort_index()

    # Share of each codon within its amino-acid group.
    codon_usage_['Proportion'] = (
        codon_usage_["Count"]
        .groupby(level=0)
        .transform(lambda x: (x / x.sum()).round(2))
    )

    return {"Dictionary": usage, "Tuples": tups, "Table": codon_usage_}
63 | |
64 | |
65 | |
if __name__ == '__main__':
    # Command line: <script> <input_fasta> <output_tsv>
    # Fail with a readable message instead of a bare IndexError traceback
    # when an argument is missing.
    if len(sys.argv) < 3:
        sys.exit("usage: %s <input_fasta> <output_tsv>" % sys.argv[0])

    seqs = read_input(data=sys.argv[1])
    # The codon table is fixed to the one registered as "Bacterial".
    out = codon_usage(seqs, "Bacterial")

    # Write the per-codon table as tab-separated text.
    with open(sys.argv[2], "w") as outf:
        out["Table"].to_csv(outf, sep="\t")