comparison PDAUG_Peptide_Ngrams/PDAUG_Peptide_Ngrams.py @ 0:e59674e3a391 draft

"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit 6f53ad797ec1af02b41510063a86bec7d121abf3"
author jay
date Fri, 20 Nov 2020 19:47:44 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:e59674e3a391
1 import matplotlib
2 matplotlib.use('Agg')
3 import os
4 import sys
5 sys.path.insert(0, os.path.abspath('..'))
6 import quantiprot
7 from quantiprot.utils.io import load_fasta_file
8 from quantiprot.utils.feature import Feature, FeatureSet
9 from quantiprot.metrics.aaindex import get_aa2hydropathy
10 from quantiprot.metrics.basic import identity
11 from quantiprot.metrics.ngram import pattern_match, pattern_count
12 from quantiprot.analysis.ngram import ngram_count
13 from quantiprot.analysis.ngram import zipf_law_fit
14 from matplotlib import pyplot as plt
15
16
17 def Run_ngrams(fasta1, fasta2, OutFile ):
18
19 alphasyn_seq = load_fasta_file(fasta1)
20 amyload_pos_seq = load_fasta_file(fasta2)
21
22 fs_aa = FeatureSet("aa patterns")
23 fs_aa.add(identity)
24 fs_aa.add(pattern_match, pattern='VT', padded=True)
25 fs_aa.add(pattern_count, pattern='VT')
26
27 result_seq = fs_aa(alphasyn_seq)
28
29 fs_hp = FeatureSet("hydropathy patterns")
30 fs_hp.add(Feature(get_aa2hydropathy()))
31 fs_hp.add(Feature(get_aa2hydropathy()).then(pattern_match, pattern=[0.0, 2.0],
32 metric='taxi', radius=1.0))
33 result_seq2 = fs_hp(alphasyn_seq)
34 result_freq = ngram_count(alphasyn_seq, n=2)
35 result_fit = zipf_law_fit(amyload_pos_seq, n=3, verbose=True)
36
37 counts = sorted(result_fit["ngram_counts"], reverse=True)
38 ranks = range(1, len(counts)+1)
39
40 slope = result_fit["slope"]
41 harmonic_num = sum([rank**-slope for rank in ranks])
42 fitted_counts = [(rank**-slope) / harmonic_num * sum(counts) for rank in ranks]
43
44 plt.plot(ranks, counts, 'k', label="empirical")
45 plt.plot(ranks, fitted_counts, 'k--',
46 label="Zipf's law\nslope: {:.2f}".format((slope)))
47 plt.xlabel('rank')
48 plt.ylabel('count')
49 plt.xscale('log')
50 plt.yscale('log')
51 plt.legend()
52
53 plt.savefig(OutFile)
54
55 if __name__=="__main__":
56
57
58 import argparse
59
60 parser = argparse.ArgumentParser()
61
62 parser.add_argument("-f1", "--Fasta1",
63 required=True,
64 default=None,
65 help="First fasta file")
66
67 parser.add_argument("-f2", "--Fasta2",
68 required=True,
69 default=None,
70 help="Second fasta file")
71
72
73 parser.add_argument("--OutFile",
74 required=True,
75 help="HTML out file",
76 default="report.html")
77
78
79 args = parser.parse_args()
80
81 Run_ngrams(args.Fasta1, args.Fasta2, args.OutFile)
82