Mercurial > repos > cpt > cpt_helical_wheel
annotate plotWheels/descriptors.py @ 1:9b276485c94a draft
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
author | cpt |
---|---|
date | Mon, 05 Jun 2023 02:44:43 +0000 |
parents | |
children |
rev | line source |
---|---|
1
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1 # -*- coding: utf-8 -*- |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
2 """ |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
3 .. currentmodule:: modlamp.descriptors |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
4 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
5 .. moduleauthor:: modlab Alex Mueller ETH Zurich <alex.mueller@pharma.ethz.ch> |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
6 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
7 This module incorporates different classes to calculate peptide descriptor values. The following classes are available: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
8 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
9 ============================= ============================================================================ |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
10 Class Characteristics |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
11 ============================= ============================================================================ |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
12 :py:class:`GlobalDescriptor` Global one-dimensional peptide descriptors calculated from the AA sequence. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
13 :py:class:`PeptideDescriptor` AA scale based global or convoluted descriptors (auto-/cross-correlated). |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
14 ============================= ============================================================================ |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
15 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
16 .. seealso:: :class:`modlamp.core.BaseDescriptor` from which the classes in :mod:`modlamp.descriptors` inherit. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
17 """ |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
18 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
19 import sys |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
20 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
21 import numpy as np |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
22 from scipy import stats |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
23 from sklearn.externals.joblib import Parallel, delayed |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
24 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
25 from plotWheels.core import ( |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
26 BaseDescriptor, |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
27 load_scale, |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
28 count_aas, |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
29 aa_weights, |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
30 aa_energies, |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
31 aa_formulas, |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
32 ) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
33 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
34 __author__ = "Alex Müller, Gisela Gabernet" |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
35 __docformat__ = "restructuredtext en" |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
36 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
37 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
38 def _one_autocorr(seq, window, scale): |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
39 """Private function used for calculating auto-correlated descriptors for 1 given sequence, window and an AA scale. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
40 This function is used by the :py:func:`calculate_autocorr` method of :py:class:`PeptideDescriptor`. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
41 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
42 :param seq: {str} amino acid sequence to calculate descriptor for |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
43 :param window: {int} correlation-window size |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
44 :param scale: {str} amino acid scale to be used to calculate descriptor |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
45 :return: {numpy.array} calculated descriptor data |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
46 """ |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
47 try: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
48 m = list() # list of lists to store translated sequence values |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
49 for l in range(len(seq)): # translate AA sequence into values |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
50 m.append(scale[str(seq[l])]) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
51 # auto-correlation in defined sequence window |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
52 seqdesc = list() |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
53 for dist in range(window): # for all correlation distances |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
54 for val in range( |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
55 len(scale["A"]) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
56 ): # for all features of the descriptor scale |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
57 valsum = list() |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
58 cntr = 0.0 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
59 for pos in range(len(seq)): # for every position in the sequence |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
60 if (pos + dist) < len( |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
61 seq |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
62 ): # check if corr distance is possible at that sequence position |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
63 cntr += 1 # counter to scale sum |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
64 valsum.append(m[pos][val] * m[pos + dist][val]) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
65 seqdesc.append( |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
66 sum(valsum) / cntr |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
67 ) # append scaled correlation distance values |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
68 return seqdesc |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
69 except ZeroDivisionError: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
70 print( |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
71 "ERROR!\nThe chosen correlation window % i is larger than the sequence %s !" |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
72 % (window, seq) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
73 ) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
74 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
75 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
76 def _one_crosscorr(seq, window, scale): |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
77 """Private function used for calculating cross-correlated descriptors for 1 given sequence, window and an AA scale. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
78 This function is used by the :py:func:`calculate_crosscorr` method of :py:class:`PeptideDescriptor`. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
79 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
80 :param seq: {str} amino acid sequence to calculate descriptor for |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
81 :param window: {int} correlation-window size |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
82 :param scale: {str} amino acid scale to be used to calculate descriptor |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
83 :return: {numpy.array} calculated descriptor data |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
84 """ |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
85 try: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
86 m = list() # list of lists to store translated sequence values |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
87 for l in range(len(seq)): # translate AA sequence into values |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
88 m.append(scale[str(seq[l])]) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
89 # auto-correlation in defined sequence window |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
90 seqdesc = list() |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
91 for val in range(len(scale["A"])): # for all features of the descriptor scale |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
92 for cc in range(len(scale["A"])): # for every feature cross correlation |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
93 if (val + cc) < len( |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
94 scale["A"] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
95 ): # check if corr distance is in range of the num of features |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
96 for dist in range(window): # for all correlation distances |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
97 cntr = float() |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
98 valsum = list() |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
99 for pos in range( |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
100 len(seq) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
101 ): # for every position in the sequence |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
102 if (pos + dist) < len( |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
103 seq |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
104 ): # check if corr distance is possible at that sequence pos |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
105 cntr += 1 # counter to scale sum |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
106 valsum.append(m[pos][val] * m[pos + dist][val + cc]) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
107 seqdesc.append( |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
108 sum(valsum) / cntr |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
109 ) # append scaled correlation distance values |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
110 return seqdesc |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
111 except ZeroDivisionError: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
112 print( |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
113 "ERROR!\nThe chosen correlation window % i is larger than the sequence %s !" |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
114 % (window, seq) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
115 ) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
116 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
117 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
118 def _one_arc(seq, modality, scale): |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
119 """Privat function used for calculating arc descriptors for one sequence and AA scale. This function is used by |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
120 :py:func:`calculate_arc` method method of :py:class:`PeptideDescriptor`. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
121 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
122 :param seq: {str} amino acid sequence to calculate descriptor for |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
123 :param scale: {str} amino acid scale to be used to calculate descriptor |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
124 :return: {numpy.array} calculated descriptor data |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
125 """ |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
126 desc_mat = [] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
127 for aa in seq: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
128 desc_mat.append(scale[aa]) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
129 desc_mat = np.asarray(desc_mat) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
130 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
131 # Check descriptor dimension |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
132 desc_dim = desc_mat.shape[1] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
133 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
134 # list to store descriptor values for all windows |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
135 allwindows_arc = [] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
136 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
137 if len(seq) > 18: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
138 window = 18 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
139 # calculates number of windows in sequence |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
140 num_windows = len(seq) - window |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
141 else: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
142 window = len(seq) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
143 num_windows = 1 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
144 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
145 # loop through all windows |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
146 for j in range(num_windows): |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
147 # slices descriptor matrix into current window |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
148 window_mat = desc_mat[j : j + window, :] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
149 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
150 # defines order of amino acids in helical projection |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
151 order = [0, 11, 4, 15, 8, 1, 12, 5, 16, 9, 2, 13, 6, 17, 10, 3, 14, 7] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
152 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
153 # orders window descriptor matrix into helical projection order |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
154 ordered = [] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
155 for pos in order: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
156 try: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
157 ordered.append(window_mat[pos, :]) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
158 except: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
159 # for sequences of len < 18 adding dummy vector with 2s, length of descriptor dimensions |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
160 ordered.append([2] * desc_dim) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
161 ordered = np.asarray(ordered) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
162 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
163 window_arc = [] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
164 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
165 # loop through pharmacophoric features |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
166 for m in range(desc_dim): |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
167 all_arcs = ( |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
168 [] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
169 ) # stores all arcs that can be found of a pharmacophoric feature |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
170 arc = 0 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
171 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
172 for n in range( |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
173 18 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
174 ): # for all positions in helix, regardless of sequence length |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
175 if ( |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
176 ordered[n, m] == 0 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
177 ): # if position does not contain pharmacophoric feature |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
178 all_arcs.append(arc) # append previous arc to all arcs list |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
179 arc = 0 # arc is initialized |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
180 elif ( |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
181 ordered[n, m] == 1 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
182 ): # if position contains pharmacophoric feature(PF), elongate arc by 20° |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
183 arc += 20 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
184 elif ordered[n, m] == 2: # if position doesn't contain amino acid: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
185 if ( |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
186 ordered[n - 1, m] == 1 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
187 ): # if previous position contained PF add 10° |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
188 arc += 10 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
189 elif ( |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
190 ordered[n - 1, m] == 0 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
191 ): # if previous position didn't contain PF don't add anything |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
192 arc += 0 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
193 elif ( |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
194 ordered[n - 2, m] == 1 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
195 ): # if previous position is empty then check second previous for PF |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
196 arc += 10 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
197 if ( |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
198 n == 17 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
199 ): # if we are at the last position check for position n=0 instead of next position. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
200 if ordered[0, m] == 1: # if it contains PF add 10° extra |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
201 arc += 10 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
202 else: # if next position contains PF add 10° extra |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
203 if ordered[n + 1, m] == 1: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
204 arc += 10 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
205 elif ordered[n + 1, m] == 0: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
206 arc += 0 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
207 else: # if next position is empty check for 2nd next position |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
208 if n == 16: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
209 if ordered[0, m] == 1: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
210 arc += 10 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
211 else: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
212 if ordered[n + 2, m] == 1: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
213 arc += 10 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
214 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
215 all_arcs.append(arc) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
216 if not arc == 360: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
217 arc0 = all_arcs.pop() + all_arcs[0] # join first and last arc together |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
218 all_arcs = [arc0] + all_arcs[1:] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
219 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
220 window_arc.append( |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
221 np.max(all_arcs) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
222 ) # append to window arcs the maximum arc of this PF |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
223 allwindows_arc.append(window_arc) # append all PF arcs of this window |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
224 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
225 allwindows_arc = np.asarray(allwindows_arc) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
226 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
227 if modality == "max": |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
228 final_arc = np.max( |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
229 allwindows_arc, axis=0 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
230 ) # calculate maximum / mean arc along all windows |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
231 elif modality == "mean": |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
232 final_arc = np.mean(allwindows_arc, axis=0) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
233 else: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
234 print('modality is unknown, please choose between "max" and "mean"\n.') |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
235 sys.exit() |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
236 return final_arc |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
237 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
238 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
239 def _charge(seq, ph=7.0, amide=False): |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
240 """Calculates charge of a single sequence. The method used is first described by Bjellqvist. In the case of |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
241 amidation, the value for the 'Cterm' pKa is 15 (and Cterm is added to the pos_pks dictionary. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
242 The pKa scale is extracted from: http://www.hbcpnetbase.com/ (CRC Handbook of Chemistry and Physics, 96th ed). |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
243 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
244 **pos_pks** = {'Nterm': 9.38, 'K': 10.67, 'R': 12.10, 'H': 6.04} |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
245 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
246 **neg_pks** = {'Cterm': 2.15, 'D': 3.71, 'E': 4.15, 'C': 8.14, 'Y': 10.10} |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
247 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
248 :param ph: {float} pH at which to calculate peptide charge. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
249 :param amide: {boolean} whether the sequences have an amidated C-terminus. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
250 :return: {array} descriptor values in the attribute :py:attr:`descriptor |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
251 """ |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
252 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
253 if amide: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
254 pos_pks = {"Nterm": 9.38, "K": 10.67, "R": 12.10, "H": 6.04} |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
255 neg_pks = {"Cterm": 15.0, "D": 3.71, "E": 4.15, "C": 8.14, "Y": 10.10} |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
256 else: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
257 pos_pks = {"Nterm": 9.38, "K": 10.67, "R": 12.10, "H": 6.04} |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
258 neg_pks = {"Cterm": 2.15, "D": 3.71, "E": 4.15, "C": 8.14, "Y": 10.10} |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
259 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
260 aa_content = count_aas(seq, scale="absolute") |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
261 aa_content["Nterm"] = 1.0 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
262 aa_content["Cterm"] = 1.0 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
263 pos_charge = 0.0 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
264 for aa, pK in pos_pks.items(): |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
265 c_r = 10 ** (pK - ph) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
266 partial_charge = c_r / (c_r + 1.0) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
267 pos_charge += aa_content[aa] * partial_charge |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
268 neg_charge = 0.0 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
269 for aa, pK in neg_pks.items(): |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
270 c_r = 10 ** (ph - pK) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
271 partial_charge = c_r / (c_r + 1.0) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
272 neg_charge += aa_content[aa] * partial_charge |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
273 return round(pos_charge - neg_charge, 3) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
274 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
275 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
276 class GlobalDescriptor(BaseDescriptor): |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
277 """ |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
278 Base class for global, non-amino acid scale dependant descriptors. The following descriptors can be calculated by |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
279 the **methods** linked below: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
280 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
281 - `Sequence Length <modlamp.html#modlamp.descriptors.GlobalDescriptor.length>`_ |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
282 - `Molecular Formula <modlamp.html#modlamp.descriptors.GlobalDescriptor.formula>`_ |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
283 - `Molecular Weight <modlamp.html#modlamp.descriptors.GlobalDescriptor.calculate_MW>`_ |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
284 - `Sequence Charge <modlamp.html#modlamp.descriptors.GlobalDescriptor.calculate_charge>`_ |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
285 - `Charge Density <modlamp.html#modlamp.descriptors.GlobalDescriptor.charge_density>`_ |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
286 - `Isoelectric Point <modlamp.html#modlamp.descriptors.GlobalDescriptor.isoelectric_point>`_ |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
287 - `Instability Index <modlamp.html#modlamp.descriptors.GlobalDescriptor.instability_index>`_ |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
288 - `Aromaticity <modlamp.html#modlamp.descriptors.GlobalDescriptor.aromaticity>`_ |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
289 - `Aliphatic Index <modlamp.html#modlamp.descriptors.GlobalDescriptor.aliphatic_index>`_ |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
290 - `Boman Index <modlamp.html#modlamp.descriptors.GlobalDescriptor.boman_index>`_ |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
291 - `Hydrophobic Ratio <modlamp.html#modlamp.descriptors.GlobalDescriptor.hydrophobic_ratio>`_ |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
292 - `all of the above <modlamp.html#modlamp.descriptors.GlobalDescriptor.calculate_all>`_ |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
293 """ |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
294 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
295 def length(self, append=False): |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
296 """ |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
297 Method to calculate the length (total AA count) of every sequence in the attribute :py:attr:`sequences`. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
298 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
299 :param append: {boolean} whether the produced descriptor values should be appended to the existing ones in the |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
300 attribute :py:attr:`descriptor`. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
301 :return: array of sequence lengths in the attribute :py:attr:`descriptor` |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
302 :Example: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
303 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
304 >>> desc = GlobalDescriptor(['AFDGHLKI','KKLQRSDLLRTK','KKLASCNNIPPR']) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
305 >>> desc.length() |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
306 >>> desc.descriptor |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
307 array([[ 8.], [12.], [12.]]) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
308 """ |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
309 desc = [] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
310 for seq in self.sequences: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
311 desc.append(float(len(seq.strip()))) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
312 desc = np.asarray(desc).reshape(len(desc), 1) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
313 if append: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
314 self.descriptor = np.hstack((self.descriptor, np.array(desc))) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
315 self.featurenames.append("Length") |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
316 else: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
317 self.descriptor = np.array(desc) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
318 self.featurenames = ["Length"] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
319 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
320 def formula(self, amide=False, append=False): |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
321 """Method to calculate the molecular formula of every sequence in the attribute :py:attr:`sequences`. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
322 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
323 :param amide: {boolean} whether the sequences are C-terminally amidated. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
324 :param append: {boolean} whether the produced descriptor values should be appended to the existing ones in the |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
325 attribute :py:attr:`descriptor`. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
326 :return: array of molecular formulas {str} in the attribute :py:attr:`descriptor` |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
327 :Example: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
328 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
329 >>> desc = GlobalDescriptor(['KADSFLSADGHSADFSLDKKLKERL', 'ERTILSDFPQWWFASLDFLNC', 'ACDEFGHIKLMNPQRSTVWY']) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
330 >>> desc.formula(amide=True) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
331 >>> for v in desc.descriptor: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
332 ... print(v[0]) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
333 C122 H197 N35 O39 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
334 C121 H168 N28 O33 S |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
335 C106 H157 N29 O30 S2 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
336 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
337 .. seealso:: :py:func:`modlamp.core.aa_formulas()` |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
338 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
339 .. versionadded:: v2.7.6 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
340 """ |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
341 desc = [] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
342 formulas = aa_formulas() |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
343 for seq in self.sequences: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
344 f = {"C": 0, "H": 0, "N": 0, "O": 0, "S": 0} |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
345 for aa in seq: # loop over all AAs |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
346 for k in f.keys(): |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
347 f[k] += formulas[aa][k] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
348 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
349 # substract H2O for every peptide bond |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
350 f["H"] -= 2 * (len(seq) - 1) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
351 f["O"] -= len(seq) - 1 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
352 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
353 if amide: # add C-terminal amide --> replace OH with NH2 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
354 f["O"] -= 1 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
355 f["H"] += 1 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
356 f["N"] += 1 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
357 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
358 if f["S"] != 0: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
359 val = "C%s H%s N%s O%s %s%s" % ( |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
360 f["C"], |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
361 f["H"], |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
362 f["N"], |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
363 f["O"], |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
364 "S", |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
365 f["S"], |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
366 ) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
367 else: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
368 val = "C%s H%s N%s O%s" % (f["C"], f["H"], f["N"], f["O"]) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
369 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
370 desc.append([val]) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
371 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
372 if append: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
373 self.descriptor = np.hstack((self.descriptor, np.array(desc))) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
374 self.featurenames.append("Formula") |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
375 else: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
376 self.descriptor = np.array(desc) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
377 self.featurenames = ["Formula"] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
378 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
379 def calculate_MW(self, amide=False, append=False): |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
380 """Method to calculate the molecular weight [g/mol] of every sequence in the attribute :py:attr:`sequences`. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
381 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
382 :param amide: {boolean} whether the sequences are C-terminally amidated (subtracts 0.95 from the MW). |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
383 :param append: {boolean} whether the produced descriptor values should be appended to the existing ones in the |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
384 attribute :py:attr:`descriptor`. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
385 :return: array of descriptor values in the attribute :py:attr:`descriptor` |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
386 :Example: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
387 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
388 >>> desc = GlobalDescriptor('IAESFKGHIPL') |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
389 >>> desc.calculate_MW(amide=True) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
390 >>> desc.descriptor |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
391 array([[ 1210.43]]) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
392 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
393 .. seealso:: :py:func:`modlamp.core.aa_weights()` |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
394 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
395 .. versionchanged:: v2.1.5 amide option added |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
396 """ |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
397 desc = [] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
398 weights = aa_weights() |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
399 for seq in self.sequences: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
400 mw = [] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
401 for aa in seq: # sum over aa weights |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
402 mw.append(weights[aa]) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
403 desc.append( |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
404 round(sum(mw) - 18.015 * (len(seq) - 1), 2) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
405 ) # sum over AA MW and subtract H20 MW for every |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
406 # peptide bond |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
407 desc = np.asarray(desc).reshape(len(desc), 1) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
408 if ( |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
409 amide |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
410 ): # if sequences are amidated, subtract 0.98 from calculated MW (OH - NH2) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
411 desc = [d - 0.98 for d in desc] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
412 if append: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
413 self.descriptor = np.hstack((self.descriptor, np.array(desc))) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
414 self.featurenames.append("MW") |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
415 else: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
416 self.descriptor = np.array(desc) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
417 self.featurenames = ["MW"] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
418 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
419 def calculate_charge(self, ph=7.0, amide=False, append=False): |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
420 """Method to overall charge of every sequence in the attribute :py:attr:`sequences`. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
421 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
422 The method used is first described by Bjellqvist. In the case of amidation, the value for the 'Cterm' pKa is 15 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
423 (and Cterm is added to the pos_pKs dictionary. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
424 The pKa scale is extracted from: http://www.hbcpnetbase.com/ (CRC Handbook of Chemistry and Physics, 96th ed). |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
425 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
426 **pos_pKs** = {'Nterm': 9.38, 'K': 10.67, 'R': 12.10, 'H': 6.04} |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
427 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
428 **neg_pKs** = {'Cterm': 2.15, 'D': 3.71, 'E': 4.15, 'C': 8.14, 'Y': 10.10} |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
429 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
430 :param ph: {float} ph at which to calculate peptide charge. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
431 :param amide: {boolean} whether the sequences have an amidated C-terminus. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
432 :param append: {boolean} whether the produced descriptor values should be appended to the existing ones in the |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
433 attribute :py:attr:`descriptor`. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
434 :return: array of descriptor values in the attribute :py:attr:`descriptor` |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
435 :Example: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
436 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
437 >>> desc = GlobalDescriptor('KLAKFGKRSELVALSG') |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
438 >>> desc.calculate_charge(ph=7.4, amide=True) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
439 >>> desc.descriptor |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
440 array([[ 3.989]]) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
441 """ |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
442 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
443 desc = [] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
444 for seq in self.sequences: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
445 desc.append( |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
446 _charge(seq, ph, amide) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
447 ) # calculate charge with helper function |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
448 desc = np.asarray(desc).reshape(len(desc), 1) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
449 if append: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
450 self.descriptor = np.hstack((self.descriptor, np.array(desc))) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
451 self.featurenames.append("Charge") |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
452 else: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
453 self.descriptor = np.array(desc) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
454 self.featurenames = ["Charge"] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
455 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
456 def charge_density(self, ph=7.0, amide=False, append=False): |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
457 """Method to calculate the charge density (charge / MW) of every sequences in the attributes :py:attr:`sequences` |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
458 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
459 :param ph: {float} pH at which to calculate peptide charge. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
460 :param amide: {boolean} whether the sequences have an amidated C-terminus. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
461 :param append: {boolean} whether the produced descriptor values should be appended to the existing ones in the |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
462 attribute :py:attr:`descriptor`. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
463 :return: array of descriptor values in the attribute :py:attr:`descriptor`. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
464 :Example: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
465 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
466 >>> desc = GlobalDescriptor('GNSDLLIEQRTLLASDEF') |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
467 >>> desc.charge_density(ph=6, amide=True) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
468 >>> desc.descriptor |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
469 array([[-0.00097119]]) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
470 """ |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
471 self.calculate_charge(ph, amide) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
472 charges = self.descriptor |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
473 self.calculate_MW(amide) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
474 masses = self.descriptor |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
475 desc = charges / masses |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
476 desc = np.asarray(desc).reshape(len(desc), 1) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
477 if append: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
478 self.descriptor = np.hstack((self.descriptor, np.array(desc))) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
479 self.featurenames.append("ChargeDensity") |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
480 else: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
481 self.descriptor = np.array(desc) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
482 self.featurenames = ["ChargeDensity"] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
483 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
484 def isoelectric_point(self, amide=False, append=False): |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
485 """ |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
486 Method to calculate the isoelectric point of every sequence in the attribute :py:attr:`sequences`. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
487 The pK scale is extracted from: http://www.hbcpnetbase.com/ (CRC Handbook of Chemistry and Physics, 96th ed). |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
488 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
489 **pos_pKs** = {'Nterm': 9.38, 'K': 10.67, 'R': 12.10, 'H': 6.04} |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
490 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
491 **neg_pKs** = {'Cterm': 2.15, 'D': 3.71, 'E': 4.15, 'C': 8.14, 'Y': 10.10} |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
492 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
493 :param amide: {boolean} whether the sequences have an amidated C-terminus. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
494 :param append: {boolean} whether the produced descriptor values should be appended to the existing ones in the |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
495 attribute :py:attr:`descriptor`. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
496 :return: array of descriptor values in the attribute :py:attr:`descriptor` |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
497 :Example: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
498 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
499 >>> desc = GlobalDescriptor('KLFDIKFGHIPQRST') |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
500 >>> desc.isoelectric_point() |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
501 >>> desc.descriptor |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
502 array([[ 10.6796875]]) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
503 """ |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
504 ph, ph1, ph2 = float(), float(), float() |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
505 desc = [] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
506 for seq in self.sequences: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
507 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
508 # Bracket between ph1 and ph2 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
509 ph = 7.0 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
510 charge = _charge(seq, ph, amide) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
511 if charge > 0.0: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
512 ph1 = ph |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
513 charge1 = charge |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
514 while charge1 > 0.0: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
515 ph = ph1 + 1.0 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
516 charge = _charge(seq, ph, amide) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
517 if charge > 0.0: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
518 ph1 = ph |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
519 charge1 = charge |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
520 else: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
521 ph2 = ph |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
522 break |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
523 else: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
524 ph2 = ph |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
525 charge2 = charge |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
526 while charge2 < 0.0: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
527 ph = ph2 - 1.0 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
528 charge = _charge(seq, ph, amide) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
529 if charge < 0.0: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
530 ph2 = ph |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
531 charge2 = charge |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
532 else: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
533 ph1 = ph |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
534 break |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
535 # Bisection |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
536 while ph2 - ph1 > 0.0001 and charge != 0.0: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
537 ph = (ph1 + ph2) / 2.0 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
538 charge = _charge(seq, ph, amide) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
539 if charge > 0.0: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
540 ph1 = ph |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
541 else: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
542 ph2 = ph |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
543 desc.append(ph) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
544 desc = np.asarray(desc).reshape(len(desc), 1) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
545 if append: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
546 self.descriptor = np.hstack((self.descriptor, np.array(desc))) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
547 self.featurenames.append("pI") |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
548 else: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
549 self.descriptor = np.array(desc) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
550 self.featurenames = ["pI"] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
551 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
552 def instability_index(self, append=False): |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
553 """ |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
554 Method to calculate the instability of every sequence in the attribute :py:attr:`sequences`. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
555 The instability index is a prediction of protein stability based on the amino acid composition. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
556 ([1] K. Guruprasad, B. V Reddy, M. W. Pandit, Protein Eng. 1990, 4, 155–161.) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
557 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
558 :param append: {boolean} whether the produced descriptor values should be appended to the existing ones in the |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
559 attribute :py:attr:`descriptor`. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
560 :return: array of descriptor values in the attribute :py:attr:`descriptor` |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
561 :Example: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
562 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
563 >>> desc = GlobalDescriptor('LLASMNDLLAKRST') |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
564 >>> desc.instability_index() |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
565 >>> desc.descriptor |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
566 array([[ 63.95714286]]) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
567 """ |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
568 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
569 desc = [] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
570 dimv = load_scale("instability")[1] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
571 for seq in self.sequences: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
572 stabindex = float() |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
573 for i in range(len(seq) - 1): |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
574 stabindex += dimv[seq[i]][seq[i + 1]] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
575 desc.append((10.0 / len(seq)) * stabindex) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
576 desc = np.asarray(desc).reshape(len(desc), 1) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
577 if append: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
578 self.descriptor = np.hstack((self.descriptor, np.array(desc))) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
579 self.featurenames.append("InstabilityInd") |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
580 else: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
581 self.descriptor = np.array(desc) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
582 self.featurenames = ["InstabilityInd"] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
583 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
584 def aromaticity(self, append=False): |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
585 """ |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
586 Method to calculate the aromaticity of every sequence in the attribute :py:attr:`sequences`. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
587 According to Lobry, 1994, it is simply the relative frequency of Phe+Trp+Tyr. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
588 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
589 :param append: {boolean} whether the produced descriptor values should be appended to the existing ones in the |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
590 attribute :py:attr:`descriptor`. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
591 :return: array of descriptor values in the attribute :py:attr:`descriptor` |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
592 :Example: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
593 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
594 >>> desc = GlobalDescriptor('GLFYWRFFLQRRFLYWW') |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
595 >>> desc.aromaticity() |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
596 >>> desc.descriptor |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
597 array([[ 0.52941176]]) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
598 """ |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
599 desc = [] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
600 for seq in self.sequences: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
601 f = seq.count("F") |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
602 w = seq.count("W") |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
603 y = seq.count("Y") |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
604 desc.append(float(f + w + y) / len(seq)) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
605 desc = np.asarray(desc).reshape(len(desc), 1) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
606 if append: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
607 self.descriptor = np.hstack((self.descriptor, np.array(desc))) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
608 self.featurenames.append("Aromaticity") |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
609 else: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
610 self.descriptor = np.array(desc) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
611 self.featurenames = ["Aromaticity"] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
612 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
613 def aliphatic_index(self, append=False): |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
614 """ |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
615 Method to calculate the aliphatic index of every sequence in the attribute :py:attr:`sequences`. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
616 According to Ikai, 1980, the aliphatic index is a measure of thermal stability of proteins and is dependant |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
617 on the relative volume occupied by aliphatic amino acids (A,I,L & V). |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
618 ([1] A. Ikai, J. Biochem. 1980, 88, 1895–1898.) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
619 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
620 :param append: {boolean} whether the produced descriptor values should be appended to the existing ones in the |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
621 attribute :py:attr:`descriptor`. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
622 :return: array of descriptor values in the attribute :py:attr:`descriptor` |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
623 :Example: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
624 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
625 >>> desc = GlobalDescriptor('KWLKYLKKLAKLVK') |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
626 >>> desc.aliphatic_index() |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
627 >>> desc.descriptor |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
628 array([[ 139.28571429]]) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
629 """ |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
630 desc = [] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
631 aa_dict = aa_weights() |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
632 for seq in self.sequences: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
633 d = {aa: seq.count(aa) for aa in aa_dict.keys()} # count aa |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
634 d = { |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
635 k: (float(d[k]) / len(seq)) * 100 for k in d.keys() |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
636 } # get mole percent of all AA |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
637 desc.append( |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
638 d["A"] + 2.9 * d["V"] + 3.9 * (d["I"] + d["L"]) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
639 ) # formula for calculating the AI (Ikai, 1980) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
640 desc = np.asarray(desc).reshape(len(desc), 1) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
641 if append: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
642 self.descriptor = np.hstack((self.descriptor, np.array(desc))) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
643 self.featurenames.append("AliphaticInd") |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
644 else: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
645 self.descriptor = np.array(desc) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
646 self.featurenames = ["AliphaticInd"] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
647 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
648 def boman_index(self, append=False): |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
649 """Method to calculate the boman index of every sequence in the attribute :py:attr:`sequences`. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
650 According to Boman, 2003, the boman index is a measure for protein-protein interactions and is calculated by |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
651 summing over all amino acid free energy of transfer [kcal/mol] between water and cyclohexane,[2] followed by |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
652 dividing by sequence length. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
653 ([1] H. G. Boman, D. Wade, I. a Boman, B. Wåhlin, R. B. Merrifield, *FEBS Lett*. **1989**, *259*, 103–106. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
654 [2] A. Radzick, R. Wolfenden, *Biochemistry* **1988**, *27*, 1664–1670.) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
655 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
656 .. seealso:: :py:func:`modlamp.core.aa_energies()` |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
657 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
658 :param append: {boolean} whether the produced descriptor values should be appended to the existing ones in the |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
659 attribute :py:attr:`descriptor`. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
660 :return: array of descriptor values in the attribute :py:attr:`descriptor` |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
661 :Example: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
662 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
663 >>> desc = GlobalDescriptor('GLFDIVKKVVGALGSL') |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
664 >>> desc.boman_index() |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
665 >>> desc.descriptor |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
666 array([[-1.011875]]) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
667 """ |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
668 d = aa_energies() |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
669 desc = [] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
670 for seq in self.sequences: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
671 val = [] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
672 for a in seq: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
673 val.append(d[a]) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
674 desc.append(sum(val) / len(val)) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
675 desc = np.asarray(desc).reshape(len(desc), 1) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
676 if append: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
677 self.descriptor = np.hstack((self.descriptor, np.array(desc))) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
678 self.featurenames.append("BomanInd") |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
679 else: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
680 self.descriptor = np.array(desc) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
681 self.featurenames = ["BomanInd"] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
682 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
683 def hydrophobic_ratio(self, append=False): |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
684 """ |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
685 Method to calculate the hydrophobic ratio of every sequence in the attribute :py:attr:`sequences`, which is the |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
686 relative frequency of the amino acids **A,C,F,I,L,M & V**. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
687 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
688 :param append: {boolean} whether the produced descriptor values should be appended to the existing ones in the |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
689 attribute :py:attr:`descriptor`. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
690 :return: array of descriptor values in the attribute :py:attr:`descriptor` |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
691 :Example: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
692 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
693 >>> desc = GlobalDescriptor('VALLYWRTVLLAIII') |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
694 >>> desc.hydrophobic_ratio() |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
695 >>> desc.descriptor |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
696 array([[ 0.73333333]]) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
697 """ |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
698 desc = [] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
699 aa_dict = aa_weights() |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
700 for seq in self.sequences: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
701 pa = {aa: seq.count(aa) for aa in aa_dict.keys()} # count aa |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
702 # formula for calculating the AI (Ikai, 1980): |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
703 desc.append( |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
704 (pa["A"] + pa["C"] + pa["F"] + pa["I"] + pa["L"] + pa["M"] + pa["V"]) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
705 / float(len(seq)) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
706 ) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
707 desc = np.asarray(desc).reshape(len(desc), 1) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
708 if append: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
709 self.descriptor = np.hstack((self.descriptor, np.array(desc))) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
710 self.featurenames.append("HydrophRatio") |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
711 else: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
712 self.descriptor = np.array(desc) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
713 self.featurenames = ["HydrophRatio"] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
714 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
715 def calculate_all(self, ph=7.4, amide=True): |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
716 """Method combining all global descriptors and appending them into the feature matrix in the attribute |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
717 :py:attr:`descriptor`. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
718 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
719 :param ph: {float} pH at which to calculate peptide charge |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
720 :param amide: {boolean} whether the sequences have an amidated C-terminus. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
721 :return: array of descriptor values in the attribute :py:attr:`descriptor` |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
722 :Example: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
723 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
724 >>> desc = GlobalDescriptor('AFGHFKLKKLFIFGHERT') |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
725 >>> desc.calculate_all(amide=True) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
726 >>> desc.featurenames |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
727 ['Length', 'MW', 'ChargeDensity', 'pI', 'InstabilityInd', 'Aromaticity', 'AliphaticInd', 'BomanInd', 'HydRatio'] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
728 >>> desc.descriptor |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
729 array([[ 18., 2.17559000e+03, 1.87167619e-03, 1.16757812e+01, ... 1.10555556e+00, 4.44444444e-01]]) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
730 >>> desc.save_descriptor('/path/to/outputfile.csv') # save the descriptor data (with feature names header) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
731 """ |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
732 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
733 # This is a strange way of doing it. However, the append=True option excludes length and charge, no idea why! |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
734 fn = [] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
735 self.length() # sequence length |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
736 l = self.descriptor |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
737 fn.extend(self.featurenames) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
738 self.calculate_MW(amide=amide) # molecular weight |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
739 mw = self.descriptor |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
740 fn.extend(self.featurenames) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
741 self.calculate_charge(ph=ph, amide=amide) # net charge |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
742 c = self.descriptor |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
743 fn.extend(self.featurenames) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
744 self.charge_density(ph=ph, amide=amide) # charge density |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
745 cd = self.descriptor |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
746 fn.extend(self.featurenames) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
747 self.isoelectric_point(amide=amide) # pI |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
748 pi = self.descriptor |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
749 fn.extend(self.featurenames) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
750 self.instability_index() # instability index |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
751 si = self.descriptor |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
752 fn.extend(self.featurenames) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
753 self.aromaticity() # global aromaticity |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
754 ar = self.descriptor |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
755 fn.extend(self.featurenames) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
756 self.aliphatic_index() # aliphatic index |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
757 ai = self.descriptor |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
758 fn.extend(self.featurenames) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
759 self.boman_index() # Boman index |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
760 bi = self.descriptor |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
761 fn.extend(self.featurenames) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
762 self.hydrophobic_ratio() # Hydrophobic ratio |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
763 hr = self.descriptor |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
764 fn.extend(self.featurenames) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
765 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
766 self.descriptor = np.concatenate((l, mw, c, cd, pi, si, ar, ai, bi, hr), axis=1) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
767 self.featurenames = fn |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
768 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
769 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
770 class PeptideDescriptor(BaseDescriptor): |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
771 """Base class for peptide descriptors. The following **amino acid descriptor scales** are available for descriptor |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
772 calculation: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
773 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
774 - **AASI** (An amino acid selectivity index scale for helical antimicrobial peptides, *[1] D. Juretić, D. Vukicević, N. Ilić, N. Antcheva, A. Tossi, J. Chem. Inf. Model. 2009, 49, 2873–2882.*) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
775 - **ABHPRK** (modlabs inhouse physicochemical feature scale (Acidic, Basic, Hydrophobic, Polar, aRomatic, Kink-inducer) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
776 - **argos** (Argos hydrophobicity amino acid scale, *[2] Argos, P., Rao, J. K. M. & Hargrave, P. A., Eur. J. Biochem. 2005, 128, 565–575.*) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
777 - **bulkiness** (Amino acid side chain bulkiness scale, *[3] J. M. Zimmerman, N. Eliezer, R. Simha, J. Theor. Biol. 1968, 21, 170–201.*) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
778 - **charge_phys** (Amino acid charge at pH 7.0 - Hystidine charge +0.1.) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
779 - **charge_acid** (Amino acid charge at acidic pH - Hystidine charge +1.0.) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
780 - **cougar** (modlabs inhouse selection of global peptide descriptors) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
781 - **eisenberg** (the Eisenberg hydrophobicity consensus amino acid scale, *[4] D. Eisenberg, R. M. Weiss, T. C. Terwilliger, W. Wilcox, Faraday Symp. Chem. Soc. 1982, 17, 109.*) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
782 - **Ez** (potential that assesses energies of insertion of amino acid side chains into lipid bilayers, *[5] A. Senes, D. C. Chadi, P. B. Law, R. F. S. Walters, V. Nanda, W. F. DeGrado, J. Mol. Biol. 2007, 366, 436–448.*) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
783 - **flexibility** (amino acid side chain flexibilitiy scale, *[6] R. Bhaskaran, P. K. Ponnuswamy, Int. J. Pept. Protein Res. 1988, 32, 241–255.*) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
784 - **grantham** (amino acid side chain composition, polarity and molecular volume, *[8] Grantham, R. Science. 185, 862–864 (1974).*) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
785 - **gravy** (GRAVY hydrophobicity amino acid scale, *[9] J. Kyte, R. F. Doolittle, J. Mol. Biol. 1982, 157, 105–132.*) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
786 - **hopp-woods** (Hopp-Woods amino acid hydrophobicity scale,*[10] T. P. Hopp, K. R. Woods, Proc. Natl. Acad. Sci. 1981, 78, 3824–3828.*) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
787 - **ISAECI** (Isotropic Surface Area (ISA) and Electronic Charge Index (ECI) of amino acid side chains, *[11] E. R. Collantes, W. J. Dunn, J. Med. Chem. 1995, 38, 2705–2713.*) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
788 - **janin** (Janin hydrophobicity amino acid scale, *[12] J. L. Cornette, K. B. Cease, H. Margalit, J. L. Spouge, J. A. Berzofsky, C. DeLisi, J. Mol. Biol. 1987, 195, 659–685.*) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
789 - **kytedoolittle** (Kyte & Doolittle hydrophobicity amino acid scale, *[13] J. Kyte, R. F. Doolittle, J. Mol. Biol. 1982, 157, 105–132.*) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
790 - **levitt_alpha** (Levitt amino acid alpha-helix propensity scale, extracted from http://web.expasy.org/protscale. *[14] M. Levitt, Biochemistry 1978, 17, 4277-4285.*) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
791 - **MSS** (A graph-theoretical index that reflects topological shape and size of amino acid side chains, *[15] C. Raychaudhury, A. Banerjee, P. Bag, S. Roy, J. Chem. Inf. Comput. Sci. 1999, 39, 248–254.*) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
792 - **MSW** (Amino acid scale based on a PCA of the molecular surface based WHIM descriptor (MS-WHIM), extended to natural amino acids, *[16] A. Zaliani, E. Gancia, J. Chem. Inf. Comput. Sci 1999, 39, 525–533.*) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
793 - **pepArc** (modlabs pharmacophoric feature scale, dimensions are: hydrophobicity, polarity, positive charge, negative charge, proline.) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
794 - **pepcats** (modlabs pharmacophoric feature based PEPCATS scale, *[17] C. P. Koch, A. M. Perna, M. Pillong, N. K. Todoroff, P. Wrede, G. Folkers, J. A. Hiss, G. Schneider, PLoS Comput. Biol. 2013, 9, e1003088.*) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
795 - **polarity** (Amino acid polarity scale, *[18] J. M. Zimmerman, N. Eliezer, R. Simha, J. Theor. Biol. 1968, 21, 170–201.*) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
796 - **PPCALI** (modlabs inhouse scale derived from a PCA of 143 amino acid property scales, *[19] C. P. Koch, A. M. Perna, M. Pillong, N. K. Todoroff, P. Wrede, G. Folkers, J. A. Hiss, G. Schneider, PLoS Comput. Biol. 2013, 9, e1003088.*) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
797 - **refractivity** (Relative amino acid refractivity values, *[20] T. L. McMeekin, M. Wilensky, M. L. Groves, Biochem. Biophys. Res. Commun. 1962, 7, 151–156.*) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
798 - **t_scale** (A PCA derived scale based on amino acid side chain properties calculated with 6 different probes of the GRID program, *[21] M. Cocchi, E. Johansson, Quant. Struct. Act. Relationships 1993, 12, 1–8.*) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
799 - **TM_tend** (Amino acid transmembrane propensity scale, extracted from http://web.expasy.org/protscale, *[22] Zhao, G., London E. Protein Sci. 2006, 15, 1987-2001.*) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
800 - **z3** (The original three dimensional Z-scale, *[23] S. Hellberg, M. Sjöström, B. Skagerberg, S. Wold, J. Med. Chem. 1987, 30, 1126–1135.*) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
801 - **z5** (The extended five dimensional Z-scale, *[24] M. Sandberg, L. Eriksson, J. Jonsson, M. Sjöström, S. Wold, J. Med. Chem. 1998, 41, 2481–2491.*) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
802 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
803 Further, amino acid scale independent methods can be calculated with help of the :class:`GlobalDescriptor` class. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
804 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
805 """ |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
806 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
807 def __init__(self, seqs, scalename="Eisenberg"): |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
808 """ |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
809 :param seqs: a .fasta file with sequences, a list of sequences or a single sequence as string to calculate the |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
810 descriptor values for. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
811 :param scalename: {str} name of the amino acid scale (one of the given list above) used to calculate the |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
812 descriptor values |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
813 :return: initialized attributes :py:attr:`sequences`, :py:attr:`names` and dictionary :py:attr:`scale` with |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
814 amino acid scale values of the scale name in :py:attr:`scalename`. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
815 :Example: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
816 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
817 >>> AMP = PeptideDescriptor('KLLKLLKKLLKLLK','pepcats') |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
818 >>> AMP.sequences |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
819 ['KLLKLLKKLLKLLK'] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
820 >>> seqs = PeptideDescriptor('/Path/to/file.fasta', 'eisenberg') # load sequences from .fasta file |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
821 >>> seqs.sequences |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
822 ['AFDGHLKI','KKLQRSDLLRTK','KKLASCNNIPPR'...] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
823 """ |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
824 super(PeptideDescriptor, self).__init__(seqs) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
825 self.scalename, self.scale = load_scale(scalename.lower()) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
826 self.all_moms = list() # for passing hydrophobic moments to calculate_profile |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
827 self.all_globs = list() # for passing global to calculate_profile |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
828 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
829 def load_scale(self, scalename): |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
830 """Method to load amino acid values from a given scale |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
831 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
832 :param scalename: {str} name of the amino acid scale to be loaded. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
833 :return: loaded amino acid scale values in a dictionary in the attribute :py:attr:`scale`. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
834 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
835 .. seealso:: :func:`modlamp.core.load_scale()` |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
836 """ |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
837 self.scalename, self.scale = load_scale(scalename.lower()) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
838 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
839 def calculate_autocorr(self, window, append=False): |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
840 """Method for auto-correlating the amino acid values for a given descriptor scale |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
841 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
842 :param window: {int} correlation window for descriptor calculation in a sliding window approach |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
843 :param append: {boolean} whether the produced descriptor values should be appended to the existing ones in the |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
844 attribute :py:attr:`descriptor`. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
845 :return: calculated descriptor numpy.array in the attribute :py:attr:`descriptor`. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
846 :Example: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
847 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
848 >>> AMP = PeptideDescriptor('GLFDIVKKVVGALGSL','PPCALI') |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
849 >>> AMP.calculate_autocorr(7) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
850 >>> AMP.descriptor |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
851 array([[ 1.28442339e+00, 1.29025116e+00, 1.03240901e+00, .... ]]) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
852 >>> AMP.descriptor.shape |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
853 (1, 133) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
854 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
855 .. versionchanged:: v.2.3.0 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
856 """ |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
857 desc = Parallel(n_jobs=-1)( |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
858 delayed(_one_autocorr)(seq, window, self.scale) for seq in self.sequences |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
859 ) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
860 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
861 if append: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
862 self.descriptor = np.hstack((self.descriptor, np.array(desc))) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
863 else: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
864 self.descriptor = np.array(desc) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
865 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
866 def calculate_crosscorr(self, window, append=False): |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
867 """Method for cross-correlating the amino acid values for a given descriptor scale |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
868 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
869 :param window: {int} correlation window for descriptor calculation in a sliding window approach |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
870 :param append: {boolean} whether the produced descriptor values should be appended to the existing ones in the |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
871 attribute :py:attr:`descriptor`. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
872 :return: calculated descriptor numpy.array in the attribute :py:attr:`descriptor`. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
873 :Example: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
874 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
875 >>> AMP = PeptideDescriptor('GLFDIVKKVVGALGSL','pepcats') |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
876 >>> AMP.calculate_crosscorr(7) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
877 >>> AMP.descriptor |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
878 array([[ 0.6875 , 0.46666667, 0.42857143, 0.61538462, 0.58333333, ... ]]) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
879 >>> AMP.descriptor.shape |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
880 (1, 147) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
881 """ |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
882 desc = Parallel(n_jobs=-1)( |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
883 delayed(_one_crosscorr)(seq, window, self.scale) for seq in self.sequences |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
884 ) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
885 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
886 if append: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
887 self.descriptor = np.hstack((self.descriptor, np.array(desc))) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
888 else: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
889 self.descriptor = np.array(desc) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
890 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
891 def calculate_moment(self, window=1000, angle=100, modality="max", append=False): |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
892 """Method for calculating the maximum or mean moment of the amino acid values for a given descriptor scale and |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
893 window. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
894 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
895 :param window: {int} amino acid window in which to calculate the moment. If the sequence is shorter than the |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
896 window, the length of the sequence is taken. So if the default window of 1000 is chosen, for all sequences |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
897 shorter than 1000, the **global** hydrophobic moment will be calculated. Otherwise, the maximal |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
898 hydrophiobic moment for the chosen window size found in the sequence will be returned. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
899 :param angle: {int} angle in which to calculate the moment. **100** for alpha helices, **180** for beta sheets. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
900 :param modality: {'all', 'max' or 'mean'} Calculate respectively maximum or mean hydrophobic moment. If all, |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
901 moments for all windows are returned. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
902 :param append: {boolean} whether the produced descriptor values should be appended to the existing ones in the |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
903 attribute :py:attr:`descriptor`. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
904 :return: Calculated descriptor as a numpy.array in the attribute :py:attr:`descriptor` and all possible global |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
905 values in :py:attr:`all_moms` (needed for the :py:func:`calculate_profile` method) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
906 :Example: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
907 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
908 >>> AMP = PeptideDescriptor('GLFDIVKKVVGALGSL', 'eisenberg') |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
909 >>> AMP.calculate_moment() |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
910 >>> AMP.descriptor |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
911 array([[ 0.48790226]]) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
912 """ |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
913 if self.scale["A"] == list: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
914 print( |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
915 "\n Descriptor moment calculation is only possible for one dimensional descriptors.\n" |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
916 ) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
917 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
918 else: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
919 desc = [] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
920 for seq in self.sequences: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
921 wdw = min( |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
922 window, len(seq) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
923 ) # if sequence is shorter than window, take the whole sequence instead |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
924 mtrx = [] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
925 mwdw = [] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
926 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
927 for aa in range(len(seq)): |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
928 mtrx.append(self.scale[str(seq[aa])]) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
929 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
930 for i in range(len(mtrx) - wdw + 1): |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
931 mwdw.append(sum(mtrx[i : i + wdw], [])) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
932 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
933 mwdw = np.asarray(mwdw) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
934 rads = ( |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
935 angle * (np.pi / 180) * np.asarray(range(wdw)) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
936 ) # calculate actual moment (radial) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
937 vcos = (mwdw * np.cos(rads)).sum(axis=1) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
938 vsin = (mwdw * np.sin(rads)).sum(axis=1) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
939 moms = np.sqrt(vsin**2 + vcos**2) / wdw |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
940 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
941 if modality == "max": # take window with maximal value |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
942 moment = np.max(moms) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
943 elif modality == "mean": # take average value over all windows |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
944 moment = np.mean(moms) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
945 elif modality == "all": |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
946 moment = moms |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
947 else: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
948 print( |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
949 '\nERROR!\nModality parameter is wrong, please choose between "all", "max" and "mean".\n' |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
950 ) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
951 return |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
952 desc.append(moment) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
953 self.all_moms.append(moms) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
954 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
955 desc = np.asarray(desc).reshape(len(desc), 1) # final descriptor array |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
956 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
957 if append: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
958 self.descriptor = np.hstack((self.descriptor, np.array(desc))) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
959 else: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
960 self.descriptor = np.array(desc) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
961 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
962 def calculate_global(self, window=1000, modality="max", append=False): |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
963 """Method for calculating a global / window averaging descriptor value of a given AA scale |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
964 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
965 :param window: {int} amino acid window in which to calculate the moment. If the sequence is shorter than the |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
966 window, the length of the sequence is taken. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
967 :param modality: {'max' or 'mean'} Calculate respectively maximum or mean hydrophobic moment. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
968 :param append: {boolean} whether the produced descriptor values should be appended to the existing ones in the |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
969 attribute :py:attr:`descriptor`. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
970 :return: Calculated descriptor as a numpy.array in the attribute :py:attr:`descriptor` and all possible global |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
971 values in :py:attr:`all_globs` (needed for the :py:func:`calculate_profile` method) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
972 :Example: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
973 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
974 >>> AMP = PeptideDescriptor('GLFDIVKKVVGALGSL','eisenberg') |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
975 >>> AMP.calculate_global(window=1000, modality='max') |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
976 >>> AMP.descriptor |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
977 array([[ 0.44875]]) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
978 """ |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
979 desc = list() |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
980 for n, seq in enumerate(self.sequences): |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
981 wdw = min( |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
982 window, len(seq) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
983 ) # if sequence is shorter than window, take the whole sequence instead |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
984 mtrx = [] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
985 mwdw = [] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
986 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
987 for l in range(len(seq)): # translate AA sequence into values |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
988 mtrx.append(self.scale[str(seq[l])]) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
989 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
990 for i in range(len(mtrx) - wdw + 1): |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
991 mwdw.append( |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
992 sum(mtrx[i : i + wdw], []) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
993 ) # list of all the values for the different windows |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
994 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
995 mwdw = np.asarray(mwdw) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
996 glob = np.sum(mwdw, axis=1) / float(wdw) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
997 outglob = float() |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
998 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
999 if modality in ["max", "mean"]: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1000 if modality == "max": |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1001 outglob = np.max( |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1002 glob |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1003 ) # returned moment will be the maximum of all windows |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1004 elif modality == "mean": |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1005 outglob = np.mean( |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1006 glob |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1007 ) # returned moment will be the mean of all windows |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1008 else: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1009 print( |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1010 'Modality parameter is wrong, please choose between "max" and "mean"\n.' |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1011 ) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1012 return |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1013 desc.append(outglob) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1014 self.all_globs.append(glob) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1015 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1016 desc = np.asarray(desc).reshape(len(desc), 1) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1017 if append: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1018 self.descriptor = np.hstack((self.descriptor, np.array(desc))) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1019 else: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1020 self.descriptor = np.array(desc) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1021 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1022 def calculate_profile(self, prof_type="uH", window=7, append=False): |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1023 """Method for calculating hydrophobicity or hydrophobic moment profiles for given sequences and fitting for |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1024 slope and intercept. The hydrophobicity scale used is "eisenberg" |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1025 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1026 :param prof_type: prof_type of profile, available: 'H' for hydrophobicity or 'uH' for hydrophobic moment |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1027 :param window: {int} size of sliding window used (odd-numbered). |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1028 :param append: {boolean} whether the produced descriptor values should be appended to the existing ones in the |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1029 attribute :py:attr:`descriptor`. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1030 :return: Fitted slope and intercept of calculated profile for every given sequence in the attribute |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1031 :py:attr:`descriptor`. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1032 :Example: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1033 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1034 >>> AMP = PeptideDescriptor('KLLKLLKKVVGALG','kytedoolittle') |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1035 >>> AMP.calculate_profile(prof_type='H') |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1036 >>> AMP.descriptor |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1037 array([[ 0.03731293, 0.19246599]]) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1038 """ |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1039 if prof_type == "uH": |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1040 self.calculate_moment(window=window) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1041 y_vals = self.all_moms |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1042 elif prof_type == "H": |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1043 self.calculate_global(window=window) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1044 y_vals = self.all_globs |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1045 else: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1046 print( |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1047 'prof_type parameter is unknown, choose "uH" for hydrophobic moment or "H" for hydrophobicity\n.' |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1048 ) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1049 sys.exit() |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1050 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1051 desc = list() |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1052 for n, seq in enumerate(self.sequences): |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1053 x_vals = range(len(seq))[int((window - 1) / 2) : -int((window - 1) / 2)] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1054 if len(seq) <= window: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1055 slope, intercept, r_value, p_value, std_err = [0, 0, 0, 0, 0] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1056 else: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1057 slope, intercept, r_value, p_value, std_err = stats.linregress( |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1058 x_vals, y_vals[n] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1059 ) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1060 desc.append([slope, intercept]) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1061 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1062 if append: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1063 self.descriptor = np.hstack((self.descriptor, np.array(desc))) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1064 else: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1065 self.descriptor = np.array(desc) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1066 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1067 def calculate_arc(self, modality="max", append=False): |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1068 """Method for calculating property arcs as seen in the helical wheel plot. Use for binary amino acid scales only. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1069 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1070 :param modality: modality of the arc to calculate, to choose between "max" and "mean". |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1071 :param append: if true, append to current descriptor stored in the descriptor attribute. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1072 :return: calculated descriptor as numpy.array in the descriptor attribute. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1073 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1074 :Example: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1075 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1076 >>> arc = PeptideDescriptor("KLLKLLKKLLKLLK", scalename="peparc") |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1077 >>> arc.calculate_arc(modality="max", append=False) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1078 >>> arc.descriptor |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1079 array([[200, 160, 160, 0, 0]]) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1080 """ |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1081 desc = Parallel(n_jobs=-1)( |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1082 delayed(_one_arc)(seq, modality, self.scale) for seq in self.sequences |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1083 ) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1084 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1085 # Converts each of the amino acids to descriptor vector |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1086 for seq in self.sequences: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1087 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1088 # desc_mat = [] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1089 # for aa in seq: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1090 # desc_mat.append(self.scale[aa]) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1091 # desc_mat = np.asarray(desc_mat) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1092 # |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1093 # # Check descriptor dimension |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1094 # desc_dim = desc_mat.shape[1] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1095 # |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1096 # # list to store descriptor values for all windows |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1097 # allwindows_arc = [] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1098 # |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1099 # if len(seq) > 18: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1100 # window = 18 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1101 # # calculates number of windows in sequence |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1102 # num_windows = len(seq) - window |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1103 # else: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1104 # window = len(seq) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1105 # num_windows = 1 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1106 # |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1107 # # loop through all windows |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1108 # for j in range(num_windows): |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1109 # # slices descriptor matrix into current window |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1110 # window_mat = desc_mat[j:j + window, :] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1111 # |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1112 # # defines order of amino acids in helical projection |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1113 # order = [0, 11, 4, 15, 8, 1, 12, 5, 16, 9, 2, 13, 6, 17, 10, 3, 14, 7] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1114 # |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1115 # # orders window descriptor matrix into helical projection order |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1116 # ordered = [] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1117 # for pos in order: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1118 # try: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1119 # ordered.append(window_mat[pos, :]) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1120 # except: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1121 # # for sequences of len < 18 adding dummy vector with 2s, length of descriptor dimensions |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1122 # ordered.append([2] * desc_dim) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1123 # ordered = np.asarray(ordered) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1124 # |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1125 # window_arc = [] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1126 # |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1127 # # loop through pharmacophoric features |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1128 # for m in range(desc_dim): |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1129 # all_arcs = [] # stores all arcs that can be found of a pharmacophoric feature |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1130 # arc = 0 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1131 # |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1132 # for n in range(18): # for all positions in helix, regardless of sequence length |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1133 # if ordered[n, m] == 0: # if position does not contain pharmacophoric feature |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1134 # all_arcs.append(arc) # append previous arc to all arcs list |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1135 # arc = 0 # arc is initialized |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1136 # elif ordered[n, m] == 1: # if position contains pharmacophoric feature(PF), elongate arc by 20° |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1137 # arc += 20 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1138 # elif ordered[n, m] == 2: # if position doesn't contain amino acid: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1139 # if ordered[n - 1, m] == 1: # if previous position contained PF add 10° |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1140 # arc += 10 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1141 # elif ordered[n - 1, m] == 0: # if previous position didn't contain PF don't add anything |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1142 # arc += 0 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1143 # elif ordered[ |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1144 # n - 2, m] == 1: # if previous position is empty then check second previous for PF |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1145 # arc += 10 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1146 # if n == 17: # if we are at the last position check for position n=0 instead of next position. |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1147 # if ordered[0, m] == 1: # if it contains PF add 10° extra |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1148 # arc += 10 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1149 # else: # if next position contains PF add 10° extra |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1150 # if ordered[n + 1, m] == 1: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1151 # arc += 10 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1152 # elif ordered[n + 1, m] == 0: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1153 # arc += 0 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1154 # else: # if next position is empty check for 2nd next position |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1155 # if n == 16: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1156 # if ordered[0, m] == 1: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1157 # arc += 10 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1158 # else: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1159 # if ordered[n + 2, m] == 1: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1160 # arc += 10 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1161 # |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1162 # all_arcs.append(arc) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1163 # if not arc == 360: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1164 # arc0 = all_arcs.pop() + all_arcs[0] # join first and last arc together |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1165 # all_arcs = [arc0] + all_arcs[1:] |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1166 # |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1167 # window_arc.append(np.max(all_arcs)) # append to window arcs the maximum arc of this PF |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1168 # allwindows_arc.append(window_arc) # append all PF arcs of this window |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1169 # |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1170 # allwindows_arc = np.asarray(allwindows_arc) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1171 # |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1172 # if modality == 'max': |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1173 # final_arc = np.max(allwindows_arc, axis=0) # calculate maximum / mean arc along all windows |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1174 # elif modality == 'mean': |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1175 # final_arc = np.mean(allwindows_arc, axis=0) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1176 # else: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1177 # print('modality is unknown, please choose between "max" and "mean"\n.') |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1178 # sys.exit() |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1179 |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1180 if append: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1181 self.descriptor = np.hstack((self.descriptor, np.array(desc))) |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1182 else: |
9b276485c94a
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1183 self.descriptor = np.array(desc) |