Mercurial > repos > rnateam > rnacommender
comparison model.py @ 0:8918de535391 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
author | rnateam |
---|---|
date | Tue, 31 May 2016 05:41:03 -0400 |
parents | |
children | a609d6dc8047 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:8918de535391 |
---|---|
1 """Recommender model.""" | |
2 from __future__ import print_function | |
3 | |
4 import sys | |
5 | |
6 import numpy as np | |
7 | |
8 from theano import config, function, shared | |
9 import theano.tensor as T | |
10 | |
# Module metadata (authorship, licensing and release status).
__author__ = "Gianluca Corrado"
__copyright__ = "Copyright 2016, Gianluca Corrado"
__license__ = "MIT"
__maintainer__ = "Gianluca Corrado"
__email__ = "gianluca.corrado@unitn.it"
__status__ = "Production"
17 | |
18 | |
class Model(object):
    """Factorization model.

    Protein and RNA feature vectors are each projected into a latent space
    by an affine map followed by a sigmoid; the interaction score of a
    (protein, RNA) pair is the sigmoid of the bilinear form ``p . B . r``.

    Attributes
    ----------
    Ap, bp : theano shared variables
        Projection matrix (kp x sp) and bias (kp) for proteins.

    Ar, br : theano shared variables
        Projection matrix (kr x sr) and bias (kr) for RNAs.

    B : theano shared variable
        Generalization matrix (kp x kr) coupling the two latent spaces.

    train : compiled theano function
        ``train(Fp, Fr, y) -> [y_hat, cost]``; also applies one SGD update.

    test : compiled theano function
        ``test(Fp, Fr, y) -> [y_hat, cost]``; no parameter update.

    predict : compiled theano function
        ``predict(Fp, Fr) -> y_hat``.
    """

    def __init__(self, sp, sr, kp, kr, irange=0.01, learning_rate=0.01,
                 lambda_reg=0.01, verbose=True, seed=1234):
        """
        Constructor.

        Builds the symbolic graph and compiles the ``train``, ``test`` and
        ``predict`` functions.

        Parameters
        ----------
        sp : int
            Number of protein features.

        sr : int
            Number of RNA features.

        kp : int
            Size of the protein latent space.

        kr : int
            Size of the RNA latent space.

        irange : float (default : 0.01)
            Initialization range for the model weights.

        learning_rate : float (default : 0.01)
            Learning rate for the weights update.

        lambda_reg : float (default : 0.01)
            Lambda parameter for the regularization.

        verbose : bool (default : True)
            Print information at STDOUT.

        seed : int (default : 1234)
            Seed for random number generator.
        """
        if verbose:
            print("Compiling model...", end=' ')
            sys.stdout.flush()

        # NOTE: the values below are baked into the compiled functions;
        # changing these attributes afterwards does not alter training.
        self.learning_rate = learning_rate
        self.lambda_reg = lambda_reg
        # NOTE: this reseeds NumPy's *global* random number generator.
        np.random.seed(seed)

        # explicit features for proteins, one row per example (n x sp)
        fp = T.matrix("Fp", dtype=config.floatX)
        # explicit features for RNAs, one row per example (n x sr)
        fr = T.matrix("Fr", dtype=config.floatX)
        # Correct label, one entry per example
        y = T.vector("y")

        def _init_weights(name, *shape):
            """Shared variable with small uniform values scaled by irange."""
            values = (.5 - np.random.rand(*shape)) * irange
            return shared(values.astype(config.floatX), name=name)

        # The creation order below fixes the order of the random draws and
        # therefore the initial weights obtained for a given seed.
        # projection matrix for proteins
        self.Ap = _init_weights("Ap", kp, sp)
        self.bp = _init_weights("bp", kp)
        # projection matrix for RNAs
        self.Ar = _init_weights("Ar", kr, sr)
        self.br = _init_weights("br", kr)
        # generalization matrix
        self.B = _init_weights("B", kp, kr)

        # Latent space for proteins
        p = T.nnet.sigmoid(T.dot(fp, self.Ap.T) + self.bp)
        # Latent space for RNAs
        r = T.nnet.sigmoid(T.dot(fr, self.Ar.T) + self.br)
        # Predicted output: row-wise bilinear form p_i . B . r_i
        y_hat = T.nnet.sigmoid(T.sum(T.dot(p, self.B) * r, axis=1))

        def _regularization():
            """Normalized Frobenius norm.

            Each parameter group's 2-norm is divided by its number of
            elements, and the three groups are averaged.
            """
            norm_proteins = self.Ap.norm(2) + self.bp.norm(2)
            norm_rnas = self.Ar.norm(2) + self.br.norm(2)
            norm_b = self.B.norm(2)

            num_proteins = self.Ap.flatten().shape[0] + self.bp.shape[0]
            num_rnas = self.Ar.flatten().shape[0] + self.br.shape[0]
            num_b = self.B.flatten().shape[0]

            return (norm_proteins / num_proteins + norm_rnas / num_rnas +
                    norm_b / num_b) / 3

        # mean squared error
        cost_ = (T.sqr(y - y_hat)).mean()
        reg = lambda_reg * _regularization()
        cost = cost_ + reg

        # compute sgd updates
        params = [self.Ap, self.bp, self.Ar, self.br, self.B]
        grads = T.grad(cost, params)
        updates = tuple((param, param - learning_rate * grad)
                        for param, grad in zip(params, grads))

        # training step
        self.train = function(
            inputs=[fp, fr, y],
            outputs=[y_hat, cost],
            updates=updates)
        # test
        self.test = function(
            inputs=[fp, fr, y],
            outputs=[y_hat, cost])

        # predict
        self.predict = function(
            inputs=[fp, fr],
            outputs=y_hat)

        if verbose:
            print("Done.")
            sys.stdout.flush()

    def get_params(self):
        """Return the parameters of the model.

        Returns
        -------
        dict
            Maps 'Ap', 'bp', 'Ar', 'br' and 'B' to the current value of the
            corresponding shared variable (as given by ``get_value()``).
        """
        return {'Ap': self.Ap.get_value(), 'bp': self.bp.get_value(),
                'Ar': self.Ar.get_value(), 'br': self.br.get_value(),
                'B': self.B.get_value()}