Mercurial > repos > rnateam > rnacommender
comparison data.py @ 0:8918de535391 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
author | rnateam |
---|---|
date | Tue, 31 May 2016 05:41:03 -0400 |
parents | |
children | a609d6dc8047 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:8918de535391 |
---|---|
1 """Dataset handler.""" | |
2 | |
3 import numpy as np | |
4 | |
5 import pandas as pd | |
6 | |
7 from theano import config | |
8 | |
9 __author__ = "Gianluca Corrado" | |
10 __copyright__ = "Copyright 2016, Gianluca Corrado" | |
11 __license__ = "MIT" | |
12 __maintainer__ = "Gianluca Corrado" | |
13 __email__ = "gianluca.corrado@unitn.it" | |
14 __status__ = "Production" | |
15 | |
16 | |
class Dataset(object):
    """General dataset holding protein and RNA feature tables."""

    def __init__(self, fp, fr, standardize_proteins=False,
                 standardize_rnas=False):
        """
        Constructor.

        Parameters
        ----------
        fp : array-like exposing ``astype`` (e.g. pandas.DataFrame)
            Protein features, already loaded in memory. They are cast to
            ``config.floatX`` and stored in ``self.Fp``.

        fr : str
            The name of the HDF5 file containing features for the RNAs.
            The ``features`` node of the store is cast to ``config.floatX``
            and stored in ``self.Fr``.

        standardize_proteins : bool (default : False)
            Accepted for interface compatibility; not used by this
            constructor.

        standardize_rnas : bool (default : False)
            Accepted for interface compatibility; not used by this
            constructor.
        """
        self.Fp = fp.astype(config.floatX)

        store = pd.io.pytables.HDFStore(fr)
        try:
            self.Fr = store.features.astype(config.floatX)
        finally:
            # Release the HDF5 handle even when the 'features' node is
            # missing or the cast fails, so the file is never left open.
            store.close()

    def load(self):
        """Load dataset in memory. Must be implemented by subclasses."""
        raise NotImplementedError()
42 | |
43 | |
class PredictDataset(Dataset):
    """Test dataset: every protein paired with every RNA."""

    def __init__(self, fp, fr):
        """
        Constructor.

        Parameters
        ----------
        fp : array-like exposing ``astype`` (e.g. pandas.DataFrame)
            Protein features, already loaded in memory (the base class
            calls ``fp.astype`` on it).

        fr : str
            The name of the HDF5 file containing features for the RNAs.
        """
        super(PredictDataset, self).__init__(fp, fr)

    def load(self):
        """
        Load dataset in memory.

        Builds one example for each (protein, RNA) pair, iterating over the
        columns of ``self.Fp`` in the outer position and over the columns
        of ``self.Fr`` in the inner position.

        Return
        ------
        Examples to predict, as a tuple ``(p, p_names, r, r_names)``.
        For each example:
            - p contains the protein features,
            - r contains the RNA features,
            - p_names contains the name of the protein,
            - r_names contains the name of the RNA.

        """
        num_proteins = self.Fp.shape[1]
        num_rnas = self.Fr.shape[1]

        # Columns are entities and rows are features, so transpose to get
        # one feature vector per row.
        protein_rows = np.asarray(self.Fp.values, dtype=config.floatX).T
        rna_rows = np.asarray(self.Fr.values, dtype=config.floatX).T

        # Each protein row is repeated once per RNA (outer position), while
        # the whole RNA block is tiled once per protein (inner position).
        # This yields the same ordering as a nested protein/RNA loop
        # without the per-pair column lookups.
        p = np.repeat(protein_rows, num_rnas, axis=0)
        r = np.tile(rna_rows, (num_proteins, 1))
        p_names = np.repeat(np.array(list(self.Fp.columns)), num_rnas)
        r_names = np.tile(np.array(list(self.Fr.columns)), num_proteins)

        return (p, p_names, r, r_names)