comparison data.py @ 0:8918de535391 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
author rnateam
date Tue, 31 May 2016 05:41:03 -0400
parents
children a609d6dc8047
comparison
equal deleted inserted replaced
-1:000000000000 0:8918de535391
1 """Dataset handler."""
2
3 import numpy as np
4
5 import pandas as pd
6
7 from theano import config
8
9 __author__ = "Gianluca Corrado"
10 __copyright__ = "Copyright 2016, Gianluca Corrado"
11 __license__ = "MIT"
12 __maintainer__ = "Gianluca Corrado"
13 __email__ = "gianluca.corrado@unitn.it"
14 __status__ = "Production"
15
16
class Dataset(object):
    """General dataset holding protein and RNA feature matrices."""

    def __init__(self, fp, fr, standardize_proteins=False,
                 standardize_rnas=False):
        """
        Constructor.

        Parameters
        ----------
        fp : pandas.DataFrame (or compatible object)
            Protein features, already loaded in memory. It is cast to
            theano's ``config.floatX`` via its ``astype`` method.

        fr : str
            The name of the HDF5 file containing features for the RNAs.
            The features are read from the ``features`` key of the store.

        standardize_proteins : bool (default : False)
            Accepted for interface compatibility; not used by this
            constructor.

        standardize_rnas : bool (default : False)
            Accepted for interface compatibility; not used by this
            constructor.
        """
        self.Fp = fp.astype(config.floatX)

        # Open read-only: the file is never modified here, and the default
        # append mode would create a missing file and require write access.
        store = pd.HDFStore(fr, mode="r")
        try:
            self.Fr = store["features"].astype(config.floatX)
        finally:
            # Release the file handle even if the key is missing or the
            # cast fails.
            store.close()

    def load(self):
        """Load dataset in memory (to be implemented by subclasses)."""
        raise NotImplementedError()
42
43
class PredictDataset(Dataset):
    """Test dataset: every protein paired with every RNA."""

    def __init__(self, fp, fr):
        """
        Constructor.

        Parameters
        ----------
        fp : pandas.DataFrame (or compatible object)
            Protein features, already loaded in memory, one column per
            protein. It is forwarded unchanged to the base constructor.

        fr : str
            The name of the HDF5 file containing features for the RNAs.
        """
        super(PredictDataset, self).__init__(fp, fr)

    def load(self):
        """
        Load dataset in memory.

        Examples are ordered protein-major: all RNAs for the first protein
        column, then all RNAs for the second protein column, and so on.

        Return
        ------
        Examples to predict, as the tuple (p, p_names, r, r_names).
        For each example (row):
            - p contains the protein features,
              shape (num_proteins * num_rnas, protein_input_dim),
            - r contains the RNA features,
              shape (num_proteins * num_rnas, rna_input_dim),
            - p_names contains the name of the protein,
            - r_names contains the name of the RNA.

        """
        protein_names = list(self.Fp.columns)
        rna_names = list(self.Fr.columns)

        # Features are stored one column per entity; transpose so that each
        # row is the feature vector of one protein / one RNA.
        protein_rows = np.asarray(self.Fp.values, dtype=config.floatX).T
        rna_rows = np.asarray(self.Fr.values, dtype=config.floatX).T

        # Cartesian product without a Python-level loop: each protein row is
        # repeated once per RNA, and the whole RNA block is cycled once per
        # protein, so row i * num_rnas + j pairs protein i with RNA j.
        p = np.repeat(protein_rows, len(rna_names), axis=0)
        r = np.tile(rna_rows, (len(protein_names), 1))

        p_names = [protein for protein in protein_names for _ in rna_names]
        r_names = [rna for _ in protein_names for rna in rna_names]

        return (p, np.array(p_names), r, np.array(r_names))