rnacommender: data.py comparison

comparison data.py @ 0:8918de535391 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1

author	rnateam
date	Tue, 31 May 2016 05:41:03 -0400
parents
children	a609d6dc8047

comparison

equal deleted inserted replaced

--1:000000000000
+:8918de535391
+"""Dataset handler."""
+import numpy as np
+import pandas as pd
+from theano import config
+__author__ = "Gianluca Corrado"
+__copyright__ = "Copyright 2016, Gianluca Corrado"
+__license__ = "MIT"
+__maintainer__ = "Gianluca Corrado"
+__email__ = "gianluca.corrado@unitn.it"
+__status__ = "Production"
class Dataset(object):
    """General dataset.

    Holds the protein and RNA feature tables, both cast to Theano's
    ``config.floatX``. Subclasses are expected to implement ``load``.
    """

    def __init__(self, fp, fr, standardize_proteins=False,
                 standardize_rnas=False):
        """
        Constructor.

        Parameters
        ----------
        fp : table-like object exposing ``astype``
            Protein features, already loaded in memory (presumably a
            pandas DataFrame -- confirm against the caller).
        fr : str
            The name of the HDF5 file containing features for the RNAs.
            The store must expose a ``features`` entry.
        standardize_proteins : bool (default : False)
            Accepted for interface compatibility; not used by this class.
        standardize_rnas : bool (default : False)
            Accepted for interface compatibility; not used by this class.
        """
        self.Fp = fp.astype(config.floatX)
        # NOTE(review): the store is opened with pandas' default mode, as
        # before; a read-only mode may be more appropriate -- confirm.
        store = pd.HDFStore(fr)
        try:
            self.Fr = store.features.astype(config.floatX)
        finally:
            # Release the file handle even when reading or casting fails.
            store.close()

    def load(self):
        """Load dataset in memory.

        Raises
        ------
        NotImplementedError
            Always; subclasses must override this method.
        """
        raise NotImplementedError()
class PredictDataset(Dataset):
    """Test dataset."""

    def __init__(self, fp, fr):
        """
        Constructor.

        Parameters
        ----------
        fp : table-like object exposing ``astype``
            Protein features, already loaded in memory. ``load`` reads its
            ``columns`` and ``shape``, treating each column as one protein
            (presumably a pandas DataFrame -- confirm against the caller).
        fr : str
            The name of the HDF5 file containing features for the RNAs.
        """
        super(PredictDataset, self).__init__(fp, fr)

    def load(self):
        """
        Load dataset in memory.

        Builds one example for every (protein, RNA) pair. Pairs are ordered
        protein-major: all RNAs for the first protein, then all RNAs for
        the second protein, and so on.

        Return
        ------
        Examples to predict. For each example:
            - p contains the protein features,
            - r contains the RNA features,
            - p_names contains the name of the protein,
            - r_names contains the name of the RNA.
        """
        num_proteins = self.Fp.shape[1]
        num_rnas = self.Fr.shape[1]

        # Columns are the entities and rows the feature dimensions, so
        # transpose to get one row of features per protein / RNA.
        protein_rows = np.asarray(self.Fp.values, dtype=config.floatX).T
        rna_rows = np.asarray(self.Fr.values, dtype=config.floatX).T

        # Each protein row is repeated once per RNA, while the whole block
        # of RNA rows is cycled once per protein.
        p = np.repeat(protein_rows, num_rnas, axis=0)
        r = np.tile(rna_rows, (num_proteins, 1))

        p_names = np.repeat(np.array(list(self.Fp.columns)), num_rnas)
        r_names = np.tile(np.array(list(self.Fr.columns)), num_proteins)

        return (p, p_names, r, r_names)

Mercurial > repos > rnateam > rnacommender

comparison data.py @ 0:8918de535391 draft