Mercurial > repos > rnateam > rnacommender

--- a/data.py	Tue Jul 19 10:22:02 2016 -0400
+++ b/data.py	Thu Jul 28 05:55:25 2016 -0400
@@ -4,8 +4,6 @@

 import pandas as pd

-from theano import config
-
 __author__ = "Gianluca Corrado"
 __copyright__ = "Copyright 2016, Gianluca Corrado"
 __license__ = "MIT"
@@ -30,10 +28,10 @@
         fr : str
             The name of the HDF5 file containing features for the RNAs.
         """
-        self.Fp = fp.astype(config.floatX)
+        self.Fp = fp.astype('float32')

         store = pd.io.pytables.HDFStore(fr)
-        self.Fr = store.features.astype(config.floatX)
+        self.Fr = store.features.astype('float32')
         store.close()

     def load(self):
@@ -74,9 +72,9 @@
         protein_input_dim = self.Fp.shape[0]
         rna_input_dim = self.Fr.shape[0]
         num_examples = self.Fp.shape[1] * self.Fr.shape[1]
-        p = np.zeros((num_examples, protein_input_dim)).astype(config.floatX)
+        p = np.zeros((num_examples, protein_input_dim)).astype('float32')
         p_names = []
-        r = np.zeros((num_examples, rna_input_dim)).astype(config.floatX)
+        r = np.zeros((num_examples, rna_input_dim)).astype('float32')
         r_names = []
         index = 0
         for protein in self.Fp.columns:
--- a/main.py	Tue Jul 19 10:22:02 2016 -0400
+++ b/main.py	Thu Jul 28 05:55:25 2016 -0400
@@ -7,8 +7,6 @@
 from data import PredictDataset
 from recommend import Predictor

-from theano import config
-
 __author__ = "Gianluca Corrado"
 __copyright__ = "Copyright 2016, Gianluca Corrado"
 __license__ = "MIT"
@@ -16,7 +14,6 @@
 __email__ = "gianluca.corrado@unitn.it"
 __status__ = "Production"

-config.floatX = 'float32'

 if __name__ == '__main__':
     parser = argparse.ArgumentParser(
@@ -46,4 +43,6 @@
                       output="output.txt")
         P.predict()
     else:
-        sys.exit("""The queried protein has no domain similarity with the proteins in the training dataset. It cannot be predicted.""")
+        sys.stdout.write("""
+        The queried protein has no domain similarity with the proteins in the training dataset. It cannot be predicted.
+        """)
--- a/model.py	Tue Jul 19 10:22:02 2016 -0400
+++ b/model.py	Thu Jul 28 05:55:25 2016 -0400
@@ -5,7 +5,7 @@

 import numpy as np

-from theano import config, function, shared
+from theano import function, shared
 import theano.tensor as T

 __author__ = "Gianluca Corrado"
@@ -61,25 +61,25 @@
         self.lambda_reg = lambda_reg
         np.random.seed(seed)
         # explicit features for proteins
-        fp = T.matrix("Fp", dtype=config.floatX)
+        fp = T.matrix("Fp", dtype='float32')
         # explicit features for RNAs
-        fr = T.matrix("Fr", dtype=config.floatX)
+        fr = T.matrix("Fr", dtype='float32')
         # Correct label
         y = T.vector("y")

         # projection matrix for proteins
         self.Ap = shared(((.5 - np.random.rand(kp, sp)) *
-                          irange).astype(config.floatX), name="Ap")
+                          irange).astype('float32'), name="Ap")
         self.bp = shared(((.5 - np.random.rand(kp)) *
-                          irange).astype(config.floatX), name="bp")
+                          irange).astype('float32'), name="bp")
         # projection matrix for RNAs
         self.Ar = shared(((.5 - np.random.rand(kr, sr)) *
-                          irange).astype(config.floatX), name="Ar")
+                          irange).astype('float32'), name="Ar")
         self.br = shared(((.5 - np.random.rand(kr)) *
-                          irange).astype(config.floatX), name="br")
+                          irange).astype('float32'), name="br")
         # generalization matrix
         self.B = shared(((.5 - np.random.rand(kp, kr)) *
-                         irange).astype(config.floatX), name="B")
+                         irange).astype('float32'), name="B")

         # Latent space for proteins
         p = T.nnet.sigmoid(T.dot(fp, self.Ap.T) + self.bp)
--- a/rbpfeatures.py	Tue Jul 19 10:22:02 2016 -0400
+++ b/rbpfeatures.py	Thu Jul 28 05:55:25 2016 -0400
@@ -1,6 +1,7 @@
 """Compute the RBP features."""

 import re
+import sys
 import subprocess as sp
 import uuid
 from os import mkdir
@@ -57,6 +58,9 @@

         fasta = fasta_utils.import_fasta(self.fasta)

+        if len(fasta) != 1:
+            sys.exit("""Fasta file must contain exactly one sequence.""")
+
         for rbp in sorted(fasta.keys()):
             seq = fasta[rbp]
             text = pfam_utils.sequence_search(rbp, seq)
--- a/recommend.py	Tue Jul 19 10:22:02 2016 -0400
+++ b/recommend.py	Thu Jul 28 05:55:25 2016 -0400
@@ -53,6 +53,8 @@
         """Predict interaction values."""
         # predict the y_hat
         (p, p_names, r, r_names) = self.predict_dataset
+        assert p.dtype == 'float32'
+        assert r.dtype == 'float32'
         y_hat = self.model.predict(p, r)
         # sort the interactions according to y_hat
         ordering = sorted(range(len(y_hat)),
--- a/rnacommender.xml	Tue Jul 19 10:22:02 2016 -0400
+++ b/rnacommender.xml	Thu Jul 28 05:55:25 2016 -0400
@@ -1,20 +1,21 @@
 <tool id="rbc_rnacommender" name="RNAcommender" version="0.1.1">
-    <description>files into a collection</description>
+    <description>genome-wide recommendation of RNA-protein interactions</description>
     <requirements>
         <requirement type="package" version="3.5">sam</requirement>
         <requirement type="package" version="1.11.1">numpy</requirement>
+        <requirement type="package" version="0.17.1">scipy</requirement>
         <requirement type="package" version="0.18.1">pandas</requirement>
         <requirement type="package" version="3.2.2">pytables</requirement>
-        <requirement type="package" version="0.7.2">theano</requirement>
+        <requirement type="package" version="0.8.2">theano</requirement>
         <requirement type="package" version="2.10.0">requests</requirement>
     </requirements>
     <command detect_errors="aggressive">
     <![CDATA[
-        sh $__tool_directory__/init.sh &&
-        python $__tool_directory__/main.py "$infile"
+        sh $__tool_directory__/init.sh 2> hide.txt &&
+        THEANO_FLAGS=base_compiledir=./tmp python $__tool_directory__/main.py "$infile"
     ]]></command>
     <inputs>
-        <param name="infile" type="data" format="fasta" label="Fasta file to split"/>
+        <param name="infile" type="data" format="fasta" label="Fasta file containing (exactly) one RBP sequence"/>
     </inputs>
     <outputs>
         <data format="tabular" from_work_dir="output.txt" name="outfile" />