Mercurial > repos > rnateam > rnacommender
changeset 4:a609d6dc8047 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 7ad344d108076116e702e1c1e91cea73d8fcadc4
author | rnateam |
---|---|
date | Thu, 28 Jul 2016 05:55:25 -0400 |
parents | ecf125a1ad73 |
children | b3462a72ff76 |
files | data.py main.py model.py rbpfeatures.py recommend.py rnacommender.xml |
diffstat | 6 files changed, 27 insertions(+), 23 deletions(-) [+] |
line wrap: on
line diff
--- a/data.py Tue Jul 19 10:22:02 2016 -0400 +++ b/data.py Thu Jul 28 05:55:25 2016 -0400 @@ -4,8 +4,6 @@ import pandas as pd -from theano import config - __author__ = "Gianluca Corrado" __copyright__ = "Copyright 2016, Gianluca Corrado" __license__ = "MIT" @@ -30,10 +28,10 @@ fr : str The name of the HDF5 file containing features for the RNAs. """ - self.Fp = fp.astype(config.floatX) + self.Fp = fp.astype('float32') store = pd.io.pytables.HDFStore(fr) - self.Fr = store.features.astype(config.floatX) + self.Fr = store.features.astype('float32') store.close() def load(self): @@ -74,9 +72,9 @@ protein_input_dim = self.Fp.shape[0] rna_input_dim = self.Fr.shape[0] num_examples = self.Fp.shape[1] * self.Fr.shape[1] - p = np.zeros((num_examples, protein_input_dim)).astype(config.floatX) + p = np.zeros((num_examples, protein_input_dim)).astype('float32') p_names = [] - r = np.zeros((num_examples, rna_input_dim)).astype(config.floatX) + r = np.zeros((num_examples, rna_input_dim)).astype('float32') r_names = [] index = 0 for protein in self.Fp.columns:
```diff
--- a/main.py Tue Jul 19 10:22:02 2016 -0400
+++ b/main.py Thu Jul 28 05:55:25 2016 -0400
@@ -7,8 +7,6 @@
 from data import PredictDataset
 from recommend import Predictor
 
-from theano import config
-
 __author__ = "Gianluca Corrado"
 __copyright__ = "Copyright 2016, Gianluca Corrado"
 __license__ = "MIT"
@@ -16,7 +14,6 @@
 __email__ = "gianluca.corrado@unitn.it"
 __status__ = "Production"
 
-config.floatX = 'float32'
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(
@@ -46,4 +43,6 @@
                       output="output.txt")
         P.predict()
     else:
-        sys.exit("""The queried protein has no domain similarity with the proteins in the training dataset. It cannot be predicted.""")
+        sys.stdout.write("""
+        The queried protein has no domain similarity with the proteins in the training dataset. It cannot be predicted.
+        """)
```
--- a/model.py Tue Jul 19 10:22:02 2016 -0400 +++ b/model.py Thu Jul 28 05:55:25 2016 -0400 @@ -5,7 +5,7 @@ import numpy as np -from theano import config, function, shared +from theano import function, shared import theano.tensor as T __author__ = "Gianluca Corrado" @@ -61,25 +61,25 @@ self.lambda_reg = lambda_reg np.random.seed(seed) # explictit features for proteins - fp = T.matrix("Fp", dtype=config.floatX) + fp = T.matrix("Fp", dtype='float32') # explictit features for RNAs - fr = T.matrix("Fr", dtype=config.floatX) + fr = T.matrix("Fr", dtype='float32') # Correct label y = T.vector("y") # projection matrix for proteins self.Ap = shared(((.5 - np.random.rand(kp, sp)) * - irange).astype(config.floatX), name="Ap") + irange).astype('float32'), name="Ap") self.bp = shared(((.5 - np.random.rand(kp)) * - irange).astype(config.floatX), name="bp") + irange).astype('float32'), name="bp") # projection matrix for RNAs self.Ar = shared(((.5 - np.random.rand(kr, sr)) * - irange).astype(config.floatX), name="Ar") + irange).astype('float32'), name="Ar") self.br = shared(((.5 - np.random.rand(kr)) * - irange).astype(config.floatX), name="br") + irange).astype('float32'), name="br") # generalization matrix self.B = shared(((.5 - np.random.rand(kp, kr)) * - irange).astype(config.floatX), name="B") + irange).astype('float32'), name="B") # Latent space for proteins p = T.nnet.sigmoid(T.dot(fp, self.Ap.T) + self.bp)
```diff
--- a/rbpfeatures.py Tue Jul 19 10:22:02 2016 -0400
+++ b/rbpfeatures.py Thu Jul 28 05:55:25 2016 -0400
@@ -1,6 +1,7 @@
 """Compute the RBP features."""
 
 import re
+import sys
 import subprocess as sp
 import uuid
 from os import mkdir
@@ -57,6 +58,9 @@
 
         fasta = fasta_utils.import_fasta(self.fasta)
 
+        if len(fasta) != 1:
+            sys.exit("""Fasta file must contain exactly one sequence.""")
+
         for rbp in sorted(fasta.keys()):
             seq = fasta[rbp]
             text = pfam_utils.sequence_search(rbp, seq)
```
```diff
--- a/recommend.py Tue Jul 19 10:22:02 2016 -0400
+++ b/recommend.py Thu Jul 28 05:55:25 2016 -0400
@@ -53,6 +53,8 @@
         """Predict interaction values."""
         # predict the y_hat
         (p, p_names, r, r_names) = self.predict_dataset
+        assert p.dtype == 'float32'
+        assert r.dtype == 'float32'
         y_hat = self.model.predict(p, r)
         # sort the interactions according to y_hat
         ordering = sorted(range(len(y_hat)),
```
--- a/rnacommender.xml Tue Jul 19 10:22:02 2016 -0400 +++ b/rnacommender.xml Thu Jul 28 05:55:25 2016 -0400 @@ -1,20 +1,21 @@ <tool id="rbc_rnacommender" name="RNAcommender" version="0.1.1"> - <description>files into a collection</description> + <description>genome-wide recommendation of RNA-protein interactions</description> <requirements> <requirement type="package" version="3.5">sam</requirement> <requirement type="package" version="1.11.1">numpy</requirement> + <requirement type="package" version="0.17.1">scipy</requirement> <requirement type="package" version="0.18.1">pandas</requirement> <requirement type="package" version="3.2.2">pytables</requirement> - <requirement type="package" version="0.7.2">theano</requirement> + <requirement type="package" version="0.8.2">theano</requirement> <requirement type="package" version="2.10.0">requests</requirement> </requirements> <command detect_errors="aggressive"> <![CDATA[ - sh $__tool_directory__/init.sh && - python $__tool_directory__/main.py "$infile" + sh $__tool_directory__/init.sh 2> hide.txt && + THEANO_FLAGS=base_compiledir=./tmp python $__tool_directory__/main.py "$infile" ]]></command> <inputs> - <param name="infile" type="data" format="fasta" label="Fasta file to split"/> + <param name="infile" type="data" format="fasta" label="Fasta file containing (exactly) one RBP sequence"/> </inputs> <outputs> <data format="tabular" from_work_dir="output.txt" name="outfile" />