diff main.py @ 0:8918de535391 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
author rnateam
date Tue, 31 May 2016 05:41:03 -0400
parents
children a609d6dc8047
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/main.py	Tue May 31 05:41:03 2016 -0400
@@ -0,0 +1,49 @@
+#!/usr/bin/env python
+"""Recommendation."""
+
+import argparse
+import sys
+from rbpfeatures import RBPVectorizer
+from data import PredictDataset
+from recommend import Predictor
+
+from theano import config
+
+__author__ = "Gianluca Corrado"
+__copyright__ = "Copyright 2016, Gianluca Corrado"
+__license__ = "MIT"
+__maintainer__ = "Gianluca Corrado"
+__email__ = "gianluca.corrado@unitn.it"
+__status__ = "Production"
+
+config.floatX = 'float32'  # set Theano's floatX option to float32 at import time
+
+if __name__ == '__main__':  # command-line entry point: parse args, vectorize, predict
+    parser = argparse.ArgumentParser(
+        description=__doc__,  # reuse the module docstring as the help description
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    parser.add_argument('fasta', metavar='fasta', type=str,
+                        help="""Fasta file containing the RBP \
+                        sequences.""")
+
+    args = parser.parse_args()
+
+    v = RBPVectorizer(fasta=args.fasta)  # vectorizer built from the FASTA path argument
+    rbp_fea = v.vectorize()  # a None result is handled by the else branch below
+
+    if rbp_fea is not None:
+        # Define and load the prediction dataset (RBP features + hard-coded RNA features path)
+        D = PredictDataset(
+            fp=rbp_fea, fr="AURA_Human_data/RNA_features/HT_utrs.h5")
+        dataset = D.load()
+
+        model = "AURA_Human_data/model/trained_model.pkl"  # hard-coded relative path
+
+        # Define the Predictor and run the prediction ("output.txt" is passed as its output)
+        P = Predictor(predict_dataset=dataset,
+                      trained_model=model,
+                      serendipity_dic=model + '_',  # model path with a trailing underscore
+                      output="output.txt")
+        P.predict()
+    else:  # vectorize() returned None: abort; sys.exit(str) prints it to stderr, status 1
+        sys.exit("""The queried protein has no domain similarity with the proteins in the training dataset. It cannot be predicted.""")