changeset 2:6e6726be0728 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit 641ac64ded23fbb6fe85d5f13926da12dcce4e76
author bgruening
date Tue, 13 Mar 2018 04:56:08 -0400
parents 883f2973d37d
children 968863bd9008
files ensemble.xml main_macros.xml
diffstat 2 files changed, 19 insertions(+), 11 deletions(-) [+]
line wrap: on
line diff
--- a/ensemble.xml	Fri Feb 16 14:56:05 2018 -0500
+++ b/ensemble.xml	Tue Mar 13 04:56:08 2018 -0400
@@ -25,23 +25,31 @@
 input_json_path = sys.argv[1]
 params = json.load(open(input_json_path, "r"))
 
+@COLUMNS_FUNCTION@
+
 #if $selected_tasks.selected_task == "train":
 
 algorithm = params["selected_tasks"]["selected_algorithms"]["selected_algorithm"]
 options = params["selected_tasks"]["selected_algorithms"]["options"]
 input_type = params["selected_tasks"]["selected_algorithms"]["input_options"]["selected_input"]
 if input_type=="tabular":
-    col1 = params["selected_tasks"]["selected_algorithms"]["input_options"]["col1"]
-    col1 = list(map(lambda x: x - 1, col1))
-    f1 = pandas.read_csv("$selected_tasks.selected_algorithms.input_options.infile1", sep='\t', header=None, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False )
-    X = f1.iloc[:,col1].values
+    X = read_columns(
+            "$selected_tasks.selected_algorithms.input_options.infile1",
+            "$selected_tasks.selected_algorithms.input_options.col1",
+            sep='\t',
+            header=None,
+            parse_dates=True
+    )
 else:
     X = mmread(open("$selected_tasks.selected_algorithms.input_options.infile1", 'r'))
 
-col2 = params["selected_tasks"]["selected_algorithms"]["input_options"]["col2"]
-col2 = list(map(lambda x: x - 1, col2))
-f2 = pandas.read_csv("$selected_tasks.selected_algorithms.input_options.infile2", sep='\t', header=None, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False )
-y = f2.iloc[:,col2].values
+y = read_columns(
+        "$selected_tasks.selected_algorithms.input_options.infile2",
+        "$selected_tasks.selected_algorithms.input_options.col2",
+        sep='\t',
+        header=None,
+        parse_dates=True
+)
 
 my_class = getattr(sklearn.ensemble, algorithm)
 estimator = my_class(**options)
@@ -50,7 +58,7 @@
 
 #else:
 classifier_object = pickle.load(open("$selected_tasks.infile_model", 'r'))
-data = pandas.read_csv("$selected_tasks.infile_data", sep='\t', header=0, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False )
+data = pandas.read_csv("$selected_tasks.infile_data", sep='\t', header=0, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False)
 prediction = classifier_object.predict(data)
 prediction_df = pandas.DataFrame(prediction)
 res = pandas.concat([data, prediction_df], axis=1)
--- a/main_macros.xml	Fri Feb 16 14:56:05 2018 -0500
+++ b/main_macros.xml	Tue Mar 13 04:56:08 2018 -0400
@@ -2,8 +2,8 @@
   <token name="@VERSION@">0.9</token>
 
   <token name="@COLUMNS_FUNCTION@">
-def columns(f,c):
-  data = pandas.read_csv(f, sep='\t', header=None, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False)
+def read_columns(f, c, **args):
+  data = pandas.read_csv(f, **args)
   cols = c.split (',')
   cols = map(int, cols)
   cols = list(map(lambda x: x - 1, cols))