# HG changeset patch
# User bgruening
# Date 1531161068 14400
# Node ID 78c664cc1841323afbb99e9d129135146564e2b2
# Parent ada7bb28fe134d2c796293b52b65a60ba3e75a70
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 5d71c93a3dd804b1469852240a86021ab9130364
diff -r ada7bb28fe13 -r 78c664cc1841 main_macros.xml
--- a/main_macros.xml Sun Jul 01 03:18:53 2018 -0400
+++ b/main_macros.xml Mon Jul 09 14:31:08 2018 -0400
@@ -64,6 +64,45 @@
return new_selector
+
+def get_X_y(params, file1, file2):
+    """Load the feature matrix X and the target vector y.
+
+    params is the nested parameter mapping; every setting used here lives
+    under params["selected_tasks"]["selected_algorithms"]["input_options"].
+    file1 holds X: it is read with read_columns() when "selected_input" is
+    "tabular" and parsed with mmread() otherwise. file2 holds y and is
+    always read with read_columns(), then flattened with ravel().
+
+    The mmread() input is opened in a with-block so that the file handle is
+    closed as soon as the matrix has been parsed.
+
+    Returns the tuple (X, y).
+    """
+    input_options = params["selected_tasks"]["selected_algorithms"]["input_options"]
+    explicit_modes = ["by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name"]
+
+    def load_table(path, header_key, selector_key, option_key, column_key):
+        """Read one tab-separated file using its header and column-selector settings."""
+        header = 'infer' if input_options[header_key] else None
+        selector = input_options[selector_key]
+        c_option = selector[option_key]
+        columns = selector[column_key] if c_option in explicit_modes else None
+        return read_columns(
+            path, c=columns, c_option=c_option,
+            sep='\t', header=header, parse_dates=True
+        )
+
+    if input_options["selected_input"] == "tabular":
+        X = load_table(file1, "header1", "column_selector_options_1", "selected_column_selector_option", "col1")
+    else:
+        with open(file1, 'r') as matrix_file:
+            X = mmread(matrix_file)
+
+    y = load_table(file2, "header2", "column_selector_options_2", "selected_column_selector_option2", "col2")
+    return X, y.ravel()
+
+
python
@@ -81,34 +120,6 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff -r ada7bb28fe13 -r 78c664cc1841 svm.xml
--- a/svm.xml Sun Jul 01 03:18:53 2018 -0400
+++ b/svm.xml Mon Jul 09 14:31:08 2018 -0400
@@ -22,6 +22,9 @@
import pandas
import pickle
+@COLUMNS_FUNCTION@
+@GET_X_y_FUNCTION@
+
input_json_path = sys.argv[1]
params = json.load(open(input_json_path, "r"))
@@ -29,7 +32,8 @@
classifier_object = pickle.load(open("$infile_model", 'rb'))
-data = pandas.read_csv("$selected_tasks.infile_data", sep='\t', header=0, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False )
+header = 'infer' if params["selected_tasks"]["header"] else None
+data = pandas.read_csv("$selected_tasks.infile_data", sep='\t', header=header, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False)
prediction = classifier_object.predict(data)
prediction_df = pandas.DataFrame(prediction)
res = pandas.concat([data, prediction_df], axis=1)
@@ -37,10 +41,7 @@
#else:
-data_train = pandas.read_csv("$selected_tasks.infile_train", sep='\t', header=0, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False )
-
-data = data_train.ix[:,0:len(data_train.columns)-1]
-labels = np.array(data_train[data_train.columns[len(data_train.columns)-1]])
+X, y = get_X_y(params, "$selected_tasks.selected_algorithms.input_options.infile1" ,"$selected_tasks.selected_algorithms.input_options.infile2")
options = params["selected_tasks"]["selected_algorithms"]["options"]
selected_algorithm = params["selected_tasks"]["selected_algorithms"]["selected_algorithm"]
@@ -51,7 +52,7 @@
my_class = getattr(sklearn.svm, selected_algorithm)
classifier_object = my_class(**options)
-classifier_object.fit(data,labels)
+classifier_object.fit(X, y)
pickle.dump(classifier_object,open("$outfile_fit", 'w+'))
@@ -61,23 +62,26 @@
-
+
+
+
+
@@ -108,21 +112,36 @@
-
+
+
+
+
+
+
-
+
+
+
+
+
+
-
+
+
+
+
+
+
@@ -131,18 +150,21 @@
+
+
+