# HG changeset patch # User bgruening # Date 1531161068 14400 # Node ID 78c664cc1841323afbb99e9d129135146564e2b2 # Parent ada7bb28fe134d2c796293b52b65a60ba3e75a70 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 5d71c93a3dd804b1469852240a86021ab9130364 diff -r ada7bb28fe13 -r 78c664cc1841 main_macros.xml --- a/main_macros.xml Sun Jul 01 03:18:53 2018 -0400 +++ b/main_macros.xml Mon Jul 09 14:31:08 2018 -0400 @@ -64,6 +64,45 @@ return new_selector + +def get_X_y(params, file1, file2): + input_type = params["selected_tasks"]["selected_algorithms"]["input_options"]["selected_input"] + if input_type=="tabular": + header = 'infer' if params["selected_tasks"]["selected_algorithms"]["input_options"]["header1"] else None + column_option = params["selected_tasks"]["selected_algorithms"]["input_options"]["column_selector_options_1"]["selected_column_selector_option"] + if column_option in ["by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name"]: + c = params["selected_tasks"]["selected_algorithms"]["input_options"]["column_selector_options_1"]["col1"] + else: + c = None + X = read_columns( + file1, + c = c, + c_option = column_option, + sep='\t', + header=header, + parse_dates=True + ) + else: + X = mmread(open(file1, 'r')) + + header = 'infer' if params["selected_tasks"]["selected_algorithms"]["input_options"]["header2"] else None + column_option = params["selected_tasks"]["selected_algorithms"]["input_options"]["column_selector_options_2"]["selected_column_selector_option2"] + if column_option in ["by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name"]: + c = params["selected_tasks"]["selected_algorithms"]["input_options"]["column_selector_options_2"]["col2"] + else: + c = None + y = read_columns( + file2, + c = c, + c_option = column_option, + sep='\t', + header=header, + parse_dates=True + ) + y=y.ravel() + return X, y + + python @@ -81,34 +120,6 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff -r ada7bb28fe13 -r 78c664cc1841 svm.xml --- a/svm.xml Sun Jul 01 03:18:53 2018 -0400 +++ b/svm.xml Mon Jul 09 14:31:08 2018 -0400 @@ -22,6 +22,9 @@ import pandas import pickle +@COLUMNS_FUNCTION@ +@GET_X_y_FUNCTION@ + input_json_path = sys.argv[1] params = json.load(open(input_json_path, "r")) @@ -29,7 +32,8 @@ classifier_object = pickle.load(open("$infile_model", 'rb')) -data = pandas.read_csv("$selected_tasks.infile_data", sep='\t', header=0, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False ) +header = 'infer' if params["selected_tasks"]["header"] else None +data = pandas.read_csv("$selected_tasks.infile_data", sep='\t', header=header, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False) prediction = classifier_object.predict(data) prediction_df = pandas.DataFrame(prediction) res = pandas.concat([data, prediction_df], axis=1) @@ -37,10 +41,7 @@ #else: -data_train = pandas.read_csv("$selected_tasks.infile_train", sep='\t', header=0, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False ) - -data = data_train.ix[:,0:len(data_train.columns)-1] -labels = np.array(data_train[data_train.columns[len(data_train.columns)-1]]) +X, y = get_X_y(params, "$selected_tasks.selected_algorithms.input_options.infile1" ,"$selected_tasks.selected_algorithms.input_options.infile2") options = params["selected_tasks"]["selected_algorithms"]["options"] selected_algorithm = params["selected_tasks"]["selected_algorithms"]["selected_algorithm"] @@ -51,7 +52,7 @@ my_class = getattr(sklearn.svm, selected_algorithm) classifier_object = my_class(**options) -classifier_object.fit(data,labels) +classifier_object.fit(X, y) pickle.dump(classifier_object,open("$outfile_fit", 'w+')) @@ -61,23 +62,26 @@ - + + + +
@@ -108,21 +112,36 @@ - + + + + + + - + + + + + + - + + + + + + @@ -131,18 +150,21 @@ + + +