# HG changeset patch # User bgruening # Date 1531161205 14400 # Node ID f46da2feb23385b546da83eea7aa2fb7768836ee # Parent cb14b6827f7031e5f2c2e7be6237c97250c6559d planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 5d71c93a3dd804b1469852240a86021ab9130364 diff -r cb14b6827f70 -r f46da2feb233 discriminant.xml --- a/discriminant.xml Sun Jul 01 03:20:59 2018 -0400 +++ b/discriminant.xml Mon Jul 09 14:33:25 2018 -0400 @@ -22,6 +22,9 @@ import pandas import pickle +@COLUMNS_FUNCTION@ +@GET_X_y_FUNCTION@ + input_json_path = sys.argv[1] params = json.load(open(input_json_path, "r")) @@ -30,7 +33,8 @@ classifier_object = pickle.load(open("$infile_model", 'r')) -data = pandas.read_csv("$selected_tasks.infile_data", sep='\t', header=0, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False ) +header = 'infer' if params["selected_tasks"]["header"] else None +data = pandas.read_csv("$selected_tasks.infile_data", sep='\t', header=header, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False) prediction = classifier_object.predict(data) prediction_df = pandas.DataFrame(prediction) res = pandas.concat([data, prediction_df], axis=1) @@ -38,17 +42,14 @@ #else: -data_train = pandas.read_csv("$selected_tasks.infile_train", sep='\t', header=0, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False ) - -data = data_train.ix[:,0:len(data_train.columns)-1] -labels = np.array(data_train[data_train.columns[len(data_train.columns)-1]]) +X, y = get_X_y(params, "$selected_tasks.selected_algorithms.input_options.infile1" ,"$selected_tasks.selected_algorithms.input_options.infile2") options = params["selected_tasks"]["selected_algorithms"]["options"] selected_algorithm = params["selected_tasks"]["selected_algorithms"]["selected_algorithm"] my_class = getattr(sklearn.discriminant_analysis, selected_algorithm) classifier_object = my_class(**options) -classifier_object.fit(data,labels) +classifier_object.fit(X, y) pickle.dump(classifier_object,open("$outfile_fit", 'w+'), pickle.HIGHEST_PROTOCOL) #end if @@ -56,12 +57,13 @@ - + +
@@ -78,6 +80,7 @@
+
@@ -91,7 +94,12 @@ - + + + + + + @@ -99,33 +107,46 @@ - + + + + + + - + + + + + + - + + + + diff -r cb14b6827f70 -r f46da2feb233 main_macros.xml --- a/main_macros.xml Sun Jul 01 03:20:59 2018 -0400 +++ b/main_macros.xml Mon Jul 09 14:33:25 2018 -0400 @@ -64,6 +64,45 @@ return new_selector + +def get_X_y(params, file1, file2): + input_type = params["selected_tasks"]["selected_algorithms"]["input_options"]["selected_input"] + if input_type=="tabular": + header = 'infer' if params["selected_tasks"]["selected_algorithms"]["input_options"]["header1"] else None + column_option = params["selected_tasks"]["selected_algorithms"]["input_options"]["column_selector_options_1"]["selected_column_selector_option"] + if column_option in ["by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name"]: + c = params["selected_tasks"]["selected_algorithms"]["input_options"]["column_selector_options_1"]["col1"] + else: + c = None + X = read_columns( + file1, + c = c, + c_option = column_option, + sep='\t', + header=header, + parse_dates=True + ) + else: + X = mmread(open(file1, 'r')) + + header = 'infer' if params["selected_tasks"]["selected_algorithms"]["input_options"]["header2"] else None + column_option = params["selected_tasks"]["selected_algorithms"]["input_options"]["column_selector_options_2"]["selected_column_selector_option2"] + if column_option in ["by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name"]: + c = params["selected_tasks"]["selected_algorithms"]["input_options"]["column_selector_options_2"]["col2"] + else: + c = None + y = read_columns( + file2, + c = c, + c_option = column_option, + sep='\t', + header=header, + parse_dates=True + ) + y=y.ravel() + return X, y + + python @@ -81,34 +120,6 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - -