# HG changeset patch # User bgruening # Date 1520931368 14400 # Node ID 6e6726be0728f90ba0c02656f617d03e0326479e # Parent 883f2973d37d428e3b39546aa23d42364da4d99d planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit 641ac64ded23fbb6fe85d5f13926da12dcce4e76 diff -r 883f2973d37d -r 6e6726be0728 ensemble.xml --- a/ensemble.xml Fri Feb 16 14:56:05 2018 -0500 +++ b/ensemble.xml Tue Mar 13 04:56:08 2018 -0400 @@ -25,23 +25,31 @@ input_json_path = sys.argv[1] params = json.load(open(input_json_path, "r")) +@COLUMNS_FUNCTION@ + #if $selected_tasks.selected_task == "train": algorithm = params["selected_tasks"]["selected_algorithms"]["selected_algorithm"] options = params["selected_tasks"]["selected_algorithms"]["options"] input_type = params["selected_tasks"]["selected_algorithms"]["input_options"]["selected_input"] if input_type=="tabular": - col1 = params["selected_tasks"]["selected_algorithms"]["input_options"]["col1"] - col1 = list(map(lambda x: x - 1, col1)) - f1 = pandas.read_csv("$selected_tasks.selected_algorithms.input_options.infile1", sep='\t', header=None, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False ) - X = f1.iloc[:,col1].values + X = read_columns( + "$selected_tasks.selected_algorithms.input_options.infile1", + "$selected_tasks.selected_algorithms.input_options.col1", + sep='\t', + header=None, + parse_dates=True + ) else: X = mmread(open("$selected_tasks.selected_algorithms.input_options.infile1", 'r')) -col2 = params["selected_tasks"]["selected_algorithms"]["input_options"]["col2"] -col2 = list(map(lambda x: x - 1, col2)) -f2 = pandas.read_csv("$selected_tasks.selected_algorithms.input_options.infile2", sep='\t', header=None, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False ) -y = f2.iloc[:,col2].values +y = read_columns( + "$selected_tasks.selected_algorithms.input_options.infile2", + "$selected_tasks.selected_algorithms.input_options.col2", + sep='\t', + header=None, + parse_dates=True +) my_class = getattr(sklearn.ensemble, algorithm) estimator = my_class(**options) @@ -50,7 +58,7 @@ #else: classifier_object = pickle.load(open("$selected_tasks.infile_model", 'r')) -data = pandas.read_csv("$selected_tasks.infile_data", sep='\t', header=0, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False ) +data = pandas.read_csv("$selected_tasks.infile_data", sep='\t', header=0, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False) prediction = classifier_object.predict(data) prediction_df = pandas.DataFrame(prediction) res = pandas.concat([data, prediction_df], axis=1) diff -r 883f2973d37d -r 6e6726be0728 main_macros.xml --- a/main_macros.xml Fri Feb 16 14:56:05 2018 -0500 +++ b/main_macros.xml Tue Mar 13 04:56:08 2018 -0400 @@ -2,8 +2,8 @@ 0.9 -def columns(f,c): - data = pandas.read_csv(f, sep='\t', header=None, index_col=None, parse_dates=True, encoding=None, tupleize_cols=False) +def read_columns(f, c, **args): + data = pandas.read_csv(f, **args) cols = c.split (',') cols = map(int, cols) cols = list(map(lambda x: x - 1, cols))