Mercurial > repos > bgruening > sklearn_ensemble
comparison ensemble.xml @ 13:6352834b1c99 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 5d71c93a3dd804b1469852240a86021ab9130364
author | bgruening |
---|---|
date | Mon, 09 Jul 2018 14:32:15 -0400 |
parents | a78f96f64939 |
children | 84724d805bfa |
comparison
Legend: equal, deleted, inserted, replaced
12:a78f96f64939 | 13:6352834b1c99 |
---|---|
21 import pandas | 21 import pandas |
22 import pickle | 22 import pickle |
23 from scipy.io import mmread | 23 from scipy.io import mmread |
24 | 24 |
25 @COLUMNS_FUNCTION@ | 25 @COLUMNS_FUNCTION@ |
26 @GET_X_y_FUNCTION@ | |
26 | 27 |
27 input_json_path = sys.argv[1] | 28 input_json_path = sys.argv[1] |
28 params = json.load(open(input_json_path, "r")) | 29 params = json.load(open(input_json_path, "r")) |
29 | 30 |
30 #if $selected_tasks.selected_task == "train": | 31 #if $selected_tasks.selected_task == "train": |
44 options["presort"] = False | 45 options["presort"] = False |
45 if "min_samples_leaf" in options and options["min_samples_leaf"] == 1.0: | 46 if "min_samples_leaf" in options and options["min_samples_leaf"] == 1.0: |
46 options["min_samples_leaf"] = 1 | 47 options["min_samples_leaf"] = 1 |
47 if "min_samples_split" in options and options["min_samples_split"] > 1.0: | 48 if "min_samples_split" in options and options["min_samples_split"] > 1.0: |
48 options["min_samples_split"] = int(options["min_samples_split"]) | 49 options["min_samples_split"] = int(options["min_samples_split"]) |
49 input_type = params["selected_tasks"]["selected_algorithms"]["input_options"]["selected_input"] | 50 |
50 if input_type=="tabular": | 51 X, y = get_X_y(params, "$selected_tasks.selected_algorithms.input_options.infile1" ,"$selected_tasks.selected_algorithms.input_options.infile2") |
51 header = 'infer' if params["selected_tasks"]["selected_algorithms"]["input_options"]["header1"] else None | |
52 column_option = params["selected_tasks"]["selected_algorithms"]["input_options"]["column_selector_options_1"]["selected_column_selector_option"] | |
53 if column_option in ["by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name"]: | |
54 c = params["selected_tasks"]["selected_algorithms"]["input_options"]["column_selector_options_1"]["col1"] | |
55 else: | |
56 c = None | |
57 X = read_columns( | |
58 "$selected_tasks.selected_algorithms.input_options.infile1", | |
59 c = c, | |
60 c_option = column_option, | |
61 sep='\t', | |
62 header=header, | |
63 parse_dates=True | |
64 ) | |
65 else: | |
66 X = mmread(open("$selected_tasks.selected_algorithms.input_options.infile1", 'r')) | |
67 | |
68 header = 'infer' if params["selected_tasks"]["selected_algorithms"]["input_options"]["header2"] else None | |
69 column_option = params["selected_tasks"]["selected_algorithms"]["input_options"]["column_selector_options_2"]["selected_column_selector_option2"] | |
70 if column_option in ["by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name"]: | |
71 c = params["selected_tasks"]["selected_algorithms"]["input_options"]["column_selector_options_2"]["col2"] | |
72 else: | |
73 c = None | |
74 y = read_columns( | |
75 "$selected_tasks.selected_algorithms.input_options.infile2", | |
76 c = c, | |
77 c_option = column_option, | |
78 sep='\t', | |
79 header=header, | |
80 parse_dates=True | |
81 ) | |
82 y=y.ravel() | |
83 | 52 |
84 my_class = getattr(sklearn.ensemble, algorithm) | 53 my_class = getattr(sklearn.ensemble, algorithm) |
85 estimator = my_class(**options) | 54 estimator = my_class(**options) |
86 estimator.fit(X,y) | 55 estimator.fit(X,y) |
87 pickle.dump(estimator,open("$outfile_fit", 'w+'), pickle.HIGHEST_PROTOCOL) | 56 pickle.dump(estimator,open("$outfile_fit", 'w+'), pickle.HIGHEST_PROTOCOL) |