comparison feature_selection.xml @ 17:2bbbac61e48d draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
author bgruening
date Sun, 30 Dec 2018 01:57:11 -0500
parents 026667802750
children ec25331946b8
comparison
equal deleted inserted replaced
16:328a8d547ca2 17:2bbbac61e48d
13 </command> 13 </command>
14 <configfiles> 14 <configfiles>
15 <inputs name="inputs" /> 15 <inputs name="inputs" />
16 <configfile name="feature_selection_script"> 16 <configfile name="feature_selection_script">
17 <![CDATA[ 17 <![CDATA[
18 import sys
19 import os
20 import json 18 import json
21 import pandas
22 import sklearn.feature_selection 19 import sklearn.feature_selection
23 20
24 with open("$__tool_directory__/sk_whitelist.json", "r") as f: 21 with open('$__tool_directory__/sk_whitelist.json', 'r') as f:
25 sk_whitelist = json.load(f) 22 sk_whitelist = json.load(f)
26 exec(open("$__tool_directory__/utils.py").read(), globals()) 23 exec(open('$__tool_directory__/utils.py').read(), globals())
24
25 warnings.simplefilter('ignore')
27 26
28 safe_eval = SafeEval() 27 safe_eval = SafeEval()
29 28
30 input_json_path = sys.argv[1] 29 input_json_path = sys.argv[1]
31 with open(input_json_path, "r") as param_handler: 30 with open(input_json_path, 'r') as param_handler:
32 params = json.load(param_handler) 31 params = json.load(param_handler)
33 32
34 #handle cheetah 33 #handle cheetah
35 #if $fs_algorithm_selector.selected_algorithm == "SelectFromModel"\ 34 #if $fs_algorithm_selector.selected_algorithm == 'SelectFromModel'\
36 and $fs_algorithm_selector.model_inputter.input_mode == "prefitted": 35 and $fs_algorithm_selector.model_inputter.input_mode == 'prefitted':
37 params['fs_algorithm_selector']['model_inputter']['fitted_estimator'] =\ 36 params['fs_algorithm_selector']['model_inputter']['fitted_estimator'] =\
38 "$fs_algorithm_selector.model_inputter.fitted_estimator" 37 '$fs_algorithm_selector.model_inputter.fitted_estimator'
38 #end if
39
40 #if $fs_algorithm_selector.selected_algorithm == 'SelectFromModel'\
41 and $fs_algorithm_selector.model_inputter.input_mode == 'new'\
42 and $fs_algorithm_selector.model_inputter.estimator_selector.selected_module == 'customer_estimator':
43 params['fs_algorithm_selector']['model_inputter']['estimator_selector']['c_estimator'] =\
44 '$fs_algorithm_selector.model_inputter.estimator_selector.c_estimator'
45 #end if
46
47 #if $fs_algorithm_selector.selected_algorithm in ['RFE', 'RFECV']\
48 and $fs_algorithm_selector.estimator_selector.selected_module == 'customer_estimator':
49 params['fs_algorithm_selector']['estimator_selector']['c_estimator'] =\
50 '$fs_algorithm_selector.estimator_selector.c_estimator'
39 #end if 51 #end if
40 52
41 # Read features 53 # Read features
42 features_has_header = params["input_options"]["header1"] 54 features_has_header = params['input_options']['header1']
43 input_type = params["input_options"]["selected_input"] 55 input_type = params['input_options']['selected_input']
44 if input_type=="tabular": 56 if input_type == 'tabular':
45 header = 'infer' if features_has_header else None 57 header = 'infer' if features_has_header else None
46 column_option = params["input_options"]["column_selector_options_1"]["selected_column_selector_option"] 58 column_option = params['input_options']['column_selector_options_1']['selected_column_selector_option']
47 if column_option in ["by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name"]: 59 if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']:
48 c = params["input_options"]["column_selector_options_1"]["col1"] 60 c = params['input_options']['column_selector_options_1']['col1']
49 else: 61 else:
50 c = None 62 c = None
51 X, input_df = read_columns( 63 X, input_df = read_columns(
52 "$input_options.infile1", 64 '$input_options.infile1',
53 c = c, 65 c = c,
54 c_option = column_option, 66 c_option = column_option,
55 return_df = True, 67 return_df = True,
56 sep='\t', 68 sep='\t',
57 header=header, 69 header=header,
58 parse_dates=True 70 parse_dates=True
59 ) 71 )
60 else: 72 else:
61 X = mmread("$input_options.infile1") 73 X = mmread('$input_options.infile1')
62 74
63 # Read labels 75 # Read labels
64 header = 'infer' if params["input_options"]["header2"] else None 76 header = 'infer' if params['input_options']['header2'] else None
65 column_option = params["input_options"]["column_selector_options_2"]["selected_column_selector_option2"] 77 column_option = params['input_options']['column_selector_options_2']['selected_column_selector_option2']
66 if column_option in ["by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name"]: 78 if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']:
67 c = params["input_options"]["column_selector_options_2"]["col2"] 79 c = params['input_options']['column_selector_options_2']['col2']
68 else: 80 else:
69 c = None 81 c = None
70 y = read_columns( 82 y = read_columns(
71 "$input_options.infile2", 83 '$input_options.infile2',
72 c = c, 84 c = c,
73 c_option = column_option, 85 c_option = column_option,
74 sep='\t', 86 sep='\t',
75 header=header, 87 header=header,
76 parse_dates=True 88 parse_dates=True
83 or params['fs_algorithm_selector']['model_inputter']['input_mode'] != 'prefitted' : 95 or params['fs_algorithm_selector']['model_inputter']['input_mode'] != 'prefitted' :
84 new_selector.fit(X, y) 96 new_selector.fit(X, y)
85 97
86 ## Transform to select features 98 ## Transform to select features
87 selected_names = None 99 selected_names = None
88 if "$output_method_selector.selected_method" == "fit_transform": 100
89 res = new_selector.transform(X) 101 res = new_selector.transform(X)
90 if features_has_header: 102 if features_has_header:
91 selected_names = input_df.columns[new_selector.get_support(indices=True)] 103 selected_names = input_df.columns[new_selector.get_support(indices=True)]
92 else:
93 res = new_selector.get_support(params["output_method_selector"]["indices"])
94
95 res = pandas.DataFrame(res, columns = selected_names) 104 res = pandas.DataFrame(res, columns = selected_names)
96 res.to_csv(path_or_buf="$outfile", sep='\t', index=False) 105 res.to_csv(path_or_buf='$outfile', sep='\t', index=False)
97 106
107 #if $save:
108 with open('$outfile_selector', 'wb') as output_handler:
109 pickle.dump(new_selector, output_handler, pickle.HIGHEST_PROTOCOL)
110 #end if
98 111
99 ]]> 112 ]]>
100 </configfile> 113 </configfile>
101 </configfiles> 114 </configfiles>
102 <inputs> 115 <inputs>
103 <expand macro="feature_selection_all"> 116 <expand macro="feature_selection_fs"/>
104 <expand macro="fs_selectfrommodel_prefitted"/> 117 <param name="save" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Save the fitted selector?"/>
105 </expand>
106 <expand macro="feature_selection_output_mothods" />
107 <expand macro="sl_mixed_input"/> 118 <expand macro="sl_mixed_input"/>
108 </inputs> 119 </inputs>
109 <outputs> 120 <outputs>
110 <data format="tabular" name="outfile"/> 121 <data format="tabular" name="outfile" />
122 <data format="zip" name="outfile_selector" label="${fs_algorithm_selector.selected_algorithm}">
123 <filter>save</filter>
124 </data>
111 </outputs> 125 </outputs>
112 <tests> 126 <tests>
113 <test> 127 <test>
114 <param name="selected_algorithm" value="SelectFromModel"/> 128 <param name="selected_algorithm" value="SelectFromModel"/>
115 <param name="input_mode" value="new"/> 129 <param name="input_mode" value="new"/>