comparison pre_process.xml @ 26:685046e0381a draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
author bgruening
date Fri, 09 Aug 2019 07:16:21 -0400
parents 9e43ee712723
children eb79bde99328
comparison
equal deleted inserted replaced
25:9ac0b78c6b6d 26:685046e0381a
17 <![CDATA[ 17 <![CDATA[
18 import sys 18 import sys
19 import json 19 import json
20 import pandas 20 import pandas
21 import pickle 21 import pickle
22
22 from scipy.io import mmread 23 from scipy.io import mmread
23 from scipy.io import mmwrite 24 from scipy.io import mmwrite
24 from sklearn import preprocessing 25 from sklearn import preprocessing
25 26 from galaxy_ml.utils import read_columns, SafeEval
26 sys.path.insert(0, '$__tool_directory__') 27
27 from utils import read_columns 28
29 safe_eval = SafeEval()
28 30
29 input_json_path = sys.argv[1] 31 input_json_path = sys.argv[1]
30 with open(input_json_path, "r") as param_handler: 32 with open(input_json_path, "r") as param_handler:
31 params = json.load(param_handler) 33 params = json.load(param_handler)
32 34
37 column_option = params["input_type"]["column_selector_options_1"]["selected_column_selector_option"] 39 column_option = params["input_type"]["column_selector_options_1"]["selected_column_selector_option"]
38 if column_option in ["by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name"]: 40 if column_option in ["by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name"]:
39 c = params["input_type"]["column_selector_options_1"]["col1"] 41 c = params["input_type"]["column_selector_options_1"]["col1"]
40 else: 42 else:
41 c = None 43 c = None
42 X = read_columns( 44 X, input_df = read_columns(
43 "$input_type.infile", 45 "$input_type.infile",
44 c = c, 46 c=c,
45 c_option = column_option, 47 c_option=column_option,
48 return_df=True,
46 sep='\t', 49 sep='\t',
47 header=header, 50 header=header,
48 parse_dates=True, 51 parse_dates=True,
49 encoding=None, 52 encoding=None,
50 index_col=None, 53 index_col=None,
51 tupleize_cols=False).astype(float) 54 tupleize_cols=False)
55 X = X.astype(float)
52 #end if 56 #end if
53 57
54 preprocessor = params["input_type"]["pre_processors"]["selected_pre_processor"] 58 preprocessor = params["input_type"]["pre_processors"]["selected_pre_processor"]
55 options = params["input_type"]["pre_processors"]["options"] 59 options = params["input_type"]["pre_processors"]["options"]
60 if 'feature_range' in options:
61 feature_range = safe_eval(options['feature_range'].strip())
62 if not feature_range:
63 feature_range = (0, 1)
64 options['feature_range'] = feature_range
56 65
57 my_class = getattr(preprocessing, preprocessor) 66 my_class = getattr(preprocessing, preprocessor)
58 estimator = my_class(**options) 67 estimator = my_class(**options)
59 estimator.fit(X) 68 estimator.fit(X)
60 result = estimator.transform(X) 69 result = estimator.transform(X)
61 70
62 #if $input_type.selected_input_type == "sparse": 71 #if $input_type.selected_input_type == "sparse":
63 with open("$outfile_transform", "wb") as transform_handler: 72 with open("$outfile_transform", "wb") as transform_handler:
64 mmwrite(transform_handler, result) 73 mmwrite(transform_handler, result)
65 #else: 74 #else:
66 res = pandas.DataFrame(result) 75 columns = input_df.columns
67 res.to_csv(path_or_buf = "$outfile_transform", sep="\t", index=False, header=None) 76 if preprocessor == 'PolynomialFeatures':
77 columns = None
78 header = False
79 res = pandas.DataFrame(result, columns=columns)
80 res.to_csv(path_or_buf = "$outfile_transform", sep="\t",
81 index=False, header=True if header else False)
68 #end if 82 #end if
69 83
70 #if $save: 84 #if $save:
71 with open("$outfile_fit", 'wb') as out_handler: 85 with open("$outfile_fit", 'wb') as out_handler:
72 pickle.dump(estimator, out_handler, pickle.HIGHEST_PROTOCOL) 86 pickle.dump(estimator, out_handler, pickle.HIGHEST_PROTOCOL)
153 <param name="save" value="true"/> 167 <param name="save" value="true"/>
154 <output name="outfile_transform" file="prp_result05" ftype="tabular"/> 168 <output name="outfile_transform" file="prp_result05" ftype="tabular"/>
155 <output name="outfile_fit" file="prp_model05" ftype="zip" compare="sim_size" delta="5"/> 169 <output name="outfile_fit" file="prp_model05" ftype="zip" compare="sim_size" delta="5"/>
156 </test> 170 </test>
157 <test> 171 <test>
158 <param name="infile" value="csr_sparse2.mtx" ftype="txt"/>
159 <param name="selected_input_type" value="sparse"/>
160 <param name="selected_pre_processor" value="Imputer"/>
161 <param name="save" value="true"/>
162 <param name="axis" value="true"/>
163 <output name="outfile_transform" file="prp_result06" ftype="tabular"/>
164 <output name="outfile_fit" file="prp_model06" ftype="zip" compare="sim_size" delta="50"/>
165 </test>
166 <test>
167 <param name="infile" value="train.tabular" ftype="tabular"/> 172 <param name="infile" value="train.tabular" ftype="tabular"/>
168 <param name="selected_input_type" value="tabular"/> 173 <param name="selected_input_type" value="tabular"/>
169 <param name="selected_column_selector_option" value="all_columns"/> 174 <param name="selected_column_selector_option" value="all_columns"/>
170 <param name="selected_pre_processor" value="StandardScaler"/> 175 <param name="selected_pre_processor" value="StandardScaler"/>
171 <param name="save" value="true"/> 176 <param name="save" value="true"/>
185 <param name="selected_input_type" value="sparse"/> 190 <param name="selected_input_type" value="sparse"/>
186 <param name="selected_pre_processor" value="Normalizer"/> 191 <param name="selected_pre_processor" value="Normalizer"/>
187 <param name="save" value="true"/> 192 <param name="save" value="true"/>
188 <output name="outfile_transform" file="prp_result09" ftype="tabular"/> 193 <output name="outfile_transform" file="prp_result09" ftype="tabular"/>
189 <output name="outfile_fit" file="prp_model09" ftype="zip" compare="sim_size" delta="5"/> 194 <output name="outfile_fit" file="prp_model09" ftype="zip" compare="sim_size" delta="5"/>
195 </test>
196 <test>
197 <param name="infile" value="regression_X.tabular" ftype="tabular"/>
198 <param name="header1" value="true"/>
199 <param name="selected_column_selector_option" value="all_columns"/>
200 <param name="selected_input_type" value="tabular"/>
201 <param name="selected_pre_processor" value="MinMaxScaler"/>
202 <param name="feature_range" value="(-1, 1)"/>
203 <param name="save" value="false"/>
204 <output name="outfile_transform" file="prp_result10" ftype="tabular"/>
190 </test> 205 </test>
191 </tests> 206 </tests>
192 <help> 207 <help>
193 <![CDATA[ 208 <![CDATA[
194 **What it does** 209 **What it does**