diff pre_process.xml @ 26:685046e0381a draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
author bgruening
date Fri, 09 Aug 2019 07:16:21 -0400
parents 9e43ee712723
children eb79bde99328
line wrap: on
line diff
--- a/pre_process.xml	Tue Jul 09 19:35:04 2019 -0400
+++ b/pre_process.xml	Fri Aug 09 07:16:21 2019 -0400
@@ -19,12 +19,14 @@
 import json
 import pandas
 import pickle
+
 from scipy.io import mmread
 from scipy.io import mmwrite
 from sklearn import preprocessing
+from galaxy_ml.utils import read_columns, SafeEval
 
-sys.path.insert(0, '$__tool_directory__')
-from utils import read_columns
+
+safe_eval = SafeEval()
 
 input_json_path = sys.argv[1]
 with open(input_json_path, "r") as param_handler:
@@ -39,20 +41,27 @@
     c = params["input_type"]["column_selector_options_1"]["col1"]
 else:
     c = None
-X = read_columns(
+X, input_df = read_columns(
         "$input_type.infile",
-        c = c,
-        c_option = column_option,
+        c=c,
+        c_option=column_option,
+        return_df=True,
         sep='\t',
         header=header,
         parse_dates=True,
         encoding=None,
         index_col=None,
-        tupleize_cols=False).astype(float)
+        tupleize_cols=False)
+X = X.astype(float)
 #end if
 
 preprocessor = params["input_type"]["pre_processors"]["selected_pre_processor"]
 options = params["input_type"]["pre_processors"]["options"]
+if 'feature_range' in options:
+    feature_range = safe_eval(options['feature_range'].strip())
+    if not feature_range:
+        feature_range = (0, 1)
+    options['feature_range'] = feature_range
 
 my_class = getattr(preprocessing, preprocessor)
 estimator = my_class(**options)
@@ -63,8 +72,13 @@
 with open("$outfile_transform", "wb") as transform_handler:
     mmwrite(transform_handler, result)
 #else:
-res = pandas.DataFrame(result)
-res.to_csv(path_or_buf = "$outfile_transform", sep="\t", index=False, header=None)
+columns = input_df.columns
+if preprocessor == 'PolynomialFeatures':
+    columns = None
+    header = False
+res = pandas.DataFrame(result, columns=columns)
+res.to_csv(path_or_buf = "$outfile_transform", sep="\t",
+           index=False, header=True if header else False)
 #end if
 
 #if $save:
@@ -155,15 +169,6 @@
             <output name="outfile_fit" file="prp_model05" ftype="zip" compare="sim_size" delta="5"/>
         </test>
         <test>
-            <param name="infile" value="csr_sparse2.mtx" ftype="txt"/>
-            <param name="selected_input_type" value="sparse"/>
-            <param name="selected_pre_processor" value="Imputer"/>
-            <param name="save" value="true"/>
-            <param name="axis" value="true"/>
-            <output name="outfile_transform" file="prp_result06" ftype="tabular"/>
-            <output name="outfile_fit" file="prp_model06" ftype="zip" compare="sim_size" delta="50"/>
-        </test>
-        <test>
             <param name="infile" value="train.tabular" ftype="tabular"/>
             <param name="selected_input_type" value="tabular"/>
             <param name="selected_column_selector_option" value="all_columns"/>
@@ -188,6 +193,16 @@
             <output name="outfile_transform" file="prp_result09" ftype="tabular"/>
             <output name="outfile_fit" file="prp_model09" ftype="zip" compare="sim_size" delta="5"/>
         </test>
+        <test>
+            <param name="infile" value="regression_X.tabular" ftype="tabular"/>
+            <param name="header1" value="true"/>
+            <param name="selected_column_selector_option" value="all_columns"/>
+            <param name="selected_input_type" value="tabular"/>
+            <param name="selected_pre_processor" value="MinMaxScaler"/>
+            <param name="feature_range" value="(-1, 1)"/>
+            <param name="save" value="false"/>
+            <output name="outfile_transform" file="prp_result10" ftype="tabular"/>
+        </test>
     </tests>
     <help>
         <![CDATA[