diff model_validation.xml @ 8:fd7a054ffdbd draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit f54ff2ba2f8e7542d68966ce5a6b17d7f624ac48
author bgruening
date Fri, 13 Jul 2018 03:56:45 -0400
parents 510aa2ce035e
children c6b3efcba7bd
line wrap: on
line diff
--- a/model_validation.xml	Tue Jul 10 03:13:16 2018 -0400
+++ b/model_validation.xml	Fri Jul 13 03:56:45 2018 -0400
@@ -22,7 +22,7 @@
 import pickle
 import numpy as np
 import sklearn.model_selection
-from sklearn import svm, linear_model, ensemble
+from sklearn import svm, linear_model, ensemble, preprocessing
 from sklearn.pipeline import Pipeline
 
 @COLUMNS_FUNCTION@
@@ -30,7 +30,8 @@
 @FEATURE_SELECTOR_FUNCTION@
 
 input_json_path = sys.argv[1]
-params = json.load(open(input_json_path, "r"))
+with open(input_json_path, "r") as param_handler:
+    params = json.load(param_handler)
 
 input_type = params["input_options"]["selected_input"]
 if input_type=="tabular":
@@ -49,7 +50,7 @@
             parse_dates=True
     )
 else:
-    X = mmread(open("$input_options.infile1", 'r'))
+    X = mmread("$input_options.infile1")
 
 header = 'infer' if params["input_options"]["header2"] else None
 column_option = params["input_options"]["column_selector_options_2"]["selected_column_selector_option2"]
@@ -75,10 +76,17 @@
 
 pipeline_steps = []
 
+## Set up pre_processor and add to pipeline steps.
+if params['pre_processing']['do_pre_processing'] == 'Yes':
+    preprocessor = params["pre_processing"]["pre_processors"]["selected_pre_processor"]
+    pre_processor_options = params["pre_processing"]["pre_processors"]["options"]
+    my_class = getattr(preprocessing, preprocessor)
+    pipeline_steps.append( ('pre_processor', my_class(**pre_processor_options)) )
+
 ## Set up feature selector and add to pipeline steps.
 if params['feature_selection']['do_feature_selection'] == 'Yes':
     feature_selector = feature_selector(params['feature_selection']['feature_selection_algorithms'])
-    pipeline_steps.append( ('feature_selector', feature_selector))
+    pipeline_steps.append( ('feature_selector', feature_selector) )
 
 ## Set up estimator and add to pipeline.
 estimator=params["model_validation_functions"]["estimator"]
@@ -138,6 +146,19 @@
         </configfile>
     </configfiles>
     <inputs>
+        <conditional name="pre_processing">
+            <param name="do_pre_processing" type="select" label="Do pre_processing?">
+                <option value="No" selected="true"/>
+                <option value="Yes"/>
+            </param>
+            <when value="No"/>
+            <when value="Yes">
+                <conditional name="pre_processors">
+                    <expand macro="sparse_preprocessors_ext" />
+                    <expand macro="sparse_preprocessor_options_ext" />
+                </conditional>
+            </when>
+        </conditional>
         <conditional name="feature_selection">
             <param name="do_feature_selection" type="select" label="Do feature selection?">
                 <option value="No" selected="true"/>
@@ -352,7 +373,54 @@
             <param name="infile2" value="regression_y.tabular" ftype="tabular"/>
             <param name="header2" value="true" />
             <param name="selected_column_selector_option2" value="all_columns"/>
-            <output name="outfile" file="mv_result07.tabular"/>
+            <output name="outfile" >
+                <assert_contents>
+                    <has_line line="0.7824428015300172" />
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <param name="do_pre_processing" value="Yes"/>
+            <param name="selected_pre_processor" value="RobustScaler"/>
+            <param name="do_feature_selection" value="Yes"/>
+            <param name="selected_algorithm" value="SelectKBest"/>
+            <param name="score_func" value="f_classif"/>
+            <param name="selected_function" value="GridSearchCV"/>
+            <param name="estimator" value="svm.SVR(kernel=&quot;linear&quot;)"/>
+            <param name="has_estimator" value="yes"/>
+            <param name="param_grid" value="[{'feature_selector__k': [3, 5, 7, 9], 'estimator__C': [1, 10, 100, 1000]}]"/>
+            <param name="return_type" value="best_score_"/>
+            <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
+            <param name="header1" value="true" />
+            <param name="selected_column_selector_option" value="all_columns"/>
+            <param name="infile2" value="regression_y.tabular" ftype="tabular"/>
+            <param name="header2" value="true" />
+            <param name="selected_column_selector_option2" value="all_columns"/>
+            <output name="outfile" >
+                <assert_contents>
+                    <has_line line="0.7938837807353147" />
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <param name="do_pre_processing" value="Yes"/>
+            <param name="selected_pre_processor" value="RobustScaler"/>
+            <param name="selected_function" value="GridSearchCV"/>
+            <param name="estimator" value="svm.SVR(kernel=&quot;linear&quot;)"/>
+            <param name="has_estimator" value="yes"/>
+            <param name="param_grid" value="[{'estimator__C': [1, 10, 100, 1000]}]"/>
+            <param name="return_type" value="best_score_"/>
+            <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
+            <param name="header1" value="true" />
+            <param name="selected_column_selector_option" value="all_columns"/>
+            <param name="infile2" value="regression_y.tabular" ftype="tabular"/>
+            <param name="header2" value="true" />
+            <param name="selected_column_selector_option2" value="all_columns"/>
+            <output name="outfile" >
+                <assert_contents>
+                    <has_line line="0.7904476204861263" />
+                </assert_contents>
+            </output>
         </test>
     </tests>
     <help>