Mercurial > repos > bgruening > sklearn_nn_classifier
view nn_classifier.xml @ 14:9871a634540f draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 02087ce2966cf8b4aac9197a41171e7f986c11d1-dirty"
author | bgruening |
---|---|
date | Wed, 02 Oct 2019 03:58:00 -0400 |
parents | d0efc68a3ddb |
children | 699024d5c451 |
line wrap: on
line source
<!--
    Galaxy tool wrapper: nearest-neighbour classification with scikit-learn.

    The tool runs a generated Python script (configfile "nnc_script") in one of
    two modes, chosen by the "selected_task" parameter that the "sl_Conditional"
    macro contributes:
      * load     - read a previously fitted model and write predictions for a
                   tabular dataset to "outfile_predict";
      * otherwise - fit a KNeighborsClassifier, RadiusNeighborsClassifier or
                   NearestCentroid estimator and pickle it to "outfile_fit".
    Macros referenced here (python_requirements, macro_stdio, sl_Conditional,
    sl_mixed_input, nn_advanced_options, output, sklearn_citation) are defined
    in main_macros.xml, which is not part of this file.
-->
<tool id="sklearn_nn_classifier" name="Nearest Neighbors Classification" version="@VERSION@">
    <description></description>
    <macros>
        <import>main_macros.xml</import>
    </macros>
    <expand macro="python_requirements"/>
    <expand macro="macro_stdio"/>
    <version_command>echo "@VERSION@"</version_command>
    <command><![CDATA[
    python '$nnc_script' '$inputs'
]]>
    </command>
    <configfiles>
        <!-- JSON dump of all tool parameters; passed to the script as argv[1]. -->
        <inputs name="inputs"/>
        <configfile name="nnc_script">
<![CDATA[
## Lines starting with a double hash are Cheetah comments and are stripped
## before the script is written; single-hash if/else/end lines are Cheetah
## directives that pick which half of the script is generated.
import sys
import json
import numpy as np
import sklearn.neighbors
import pandas
import pickle
from galaxy_ml.utils import load_model, get_X_y

input_json_path = sys.argv[1]
with open(input_json_path, "r") as param_handler:
    params = json.load(param_handler)

#if $selected_tasks.selected_task == "load":

## Prediction mode: restore a fitted estimator and apply it to new data.
## NOTE(review): load_model comes from galaxy_ml; presumably it restricts what
## may be unpickled, but that cannot be confirmed from this file.
with open("$infile_model", 'rb') as model_handler:
    classifier_object = load_model(model_handler)

header = 'infer' if params["selected_tasks"]["header"] else None
## The tupleize_cols keyword was dropped from this call: pandas removed it in
## 1.0 (passing it raises TypeError) and False was already the default.
data = pandas.read_csv("$selected_tasks.infile_data", sep='\t', header=header, index_col=None, parse_dates=True, encoding=None)
prediction = classifier_object.predict(data)
prediction_df = pandas.DataFrame(prediction)
## Append the predictions as an extra column to the right of the input table.
res = pandas.concat([data, prediction_df], axis=1)
res.to_csv(path_or_buf = "$outfile_predict", sep="\t", index=False)

#else:

## Training mode: build the feature matrix and target vector, then fit.
X, y = get_X_y(params, "$selected_tasks.selected_algorithms.input_options.infile1" ,"$selected_tasks.selected_algorithms.input_options.infile2")

selected_algorithm = params["selected_tasks"]["selected_algorithms"]["selected_algorithm"]

if selected_algorithm == "nneighbors":
    ## The select value is the sklearn.neighbors class name itself.
    classifier = params["selected_tasks"]["selected_algorithms"]["sampling_methods"]["sampling_method"]
    sys.stdout.write(classifier)
    options = params["selected_tasks"]["selected_algorithms"]["sampling_methods"]["options"]
    sys.stdout.write(str(options))
elif selected_algorithm == "ncentroid":
    options = params["selected_tasks"]["selected_algorithms"]["options"]
    classifier = "NearestCentroid"

## Resolve the estimator class by name and pass the option dict as keywords.
my_class = getattr(sklearn.neighbors, classifier)
classifier_object = my_class(**options)
classifier_object.fit(X, y)

with open("$outfile_fit", 'wb') as out_handler:
    pickle.dump(classifier_object, out_handler)

#end if

]]>
        </configfile>
    </configfiles>
    <inputs>
        <expand macro="sl_Conditional" model="zip"><!--Todo: add sparse to targets-->
            <param name="selected_algorithm" type="select" label="Classifier type">
                <option value="nneighbors">Nearest Neighbors</option>
                <option value="ncentroid">Nearest Centroid</option>
            </param>
            <when value="nneighbors">
                <expand macro="sl_mixed_input"/>
                <conditional name="sampling_methods">
                    <!-- Option values double as sklearn.neighbors class names looked up by the script. -->
                    <param name="sampling_method" type="select" label="Neighbor selection method">
                        <option value="KNeighborsClassifier" selected="true">K-nearest neighbors</option>
                        <option value="RadiusNeighborsClassifier">Radius-based</option>
                    </param>
                    <when value="KNeighborsClassifier">
                        <expand macro="nn_advanced_options">
                            <param argument="n_neighbors" type="integer" optional="true" value="5" label="Number of neighbors" help=" "/>
                        </expand>
                    </when>
                    <when value="RadiusNeighborsClassifier">
                        <expand macro="nn_advanced_options">
                            <param argument="radius" type="float" optional="true" value="1.0" label="Radius" help="Range of parameter space to use by default for :meth ''radius_neighbors'' queries."/>
                        </expand>
                    </when>
                </conditional>
            </when>
            <when value="ncentroid">
                <expand macro="sl_mixed_input"/>
                <!-- The script passes this section's values to NearestCentroid as keyword arguments. -->
                <section name="options" title="Advanced Options" expanded="False">
                    <param argument="metric" type="text" optional="true" value="euclidean" label="Metric" help="The metric to use when calculating distance between instances in a feature array."/>
                    <param argument="shrink_threshold" type="float" optional="true" value="" label="Shrink threshold" help="Floating point number for shrinking centroids to remove features."/>
                </section>
            </when>
        </expand>
    </inputs>

    <expand macro="output"/>

    <tests>
        <!-- Training: k-nearest neighbours with the brute-force algorithm. -->
        <test>
            <param name="infile1" value="train_set.tabular" ftype="tabular"/>
            <param name="infile2" value="train_set.tabular" ftype="tabular"/>
            <param name="header1" value="True"/>
            <param name="header2" value="True"/>
            <param name="col1" value="1,2,3,4"/>
            <param name="col2" value="5"/>
            <param name="selected_task" value="train"/>
            <param name="selected_algorithm" value="nneighbors"/>
            <param name="sampling_method" value="KNeighborsClassifier" />
            <param name="algorithm" value="brute" />
            <output name="outfile_fit" file="nn_model01" compare="sim_size" delta="5"/>
        </test>
        <!-- Training: radius-based neighbours. -->
        <test>
            <param name="infile1" value="train_set.tabular" ftype="tabular"/>
            <param name="infile2" value="train_set.tabular" ftype="tabular"/>
            <param name="header1" value="True"/>
            <param name="header2" value="True"/>
            <param name="col1" value="1,2,3,4"/>
            <param name="col2" value="5"/>
            <param name="selected_task" value="train"/>
            <param name="selected_algorithm" value="nneighbors"/>
            <param name="sampling_method" value="RadiusNeighborsClassifier" />
            <output name="outfile_fit" file="nn_model02" compare="sim_size" delta="5"/>
        </test>
        <!-- Training: nearest centroid. -->
        <test>
            <param name="infile1" value="train_set.tabular" ftype="tabular"/>
            <param name="infile2" value="train_set.tabular" ftype="tabular"/>
            <param name="header1" value="True"/>
            <param name="header2" value="True"/>
            <param name="col1" value="1,2,3,4"/>
            <param name="col2" value="5"/>
            <param name="selected_task" value="train"/>
            <param name="selected_algorithm" value="ncentroid"/>
            <output name="outfile_fit" file="nn_model03" compare="sim_size" delta="5"/>
        </test>
        <!-- Prediction with each of the three stored models. -->
        <test>
            <param name="infile_model" value="nn_model01" ftype="zip"/>
            <param name="infile_data" value="test_set.tabular" ftype="tabular"/>
            <param name="header" value="True"/>
            <param name="selected_task" value="load"/>
            <output name="outfile_predict" file="nn_prediction_result01.tabular"/>
        </test>
        <test>
            <param name="infile_model" value="nn_model02" ftype="zip"/>
            <param name="infile_data" value="test_set.tabular" ftype="tabular"/>
            <param name="header" value="True"/>
            <param name="selected_task" value="load"/>
            <output name="outfile_predict" file="nn_prediction_result02.tabular"/>
        </test>
        <test>
            <param name="infile_model" value="nn_model03" ftype="zip"/>
            <param name="infile_data" value="test_set.tabular" ftype="tabular"/>
            <param name="header" value="True"/>
            <param name="selected_task" value="load"/>
            <output name="outfile_predict" file="nn_prediction_result03.tabular"/>
        </test>
    </tests>
    <help><![CDATA[
**What it does**

This module implements the k-nearest neighbors classification algorithms.
For more information check http://scikit-learn.org/stable/modules/neighbors.html
    ]]></help>
    <expand macro="sklearn_citation"/>
</tool>