Mercurial > repos > bgruening > sklearn_pairwise_metrics
diff pairwise_metrics.xml @ 0:dd1ed289bba1 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit 0e582cf1f3134c777cce3aa57d71b80ed95e6ba9
author | bgruening |
---|---|
date | Fri, 16 Feb 2018 09:17:16 -0500 |
parents | |
children | 0dfaead1d284 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pairwise_metrics.xml Fri Feb 16 09:17:16 2018 -0500
@@ -0,0 +1,167 @@
+<tool id="sklearn_pairwise_metrics" name="Evaluate pairwise distances" version="@VERSION@">
+    <description>or compute affinity or kernel for sets of samples</description>
+    <macros>
+        <import>main_macros.xml</import>
+    </macros>
+    <expand macro="python_requirements"/>
+    <expand macro="macro_stdio"/>
+    <version_command>echo "@VERSION@"</version_command>
+    <!-- Run the generated script on the JSON dump of the tool parameters. -->
+    <command>
+        <![CDATA[
+        python '$pairwise_script' '$inputs'
+        ]]>
+    </command>
+    <configfiles>
+        <inputs name="inputs" />
+        <configfile name="pairwise_script">
+            <![CDATA[
+import sys
+import json
+import pandas
+import numpy as np
+from sklearn.metrics import pairwise
+from sklearn.metrics import pairwise_distances_argmin
+from scipy.io import mmread
+from scipy.io import mmwrite
+
+## The first argument is the path of the JSON file holding the tool parameters.
+input_json_path = sys.argv[1]
+with open(input_json_path, "r") as param_handler:
+    params = json.load(param_handler)
+
+## Metrics whose branch defines no "options" section fall back to no keyword arguments.
+metric_params = params["input_type"]["metric_functions"]
+options = metric_params.get("options") or {}
+metric_function = metric_params["selected_metric_function"]
+
+input_iter = []
+#for $s in $input_type.input_files
+input_path="${s.input.file_name}"
+#if $input_type.selected_input_type == "sparse":
+input_iter.append(mmread(input_path))
+#else:
+input_iter.append(pandas.read_csv(input_path, sep='\t', header=0, index_col=None, parse_dates=True).values)
+#end if
+#end for
+
+## With a single dataset the samples are compared against themselves.
+X = input_iter[0]
+Y = input_iter[1] if len(input_iter) > 1 else X
+
+if metric_function=="pairwise_distances_argmin":
+    metric_res = pairwise_distances_argmin(X,Y,**options)
+else:
+    my_function = getattr(pairwise, metric_function)
+    metric_res = my_function(X,Y,**options)
+
+pandas.DataFrame(metric_res).to_csv(path_or_buf = "$outfile", sep="\t", index=False, header=False)
+            ]]>
+        </configfile>
+    </configfiles>
+    <inputs>
+        <!-- Both branches expand sparse_pairwise_metric_functions; the tabular branch additionally expands pairwise_metric_functions. -->
+        <conditional name="input_type">
+            <param name="selected_input_type" type="select" label="Select the type of your input data:">
+                <option value="tabular" selected="true">Tabular data (.tabular, .txt)</option>
+                <option value="sparse">Sparse matrix (.mtx)</option>
+            </param>
+            <when value="tabular">
+                <expand macro="multiple_input" max_num="2" format="tabular"/>
+                <conditional name="metric_functions">
+                    <expand macro="sparse_pairwise_metric_functions">
+                        <expand macro="pairwise_metric_functions"/>
+                    </expand>
+                    <when value="additive_chi2_kernel">
+                    </when>
+                    <when value="chi2_kernel">
+                        <section name="options" title="Advanced Options" expanded="False">
+                            <expand macro="gamma" help_text="Floating point scaling parameter of the chi2 kernel. "/>
+                        </section>
+                    </when>
+                    <when value="linear_kernel">
+                    </when>
+                    <when value="manhattan_distances">
+                        <section name="options" title="Advanced Options" expanded="False">
+                            <param argument="sum_over_features" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Sum over features" help="If True, return the pairwise distance matrix, else return the componentwise L1 pairwise-distances. "/>
+                        </section>
+                    </when>
+                    <when value="polynomial_kernel">
+                        <section name="options" title="Advanced Options" expanded="False">
+                            <expand macro="gamma" default_value=""/>
+                            <expand macro="degree"/>
+                            <expand macro="coef0"/>
+                        </section>
+                    </when>
+                    <when value="rbf_kernel">
+                        <section name="options" title="Advanced Options" expanded="False">
+                            <expand macro="gamma" default_value=""/>
+                        </section>
+                    </when>
+                    <when value="laplacian_kernel">
+                        <section name="options" title="Advanced Options" expanded="False">
+                            <expand macro="gamma" default_value=""/>
+                        </section>
+                    </when>
+                    <when value="pairwise_kernels">
+                        <section name="options" title="Advanced Options" expanded="False">
+                            <expand macro="pairwise_kernel_metrics"/>
+                        </section>
+                    </when>
+                    <expand macro="sparse_pairwise_condition">
+                        <expand macro="distance_nonsparse_metrics"/>
+                    </expand>
+                    <expand macro="argmin_distance_condition">
+                        <expand macro="distance_nonsparse_metrics"/>
+                    </expand>
+                </conditional>
+            </when>
+            <when value="sparse">
+                <expand macro="multiple_input" max_num="2"/>
+                <conditional name="metric_functions">
+                    <expand macro="sparse_pairwise_metric_functions"/>
+                    <expand macro="sparse_pairwise_condition"/>
+                    <expand macro="argmin_distance_condition"/>
+                </conditional>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data format="tabular" name="outfile"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="selected_input_type" value="tabular"/>
+            <param name="selected_metric_function" value="rbf_kernel"/>
+            <param name="input_files_0|input" value="test.tabular" ftype="tabular"/>
+            <param name="input_files_1|input" value="test2.tabular" ftype="tabular"/>
+            <param name="gamma" value="0.5"/>
+            <output name="outfile" file="pw_metric01.tabular" compare="sim_size" />
+        </test>
+        <test>
+            <param name="selected_input_type" value="tabular"/>
+            <param name="selected_metric_function" value="pairwise_distances"/>
+            <param name="metric" value="manhattan"/>
+            <param name="input_files_0|input" value="test.tabular" ftype="tabular"/>
+            <output name="outfile" file="pw_metric02.tabular"/>
+        </test>
+        <test>
+            <param name="selected_input_type" value="sparse"/>
+            <param name="selected_metric_function" value="pairwise_distances"/>
+            <param name="metric" value="cosine"/>
+            <param name="input_files_0|input" value="sparse.mtx" ftype="txt"/>
+            <output name="outfile" file="pw_metric03.tabular"/>
+        </test>
+    </tests>
+    <help>
+        <![CDATA[
+**What it does**
+
+This tool consists of utilities to evaluate pairwise distances or affinity of sets of samples.
+The base utilities are contained in Scikit-learn python library in sklearn.metrics package.
+This module contains both distance metrics and kernels. For a brief summary, please refer to:
+http://scikit-learn.org/stable/modules/metrics.html#metrics
+        ]]>
+    </help>
+    <expand macro="sklearn_citation"/>
+</tool>