view Marea/marea_cluster.xml @ 31:944e15aa970a draft

Uploaded
author bimib
date Tue, 15 Oct 2019 12:22:43 -0400
parents 9fcb0e8d6d47
children abf0bfe01c78
line wrap: on
line source

<tool id="MaREA_cluester" name="Cluster Analysis" version="1.0.6">
    <description></description>
    <macros>
        <import>marea_macros.xml</import>
    </macros>
    <!-- Pinned packages that must be available when the tool's command runs. -->
    <requirements>
        <requirement type="package" version="0.25.1">pandas</requirement>
        <requirement type="package" version="1.1.0">scipy</requirement>
        <requirement type="package" version="0.10.1">cobra</requirement>
        <requirement type="package" version="0.21.3">scikit-learn</requirement>
        <requirement type="package" version="2.2.2">matplotlib</requirement>
        <requirement type="package" version="1.17">numpy</requirement>
    </requirements>
    <!--
        Command template (Cheetah). Invokes marea_cluster.py from the tool
        directory and forwards only the options that belong to the selected
        clustering type. Paths are single-quoted so that locations containing
        spaces or shell metacharacters are passed as a single argument.
    -->
    <command detect_errors="exit_code">
        <![CDATA[
        python '$__tool_directory__/marea_cluster.py'
        --input '$input'
        --tool_dir '$__tool_directory__'
        --out_log '$log'
        --best_cluster '$best_cluster'
        --cluster_type ${data.clust_type}
        ## KMeans: range of k to test plus the two optional plots.
        #if $data.clust_type == 'kmeans':
            --k_min ${data.k_min}
            --k_max ${data.k_max}
            --elbow ${data.elbow}
            --silhouette ${data.silhouette}
        #end if
        ## DBSCAN: eps / min_samples are forwarded only when the user opted in
        ## to custom parameters; otherwise neither flag is passed.
        #if $data.clust_type == 'dbscan':
            #if $data.dbscan_advanced.advanced == 'true':
                --eps ${data.dbscan_advanced.eps}
                --min_samples ${data.dbscan_advanced.min_samples}
            #end if
        #end if
        ## Agglomerative hierarchical: only the range of k to test.
        #if $data.clust_type == 'hierarchy':
            --k_min ${data.k_min}
            --k_max ${data.k_max}
        #end if
        ]]>
    </command>
    <inputs>
        <!-- Dataset to be clustered; forwarded to the script as the input option. -->
        <param name="input" argument="--input" type="data" format="tabular,csv,tsv" label="Input dataset" />

        <!-- The selected clustering type decides which extra parameters are shown and forwarded. -->
        <conditional name="data">
            <param name="clust_type" argument="--cluster_type" type="select" label="Choose clustering type:">
                <option value="kmeans" selected="true">KMeans</option>
                <option value="dbscan">DBSCAN</option>
                <option value="hierarchy">Agglomerative Hierarchical</option>
            </param>
            <when value="kmeans">
                <param name="k_min" argument="--k_min" type="integer" min="2" max="20" value="2" label="Min number of clusters (k) to be tested" />
                <param name="k_max" argument="--k_max" type="integer" min="2" max="20" value="3" label="Max number of clusters (k) to be tested" />
                <!-- Boolean defaults are set with "checked"; a "value" attribute is not used for boolean params. -->
                <param name="elbow" argument="--elbow" type="boolean" truevalue="true" falsevalue="false" checked="true" label="Draw the elbow plot from k-min to k-max"/>
                <param name="silhouette" argument="--silhouette" type="boolean" truevalue="true" falsevalue="false" checked="true" label="Draw the Silhouette plot from k-min to k-max"/>
            </when>
            <when value="dbscan">
                <conditional name="dbscan_advanced">
                    <!-- Test parameter: the command template compares it against the string 'true'. -->
                    <param name="advanced" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Want to use custom params for DBSCAN? (if not optimal values will be used)" />
                    <when value="false"></when>
                    <when value="true">
                        <param name="eps" argument="--eps" type="float" value="0.5" label="Epsilon - The maximum distance between two samples for one to be considered as in the neighborhood of the other" />
                        <param name="min_samples" argument="--min_samples" type="integer" value="5" label="Min samples - The number of samples in a neighborhood for a point to be considered as a core point (this includes the point itself)"/>
                    </when>
                </conditional>
            </when>
            <when value="hierarchy">
                <param name="k_min" argument="--k_min" type="integer" min="2" max="99" value="3" label="Min number of clusters (k) to be tested" />
                <param name="k_max" argument="--k_max" type="integer" min="3" max="99" value="5" label="Max number of clusters (k) to be tested" />
            </when>
        </conditional>
    </inputs>

    <outputs>
        <!-- Log dataset; its path is handed to the script through the out_log option. -->
        <data name="log" format="txt" label="${tool.name} - Log" />
        <!-- Tabular dataset; its path is handed to the script through the best_cluster option. -->
        <data name="best_cluster" format="tabular" label="${tool.name} - Best cluster" />
        <!-- List collection populated from files found in the "clustering" directory of the job. -->
        <collection name="results" type="list" label="${tool.name} - Plots and results">
            <discover_datasets directory="clustering" pattern="__name_and_ext__"/>
        </collection>
    </outputs>
    <help>
<![CDATA[

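What it does
-------------

This tool performs a cluster analysis of the input dataset using one of three
methods: KMeans, DBSCAN or Agglomerative Hierarchical clustering.

It produces a log, a "Best cluster" table and a collection of plots and results.
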
]]>
    </help>
    <expand macro="citations" />
</tool>