view scHicClusterSVL.xml @ 1:eeb26f378f34 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/schicexplorer commit 72e1e90ac05a32dbd6fc675073429c0086048b18"
author iuc
date Tue, 10 Mar 2020 15:08:53 -0400
parents 7cbd84a1f985
children e6ec3914a076
line wrap: on
line source

<tool id="schicexplorer_schicclustersvl" name="@BINARY@" version="@WRAPPER_VERSION@.0">
    <description>clusters single-cell Hi-C interaction matrices with svl dimension reduction</description>
    <macros>
        <!-- Name of the wrapped executable; substituted wherever @BINARY@ appears in this file (tool name and command line). -->
        <token name="@BINARY@">scHicClusterSVL</token>
        <!-- Shared definitions; presumably provides the macros expanded below (requirements, matrix_scooler_macro, citations) and the remaining @...@ tokens. TODO confirm against macros.xml. -->
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <command detect_errors="exit_code"><![CDATA[
        ## Run the clustering on the selected matrix. The result is written to
        ## cluster_list.txt in the working directory, where the output section collects it.
        @BINARY@

        --matrix '$matrix_scooler'

        --numberOfClusters $numberOfClusters
        --distanceShortRange $distanceShortRange
        --distanceLongRange $distanceLongRange

        --clusterMethod $clusterMethod_selector

        ## Pass the chromosome list only when at least one name was entered.
        ## Splitting on any run of whitespace (rather than on a single space)
        ## ignores leading, trailing and repeated blanks, which would otherwise
        ## be handed to the program as empty quoted arguments.
        #if $chromosomes and str($chromosomes).strip():
            #set $chromosome = ' '.join([ '\'%s\'' % $chrom for $chrom in str($chromosomes).split() ])
            --chromosomes $chromosome
        #end if

        --outFileName cluster_list.txt

        --threads @THREADS@
    ]]></command>
    <inputs>
        <!-- Expands a macro imported from macros.xml; presumably it declares the matrix_scooler parameter read by the command section. -->
        <expand macro="matrix_scooler_macro"/>

        <!-- Clustering algorithm; the selected value is passed to the clusterMethod option. -->
        <param name="clusterMethod_selector" type="select" label="Cluster method:">
            <option value="kmeans" selected="true">K-means</option>
            <option value="spectral">Spectral clustering</option>
        </param>

        <!-- min="1": zero or a negative number of clusters is never meaningful, so it is rejected in the form instead of failing the job. -->
        <param name="numberOfClusters" type="integer" value="7" min="1" label="Number of clusters" help="How many clusters should be computed by kmeans or spectral clustering"/>
        <!-- Distance thresholds used for the short range / long range interaction ratio. -->
        <param name="distanceShortRange" type="integer" value="2000000" label="Distance short range" help="Distance for the short range to compute the ratio of sum (short range interactions) / sum (long range interactions)"/>
        <param name="distanceLongRange" type="integer" value="12000000" label="Distance long range" help="Distance for the long range to compute the ratio of sum (short range interactions) / sum (long range interactions)"/>

        <!-- Free-text, space-separated list; when left empty the command omits the chromosomes option. -->
        <param name="chromosomes" type="text" label="List of chromosomes to consider" help="Please separate the chromosomes by space"/>
    </inputs>
    <outputs>
        <!-- Cluster result list, collected from cluster_list.txt, the file name the command passes as outFileName. -->
        <data name="outFileName"
              format="txt"
              from_work_dir="cluster_list.txt"
              label="${tool.name} on ${on_string}: Cluster results"/>
    </outputs>
    <tests>
        <!-- K-means, three clusters, default distance ranges. The result is compared by file size only (sim_size, tolerance 4000). -->
        <test>
            <param name="matrix_scooler" value="test_matrix.scool"/>
            <param name="clusterMethod_selector" value="kmeans"/>
            <param name="numberOfClusters" value="3"/>
            <param name="distanceShortRange" value="2000000"/>
            <param name="distanceLongRange" value="12000000"/>

            <output name="outFileName" file="scHicClusterSVL/cluster_kmeans.txt" ftype="txt" compare="sim_size" delta="4000"/>
        </test>

        <!-- Spectral clustering with the same settings; also compared by file size only. -->
        <test>
            <param name="matrix_scooler" value="test_matrix.scool"/>
            <param name="clusterMethod_selector" value="spectral"/>
            <param name="numberOfClusters" value="3"/>
            <param name="distanceShortRange" value="2000000"/>
            <param name="distanceLongRange" value="12000000"/>

            <output name="outFileName" file="scHicClusterSVL/cluster_spectral.txt" ftype="txt" compare="sim_size" delta="4000"/>
        </test>
    </tests>
    <help><![CDATA[

Clustering with dimension reduction via short vs long range ratio
=================================================================

scHicClusterSVL uses k-means or spectral clustering to assign each cell to a cluster and therefore to its cell cycle stage.
The clustering is applied to dimension-reduced data based on the ratio of short- vs. long-range contacts per chromosome. This approach reduces the number of dimensions from samples * (number of bins)^2 to samples * (number of chromosomes).
Please also consider the other clustering and dimension reduction approaches of the scHiCExplorer suite, such as `scHicCluster`, `scHicClusterCompartments` and `scHicClusterMinHash`. They may give you better results,
be faster or be less memory demanding.

For more information about scHiCExplorer please consider our documentation on readthedocs.io_

.. _readthedocs.io: http://schicexplorer.readthedocs.io/
]]></help>
    <expand macro="citations" />

</tool>