view hicFindTADs.xml @ 0:aab371aa615e draft

planemo upload for repository https://github.com/maxplanck-ie/HiCExplorer/tree/master/galaxy/wrapper/ commit 2f347b0756d720114f037ed1ff9ba4836e1b3b04
author bgruening
date Thu, 30 Mar 2017 03:22:25 -0400
parents
children a9c1d76b90c4
line wrap: on
line source

<tool id="hicexplorer_hicfindtads" name="@BINARY@" version="@WRAPPER_VERSION@.0">
    <description>find minimum cuts that correspond to boundaries</description>
    <macros>
        <token name="@BINARY@">hicFindTADs</token>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <command detect_errors="exit_code"><![CDATA[
        ## Command line for hicFindTADs. The sub-command is chosen by the
        ## 'mode' conditional, so every parameter is addressed via $mode.

        #if $mode.mode_selector == 'TAD_score':
            ## Expose the input matrix under a fixed name with an .h5 suffix
            ## (presumably so the file type is recognised by extension).
            ln -s '$mode.matrix' matrix.npz.h5 &&
        #end if

        #if $mode.mode_selector == 'find_TADs':
            ## Stage the score file and its z-score matrix side by side under
            ## the same names that TAD_score mode writes (see the outputs
            ## section). Assumption to confirm: hicFindTADs locates the matrix
            ## as '<tadScoreFile>_zscore_matrix.h5'.
            ln -s '$mode.tadScoreFile' ./tadScoreFile.tabular &&
            ln -s '$mode.tad_score_zscore_matrix' ./tadScoreFile.tabular_zscore_matrix.h5 &&
        #end if

        hicFindTADs
            $mode.mode_selector

            #if $mode.mode_selector == 'find_TADs':
                --tadScoreFile ./tadScoreFile.tabular
                --outPrefix galaxy_tad_prefix
                ##--maxThreshold $mode.maxThreshold
                ## --delta is optional in the form; only pass it when a value is set.
                #if str($mode.delta) != '':
                    --delta $mode.delta
                #end if
                --pvalue $mode.pvalue
                ## Compare as string so that an explicit 0 is still passed on.
                #if str($mode.minBoundaryDistance) != '':
                    --minBoundaryDistance $mode.minBoundaryDistance
                #end if

            #else:
                --matrix matrix.npz.h5
                --maxDepth $mode.maxDepth
                --minDepth $mode.minDepth
                --outFileName ./tadScoreFile.tabular

                --step $mode.step
            #end if

    ]]></command>
    <inputs>

        <!-- The selected option value is passed to hicFindTADs as its sub-command;
             each <when> branch holds the parameters of that sub-command. -->
        <conditional name="mode">
            <param name="mode_selector" type="select" label="Computation to perform"
                   help="'TAD score' produces a TAD score file and a z-score matrix; both are the inputs of 'find TADs'.">
                <option value="find_TADs">find TADs</option>
                <option value="TAD_score">TAD score</option>
            </param>
            <when value="find_TADs">
                <param argument="--tadScoreFile" type="data" format="tabular" label="TAD score file"/>
                <param name="tad_score_zscore_matrix" type="data" format="h5" label="TAD score Matrix file"/>

                <param argument="--delta" type="float" value="0.001" optional="True"
                       label="Minimum threshold of the difference between the TAD-separation score of a putative boundary and the mean of the TAD-sep. score of surrounding bins."
                        help="The delta value reduces spurious boundaries that are shallow, which usually
                              occur at the center of large TADs when the TAD-sep. score is flat. Higher
                              delta threshold values produce more conservative boundary estimations. By
                              default, multiple delta thresholds are saved for the following delta
                              values: 0.001, 0.01, 0.03, 0.05, 0.1. Other single or multiple values
                              can be given."/>
                <param argument="--pvalue" type="float" value="0.01"
                    label="P-value threshold"
                    help="The probability of a local minimum to be a boundary is estimated by comparing the distribution (Wilcoxon ranksum) of the
                            zscores between the left and right regions (diamond) at the local minimum with the matrix zscores for a diamond at
                            --minDepth to the left and a diamond --minDepth to the right.
                            The reported pvalue is the Bonferroni correction of all pvalues."/>
                <param argument="--minBoundaryDistance" type="integer" value="" optional="True"
                    label="Minimum distance between boundaries (in bp)."
                    help="This parameter can be used to reduce spurious boundaries caused by noise. "/>

            </when>
            <when value="TAD_score">

                <param argument="--matrix" type="data" format="h5" label="Corrected Hi-C matrix to use for the computations"/>
                <param argument="--minDepth" type="integer" value="30000"
                    label="Minimum window length (in bp) to be considered to the left and to the right of each Hi-C bin."
                    help="This number should be at least 3 times as large as the bin size of the Hi-C matrix."/>
                <param argument="--maxDepth" type="integer" value="100000"
                    label="Maximum window length (in bp) to be considered to the left and to the right of each Hi-C bin."
                    help="This number should be around 6-10 times as large as the bin size of the Hi-C matrix."/>
                <param argument="--step" type="integer" value="10000"
                    label="Step size when moving from minDepth to maxDepth"
                    help="The step size grows exponentially as `minDepth + (step * int(x)**1.5) for x in [0, 1, ...]`
                    until it reaches `maxDepth`. For example, selecting step=10,000, minDepth=20,000
                    and maxDepth=150,000 will compute TAD-scores for window sizes:
                    20,000, 30,000, 40,000, 70,000 and 100,000"/>
            </when>
        </conditional>


    </inputs>
    <outputs>
        <!-- Outputs of the TAD_score sub-command; file names follow the
             outFileName ./tadScoreFile.tabular given on the command line. -->
        <data name="outFileName" from_work_dir="tadScoreFile.tabular" format="tabular"
            label="${tool.name} on ${on_string}: TAD score">
            <filter>mode['mode_selector'] == "TAD_score"</filter>
        </data>
        <data name="tad_score_zscore_matrix" from_work_dir="tadScoreFile.tabular_zscore_matrix.h5" format="h5"
            label="${tool.name} on ${on_string}: TAD score z-score matrix">
            <filter>mode['mode_selector'] == "TAD_score"</filter>
        </data>
        <!-- Outputs of the find_TADs sub-command; all are collected using the
             outPrefix galaxy_tad_prefix given on the command line. -->
        <data name="boundaries" from_work_dir="galaxy_tad_prefix_boundaries.bed" format="bed"
            label="${tool.name} on ${on_string}: Boundary positions">
            <filter>mode['mode_selector'] == "find_TADs"</filter>
        </data>
        <data name="score" from_work_dir="galaxy_tad_prefix_score.bedgraph" format="bedgraph"
            label="${tool.name} on ${on_string}: Matrix with multi-scale TAD scores">
            <filter>mode['mode_selector'] == "find_TADs"</filter>
        </data>
        <data name="domains" from_work_dir="galaxy_tad_prefix_domains.bed" format="bed"
            label="${tool.name} on ${on_string}: TAD domains">
            <filter>mode['mode_selector'] == "find_TADs"</filter>
        </data>
        <!-- The collected file is a GFF, so the datatype is gff (was declared as bed). -->
        <data name="boundaries_bin" from_work_dir="galaxy_tad_prefix_boundaries.gff"
            format="gff" label="${tool.name} on ${on_string}: Boundary information plus score">
            <filter>mode['mode_selector'] == "find_TADs"</filter>
        </data>
    </outputs>
    <tests>
        <!-- NOTE(review): both test cases below are commented out, so this tool
             currently has no active tests. The reason is not recorded in this
             file; confirm the test data and expected outputs before re-enabling. -->
        <!--test>
            <param name="matrix" value="hicBuildMatrix_result1.h5" ftype="h5"/>
            <param name="mode_selector" value="TAD_score"/>
            <param name="minDepth" value="20000"/>
            <param name="maxDepth" value="60000"/>
            <param name="step" value="100000"/>
            <output name="outFileName" file="hicFindTADs_TAD_score.tabular" ftype="tabular"/>
            <output name="tad_score_zscore_matrix" file="tadScoreFile.tabular_zscore_matrix.h5" ftype="h5" compare="sim_size"/>
        </test>
        <test>
            <param name="tadScoreFile" value="hicFindTADs_TAD_score.tabular" ftype="tabular"/>
            <param name="tad_score_zscore_matrix" value="tadScoreFile.tabular_zscore_matrix.h5" ftype="h5"/>
            <param name="mode_selector" value="find_TADs"/>
            <param name="delta" value="0.002"/>
            <param name="pvalue" value="0.01"/>

            <output name="boundaries" file="hicFindTADs_find_boundaries.bed" ftype="bed"/>
            <output name="score" file="hicFindTADs_find_score.bedgraph" ftype="bedgraph"/>
            <output name="domains" file="hicFindTADs_find_domains.bed" ftype="bed"/>
            <output name="boundaries_bin" file="hicFindTADs_find_boundaries.gff" ftype="gff"/>
        </test-->
    </tests>
    <help><![CDATA[

**What it does**

Uses the graph clustering measure "conductance" to find minimum cuts that correspond to boundaries.

]]></help>
    <expand macro="citations" />
</tool>