Mercurial > repos > bgruening > hicexplorer_hicfindtads
view hicFindTADs.xml @ 4:8b60271e7e54 draft
planemo upload for repository https://github.com/maxplanck-ie/HiCExplorer/tree/master/galaxy/wrapper/ commit dfa5a68cb20842407941c7ffda9ef956a0e86a04
author | iuc |
---|---|
date | Sat, 16 Dec 2017 16:32:17 -0500 |
parents | a9c1d76b90c4 |
children | db2cc9e1ff76 |
line wrap: on
line source
<tool id="hicexplorer_hicfindtads" name="@BINARY@" version="@WRAPPER_VERSION@.0">
    <!--
        Galaxy wrapper around the hicFindTADs command line program.
        It builds the command line from the form inputs below, runs it with the
        fixed output prefix "galaxy_tad_prefix", and collects the six files written
        under that prefix as datasets.
        The "requirements" and "citations" macros come from macros.xml; the
        WRAPPER_VERSION and THREADS tokens are not defined in this file and are
        presumably provided by macros.xml as well (confirm there).
    -->
    <description>find minimum cuts that correspond to boundaries</description>
    <macros>
        <token name="@BINARY@">hicFindTADs</token>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <command detect_errors="exit_code"><![CDATA[
        hicFindTADs
            --matrix '$matrix'

            ## delta is an optional input: emit the flag only when a value was entered,
            ## otherwise a bare "--delta" without a value would be passed to the program.
            ## The comparison is done on the string so that an explicit 0 is still passed.
            #if str($delta) != '':
                --delta $delta
            #end if

            #if $minBoundaryDistance:
                --minBoundaryDistance $minBoundaryDistance
            #end if

            --minDepth $minDepth
            --maxDepth $maxDepth
            --step $step

            ## All three branches use the same option name; only the fdr and
            ## bonferroni branches carry a threshold parameter.
            #if $multiple_comparison_conditional.multiple_comparison_selector == 'fdr':
                --correctForMultipleTesting fdr
                --threshold $multiple_comparison_conditional.threshold
            #elif $multiple_comparison_conditional.multiple_comparison_selector == 'bonferroni':
                --correctForMultipleTesting bonferroni
                --threshold $multiple_comparison_conditional.threshold
            #else:
                --correctForMultipleTesting None
            #end if

            --numberOfProcessors @THREADS@
            --outPrefix galaxy_tad_prefix
    ]]></command>
    <inputs>
        <param argument="--matrix" type="data" format="h5" label="Corrected Hi-C matrix to use for the computations"/>

        <param argument="--minDepth" type="integer" value="40000" label="Minimum window length (in bp) to be considered to the left and to the right of each Hi-C bin." help="This number should be at least 3 times as large as the bin size of the Hi-C matrix."/>

        <param argument="--maxDepth" type="integer" value="100000" label="Maximum window length (in bp) to be considered to the left and to the right of each Hi-C bin." help="This number should be around 6-10 times as large as the bin size of the Hi-C matrix."/>

        <!-- Help attributes are kept on a single line: line breaks and indentation inside an attribute value would become extra spaces after parsing. -->
        <param argument="--step" type="integer" value="10000" label="Step size when moving from minDepth to maxDepth" help="The step size grows exponentially as minDepth + (step * int(x**1.5)) for x in [0, 1, ...] until it reaches maxDepth. For example, selecting step=10,000, minDepth=20,000 and maxDepth=150,000 will compute TAD-scores for window sizes: 20,000, 30,000, 40,000, 70,000 and 100,000"/>

        <!-- The selector value is tested in the command template; "None" has no threshold parameter. -->
        <conditional name="multiple_comparison_conditional">
            <param name="multiple_comparison_selector" type="select" label="Multiple Testing Corrections" >
                <option value="fdr" selected="True">False discovery rate</option>
                <option value="bonferroni">Bonferroni correction</option>
                <option value="None">No correction</option>
            </param>
            <when value="fdr">
                <param name="threshold" type="float" value="0.01" label="q-value" />
            </when>
            <when value="bonferroni">
                <param name="threshold" type="float" value="0.01" label="p-value" />
            </when>
            <when value="None" />
        </conditional>

        <param argument="--delta" type="float" value="0.001" optional="True" label="Minimum threshold of the difference between the TAD-separation score of a putative boundary and the mean of the TAD-sep. score of surrounding bins." help="The delta value reduces spurious boundaries that are shallow, which usually occur at the center of large TADs when the TAD-sep. score is flat. Higher delta threshold values produce more conservative boundary estimations. By default, multiple delta thresholds are saved for the following delta values: 0.001, 0.01, 0.03, 0.05, 0.1. Other single or multiple values can be given."/>

        <param argument="--minBoundaryDistance" type="integer" value="" optional="True" label="Minimum distance between boundaries (in bp)." help="This parameter can be used to reduce spurious boundaries caused by noise."/>
    </inputs>
    <outputs>
        <!-- Each dataset is picked up from the working directory; the file names share the prefix given to the outPrefix option in the command. -->
        <data name="boundaries" from_work_dir="galaxy_tad_prefix_boundaries.bed" format="bed" label="${tool.name} on ${on_string}: Boundary positions" />
        <data name="score" from_work_dir="galaxy_tad_prefix_score.bedgraph" format="bedgraph" label="${tool.name} on ${on_string}: Matrix with multi-scale TAD scores" />
        <data name="domains" from_work_dir="galaxy_tad_prefix_domains.bed" format="bed" label="${tool.name} on ${on_string}: TAD domains" />
        <data name="boundaries_bin" from_work_dir="galaxy_tad_prefix_boundaries.gff" format="gff" label="${tool.name} on ${on_string}: Boundary information plus score" />
        <data name="tad_score" from_work_dir="galaxy_tad_prefix_tad_score.bm" format="bedgraph" label="${tool.name} on ${on_string}: TAD information in bm file" />
        <data name="matrix_output" from_work_dir="galaxy_tad_prefix_zscore_matrix.h5" format="h5" label="${tool.name} on ${on_string}: Z-score matrix in h5" />
    </outputs>
    <tests>
        <!-- FDR run on the small test matrix; outputs are compared by file size only (sim_size) within the given delta. -->
        <test>
            <param name="matrix" value="small_test_matrix.h5" ftype="h5" />
            <param name="minDepth" value="60000"/>
            <param name="maxDepth" value="180000"/>
            <param name="step" value="20000"/>
            <param name="minBoundaryDistance" value="20000" />
            <conditional name="multiple_comparison_conditional">
                <param name="multiple_comparison_selector" value="fdr"/>
                <param name="threshold" value="0.1" />
            </conditional>
            <output name="boundaries" file="find_TADs/multiFDR_boundaries.bed" ftype="bed" compare="sim_size" delta="35000" />
            <output name="boundaries_bin" file="find_TADs/multiFDR_boundaries.gff" ftype="gff" compare="sim_size" delta="35000" />
            <output name="domains" file="find_TADs/multiFDR_domains.bed" ftype="bed" compare="sim_size" delta="35000" />
            <output name="score" file="find_TADs/multiFDR_score.bedgraph" ftype="bedgraph" compare="sim_size" delta="35000" />
            <output name="tad_score" file="find_TADs/multiFDR_tad_score.bm" ftype="bedgraph" compare="sim_size" delta="35000" />
            <output name="matrix_output" file="find_TADs/multiFDR_zscore_matrix.h5" ftype="h5" compare="sim_size" delta="50000" />
        </test>
    </tests>
    <help><![CDATA[

Calculate TADs
==============

Topological associated domains (TADs) are regions on the DNA which tend to interact within the region a lot, but not outside their boundaries. More information_.

Calculation
------------

``hicFindTADs`` computes the TAD regions in two steps:

In a first step it computes a TAD-separation score based on a z-score matrix for all bins. The z-score is defined as:

“The absolute value of z represents the distance between the raw score and the population mean in units of the standard deviation. z is negative when the raw score is below the mean, positive when above.” [Source_].

.. image:: $PATH_TO_IMAGES/z-score.svg
    :width: 150

`Source of image <https://wikimedia.org/api/rest_v1/media/math/render/svg/5ceed701c4042bb34618535c9a902ca1a937a351>`_

In our case the distribution describes the counts per bin of a genomic distance.

In a second step the local minima of the TAD-separation score is evaluated with respect to the surrounding bins to assign a p-value.
Two multiple testing corrections can be applied to filter the results: `Bonferroni <https://en.wikipedia.org/wiki/Bonferroni_correction>`_ or the `false discovery rate <https://en.wikipedia.org/wiki/False_discovery_rate>`_.

Input
-----

Parameters
__________

- contact matrix to compute the TADs on
- minimum window length
- maximum window length
- step size
- multiple testing correction
- minimum threshold
- minimum distance

hicFindTADs tries to identify sensible parameters but those can be changed to identify a more stringent set of boundaries.

Output
------

- Boundary positions as a bed file
- Matrix with multi-scale TAD scores as a bedgraph
- TAD domains as a bed file
- Boundary information plus score as gff
- TAD information in bm file
- Z-score matrix in h5

The calculated TAD regions can be plotted with ``hicPlotTADs``.

.. image:: $PATH_TO_IMAGES/master_TADs_plot.png
    :width: 80 %

For more information about HiCExplorer please consider our documentation on readthedocs.io_

.. _readthedocs.io: http://hicexplorer.readthedocs.io/en/latest/index.html
.. _Source: https://en.wikipedia.org/wiki/Standard_score#Calculation_from_raw_score
.. _information: https://en.wikipedia.org/wiki/Topologically_associating_domain
]]></help>
    <expand macro="citations" />
</tool>