diff hicDetectLoops.xml @ 6:c3f9037423bd draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hicexplorer commit 2a0943e78bdc8ebb13f181399206a9eea37ed78f"
author iuc
date Tue, 16 Mar 2021 15:32:15 +0000
parents 1119cdd14ddb
children 0d6c2ff54c76
line wrap: on
line diff
--- a/hicDetectLoops.xml	Fri Dec 11 21:19:07 2020 +0000
+++ b/hicDetectLoops.xml	Tue Mar 16 15:32:15 2021 +0000
@@ -1,4 +1,4 @@
-<tool id="hicexplorer_hicdetectloops" name="@BINARY@" version="@WRAPPER_VERSION@.0">
+<tool id="hicexplorer_hicdetectloops" name="@BINARY@" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
     <description>searches for enriched regions</description>
     <macros>
         <token name="@BINARY@">hicDetectLoops</token>
@@ -10,7 +10,6 @@
         @BINARY@
 
             --matrix  'matrix.$matrix_h5_cooler.ext'
-
             #if $peakWidth:
                 --peakWidth $peakWidth
             #end if
@@ -27,9 +26,10 @@
                 --peakInteractionsThreshold $peakInteractionsThreshold
             #end if
 
-            #if $maximumInteractionPercentageThreshold:
-                --maximumInteractionPercentageThreshold $maximumInteractionPercentageThreshold
+            #if $obsExpThreshold:
+                --obsExpThreshold $obsExpThreshold
             #end if
+           
             #if $pValue:
                 --pValue $pValue
             #end if
@@ -46,10 +46,8 @@
                 --chromosomes $chromosome
             #end if
 
-            --statisticalTest $statisticalTest_selector
-
+            --expected $expected
             --outFileName output_loop.bedgraph
-
             --threads @THREADS@ -tpc @THREADS@
 ]]>
     </command>
@@ -60,32 +58,34 @@
                            region) are tested against the peak region for significant difference. The square will have the size of (2 * windowSize)^2 bins" />
         <param argument="--pValuePreselection" type="float" label="P-value preselection" help= "Only candidates with p-values less the given threshold will be considered as candidates.
                                 For each genomic distance a negative binomial distribution is fitted and for each pixel a p-value given by the cumulative density function is given.
-                                This does NOT influence the p-value for the neighborhood testing." value='0.05'/>
+                                This does NOT influence the p-value for the neighborhood testing." value='0.05' />
         <param argument="--peakInteractionsThreshold" type="integer" label="Minimum interaction number" help= "The minimum number of interactions a detected peaks needs to have to be considered." value='5' />
-        <param argument="--maximumInteractionPercentageThreshold" type="float" value='0.1' label="Maximum interaction share" help= "For each genomic distance the maximum value is considered and all candidates need to have at least \'max_value * maximumInteractionPercentageThreshold\' interactions." />
-        <param argument="--pValue" type="float" label="P-value" help= "Rejection level for the statistical test for H0. H0 is peak region and background have the same distribution." value='0.05'/>
-        <param argument="--maxLoopDistance" optional='true'  type="integer" label="Maximal loop distance" help= "Maximum genomic distance of a loop, usually loops are within a distance of ~2MB." value='2000000'/>
-        <param argument="--minLoopDistance" optional='true'  type="integer" label="Minimum loop distance" help= "Minimum genomic distance of a loop to be considered." value='100000'/>
-        <param argument="--chromosomes" optional='true'  type="text" label="Chromosomes to include" help= "Chromosomes to include in the analysis. If not set, all chromosomes are included." />
-        <param argument="--region" optional='true'  type="text" label="Chromosomes to include" help= "The format is chr:start-end." />
-        <param name="statisticalTest_selector" type="select" label="Stistical test">
-            <option value="wilcoxon-rank-sum" selected="True">Wilcoxon rank-sum'</option>
-            <option value="anderson-darling">Anderson-Darling</option>
+
+        <!-- Observed/expected threshold; replaces the removed maximumInteractionPercentageThreshold parameter -->
+        <param argument="--obsExpThreshold" type="float" label="Obs/exp interaction threshold" help= "The minimum number of obs/exp interactions a detected peaks needs to have to be considered" value='1.5' />
+
+        <param argument="--pValue" type="float" label="P-value" help= "Rejection level for the statistical test for H0. H0 is peak region and background have the same distribution." value='0.05' />
+        <param argument="--maxLoopDistance" optional='true' type="integer" label="Maximal loop distance" help= "Maximum genomic distance of a loop, usually loops are within a distance of ~2MB." value='2000000' />
+        <param argument="--chromosomes" optional='true' type="text" label="Chromosomes to include" help= "Chromosomes to include in the analysis. If not set, all chromosomes are included." />
+        <param argument="--region" optional='true' type="text" label="Region to include" help= "The format is chr:start-end." />
+        <param argument="--expected" type="select" label="Method to compute the expected value"> <!-- the selected option value is passed verbatim on the command line, so every value must be distinct and match its label -->
+            <option value="mean" selected="True">mean</option>
+            <option value="mean_nonzero">mean_nonzero</option>
+            <option value="mean_nonzero_ligation">mean_nonzero_ligation</option>
         </param>
     </inputs>
     <outputs>
-        <data name='output_loops' from_work_dir='output_loop.bedgraph' format='bedgraph' label='Computed loops'/>
+        <data name='output_loops' from_work_dir='output_loop.bedgraph' format='bedgraph' label='Computed loops' />
     </outputs>
     <tests>
         <test>
-            <param name="matrix_h5_cooler" value="small_test_matrix.cool"/>
-            <param name="maxLoopDistance" value="30000000"/>
-            <param name="windowSize" value="5"/>
-            <param name="peakWidth" value="2"/>
-            <param name="pValue" value="0.5"/>
-            <param name="pValuePreselection" value="0.55"/>
-            <!-- <param name="chromosomes" value="X"/> -->
-            <output name="output_loops" file="hicDetectLoops/loops.bedgraph" ftype="bedgraph" compare="sim_size"/>
+            <param name="matrix_h5_cooler" value="small_test_matrix.cool" />
+            <param name="maxLoopDistance" value="30000000" />
+            <param name="windowSize" value="5" />
+            <param name="peakWidth" value="2" />
+            <param name="pValue" value="0.5" />
+            <param name="pValuePreselection" value="0.55" />
+            <output name="output_loops" file="hicDetectLoops/loops.bedgraph" ftype="bedgraph" compare="sim_size" />
         </test>
     </tests>
     <help><![CDATA[
@@ -95,7 +95,7 @@
 
 Computes enriched regions (peaks) or long range contacts on the given contact matrix.
 
-hicDetectLoops can detect enriched interaction regions (peaks / loops) based on a strict candidate selection, negative binomial distributions and Anderson-Darling / Wilcoxon rank-sum tests.
+hicDetectLoops can detect enriched interaction regions (peaks / loops) based on a strict candidate selection, negative binomial distributions and Wilcoxon rank-sum tests.
 
 The algorithm was mainly developed on GM12878 cells from Rao 2014 on 10kb and 5kb fixed bin size resolution.
 
@@ -106,7 +106,7 @@
 
 A command line example is available below (easily matchable in Galaxy using each field information):
 
-̀`$ hicDetectLoops -m matrix.cool -o loops.bedgraph --maxLoopDistance 2000000 --windowSize 10 --peakWidth 6 --pValuePreselection 0.05 --pValue 0.05 --peakInteractionsThreshold 20 --maximumInteractionPercentageThreshold 0.1 --statisticTest anderson-darling`
+``$ hicDetectLoops -m matrix.cool -o loops.bedgraph --maxLoopDistance 2000000 --windowSize 10 --peakWidth 6 --pValuePreselection 0.05 --pValue 0.05 --peakInteractionsThreshold 20``
 
 The candidate selection is based on the restriction of the maximum genomic distance, here 2MB. This distance is given by Rao 2014. For each genomic distance a negative binomial distribution is computed and only interaction pairs with a p-value less than ``--pValuePreselection`` are accepted. Detected candidates need to have at least an obs/exp interaction value of ``--obsExpThreshold``. Please note that ``--obsExpThreshold`` replaces the ``--maximumInteractionPercentageThreshold`` parameter, which was introduced with HiCExplorer release 3.2; outputs of earlier versions may therefore differ. In a second step, each candidate is compared to its neighborhood. This neighborhood is defined by the ``--windowSize`` parameter in the x and y dimension. Per neighborhood only one candidate is considered, therefore only the candidate with the highest peak values is accepted. As a last step, the neighborhood is split into a peak and background region (parameter ``--peakWidth``). The peakWidth can never be larger than the windowSize. However, we recommend for 10kb matrices a windowSize of 10 and a peakWidth of 6.
 
@@ -126,6 +126,6 @@
 
 .. _readthedocs.io: http://hicexplorer.readthedocs.io/en/latest/index.html
 
-]]></help>
+]]>    </help>
     <expand macro="citations" />
 </tool>