Mercurial > repos > bgruening > hicexplorer_hicdetectloops
comparison hicDetectLoops.xml @ 6:c3f9037423bd draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hicexplorer commit 2a0943e78bdc8ebb13f181399206a9eea37ed78f"
author | iuc |
---|---|
date | Tue, 16 Mar 2021 15:32:15 +0000 |
parents | 1119cdd14ddb |
children | 0d6c2ff54c76 |
comparison
equal
deleted
inserted
replaced
5:f16ab52334dd | 6:c3f9037423bd |
---|---|
1 <tool id="hicexplorer_hicdetectloops" name="@BINARY@" version="@WRAPPER_VERSION@.0"> | 1 <tool id="hicexplorer_hicdetectloops" name="@BINARY@" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"> |
2 <description>searches for enriched regions</description> | 2 <description>searches for enriched regions</description> |
3 <macros> | 3 <macros> |
4 <token name="@BINARY@">hicDetectLoops</token> | 4 <token name="@BINARY@">hicDetectLoops</token> |
5 <import>macros.xml</import> | 5 <import>macros.xml</import> |
6 </macros> | 6 </macros> |
8 <command detect_errors="exit_code"><![CDATA[ | 8 <command detect_errors="exit_code"><![CDATA[ |
9 ln -s '$matrix_h5_cooler' 'matrix.$matrix_h5_cooler.ext' && | 9 ln -s '$matrix_h5_cooler' 'matrix.$matrix_h5_cooler.ext' && |
10 @BINARY@ | 10 @BINARY@ |
11 | 11 |
12 --matrix 'matrix.$matrix_h5_cooler.ext' | 12 --matrix 'matrix.$matrix_h5_cooler.ext' |
13 | |
14 #if $peakWidth: | 13 #if $peakWidth: |
15 --peakWidth $peakWidth | 14 --peakWidth $peakWidth |
16 #end if | 15 #end if |
17 | 16 |
18 #if $windowSize: | 17 #if $windowSize: |
25 | 24 |
26 #if $peakInteractionsThreshold: | 25 #if $peakInteractionsThreshold: |
27 --peakInteractionsThreshold $peakInteractionsThreshold | 26 --peakInteractionsThreshold $peakInteractionsThreshold |
28 #end if | 27 #end if |
29 | 28 |
30 #if $maximumInteractionPercentageThreshold: | 29 #if $obsExpThreshold: |
31 --maximumInteractionPercentageThreshold $maximumInteractionPercentageThreshold | 30 --obsExpThreshold $obsExpThreshold |
32 #end if | 31 #end if |
32 | |
33 #if $pValue: | 33 #if $pValue: |
34 --pValue $pValue | 34 --pValue $pValue |
35 #end if | 35 #end if |
36 #if $maxLoopDistance: | 36 #if $maxLoopDistance: |
37 --maxLoopDistance $maxLoopDistance | 37 --maxLoopDistance $maxLoopDistance |
44 #if $chromosomes: | 44 #if $chromosomes: |
45 #set $chromosome = ' '.join([ '\'%s\'' % $chrom for $chrom in str($chromosomes).split(' ') ]) | 45 #set $chromosome = ' '.join([ '\'%s\'' % $chrom for $chrom in str($chromosomes).split(' ') ]) |
46 --chromosomes $chromosome | 46 --chromosomes $chromosome |
47 #end if | 47 #end if |
48 | 48 |
49 --statisticalTest $statisticalTest_selector | 49 --expected $expected |
50 | |
51 --outFileName output_loop.bedgraph | 50 --outFileName output_loop.bedgraph |
52 | |
53 --threads @THREADS@ -tpc @THREADS@ | 51 --threads @THREADS@ -tpc @THREADS@ |
54 ]]> | 52 ]]> |
55 </command> | 53 </command> |
56 <inputs> | 54 <inputs> |
57 <expand macro="matrix_h5_cooler_macro" /> | 55 <expand macro="matrix_h5_cooler_macro" /> |
58 <param argument="--peakWidth" type="integer" optional='true' label="Peak width" help= "The width of the peak region in bins. The square around the peak will include (2 * peakWidth)^2 bins." /> | 56 <param argument="--peakWidth" type="integer" optional='true' label="Peak width" help= "The width of the peak region in bins. The square around the peak will include (2 * peakWidth)^2 bins." /> |
59 <param argument="--windowSize" type="integer" optional='true' label="Window size" help= "The window size for the neighborhood region the peak is located in. All values from this region (exclude the values from the peak | 57 <param argument="--windowSize" type="integer" optional='true' label="Window size" help= "The window size for the neighborhood region the peak is located in. All values from this region (exclude the values from the peak |
60 region) are tested against the peak region for significant difference. The square will have the size of (2 * windowSize)^2 bins" /> | 58 region) are tested against the peak region for significant difference. The square will have the size of (2 * windowSize)^2 bins" /> |
61 <param argument="--pValuePreselection" type="float" label="P-value preselection" help= "Only candidates with p-values less the given threshold will be considered as candidates. | 59 <param argument="--pValuePreselection" type="float" label="P-value preselection" help= "Only candidates with p-values less the given threshold will be considered as candidates. |
62 For each genomic distance a negative binomial distribution is fitted and for each pixel a p-value given by the cumulative density function is given. | 60 For each genomic distance a negative binomial distribution is fitted and for each pixel a p-value given by the cumulative density function is given. |
63 This does NOT influence the p-value for the neighborhood testing." value='0.05'/> | 61 This does NOT influence the p-value for the neighborhood testing." value='0.05' /> |
64 <param argument="--peakInteractionsThreshold" type="integer" label="Minimum interaction number" help= "The minimum number of interactions a detected peaks needs to have to be considered." value='5' /> | 62 <param argument="--peakInteractionsThreshold" type="integer" label="Minimum interaction number" help= "The minimum number of interactions a detected peaks needs to have to be considered." value='5' /> |
65 <param argument="--maximumInteractionPercentageThreshold" type="float" value='0.1' label="Maximum interaction share" help= "For each genomic distance the maximum value is considered and all candidates need to have at least \'max_value * maximumInteractionPercentageThreshold\' interactions." /> | 63 |
66 <param argument="--pValue" type="float" label="P-value" help= "Rejection level for the statistical test for H0. H0 is peak region and background have the same distribution." value='0.05'/> | 64 <!-- new--> |
67 <param argument="--maxLoopDistance" optional='true' type="integer" label="Maximal loop distance" help= "Maximum genomic distance of a loop, usually loops are within a distance of ~2MB." value='2000000'/> | 65 <param argument="--obsExpThreshold" type="float" label="Obs/exp interaction threshold" help= "The minimum number of obs/exp interactions a detected peaks needs to have to be considered" value='1.5' /> |
68 <param argument="--minLoopDistance" optional='true' type="integer" label="Minimum loop distance" help= "Minimum genomic distance of a loop to be considered." value='100000'/> | 66 |
69 <param argument="--chromosomes" optional='true' type="text" label="Chromosomes to include" help= "Chromosomes to include in the analysis. If not set, all chromosomes are included." /> | 67 <param argument="--pValue" type="float" label="P-value" help= "Rejection level for the statistical test for H0. H0 is peak region and background have the same distribution." value='0.05' /> |
70 <param argument="--region" optional='true' type="text" label="Chromosomes to include" help= "The format is chr:start-end." /> | 68 <param argument="--maxLoopDistance" optional='true' type="integer" label="Maximal loop distance" help= "Maximum genomic distance of a loop, usually loops are within a distance of ~2MB." value='2000000' /> |
71 <param name="statisticalTest_selector" type="select" label="Stistical test"> | 69 <param argument="--chromosomes" optional='true' type="text" label="Chromosomes to include" help= "Chromosomes to include in the analysis. If not set, all chromosomes are included." /> |
72 <option value="wilcoxon-rank-sum" selected="True">Wilcoxon rank-sum'</option> | 70 <param argument="--region" optional='true' type="text" label="Chromosomes to include" help= "The format is chr:start-end." /> |
73 <option value="anderson-darling">Anderson-Darling</option> | 71 <param argument="--expected" type="select" label="Method to compute the expected value"> |
72 <option value="mean" selected="True">mean</option> | |
73 <option value="mean_nonzero">mean_nonzero</option> | |
74 <option value="mean_nonzero">mean_nonzero_ligation</option> | |
74 </param> | 75 </param> |
75 </inputs> | 76 </inputs> |
76 <outputs> | 77 <outputs> |
77 <data name='output_loops' from_work_dir='output_loop.bedgraph' format='bedgraph' label='Computed loops'/> | 78 <data name='output_loops' from_work_dir='output_loop.bedgraph' format='bedgraph' label='Computed loops' /> |
78 </outputs> | 79 </outputs> |
79 <tests> | 80 <tests> |
80 <test> | 81 <test> |
81 <param name="matrix_h5_cooler" value="small_test_matrix.cool"/> | 82 <param name="matrix_h5_cooler" value="small_test_matrix.cool" /> |
82 <param name="maxLoopDistance" value="30000000"/> | 83 <param name="maxLoopDistance" value="30000000" /> |
83 <param name="windowSize" value="5"/> | 84 <param name="windowSize" value="5" /> |
84 <param name="peakWidth" value="2"/> | 85 <param name="peakWidth" value="2" /> |
85 <param name="pValue" value="0.5"/> | 86 <param name="pValue" value="0.5" /> |
86 <param name="pValuePreselection" value="0.55"/> | 87 <param name="pValuePreselection" value="0.55" /> |
87 <!-- <param name="chromosomes" value="X"/> --> | 88 <output name="output_loops" file="hicDetectLoops/loops.bedgraph" ftype="bedgraph" compare="sim_size" /> |
88 <output name="output_loops" file="hicDetectLoops/loops.bedgraph" ftype="bedgraph" compare="sim_size"/> | |
89 </test> | 89 </test> |
90 </tests> | 90 </tests> |
91 <help><![CDATA[ | 91 <help><![CDATA[ |
92 | 92 |
93 Loop detection | 93 Loop detection |
94 ============== | 94 ============== |
95 | 95 |
96 Computes enriched regions (peaks) or long range contacts on the given contact matrix. | 96 Computes enriched regions (peaks) or long range contacts on the given contact matrix. |
97 | 97 |
98 hicDetectLoops can detect enriched interaction regions (peaks / loops) based on a strict candidate selection, negative binomial distributions and Anderson-Darling / Wilcoxon rank-sum tests. | 98 hicDetectLoops can detect enriched interaction regions (peaks / loops) based on a strict candidate selection, negative binomial distributions and Wilcoxon rank-sum tests. |
99 | 99 |
100 The algorithm was mainly developed on GM12878 cells from Rao 2014 on 10kb and 5kb fixed bin size resolution. | 100 The algorithm was mainly developed on GM12878 cells from Rao 2014 on 10kb and 5kb fixed bin size resolution. |
101 | 101 |
102 _________________ | 102 _________________ |
103 | 103 |
104 Usage | 104 Usage |
105 ----- | 105 ----- |
106 | 106 |
107 A command line example is available below (easily matchable in Galaxy using each field information): | 107 A command line example is available below (easily matchable in Galaxy using each field information): |
108 | 108 |
109 ``$ hicDetectLoops -m matrix.cool -o loops.bedgraph --maxLoopDistance 2000000 --windowSize 10 --peakWidth 6 --pValuePreselection 0.05 --pValue 0.05 --peakInteractionsThreshold 20 --maximumInteractionPercentageThreshold 0.1 --statisticTest anderson-darling`` | 109 ``$ hicDetectLoops -m matrix.cool -o loops.bedgraph --maxLoopDistance 2000000 --windowSize 10 --peakWidth 6 --pValuePreselection 0.05 --pValue 0.05 --peakInteractionsThreshold 20`` |
110 | 110 |
111 The candidate selection is based on the restriction of the maximum genomic distance, here 2MB. This distance is given by Rao 2014. For each genomic distance a negative binomial distribution is computed and only interaction pairs with a threshold less than ``--pValuePreselection`` are accepted. Detected candidates need to have at least an interaction count of ``--maximumInteractionPercentageThreshold`` times the maximum value for their genomic distance. Please note that ``--maximumInteractionPercentageThreshold`` was introduced with HiCExplorer release 3.2. Earlier versions did not have this parameter yet and therefore their outputs may differ. In a second step, each candidate is considered compared to its neighborhood. This neighborhood is defined by the ``--windowSize`` parameter in the x and y dimension. Per neighborhood only one candidate is considered, therefore only the candidate with the highest peak values is accepted. As a last step, the neighborhood is split into a peak and background region (parameter ``--peakWidth``). The peakWidth can never be larger than the windowSize. However, we recommend for 10kb matrices a windowSize of 10 and a peakWidth of 6. | 111 The candidate selection is based on the restriction of the maximum genomic distance, here 2MB. This distance is given by Rao 2014. For each genomic distance a negative binomial distribution is computed and only interaction pairs with a threshold less than ``--pValuePreselection`` are accepted. Detected candidates need to have at least an interaction count of ``--maximumInteractionPercentageThreshold`` times the maximum value for their genomic distance. Please note that ``--maximumInteractionPercentageThreshold`` was introduced with HiCExplorer release 3.2. Earlier versions did not have this parameter yet and therefore their outputs may differ. In a second step, each candidate is considered compared to its neighborhood. 
This neighborhood is defined by the ``--windowSize`` parameter in the x and y dimension. Per neighborhood only one candidate is considered, therefore only the candidate with the highest peak values is accepted. As a last step, the neighborhood is split into a peak and background region (parameter ``--peakWidth``). The peakWidth can never be larger than the windowSize. However, we recommend for 10kb matrices a windowSize of 10 and a peakWidth of 6. |
112 | 112 |
113 The output file (``-o loops.bedgraph``) contains the x and y position of each loop and its corresponding p-value of the Anderson-Darling test. | 113 The output file (``-o loops.bedgraph``) contains the x and y position of each loop and its corresponding p-value of the Anderson-Darling test. |
114 | 114 |
124 | 124 |
125 For more information about HiCExplorer please consider our documentation on readthedocs.io_. | 125 For more information about HiCExplorer please consider our documentation on readthedocs.io_. |
126 | 126 |
127 .. _readthedocs.io: http://hicexplorer.readthedocs.io/en/latest/index.html | 127 .. _readthedocs.io: http://hicexplorer.readthedocs.io/en/latest/index.html |
128 | 128 |
129 ]]></help> | 129 ]]> </help> |
130 <expand macro="citations" /> | 130 <expand macro="citations" /> |
131 </tool> | 131 </tool> |