Mercurial > repos > bgruening > hicexplorer_hiccorrectmatrix
comparison hicCorrectMatrix.xml @ 9:ac80bd0a96ca draft
planemo upload for repository https://github.com/maxplanck-ie/HiCExplorer/tree/master/galaxy/wrapper/ commit eec0a4d5a7c5ba4ec0fbd2ead8280c3d143bb9d8
author | iuc |
---|---|
date | Fri, 27 Apr 2018 03:29:59 -0400 |
parents | f7d344dacfeb |
children | bfa1c014f64a |
comparison
equal
deleted
inserted
replaced
8:f7d344dacfeb | 9:ac80bd0a96ca |
---|---|
1 <tool id="hicexplorer_hiccorrectmatrix" name="@BINARY@" version="@WRAPPER_VERSION@.0"> | 1 <tool id="hicexplorer_hiccorrectmatrix" name="@BINARY@" version="@WRAPPER_VERSION@.0"> |
2 <description>Runs Dekker's iterative correction over a hic matrix.</description> | 2 <description>run Imakaev's iterative correction over a Hi-C contact matrix.</description> |
3 <macros> | 3 <macros> |
4 <token name="@BINARY@">hicCorrectMatrix</token> | 4 <token name="@BINARY@">hicCorrectMatrix</token> |
5 <import>macros.xml</import> | 5 <import>macros.xml</import> |
6 </macros> | 6 </macros> |
7 <expand macro="requirements" /> | 7 <expand macro="requirements" /> |
62 ]]> | 62 ]]> |
63 </command> | 63 </command> |
64 <inputs> | 64 <inputs> |
65 <expand macro='matrix_h5_cooler_macro' /> | 65 <expand macro='matrix_h5_cooler_macro' /> |
66 <conditional name="mode"> | 66 <conditional name="mode"> |
67 <param name="mode_selector" type="select" label="Range restriction (in bp)" argument="--range"> | 67 <param name="mode_selector" type="select" label="Mode"> |
68 <option value="diagnostic_plot">Diagnostic plot</option> | 68 <option value="diagnostic_plot">Diagnostic plot</option> |
69 <option value="correct">Correct matrix</option> | 69 <option value="correct">Correct matrix</option> |
70 </param> | 70 </param> |
71 <when value="diagnostic_plot"> | 71 <when value="diagnostic_plot"> |
72 <expand macro="xMax" /> | 72 <expand macro="xMax" /> |
103 </conditional> | 103 </conditional> |
104 | 104 |
105 <repeat name="chromosomes" min="0" | 105 <repeat name="chromosomes" min="0" |
106 title="Include chromosomes" help="List of chromosomes to be included in the iterative correction. | 106 title="Include chromosomes" help="List of chromosomes to be included in the iterative correction. |
107 The order of the given chromosomes will be kept for the resulting corrected matrix"> | 107 The order of the given chromosomes will be kept for the resulting corrected matrix"> |
108 <param name="chromosome" type="text" value="" > | 108 <param name="chromosome" type="text" value="" label='chromosome (one per field)'> |
109 <validator type="empty_field" /> | 109 <validator type="empty_field" /> |
110 </param> | 110 </param> |
111 </repeat> | 111 </repeat> |
112 | 112 |
113 </inputs> | 113 </inputs> |
115 <data name="outFileName" from_work_dir="matrix" format="h5"> | 115 <data name="outFileName" from_work_dir="matrix" format="h5"> |
116 <change_format> | 116 <change_format> |
117 <when input="mode.outputFormat" value="cool" format="cool" /> | 117 <when input="mode.outputFormat" value="cool" format="cool" /> |
118 </change_format> | 118 </change_format> |
119 <filter>mode['mode_selector'] == "correct"</filter> | 119 <filter>mode['mode_selector'] == "correct"</filter> |
120 | 120 |
121 </data> | 121 </data> |
122 | 122 |
123 | 123 |
124 <data name="diagnostic_plot" from_work_dir="diagnostic_plot.png" format="png"> | 124 <data name="diagnostic_plot" from_work_dir="diagnostic_plot.png" format="png"> |
125 <filter>mode['mode_selector'] == "diagnostic_plot"</filter> | 125 <filter>mode['mode_selector'] == "diagnostic_plot"</filter> |
126 </data> | 126 </data> |
127 </outputs> | 127 </outputs> |
128 <tests> | 128 <tests> |
129 <test> | 129 <test> |
130 <param name="matrix_h5_cooler" value="small_test_matrix.h5"/> | 130 <param name="matrix_h5_cooler" value="small_test_matrix.h5"/> |
131 | 131 |
132 <param name="mode_selector" value="correct"/> | 132 <param name="mode_selector" value="correct"/> |
133 <repeat name="chromosomes"> | 133 <repeat name="chromosomes"> |
134 <param name="chromosome" value="chrUextra"/> | 134 <param name="chromosome" value="chrUextra"/> |
135 </repeat> | 135 </repeat> |
136 <repeat name="chromosomes"> | 136 <repeat name="chromosomes"> |
137 <param name="chromosome" value="chr3LHet"/> | 137 <param name="chromosome" value="chr3LHet"/> |
138 </repeat> | 138 </repeat> |
139 <param name='outputFormat' value='h5'/> | 139 <param name='outputFormat' value='h5'/> |
140 | 140 <param name='filterThreshold_low' value='-2.0' /> |
141 <param name='filterThreshold_large' value='4' /> | |
141 <output name="outFileName" file="hicCorrectMatrix_result1.npz.h5" ftype="h5" compare="sim_size"/> | 142 <output name="outFileName" file="hicCorrectMatrix_result1.npz.h5" ftype="h5" compare="sim_size"/> |
142 </test> | 143 </test> |
143 <test> | 144 <test> |
144 <param name="matrix_h5_cooler" value="small_test_matrix.h5"/> | 145 <param name="matrix_h5_cooler" value="small_test_matrix.h5"/> |
145 <param name="mode_selector" value="diagnostic_plot"/> | 146 <param name="mode_selector" value="diagnostic_plot"/> |
146 <repeat name="chromosomes"> | 147 <repeat name="chromosomes"> |
147 <param name="chromosome" value="chrUextra"/> | 148 <param name="chromosome" value="chrUextra"/> |
148 </repeat> | 149 </repeat> |
149 <repeat name="chromosomes"> | 150 <repeat name="chromosomes"> |
150 <param name="chromosome" value="chr3LHet"/> | 151 <param name="chromosome" value="chr3LHet"/> |
151 </repeat> | 152 </repeat> |
152 <output name="diagnostic_plot" file="diagnostic_plot.png" ftype="png" compare="sim_size"/> | 153 <output name="diagnostic_plot" file="diagnostic_plot.png" ftype="png" compare="sim_size"/> |
153 </test> | 154 </test> |
154 </tests> | 155 </tests> |
155 <help><![CDATA[ | 156 <help><![CDATA[ |
156 | 157 |
157 Matrix correction | 158 Hi-C contact matrix correction |
158 ================== | 159 ============================== |
159 | 160 |
160 ``hicCorrectMatrix`` runs Dekker's iterative correction over a Hi-C matrix (`Imakaev 2012`_.). For correcting the matrix, | 161 **hicCorrectMatrix** runs Imakaev's iterative correction, described in `Imakaev et al. (2012)`_, over a Hi-C matrix. For the matrix correction to be efficient, |
161 it is important to remove the unassembled scaffolds (e.g. `NT_`), mitochondrial DNA and Y chromosome and keep only | 162 it is important to remove the unassembled scaffolds (e.g. `NT_`), mitochondrial DNA and Y chromosome and keep only full length |
162 chromosomes, as scaffolds create problems with matrix correction. Therefore | 163 chromosomes, as scaffolds create problems with matrix correction. Therefore we use the chromosome names (1-19, X, Y) here. |
163 we use the chromosome names (1-19, X, Y) here. | 164 |
164 | |
165 **Important**: Use ‘chr1 chr2 chr3 etc.’ if your genome index uses chromosome names with the ‘chr’ prefix. | 165 **Important**: Use ‘chr1 chr2 chr3 etc.’ if your genome index uses chromosome names with the ‘chr’ prefix. |
166 | 166 |
167 Matrix correction works in two steps: first a histogram containing the sum of contact per bin (row sum) is produced. This plot needs to be inspected to decide the best threshold for removing bins with lower number of reads. The second step removes the low scoring bins and does the correction. | 167 Also, for the method to work correctly, bins with zero reads assigned to them should be removed as they cannot be corrected. Also, bins with a low number of reads should be removed, otherwise, during the correction step, the counts associated with those bins will be amplified (usually, zero and low coverage bins tend to contain repetitive regions). Bins with extremely high number of reads can also be removed from the correction as they may represent copy number variations. |
168 | 168 |
169 Input | 169 To aid in the identification of bins with low and high read coverage, the ``diagnostic plot`` function of **hicCorrectMatrix** must be used. |
170 | |
171 Indeed, **hicCorrectMatrix** works in two steps: | |
172 | |
173 - **Diagnostic plot**: First a histogram containing the sum of contact per bin (row sum) is produced. This plot needs to be inspected to decide the best threshold for removing bins with lower number of reads. | |
174 | |
175 - **Correct**: The second step removes the bins outside of the defined thresholds and performs the iterative correction. | |
176 | |
177 _________________ | |
178 | |
179 Usage | |
170 ----- | 180 ----- |
171 | 181 |
182 This tool must be used on uncorrected matrices at restriction enzyme resolution or with merged bins (``hicMergeMatrixBins``). | |
183 | |
184 _________________ | |
185 | |
186 Output | |
187 ------ | |
172 | 188 |
173 Diagnostic plot | 189 Diagnostic plot |
174 ~~~~~~~~~~~~~~~~ | 190 _______________ |
175 Plots a histogram of the coverage per bin together with the | 191 |
176 modified z-score based on the median absolute deviation | 192 The diagnostic plot consists of a bar plot of the contact coverage per bin together with the |
177 method. | 193 modified z-score based on the Median Absolute Deviation (MAD) method. |
178 | 194 |
179 See Boris Iglewicz and David Hoaglin 1993, Volume 16: | 195 See Boris Iglewicz and David Hoaglin 1993, Volume 16: |
180 How to Detect and Handle Outliers The ASQC Basic References in Quality Control: Statistical Techniques, | 196 How to Detect and Handle Outliers The ASQC Basic References in Quality Control: Statistical Techniques, |
181 Edward F. Mykytka, Ph.D., Editor. | 197 Edward F. Mykytka, Ph.D., Editor. |
182 | 198 |
183 Parameters | 199 Using this diagnostic plot, a user can decide if values |
184 __________ | 200 with a too low (and/or too high) number of contacts with respect to their genomic distance should |
185 - the contact matrix | 201 be removed from the data before the correction applies. |
186 - Max value for the x-axis in counts per bin | 202 |
187 - include chromosomes | 203 Moreover, the shown distribution should be a Gaussian bell. If it doesn’t follow a Gaussian distribution |
188 | 204 this is an indicator that the used data is of bad quality or that the used contact matrix |
205 is maybe not the one that should be used. It can happen that users select for example a merge | |
206 matrix with a lower resolution that was previously needed for plotting. In such cases the | |
207 diagnostic plot helps to detect this and prevent the user from running the analysis on a wrong dataset. | |
208 | |
209 | |
210 .. image:: $PATH_TO_IMAGES/diagnostic_plot.png | |
211 :width: 50% | |
212 | |
213 In the example plot above, a user can then use the lower threshold defined by the MAD method (black bold bar), or define their own threshold based on the contact distribution. | |
189 | 214 |
190 Correct | 215 Correct |
191 ~~~~~~~ | 216 _______ |
192 | 217 |
193 Run the iterative correction. | 218 Runs the iterative correction and outputs the corrected matrix. This matrix can then be used with all downstream analysis tools such as ``hicPlotMatrix``, ``hicPlotTADs``, ``hicPlotViewpoint``, ``hicAggregateContacts`` for **visualization of Hi-C data**, ``hicCorrelate``, ``hicPlotDistVsCounts``, ``hicTransform``, ``hicFindTADs``, ``hicPCA`` **for data and scores computation on Hi-C data**. |
194 | 219 |
195 Parameters | 220 It is noteworthy that ``hicSumMatrices`` and ``hicMergeMatrixBins`` **must be performed on uncorrected matrices**. |
196 __________ | 221 |
197 - number of iterations | 222 _________________ |
198 - inflation cutoff | |
199 - trans region cutoff | |
200 - sequenced count cutoff | |
201 - skip diagonal counts | |
202 - normalize each chromosome separately | |
203 - remove bins of low coverage | |
204 - remove bins of large coverage | |
205 - include chromosomes | |
206 | |
207 Output | |
208 ------ | |
209 | |
210 Diagnostic plot: | |
211 | |
212 .. image:: $PATH_TO_IMAGES/diagnostic_plot.png | |
213 :width: 70% | |
214 | |
215 Correct: | |
216 - the corrected contact matrix | |
217 | 223 |
218 | For more information about HiCExplorer please consider our documentation on readthedocs.io_ | 224 | For more information about HiCExplorer please consider our documentation on readthedocs.io_ |
219 | 225 |
220 .. _readthedocs.io: http://hicexplorer.readthedocs.io/en/latest/index.html | 226 .. _readthedocs.io: http://hicexplorer.readthedocs.io/en/latest/index.html |
221 | 227 .. _`Imakaev et al. (2012)`: http://doi.org/doi:10.1038/nmeth.2148 |
222 .. _`Imakaev 2012`: http://doi.org/doi:10.1038/nmeth.2148 | |
223 ]]></help> | 228 ]]></help> |
224 <expand macro="citations" /> | 229 <expand macro="citations" /> |
225 </tool> | 230 </tool> |
226 |