Mercurial > repos > iuc > hicexplorer_hicbuildmatrixmicroc
comparison hicBuildMatrixMicroC.xml @ 0:bce0ab313a89 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hicexplorer commit 69bb60ab875c1c1769298678f0890d8b92f1899d
| author | iuc |
|---|---|
| date | Thu, 05 Dec 2024 18:08:13 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:bce0ab313a89 |
|---|---|
| 1 <tool id="hicexplorer_hicbuildmatrixmicroc" name="@BINARY@" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | |
| 2 <description>create a contact matrix</description> | |
| 3 <macros> | |
| 4 <token name="@BINARY@">hicBuildMatrixMicroC</token> | |
| 5 <import>macros.xml</import> | |
| 6 </macros> | |
| 7 <expand macro="requirements"/> | |
| 8 <command detect_errors="exit_code"><![CDATA[ | |
| 9 ## Prepare a scratch QC folder and the directory that holds the files belonging to the HTML report. | |
| 10 mkdir ./QCfolder && | |
| 11 mkdir '$qc.files_path' && | |
| 12 @BINARY@ | |
| 13 --samFiles | |
| 14 #for $repeat in $samFiles: | |
| 15 '${repeat.samFile}' | |
| 16 #end for | |
| 17 | |
| 18 #if $maxLibraryInsertSize: | |
| 19 --maxLibraryInsertSize $maxLibraryInsertSize | |
| 20 #end if | |
| 21 | |
| 22 #if $binSizes: | |
| 23 --binSize | |
| 24 #for $repeat in $binSizes | |
| 25 '${repeat.binSize}' | |
| 26 #end for | |
| 27 #end if | |
| 28 | |
| 29 #if $chromosomeSizes: | |
| 30 --chromosomeSizes '$chromosomeSizes' | |
| 31 #end if | |
| 32 #if $dbKey: | |
| 33 --genomeAssembly '$dbKey' | |
| 34 #else | |
| 35 --genomeAssembly '$samFiles[0].samFile.metadata.dbkey' | |
| 36 #end if | |
| 37 | |
| 38 #if $region: | |
| 39 --region '$region' | |
| 40 #end if | |
| 41 | |
| 42 --outFileName 'matrix.$outputFormat' | |
| 43 | |
| 44 #if $outBam: | |
| 45 $outBam ./unsorted.bam | |
| 46 #end if | |
| 47 | |
| 48 $keepSelfCircles | |
| 49 $skipDuplicationCheck | |
| 50 | |
| 51 #if $minMappingQuality and $minMappingQuality is not None: | |
| 52 --minMappingQuality $minMappingQuality | |
| 53 #end if | |
| 54 | |
| 55 --threads @THREADS@ | |
| 56 ## QC files go to the scratch folder first; they are moved next to the report once the run has finished. | |
| 57 --QCfolder ./QCfolder | |
| 58 && | |
| 59 mv ./QCfolder/* '$qc.files_path/' | |
| 60 && | |
| 61 mv '$qc.files_path/hicQC.html' '$qc' | |
| 62 && mv "$qc.files_path"/*.log raw_qc | |
| 63 && mv matrix.$outputFormat matrix | |
| 64 #if $outBam: | |
| 65 && samtools sort -@ @THREADS@ -T "\${TMPDIR:-.}" ./unsorted.bam -o sorted.bam | |
| 66 #end if | |
| 67 ]]> | |
| 68 </command> | |
| 69 <inputs> | |
| 70 <!-- can we use multiple=True here with min="2" and max="2" ? --> | |
| 71 <repeat max="2" min="2" name="samFiles" title="Sam/Bam files to process (forward/reverse)" help="Please use the special BAM datatype: qname_input_sorted.bam and use for 'bowtie2' the '--reorder' option to create a BAM file."> | |
| 72 <param name="samFile" type="data" format="sam,qname_input_sorted.bam"> | |
| 73 </param> | |
| 74 </repeat> | |
| 75 | |
| 76 <param argument="--maxLibraryInsertSize" type="integer" optional="true" value="" label="Maximum library insert size defines different cut offs based on the maximum expected library size" help="*This is not the average fragment size* but the higher end of the fragment size distribution (obtained using for example Fragment Analyzer) | |
| 77 which usually is between 800 and 1500 bp. If this value is not known, use the default of 1000. The insert value is used to decide if two mates | |
| 78 belong to the same fragment (by checking if they are within this max insert size) and to decide if a mate | |
| 79 is too far away from the nearest restriction site." /> | |
| 80 | |
| 81 <repeat name="binSizes" title="Bin size in bp" min="1" help="If used, the restriction cut places (if given) are used to only consider reads that are in the vicinity of the restriction sites. | |
| 82 Otherwise all reads in the interval are considered. Use multiple ones to create a mcool file."> | |
| 83 <param argument="--binSize" type="integer" optional="true" value="" label="Bin size in bp" /> | |
| 84 </repeat> | |
| 85 | |
| 86 <expand macro="region" /> | |
| 87 <param argument="--keepSelfCircles" type="boolean" truevalue="--keepSelfCircles" falsevalue="" label="Keep self circles" help="If set, outward facing reads without any restriction fragment (self circles) are kept. They will be counted and shown in the QC plots." /> | |
| 88 <expand macro="minMappingQuality" /> | |
| 89 <param argument="--skipDuplicationCheck" type="boolean" truevalue="--skipDuplicationCheck" falsevalue="" label="Skip duplication check" help="Identification of duplicated read pairs is memory consuming. Thus, in case of memory errors this check can be skipped." /> | |
| 90 <param argument="--chromosomeSizes" type="data" format="tabular" optional="true" label="Chromosome sizes for your genome" help="File with the chromosome sizes for your genome. A tab-delimited two column layout 'chr_name size' is expected. | |
| 91 Usually the sizes can be determined from the SAM/BAM input files, however, | |
| 92 for cHi-C or scHi-C it can be that at the start or end no data is present. | |
| 93 Please consider that this option causes that only reads are considered which are on the listed chromosomes. | |
| 94 Use this option to guarantee fixed sizes. An example file is available via UCSC: | |
| 95 http://hgdownload.soe.ucsc.edu/goldenPath/dm3/bigZips/dm3.chrom.sizes" /> | |
| 96 <param name="dbKey" type="text" optional="true" label="Use this dbkey for your history genome" | |
| 97 help="You can set the reference genome in your history as metadata. In case you have not you can specify it here." /> | |
| 98 | |
| 99 <param argument="--outBam" type="boolean" truevalue="--outBam" falsevalue="" checked="false" label="Save valid Hi-C reads in BAM file" help="A bam | |
| 100 file containing all valid Hi-C reads can be created | |
| 101 using this option. This bam file could be useful to | |
| 102 inspect the distribution of valid Hi-C reads pairs or | |
| 103 for other downstream analyses, but is not used by any | |
| 104 HiCExplorer tool. Computation will be significantly | |
| 105 longer if this option is set." /> | |
| 106 | |
| 107 <param name="outputFormat" type="select" label="Output file format"> | |
| 108 <option value="h5">HiCExplorer format</option> | |
| 109 <option value="cool">cool</option> | |
| 110 </param> | |
| 111 </inputs> | |
| 112 <outputs> <!-- Datasets collected after the command has finished. --> | |
| 113 <data name="outfileBam" from_work_dir="sorted.bam" format="bam" label="${tool.name} BAM file on ${on_string}"> <!-- sorted.bam written by the samtools sort step; only created when outBam is checked (filter below) --> | |
| 114 <filter>outBam</filter> | |
| 115 </data> | |
| 116 <data name="outFileName" from_work_dir="matrix" format="h5" label="${tool.name} MATRIX on ${on_string}"> <!-- contact matrix, renamed to "matrix" by the command; h5 unless outputFormat is cool --> | |
| 117 <change_format> | |
| 118 <when input="outputFormat" value="cool" format="cool" /> | |
| 119 </change_format> | |
| 120 </data> | |
| 121 <data name="qc" format="html" label="${tool.name} QC on ${on_string}" /> <!-- HTML QC report; the command moves hicQC.html onto this dataset --> | |
| 122 <data name="raw_qc" from_work_dir="raw_qc" format="txt" label="${tool.name} raw QC on ${on_string}" /> <!-- QC log moved to raw_qc by the command --> | |
| 123 </outputs> | |
| 124 <tests> | |
| 125 <test expect_num_outputs="4"> <!-- h5 output with BAM export: checks the sorted BAM, the h5 keys of the matrix and the raw QC log --> | |
| 126 <repeat name="samFiles"> | |
| 127 <param name="samFile" value="small_test_R1_unsorted.sam" dbkey="hg38" /> | |
| 128 </repeat> | |
| 129 <repeat name="samFiles"> | |
| 130 <param name="samFile" value="small_test_R2_unsorted.sam" dbkey="hg38" /> | |
| 131 </repeat> | |
| 132 <param name="outputFormat" value="h5" /> | |
| 133 <repeat name="binSizes"> | |
| 134 <param name="binSize" value="5000" /> | |
| 135 </repeat> | |
| 136 <param name="outBam" value="True" /> | |
| 137 <output name="outfileBam" file="small_test_matrix_result_sorted_microc.bam" compare="diff" lines_diff="2" ftype="bam" /> | |
| 138 <output name="outFileName" ftype="h5"> | |
| 139 <assert_contents> | |
| 140 <has_h5_keys keys="intervals,matrix" /> | |
| 141 </assert_contents> | |
| 142 </output> | |
| 143 <output name="raw_qc" file="raw_qc_report_micro-c" compare="diff" lines_diff="2" /> | |
| 144 </test> | |
| 145 <test expect_num_outputs="4"> <!-- same inputs with cool output: checks the bins, chroms, indexes and pixels keys instead --> | |
| 146 <repeat name="samFiles"> | |
| 147 <param name="samFile" value="small_test_R1_unsorted.sam" dbkey="hg38" /> | |
| 148 </repeat> | |
| 149 <repeat name="samFiles"> | |
| 150 <param name="samFile" value="small_test_R2_unsorted.sam" dbkey="hg38" /> | |
| 151 </repeat> | |
| 152 <repeat name="binSizes"> | |
| 153 <param name="binSize" value="5000" /> | |
| 154 </repeat> | |
| 155 <param name="outputFormat" value="cool" /> | |
| 156 <param name="outBam" value="True" /> | |
| 157 <output name="outfileBam" file="small_test_matrix_result_sorted_microc.bam" compare="diff" lines_diff="2" ftype="bam" /> | |
| 158 <output name="outFileName" ftype="cool"> | |
| 159 <assert_contents> | |
| 160 <has_h5_keys keys="bins,chroms,indexes,pixels" /> | |
| 161 </assert_contents> | |
| 162 </output> | |
| 163 <output name="raw_qc" file="raw_qc_report_micro-c" compare="diff" lines_diff="2" /> | |
| 164 </test> | |
| 165 </tests> | |
| 166 <help><![CDATA[ | |
| 167 | |
| 168 Creation of the contact matrix | |
| 169 =============================== | |
| 170 | |
| 171 | |
| 172 **hicBuildMatrixMicroC** generates a contact matrix from Micro-C read pairs, using paired-end Hi-C reads mapped to a reference genome. This process requires two SAM or BAM files: one for the first mate and one for the second mate of the paired-end reads. These files must not be sorted by genomic position (i.e., they must remain unsorted). Unlike traditional Hi-C data, where restriction enzyme cut sites determine resolution, Micro-C does not rely on such sites. Instead, the contact matrix is created using a fixed bin size (e.g., 10,000 bp). | |
| 173 | |
| 174 Additionally, **hicBuildMatrixMicroC** produces a quality control report to evaluate the quality of the Hi-C reads, aiding in determining the success of both the experimental protocol and sequencing process. | |
| 175 | |
| 176 | |
| 177 _________________ | |
| 178 | |
| 179 | |
| 180 Usage | |
| 181 ----- | |
| 182 | |
| 183 | |
| 184 This tool is designed to work with paired SAM/BAM files generated by alignment software supporting local alignment, such as Bowtie2, using the `--local` alignment option for paired-end reads. Both files should represent properly mapped reads. | |
| 185 | |
| 186 _________________ | |
| 187 | |
| 188 | |
| 189 Output | |
| 190 ------ | |
| 191 | |
| 192 **hicBuildMatrixMicroC** generates the following outputs: | |
| 193 | |
| 194 - **Contact Matrix**: A matrix compatible with HiCExplorer for downstream analyses. | |
| 195 - **Accepted Alignments BAM File**: This file includes valid Hi-C read pairs. While not directly used by HiCExplorer, it is valuable for inspecting the distribution of valid reads, such as around restriction enzyme sites, or for other analyses. | |
| 196 - **Quality Control Report**: This report provides an evaluation of the Hi-C data, helping to determine whether the library preparation and experimental workflow were successful. | |
| 197 | |
| 198 | |
| 199 Example plot | |
| 200 ++++++++++++ | |
| 201 | |
| 202 .. image:: hicPlotMatrix.png | |
| 203 :width: 50% | |
| 204 | |
| 205 Contact matrix of *Drosophila melanogaster* embryos built using **hicBuildMatrix**. The example shows Micro-C data, visualized with `hicPlotMatrix`. Bins were merged to a 25 kb resolution using `hicMergeMatrixBins` before plotting. | |
| 206 | |
| 207 | |
| 208 | |
| 209 | |
| 210 Quality report | |
| 211 ++++++++++++++ | |
| 212 | |
| 213 A detailed quality control report accompanies the contact matrix. This report is similar to the one generated by **hicBuildMatrix**, but excludes information specific to restriction cut sites, such as dangling ends and self-circles, as these features are not applicable to Micro-C data. | |
| 214 | |
| 215 | |
| 216 _________________ | |
| 217 | |
| 218 | For more information about HiCExplorer please consider our documentation on readthedocs.io_. | |
| 219 | |
| 220 .. _readthedocs.io: http://hicexplorer.readthedocs.io/en/latest/index.html | |
| 221 ]]> </help> | |
| 222 <expand macro="citations" /> | |
| 223 </tool> |
