comparison hicBuildMatrix.xml @ 19:9edf8894a22d draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hicexplorer commit 2a0943e78bdc8ebb13f181399206a9eea37ed78f"
author iuc
date Tue, 16 Mar 2021 15:12:54 +0000
parents 231687cac31b
children ed2cca6b5de4
comparison
equal deleted inserted replaced
18:921a2da49a0c 19:9edf8894a22d
1 <tool id="hicexplorer_hicbuildmatrix" name="@BINARY@" version="@WRAPPER_VERSION@.0"> 1 <tool id="hicexplorer_hicbuildmatrix" name="@BINARY@" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
2 <description>create a contact matrix</description> 2 <description>create a contact matrix</description>
3 <macros> 3 <macros>
4 <token name="@BINARY@">hicBuildMatrix</token> 4 <token name="@BINARY@">hicBuildMatrix</token>
5 <import>macros.xml</import> 5 <import>macros.xml</import>
6 </macros> 6 </macros>
7 <expand macro="requirements" > 7 <expand macro="requirements">
8 <requirement type="package" version="1.9">samtools</requirement> 8 <requirement type="package" version="1.9">samtools</requirement>
9 </expand> 9 </expand>
10 <command detect_errors="exit_code"><![CDATA[ 10 <command detect_errors="exit_code"><![CDATA[
11 11
12 mkdir ./QCfolder && 12 mkdir ./QCfolder &&
13 mkdir $qc.files_path && 13 mkdir '$qc.files_path' &&
14 @BINARY@ 14 @BINARY@
15
16 --samFiles 15 --samFiles
17 #for $repeat in $samFiles: 16 #for $repeat in $samFiles:
18 '${repeat.samFile}' 17 '${repeat.samFile}'
19 #end for 18 #end for
20 19
21 #if $restrictionCutFileBinSize_conditional.restrictionCutFileBinSize_selector == "optionRestrictionCutFile": 20 --restrictionCutFile '$restrictionCutFile'
22 --restrictionCutFile '$restrictionCutFileBinSize_conditional.restrictionCutFile' 21
23 --minDistance $restrictionCutFileBinSize_conditional.minDistance 22 #if $restrictionSequence:
24 --maxLibraryInsertSize $restrictionCutFileBinSize_conditional.maxLibraryInsertSize 23 --restrictionSequence '$restrictionSequence'
25 #end if 24 #end if
26 25 #if $danglingSequence:
27 #if $restrictionCutFileBinSize_conditional.restrictionCutFileBinSize_selector == "optionBinSize": 26 --danglingSequence '$danglingSequence'
27 #end if
28 #if $minDistance:
29 --minDistance $minDistance
30 #end if
31 #if $maxLibraryInsertSize:
32 --maxLibraryInsertSize $maxLibraryInsertSize
33 #end if
34
35 #if $binSizes:
28 --binSize 36 --binSize
29 #for $repeat in $restrictionCutFileBinSize_conditional.binSizes 37 #for $repeat in $binSizes
30 '${repeat.binSize}' 38 '${repeat.binSize}'
31 #end for 39 #end for
32 #end if 40 #end if
33 41
34 42 --genomeAssembly $samFiles[0].samFile.metadata.dbkey
35 #if $restrictionSequence:
36 --restrictionSequence '$restrictionSequence'
37 #end if
38 43
39 #if $region: 44 #if $region:
40 --region '$region' 45 --region '$region'
41 #end if 46 #end if
42 47
43 --outFileName matrix.$outputFormat 48 --outFileName matrix.$outputFormat
44 49
45 #if $outBam_Boolean: 50 #if $outBam:
46 $outBam_Boolean ./unsorted.bam 51 $outBam ./unsorted.bam
47 #end if 52 #end if
48 53
49 $keepSelfCircles 54 $keepSelfCircles
55 $removeSelfLigation
56 $skipDuplicationCheck
50 57
51 #if $minMappingQuality and $minMappingQuality is not None: 58 #if $minMappingQuality and $minMappingQuality is not None:
52 --minMappingQuality $minMappingQuality 59 --minMappingQuality $minMappingQuality
53 #end if 60 #end if
54 61
55 #if $danglingSequence: 62 #if $chromosomeSizes:
56 --danglingSequence '$danglingSequence' 63 --chromosomeSizes '$chromosomeSizes'
57 #end if 64 #end if
58 65
59 --threads @THREADS@ 66 --threads @THREADS@
60 67
61 --QCfolder ./QCfolder 68 --QCfolder ./QCfolder
62 && 69 &&
63 mv ./QCfolder/* $qc.files_path/ 70 mv ./QCfolder/* $qc.files_path/
64 && 71 &&
65 mv $qc.files_path/hicQC.html $qc 72 mv '$qc.files_path/hicQC.html' '$qc'
66 && mv $qc.files_path/*.log raw_qc 73 && mv "$qc.files_path"/*.log raw_qc
67 && mv matrix.$outputFormat matrix 74 && mv matrix.$outputFormat matrix
68 #if $outBam_Boolean: 75 #if $outBam:
69 && samtools sort -T "\${TMPDIR:-.}" ./unsorted.bam -o sorted.bam 76 && samtools sort -@ @THREADS@ -T "\${TMPDIR:-.}" ./unsorted.bam -o sorted.bam
70 #end if 77 #end if
71
72 ]]> 78 ]]>
73 </command> 79 </command>
74 <inputs> 80 <inputs>
75 <repeat max="2" min="2" name="samFiles" title="Sam/Bam files to process (forward/reverse)" 81 <!-- can we use multiple=True here with min="2" and max="2" ? -->
76 help="Please use the special BAM datatype: qname_input_sorted.bam and use for 'bowtie2' the '--reorder' option to create a BAM file."> 82 <repeat max="2" min="2" name="samFiles" title="Sam/Bam files to process (forward/reverse)" help="Please use the special BAM datatype: qname_input_sorted.bam and use for 'bowtie2' the '--reorder' option to create a BAM file.">
77 <param name="samFile" type="data" format="sam,qname_input_sorted.bam"/> 83 <param name="samFile" type="data" format="sam,qname_input_sorted.bam">
84 <validator type="unspecified_build" />
85 </param>
78 </repeat> 86 </repeat>
79 <conditional name="restrictionCutFileBinSize_conditional"> 87
80 <param name="restrictionCutFileBinSize_selector" type="select" label="Choose to use a restriction cut file or a bin size"> 88 <expand macro="restrictionCutFile" />
81 <option value="optionRestrictionCutFile">Restriction cut file</option> 89 <expand macro="restrictionSequence" />
82 <option value="optionBinSize" selected="True">Bin size</option> 90 <expand macro="danglingSequence" />
83 </param> 91
84 <when value="optionRestrictionCutFile"> 92 <param argument="--minDistance" type="integer" optional="true" value="" label="Minimum distance between restriction sites" help="Restriction sites that are closer than this distance are merged into one.
85 <param argument="--restrictionCutFile" type="data" format="bed" optional="true" label="BED file with all restriction cut places" 93 This option only applies if --restrictionCutFile is given." />
86 help="Should contain only mappable restriction sites. If given, the bins are set to match the restriction fragments 94 <param argument="--maxLibraryInsertSize" type="integer" optional="true" value="" label="Maximum library insert size defines different cut offs based on the maximum expected library size" help="*This is not the average fragment size* but the higher end of the fragment size distribution (obtained using for example Fragment Analyzer)
87 (i.e. the region between one restriction site and the next)." /> 95 which usually is between 800 to 1500 bp. If this value is not known use the default of 1000. The insert value is used to decide if two mates
88 <param argument="--minDistance" type="integer" value="" optional="true" label="Minimum distance between restriction sites" 96 belong to the same fragment (by checking if they are within this max insert size) and to decide if a mate
89 help="Restriction sites that are closer than this distance are merged into one. 97 is too far away from the nearest restriction site." />
90 This option only applies if --restrictionCutFile is given."/> 98
91 <param argument="--maxLibraryInsertSize" type="integer" value="" optional="true" 99 <repeat name='binSizes' title='Bin size in bp' min="1" help="If used, the restriction cut places (if given) are used to only consider reads that are in the vicinity of the restriction sites.
92 label="Maximum library insert size defines different cut offs based on the maximum expected library size" 100 Otherwise all reads in the interval are considered. Use multiple ones to create a mcool file.">
93 help="*This is not the average fragment size* but the higher end of the fragment size distribution (obtained using for example Fragment Analyzer) 101 <param argument="--binSize" type="integer" optional="true" value="" label="Bin size in bp" />
94 which usually is between 800 to 1500 bp. If this value is not known use the default of 1000. The insert value is used to decide if two mates 102 </repeat>
95 belong to the same fragment (by checking if they are within this max insert size) and to decide if a mate
96 is too far away from the nearest restriction site." />
97 </when>
98 <when value="optionBinSize">
99 <repeat name='binSizes' title='Bin size in bp' min="1" help="If used, the restriction cut places (if given) are used to only consider reads that are in the vicinity of the restriction sites.
100 Otherwise all reads in the interval are considered. Use multiple ones to create a mcool file.">
101 <param argument="--binSize" type="integer" value="" optional="true" label="Bin size in bp"/>
102 </repeat>
103 </when>
104 </conditional>
105
106 <param argument="--restrictionSequence" type="text" optional="true" label="Sequence of the restriction site"
107 help="This is used to discard reads that end/start with such sequence and that are considered un-ligated fragments or
108 &quot;dangling-ends&quot;. If not given, such statistics will not be available." />
109 103
110 <expand macro="region" /> 104 <expand macro="region" />
111 105 <param argument="--keepSelfCircles" type="boolean" truevalue="--keepSelfCircles" falsevalue="" label="Keep self circles" help="If set, outward facing reads without any restriction fragment (self circles) are kept. They will be counted and shown in the QC plots." />
112 <param argument="--keepSelfCircles" type="boolean" truevalue="--keepSelfCircles" falsevalue="" 106 <param argument="--removeSelfLigation" type="boolean" truevalue="--removeSelfLigation" falsevalue="" label="remove self ligation" help="If set, inward facing reads less than 1000 bp apart and having a restriction site in between are removed. Although these reads do not contribute to any distant contact, they are useful to account for bias in the data" />
113 label="Keep self circles"
114 help="If set, outward facing reads without any restriction fragment (self circles) are kept. They will be counted and shown in the QC plots." />
115
116 <expand macro="minMappingQuality" /> 107 <expand macro="minMappingQuality" />
117 108 <param argument="--skipDuplicationCheck" type="boolean" truevalue="--skipDuplicationCheck" falsevalue="" label="Skip duplication check" help="Identification of duplicated read pairs is memory consuming. Thus, in case of memory errors this check can be skipped." />
118 <param argument="--danglingSequence" type="text" optional="true" label="Dangling sequence" 109 <param argument="--chromosomeSizes" type="data" format="tabular" optional="true" label="Chromosome sizes for your genome" help="File with the chromosome sizes for your genome. A tab-delimited two column layout 'chr_name size' is expected
119 help="Sequence left by the restriction enzyme after cutting. 110 Usually the sizes can be determined from the SAM/BAM input files, however,
120 Each restriction enzyme recognizes a different DNA sequence and, 111 for cHi-C or scHi-C it can be that at the start or end no data is present.
121 after cutting, they leave behind a specific ‘sticky’ end or dangling end sequence. 112 Please consider that this option causes that only reads are considered which are on the listed chromosomes.
122 For example, for HindIII the restriction site is AAGCTT and the dangling end is AGCT. 113 Use this option to guarantee fixed sizes. An example file is available via UCSC:
123 For DpnII, the restriction site and dangling end sequence are the same: GATC. 114 http://hgdownload.soe.ucsc.edu/goldenPath/dm3/bigZips/dm3.chrom.sizes" />
124 This information is easily found on the description of the restriction enzyme. 115
125 The dangling sequence is used to classify and report reads whose 5’ end starts with such sequence as dangling-end reads. 116 <param argument='--outBam' type='boolean' truevalue='--outBam' falsevalue="" checked="false" label="Save valid Hi-C reads in BAM file" help="A bam
126 A significant portion of dangling-end reads in a sample are indicative of a problem with the re-ligation step of the protocol. "/>
127
128 <param name='outBam_Boolean' type='boolean' truevalue='--outBam' falsevalue="" checked="false" label="Save valid Hi-C reads in BAM file"
129 help="A bam
130 file containing all valid Hi-C reads can be created 117 file containing all valid Hi-C reads can be created
131 using this option. This bam file could be useful to 118 using this option. This bam file could be useful to
132 inspect the distribution of valid Hi-C reads pairs or 119 inspect the distribution of valid Hi-C reads pairs or
133 for other downstream analyses, but is not used by any 120 for other downstream analyses, but is not used by any
134 HiCExplorer tool. Computation will be significantly 121 HiCExplorer tool. Computation will be significantly
135 longer if this option is set."/> 122 longer if this option is set." />
136 123
137 <param name='outputFormat' type='select' label="Output file format"> 124 <param name='outputFormat' type='select' label="Output file format">
138 <option value='h5'>HiCExplorer format</option> 125 <option value='h5'>HiCExplorer format</option>
139 <option value="cool">cool</option> 126 <option value="cool">cool</option>
140 </param> 127 </param>
141 </inputs> 128 </inputs>
142 <outputs> 129 <outputs>
143 <data name="outBam" from_work_dir="sorted.bam" format="bam" label="${tool.name} BAM file on ${on_string}"> 130 <data name="outfileBam" from_work_dir="sorted.bam" format="bam" label="${tool.name} BAM file on ${on_string}">
144 <filter>outBam_Boolean</filter> 131 <filter>outBam</filter>
145 </data> 132 </data>
146 <data name="outFileName" from_work_dir="matrix" format="h5" label="${tool.name} MATRIX on ${on_string}"> 133 <data name="outFileName" from_work_dir="matrix" format="h5" label="${tool.name} MATRIX on ${on_string}">
147 <change_format> 134 <change_format>
148 <when input="outputFormat" value="cool" format="cool" /> 135 <when input="outputFormat" value="cool" format="cool" />
149 </change_format> 136 </change_format>
150 </data> 137 </data>
151 <data name="qc" format="html" label="${tool.name} QC on ${on_string}"/> 138 <data name="qc" format="html" label="${tool.name} QC on ${on_string}" />
152
153 <data name="raw_qc" from_work_dir='raw_qc' format='txt' label="${tool.name} raw QC on ${on_string}" /> 139 <data name="raw_qc" from_work_dir='raw_qc' format='txt' label="${tool.name} raw QC on ${on_string}" />
154 </outputs> 140 </outputs>
155 <tests> 141 <tests>
156 <test> 142 <test expect_num_outputs="4">
157 <repeat name="samFiles"> 143 <repeat name="samFiles">
158 <param name="samFile" value="small_test_R1_unsorted.sam"/> 144 <param name="samFile" value="small_test_R1_unsorted.sam" dbkey="hg38" />
159 </repeat> 145 </repeat>
160 <repeat name="samFiles"> 146 <repeat name="samFiles">
161 <param name="samFile" value="small_test_R2_unsorted.sam"/> 147 <param name="samFile" value="small_test_R2_unsorted.sam" dbkey="hg38" />
162 </repeat> 148 </repeat>
163 <conditional name="restrictionCutFileBinSize_conditional"> 149 <param name='outputFormat' value='h5' />
164 <param name="restrictionCutFileBinSize_selector" value="optionBinSize"/> 150 <repeat name='binSizes'>
165 <repeat name='binSizes'> 151 <param name="binSize" value="5000" />
166 <param name="binSize" value="5000"/> 152 </repeat>
167 </repeat> 153 <param name='restrictionCutFile' value='DpnII_10k.bed' />
168 </conditional> 154 <param name='restrictionSequence' value='GATC' />
169 <param name='outputFormat' value='h5'/> 155 <param name='danglingSequence' value='GATC' />
170 <param name='outBam_Boolean' value="True" /> 156 <param name='outBam' value="True" />
171 <output name="outBam" file="small_test_matrix_result_sorted.bam" ftype="bam"/> 157 <output name="outfileBam" file="small_test_matrix_result_sorted.bam" ftype="bam" />
172 <output name="outFileName" ftype="h5"> 158 <output name="outFileName" ftype="h5">
173 <assert_contents> 159 <assert_contents>
174 <has_h5_keys keys='intervals,matrix'/> 160 <has_h5_keys keys='intervals,matrix' />
175 </assert_contents> 161 </assert_contents>
176 </output> 162 </output>
177 <output name="raw_qc" file='raw_qc_report' compare='diff' lines_diff='2'/> 163 <output name="raw_qc" file='raw_qc_report' compare='diff' lines_diff='2' />
178 </test> 164 </test>
179 <test> 165 <test expect_num_outputs="4">
180 <repeat name="samFiles"> 166 <repeat name="samFiles">
181 <param name="samFile" value="small_test_R1_unsorted.sam"/> 167 <param name="samFile" value="small_test_R1_unsorted.sam" dbkey="hg38" />
182 </repeat> 168 </repeat>
183 <repeat name="samFiles"> 169 <repeat name="samFiles">
184 <param name="samFile" value="small_test_R2_unsorted.sam"/> 170 <param name="samFile" value="small_test_R2_unsorted.sam" dbkey="hg38" />
185 </repeat> 171 </repeat>
186 <conditional name="restrictionCutFileBinSize_conditional"> 172 <repeat name='binSizes'>
187 <param name="restrictionCutFileBinSize_selector" value="optionBinSize"/> 173 <param name="binSize" value="5000" />
188 <repeat name='binSizes'> 174 </repeat>
189 <param name="binSize" value="5000"/> 175 <param name='restrictionCutFile' value='DpnII_10k.bed' />
190 </repeat> 176 <param name='restrictionSequence' value='GATC' />
191 </conditional> 177 <param name='danglingSequence' value='GATC' />
192 <param name='outputFormat' value='cool'/> 178 <param name='outputFormat' value='cool' />
193 <param name='outBam_Boolean' value="True" /> 179 <param name='outBam' value="True" />
194 <output name="outBam" file="small_test_matrix_result_sorted.bam" ftype="bam"/> 180 <output name="outfileBam" file="small_test_matrix_result_sorted.bam" ftype="bam" />
195 <output name="outFileName" ftype="cool"> 181 <output name="outFileName" ftype="cool">
196 <assert_contents> 182 <assert_contents>
197 <has_h5_keys keys='bins,chroms,indexes,pixels'/> 183 <has_h5_keys keys='bins,chroms,indexes,pixels' />
198 </assert_contents> 184 </assert_contents>
199 </output> 185 </output>
200 <output name="raw_qc" file='raw_qc_report' compare='diff' lines_diff='2'/> 186 <output name="raw_qc" file='raw_qc_report' compare='diff' lines_diff='2' />
201 </test> 187 </test>
202 </tests> 188 </tests>
203 <help><![CDATA[ 189 <help><![CDATA[
204 190
205 Creation of the contact matrix 191 Creation of the contact matrix
294 _________________ 280 _________________
295 281
296 | For more information about HiCExplorer please consider our documentation on readthedocs.io_. 282 | For more information about HiCExplorer please consider our documentation on readthedocs.io_.
297 283
298 .. _readthedocs.io: http://hicexplorer.readthedocs.io/en/latest/index.html 284 .. _readthedocs.io: http://hicexplorer.readthedocs.io/en/latest/index.html
299 ]]></help> 285 ]]> </help>
300 <expand macro="citations" /> 286 <expand macro="citations" />
301 </tool> 287 </tool>