Mercurial > repos > bgruening > hicexplorer_hicbuildmatrix
diff hicBuildMatrix.xml @ 19:9edf8894a22d draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hicexplorer commit 2a0943e78bdc8ebb13f181399206a9eea37ed78f"
author | iuc |
---|---|
date | Tue, 16 Mar 2021 15:12:54 +0000 |
parents | 231687cac31b |
children | ed2cca6b5de4 |
line wrap: on
line diff
--- a/hicBuildMatrix.xml Fri Dec 11 21:04:27 2020 +0000 +++ b/hicBuildMatrix.xml Tue Mar 16 15:12:54 2021 +0000 @@ -1,40 +1,45 @@ -<tool id="hicexplorer_hicbuildmatrix" name="@BINARY@" version="@WRAPPER_VERSION@.0"> +<tool id="hicexplorer_hicbuildmatrix" name="@BINARY@" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"> <description>create a contact matrix</description> <macros> <token name="@BINARY@">hicBuildMatrix</token> <import>macros.xml</import> </macros> - <expand macro="requirements" > + <expand macro="requirements"> <requirement type="package" version="1.9">samtools</requirement> </expand> <command detect_errors="exit_code"><![CDATA[ mkdir ./QCfolder && - mkdir $qc.files_path && + mkdir '$qc.files_path' && @BINARY@ - --samFiles #for $repeat in $samFiles: '${repeat.samFile}' #end for - #if $restrictionCutFileBinSize_conditional.restrictionCutFileBinSize_selector == "optionRestrictionCutFile": - --restrictionCutFile '$restrictionCutFileBinSize_conditional.restrictionCutFile' - --minDistance $restrictionCutFileBinSize_conditional.minDistance - --maxLibraryInsertSize $restrictionCutFileBinSize_conditional.maxLibraryInsertSize + --restrictionCutFile '$restrictionCutFile' + + #if $restrictionSequence: + --restrictionSequence '$restrictionSequence' + #end if + #if $danglingSequence: + --danglingSequence '$danglingSequence' #end if - - #if $restrictionCutFileBinSize_conditional.restrictionCutFileBinSize_selector == "optionBinSize": + #if $minDistance: + --minDistance $minDistance + #end if + #if $maxLibraryInsertSize: + --maxLibraryInsertSize $maxLibraryInsertSize + #end if + + #if $binSizes: --binSize - #for $repeat in $restrictionCutFileBinSize_conditional.binSizes + #for $repeat in $binSizes '${repeat.binSize}' #end for #end if - - #if $restrictionSequence: - --restrictionSequence '$restrictionSequence' - #end if + --genomeAssembly $samFiles[0].samFile.metadata.dbkey #if $region: --region '$region' @@ -42,18 +47,20 @@ --outFileName matrix.$outputFormat - #if $outBam_Boolean: - $outBam_Boolean ./unsorted.bam + #if $outBam: + $outBam ./unsorted.bam #end if $keepSelfCircles + $removeSelfLigation + $skipDuplicationCheck #if $minMappingQuality and $minMappingQuality is not None: --minMappingQuality $minMappingQuality #end if - #if $danglingSequence: - --danglingSequence '$danglingSequence' + #if $chromosomeSizes: + --chromosomeSizes '$chromosomeSizes' #end if --threads @THREADS@ @@ -62,77 +69,57 @@ && mv ./QCfolder/* $qc.files_path/ && - mv $qc.files_path/hicQC.html $qc - && mv $qc.files_path/*.log raw_qc + mv '$qc.files_path/hicQC.html' '$qc' + && mv "$qc.files_path"/*.log raw_qc && mv matrix.$outputFormat matrix - #if $outBam_Boolean: - && samtools sort -T "\${TMPDIR:-.}" ./unsorted.bam -o sorted.bam + #if $outBam: + && samtools sort -@ @THREADS@ -T "\${TMPDIR:-.}" ./unsorted.bam -o sorted.bam #end if - ]]> </command> <inputs> - <repeat max="2" min="2" name="samFiles" title="Sam/Bam files to process (forward/reverse)" - help="Please use the special BAM datatype: qname_input_sorted.bam and use for 'bowtie2' the '--reorder' option to create a BAM file."> - <param name="samFile" type="data" format="sam,qname_input_sorted.bam"/> - </repeat> - <conditional name="restrictionCutFileBinSize_conditional"> - <param name="restrictionCutFileBinSize_selector" type="select" label="Choose to use a restriction cut file or a bin size"> - <option value="optionRestrictionCutFile">Restriction cut file</option> - <option value="optionBinSize" selected="True">Bin size</option> + <!-- can we use multiple=True here with min="2" and max="2" ? --> + <repeat max="2" min="2" name="samFiles" title="Sam/Bam files to process (forward/reverse)" help="Please use the special BAM datatype: qname_input_sorted.bam and use for 'bowtie2' the '--reorder' option to create a BAM file."> + <param name="samFile" type="data" format="sam,qname_input_sorted.bam"> + <validator type="unspecified_build" /> </param> - <when value="optionRestrictionCutFile"> - <param argument="--restrictionCutFile" type="data" format="bed" optional="true" label="BED file with all restriction cut places" - help="Should contaion only mappable restriction sites. If given, the bins are set to match the restriction fragments - (i.e. the region between one restriction site and the next)." /> - <param argument="--minDistance" type="integer" value="" optional="true" label="Minimum distance between restriction sites" - help="Restriction sites that are closer that this distance are merged into one. - This option only applies if --restrictionCutFile is given."/> - <param argument="--maxLibraryInsertSize" type="integer" value="" optional="true" - label="Maximum library insert size defines different cut offs based on the maximum expected library size" - help="*This is not the average fragment size* but the higher end of the fragment size distribution (obtained using for example Fragment Analyzer) - which usually is between 800 to 1500 bp. If this value if not known use the default of 1000. The insert value is used to decide if two mates - belong to the same fragment (by checking if they are within this max insert size) and to decide if a mate - is too far away from the nearest restriction site." /> - </when> - <when value="optionBinSize"> - <repeat name='binSizes' title='Bin size in bp' min="1" help="If used, the restriction cut places (if given) are used to only consider reads that are in the vicinity of the resctriction sites. - Otherwise all reads in the interval are considered. Use multiple ones to create a mcool file."> - <param argument="--binSize" type="integer" value="" optional="true" label="Bin size in bp"/> - </repeat> - </when> - </conditional> + </repeat> + + <expand macro="restrictionCutFile" /> + <expand macro="restrictionSequence" /> + <expand macro="danglingSequence" /> - <param argument="--restrictionSequence" type="text" optional="true" label="Sequence of the restriction site" - help="This is used to discard reads that end/start with such sequence and that are considered un-ligated fragments or - "dangling-ends". If not given, such statistics will not be available." /> + <param argument="--minDistance" type="integer" optional="true" value="" label="Minimum distance between restriction sites" help="Restriction sites that are closer that this distance are merged into one. + This option only applies if --restrictionCutFile is given." /> + <param argument="--maxLibraryInsertSize" type="integer" optional="true" value="" label="Maximum library insert size defines different cut offs based on the maximum expected library size" help="*This is not the average fragment size* but the higher end of the fragment size distribution (obtained using for example Fragment Analyzer) + which usually is between 800 to 1500 bp. If this value if not known use the default of 1000. The insert value is used to decide if two mates + belong to the same fragment (by checking if they are within this max insert size) and to decide if a mate + is too far away from the nearest restriction site." /> + + <repeat name='binSizes' title='Bin size in bp' min="1" help="If used, the restriction cut places (if given) are used to only consider reads that are in the vicinity of the resctriction sites. + Otherwise all reads in the interval are considered. Use multiple ones to create a mcool file."> + <param argument="--binSize" type="integer" optional="true" value="" label="Bin size in bp" /> + </repeat> <expand macro="region" /> - - <param argument="--keepSelfCircles" type="boolean" truevalue="--keepSelfCircles" falsevalue="" - label="Keep self circles" - help="If set, outward facing reads without any restriction fragment (self circles) are kept. They will be counted and shown in the QC plots." /> - + <param argument="--keepSelfCircles" type="boolean" truevalue="--keepSelfCircles" falsevalue="" label="Keep self circles" help="If set, outward facing reads without any restriction fragment (self circles) are kept. They will be counted and shown in the QC plots." /> + <param argument="--removeSelfLigation" type="boolean" truevalue="--removeSelfLigation" falsevalue="" label="remove self ligation" help="If set, inward facing reads less than 1000 bp apart and having a restriction site in between are removed. Although this reads do not contribute to any distant contact, they are useful to account for bias in the data" /> <expand macro="minMappingQuality" /> + <param argument="--skipDuplicationCheck" type="boolean" truevalue="--skipDuplicationCheck" falsevalue="" label="Skip duplication check" help="Identification of duplicated read pairs is memory consuming. Thus, in case of memory errors this check can be skipped." /> + <param argument="--chromosomeSizes" type="data" format="tabular" optional="true" label="Chromosome sizes for your genome" help="File with the chromosome sizes for your genome. A tab-delimited two column layout 'chr_name size' is expected + Usually the sizes can be determined from the SAM/BAM input files, however, + for cHi-C or scHi-C it can be that at the start or end no data is present. + Please consider that this option causes that only reads are considered which are on the listed chromosomes. + Use this option to guarantee fixed sizes. An example file is available via UCSC: + http://hgdownload.soe.ucsc.edu/goldenPath/dm3/bigZips/dm3.chrom.sizes" /> - <param argument="--danglingSequence" type="text" optional="true" label="Dangling sequence" - help="Sequence left by the restriction enzyme after cutting. - Each restriction enzyme recognizes a different DNA sequence and, - after cutting, they leave behind a specific ‘sticky’ end or dangling end sequence. - For example, for HindIII the restriction site is AAGCTT and the dangling end is AGCT. - For DpnII, the restriction site and dangling end sequence are the same: GATC. - This information is easily found on the description of the restriction enzyme. - The dangling sequence is used to classify and report reads whose 5’ end starts with such sequence as dangling-end reads. - A significant portion of dangling-end reads in a sample are indicative of a problem with the re-ligation step of the protocol. "/> - - <param name='outBam_Boolean' type='boolean' truevalue='--outBam' falsevalue="" checked="false" label="Save valid Hi-C reads in BAM file" - help="A bam + <param argument='--outBam' type='boolean' truevalue='--outBam' falsevalue="" checked="false" label="Save valid Hi-C reads in BAM file" help="A bam file containing all valid Hi-C reads can be created using this option. This bam file could be useful to inspect the distribution of valid Hi-C reads pairs or for other downstream analyses, but is not used by any HiCExplorer tool. Computation will be significantly - longer if this option is set."/> + longer if this option is set." /> <param name='outputFormat' type='select' label="Output file format"> <option value='h5'>HiCExplorer format</option> @@ -140,64 +127,63 @@ </param> </inputs> <outputs> - <data name="outBam" from_work_dir="sorted.bam" format="bam" label="${tool.name} BAM file on ${on_string}"> - <filter>outBam_Boolean</filter> + <data name="outfileBam" from_work_dir="sorted.bam" format="bam" label="${tool.name} BAM file on ${on_string}"> + <filter>outBam</filter> </data> <data name="outFileName" from_work_dir="matrix" format="h5" label="${tool.name} MATRIX on ${on_string}"> <change_format> <when input="outputFormat" value="cool" format="cool" /> </change_format> </data> - <data name="qc" format="html" label="${tool.name} QC on ${on_string}"/> - + <data name="qc" format="html" label="${tool.name} QC on ${on_string}" /> <data name="raw_qc" from_work_dir='raw_qc' format='txt' label="${tool.name} raw QC on ${on_string}" /> </outputs> <tests> - <test> + <test expect_num_outputs="4"> <repeat name="samFiles"> - <param name="samFile" value="small_test_R1_unsorted.sam"/> + <param name="samFile" value="small_test_R1_unsorted.sam" dbkey="hg38" /> </repeat> <repeat name="samFiles"> - <param name="samFile" value="small_test_R2_unsorted.sam"/> + <param name="samFile" value="small_test_R2_unsorted.sam" dbkey="hg38" /> + </repeat> + <param name='outputFormat' value='h5' /> + <repeat name='binSizes'> + <param name="binSize" value="5000" /> </repeat> - <conditional name="restrictionCutFileBinSize_conditional"> - <param name="restrictionCutFileBinSize_selector" value="optionBinSize"/> - <repeat name='binSizes'> - <param name="binSize" value="5000"/> - </repeat> - </conditional> - <param name='outputFormat' value='h5'/> - <param name='outBam_Boolean' value="True" /> - <output name="outBam" file="small_test_matrix_result_sorted.bam" ftype="bam"/> + <param name='restrictionCutFile' value='DpnII_10k.bed' /> + <param name='restrictionSequence' value='GATC' /> + <param name='danglingSequence' value='GATC' /> + <param name='outBam' value="True" /> + <output name="outfileBam" file="small_test_matrix_result_sorted.bam" ftype="bam" /> <output name="outFileName" ftype="h5"> <assert_contents> - <has_h5_keys keys='intervals,matrix'/> + <has_h5_keys keys='intervals,matrix' /> </assert_contents> </output> - <output name="raw_qc" file='raw_qc_report' compare='diff' lines_diff='2'/> + <output name="raw_qc" file='raw_qc_report' compare='diff' lines_diff='2' /> </test> - <test> + <test expect_num_outputs="4"> <repeat name="samFiles"> - <param name="samFile" value="small_test_R1_unsorted.sam"/> + <param name="samFile" value="small_test_R1_unsorted.sam" dbkey="hg38" /> </repeat> <repeat name="samFiles"> - <param name="samFile" value="small_test_R2_unsorted.sam"/> + <param name="samFile" value="small_test_R2_unsorted.sam" dbkey="hg38" /> + </repeat> + <repeat name='binSizes'> + <param name="binSize" value="5000" /> </repeat> - <conditional name="restrictionCutFileBinSize_conditional"> - <param name="restrictionCutFileBinSize_selector" value="optionBinSize"/> - <repeat name='binSizes'> - <param name="binSize" value="5000"/> - </repeat> - </conditional> - <param name='outputFormat' value='cool'/> - <param name='outBam_Boolean' value="True" /> - <output name="outBam" file="small_test_matrix_result_sorted.bam" ftype="bam"/> + <param name='restrictionCutFile' value='DpnII_10k.bed' /> + <param name='restrictionSequence' value='GATC' /> + <param name='danglingSequence' value='GATC' /> + <param name='outputFormat' value='cool' /> + <param name='outBam' value="True" /> + <output name="outfileBam" file="small_test_matrix_result_sorted.bam" ftype="bam" /> <output name="outFileName" ftype="cool"> <assert_contents> - <has_h5_keys keys='bins,chroms,indexes,pixels'/> + <has_h5_keys keys='bins,chroms,indexes,pixels' /> </assert_contents> </output> - <output name="raw_qc" file='raw_qc_report' compare='diff' lines_diff='2'/> + <output name="raw_qc" file='raw_qc_report' compare='diff' lines_diff='2' /> </test> </tests> <help><![CDATA[ @@ -296,6 +282,6 @@ | For more information about HiCExplorer please consider our documentation on readthedocs.io_. .. _readthedocs.io: http://hicexplorer.readthedocs.io/en/latest/index.html -]]></help> +]]> </help> <expand macro="citations" /> </tool>