diff bigwig_outlier_bed.xml @ 0:ebcd48f183b3 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bigwig_outlier_bed commit 091caba3c5b066b293745ccee5cd31132fec3b4b
author iuc
date Fri, 05 Jul 2024 06:00:15 +0000
parents
children 8377a6abb4da
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bigwig_outlier_bed.xml	Fri Jul 05 06:00:15 2024 +0000
@@ -0,0 +1,177 @@
+<tool name="Bigwig extremes to bed features" id="bigwig_outlier_bed" version="0.1.4" profile="22.05">
+  <description>Writes high and low bigwig runs as features in a bed file</description>
+  <edam_topics>
+      <edam_topic>topic_0157</edam_topic>
+      <edam_topic>topic_0092</edam_topic>
+  </edam_topics>
+  <edam_operations>
+      <edam_operation>operation_0337</edam_operation>
+  </edam_operations>
+  <xrefs>
+    <xref type="bio.tools">bigtools</xref>
+  </xrefs>
+  <requirements>
+    <requirement type="package" version="3.12.3">python</requirement>
+    <requirement type="package" version="2.0.0">numpy</requirement>
+    <requirement type="package" version="0.1.4">pybigtools</requirement>
+  </requirements>
+  <required_files>
+      <include path="bigwig_outlier_bed.py"/>
+  </required_files>
+  <version_command><![CDATA[python -c "import pybigtools; from importlib.metadata import version; print(version('pybigtools'))"]]></version_command>
+  <command><![CDATA[python '${__tool_directory__}/bigwig_outlier_bed.py'
+--bigwig 
+#for bw in $bigwig: 
+ '$bw' 
+#end for
+--bigwiglabels
+#for bw in $bigwig: 
+  '$bw.name'
+#end for
+--outbeds '$outbeds'
+#if $outbeds in ['outhilo', 'outall']:
+  --bedouthilo '$bedouthilo'
+#end if
+#if $outbeds in ['outhi', 'outall', 'outlohi']:
+  --bedouthi '$bedouthi'
+#end if
+#if $outbeds in ['outlo', 'outall', 'outlohi']:
+  --bedoutlo '$bedoutlo'
+#end if
+--minwin '$minwin'
+#if $qhi:
+--qhi '$qhi'
+#end if
+#if $qlo:
+--qlo '$qlo'
+#end if
+#if $tableout == "create":
+ --tableoutfile '$tableoutfile'
+#end if
+]]></command>
+  <inputs>
+    <param name="bigwig" type="data" optional="false" label="Choose one or more bigwig file(s) to return outlier regions as a bed file" 
+      help="If more than one, MUST all use the same reference sequence to be displayable. Feature names will include the bigwig label." format="bigwig" multiple="true"/>
+    <param name="minwin" type="integer" value="10" label="Minimum continuous bases to count as a high or low bed feature" 
+      help="Continuous features as long or longer than this window size will appear as bed features"/>
+    <param name="qhi" type="float" value="0.99" label="Quantile cutoff for a high region - 0.99 will cut off at or above the 99th percentile" help="Required" optional="false"/>
+    <param name="qlo" type="float" value="0.01" label="Quantile cutoff for a low region - 0.01 will cut off at or below the 1st percentile." help="Optional" optional="true"/>
+    <param name="outbeds" type="select" label="Select the required bed file outputs" help="Any combination of the 3 different kinds of bed file output can be made">
+      <option value="outhilo" selected="true">Make 1 bed output with both low and high regions</option>
+      <option value="outhi">Make 1 bed output with high regions only</option>
+      <option value="outlo">Make 1 bed output with low regions only</option>
+      <option value="outall">Make 3 bed outputs with low and high together in one, high in one and low in the other</option>
+      <option value="outlohi">Make 2 bed outputs with high in one and low in the other</option>
+    </param>
+    <param name="tableout" type="select" label="Write a table showing contig statistics for each bigwig input" help="">
+      <option value="donotmake">Do not create this report</option>
+      <option value="create" selected="true">Create this report</option>
+    </param>
+  </inputs>
+  <outputs>
+    <data name="bedouthilo" format="bed" label="High_and_low_bed" hidden="false">
+      <filter>outbeds in ["outall", "outhilo"]</filter>
+    </data>
+    <data name="bedouthi" format="bed" label="High bed" hidden="false">
+      <filter>outbeds in ["outall", "outlohi", "outhi"]</filter>
+    </data>
+    <data name="bedoutlo" format="bed" label="Low bed" hidden="false">
+      <filter>outbeds in ["outall", "outlohi", "outlo"]</filter>
+    </data>
+    <data name="tableoutfile" format="tabular" label="Contig statistics" hidden="false">
+      <filter>tableout == "create"</filter>
+    </data>
+  </outputs>
+  <tests>
+    <test expect_num_outputs="1">
+      <output name="bedouthilo" value="bedouthilo_sample" compare="diff" lines_diff="0"/>
+      <param name="outbeds" value="outhilo"/>
+      <param name="bigwig" value="bigwig_sample"/>
+      <param name="minwin" value="10"/>
+      <param name="qhi" value="0.99"/>
+      <param name="qlo" value="0.01"/>
+      <param name="tableout" value="donotmake"/>
+    </test>
+    <test expect_num_outputs="2">
+      <output name="bedouthilo" value="bedouthilo_sample" compare="diff" lines_diff="0"/>
+      <output name="tableoutfile" value="table_sample" compare="diff" lines_diff="0"/>
+      <param name="outbeds" value="outhilo"/>
+      <param name="bigwig" value="bigwig_sample"/>
+      <param name="minwin" value="10"/>
+      <param name="qhi" value="0.99"/>
+      <param name="qlo" value="0.01"/>
+      <param name="tableout" value="create"/>
+    </test>
+    <test expect_num_outputs="3">
+      <output name="bedouthi" value="bedouthi_sample" compare="diff" lines_diff="0"/>
+      <output name="bedoutlo" value="bedoutlo_sample" compare="diff" lines_diff="0"/>
+      <output name="tableoutfile" value="table_sample" compare="diff" lines_diff="0"/>
+      <param name="outbeds" value="outlohi"/>
+      <param name="bigwig" value="bigwig_sample"/>
+      <param name="minwin" value="10"/>
+      <param name="qhi" value="0.99"/>
+      <param name="qlo" value="0.01"/>
+      <param name="tableout" value="create"/>
+    </test>
+    <test expect_num_outputs="4">
+      <output name="bedouthilo" value="bedouthilo2_sample" compare="diff" lines_diff="0"/>
+      <output name="bedoutlo" value="bedoutlo2_sample" compare="diff" lines_diff="0"/>
+      <output name="bedouthi" value="bedouthi2_sample" compare="diff" lines_diff="0"/>
+      <output name="tableoutfile" value="table2_sample" compare="diff" lines_diff="0"/>
+      <param name="outbeds" value="outall"/>
+      <param name="bigwig" value="bigwig_sample,1.bigwig"/>
+      <param name="minwin" value="10"/>
+      <param name="qhi" value="0.99"/>
+      <param name="qlo" value="0.01"/>
+      <param name="tableout" value="create"/>
+    </test>
+  </tests>
+  <help><![CDATA[
+
+ **Purpose**
+ 
+ *Combine bigwig outlier regions into bed files*
+ 
+ Bigwigs allow quantative tracks to be viewed in an interactive genome browser like JBrowse2. 
+ Peaks are easy to see. Unusually low regions can be harder to spot, even if they are relatively large, unless the view is zoomed right in.
+ Automated methods for combining evidence from multiple bigwigs can be useful for constructing browseable *issues* or other kinds of summary bed format tracks.
+ For example, combining coverage outlier regions, with the frequency of specific dicnucleotide short tandem repeats,
+ for evaluating technical sequencing technology effects in the evaluation of a genome assembly described at https://github.com/arangrhie/T2T-Polish
+
+ **What does it produce?**
+
+ Bed format results are output, containing each continuous segment of at least *minwin* base pairs above a cut point, or below another cut point. 
+ These can be viewed as features on the reference genome using a genome browser tool like JBrowse2.
+ Three kinds of bed files can be created depending on the values included.  
+ Both high and low regions in one bed output is the default. This can be displayed in JBrowse2 with colour indicating the high or low status, 
+ one less track and a little easier to understand. High and low features can be output as separate bed files.
+
+ **How is it controlled?**
+
+ The cut points are calculated using a user supplied quantile, from each chromosome's bigwig value distribution. 
+ The defaults are 0.99 and 0.01 and the default *minwin* is 10.  
+ The probability of 10 values at or below the 1st percentile purely by chance is about 0.01**10, so false positives should be
+ rare, even in a 3GB genome.  
+ This data driven and non-parametric method is preferred for the asymmetrical distributions found in typical bigwigs, such as depth of coverage 
+ for genome sequencing reads. Coverage values are truncated at zero, and regions with very high values often form a long sparse right tail. 
+
+ **How do I choose the input data?**
+
+ One or more bigwigs and can be selected as inputs.  
+ Multiple bigwigs will be combined in bed files, so must share the reference genome to display
+ using JBrowse2.
+ 
+ .. class:: warningmark
+
+ **Lower quantile may not behave as expected in bigwigs with large fractions of zero values**
+
+ The lower cut point may be problematic for integer values like coverage if many values are zero. For example, if 5% of bases have zero coverage, the 1st percentile is also zero, 
+ but that cut point will include the entire 5% *at or below 0*
+
+ 
+  ]]></help>
+  <citations>
+    <citation type="doi">10.1093/bioinformatics/btae350</citation>
+  </citations>
+</tool>
+