<!--
view bigwig_outlier_bed.xml @ 4:2488bcddaf14 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bigwig_outlier_bed commit d27456ca56231eb9a4eb360c99f44af6c18a0afc
author iuc
date Mon, 30 Sep 2024 01:42:34 +0000
parents 00b3da7776a0
children
line wrap: on
line source
-->

<tool name="Bigwig outliers to bed features" id="bigwig_outlier_bed" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05">
  <description>Writes high and low bigwig runs as features in a bed file</description>
  <!-- Version tokens: @TOOL_VERSION@ and @VERSION_SUFFIX@ build the tool version attribute;
       @TOOL_VERSION@ and @PYTHON_VERSION@ pin the package requirements below. -->
  <macros>
    <token name="@TOOL_VERSION@">0.2.0</token>
    <token name="@PYTHON_VERSION@">3.12.3</token>
    <token name="@VERSION_SUFFIX@">2</token>
  </macros>
  <!-- EDAM ontology annotations for this tool. -->
  <edam_topics>
    <edam_topic>topic_0157</edam_topic>
    <edam_topic>topic_0092</edam_topic>
  </edam_topics>
  <edam_operations>
    <edam_operation>operation_0337</edam_operation>
  </edam_operations>
  <xrefs>
    <xref type="bio.tools">bigtools</xref>
  </xrefs>
  <!-- Packages needed to run bigwig_outlier_bed.py. -->
  <requirements>
    <requirement version="@PYTHON_VERSION@" type="package">python</requirement>
    <requirement version="2.0.0" type="package">numpy</requirement>
    <requirement version="@TOOL_VERSION@" type="package">pybigtools</requirement>
  </requirements>
  <!-- The only file from the tool directory that the job needs. -->
  <required_files>
    <include path="bigwig_outlier_bed.py"/>
  </required_files>
  <!-- Reports the installed pybigtools version. -->
  <version_command><![CDATA[python -c "import pybigtools; from importlib.metadata import version; print(version('pybigtools'))"]]></version_command>
  <command><![CDATA[
## Builds the bigwig_outlier_bed.py command line.
##
## Labels come from user-editable dataset names. Every character outside
## A-Z a-z 0-9 _ . - is replaced with an underscore before the label is
## single-quoted on the command line, so a quote or whitespace in a name
## cannot break the shell quoting or the bed feature names built from it.
#import re
#set $beds = str($outbeds)
#set $labels = [re.sub('[^A-Za-z0-9_.-]', '_', str(x.name)) for x in $bigwig]
python '${__tool_directory__}/bigwig_outlier_bed.py'
## One bigwig path per input, then one label per input in the same order.
#for $bwf in $bigwig:
--bigwig '$bwf'
#end for
#for $label in $labels:
--bigwiglabels '$label'
#end for
--outbeds '$beds'
## Only pass an output path when the matching output dataset is created
## (these conditions mirror the output filters).
#if $beds in ['outhilo', 'outall']:
  --bedouthilo '$bedouthilo'
#end if
#if $beds in ['outhi', 'outall', 'outlohi']:
  --bedouthi '$bedouthi'
#end if
#if $beds in ['outlo', 'outall', 'outlohi']:
  --bedoutlo '$bedoutlo'
#end if
#if $beds == 'outzero':
  --bedoutzero '$bedoutzero'
#end if
--minwin '$minwin'
## Test for "value supplied" rather than truthiness, so that an explicit
## quantile of 0 is still passed to the script instead of being dropped.
#if str($qhi).strip() not in ['', 'None']:
--qhi '$qhi'
#end if
#if str($qlo).strip() not in ['', 'None']:
--qlo '$qlo'
#end if
#if str($tableout) == 'create' or $beds == 'outtab':
 --tableoutfile '$tableoutfile'
#end if
]]></command>
  <inputs>
    <!-- One or more bigwig datasets; each is passed to the script with its label. -->
    <param name="bigwig" type="data" format="bigwig" multiple="true" optional="false"
      label="Choose one or more bigwig file(s) to return outlier regions as a bed file"
      help="If more than one, MUST all use the same reference sequence to be displayable. Feature names will include the bigwig label."/>
    <!-- A run of fewer than one base cannot exist, so the window length is bounded below at 1. -->
    <param name="minwin" type="integer" value="10" min="1"
      label="Minimum continuous bases to count as a high or low bed feature"
      help="Minimum continuous length to count as a bed feature. If windowed bigwig, must be bigger than window size to have any effect"/>
    <!-- Quantiles are probabilities: restrict both cutoffs to the closed interval [0, 1]
         so an out-of-range value is rejected in the form instead of failing the job. -->
    <param name="qhi" type="float" value="0.99999" min="0.0" max="1.0" optional="false"
      label="Quantile cutoff for a high region - 0.99999 will cut off at about 1 in 100,000"
      help="1 per 100k might be a few thousand features in a 200M chromosome - depends on the distribution - see the table output"/>
    <param name="qlo" type="float" value="0.00001" min="0.0" max="1.0" optional="true"
      label="Quantile cutoff for a low region - 0.01 will cut off at or below the 1st percentile."
      help="Optional"/>
    <!-- Exactly one option is chosen; each option maps to a fixed set of bed outputs. -->
    <param name="outbeds" type="select"
      label="Select the required bed file outputs or none for a bigwig value distribution report"
      help="Each option produces a fixed set of bed files: high and low regions combined, high only, low only, zero-value runs only, or several of these together">
      <option value="outhilo" selected="true">Make 1 bed output with both low and high regions</option>
      <option value="outhi">Make 1 bed output with high regions only</option>
      <option value="outlo">Make 1 bed output with low regions only</option>
      <option value="outzero">Make 1 bed output for regions with contiguous zero values only</option>
      <option value="outall">Make 3 bed outputs with low and high together in one, high in one and low in the other</option>
      <option value="outlohi">Make 2 bed outputs with high in one and low in the other</option>
      <option value="outtab">NO bed outputs. Report bigwig value distribution table only</option>
    </param>
    <!-- The table is also written whenever 'outtab' is selected above, regardless of this setting. -->
    <param name="tableout" type="select" label="Write a table showing contig statistics for each bigwig input">
      <option value="donotmake">Do not create this report</option>
      <option value="create" selected="true">Create this report</option>
    </param>
  </inputs>
  <outputs>
    <!-- Every output is conditional: its filter decides, from the 'outbeds' and
         'tableout' selections, whether the dataset is created at all. -->
    <!-- Combined high + low regions. -->
    <data format="bed" name="bedouthilo" label="High_and_low_bed">
      <filter>outbeds in ["outhilo", "outall"]</filter>
    </data>
    <!-- High regions only. -->
    <data format="bed" name="bedouthi" label="High bed">
      <filter>outbeds in ["outhi", "outlohi", "outall"]</filter>
    </data>
    <!-- Low regions only. -->
    <data format="bed" name="bedoutlo" label="Low bed">
      <filter>outbeds in ["outlo", "outlohi", "outall"]</filter>
    </data>
    <!-- Runs of contiguous zero values. -->
    <data format="bed" name="bedoutzero" label="Zeros only bed">
      <filter>outbeds in ["outzero"]</filter>
    </data>
    <!-- Per-contig statistics table; forced on when only the table is requested. -->
    <data format="txt" name="tableoutfile" label="Contig statistics">
      <filter>tableout == "create" or outbeds == "outtab"</filter>
    </data>
  </outputs>
  <tests>
    <!-- 1: combined high/low bed only, no statistics table. -->
    <test expect_num_outputs="1">
      <param name="bigwig" value="fake.bigwig"/>
      <param name="outbeds" value="outhilo"/>
      <param name="minwin" value="10"/>
      <param name="qhi" value="0.6"/>
      <param name="qlo" value="0.01"/>
      <param name="tableout" value="donotmake"/>
      <output name="bedouthilo" value="bedouthilo_sample" compare="diff" lines_diff="0"/>
    </test>
    <!-- 2: table-only mode, no bed outputs. -->
    <test expect_num_outputs="1">
      <param name="bigwig" value="fake.bigwig"/>
      <param name="outbeds" value="outtab"/>
      <param name="minwin" value="10"/>
      <param name="qhi" value="0.7"/>
      <param name="qlo" value="0.3"/>
      <param name="tableout" value="create"/>
      <output name="tableoutfile" value="table_only_sample" compare="diff" lines_diff="0"/>
    </test>
    <!-- 3: combined high/low bed plus statistics table. -->
    <test expect_num_outputs="2">
      <param name="bigwig" value="bigwig_sample"/>
      <param name="outbeds" value="outhilo"/>
      <param name="minwin" value="10"/>
      <param name="qhi" value="0.7"/>
      <param name="qlo" value="0.3"/>
      <param name="tableout" value="create"/>
      <output name="bedouthilo" value="bedouthilo_sample_2" compare="diff" lines_diff="0"/>
      <output name="tableoutfile" value="table_sample" compare="diff" lines_diff="0"/>
    </test>
    <!-- 4: high bed with the optional lower quantile left empty. -->
    <test expect_num_outputs="2">
      <param name="bigwig" value="bigwig_sample"/>
      <param name="outbeds" value="outhi"/>
      <param name="minwin" value="10"/>
      <param name="qhi" value="0.69"/>
      <param name="qlo" value=""/>
      <param name="tableout" value="create"/>
      <output name="bedouthi" value="bedouthi_qlo_notset_sample" compare="diff" lines_diff="0"/>
      <output name="tableoutfile" value="table_qlo_notset_sample" compare="diff" lines_diff="0"/>
    </test>
    <!-- 5: separate high and low beds plus statistics table. -->
    <test expect_num_outputs="3">
      <param name="bigwig" value="fake.bigwig"/>
      <param name="outbeds" value="outlohi"/>
      <param name="minwin" value="10"/>
      <param name="qhi" value="0.5"/>
      <param name="qlo" value="0.5"/>
      <param name="tableout" value="create"/>
      <output name="bedouthi" value="bedouthi_sample" compare="diff" lines_diff="0"/>
      <output name="bedoutlo" value="bedoutlo_sample" compare="diff" lines_diff="0"/>
      <output name="tableoutfile" value="table3_sample" compare="diff" lines_diff="0"/>
    </test>
    <!-- 6: two bigwig inputs merged into one high bed plus statistics table. -->
    <test expect_num_outputs="2">
      <param name="bigwig" value="fake.bigwig,bigwig_sample"/>
      <param name="outbeds" value="outhi"/>
      <param name="minwin" value="10"/>
      <param name="qhi" value="0.5"/>
      <param name="qlo" value="0.5"/>
      <param name="tableout" value="create"/>
      <output name="bedouthi" value="bedouthi2_sample" compare="diff" lines_diff="0"/>
      <output name="tableoutfile" value="table2_sample" compare="diff" lines_diff="0"/>
    </test>
    <!-- 7: zero-run bed from two bigwig inputs with a shorter minimum window. -->
    <test expect_num_outputs="1">
      <param name="bigwig" value="fake.bigwig,bigwig_sample"/>
      <param name="outbeds" value="outzero"/>
      <param name="minwin" value="2"/>
      <param name="qhi" value="0.5"/>
      <param name="qlo" value="0.5"/>
      <param name="tableout" value="donotmake"/>
      <output name="bedoutzero" value="bedoutzero_sample" compare="diff" lines_diff="0"/>
    </test>
  </tests>
  <help><![CDATA[

 **Purpose**
 
 *Combine bigwig outlier regions into bed files*
 
 Bigwigs allow quantitative tracks to be viewed in an interactive genome browser like JBrowse2. 
 Peaks are easy to see. Unusually low regions can be harder to spot, even if they are relatively large, unless the view is zoomed right in.
 Automated methods for combining evidence from multiple bigwigs can be useful for constructing browseable *issues* or other kinds of summary bed format tracks.
 For example, combining coverage outlier regions, with the frequency of specific dinucleotide short tandem repeats,
 for evaluating technical sequencing technology effects in the evaluation of a genome assembly described at https://github.com/arangrhie/T2T-Polish

 **What does it produce?**

 Bed format results are output, containing each continuous segment of at least *minwin* base pairs above a cut point, or below another cut point. 
 These can be viewed as features on the reference genome using a genome browser tool like JBrowse2.
 Three kinds of bed files can be created depending on the values included.  
 Both high and low regions in one bed output is the default. This can be displayed in JBrowse2 with colour indicating the high or low status, 
 one less track and a little easier to understand. High and low features can be output as separate bed files.

 **How is it controlled?**

 The cut points are calculated using a user supplied quantile, from each chromosome's bigwig value distribution. 
 The default quantiles are 0.99999 and 0.00001 and the default *minwin* is 10.  
 As an illustration, with a lower quantile of 0.01 the probability of 10 consecutive values at or below the 1st percentile purely by chance is about 0.01**10, so false positives should be
 rare, even in a 3GB genome.  
 This data driven and non-parametric method is preferred for the asymmetrical distributions found in typical bigwigs, such as depth of coverage 
 for genome sequencing reads. Coverage values are truncated at zero, and regions with very high values often form a long sparse right tail. 

 **How do I choose the input data?**

 One or more bigwigs can be selected as inputs.  
 Multiple bigwigs will be combined in bed files, so must share the reference genome to display
 using JBrowse2.
 
 .. class:: warningmark

 **Lower quantile may not behave as expected in bigwigs with large fractions of zero values**

 The lower cut point may be problematic for integer values like coverage if many values are zero. For example, if 5% of bases have zero coverage, the 1st percentile is also zero, 
 but that cut point will include the entire 5% *at or below 0*

 
  ]]></help>
  <!-- Publication to cite when this tool is used.
       NOTE(review): presumably the paper for the underlying bigtools/pybigtools library; confirm the DOI target. -->
  <citations>
    <citation type="doi">10.1093/bioinformatics/btae350</citation>
  </citations>
</tool>