comparison bigwig_outlier_bed.xml @ 0:ebcd48f183b3 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bigwig_outlier_bed commit 091caba3c5b066b293745ccee5cd31132fec3b4b
author iuc
date Fri, 05 Jul 2024 06:00:15 +0000
parents
children 8377a6abb4da
comparison
equal deleted inserted replaced
-1:000000000000 0:ebcd48f183b3
1 <tool name="Bigwig extremes to bed features" id="bigwig_outlier_bed" version="0.1.4" profile="22.05">
2 <!-- One-line summary of what the tool does. --><description>Writes high and low bigwig runs as features in a bed file</description>
3 <!-- EDAM ontology topic identifiers attached to this tool. --><edam_topics>
4 <edam_topic>topic_0157</edam_topic>
5 <edam_topic>topic_0092</edam_topic>
6 </edam_topics>
7 <!-- EDAM ontology operation identifier attached to this tool. --><edam_operations>
8 <edam_operation>operation_0337</edam_operation>
9 </edam_operations>
10 <!-- Cross-reference to the bio.tools entry named "bigtools". --><xrefs>
11 <xref type="bio.tools">bigtools</xref>
12 </xrefs>
13 <!-- Package dependencies declared by the wrapper, each pinned to an exact version. --><requirements>
14 <requirement type="package" version="3.12.3">python</requirement>
15 <requirement type="package" version="2.0.0">numpy</requirement>
16 <requirement type="package" version="0.1.4">pybigtools</requirement>
17 </requirements>
18 <!-- The Python script invoked by the command section is the only file listed as required. --><required_files>
19 <include path="bigwig_outlier_bed.py"/>
20 </required_files>
21 <!-- Prints the installed pybigtools distribution version, looked up through importlib.metadata. --><version_command><![CDATA[python -c "import pybigtools; from importlib.metadata import version; print(version('pybigtools'))"]]></version_command>
22 <!-- Builds the bigwig_outlier_bed.py call: the path of every selected bigwig, one label per bigwig in the same order (single quotes in a dataset name are replaced with underscores so the name cannot break out of the shell quoting), then only the output and cut-off flags the user selected. The quantiles are tested as strings so an explicit value of 0 is still passed to the script. --><command><![CDATA[python '${__tool_directory__}/bigwig_outlier_bed.py'
23 --bigwig
24 #for bw in $bigwig:
25 '$bw'
26 #end for
27 --bigwiglabels
28 #for bw in $bigwig:
29 '${str($bw.name).replace("'", "_")}'
30 #end for
31 --outbeds '$outbeds'
32 #if $outbeds in ['outhilo', 'outall']:
33 --bedouthilo '$bedouthilo'
34 #end if
35 #if $outbeds in ['outhi', 'outall', 'outlohi']:
36 --bedouthi '$bedouthi'
37 #end if
38 #if $outbeds in ['outlo', 'outall', 'outlohi']:
39 --bedoutlo '$bedoutlo'
40 #end if
41 --minwin '$minwin'
42 #if str($qhi) != '':
43 --qhi '$qhi'
44 #end if
45 #if str($qlo) != '':
46 --qlo '$qlo'
47 #end if
48 #if $tableout == "create":
49 --tableoutfile '$tableoutfile'
50 #end if
51 ]]></command>
52 <!-- User-facing parameters. The quantile cut-offs are bounded to the 0..1 range and the window size to at least 1, so impossible values are rejected in the form instead of failing inside the script. --><inputs>
53 <param name="bigwig" type="data" optional="false" label="Choose one or more bigwig file(s) to return outlier regions as a bed file"
54 help="If more than one, MUST all use the same reference sequence to be displayable. Feature names will include the bigwig label." format="bigwig" multiple="true"/>
55 <param name="minwin" type="integer" value="10" min="1" label="Minimum continuous bases to count as a high or low bed feature"
56 help="Continuous features as long or longer than this window size will appear as bed features"/>
57 <param name="qhi" type="float" value="0.99" min="0" max="1" label="Quantile cutoff for a high region - 0.99 will cut off at or above the 99th percentile" help="Required" optional="false"/>
58 <param name="qlo" type="float" value="0.01" min="0" max="1" label="Quantile cutoff for a low region - 0.01 will cut off at or below the 1st percentile." help="Optional" optional="true"/>
59 <param name="outbeds" type="select" label="Select the required bed file outputs" help="Any combination of the 3 different kinds of bed file output can be made">
60 <option value="outhilo" selected="true">Make 1 bed output with both low and high regions</option>
61 <option value="outhi">Make 1 bed output with high regions only</option>
62 <option value="outlo">Make 1 bed output with low regions only</option>
63 <option value="outall">Make 3 bed outputs with low and high together in one, high in one and low in the other</option>
64 <option value="outlohi">Make 2 bed outputs with high in one and low in the other</option>
65 </param>
66 <param name="tableout" type="select" label="Write a table showing contig statistics for each bigwig input" help="">
67 <option value="donotmake">Do not create this report</option>
68 <option value="create" selected="true">Create this report</option>
69 </param>
70 </inputs>
71 <!-- Each dataset is created only when its filter matches the "outbeds" / "tableout" selection. Labels carry the tool name and the input description so history items can be traced back to their source bigwigs, and they follow one naming pattern. --><outputs>
72 <data name="bedouthilo" format="bed" label="${tool.name} on ${on_string}: high and low bed" hidden="false">
73 <filter>outbeds in ["outall", "outhilo"]</filter>
74 </data>
75 <data name="bedouthi" format="bed" label="${tool.name} on ${on_string}: high bed" hidden="false">
76 <filter>outbeds in ["outall", "outlohi", "outhi"]</filter>
77 </data>
78 <data name="bedoutlo" format="bed" label="${tool.name} on ${on_string}: low bed" hidden="false">
79 <filter>outbeds in ["outall", "outlohi", "outlo"]</filter>
80 </data>
81 <data name="tableoutfile" format="tabular" label="${tool.name} on ${on_string}: contig statistics" hidden="false">
82 <filter>tableout == "create"</filter>
83 </data>
84 </outputs>
85 <!-- Four scenarios. In each one the parameters come first, in the order they are declared under inputs, followed by the expected outputs in the order they are declared under outputs. --><tests>
86 <!-- One bigwig, combined high/low bed only, statistics table switched off. --><test expect_num_outputs="1">
87 <param name="bigwig" value="bigwig_sample"/>
88 <param name="minwin" value="10"/>
89 <param name="qhi" value="0.99"/>
90 <param name="qlo" value="0.01"/>
91 <param name="outbeds" value="outhilo"/>
92 <param name="tableout" value="donotmake"/>
93 <output name="bedouthilo" value="bedouthilo_sample" compare="diff" lines_diff="0"/>
94 </test>
95 <!-- One bigwig, combined high/low bed plus the statistics table. --><test expect_num_outputs="2">
96 <param name="bigwig" value="bigwig_sample"/>
97 <param name="minwin" value="10"/>
98 <param name="qhi" value="0.99"/>
99 <param name="qlo" value="0.01"/>
100 <param name="outbeds" value="outhilo"/>
101 <param name="tableout" value="create"/>
102 <output name="bedouthilo" value="bedouthilo_sample" compare="diff" lines_diff="0"/>
103 <output name="tableoutfile" value="table_sample" compare="diff" lines_diff="0"/>
104 </test>
105 <!-- One bigwig, separate high and low beds plus the statistics table. --><test expect_num_outputs="3">
106 <param name="bigwig" value="bigwig_sample"/>
107 <param name="minwin" value="10"/>
108 <param name="qhi" value="0.99"/>
109 <param name="qlo" value="0.01"/>
110 <param name="outbeds" value="outlohi"/>
111 <param name="tableout" value="create"/>
112 <output name="bedouthi" value="bedouthi_sample" compare="diff" lines_diff="0"/>
113 <output name="bedoutlo" value="bedoutlo_sample" compare="diff" lines_diff="0"/>
114 <output name="tableoutfile" value="table_sample" compare="diff" lines_diff="0"/>
115 </test>
116 <!-- Two bigwigs, all three bed files plus the statistics table. --><test expect_num_outputs="4">
117 <param name="bigwig" value="bigwig_sample,1.bigwig"/>
118 <param name="minwin" value="10"/>
119 <param name="qhi" value="0.99"/>
120 <param name="qlo" value="0.01"/>
121 <param name="outbeds" value="outall"/>
122 <param name="tableout" value="create"/>
123 <output name="bedouthilo" value="bedouthilo2_sample" compare="diff" lines_diff="0"/>
124 <output name="bedouthi" value="bedouthi2_sample" compare="diff" lines_diff="0"/>
125 <output name="bedoutlo" value="bedoutlo2_sample" compare="diff" lines_diff="0"/>
126 <output name="tableoutfile" value="table2_sample" compare="diff" lines_diff="0"/>
127 </test>
128 </tests>
129 <help><![CDATA[
130
131 **Purpose**
132
133 *Combine bigwig outlier regions into bed files*
134
135 Bigwigs allow quantitative tracks to be viewed in an interactive genome browser like JBrowse2.
136 Peaks are easy to see. Unusually low regions can be harder to spot, even if they are relatively large, unless the view is zoomed right in.
137 Automated methods for combining evidence from multiple bigwigs can be useful for constructing browseable *issues* or other kinds of summary bed format tracks.
138 For example, combining coverage outlier regions with the frequency of specific dinucleotide short tandem repeats,
139 for evaluating technical sequencing technology effects in the evaluation of a genome assembly described at https://github.com/arangrhie/T2T-Polish
140
141 **What does it produce?**
142
143 Bed format results are output, containing each continuous segment of at least *minwin* base pairs above a cut point, or below another cut point.
144 These can be viewed as features on the reference genome using a genome browser tool like JBrowse2.
145 Three kinds of bed files can be created depending on the values included.
146 Both high and low regions in one bed output is the default. This can be displayed in JBrowse2 with colour indicating the high or low status,
147 which needs one less track and is a little easier to understand. High and low features can also be output as separate bed files.
148
149 **How is it controlled?**
150
151 The cut points are calculated using a user supplied quantile, from each chromosome's bigwig value distribution.
152 The defaults are 0.99 and 0.01 and the default *minwin* is 10.
153 The probability of 10 values at or below the 1st percentile purely by chance is about 0.01**10, so false positives should be
154 rare, even in a 3GB genome.
155 This data driven and non-parametric method is preferred for the asymmetrical distributions found in typical bigwigs, such as depth of coverage
156 for genome sequencing reads. Coverage values are truncated at zero, and regions with very high values often form a long sparse right tail.
157
158 **How do I choose the input data?**
159
160 One or more bigwigs can be selected as inputs.
161 Multiple bigwigs will be combined in bed files, so must share the reference genome to display
162 using JBrowse2.
163
164 .. class:: warningmark
165
166 **Lower quantile may not behave as expected in bigwigs with large fractions of zero values**
167
168 The lower cut point may be problematic for integer values like coverage if many values are zero. For example, if 5% of bases have zero coverage, the 1st percentile is also zero,
169 but that cut point will include the entire 5% *at or below 0*.
170
171
172 ]]></help>
173 <!-- Publication to cite when using this tool, given as a DOI (presumably the paper for the underlying bigtools library; confirm). --><citations>
174 <citation type="doi">10.1093/bioinformatics/btae350</citation>
175 </citations>
176 </tool>
177