comparison cwpair2.xml @ 0:8600bfe7ed52 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/cwpair2 commit e96df94dba60050fa28aaf55b5bb095717a5f260
author iuc
date Tue, 22 Dec 2015 17:03:46 -0500
parents
children d4db13c9dd7f
comparison
equal deleted inserted replaced
-1:000000000000 0:8600bfe7ed52
1 <?xml version="1.0"?>
2 <tool id="cwpair2" name="CWPair2" version="@WRAPPER_VERSION@.0">
3 <description>find matched pairs and unmatched orphans</description>
4 <macros>
5 <import>cwpair2_macros.xml</import>
6 </macros>
7 <expand macro="requirements" />
8 <!-- Cheetah template for the job command line: one input/hid pair per selected dataset, and only the threshold option matching the chosen format. The script path is quoted so a tool directory containing spaces is passed as a single argument. --> <command>
9 <![CDATA[
10 python "$__tool_directory__/cwpair2.py"
11 #for $i in $input:
12 --input "${i}" "${i.hid}"
13 #end for
14 --up_distance $up_distance
15 --down_distance $down_distance
16 --method $method
17 --binsize $binsize
18 --threshold_format $threshold_format_cond.threshold_format
19 #if str($threshold_format_cond.threshold_format) == "absolute_threshold":
20 --absolute_threshold $threshold_format_cond.absolute_threshold
21 #elif str($threshold_format_cond.threshold_format) == "relative_threshold":
22 --relative_threshold $threshold_format_cond.relative_threshold
23 #end if
24 --output_files $output_files
25 --statistics_output "$statistics_output"
26 ]]>
27 </command>
28 <!-- User-facing parameters; each one is forwarded to cwpair2.py by the command template. --> <inputs>
29 <param name="input" type="data" format="gff" multiple="True" label="Find matched pairs on" />
30 <param name="up_distance" type="integer" value="50" min="0" label="Distance upstream from a peak to allow a pair" help="The maximum distance upstream or 5’ to the primary peak"/>
31 <param name="down_distance" type="integer" value="100" min="0" label="Distance downstream from a peak to allow a pair" help="The maximum distance downstream or 3’ to the primary peak"/>
32 <param name="method" type="select" label="Method of finding a match">
33 <option value="mode" selected="True">Mode</option>
34 <option value="closest">Closest</option>
35 <option value="largest">Largest</option>
36 <option value="all">All</option>
37 </param>
38 <param name="binsize" type="integer" value="1" min="1" label="Width of bins for frequency plots and mode calculation" help="Value 1 implies no bins" /> <!-- a bin width of 0 is not meaningful, so 1 (no binning) is the floor -->
39 <conditional name="threshold_format_cond"> <!-- exactly one threshold parameter is shown, matching the selected format -->
40 <param name="threshold_format" type="select" label="Filter using">
41 <option value="relative_threshold" selected="True">Relative threshold</option>
42 <option value="absolute_threshold">Absolute threshold</option>
43 </param>
44 <when value="relative_threshold">
45 <param name="relative_threshold" type="float" value="0.0" min="0.0" label="Percentage of the 95 percentile value to filter below" help="Value 0 results in no filtering" />
46 </when>
47 <when value="absolute_threshold">
48 <param name="absolute_threshold" type="float" value="0.0" min="0.0" label="Absolute value to filter below" />
49 </when>
50 </conditional>
51 <param name="output_files" type="select" label="Please select the output you want to have" help="Statistics will always be generated." > <!-- the selected value drives the filters on the output collections -->
52 <option value="all" selected="True">no restrictions (output everything: C,D,F,O,P,MP)</option>
53 <option value="matched_pair">matched pairs only (MP)</option>
54 <option value="matched_pair_orphan">matched pairs and orphans only (O,MP)</option>
55 <option value="matched_pair_orphan_detail">matched pairs, orphans and details only (D,O,MP)</option>
56 </param>
57 </inputs>
58 <!-- Job outputs: one statistics table plus list collections whose members are discovered from per-type directories. --> <outputs>
59 <data format="tabular" name="statistics_output" label="Statistics Table: ${tool.name} on ${on_string}" />
60 <collection type="list" name="H" label="Statistics Histogram: ${tool.name} on ${on_string}">
61 <filter>output_files == "all"</filter> <!-- histograms are kept only when every output is requested -->
62 <discover_datasets directory="H" ext="pdf" visible="false" pattern="(?P&lt;designation&gt;.*)" />
63 </collection>
64 <collection type="list" name="D" label="Data D: ${tool.name} on ${on_string}">
65 <filter>output_files in ["all", "matched_pair_orphan_detail"]</filter> <!-- details -->
66 <discover_datasets directory="data_D" ext="tabular" visible="false" pattern="(?P&lt;designation&gt;.*)" />
67 </collection>
68 <collection type="list" name="O" label="Data O: ${tool.name} on ${on_string}">
69 <filter>output_files in ["all", "matched_pair_orphan", "matched_pair_orphan_detail"]</filter> <!-- orphans -->
70 <discover_datasets directory="data_O" ext="tabular" visible="false" pattern="(?P&lt;designation&gt;.*)" />
71 </collection>
72 <collection type="list" name="MP" label="Data MP: ${tool.name} on ${on_string}"> <!-- no filter: matched pairs are part of every output_files choice -->
73 <discover_datasets directory="data_MP" ext="gff" visible="false" pattern="(?P&lt;designation&gt;.*)" />
74 </collection>
75 </outputs>
76 <tests>
77 <!-- Matched pairs only, all three methods, 25 bp upstream window. Each method's MP element carries its own name so the three expectations address three distinct collection members. --> <test>
78 <param name="input" value="cwpair2_input1.gff" />
79 <param name="up_distance" value="25" />
80 <param name="down_distance" value="100" />
81 <param name="method" value="all" />
82 <param name="binsize" value="1" />
83 <param name="threshold_format" value="relative_threshold" />
84 <param name="relative_threshold" value="0.0" />
85 <param name="output_files" value="matched_pair" />
86 <output name="statistics_output" file="statistics1.tabular" ftype="tabular" />
87 <output_collection name="MP" type="list">
88 <element name="data_MP_closest_f0u25d100_on_data_1" file="closest_mp_output1.gff" ftype="gff" compare="contains"/>
89 <element name="data_MP_largest_f0u25d100_on_data_1" file="largest_mp_output1.gff" ftype="gff" compare="contains"/>
90 <element name="data_MP_mode_f0u25d100_on_data_1" file="mode_mp_output1.gff" ftype="gff" compare="contains"/>
91 </output_collection>
92 </test>
93 <!-- All outputs, all three methods, 50 bp upstream window: checks the histogram, detail, orphan and matched-pair collections. --> <test>
94 <param name="input" value="cwpair2_input1.gff" />
95 <param name="up_distance" value="50" />
96 <param name="down_distance" value="100" />
97 <param name="method" value="all" />
98 <param name="binsize" value="1" />
99 <param name="threshold_format" value="relative_threshold" />
100 <param name="relative_threshold" value="0.0" />
101 <param name="output_files" value="all" />
102 <output name="statistics_output" file="statistics2.tabular" ftype="tabular" />
103 <output_collection name="H" type="list">
104 <element name="histogram_C_mode_f0u50d100_on_data_1" file="mode_c_output2.pdf" ftype="pdf" compare="contains" lines_diff="12"/>
105 <element name="histogram_F_closest_f0u50d100_on_data_1" file="closest_f_output2.pdf" ftype="pdf" compare="contains" lines_diff="12"/>
106 <element name="histogram_F_largest_f0u50d100_on_data_1" file="largest_f_output2.pdf" ftype="pdf" compare="contains" lines_diff="12"/>
107 <element name="histogram_F_mode_f0u50d100_on_data_1" file="mode_f_output2.pdf" ftype="pdf" compare="contains" lines_diff="12"/>
108 <element name="histogram_P_mode_f0u50d100_on_data_1" file="mode_p_output2.pdf" ftype="pdf" compare="contains" lines_diff="12"/>
109 </output_collection>
110 <output_collection name="D" type="list">
111 <element name="data_D_closest_f0u50d100_on_data_1" file="closest_d_output2.tabular" ftype="tabular" compare="contains"/>
112 <element name="data_D_largest_f0u50d100_on_data_1" file="largest_d_output2.tabular" ftype="tabular" compare="contains"/>
113 <element name="data_D_mode_f0u50d100_on_data_1" file="mode_d_output2.tabular" ftype="tabular" compare="contains"/>
114 </output_collection>
115 <output_collection name="O" type="list">
116 <element name="data_O_closest_f0u50d100_on_data_1" file="closest_o_output2.tabular" ftype="tabular" compare="contains"/>
117 <element name="data_O_largest_f0u50d100_on_data_1" file="largest_o_output2.tabular" ftype="tabular" compare="contains"/>
118 <element name="data_O_mode_f0u50d100_on_data_1" file="mode_o_output2.tabular" ftype="tabular" compare="contains"/>
119 </output_collection>
120 <output_collection name="MP" type="list">
121 <element name="data_MP_closest_f0u50d100_on_data_1" file="closest_mp_output2.gff" ftype="gff" compare="contains"/>
122 <element name="data_MP_largest_f0u50d100_on_data_1" file="largest_mp_output2.gff" ftype="gff" compare="contains"/>
123 <element name="data_MP_mode_f0u50d100_on_data_1" file="mode_mp_output2.gff" ftype="gff" compare="contains"/>
124 </output_collection>
125 </test>
126 </tests>
127 <help>
128 **What it does**
129
130 CWPair accepts one or more gff files as input and takes the peak location to be the midpoint between the
131 exclusion zone start and end coordinate (columns D and E). CWPair starts with the highest peak (primary peak)
132 in the dataset, and then looks on the opposite strand for another peak located within the distance defined by
133 a combination of the tool's **Distance upstream from a peak to allow a pair** (the distance upstream or 5’ to
134 the primary peak) and **Distance downstream from a peak to allow a pair** (the distance downstream or 3’ to the
135 primary peak) parameters. So "upstream" value 30 "downstream" value 20 makes the tool look 30 bp upstream and
136 20 bp downstream (inclusive). Consequently, the search space would be 51 bp, since it includes the primary peak
137 coordinate. The use of a negative number changes the direction of the search limits. So, "upstream" -30 and
138 "downstream" 20 produces an 11 bp downstream search window (20-30 bp downstream, inclusive).
139
140 .. image:: $PATH_TO_IMAGES/cwpair2.png
141
142 When encountering multiple candidate peaks within the search window, CWPair uses the resolution method defined by
143 the tool's **Method of finding a match** parameter as follows:
144
145
146 * **mode** - This is an iterative process in which all peak-pair distances within the search window are determined, and the mode calculated. The pair whose distance apart is closest to the mode is then selected.
147 * **closest** - Pairs the peak that has the closest absolute distance from the primary peak.
148 * **largest** - Pairs the peak that has the highest tag count.
149 * **all** - Runs all three methods, producing separate outputs for each.
150
151 When considering the candidate peaks for pairing to a primary peak, a tag-count threshold may also be set using
152 the tool's **Filter using** parameter (relative or absolute threshold). A relative threshold determines the tag counts
153 at the 95th percentile of peak occupancy (i.e. top 5% in terms of tag counts), then uses a tag count threshold at
154 the specified percentage of this 95th percentile. So if the peak at the 95th percentile has 200 tags, and "relative
155 threshold" 50 is used, then it will not consider any peak having fewer than 100 tags.
156
157 -----
158
159 **Options**
160
161 * **Method of finding a match** - Method of finding a matched pair: mode, closest, largest, or all (run with each method).
162 * **Distance upstream from a peak to allow a pair** - The maximum distance (inclusive) upstream on the opposite strand from the primary peak to locate another peak, resulting in a pair.
163 * **Distance downstream from a peak to allow a pair** - The maximum distance (inclusive) downstream on the opposite strand from the primary peak to locate another peak, resulting in a pair.
164 * **Percentage of the 95 percentile value to filter below** - Percentage of the 95 percentile value below which to filter when using a relative threshold.
165 * **Absolute value to filter below** - Absolute value below which to filter when using an absolute threshold.
166 * **Please select the output you want to have** - Restrict output dataset collections to matched pairs only or one of several combinations of collection types.
167
168 -----
169
170 **Output Data Files**
171
172 * **closest/largest/mode MP** - gff file containing the Matched Pairs and includes the peak-pair midpoint coordinate (column D) and the coordinate +1 (column E). The tag count sum is reported in column F, along with the C-W distance in bp in column I.
173 * **closest/largest/mode O** - tabular file containing the Orphans (all peaks that are not in pairs).
174 * **closest/largest/mode D** - tabular file containing the Details, which lists + and – strand information separately. The start and end represent the lower and higher coordinates of the exclusion zone from GeneTrack, and “Value” is the tag count sum within the exclusion zone. The peak pair midpoint is calculated along with the distance between the two paired peaks (midpoint-to-midpoint or C-W distance).
175
176 **Output Statistics Files**
177
178 * **closest/largest/mode C** - pdf file that provides the frequency distribution of peak pair distances.
179 * **closest/largest/mode P** - pdf file that provides the preview plots graph (the initial iteration of the process for finding the mode).
180 * **closest/largest/mode F** - pdf file that provides the final plots graph.
181 * **Statistics Table** - provides the number of peaks in pairs (dividing this by 2 provides the number of peak-pairs).
182
183 </help>
184 <expand macro="citations" />
185 </tool>