comparison repmatch_gff3.xml @ 0:a072f0f30ea3 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/repmatch_gff3 commit 0e04a4c237677c1f5be1950babcf8591097996a9
author iuc
date Wed, 23 Dec 2015 09:25:42 -0500
parents
children e5c7fffdc078
comparison
equal deleted inserted replaced
-1:000000000000 0:a072f0f30ea3
1 <?xml version="1.0"?>
2 <tool id="repmatch_gff3" name="RepMatch" version="@WRAPPER_VERSION@.0">
3 <description>Match paired peaks from two or more replicates</description>
4 <macros>
5 <import>repmatch_gff3_macros.xml</import>
6 </macros>
7 <expand macro="requirements" />
<!-- Cheetah command template for repmatch_gff3.py.
     One input flag is emitted per selected dataset, carrying the dataset and its hid.
     The unmatched-peaks, detail and statistics flags follow the output_files selection;
     step / low_limit / up_limit are passed only when advanced options are switched on.
     The script path is quoted so a tool directory containing spaces stays a single argument. -->
8 <command>
9 python "$__tool_directory__/repmatch_gff3.py"
10 #for $i in $input:
11 --input "${i}" "${i.hid}"
12 #end for
13 --method $method
14 --distance $distance
15 --replicates $replicates
16 --output_files $output_files_cond.output_files
17 --output_matched_peaks "$output_matched_peaks"
18 #if str($output_files_cond.output_files) in ["all", "matched_peaks_unmatched_peaks"]:
19 --output_unmatched_peaks "$output_unmatched_peaks"
20 #end if
21 #if str($output_files_cond.output_files) =="all":
22 --output_detail "$output_detail"
23 --output_statistics_table "$output_statistics_table"
24 --output_statistics_histogram "$output_statistics_histogram"
25 #end if
26 #if str($advanced_options_cond.advanced_options) == "on":
27 --step $advanced_options_cond.step
28 --low_limit $advanced_options_cond.low_limit
29 --up_limit $advanced_options_cond.up_limit
30 #end if
31 </command>
<!-- Tool form parameters; each one is referenced by name in the command template. -->
32 <inputs>
<!-- Two or more GFF datasets (min="2"); the command template loops over this list. -->
33 <param name="input" type="data" format="gff" multiple="True" min="2" label="Match paired peaks on" />
<!-- Matching strategy, passed on the command line as the method flag. -->
34 <param name="method" type="select" label="Method of finding match">
35 <option value="closest" selected="True">Closest</option>
36 <option value="largest">Largest</option>
37 <option value="all">All</option>
38 </param>
<!-- Non-negative distance (min="0"), default 50. -->
39 <param name="distance" type="integer" value="50" min="0" label="Maximum distance between peaks in different replicates to allow merging" />
<!-- At least 2 (min="2"), default 2. -->
40 <param name="replicates" type="integer" value="2" min="2" label="Minimum number of replicates that must be matched for merging to occur" />
<!-- Output selection. The when branches are empty: the chosen value is only read by the
     command template and by the output filters.
     NOTE(review): the help text says statistics are always generated, yet the statistics
     flags and outputs are limited to the "all" choice - confirm which is intended. -->
41 <conditional name="output_files_cond">
42 <param name="output_files" type="select" label="Select output" help="Statistics will always be generated." >
43 <option value="all" selected="True">everything</option>
44 <option value="matched_peaks">matched paired peaks only</option>
45 <option value="matched_peaks_unmatched_peaks">matched paired peaks and unmatched paired peaks only</option>
46 </param>
47 <when value="matched_peaks" />
48 <when value="matched_peaks_unmatched_peaks" />
49 <when value="all" />
50 </conditional>
<!-- Advanced options are hidden by default ("off"); step, low_limit and up_limit exist only
     in the "on" branch and are passed to the script only in that case. -->
51 <conditional name="advanced_options_cond">
52 <param name="advanced_options" type="select" label="Advanced options">
53 <option value="off" selected="true">Hide advanced options</option>
54 <option value="on">Display advanced options</option>
55 </param>
56 <when value="on">
57 <param name="step" type="integer" value="0" min="0" label="Step size" help="Distance for each iteration" />
58 <param name="low_limit" type="integer" value="-1000" label="Lower limit for Crick-Watson distance filter" />
59 <param name="up_limit" type="integer" value="1000" label="Upper limit for Crick-Watson distance filter" />
60 </when>
61 <when value="off" />
62 </conditional>
63 </inputs>
<!-- Output datasets. Each filter uses the same output_files condition under which the
     command template passes the corresponding output flag to the script. -->
64 <outputs>
<!-- Created only when output_files is "all". -->
65 <data name="output_statistics_table" format="tabular" label="Statistics Table: ${tool.name} on ${on_string}">
66 <filter>output_files_cond["output_files"] == "all"</filter>
67 </data>
<!-- Created only when output_files is "all". -->
68 <data name="output_statistics_histogram" format="pdf" label="Statistics Histogram: ${tool.name} on ${on_string}">
69 <filter>output_files_cond["output_files"] == "all"</filter>
70 </data>
<!-- Created only when output_files is "all". -->
71 <data name="output_detail" format="tabular" label="Data D: ${tool.name} on ${on_string}">
72 <filter>output_files_cond["output_files"] == "all"</filter>
73 </data>
<!-- Created for "all" and for "matched_peaks_unmatched_peaks". -->
74 <data name="output_unmatched_peaks" format="tabular" label="Data UP: ${tool.name} on ${on_string}">
75 <filter>output_files_cond["output_files"] in ["all", "matched_peaks_unmatched_peaks"]</filter>
76 </data>
<!-- No filter: the matched-peaks GFF is created for every output selection. -->
77 <data name="output_matched_peaks" format="gff" label="Data MP: ${tool.name} on ${on_string}" />
78 </outputs>
<!-- Functional test with output_files set to "all", so all five outputs are compared.
     Galaxy's tool schema expects every test case to be wrapped in its own test element;
     the params and outputs previously sat directly under tests, which defines no test case.
     The PDF histogram is compared by size (sim_size) instead of by content. -->
79 <tests>
<test>
80 <param name="input" value="closest_matched_pairs_input1.gff" ftype="gff" />
81 <param name="input" value="largest_matched_pairs_input1.gff" ftype="gff" />
82 <param name="method" value="closest" />
83 <param name="distance" value="50" />
84 <param name="replicates" value="2" />
85 <param name="output_files" value="all" />
86 <param name="step" value="0" />
87 <param name="low_limit" value="-1000" />
88 <param name="up_limit" value="1000" />
89 <output name="output_statistics_table" file="statistics_table_out1.tabular" ftype="tabular" />
90 <output name="output_statistics_histogram" file="statistics_histogram_out1.pdf" ftype="pdf" compare="sim_size" />
91 <output name="output_detail" file="detail_out1.tabular" ftype="tabular" />
92 <output name="output_unmatched_peaks" file="unmatched_peaks_out1.tabular" ftype="tabular" />
93 <output name="output_matched_peaks" file="matched_peaks_out1.gff" ftype="gff" />
</test>
94 </tests>
95 <help>
96 **What it does**
97
98 RepMatch accepts two or more input datasets, and starts by defining peak-pair midpoints in the first dataset. It then
99 discovers all peak-pair midpoints in the second dataset that are within the distance, defined by the tool's **Maximum
100 distance between peaks in different replicates to allow merging** parameter, from the peak-pair midpoint coordinate in
101 the first dataset. When encountering multiple candidates to match (one-to-many), RepMatch uses the method defined by
102 the tool's **Method of finding match** parameter so that there is at most only a one-to-one match across the two datasets.
103 This method provides the following options:
104
105 * **closest** - matches only the closest one in bp distance.
106 * **largest** - matches the one that contains the largest number of reads.
107 * **all** - both methods are run separately.
108
109 RepMatch matching is an iterative process, as it attempts to find the centroid coordinate amongst all replicates. As such,
110 the centroid is the point of reference for "distance" and "closest". This process can be sped up by increasing the tool's
111 **Step size** parameter.
112
113 The minimum number of replicates that must be matched for a match to occur is defined by the tool's **Minimum number of
114 replicates that must be matched for merging to occur** parameter. Additional filters can be applied using the tool's
115 **Advanced options**, including a lower and upper limit for the C-W distance.
116
117 .. image:: $PATH_TO_IMAGES/repmatch.png
118
119 -----
120
121 **Options**
122
123 * **Distance** - Maximum distance for discovering all peak-pair midpoints in a second dataset relative to the peak-pair midpoints in the first dataset
124 * **Method** - Method to use when encountering multiple candidates to match so that there is at most only a one-to-one match across the two datasets.
125 * **Step Size** - Distance for each iteration.
126 * **Replicates** - Minimum number of replicates that must be matched for a match to occur. This value must be at least 2.
127 * **Lower Limit** - Lower limit for the Crick-Watson distance filter.
128 * **Upper Limit** - Upper limit for the Crick-Watson distance filter.
129
130 -----
131
132 **Output Data Files**
133
134 * **Data MP** - gff file consisting of only peak pairs
135
136 - Columns are **chr**, **script**, **blank**, **peak start**, **peak end**, **blank**, **normalized tag counts**, **blank** and **info**.
137 - Peak start and end are separated by one coordinate.
138 - Normalized tag is the occupancy averaged across replicates.
139 - Attributes include C-W distance, sum total of tag counts, number of replicates merged.
140
141 * **Data D** - tabular file consisting of the list of all matched replicates.
142 * **Data UP** - tabular file consisting of all unmatched peak-pairs.
143
144 **Output Statistics Files**
145
146 * **Statistics Table** - tabular file providing the description key of **Data D**.
147 * **Statistics Histogram** - graph of the number of matched locations having the indicated replicate counts.
148
149 **Comments on Replicates**
150
151 Three types of replicates may be considered. Biological replicates represent independently collected biological samples.
152 At least two biological replicates must be performed for each experiment from which a conclusion is being drawn, and the
153 conclusion must be evident in both biological replicates when analyzed separately. Technical replicates represent a re-run
154 of the assay on the same biological material. This is usually done when one replicate fails to produce quality data, and is
155 used to replace that earlier replicate. Sequencing replicates represent additional sequencing of the same successful library
156 in order to obtain more reads should the analysis require it. The reads from individual sequencing replicates are usually
157 merged without need for separate analysis.
158
159 </help>
160 <expand macro="citations" />
161 </tool>