Mercurial > repos > iuc > repmatch_gff3
comparison repmatch_gff3.xml @ 0:a072f0f30ea3 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/repmatch_gff3 commit 0e04a4c237677c1f5be1950babcf8591097996a9
author | iuc |
---|---|
date | Wed, 23 Dec 2015 09:25:42 -0500 |
parents | |
children | e5c7fffdc078 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:a072f0f30ea3 |
---|---|
1 <?xml version="1.0"?> | |
2 <tool id="repmatch_gff3" name="RepMatch" version="@WRAPPER_VERSION@.0"> | |
3 <description>Match paired peaks from two or more replicates</description> | |
4 <macros> | |
5 <import>repmatch_gff3_macros.xml</import> | |
6 </macros> | |
7 <expand macro="requirements" /> | |
8 <command> | |
9 python "$__tool_directory__/repmatch_gff3.py" | |
10 #for $i in $input: | |
11 --input "${i}" "${i.hid}" | |
12 #end for | |
13 --method $method | |
14 --distance $distance | |
15 --replicates $replicates | |
16 --output_files $output_files_cond.output_files | |
17 --output_matched_peaks "$output_matched_peaks" | |
18 #if str($output_files_cond.output_files) in ["all", "matched_peaks_unmatched_peaks"]: | |
19 --output_unmatched_peaks "$output_unmatched_peaks" | |
20 #end if | |
21 #if str($output_files_cond.output_files) == "all": | |
22 --output_detail "$output_detail" | |
23 --output_statistics_table "$output_statistics_table" | |
24 --output_statistics_histogram "$output_statistics_histogram" | |
25 #end if | |
26 #if str($advanced_options_cond.advanced_options) == "on": | |
27 --step $advanced_options_cond.step | |
28 --low_limit $advanced_options_cond.low_limit | |
29 --up_limit $advanced_options_cond.up_limit | |
30 #end if | |
31 </command> <!-- Command template: runs repmatch_gff3.py from the tool directory, passing one path and hid pair per selected input dataset, then the method, distance and replicates values. The unmatched-peaks, detail and statistics flags are emitted only for the output_files choices tested in the #if directives, and the step and limit flags only when advanced_options is "on". The script path is quoted so a tool directory containing spaces does not split the command line. This comment is kept outside the element so it cannot become part of the template text. --> | |
32 <inputs> <!-- Form parameters; their names are the variables read by the Cheetah template in the command element. Boolean attribute values are written in lowercase throughout. --> | |
33 <param name="input" type="data" format="gff" multiple="true" min="2" label="Match paired peaks on" /> | |
34 <param name="method" type="select" label="Method of finding match"> | |
35 <option value="closest" selected="true">Closest</option> | |
36 <option value="largest">Largest</option> | |
37 <option value="all">All</option> | |
38 </param> | |
39 <param name="distance" type="integer" value="50" min="0" label="Maximum distance between peaks in different replicates to allow merging" /> | |
40 <param name="replicates" type="integer" value="2" min="2" label="Minimum number of replicates that must be matched for merging to occur" /> | |
41 <conditional name="output_files_cond"> <!-- Selector only: every when branch is empty; the chosen value is read by the command template and by the output filters. NOTE(review): the help text below says statistics are always generated, while the statistics outputs are filtered to the "all" choice; confirm which is intended. --> | |
42 <param name="output_files" type="select" label="Select output" help="Statistics will always be generated."> | |
43 <option value="all" selected="true">everything</option> | |
44 <option value="matched_peaks">matched paired peaks only</option> | |
45 <option value="matched_peaks_unmatched_peaks">matched paired peaks and unmatched paired peaks only</option> | |
46 </param> | |
47 <when value="matched_peaks" /> | |
48 <when value="matched_peaks_unmatched_peaks" /> | |
49 <when value="all" /> | |
50 </conditional> | |
51 <conditional name="advanced_options_cond"> <!-- step, low_limit and up_limit exist only in the "on" branch; the selector defaults to "off". --> | |
52 <param name="advanced_options" type="select" label="Advanced options"> | |
53 <option value="off" selected="true">Hide advanced options</option> | |
54 <option value="on">Display advanced options</option> | |
55 </param> | |
56 <when value="on"> | |
57 <param name="step" type="integer" value="0" min="0" label="Step size" help="Distance for each iteration" /> | |
58 <param name="low_limit" type="integer" value="-1000" label="Lower limit for Crick-Watson distance filter" /> | |
59 <param name="up_limit" type="integer" value="1000" label="Upper limit for Crick-Watson distance filter" /> | |
60 </when> | |
61 <when value="off" /> | |
62 </conditional> | |
63 </inputs> | |
64 <outputs> <!-- Output datasets. Each filter is an expression over the output_files select value; a dataset with a filter is created only when that expression holds. --> | |
65 <data name="output_statistics_table" format="tabular" label="Statistics Table: ${tool.name} on ${on_string}"> | |
66 <filter>output_files_cond["output_files"] == "all"</filter> | |
67 </data> | |
68 <data name="output_statistics_histogram" format="pdf" label="Statistics Histogram: ${tool.name} on ${on_string}"> | |
69 <filter>output_files_cond["output_files"] == "all"</filter> | |
70 </data> | |
71 <data name="output_detail" format="tabular" label="Data D: ${tool.name} on ${on_string}"> | |
72 <filter>output_files_cond["output_files"] == "all"</filter> | |
73 </data> | |
74 <data name="output_unmatched_peaks" format="tabular" label="Data UP: ${tool.name} on ${on_string}"> <!-- Created for both the "all" and the "matched_peaks_unmatched_peaks" choices. --> | |
75 <filter>output_files_cond["output_files"] in ["all", "matched_peaks_unmatched_peaks"]</filter> | |
76 </data> | |
77 <data name="output_matched_peaks" format="gff" label="Data MP: ${tool.name} on ${on_string}" /> <!-- No filter: declared for every output_files choice. --> | |
78 </outputs> | |
79 <tests> | |
80 <test> <!-- One test case: params and expected outputs must sit inside a test element to define a case. The two input files are given as a single comma-separated value because input is a multiple data parameter. NOTE(review): step, low_limit and up_limit belong to advanced_options_cond, whose selector is not set in this test; confirm whether advanced_options should be set to "on" here. --> | |
81 <param name="input" value="closest_matched_pairs_input1.gff,largest_matched_pairs_input1.gff" ftype="gff" /> | |
82 <param name="method" value="closest" /> | |
83 <param name="distance" value="50" /> | |
84 <param name="replicates" value="2" /> | |
85 <param name="output_files" value="all" /> | |
86 <param name="step" value="0" /> | |
87 <param name="low_limit" value="-1000" /> | |
88 <param name="up_limit" value="1000" /> | |
89 <output name="output_statistics_table" file="statistics_table_out1.tabular" ftype="tabular" /> | |
90 <output name="output_statistics_histogram" file="statistics_histogram_out1.pdf" ftype="pdf" compare="sim_size" /> | |
91 <output name="output_detail" file="detail_out1.tabular" ftype="tabular" /> | |
92 <output name="output_unmatched_peaks" file="unmatched_peaks_out1.tabular" ftype="tabular" /> | |
93 <output name="output_matched_peaks" file="matched_peaks_out1.gff" ftype="gff" /> | |
</test> | |
94 </tests> | |
95 <help> | |
96 **What it does** | |
97 | |
98 RepMatch accepts two or more input datasets, and starts by defining peak-pair midpoints in the first dataset. It then | |
99 discovers all peak-pair midpoints in the second dataset that are within the distance, defined by the tool's **Maximum | |
100 distance between peaks in different replicates to allow merging** parameter, from the peak-pair midpoint coordinate in | |
101 the first dataset. When encountering multiple candidates to match (one-to-many), RepMatch uses the method defined by | |
102 the tool's **Method of finding match** parameter so that there is at most only a one-to-one match across the two datasets. | |
103 This method provides the following options: | |
104 | |
105 * **closest** - matches only the closest one in bp distance. | |
106 * **largest** - matches the one that contains the largest number of reads. | |
107 * **all** - both methods are run separately. | |
108 | |
109 RepMatch matching is an iterative process, as it attempts to find the centroid coordinate amongst all replicates. As such, | |
110 the centroid is the point of reference for "distance" and "closest". This process can be sped up by increasing the tool's | |
111 **Step size** parameter. | |
112 | |
113 The minimum number of replicates that must be matched for a match to occur is defined by the tool's **Minimum number of | |
114 replicates that must be matched for merging to occur** parameter. Additional filters can be applied using the tool's | |
115 **Advanced options**, including a lower and upper limit for the C-W distance. | |
116 | |
117 .. image:: $PATH_TO_IMAGES/repmatch.png | |
118 | |
119 ----- | |
120 | |
121 **Options** | |
122 | |
123 * **Distance** - Maximum distance for discovering all peak-pair midpoints in a second dataset relative to the peak-pair midpoints in the first dataset | |
124 * **Method** - Method to use when encountering multiple candidates to match so that there is at most only a one-to-one match across the two datasets. | |
125 * **Step Size** - Distance for each iteration. | |
126 * **Replicates** - Minimum number of replicates that must be matched for a match to occur. This value must be at least 2. | |
127 * **Lower Limit** - Lower limit for the Crick-Watson distance filter. | |
128 * **Upper Limit** - Upper limit for the Crick-Watson distance filter. | |
129 | |
130 ----- | |
131 | |
132 **Output Data Files** | |
133 | |
134 * **Data MP** - gff file consisting of only peak pairs | |
135 | |
136 - Columns are **chr**, **script**, **blank**, **peak start**, **peak end**, **blank**, **normalized tag counts**, **blank** and **info**. | |
137 - Peak start and end are separated by one coordinate. | |
138 - Normalized tag is the occupancy averaged across replicates. | |
139 - Attributes include C-W distance, sum total of tag counts, number of replicates merged. | |
140 | |
141 * **Data D** - tabular file consisting of the list of all matched replicates. | |
142 * **Data UP** - tabular file consisting of all unmatched peak-pairs. | |
143 | |
144 **Output Statistics Files** | |
145 | |
146 * **Statistics Table** - tabular file providing the description key of **Data D**. | |
147 * **Statistics Histogram** - graph of the number of matched locations having the indicated replicate counts. | |
148 | |
149 **Comments on Replicates** | |
150 | |
151 Three types of replicates may be considered. Biological replicates represent independently collected biological samples. | |
152 At least two biological replicates must be performed for each experiment from which a conclusion is being drawn, and the | |
153 conclusion must be evident in both biological replicates when analyzed separately. Technical replicates represent a re-run | |
154 of the assay on the same biological material. This is usually done when one replicate fails to produce quality data, and is | |
155 used to replace that earlier replicate. Sequencing replicates represent additional sequencing of the same successful library | |
156 in order to obtain more reads should the analysis require it. The reads from individual sequencing replicates are usually | |
157 merged without need for separate analysis. | |
158 | |
159 </help> | |
160 <expand macro="citations" /> | |
161 </tool> |