repmatch_gff3: repmatch_gff3.xml comparison

comparison repmatch_gff3.xml @ 0:a072f0f30ea3 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/repmatch_gff3 commit 0e04a4c237677c1f5be1950babcf8591097996a9

author	iuc
date	Wed, 23 Dec 2015 09:25:42 -0500
parents
children	e5c7fffdc078

comparison

equal deleted inserted replaced

--1:000000000000
+:a072f0f30ea3
+<?xml version="1.0"?>
+<tool id="repmatch_gff3" name="RepMatch" version="@WRAPPER_VERSION@.0">
+<description>Match paired peaks from two or more replicates</description>
+<macros>
+<import>repmatch_gff3_macros.xml</import>
+</macros>
+<expand macro="requirements" />
+<!--
+    Command template (Cheetah). Runs repmatch_gff3.py from the tool directory.
+    Each selected input dataset is passed together with its "hid" attribute.
+    The unmatched-peaks, detail and statistics output paths are only passed
+    when the selected "output_files" value asks for them, mirroring the
+    filters declared on the corresponding outputs. The step and limit
+    options are only passed when advanced options are switched on.
+    The script path is quoted so that a tool directory containing spaces
+    does not split the command line, and the template is wrapped in CDATA
+    so Cheetah and shell characters never need XML escaping.
+-->
+<command><![CDATA[
+    python '$__tool_directory__/repmatch_gff3.py'
+    #for $i in $input:
+        --input "${i}" "${i.hid}"
+    #end for
+    --method $method
+    --distance $distance
+    --replicates $replicates
+    --output_files $output_files_cond.output_files
+    --output_matched_peaks "$output_matched_peaks"
+    #if str($output_files_cond.output_files) in ["all", "matched_peaks_unmatched_peaks"]:
+        --output_unmatched_peaks "$output_unmatched_peaks"
+    #end if
+    #if str($output_files_cond.output_files) == "all":
+        --output_detail "$output_detail"
+        --output_statistics_table "$output_statistics_table"
+        --output_statistics_histogram "$output_statistics_histogram"
+    #end if
+    #if str($advanced_options_cond.advanced_options) == "on":
+        --step $advanced_options_cond.step
+        --low_limit $advanced_options_cond.low_limit
+        --up_limit $advanced_options_cond.up_limit
+    #end if
+]]></command>
+<inputs>
+    <!-- Two or more GFF datasets; the command template iterates over them. -->
+    <param name="input" type="data" format="gff" multiple="true" min="2" label="Match paired peaks on" />
+    <!-- Strategy used to resolve one-to-many candidate matches. -->
+    <param name="method" type="select" label="Method of finding match">
+        <option value="closest" selected="true">Closest</option>
+        <option value="largest">Largest</option>
+        <option value="all">All</option>
+    </param>
+    <param name="distance" type="integer" value="50" min="0" label="Maximum distance between peaks in different replicates to allow merging" />
+    <param name="replicates" type="integer" value="2" min="2" label="Minimum number of replicates that must be matched for merging to occur" />
+    <!--
+        Selects which output datasets are produced. The branches are empty:
+        the selected value is only consulted by the command template and by
+        the filters on the outputs.
+    -->
+    <conditional name="output_files_cond">
+        <param name="output_files" type="select" label="Select output" help="Statistics will always be generated." >
+            <option value="all" selected="true">everything</option>
+            <option value="matched_peaks">matched paired peaks only</option>
+            <option value="matched_peaks_unmatched_peaks">matched paired peaks and unmatched paired peaks only</option>
+        </param>
+        <when value="matched_peaks" />
+        <when value="matched_peaks_unmatched_peaks" />
+        <when value="all" />
+    </conditional>
+    <!-- Optional tuning parameters; only sent to the script when set to "on". -->
+    <conditional name="advanced_options_cond">
+        <param name="advanced_options" type="select" label="Advanced options">
+            <option value="off" selected="true">Hide advanced options</option>
+            <option value="on">Display advanced options</option>
+        </param>
+        <when value="on">
+            <param name="step" type="integer" value="0" min="0" label="Step size" help="Distance for each iteration" />
+            <param name="low_limit" type="integer" value="-1000" label="Lower limit for Crick-Watson distance filter" />
+            <param name="up_limit" type="integer" value="1000" label="Upper limit for Crick-Watson distance filter" />
+        </when>
+        <when value="off" />
+    </conditional>
+</inputs>
+<outputs>
+    <!-- The three datasets below are produced only when "everything" (value "all") is selected. -->
+    <data name="output_statistics_table"
+          format="tabular"
+          label="Statistics Table: ${tool.name} on ${on_string}">
+        <filter>output_files_cond["output_files"] == "all"</filter>
+    </data>
+    <data name="output_statistics_histogram"
+          format="pdf"
+          label="Statistics Histogram: ${tool.name} on ${on_string}">
+        <filter>output_files_cond["output_files"] == "all"</filter>
+    </data>
+    <data name="output_detail"
+          format="tabular"
+          label="Data D: ${tool.name} on ${on_string}">
+        <filter>output_files_cond["output_files"] == "all"</filter>
+    </data>
+    <!-- Unmatched peaks are produced for "all" and for "matched_peaks_unmatched_peaks". -->
+    <data name="output_unmatched_peaks"
+          format="tabular"
+          label="Data UP: ${tool.name} on ${on_string}">
+        <filter>output_files_cond["output_files"] in ["all", "matched_peaks_unmatched_peaks"]</filter>
+    </data>
+    <!-- Matched peaks carry no filter, so this dataset is always produced. -->
+    <data name="output_matched_peaks"
+          format="gff"
+          label="Data MP: ${tool.name} on ${on_string}" />
+</outputs>
+<tests>
+    <!--
+        Single functional test using the "closest" method with all outputs
+        requested. Parameters and expected outputs must be wrapped in a
+        <test> element to form a test case. The two datasets for the
+        multiple-data "input" parameter are given as one comma-separated
+        value rather than as two parameters with the same name.
+        NOTE(review): step, low_limit and up_limit belong to the "on" branch
+        of advanced_options_cond, but advanced_options is not set here and
+        defaults to "off". Confirm whether the test should enable it.
+    -->
+    <test>
+        <param name="input" value="closest_matched_pairs_input1.gff,largest_matched_pairs_input1.gff" ftype="gff" />
+        <param name="method" value="closest" />
+        <param name="distance" value="50" />
+        <param name="replicates" value="2" />
+        <param name="output_files" value="all" />
+        <param name="step" value="0" />
+        <param name="low_limit" value="-1000" />
+        <param name="up_limit" value="1000" />
+        <output name="output_statistics_table" file="statistics_table_out1.tabular" ftype="tabular" />
+        <!-- The PDF is compared by size only. -->
+        <output name="output_statistics_histogram" file="statistics_histogram_out1.pdf" ftype="pdf" compare="sim_size" />
+        <output name="output_detail" file="detail_out1.tabular" ftype="tabular" />
+        <output name="output_unmatched_peaks" file="unmatched_peaks_out1.tabular" ftype="tabular" />
+        <output name="output_matched_peaks" file="matched_peaks_out1.gff" ftype="gff" />
+    </test>
+</tests>
+<help>
+**What it does**
+RepMatch accepts two or more input datasets, and starts by defining peak-pair midpoints in the first dataset.  It then
+discovers all peak-pair midpoints in the second dataset that are within the distance, defined by the tool's **Maximum
+distance between peaks in different replicates to allow merging** parameter, from the peak-pair midpoint coordinate in
+the first dataset.  When encountering multiple candidates to match (one-to-many), RepMatch uses the method defined by
+the tool's **Method of finding match** parameter so that there is at most only a one-to-one match across the two datasets.
+This method provides the following options:
+* **closest** - matches only the closest one in bp distance.
+* **largest** -  matches the one that contains the largest number of reads.
+* **all** -  both methods are run separately.
+RepMatch matching is an iterative process, as it attempts to find the centroid coordinate amongst all replicates. As such,
+the centroid is the point of reference for "distance" and "closest".  This process can be sped up by increasing the tool's
+**Step size** parameter.
+The minimum number of replicates that can be matched for a match to occur is defined by the tool's **Minimum number of
+replicates that must be matched for merging to occur** parameter.  Additional filters can be applied using the tool's
+**Advanced options**, including a lower and upper limit for the C-W distance.
+.. image:: $PATH_TO_IMAGES/repmatch.png
+-----
+**Options**
+* **Distance** - Maximum distance for discovering all peak-pair midpoints in a second dataset relative to the peak-pair midpoints in the first dataset
+* **Method** - Method to use when encountering multiple candidates to match so that there is at most only a one-to-one match across the two datasets.
+* **Step Size** - Distance for each iteration.
+* **Replicates** - Minimum number of replicates that can be matched for a match to occur.  This value must be at least 2.
+* **Lower Limit** - Lower limit for the Crick-Watson distance filter.
+* **Upper Limit** - Upper limit for the Crick-Watson distance filter.
+-----
+**Output Data Files**
+* **Data MP** - gff file consisting of only peak pairs
+- Columns are **chr**, **script**, **blank**, **peak start**, **peak end**, **blank**, **normalized tag counts**, **blank** and **info**.
+- Peak start and end are separated by one coordinate.
+- Normalized tag is the occupancy averaged across replicates.
+- Attributes include C-W distance, sum total of tag counts, number of replicates merged.
+* **Data D** - tabular file consisting of the list of all matched replicates.
+* **Data UP** - tabular file consisting of all unmatched peak-pairs.
+**Output Statistics Files**
+* **Statistics Table** - tabular file providing the description key of **Data D**.
+* **Statistics Histogram** - graph of the number of matched locations having the indicated replicate counts.
+**Comments on Replicates**
+Three types of replicates may be considered.  Biological replicates represent independently collected biological samples.
+At least two biological replicates must be performed for each experiment from which a conclusion is being drawn, and the
+conclusion must be evident in both biological replicates when analyzed separately.  Technical replicates represent a re-run
+of the assay on the same biological material.  This is usually done when one replicate fails to produce quality data, and is
+used to replace that earlier replicate.  Sequencing replicates represent additional sequencing of the same successful library
+in order to obtain more reads should the analysis require it.  The reads from individual sequencing replicates are usually
+merged without need for separate analysis.
+</help>
+<expand macro="citations" />
+</tool>

Mercurial > repos > iuc > repmatch_gff3

comparison repmatch_gff3.xml @ 0:a072f0f30ea3 draft