view WeightedAverage.xml @ 2:efa2b391e887 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/weightedaverage commit f770c3c58f1e7e1fa5ed22d7f7aca856d36729e8
author devteam
date Wed, 05 Oct 2016 13:39:38 -0400
parents 90611e86a998
children
line wrap: on
line source

<tool id="wtavg" name="Assign weighted-average" version="1.0.1">
  <description> of the values of features overlapping an interval </description>
  <!-- Package dependencies declared for this tool, each pinned to an exact version. -->
  <requirements>
    <requirement type="package" version="1.0.0">galaxy-ops</requirement>
    <requirement type="package" version="0.7.1">bx-python</requirement>
  </requirements>
  <!--
    Runs WeightedAverage.py with, in order: the interval dataset, the feature
    dataset, the output path, the column indices of the first dataset (-1:
    chrom,start,end,strand), the column indices of the second dataset (-2:
    chrom,start,end,strand,name; the name column carries the feature value,
    see the Note in the help text), and the optional allow_zeros flag.

    The script is addressed through $__tool_directory__ rather than the
    deprecated interpreter attribute, and dataset paths are single-quoted so
    that unusual characters in a path cannot split an argument.
  -->
  <command><![CDATA[
    python '$__tool_directory__/WeightedAverage.py'
      '$genomic_interval'
      '$genomic_feature'
      '$out_file1'
      -1 ${genomic_interval.metadata.chromCol},${genomic_interval.metadata.startCol},${genomic_interval.metadata.endCol},${genomic_interval.metadata.strandCol}
      -2 ${genomic_feature.metadata.chromCol},${genomic_feature.metadata.startCol},${genomic_feature.metadata.endCol},${genomic_feature.metadata.strandCol},${genomic_feature.metadata.nameCol}
      ${allow_zeros}
  ]]></command>

  <inputs>
    <!-- First dataset: the intervals that each receive a weighted-average value. -->
    <param name="genomic_interval"
           type="data"
           format="interval"
           label="Genomic intervals (first dataset)"
           help="Dataset missing? See Note below."/>
    <!-- Second dataset: the features whose values are averaged over each interval. -->
    <param name="genomic_feature"
           type="data"
           format="interval"
           label="Genomic features (second dataset)"
           help="Make sure the value column is specified. See Note below."/>
    <!-- Checkbox, on by default; its truevalue is appended to the command line when checked, nothing otherwise. -->
    <param name="allow_zeros"
           type="boolean"
           label="Include zeros in calculations"
           help="Otherwise, skip them as bad values"
           truevalue="--allow_zeros"
           falsevalue=""
           checked="True"/>
  </inputs>
  <outputs>
    <!--
      The result takes both its datatype and its column metadata from the
      first input (genomic_interval). format_source names that input
      explicitly; the legacy format="input" shorthand does not say which of
      the two data inputs is meant.
    -->
    <data name="out_file1" format_source="genomic_interval" metadata_source="genomic_interval" />
  </outputs>
  <tests>
    <!-- Both tests feed the same pair of BED files and differ only in the allow_zeros setting. -->
    <!-- allow_zeros off (per the parameter help, zeros are then skipped as bad values). -->
    <test>
      <param name="genomic_interval" value="interval_interpolate.bed"/>
      <param name="genomic_feature" value="value_interpolate.bed"/>
      <param name="allow_zeros" value="False" />
      <output name="out_file1" file="interpolate_result.bed"/>
    </test>
    <!-- allow_zeros on: zeros take part in the calculation; compared against a separate expected file. -->
    <test>
      <param name="genomic_interval" value="interval_interpolate.bed"/>
      <param name="genomic_feature" value="value_interpolate.bed"/>
      <param name="allow_zeros" value="True" />
      <output name="out_file1" file="interpolate_result_zeros.bed"/>
    </test>
  </tests>
  <help>


.. class:: infomark

**What it does**

For each interval in your first dataset, this tool calculates the weighted average value of the overlapping features in your second dataset.

- When a genomic interval partially or totally overlaps a single genomic feature, the value of that genomic feature is assigned to the genomic interval.
- When a genomic interval partially or totally overlaps more than one genomic feature, the average of the values of the overlapping genomic features, weighted by the corresponding number of overlapping bases, is assigned to the genomic interval.
- When a genomic interval does not overlap with any genomic feature, 'NA' will be assigned as its value.

-----

.. class:: warningmark

**Note**

The input datasets should be in **bed** or **interval** format. Please use the "edit attributes" (pencil) icon to specify the column containing the values for the features in the second dataset as the **name/identifier** column.

The output will contain all the columns in the first input plus a new column containing the assigned value for each interval.

-----

**Example**

- Suppose our first dataset contains the following **genomic intervals**::
    
    chr  start stop
    chr1 1000  2000
    chr1 3000  5000
    chr1 8000  9000
  
- and our second dataset contains the following **genomic features**, each having an associated value (in the fourth column) ::

    chr  start stop name
    chr1 900   1200 0.5
    chr1 2900  3100 0.2
    chr1 4800  5100 0.8
  
- For each **genomic interval** in our first dataset, this tool calculates the weighted average value of the overlapping **genomic features** in our second dataset ::

    chr1 1000  2000 0.5
    chr1 3000  5000 0.6
    chr1 8000  9000 NA
  


</help>
</tool>