view tools/human_genome_variation/hilbertvis.xml @ 1:cdcb0ce84a1b

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:45:15 -0500
parents 9071e359b9a3
children
line wrap: on
line source

<!--
  Galaxy tool definition "HVIS": runs hilbertvis.sh on an interval or GFF
  dataset and writes a PDF image of the data laid out along a Hilbert curve.
-->
<tool id="hgv_hilbertvis" name="HVIS" version="1.0.0">
  <description>visualization of genomic data with the Hilbert curve</description>

  <!--
    Command line for hilbertvis.sh (Cheetah template, run through bash).
    Positional arguments, in order: input dataset path, output path,
    chromInfo file path, sequence name, score column, level, summarization
    mode, then the chromosome / start / end / strand column numbers.
    For GFF datasets the four column numbers are fixed to 1 4 5 7; for any
    other datatype they are read from the dataset's metadata.
    Path arguments and the free-text sequence name are single-quoted so that
    whitespace or shell metacharacters in them cannot split or alter the
    command line.
  -->
  <command interpreter="bash">
    hilbertvis.sh '$input' '$output' '$chromInfo' '$chrom' $plot_value.score_col $level $mode
    #if isinstance( $input.datatype, $__app__.datatypes_registry.get_datatype_by_extension('gff').__class__)
      1 4 5 7
    #else
      ${input.metadata.chromCol} ${input.metadata.startCol} ${input.metadata.endCol} ${input.metadata.strandCol}
    #end if
  </command>

  <inputs>
    <!-- Dataset to plot (interval or GFF). Validators: unspecified_build,
         plus metadata checks for chromCol, startCol and endCol. -->
    <param name="input"
           type="data"
           format="interval,gff"
           label="Dataset">
      <validator type="unspecified_build"/>
      <validator type="metadata" check="chromCol" message="chromCol missing"/>
      <validator type="metadata" check="startCol" message="startCol missing"/>
      <validator type="metadata" check="endCol" message="endCol missing"/>
    </param>

    <!-- Free-text sequence name; passed to hilbertvis.sh as its fourth argument. -->
    <param name="chrom"
           type="text"
           label="Sequence to plot"
           help="Name of sequence (from the chromosome column in the dataset) to plot.  If left blank, the first sequence in the dataset will be plotted."/>

    <!-- Either branch defines score_col, so the command template can always
         reference plot_value.score_col. -->
    <conditional name="plot_value">
      <param name="choice" type="select" label="Value to plot">
        <option value="score" selected="true">Score column from dataset</option>
        <option value="exist">Same value for each base (existence)</option>
      </param>
      <when value="score">
        <param name="score_col"
               type="data_column"
               data_ref="input"
               numerical="true"
               label="Score column"/>
      </when>
      <when value="exist">
        <!-- Fixed to -1 in existence mode; presumably a sentinel meaning
             "no score column" for hilbertvis.sh (confirm against the script). -->
        <param name="score_col" type="hidden" value="-1"/>
      </when>
    </conditional>

    <!-- Hilbert curve level; only a lower bound (1) is enforced. -->
    <param name="level"
           type="integer"
           value="9"
           label="Level"
           help="Level of Hilbert curve.  The resulting image will have 2&lt;sup&gt;level&lt;/sup&gt; by 2&lt;sup&gt;level&lt;/sup&gt; pixels.">
      <validator type="in_range" min="1" message="The level must be an integer &gt;= 1."/>
    </param>

    <!-- How multiple input values falling into one bin are combined; absmax is the default. -->
    <param name="mode"
           type="select"
           label="Summarization mode"
           help="Method used to determine a value for a point in the plot which covers multiple values in the input.">
      <option value="max">Maximal value in each bin</option>
      <option value="min">Minimal value in each bin</option>
      <option value="absmax" selected="true">Maximal absolute value in each bin</option>
      <option value="mean">Mean value of each bin</option>
    </param>
  </inputs>

  <outputs>
    <!-- Single output dataset, declared as PDF; its path is passed to
         hilbertvis.sh as $output. -->
    <data format="pdf" name="output"/>
  </outputs>

  <tests>
    <!-- Score-mode plot of chr22 using column 15, at the default level (9)
         and default mode (absmax). The PDF output is checked with
         compare="sim_size" and a delta of 7168. -->
    <test>
      <param name="input" value="hvis_mkar_chr22.tab"/>
      <param name="chrom" value="chr22"/>
      <param name="choice" value="score"/>
      <param name="score_col" value="15"/>
      <param name="level" value="9"/>
      <param name="mode" value="absmax"/>
      <output name="output"
              file="hvis_mkar_chr22.pdf"
              compare="sim_size"
              delta="7168"/>
    </test>
  </tests>

  <help>
**Dataset formats**

The input format is interval_ or GFF, and the output is an image in PDF format.
(`Dataset missing?`_)

.. _interval: ./static/formatHelp.html#interval
.. _Dataset missing?: ./static/formatHelp.html

-----

**What it does**

HilbertVis uses the Hilbert space-filling curve to visualize the structure of
position-dependent data.  It maps the traditional one-dimensional line
visualization onto a two-dimensional square.  For example, here is a diagram
showing the path of a level-2 Hilbert curve.

.. image:: ./static/images/hilbertvisDiagram.png

The shade of each pixel represents the value for the corresponding bin of
consecutive genomic positions, calculated according to the specified
summarization mode.  The pixels are arranged so that bins that are close
to each other on the data vector are represented by pixels that are close
to each other in the plot.  In particular, adjacent bins are mapped to
adjacent pixels.  Hence, a dark spot in a figure represents a peak; the area
of the spot in the two-dimensional plot is proportional to the width of the
peak in the one-dimensional data, and the darkness of the spot corresponds to
the height of the peak.

The input file is in interval or GFF format, and typically contains a column
with scores or other numbers, such as conservation scores, SNP density, the
coverage of aligned reads from ChIP-Seq data, etc.

Website: http://www.ebi.ac.uk/huber-srv/hilbert/

-----

**Examples**

Here are some examples from the HilbertVis homepage, using ChIP-Seq data.

.. image:: ./static/images/hilbertvis1.png

-----

.. image:: ./static/images/hilbertvis2.png

-----

**Reference**

Anders S. (2009)
Visualization of genomic data with the Hilbert curve.
Bioinformatics. 25(10):1231-5. Epub 2009 Mar 17.

  </help>
</tool>