view hicPCA.xml @ 14:3b5b54ed6392 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hicexplorer commit 2a0943e78bdc8ebb13f181399206a9eea37ed78f"
author iuc
date Tue, 16 Mar 2021 15:05:22 +0000
parents c435572f7cf1
children 41dbf4d162a2
line wrap: on
line source

<tool id="hicexplorer_hicpca" name="@BINARY@" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
    <description>compute the principal components for A / B compartment analysis</description>
    <macros>
        <!-- @BINARY@ is substituted into the tool name and the command template of this wrapper. -->
        <token name="@BINARY@">hicPCA</token>
        <!-- Shared definitions; the other tokens and macros this file expands are presumably declared there (verify in macros.xml). -->
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <command detect_errors="exit_code"><![CDATA[
        ## File names used inside the job working directory. Matrix-like files
        ## carry the datatype extension of the input dataset (presumably so the
        ## binary can recognise the matrix format; TODO confirm).
        #set $matrix_ext = str($matrix_h5_cooler.ext)
        #set $matrix_link = 'matrix.' + $matrix_ext
        #set $pearson_tmp = 'pearson.' + $matrix_ext
        #set $obsexp_tmp = 'obsexp.' + $matrix_ext
        #set $pc1_tmp = 'pca1.' + str($outputFormat)
        #set $pc2_tmp = 'pca2.' + str($outputFormat)
        #set $track_kind = str($extra_track_conditional.extra_track_selection)

        ln -s '$matrix_h5_cooler' '$matrix_link' &&
        @BINARY@ --matrix '$matrix_link'

        @CHROMOSOME_LIST@

        --outputFileName $pc1_tmp $pc2_tmp
        --format $outputFormat

        ## Boolean switches: each parameter renders as its truevalue when checked.
        #if $ligation_factor:
            ${ligation_factor}
        #end if
        #if $ignoreMaskedBins:
            ${ignoreMaskedBins}
        #end if

        ## Optional extra track; the mark type is only passed for the histon_mark choice.
        ## NOTE(review): the track is passed by its Galaxy dataset path. If the binary
        ## inspects the file extension to tell BED from bigwig, a symlink like the one
        ## used for the matrix may be needed; confirm against the hicPCA sources.
        #if $track_kind in ('gene_density', 'histon_mark'):
            --extraTrack '$extra_track_conditional.extraTrack'
            #if $track_kind == 'histon_mark':
                --histonMarkType '$extra_track_conditional.histonMarkType'
            #end if
        #end if

        #if $pearsonMatrix:
            --pearsonMatrix $pearson_tmp
        #end if
        #if $obsexpMatrix:
            --obsexpMatrix $obsexp_tmp
        #end if

        ## Rename the results to the extension-less names that the outputs
        ## section collects via from_work_dir.
        && mv $pc1_tmp pca1
        && mv $pc2_tmp pca2
        #if $pearsonMatrix:
            && mv $pearson_tmp pearson
        #end if
        #if $obsexpMatrix:
            && mv $obsexp_tmp obsexp
        #end if
    ]]></command>
    <inputs>
        <!-- Input contact matrix (parameter definition comes from the shared macro). -->
        <expand macro="matrix_h5_cooler_macro" />

        <!-- Format of the two eigenvector tracks; also drives change_format on the pca outputs. -->
        <param name="outputFormat" type="select" label="Output file format">
            <option value="bigwig" selected="true">bigwig</option>
            <option value="bedgraph">bedgraph</option>
        </param>

        <expand macro="chromosome_list" />

        <!-- Boolean switches: truevalue is the literal command-line flag; falsevalue is
             empty so an unchecked box can never render a stray word on the command line. -->
        <param name="ligation_factor" type="boolean" truevalue="--ligation_factor" falsevalue="" label="Multiplies a scaling factor to each entry of the expected matrix to take care of the proximity ligation" />
        <param name="ignoreMaskedBins" type="boolean" truevalue="--ignoreMaskedBins" falsevalue="" label="This option removes the masked bins before the PCA is computed" />

        <!-- Optional extra track. The option value 'histon_mark' and the parameter name
             'histonMarkType' are identifiers used by the command template and by saved
             workflows, so only the user-facing labels are spelled 'histone'. -->
        <conditional name="extra_track_conditional">
            <param name="extra_track_selection" type="select" label="Extra track type">
                <option value="" selected="true">No track</option>
                <option value="gene_density">Gene density</option>
                <option value="histon_mark">Histone mark coverage</option>
            </param>
            <when value="gene_density">
                <param name="extraTrack" type="data" format="bed" label="Correlate PCA with e.g. gene density or histone marks to flip sign" />
            </when>
            <when value="histon_mark">
                <param name="extraTrack" type="data" format="bed,bigwig" label="Correlate PCA with e.g. gene density or histone marks to flip sign" />
                <param name="histonMarkType" type="select" label="Histone mark type">
                    <option value="active" selected="true">active</option>
                    <option value="inactive">inactive</option>
                </param>
            </when>
            <when value="" />
        </conditional>

        <!-- Optional intermediate matrices; each switch also enables the matching filtered output. -->
        <param argument="--pearsonMatrix" type="boolean" truevalue="--pearsonMatrix" falsevalue="" label="Return internally used Pearson matrix" />
        <param argument="--obsexpMatrix" type="boolean" truevalue="--obsexpMatrix" falsevalue="" label="Return internally used observed / expected matrix" />

    </inputs>
    <outputs>
        <!-- Eigenvector tracks. Declared as bigwig and switched to bedgraph when
             that output format is selected. -->
        <data name="pca1" format="bigwig" from_work_dir="pca1" label="${tool.name} on ${matrix_h5_cooler.name} [${on_string}]: PC1">
            <change_format>
                <when input="outputFormat" value="bedgraph" format="bedgraph" />
            </change_format>
        </data>
        <data name="pca2" format="bigwig" from_work_dir="pca2" label="${tool.name} on ${matrix_h5_cooler.name} [${on_string}]: PC2">
            <change_format>
                <when input="outputFormat" value="bedgraph" format="bedgraph" />
            </change_format>
        </data>

        <!-- Optional intermediate matrices. Each is created only when its switch
             is set; declared as cool and switched to h5 for an h5 input matrix. -->
        <data name="pearson_outfile" format="cool" from_work_dir="pearson">
            <filter>pearsonMatrix</filter>
            <change_format>
                <when input_dataset="matrix_h5_cooler" attribute="ext" value="h5" format="h5" />
            </change_format>
        </data>
        <data name="obsexp_outfile" format="cool" from_work_dir="obsexp">
            <filter>obsexpMatrix</filter>
            <change_format>
                <when input_dataset="matrix_h5_cooler" attribute="ext" value="h5" format="h5" />
            </change_format>
        </data>
    </outputs>
    <tests>
        <!-- Test 1: cool matrix, defaults, no extra track. -->
        <test>
            <param name="matrix_h5_cooler" value="small_test_matrix.cool" />
            <param name="outputFormat" value="bigwig" />
            <conditional name="extra_track_conditional">
                <param name="extra_track_selection" value="" />
            </conditional>

            <output name="pca1" file="hicPCA/pca1_test1.bw" ftype="bigwig" compare="sim_size" delta="40000" />
            <output name="pca2" file="hicPCA/pca2_test1.bw" ftype="bigwig" compare="sim_size" delta="40000" />
        </test>

        <!-- Test 2: as test 1, with the ligation factor switch enabled. -->
        <test>
            <param name="matrix_h5_cooler" value="small_test_matrix.cool" />
            <param name="outputFormat" value="bigwig" />
            <param name="ligation_factor" value="True" />
            <conditional name="extra_track_conditional">
                <param name="extra_track_selection" value="" />
            </conditional>

            <output name="pca1" file="hicPCA/pca1_test1.bw" ftype="bigwig" compare="sim_size" delta="40000" />
            <output name="pca2" file="hicPCA/pca2_test1.bw" ftype="bigwig" compare="sim_size" delta="40000" />
        </test>

        <!-- Test 3: as test 1, ignoring masked bins; compared with a larger size delta. -->
        <test>
            <param name="matrix_h5_cooler" value="small_test_matrix.cool" />
            <param name="outputFormat" value="bigwig" />
            <param name="ignoreMaskedBins" value="True" />
            <conditional name="extra_track_conditional">
                <param name="extra_track_selection" value="" />
            </conditional>

            <output name="pca1" file="hicPCA/pca1_test1.bw" ftype="bigwig" compare="sim_size" delta="60000" />
            <output name="pca2" file="hicPCA/pca2_test1.bw" ftype="bigwig" compare="sim_size" delta="60000" />
        </test>

        <!-- Test 4: h5 matrix, gene density extra track, restricted to two chromosomes. -->
        <test>
            <param name="matrix_h5_cooler" value="small_test_matrix.h5" />
            <param name="outputFormat" value="bigwig" />
            <conditional name="extra_track_conditional">
                <param name="extra_track_selection" value="gene_density" />
                <param name="extraTrack" value="dm3_genes.bed.gz" />
            </conditional>
            <repeat name="chromosome_list">
                <param name="chromosomes" value="chrX" />
            </repeat>
            <repeat name="chromosome_list">
                <param name="chromosomes" value="chrXHet" />
            </repeat>

            <output name="pca1" file="hicPCA/pca1_test2.bw" ftype="bigwig" compare="sim_size" delta="40000" />
            <output name="pca2" file="hicPCA/pca2_test2.bw" ftype="bigwig" compare="sim_size" delta="40000" />
        </test>

        <!-- Test 5: h5 matrix, two chromosomes, both optional matrices requested;
             the matrix outputs are checked for the expected h5 keys. -->
        <test>
            <param name="matrix_h5_cooler" value="small_test_matrix.h5" />
            <param name="outputFormat" value="bigwig" />
            <param name="pearsonMatrix" value="True" />
            <param name="obsexpMatrix" value="True" />
            <conditional name="extra_track_conditional">
                <param name="extra_track_selection" value="" />
            </conditional>
            <repeat name="chromosome_list">
                <param name="chromosomes" value="chrX" />
            </repeat>
            <repeat name="chromosome_list">
                <param name="chromosomes" value="chrXHet" />
            </repeat>

            <output name="pca1" file="hicPCA/pca1_test3.bw" ftype="bigwig" compare="sim_size" delta="40000" />
            <output name="pca2" file="hicPCA/pca2_test3.bw" ftype="bigwig" compare="sim_size" delta="40000" />

            <output name="pearson_outfile" ftype="h5">
                <assert_contents>
                    <has_h5_keys keys="intervals,matrix" />
                </assert_contents>
            </output>
            <output name="obsexp_outfile" ftype="h5">
                <assert_contents>
                    <has_h5_keys keys="intervals,matrix" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[

Principal component analysis
============================

`Lieberman-Aiden et al. (2009)`_ demonstrated that open and closed chromatin domains throughout the genome occupy different spatial compartments in the nucleus, defined as A (active) and B (inactive) compartments.

**hicPCA** computes two eigenvector files based on the input matrix for an A / B compartment analysis following the computation steps detailed by `Lieberman-Aiden et al. (2009)`_: the contact matrix is transformed
into an observed vs. expected matrix and subsequently into a Pearson correlation matrix, which shows a plaid pattern. The two states of this plaid pattern are called the A and B compartments. Applying a PCA to the Pearson correlation matrix gives the eigenvectors,
and Lieberman-Aiden et al. showed that the values of the eigenvectors correspond to the distribution of genes and to features of open and closed chromatin. In some cases the first principal component corresponds to the two
chromosome arms and the second eigenvector to the plaid pattern. Therefore, the first two principal components should always be returned and inspected.

_________________

Usage
-----

This tool must be used on Hi-C contact matrices with large bins (over 20kb), created with ``hicMergeMatrixBins`` and corrected with ``hicCorrectMatrix``. Using matrices with too high a resolution (small bins or restriction enzyme resolution) might take several days to run (even with over 100 CPUs) or fail due to memory limitations.

_________________

Output
------
Two files are output by **hicPCA**, one with the first (pca1) and one with the second (pca2) eigenvector, as bigwig or bedgraph. These files can be plotted alongside Hi-C heatmaps, gene density or external datasets such as open chromatin or histone mark enrichment using ``pyGenomeTracks`` or ``hicPlotMatrix``.

For example, below you can find a ``hicPlotMatrix`` plot of the Pearson correlation matrix derived from a contact matrix for chromosome 6 in mouse, computed with ``hicTransform`` (which is part of the A/B compartment computation). The optional data track at the bottom shows the first eigenvector for the A/B compartments obtained using **hicPCA**.

.. image:: $PATH_TO_IMAGES/hicPCA.png
   :scale: 35 %

_________________

| For more information about HiCExplorer please consider our documentation on readthedocs.io_

.. _readthedocs.io: http://hicexplorer.readthedocs.io/en/latest/index.html
.. _`Lieberman-Aiden et al. (2009)`: https://doi.org/10.1126/science.1181369
]]>    </help>
    <expand macro="citations" />
</tool>