view gemini_interactions.xml @ 8:f745f54638e5 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit f7bdf08922aaf4119aefe7041e754a69cf64aebd
author iuc
date Wed, 13 Jul 2022 15:21:00 +0000
parents 7028ca3cac1c
children
line wrap: on
line source

<tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@">
    <description>Find genes among variants that are interacting partners</description>
    <!-- Shared GEMINI wrapper definitions; the BINARY token is substituted
         into the id and name attributes of this tool. -->
    <macros>
        <import>gemini_macros.xml</import>
        <token name="@BINARY@">interactions</token>
    </macros>
    <expand macro="bio_tools"/>
    <expand macro="requirements"/>
    <expand macro="stdio">
        <!-- Mark the job as failed when this message appears on stderr,
             i.e. when the user-specified gene is unknown to gemini. -->
        <regex source="stderr"
               match="Gene name not found or gene not in interaction file"
               level="fatal"
               description="The gene you specified is not defined in the interaction file"/>
    </expand>
    <expand macro="version_command"/>
    <command><![CDATA[
        ## The preinstalled interaction data lives in the annotation
        ## databases, so the shared setup snippet is only needed then.
        #if str($interactions.source) == 'preinstalled':
            #set $annotation_databases = $interactions.annotation_databases
            @PROVIDE_ANNO_DATA@
        #end if

        ## Whitespace-only input in the gene field counts as "no gene given".
        #set $query_gene = str($gene).strip()

        gemini
            #if $query_gene:
                interactions -g '$query_gene'
            #else:
                ## without a query gene the separate lof_interactions
                ## subcommand is used instead
                lof_interactions
            #end if

            ## user-supplied interaction data from the history
            #if str($interactions.source) == 'history':
                --edges '${interactions.data}'
            #end if

            -r ${radius}
            ${variant_mode}

            '${infile}'
            > '${outfile}'
    ]]></command>
    <inputs>
        <expand macro="infile" />
        <!-- Where the protein-protein interaction catalogue comes from:
             GEMINI's bundled annotation data or a dataset in the history. -->
        <conditional name="interactions">
            <param name="source" type="select" label="Interaction data source"
            help="This tool requires a catalogue of known protein-protein interactions. Such interaction data, obtained from the Human Protein Reference Database (HPRD), is part of GEMINI's own annotation data, but you can choose to provide your own interactions data instead.">
                <option value="preinstalled">HPRD interaction data bundled with GEMINI</option>
                <option value="history">History dataset with interactions</option>
            </param>
            <when value="preinstalled">
                <expand macro="annotation_dir" />
            </when>
            <when value="history">
                <param name="data" type="data" format="txt"
                label="Interactions data"
                help="You can provide interaction data as a simple text file with one interaction of the form geneA|geneB (e.g., ZFPM2|GATA4) per line." />
            </when>
        </conditional>
        <!-- Leaving the gene empty makes the command fall back to the
             lof_interactions subcommand. -->
        <param name="gene" type="text" value=""
        label="Report affected interaction partners of this particular gene"
        help="By default, the tool finds all genes affected by loss-of-function variants in your input, then, for every such gene, reports its interaction partners if they are also affected by any variant. If you specify the name of a gene of interest (e.g. PTPN22) here, you get the affected interaction partners of only this particular gene reported, irrespective of whether your gene of interest itself is affected by any variant or not." />
        <param argument="-r" name="radius" type="integer" value="3" min="0"
        label="Report interaction partners up to (and including) this order"
        help="A value of 1, for example, means: report only affected direct interaction partners. A value of 0 restricts the report to just variants in the query gene itself." />
        <!-- The option values are passed to the command line verbatim, so
             the first option (empty value) adds no flag at all. -->
        <param argument="--var" name="variant_mode" type="select" display="radio"
        label="Report format for interactions"
        help="By default, each line of the report lists all affected interaction partners of a given order together. Choose the second option to get one line per variant instead, extended with details about that variant (e.g., its position, impact, ClinVar annotation and population allele frequencies).">
            <option value="">Interaction partners only</option>
            <option value="--var">Interaction partners and the variants affecting them</option>
        </param>
    </inputs>
    <outputs>
        <!-- Report captured from the standard output of the gemini call -->
        <data format="tabular" name="outfile"/>
    </outputs>
    <tests>
        <!-- No query gene given, default report format: expect the plain
             four-column report with its header line. -->
        <test>
            <param name="infile" value="gemini_comphets_input.db" ftype="gemini.sqlite"/>
            <param name="radius" value="5"/>
            <output name="outfile">
                <assert_contents>
                    <has_n_columns n="4"/>
                    <has_line line="sample&#009;lof_gene&#009;order_of_interaction&#009;interacting_gene"/>
                </assert_contents>
            </output>
        </test>
        <!-- Same input with the variant report enabled: the header line must
             continue past the four basic columns. -->
        <test>
            <param name="infile" value="gemini_comphets_input.db" ftype="gemini.sqlite"/>
            <param name="radius" value="5"/>
            <param name="variant_mode" value="--var"/>
            <output name="outfile">
                <assert_contents>
                    <has_line_matching expression="sample&#009;lof_gene&#009;order_of_interaction&#009;interacting_gene.+"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
**What it does**

Knowledge of known protein-protein interactions can be useful in explaining variation data:
a damaging variant in an interacting partner of a protein of interest may be just as interesting as a variant in the
protein itself. The HPRD_ binary interaction data has been used to build a protein-protein interaction network graph, which can be explored by GEMINI.

.. _HPRD: http://www.ncbi.nlm.nih.gov/pubmed/18988627

**Details**

*interactions: Find genes among variants that are interacting partners.*

Knowledge of known protein-protein interactions can be useful in explaining variation data: a damaging variant in an interacting partner of a protein of interest may be just as interesting as a variant in the protein itself. The HPRD binary interaction data has been used to build a protein-protein interaction network graph, which can be explored by GEMINI.

**Examples**

EXAMPLE with setting -g CTBP2 and -r 3::

 sample   gene    order_of_interaction    interacting_gene
 M128215  CTBP2   0_order:                CTBP2
 M128215  CTBP2   1_order:                RAI2
 M128215  CTBP2   2_order:                RB1
 M128215  CTBP2   3_order:                TGM2,NOTCH2NL

This returns the variant-affected genes that interact with CTBP2 (-g), up to the third order of interaction (-r).

EXAMPLE lof_interactions (used when no gene is specified; restricts the analysis to genes affected by LoF variants) with setting -r 3::

 sample    lof_gene    order_of_interaction    interacting_gene
 M128215   TGM2        1_order:                RB1
 M128215   TGM2        2_order:                none
 M128215   TGM2        3_order:                NOTCH2NL,CTBP2

That is, return all interacting partners of the LoF gene TGM2 (in sample M128215) up to the third order of interaction.

EXAMPLE --var. Extended variant information (chrom, start, end etc.) for the interacting genes can be obtained with the --var option for both interactions and lof_interactions. Settings '-g CTBP2', '-r 3' and '--var'::

 sample   gene    order_of_interaction    interacting_gene    var_id  chrom   start           end             impact          biotype         in_dbsnp    clinvar_sig   clinvar_disease_name    aaf_1kg_all     aaf_esp_all
 M128215  CTBP2   0                       CTBP2               5       chr10   126678091       126678092       stop_gain       protein_coding  1           None          None                    None            None
 M128215  CTBP2   1                       RAI2                9       chrX    17819376        17819377        non_syn_coding  protein_coding  1           None          None                    1               0.000473
 M128215  CTBP2   2                       RB1                 7       chr13   48873834        48873835        upstream        protein_coding  1           None          None                    0.94            None
 M128215  CTBP2   3                       NOTCH2NL            1       chr1    145273344       145273345       non_syn_coding  protein_coding  1           None          None                    None            None
 M128215  CTBP2   3                       TGM2                8       chr20   36779423        36779424        stop_gain       protein_coding  0           None          None                    None            None

EXAMPLE lof_interactions with the following settings: '-r 3' and '--var'::

 sample    lof_gene   order_of_interaction   interacting_gene   var_id   chrom   start         end             impact          biotype         in_dbsnp   clinvar_sig   clinvar_disease_name    aaf_1kg_all     aaf_esp_all
 M128215   TGM2       1                      RB1                7        chr13   48873834      48873835        upstream        protein_coding  1          None          None                    0.94            None
 M128215   TGM2       3                      NOTCH2NL           1        chr1    145273344     145273345       non_syn_coding  protein_coding  1          None          None                    None            None
 M128215   TGM2       3                      CTBP2              5        chr10   126678091     126678092       stop_gain       protein_coding  1          None          None                    None            None

    ]]></help>
    <expand macro="citations">
        <!-- HPRD citation -->
        <citation type="doi">10.1093/nar/gkn892</citation>
    </expand>
</tool>