view ensembl2symbol.xml @ 2:2c218a253d56 draft default tip

"planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
author malex
date Thu, 29 Jul 2021 20:48:10 +0000
parents ec9ee8edb84d
children
line wrap: on
line source

<tool id="secimtools_ensembl2symbol" name="Map ENSEMBLIDs to Gene Symbols." version="@WRAPPER_VERSION@">
  <description></description>
  <!-- Shared definitions are pulled in from macros.xml; the "requirements" macro expanded
       below comes from there. NOTE(review): the @WRAPPER_VERSION@ token used in the tool
       tag is presumably defined in macros.xml as well; confirm. -->
  <macros>
      <import>macros.xml</import>
  </macros>
  <expand macro="requirements" />
  <!-- Treat exit code 1 from the script as a fatal error, reported as "Repeated Unique IDs".
       NOTE(review): the command tag below also sets detect_errors="exit_code"; confirm which
       of the two error-detection settings Galaxy actually applies. -->
  <stdio>
    <exit_code range="1" level="fatal" description="Repeated Unique IDs"/>
  </stdio>
  <!-- Run ensembl2symbol.py on the selected dataset. Every substituted value is single-quoted
       so that column names or paths containing spaces or shell metacharacters reach the script
       as one argument instead of being split or interpreted by the shell. -->
  <command detect_errors="exit_code"><![CDATA[
  ensembl2symbol.py
    -s='$species'
    -ga='$geneAnnot'
    -id='$uniqId'
    -e='$ensemblId'
    -o='$output'
  ]]></command>
  <inputs>
    <!-- Species keyword handed to the script through the -s option. -->
    <param name="species" type="select" label="Select the species your ENSEMBLIDs are from">
      <option value="human">Homo sapiens</option>
      <option value="mouse">Mus musculus</option>
      <option value="rat">Rattus norvegicus</option>
      <option value="fruitfly">Drosophila melanogaster</option>
      <option value="thale-cress">Arabidopsis thaliana</option>
      <option value="nematode">Caenorhabditis elegans</option>
    </param>
    <!-- Tabular annotation dataset holding the FeatureID and ENSEMBLID columns. -->
    <param name="geneAnnot" type="data" format="tabular" label="Select the Dataset from your History containing the ENSEMBLIDs"/>
    <!-- Column names are free text with an empty default, so reject blank values in the form
         rather than letting an empty option value reach the script. -->
    <param name="uniqId" type="text" size="30" value="" label="Unique Gene FeatureID" help="Name of the column in your dataset containing unique FeatureIDs.">
      <validator type="empty_field" message="Enter the name of the column containing the unique FeatureIDs."/>
    </param>
    <param name="ensemblId" type="text" size="30" value="" label="ENSEMBLID" help="Name of the column containing the ENSEMBLIDs to use for linking to gene symbols.">
      <validator type="empty_field" message="Enter the name of the column containing the ENSEMBLIDs."/>
    </param>
  </inputs>
  <outputs>
    <!-- Single tabular result: the input annotation with the gene symbol columns appended. -->
    <data name="output"
          format="tabular"
          label="${tool.name} on ${on_string}: ENSEMBL to GeneSymbol Annotation File"/>
  </outputs>
  <tests>
    <!-- Functional test: maps the rat IDs in gene_annotation.tsv (ID column "UniqueID",
         Ensembl column "GeneName") and compares the result with ensembl2symbol_annotation.tsv.
         NOTE(review): the help text says the mapping is done through mygene.info, so this
         test presumably needs network access and may drift as the service updates; confirm. -->
    <test>
      <param name="species" value="rat"/>
      <param name="geneAnnot" value="gene_annotation.tsv"/>
      <param name="uniqId" value="UniqueID"/>
      <param name="ensemblId" value="GeneName"/>
      <output name="output" value="ensembl2symbol_annotation.tsv"/>
    </test>
  </tests>
  <help><![CDATA[

**Tool Description**

This tool takes an annotation data file containing unique FeatureIDs and Ensembl IDs and adds
gene symbols. The link from the Ensembl IDs to gene symbols is made using mygene
(https://mygene.info/). The tool adds the following columns to the input annotation data file:
GeneSymbol, Score, Selected and Tie.

The GeneSymbol column contains the short-form abbreviation for the gene. The Score column
contains a value generated by mygene indicating how well the Ensembl ID matched the returned gene
symbol(s) (https://genomebiology.biomedcentral.com/articles/10.1186/s13059-016-0953-9). For
cases where an Ensembl ID uniquely matches to a gene symbol, the Selected column = ‘Yes’. For
cases where an Ensembl ID matches to more than one gene symbol, the Selected column = ‘Yes’ for
the gene symbol with the best Score value. If there is a tie, the alphabetically first gene
symbol is selected and the Tie column = ‘Yes’. Because an Ensembl ID that matches more than one
gene symbol yields one output row per matched symbol, FeatureID may not be unique in the
resulting output dataset.

--------------------------------------------------------------------------------

**Input**

**Dataset with unique FeatureID and ENSEMBLID values**

 +-------------+--------------+-----+
 | FeatureID   | ENSEMBLID    | ... |
 +=============+==============+=====+
 | FeatureID_1 | ENS...       | ... |
 +-------------+--------------+-----+
 | FeatureID_2 | ENS...       | ... |
 +-------------+--------------+-----+
 | FeatureID_3 | ENS...       | ... |
 +-------------+--------------+-----+
 | ...         | ...          | ... |
 +-------------+--------------+-----+

    **NOTE:** This file must contain at least two columns, a column with unique FeatureIDs and a column containing ENSEMBLIDs. Other columns may be present.

**Unique Gene FeatureID**

Name of the column in your input dataset that has unique FeatureIDs.

**ENSEMBLID**

Name of the column containing the ENSEMBLIDs.

--------------------------------------------------------------------------------

**OUTPUT**

The user will get a single output file containing the linked gene symbols.

**Output Table**

  +-------------+------------+--------------+-------------+-----------+----------+
  | FeatureID   | ENSEMBLID  | ...          | GeneSymbol  | Score     | Selected |
  +=============+============+==============+=============+===========+==========+
  | FeatureID_1 | ENS...     | ...          | one*        | 13.550056 | Yes      |
  +-------------+------------+--------------+-------------+-----------+----------+
  | FeatureID_2 | ENS...     | ...          | two*        | 12.984067 | Yes      |
  +-------------+------------+--------------+-------------+-----------+----------+
  | FeatureID_2 | ENS...     | ...          | three*      | 11.995048 | No       |
  +-------------+------------+--------------+-------------+-----------+----------+
  | FeatureID_3 | ENS...     | ...          | four*       | 12.549084 | Yes      |
  +-------------+------------+--------------+-------------+-----------+----------+
  | ...         | ...        | ...          | ...         | ...       | ...      |
  +-------------+------------+--------------+-------------+-----------+----------+

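  '*' = refers to the matched gene symbol. The Tie column described in the Tool Description is also added to the output but is not shown in this example.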

  ]]></help>
  <!-- References shown with the tool. BibTeX separates multiple authors with " and ";
       a comma-separated list is parsed as a single malformed name. -->
  <citations>
    <citation type="bibtex">@ARTICLE{Kirpich17secimtools,
    author = {Alexander S. Kirpich and Miguel Ibarra and Oleksandr Moskalenko and Justin M. Fear and Joseph Gerken and Xinlei Mi and Ali Ashrafi and Alison M. Morse and Lauren M. McIntyre},
    title = {SECIMTools: A suite of Metabolomics Data Analysis Tools},
    journal = {BMC Bioinformatics},
    year = {2018}
    }</citation>
    <citation type="bibtex">@article{Mor2021GaitGM,
    title={GAIT-GM integrative cross-omics analyses reveal cholinergic defects in a C. elegans model of Parkinson's disease},
    author={Mor, DE and Huertas, F and Morse, AM and Kaletsky, R and Murphy, CT and Kalia, V and Miller, GW and Moskalenko, O and Conesa, A and McIntyre, LM},
    journal={BMC Genomics},
    year={submitted},
    }</citation>
    <citation type="bibtex">@article{xim2016mygene,
    title={High-performance web services for querying gene and variant annotation},
    author={Xin, J and Mark, A and Afrasiabi, C and Tsueng, G and Juchler, M and Gopal, N and Stupp, GS and Putman, TE and Ainscough, BJ and Griffith, OL and Torkamani, A and Whetzel, PL and Mungall, CJ and Mooney, SD and Su, AI and Wu, C},
    journal={Genome Biology},
    year={2016},
    }</citation>
  </citations>
</tool>