view ensembl2symbol.xml @ 2:2c218a253d56 draft default tip

"planemo upload for repository commit 758394addb95b09e794132a23a1f7e95ba39df0b"
author malex
date Thu, 29 Jul 2021 20:48:10 +0000
parents ec9ee8edb84d
line wrap: on
line source

<tool id="secimtools_ensembl2symbol" name="Map ENSEMBLIDs to Gene Symbols." version="@WRAPPER_VERSION@">
  <expand macro="requirements" />
    <exit_code range="1" level="fatal" description="Repeated Unique IDs"/>
  <command detect_errors="exit_code"><![CDATA[
    <param name="species" type="select" label="Select the species your ENSEMBLIDs are from" >
     <option value="human">Homo sapiens</option>
     <option value="mouse">Mus musculus</option>
     <option value="rat">Rattus norvegicus</option>
     <option value="fruitfly">Drosophila melanogaster</option>
     <option value="thale-cress">Arabidopsis thaliana</option>
     <option value="nematode">Caenorhabditis elegans</option>
    <param name="geneAnnot" type="data" format="tabular" label="Select the Dataset from your History containing the ENSEMBLIDs"/>
    <param name="uniqId" type="text" size="30" value="" label="Unique Gene FeatureID" help="Name of the column in your dataset containing unique FeatureIDs."/>
    <param name="ensemblId" type="text" size="30" value="" label="ENSEMBLID" help="Name of the column containing the ENSEMBLIDs to use for linking to gene symbols."/>
    <!-- Annotated output table. The history label is built from the tool's name and the input dataset description (on_string). -->
    <data format="tabular" name="output" label="${tool.name} on ${on_string}: ENSEMBL to GeneSymbol Annotation File"/>
      <param name="species" value="rat"/>
      <param name="geneAnnot" value="gene_annotation.tsv"/>
      <param name="uniqId" value="UniqueID"/>
      <param name="ensemblId" value="GeneName"/>
      <output name="output" value="ensembl2symbol_annotation.tsv"/>

**Tool Description**

This tool takes an annotation data file containing unique FeatureIDs and Ensembl IDs and adds
gene symbols. The link from the Ensembl IDs to gene symbols is made using mygene.
The tool adds the following columns to the input annotation data file:
GeneSymbol, Score, Selected and Tie.

The GeneSymbol column contains the short-form abbreviation for the gene. The Score column
contains a value generated by mygene indicating how well the Ensembl ID matched the returned gene
symbol(s). For
cases where an Ensembl ID uniquely matches to a gene symbol, the Selected column = ‘Yes’. For
cases where an Ensembl ID matches to more than one gene symbol, the Selected column = ‘Yes’ for
the gene symbol with the best Score value. If there is a tie, the alphabetically first gene
symbol is selected and the Tie column = ‘Yes’. We note that FeatureID may not be unique in the
resulting output dataset.



**Dataset with unique FeatureID and ENSEMBLID values**

 | FeatureID   | ENSEMBLID    | ... |
 | FeatureID_1 | ENS...       | ... |
 | FeatureID_2 | ENS...       | ... |
 | FeatureID_3 | ENS...       | ... |
 | ...         | ...          | ... |

    **NOTE:** This file must contain at least two columns, a column with unique FeatureIDs and a column containing ENSEMBLIDs. Other columns may be present.

**Unique FeatureID**

Name of the column in your input dataset that has unique FeatureIDs.


**ENSEMBLID**

Name of the column containing the ENSEMBLIDs.



The user will get a single output file containing the linked gene symbols.

**Output Table**

  | FeatureID   | ENSEMBLID  | ...          | GeneSymbol  | Score     | Selected |
  | FeatureID_1 | ENS...     | ...          | one*        | 13.550056 | Yes      |
  | FeatureID_2 | ENS...     | ...          | two*        | 12.984067 | Yes      |
  | FeatureID_2 | ENS...     | ...          | three*      | 11.995048 | No       |
  | FeatureID_3 | ENS...     | ...          | four*       | 12.549084 | Yes      |
  | ...         | ...        | ...          | ...         | ...       | ...      |

  '*'=refers to the matched gene

    <citation type="bibtex">@ARTICLE{Kirpich17secimtools,
    author = {Alexander S. Kirpich, Miguel Ibarra, Oleksandr Moskalenko, Justin M. Fear, Joseph Gerken, Xinlei Mi, Ali Ashrafi, Alison M. Morse, Lauren M. McIntyre},
    title = {SECIMTools: A suite of Metabolomics Data Analysis Tools},
    journal = {BMC Bioinformatics},
    year = {2018}
    <citation type="bibtex">@article{Mor2021GaitGM,
    title={GAIT-GM integrative cross-omics analyses reveal cholinergic defects in a C. elegans model of Parkinson's disease},
    author={Mor, DE and Huertas, F and Morse, AM and Kaletsky, R and Murphy, CT and Kalia, V and Miller, GW and Moskalenko, O and Conesa, A and McIntyre, LM},
    journal={BMC Genomics},
    <citation type="bibtex">@article{xim2016mygene,
    title={High-performance web services for querying gene and variant annotation},
    author={Xin, J and Mark, A and Afrashiabi, C and Tsueng, G and Juchler, M and Gopal, N and Stupp, GS and Putman, TE and Ainscough, BJ and Griffith, OL and Torkamani, A and Whetzel, PL and Mungall, CJ and Mooney, SD and Su, AI and Wu, C},
    journal={Genome Biology},