comparison ensembl2symbol.xml @ 2:2c218a253d56 draft default tip

"planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
author malex
date Thu, 29 Jul 2021 20:48:10 +0000
parents ec9ee8edb84d
children
comparison
equal deleted inserted replaced
1:ec9ee8edb84d 2:2c218a253d56
2 <description></description> 2 <description></description>
3 <macros> 3 <macros>
4 <import>macros.xml</import> 4 <import>macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="requirements" /> 6 <expand macro="requirements" />
7 <stdio> 7 <stdio>
8 <exit_code range="1" level="fatal" description="Repeated Unique IDs"/> 8 <exit_code range="1" level="fatal" description="Repeated Unique IDs"/>
9 </stdio> 9 </stdio>
10 <command detect_errors="exit_code"><![CDATA[ 10 <command detect_errors="exit_code"><![CDATA[
11 ensembl2symbol.py 11 ensembl2symbol.py
12 -s=$species 12 -s=$species
13 -ga=$geneAnnot 13 -ga=$geneAnnot
14 -id=$uniqId 14 -id=$uniqId
19 <param name="species" type="select" label="Select the species your ENSEMBLIDs are from" > 19 <param name="species" type="select" label="Select the species your ENSEMBLIDs are from" >
20 <option value="human">Homo sapiens</option> 20 <option value="human">Homo sapiens</option>
21 <option value="mouse">Mus musculus</option> 21 <option value="mouse">Mus musculus</option>
22 <option value="rat">Rattus norvegicus</option> 22 <option value="rat">Rattus norvegicus</option>
23 <option value="fruitfly">Drosophila melanogaster</option> 23 <option value="fruitfly">Drosophila melanogaster</option>
24 <option value="thale-cress">Arabidopsis thaliana</option>
25 <option value="nematode">Caenorhabditis elegans</option>
24 </param> 26 </param>
25 <param name="geneAnnot" type="data" format="tabular" label="Select the Dataset from your History containing the ENSEMBLIDs"/> 27 <param name="geneAnnot" type="data" format="tabular" label="Select the Dataset from your History containing the ENSEMBLIDs"/>
26 <param name="uniqId" type="text" size="30" value="" label="Unique Gene FeatureID" help="Name of the column in your dataset containing unique FeatureIDs."/> 28 <param name="uniqId" type="text" size="30" value="" label="Unique Gene FeatureID" help="Name of the column in your dataset containing unique FeatureIDs."/>
27 <param name="ensemblId" type="text" size="30" value="" label="ENSEMBLID" help="Name of the column containing the ENSEMBLIDs to use for linking to gene symbols."/> 29 <param name="ensemblId" type="text" size="30" value="" label="ENSEMBLID" help="Name of the column containing the ENSEMBLIDs to use for linking to gene symbols."/>
28 </inputs> 30 </inputs>
29 <outputs> 31 <outputs>
30 <data format="tabular" name="output" label="${tool.name} on ${on_string}: ENSEMBL to GeneSymbol Annotation File"/> 32 <data format="tabular" name="output" label="${tool.name} on ${on_string}: ENSEMBL to GeneSymbol Annotation File"/>
31 </outputs> 33 </outputs>
32 <tests> 34 <tests>
33 <test> 35 <test>
34 <param name="species" value="rat"/> 36 <param name="species" value="rat"/>
35 <param name="geneAnnot" value="gene_annotation_file_01fhl.tsv"/> 37 <param name="geneAnnot" value="gene_annotation.tsv"/>
36 <param name="uniqId" value="UniqueID"/> 38 <param name="uniqId" value="UniqueID"/>
37 <param name="ensemblId" value="GeneName"/> 39 <param name="ensemblId" value="GeneName"/>
38 <param name="Output" value="ensembl2symbol_annotation_file_01fhl.tsv"/> 40 <output name="output" value="ensembl2symbol_annotation.tsv"/>
39 </test> 41 </test>
40 </tests> 42 </tests>
41 <help><![CDATA[ 43 <help><![CDATA[
42 44
43 **Tool Description** 45 **Tool Description**
44 46
45 This tool takes a dataset containing unique FeatureIDs and ENSEMBLIDs and provides gene symbols. The link from the ENSEMBLIDs to gene symbols is made using Biomart. The tool adds the following columns to the input dataset: GeneSymbol, Score, Selected, and Tie. The GeneSymbol column contains the short identifiers (typically 3 letter abbreviations) of the gene name. The Score column contains a value that represents how well the ENSEMBLID matched the returned GeneSymbol using the PyPi package Gene 3.0.0 (https://www.ncbi.nlm.nih.gov/pubmed/23175613). The Selected column = 'Yes' when an ENSEMBLID uniquely matches a GeneSymbol or when that row has the highest Score value. The Selected column = 'No' in the absence of a unique match for rows lower than the maximum score. If there is a tie in the Score the alphabetically first value is selected and the Tie column has a value of Yes. We note that FeatureID may not be unique in the resulting output dataset. 47 This tool takes an annotation data file containing unique FeatureIDs and Ensembl IDs and adds
48 gene symbols. The link from the Ensembl IDs to gene symbols is made using mygene
49 (https://mygene.info/). The tool adds the following columns to the input annotation data file:
50 GeneSymbol, Score, Selected and Tie.
51
52 The GeneSymbol column contains the short-form abbreviation for the gene. The Score column
53 contains a value generated by mygene indicating how well the Ensembl ID matched the returned gene
54 symbol(s) (https://genomebiology.biomedcentral.com/articles/10.1186/s13059-016-0953-9). For
55 cases where an Ensembl ID uniquely matches to a gene symbol, the Selected column = ‘Yes’. For
56 cases where an Ensembl ID matches to more than one gene symbol, the Selected column = ‘Yes’ for
57 the gene symbol with the best Score value. If there is a tie, the alphabetically first gene
58 symbol is selected and the Tie column = ‘Yes’. We note that FeatureID may not be unique in the
59 resulting output dataset.
46 60
47 -------------------------------------------------------------------------------- 61 --------------------------------------------------------------------------------
48 62
49 **INPUT** 63 **Input**
50 64
51 **Dataset with unique FeatureIDs and ENSEMBLIDs** 65 **Dataset with unique FeatureID and ENSEMBLID values**
52 66
53 +-------------+--------------+-----+ 67 +-------------+--------------+-----+
54 | FeatureID | ENSEMBLID | ... | 68 | FeatureID | ENSEMBLID | ... |
55 +=============+==============+=====+ 69 +=============+==============+=====+
56 | FeatureID_1 | ENS... | ... | 70 | FeatureID_1 | ENS... | ... |
57 +-------------+--------------+-----+ 71 +-------------+--------------+-----+
58 | FeatureID_2 | ENS... | ... | 72 | FeatureID_2 | ENS... | ... |
59 +-------------+--------------+-----+ 73 +-------------+--------------+-----+
60 | FeatureID_3 | ENS... | ... | 74 | FeatureID_3 | ENS... | ... |
61 +-------------+--------------+-----+ 75 +-------------+--------------+-----+
62 | ... | ... | ... | 76 | ... | ... | ... |
63 +-------------+--------------+-----+ 77 +-------------+--------------+-----+
64 78
65 **NOTE:** This file must contain at least two columns, a column with unique FeatureIDs and a column containing ENSEMBLIDs. Other columns may be present. 79 **NOTE:** This file must contain at least two columns, a column with unique FeatureIDs and a column containing ENSEMBLIDs. Other columns may be present.
66 80
67 **Unique FeatureID** 81 **Unique FeatureID**
68 82
69 Name of the column in your input dataset that has unique FeatureIDs. 83 Name of the column in your input dataset that has unique FeatureIDs.
70 84
71 **ENSEMBLID** 85 **ENSEMBLID**
72 86
73 Name of the column containing the ENSEMBLIDs. 87 Name of the column containing the ENSEMBLIDs.
74 88
75 -------------------------------------------------------------------------------- 89 --------------------------------------------------------------------------------
76 90
77 **OUTPUT** 91 **OUTPUT**
78 92
79 The user will get a single output file. 93 The user will get a single output file containing the linked gene symbols.
80 94
81 **Output Table** 95 **Output Table**
82 96
83 +------------+------------+--------------+-------------+-----------+----------+ 97 +-------------+------------+--------------+-------------+-----------+----------+
84 | FeatureID | ENSEMBLID | ... | GeneSymbol | Score | Selected | 98 | FeatureID | ENSEMBLID | ... | GeneSymbol | Score | Selected |
85 +============+============+==============+=============+===========+==========+ 99 +=============+============+==============+=============+===========+==========+
86 | FeatureID_1 | ENS... | ... | one* | 13.550056 | Yes | 100 | FeatureID_1 | ENS... | ... | one* | 13.550056 | Yes |
87 +------------+------------+--------------+-------------+-----------+----------+ 101 +-------------+------------+--------------+-------------+-----------+----------+
88 | FeatureID_2 | ENS... | ... | two* | 12.984067 | Yes | 102 | FeatureID_2 | ENS... | ... | two* | 12.984067 | Yes |
89 +------------+------------+--------------+-------------+-----------+----------+ 103 +-------------+------------+--------------+-------------+-----------+----------+
90 | FeatureID_2 | ENS... | ... | three* | 11.995048 | No | 104 | FeatureID_2 | ENS... | ... | three* | 11.995048 | No |
91 +------------+------------+--------------+-------------+-----------+----------+ 105 +-------------+------------+--------------+-------------+-----------+----------+
92 | FeatureID_3 | ENS... | ... | four* | 12.549084 | Yes | 106 | FeatureID_3 | ENS... | ... | four* | 12.549084 | Yes |
93 +------------+------------+--------------+-------------+-----------+----------+ 107 +-------------+------------+--------------+-------------+-----------+----------+
94 | ... | ... | ... | ... | ... | ... | 108 | ... | ... | ... | ... | ... | ... |
95 +------------+------------+--------------+-------------+-----------+----------+ 109 +-------------+------------+--------------+-------------+-----------+----------+
96 110
97 ]]> 111 '*' refers to the matched gene
98 </help> 112
113 ]]></help>
99 <citations> 114 <citations>
100 <citation type="bibtex">@article{wu2014mygene,
101 title={MyGene. info: gene annotation query as a service},
102 author={Wu, Chunlei and Mark, Adam and Su, Andrew I},
103 journal={bioRxiv},
104 pages={009332},
105 year={2014},
106 publisher={Cold Spring Harbor Laboratory}
107 }</citation>
108 <citation type="bibtex">@ARTICLE{Kirpich17secimtools, 115 <citation type="bibtex">@ARTICLE{Kirpich17secimtools,
109 author = {Alexander S. Kirpich, Miguel Ibarra, Oleksandr Moskalenko, Justin M. Fear, Joseph Gerken, Xinlei Mi, Ali Ashrafi, Alison M. Morse, Lauren M. McIntyre}, 116 author = {Alexander S. Kirpich, Miguel Ibarra, Oleksandr Moskalenko, Justin M. Fear, Joseph Gerken, Xinlei Mi, Ali Ashrafi, Alison M. Morse, Lauren M. McIntyre},
110 title = {SECIMTools: A suite of Metabolomics Data Analysis Tools}, 117 title = {SECIMTools: A suite of Metabolomics Data Analysis Tools},
111 journal = {BMC Bioinformatics}, 118 journal = {BMC Bioinformatics},
112 year = {in press} 119 year = {2018}
113 }</citation> 120 }</citation>
114 <citation type="bibtex"> 121 <citation type="bibtex">@article{Mor2021GaitGM,
115 @article{garcia2010paintomics, 122 title={GAIT-GM integrative cross-omics analyses reveal cholinergic defects in a C. elegans model of Parkinson's disease},
116 title={Paintomics: a web based tool for the joint visualization of transcriptomics and metabolomics data}, 123 author={Mor, DE and Huertas, F and Morse, AM and Kaletsky, R and Murphy, CT and Kalia, V and Miller, GW and Moskalenko, O and Conesa, A and McIntyre, LM},
117 author={Garc{\'\i}a-Alcalde, Fernando and Garc{\'\i}a-L{\'o}pez, Federico and Dopazo, Joaqu{\'\i}n and Conesa, Ana}, 124 journal={BMC Genomics},
118 journal={Bioinformatics}, 125 year={submitted},
119 volume={27}, 126 }</citation>
120 number={1}, 127 <citation type="bibtex">@article{xin2016mygene,
121 pages={137--139}, 128 title={High-performance web services for querying gene and variant annotation},
122 year={2010}, 129 author={Xin, J and Mark, A and Afrashiabi, C and Tsueng, G and Juchler, M and Gopal, N and Stupp, GS and Putman, TE and Ainscough, BJ and Griffith, OL and Torkamani, A and Whetzel, PL and Mungall, CJ and Mooney, SD and Su, AI and Wu, C},
123 publisher={Oxford University Press} 130 journal={Genome Biology},
131 year={2016},
124 }</citation> 132 }</citation>
125 </citations> 133 </citations>
126 </tool> 134 </tool>