comparison add_kegg_pathway_info.xml @ 1:ec9ee8edb84d draft

Initial upload of 21.6.10 release.
author malex
date Fri, 18 Jun 2021 20:23:19 +0000
parents
children 2c218a253d56
comparison
equal deleted inserted replaced
0:864fc6430432 1:ec9ee8edb84d
1 <tool id="secimtools_add_kegg_pathway_info" name="Add KEGG Pathway Information" version="@WRAPPER_VERSION@">
2 <description>using KEGGIDs</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements" />
7 <!-- Every substituted value is single-quoted so that column names or paths containing spaces reach the script as one argument. --><command detect_errors="exit_code"><![CDATA[
8 add_kegg_pathway_info.py
9 -sp='$species'
10 #if $dataSets.whichDataSet == "geneDataset":
11 -gka='$dataSets.geneKeggAnnot'
12 -gid='$dataSets.geneUniqId'
13 -gn='$dataSets.geneName'
14 -gkid='$dataSets.geneKeggId'
15 -go='$geneOutput'
16 -kg2p='$gene_keggID2pathID'
17 #end if
18 #if $dataSets.whichDataSet == "metDataset":
19 -mka='$dataSets.metKeggAnnot'
20 -mid='$dataSets.metUniqId'
21 -mn='$dataSets.metName'
22 -mkid='$dataSets.metKeggId'
23 -mo='$metOutput'
24 -km2p='$met_keggID2pathID'
25 #end if
26 #if $dataSets.whichDataSet == "both":
27 -gka='$dataSets.geneKeggAnnot'
28 -gid='$dataSets.geneUniqId'
29 -gn='$dataSets.geneName'
30 -gkid='$dataSets.geneKeggId'
31 -go='$geneOutput'
32 -kg2p='$gene_keggID2pathID'
33 -mka='$dataSets.metKeggAnnot'
34 -mid='$dataSets.metUniqId'
35 -mn='$dataSets.metName'
36 -mkid='$dataSets.metKeggId'
37 -mo='$metOutput'
38 -km2p='$met_keggID2pathID'
39 #end if
40 -p='$pathways'
41 ]]></command>
42 <inputs> <!-- Column-name text fields carry an empty_field validator: they default to "" and are substituted directly into the command line. -->
43 <param name="species" type="select" label="Select your Species from the List" >
44 <option value="hsa">Homo sapiens</option>
45 <option value="mmu">Mus musculus</option>
46 <option value="rno">Rattus norvegicus</option>
47 <option value="dme">Drosophila melanogaster</option>
48 <option value="ath">Arabidopsis thaliana</option>
49 <option value="sce">Saccharomyces cerevisiae</option>
50 <option value="eco">Escherichia coli</option>
51 </param>
52 <conditional name="dataSets">
53 <param name="whichDataSet" type="select" display="radio" label="Select the Datasets you want to add KEGG pathway information to. Note: datasets must contain KEGGIDs.">
54 <option value="both" selected="true">Gene Expression + Metabolomic Files with KEGGIDs</option>
55 <option value="geneDataset">Gene Expression File with KEGGIDs</option>
56 <option value="metDataset">Metabolomic File with KEGGIDs</option>
57 <validator type="no_options" message="You must select at least one option." />
58 </param>
59 <when value="both">
60 <param name="geneKeggAnnot" type="data" format="tabular" label="Gene to KEGGID Link File" help="Select the Gene to KEGGID Link File that contains KEGG identifiers from your history"/>
61 <param name="geneUniqId" type="text" size="30" value="" label="Gene unique FeatureID" help="Name of the column in your Gene to KEGGID Link File that contains unique FeatureIDs."><validator type="empty_field" message="Enter the name of the column that contains unique FeatureIDs."/></param>
62 <param name="geneName" type="text" size="30" value="" label="Gene Symbol" help="Name of the column in your Gene to KEGGID Link File that contains Gene Symbols."><validator type="empty_field" message="Enter the name of the column that contains Gene Symbols."/></param>
63 <param name="geneKeggId" type="text" size="30" value="" label="Gene KEGGID" help="Name of the column in your Gene to KEGGID Link File that contains KEGGIDs."><validator type="empty_field" message="Enter the name of the column that contains KEGGIDs."/></param>
64 <param name="metKeggAnnot" type="data" format="tabular" label="Metabolite to KEGGID Link File" help="Select the Metabolite to KEGGID Link File that contains KEGG identifiers from your history"/>
65 <param name="metUniqId" type="text" size="30" value="" label="Metabolite unique FeatureID" help="Name of the column in your Metabolite to KEGGID Link File that contains unique FeatureIDs."><validator type="empty_field" message="Enter the name of the column that contains unique FeatureIDs."/></param>
66 <param name="metName" type="text" size="30" value="" label="Metabolite Names" help="Name of the column in your Metabolite to KEGGID Link File that contains Metabolite Names."><validator type="empty_field" message="Enter the name of the column that contains Metabolite Names."/></param>
67 <param name="metKeggId" type="text" size="30" value="" label="Metabolite KEGGID" help="Name of the column in your Metabolite to KEGGID Link File that contains KEGGIDs."><validator type="empty_field" message="Enter the name of the column that contains KEGGIDs."/></param>
68 </when>
69 <when value="geneDataset">
70 <param name="geneKeggAnnot" type="data" format="tabular" label="Gene to KEGGID Link File" help="Select the Gene to KEGGID Link File that contains KEGG identifiers from your history"/>
71 <param name="geneUniqId" type="text" size="30" value="" label="Gene unique FeatureID" help="Name of the column in your Gene to KEGGID Link File that contains unique FeatureIDs."><validator type="empty_field" message="Enter the name of the column that contains unique FeatureIDs."/></param>
72 <param name="geneName" type="text" size="30" value="" label="Gene Symbol" help="Name of the column in your Gene to KEGGID Link File that contains Gene Symbols."><validator type="empty_field" message="Enter the name of the column that contains Gene Symbols."/></param>
73 <param name="geneKeggId" type="text" size="30" value="" label="Gene KEGGID" help="Name of the column in your Gene to KEGGID Link File that contains KEGGIDs."><validator type="empty_field" message="Enter the name of the column that contains KEGGIDs."/></param>
74 </when>
75 <when value="metDataset">
76 <param name="metKeggAnnot" type="data" format="tabular" label="Metabolite to KEGGID Link File" help="Select the Metabolite to KEGGID Link File that contains KEGG identifiers from your history"/>
77 <param name="metUniqId" type="text" size="30" value="" label="Metabolite unique FeatureID" help="Name of the column in your Metabolite to KEGGID Link File that contains unique FeatureIDs."><validator type="empty_field" message="Enter the name of the column that contains unique FeatureIDs."/></param>
78 <param name="metName" type="text" size="30" value="" label="Metabolite Names" help="Name of the column in your Metabolite to KEGGID Link File that contains Metabolite Names."><validator type="empty_field" message="Enter the name of the column that contains Metabolite Names."/></param>
79 <param name="metKeggId" type="text" size="30" value="" label="Metabolite KEGGID" help="Name of the column in your Metabolite to KEGGID Link File that contains KEGGIDs."><validator type="empty_field" message="Enter the name of the column that contains KEGGIDs."/></param>
80 </when>
81 </conditional>
82 </inputs>
83 <outputs> <!-- Gene outputs are kept for 'geneDataset' or 'both'; metabolite outputs for 'metDataset' or 'both'; 'pathways' has no filter. -->
84 <data name="gene_keggID2pathID" format="tabular" label="${tool.name} on ${on_string}: GeneKeggID2PathwayID">
85 <filter>dataSets['whichDataSet'] in ('geneDataset', 'both')</filter>
86 </data>
87 <data name="met_keggID2pathID" format="tabular" label="${tool.name} on ${on_string}: MetaboliteKeggID2PathwayID">
88 <filter>dataSets['whichDataSet'] in ('metDataset', 'both')</filter>
89 </data>
90 <data name="pathways" format="tabular" label="${tool.name} on ${on_string}: PathwayID2PathwayNames"/>
91 <data name="geneOutput" format="tabular" label="${tool.name} on ${on_string}: Gene KEGG Pathway File">
92 <filter>dataSets['whichDataSet'] in ('geneDataset', 'both')</filter>
93 </data>
94 <data name="metOutput" format="tabular" label="${tool.name} on ${on_string}: Metabolite KEGG Pathway File">
95 <filter>dataSets['whichDataSet'] in ('metDataset', 'both')</filter>
96 </data>
97 </outputs>
98 <tests>
99 <test> <!-- Runs the 'both' branch; output names must match the data elements declared in the outputs section. -->
100 <param name="species" value="rno"/>
101 <param name="whichDataSet" value="both"/>
102 <param name="geneKeggAnnot" value="gene_to_keggId_link_01fhl.tsv"/>
103 <param name="geneUniqId" value="UniqueID"/>
104 <param name="geneName" value="GeneSymbol"/>
105 <param name="geneKeggId" value="KEGGID"/>
106 <param name="metKeggAnnot" value="metabolite_to_keggId_link_01fhl.tsv"/>
107 <param name="metUniqId" value="UniqueID"/>
108 <param name="metName" value="MetName"/>
109 <param name="metKeggId" value="KEGGID"/>
110 <output name="gene_keggID2pathID" file="KGEN2PATHWAY"/>
111 <output name="met_keggID2pathID" file="KMET2PATHWAY"/>
112 <output name="pathways" file="PATHWAYS"/>
113 <output name="output" file="kegg_downloader_table"/> <!-- NOTE(review): no output named "output" is declared; confirm whether this should be geneOutput or metOutput. -->
114 </test>
115 </tests>
116 <help><![CDATA[
117
118 **Tool Description**
119
120 This tool takes a Gene to KEGGID Link dataset, a Metabolite to KEGGID Link dataset, or both and adds KEGG Pathway Names using KEGGIDs. The tool was designed to take the output from the 'Link Name to KEGGID' tool as input (for example the Gene to KEGGID Link dataset) but other datasets containing KEGGIDs can be used as well.
121
122 The user will get different outputs from the 'Add KEGG Pathway Info Tool', depending on the input. If a Gene to KEGGID Link dataset is given as input the tool outputs the following three files: 1) a Gene KEGG Pathway dataset containing the FeatureID, Feature_Name, Feature_Type and KEGGID columns from the input file and KEGG_PathwayIDs and KEGG Pathway Names from KEGG, 2) a GeneKeggID2PathwayID dataset containing all gene KEGGIDs in KEGG and their associated pathway KEGGIDs and 3) a PathwayID2PathwayNames dataset containing all of the pathway KEGGIDs and their associated KEGG pathway names.
123
124 Analogous files are generated by the tool if a Metabolite to KEGGID Link dataset is input by the user.
125
126 Note: FeatureIDs and KEGGIDs may not be unique in the output.
127
128 --------------------------------------------------------------------------------
129
130 **INPUT**
131
132 **Gene Dataset containing KEGGIDs - for example, output from the Link Name to KEGGID tool**
133
134 +----------+-----------+--------------+---------+--------------+---------+------------+-----+----------+
135 | UniqueId | Gene_Name | Feature_Type | Matched | Name_in_KEGG | KEGGID | Similarity | Tie | Selected |
136 +==========+===========+==============+=========+==============+=========+============+=====+==========+
137 | Gene_1 | one | Gene | Yes | one* | mmu:... | 1.0 | No | Yes |
138 +----------+-----------+--------------+---------+--------------+---------+------------+-----+----------+
139 | Gene_2 | two | Gene | Yes | two* | mmu:... | 1.0 | No | Yes |
140 +----------+-----------+--------------+---------+--------------+---------+------------+-----+----------+
141 | Gene_3 | three | Gene | Yes | three* | mmu:... | 1.0 | No | Yes |
142 +----------+-----------+--------------+---------+--------------+---------+------------+-----+----------+
143 | Gene_4 | four | Gene | No | NA | NA | NA | NA | NA |
144 +----------+-----------+--------------+---------+--------------+---------+------------+-----+----------+
145 | ... | ... | ... | ... | ... | ... | ... | ... | ... |
146 +----------+-----------+--------------+---------+--------------+---------+------------+-----+----------+
147
148 **Metabolite Dataset containing KEGGIDs - for example, output from the Link Name to KEGGID tool**
149
150 +----------+----------+--------------+---------+--------------+----------+------------+-----+----------+
151 | UniqueId | Met_Name | Feature_Type | Matched | Name_in_KEGG | KEGGID | Similarity | Tie | Selected |
152 +==========+==========+==============+=========+==============+==========+============+=====+==========+
153 | Met_1 | one | Metabolite | Yes | one* | cpd:... | 1.0 | No | Yes |
154 +----------+----------+--------------+---------+--------------+----------+------------+-----+----------+
155 | Met_2 | two | Metabolite | Yes | two* | cpd:... | 1.0 | No | Yes |
156 +----------+----------+--------------+---------+--------------+----------+------------+-----+----------+
157 | Met_3 | three | Metabolite | Yes | three* | cpd:... | 1.0 | No | Yes |
158 +----------+----------+--------------+---------+--------------+----------+------------+-----+----------+
159 | Met_4 | four | Metabolite | No | NA | NA | NA | NA | NA |
160 +----------+----------+--------------+---------+--------------+----------+------------+-----+----------+
161 | ... | ... | ... | ... | ... | ... | ... | ... | ... |
162 +----------+----------+--------------+---------+--------------+----------+------------+-----+----------+
163
164 **Gene unique FeatureID and/or Metabolite unique FeatureID**
165
166 Name of the column in your KEGGID Link File that contains unique identifiers (for genes and/or metabolites).
167
168 **Gene Symbol and/or Metabolite Names**
169
170 Name of the column in your KEGGID Link File that has Gene Symbols (for genes) or Metabolite Names (for metabolite data).
171
172 **Gene KEGGID and/or Metabolite KEGGID**
173
174 Name of the column in your KEGGID Link File that contains KEGGIDs (for genes and/or metabolites).
175
176 --------------------------------------------------------------------------------
177
178 **OUTPUT**
179
180 The user will get different output from the tool, depending on whether they include the 'Gene to KEGGID Link' File, the 'Metabolite to KEGGID Link' File, or both.
181
182 (1) **GeneKeggID2PathwayID.** Downloaded file from KEGG for the selected species that contains ALL the Gene KEGGIDs to PathwayIDs.
183 (2) **MetaboliteKeggID2PathwayID.** Downloaded file from KEGG for the selected species that contains ALL the Metabolite KEGGIDs to PathwayIDs.
184 (3) **PathwayID2PathwayNames.** Downloaded file from KEGG for the selected species that contains ALL the PathwayIDs to Pathway Names.
185 (4) **Gene KEGG Pathway File.** Tabular file with genes, feature types, KEGGIDs, PathwayIDs and Pathway Names.
186
187 +-------------+--------------+--------------+---------+--------------+--------------+
188 | FeatureID | Gene_Symbol | Feature_Type | KEGGID | PathwayID | Pathway_Name |
189 +=============+==============+==============+=========+==============+==============+
190 | FeatureID_1 | one | Gene | mmu:... | path:mmu:... | Pathway_A |
191 +-------------+--------------+--------------+---------+--------------+--------------+
192 | FeatureID_2 | two | Gene | mmu:... | path:mmu:... | Pathway_B |
193 +-------------+--------------+--------------+---------+--------------+--------------+
194 | FeatureID_3 | three | Gene | mmu:... | path:mmu:... | Pathway_C |
195 +-------------+--------------+--------------+---------+--------------+--------------+
196 | FeatureID_4 | four | Gene | mmu:... | path:mmu:... | Pathway_D |
197 +-------------+--------------+--------------+---------+--------------+--------------+
198 | ... | ... | ... | ... | ... | ... |
199 +-------------+--------------+--------------+---------+--------------+--------------+
200
201
202
203 (5) **Metabolite KEGG Pathway File.** Tabular file with metabolites, feature types, KEGGIDs, PathwayIDs and Pathway Names.
204
205 +-------------+-----------------+--------------+---------+--------------+--------------+
206 | FeatureID | Metabolite_Name | Feature_Type | KEGGID | PathwayID | Pathway_Name |
207 +=============+=================+==============+=========+==============+==============+
208 | FeatureID_1 | one | Metabolite | cpd:... | path:map:... | Pathway_A |
209 +-------------+-----------------+--------------+---------+--------------+--------------+
210 | FeatureID_2 | two | Metabolite | cpd:... | path:map:... | Pathway_B |
211 +-------------+-----------------+--------------+---------+--------------+--------------+
212 | FeatureID_3 | three | Metabolite | cpd:... | path:map:... | Pathway_C |
213 +-------------+-----------------+--------------+---------+--------------+--------------+
214 | FeatureID_4 | four | Metabolite | cpd:... | path:map:... | Pathway_D |
215 +-------------+-----------------+--------------+---------+--------------+--------------+
216 | ... | ... | ... | ... | ... | ... |
217 +-------------+-----------------+--------------+---------+--------------+--------------+
218
219 ]]>
220 </help>
221 <citations>
222 <citation type="bibtex">@ARTICLE{Kirpich17secimtools,
223 author = {Alexander S. Kirpich and Miguel Ibarra and Oleksandr Moskalenko and Justin M. Fear and Joseph Gerken and Xinlei Mi and Ali Ashrafi and Alison M. Morse and Lauren M. McIntyre},
224 title = {SECIMTools: A suite of Metabolomics Data Analysis Tools},
225 journal = {BMC Bioinformatics},
226 year = {in press}
227 }</citation>
228 <citation type="bibtex">@article{garcia2010paintomics,
229 title={Paintomics: a web based tool for the joint visualization of transcriptomics and metabolomics data},
230 author={Garc{\'\i}a-Alcalde, Fernando and Garc{\'\i}a-L{\'o}pez, Federico and Dopazo, Joaqu{\'\i}n and Conesa, Ana},
231 journal={Bioinformatics},
232 volume={27},
233 number={1},
234 pages={137--139},
235 year={2010},
236 publisher={Oxford University Press}
237 }</citation>
238 </citations>
239 </tool>