annotate add_kegg_anno_info.xml @ 2:2c218a253d56 draft default tip

"planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
author malex
date Thu, 29 Jul 2021 20:48:10 +0000
parents ec9ee8edb84d
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
1 <tool id="secimtools_add_kegg_anno_info" name="Link Name to KEGGID" version="@WRAPPER_VERSION@">
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
2 <description></description>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
3 <macros>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
4 <import>macros.xml</import>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
5 </macros>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
6 <expand macro="requirements" />
2
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
7 <stdio>
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
8 <exit_code range="1" level="fatal" description="Repeated Unique IDs"/>
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
9 </stdio>
1
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
10 <command detect_errors="exit_code"><![CDATA[
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
11 add_kegg_anno_info.py
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
12 -s=$species
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
13 #if $dataSets.whichDataSet == "geneDataset":
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
14 -ga=$dataSets.geneAnnot
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
15 -gid=$dataSets.geneUniqId
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
16 -gn=$dataSets.geneName
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
17 #end if
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
18 #if $dataSets.whichDataSet == "metDataset":
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
19 -ma=$dataSets.metAnnot
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
20 -mid=$dataSets.metUniqId
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
21 -mn=$dataSets.metName
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
22 #end if
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
23 #if $dataSets.whichDataSet == "geneDataset,metDataset":
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
24 -ga=$dataSets.geneAnnot
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
25 -gid=$dataSets.geneUniqId
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
26 -gn=$dataSets.geneName
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
27 -ma=$dataSets.metAnnot
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
28 -mid=$dataSets.metUniqId
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
29 -mn=$dataSets.metName
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
30 #end if
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
31 -go=$geneOutput
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
32 -mo=$metOutput
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
33 ]]></command>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
34 <inputs>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
35 <param name="species" type="select" label="Select Species from the list" >
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
36 <option value="hsa">Homo sapiens</option>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
37 <option value="mmu">Mus musculus</option>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
38 <option value="rno">Rattus norvegicus</option>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
39 <option value="dme">Drosophila melanogaster</option>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
40 <option value="ath">Arabidopsis thaliana</option>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
41 <option value="sce">Saccharomyces cerevisiae</option>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
42 <option value="eco">Escherichia coli</option>
2
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
43 <option value="cel">Caenorhabditis elegans</option>
1
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
44 </param>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
45 <conditional name="dataSets">
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
46 <param name="whichDataSet" type="select" display="radio" label="Select Annotation Dataset(s)">
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
47 <option value="geneDataset,metDataset" selected="true">Gene Expression + Metabolomic Annotation Datasets</option>
2
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
48 <option value="geneDataset">Gene Expression Annotation Dataset</option>
1
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
49 <option value="metDataset">Metabolomic Annotation Dataset</option>
2
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
50 <validator type="no_options" message="You must select at least one option." />
1
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
51 </param>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
52 <when value="geneDataset">
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
53 <param name="geneAnnot" type="data" format="tabular" label="Select the Gene Expression Annotation dataset from your History"/>
2
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
54 <param name="geneUniqId" type="text" size="30" value="" label="Gene Unique FeatureID" help="Name of the column in your Gene Expression Annotation dataset that contains the unique FeatureIDs."/>
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
55 <param name="geneName" type="text" size="30" value="" label="Gene Symbol" help="Name of the column in your Gene Expression Annotation dataset that contains Gene Symbols to use for linking to KEGGIDs."/>
1
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
56 </when>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
57 <when value="metDataset">
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
58 <param name="metAnnot" type="data" format="tabular" label="Select the Metabolomic Annotation dataset from your History" />
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
59 <param name="metUniqId" type="text" size="30" value="" label="Metabolite Unique FeatureID" help="Name of the column in your Metabolomic Annotation dataset that contains the unique FeatureIDs."/>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
60 <param name="metName" type="text" size="30" value="" label="Metabolite Names" help="Name of the column in your Metabolomic Annotation dataset that has metabolite names to use for linking to KEGGIDs."/>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
61 </when>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
62 <when value="geneDataset,metDataset">
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
63 <param name="geneAnnot" type="data" format="tabular" label="Select the Gene Expression Annotation dataset from your History"/>
2
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
64 <param name="geneUniqId" type="text" size="30" value="" label="Gene Unique FeatureID" help="Name of the column in your Gene Expression Annotation dataset that contains the unique FeatureIDs."/>
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
65 <param name="geneName" type="text" size="30" value="" label="Gene Symbol" help="Name of the Column in your Gene Expression Annotation dataset that contains Gene Symbols to use for linking to KEGGIDs."/>
1
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
66 <param name="metAnnot" type="data" format="tabular" label="Select the Metabolomic Annotation dataset from your History" />
2
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
67 <param name="metUniqId" type="text" size="30" value="" label="Metabolite Unique FeatureID" help="Name of the Column in your Metabolomic Annotation dataset that contains the unique FeatureIDs."/>
1
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
68 <param name="metName" type="text" size="30" value="" label="Metabolite Names" help="Name of the Column in your Metabolomic Annotation dataset that has metabolite names to use for linking to KEGGIDs."/>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
69 </when>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
70 </conditional>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
71 </inputs>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
72 <outputs>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
73 <data format="tabular" name="geneOutput" label="${tool.name} on ${on_string}: Gene to KEGGID link">
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
74 <filter>(dataSets['whichDataSet'] == 'geneDataset') or (dataSets['whichDataSet'] == 'geneDataset,metDataset')</filter>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
75 </data>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
76 <data format="tabular" name="metOutput" label="${tool.name} on ${on_string}: Metabolite to KEGGID link">
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
77 <filter>(dataSets['whichDataSet'] == 'metDataset') or (dataSets['whichDataSet'] == 'geneDataset,metDataset')</filter>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
78 </data>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
79 </outputs>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
80 <tests>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
81 <test>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
82 <param name="species" value="rno"/>
2
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
83 <param name="geneAnnot" value="ensembl2symbol_annotation.tsv"/>
1
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
84 <param name="geneUniqId" value="UniqueID"/>
2
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
85 <param name="geneName" value="GeneName"/>
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
86 <param name="metAnnot" value="metabolite_annotation.tsv"/>
1
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
87 <param name="metUniqId" value="UniqueID"/>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
88 <param name="metName" value="MetName"/>
2
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
89 <output name="geneOutput" value="gene_to_keggId_link.tsv" compare="diff" lines_diff="100000"/>
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
90 <output name="metOutput" value="metabolite_to_keggId_link.tsv" compare="diff" lines_diff="10000"/>
1
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
91 </test>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
92 </tests>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
93 <help><![CDATA[
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
94
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
95 **Tool Description**
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
96
2
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
97 This tool takes an annotation dataset containing metabolite compound names or gene symbols and links them to identifiers in KEGG (KEGGIDs)
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
98 creating either:
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
99 (a) a Gene to KEGGID Link or
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
100 (b) a Metabolite to KEGGID Link dataset.
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
101
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
102 For gene expression data, the tool is designed to
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
103 take the output from the 'Map ENSEMBLIDs to Gene Symbols' tool as input. If your input dataset
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
104 contains a Selected column, the tool will
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
105 link GeneSymbols to KEGGIDs where Selected = 'Yes'. Input Files without a Selected column must
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
106 have a column containing unique FeatureIDs.
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
107 This tool takes an annotation dataset containing unique FeatureIDs, ENSEMBLIDs (for gene
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
108 expression data) and GeneSymbol/MetaboliteName
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
109 and adds the following columns:
1
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
110
2
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
111 1) Name_in_KEGG, the name found in KEGG
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
112 2) Matched, a column indicating whether a match was found in KEGG
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
113 3) KEGGID, the KEGG identifier for the Match
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
114 4) Score, a similarity score representing match similarity (calculated using the SequenceMatcher class from Python's difflib module)
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
115 5) a Tie column to indicate if a gene symbol or metabolite name matched more than one KEGGID.
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
116
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
117 User-specified metabolite names are linked to KEGGIDs by identifying the best match using the following procedure: Common metabolite prefixes
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
118 are removed (cis-, trans-, d- , l- , (s)-, alpha-, beta-, alpha, beta, alpha-d-, beta-d-, alpha-l-, beta-l-, l-beta-, l-alpha-, d-beta-, d-alpha-).
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
119 If the metabolite name given is an acid, then the name is modified to the conjugate base by replacing “ic acid”, “icacid” or “ic_acid” with “ate”.
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
120 If amino acids are given in 1-letter or 3-letter abbreviations, names are modified to the full amino acid name. The following commonly used lipid
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
121 abbreviations are modified to reflect the full names (SM = sphingomyelin, lysopc = lysophosphatidylcholine, PC = phosphatidylcholine,
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
122 PE = phosphatidylethanolamine and LysoPE = lysophosphatidylethanolamine). Similarly, abbreviations for other commonly assayed metabolites are
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
123 modified to reflect the full names (cit = citrate, orn = ornithine, thyr = thyroxine and boc = butoxycarbonyl). The code allows the addition of
1
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
124 more synonyms. The user-specified metabolite names are retained in the output dataset for comparisons with the KEGG database.
2
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
125
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
126 Each parsed metabolite name is compared to metabolite names in KEGG. The best match in KEGG based on similarity score is returned. The similarity
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
127 score (Score column) is based on the longest contiguous matching subsequence that does not contain 'junk' elements, where 'junk' elements are defined
1
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
128 as duplicates making up more than 1% of a sequence with minimum length of 200 (Python SequenceMatcher class from difflib).
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
129
2
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
130 Selected = Yes for the match with the highest similarity score.
1
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
131
2
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
132 For metabolite names where the best match is tied with at least one other compound in KEGG, all matches are returned. A tie is determined as follows:
1
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
133 if the Score is greater than 95% for 2 or more matches to a metabolite name, then:
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
134 1) the Tie column = 'Yes' and a warning message will appear
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
135 2) the Selected column is assigned based on the alphabetical order of the Name_in_KEGG column. Note that the user-specified FeatureID and MetaboliteName may not be unique in the resulting output dataset.
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
136
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
137 --------------------------------------------------------------------------------
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
138
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
139 **INPUT**
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
140
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
141 **Annotation File**
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
142
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
143 +-------------+--------------+-----+
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
144 | FeatureID | Name | ... |
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
145 +=============+==============+=====+
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
146 | FeatureID_1 | one | ... |
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
147 +-------------+--------------+-----+
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
148 | FeatureID_2 | two | ... |
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
149 +-------------+--------------+-----+
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
150 | FeatureID_3 | three | ... |
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
151 +-------------+--------------+-----+
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
152 | FeatureID_4 | four | ... |
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
153 +-------------+--------------+-----+
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
154 | ... | ... | ... |
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
155 +-------------+--------------+-----+
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
156
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
157 **NOTE:** This dataset must contain at least two columns, a column of FeatureIDs and a column containing names (e.g. gene symbol or compound names) to use for linking to KEGGIDs. Other columns may be present in the dataset. The user can use a Gene Expression Annotation dataset, a Metabolomic Annotation dataset or both.
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
158
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
159 **Unique FeatureID**
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
160
2
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
161 Name of the column in your gene expression or metabolomic Annotation dataset that contains the Unique FeatureIDs.
1
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
162
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
163 **Gene Symbol or Metabolite Names**
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
164
2
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
165 Name of the column in your gene expression or metabolomic Annotation dataset with the names to use for matching to KEGGIDs.
1
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
166
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
167 --------------------------------------------------------------------------------
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
168
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
169 **OUTPUT**
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
170
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
171 For each input Annotation file, a TSV file containing the following columns is generated:
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
172
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
173 (1) **unique FeatureID:** column from the Annotation dataset containing the unique FeatureIDs.
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
174 (2) **Name:** column from Annotation dataset used for matching in KEGG.
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
175 (3) **Feature_Type:** column indicating whether matching was for metabolites or genes.
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
176 (4) **Matched:** column indicating whether a match in KEGG was found. Yes/No
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
177 (5) **Name_in_KEGG:** column containing the KEGG name for the match.
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
178 (6) **KEGGID:** column containing the KEGG identifier for the match.
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
179 (7) **Similarity:** value indicating the similarity between the given feature and the match in KEGG. Ranges from 0 to 1.
2
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
180 (8) **Tie:** in cases where multiple matches are found for a given feature, Tie = yes if the similarity is greater than 95%.
1
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
181 (9) **Selected:** for features with multiple matches and different similarity scores, the 'Selected' column = yes for the match with the highest similarity score. For features with multiple matches and the same similarity score, the 'Selected' column = yes based on the alphabetical order of the returned match.
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
182
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
183
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
184 **Example Metabolite to KEGGID Link Table**
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
185
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
186 +-------------+------------+--------------+---------+--------------+----------+------------+-----+----------+
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
187 | FeatureID | Name | Feature_Type | Matched | Name_in_KEGG | KEGG_ID | Similarity | Tie | Selected |
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
188 +=============+============+==============+=========+==============+==========+============+=====+==========+
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
189 | FeatureID_1 | one | Metabolite | Yes | one* | cpd:... | 1.0 | No | Yes |
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
190 +-------------+------------+--------------+---------+--------------+----------+------------+-----+----------+
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
191 | FeatureID_2 | two | Metabolite | Yes | two* | cpd:... | 1.0 | No | Yes |
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
192 +-------------+------------+--------------+---------+--------------+----------+------------+-----+----------+
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
193 | FeatureID_3 | two | Metabolite | Yes | three* | cpd:... | 0.87 | No | No |
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
194 +-------------+------------+--------------+---------+--------------+----------+------------+-----+----------+
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
195 | FeatureID_4 | four | Metabolite | No | NA | NA | NA | NA | NA |
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
196 +-------------+------------+--------------+---------+--------------+----------+------------+-----+----------+
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
197 | ... | ... | ... | ... | ... | ... | ... | ... | ... |
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
198 +-------------+------------+--------------+---------+--------------+----------+------------+-----+----------+
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
199
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
200 **NOTE:** Warning messages appear in cases of a Tie (greater than 95% similarity). Please check these results carefully.
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
201
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
202 ]]>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
203 </help>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
204 <citations>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
205 <citation type="bibtex">@ARTICLE{Kirpich17secimtools,
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
206 author = {Alexander S. Kirpich and Miguel Ibarra and Oleksandr Moskalenko and Justin M. Fear and Joseph Gerken and Xinlei Mi and Ali Ashrafi and Alison M. Morse and Lauren M. McIntyre},
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
207 title = {SECIMTools: A suite of Metabolomics Data Analysis Tools},
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
208 journal = {BMC Bioinformatics},
2
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
209 year = {2018}
1
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
210 }</citation>
2
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
211 <citation type="bibtex">@article{Mor2021GaitGM,
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
212 title={GAIT-GM integrative cross-omics analyses reveal cholinergic defects in a C. elegans model of Parkinson's disease},
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
213 author={Mor, DE and Huertas, F and Morse, AM and Kaletsky, R and Murphy, CT and Kalia, V and Miller, GW and Moskalenko, O and Conesa, A and McIntyre, LM},
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
214 journal={BMC Genomics},
2c218a253d56 "planemo upload for repository https://github.com/secimTools/gait-gm/tree/main/galaxy commit 758394addb95b09e794132a23a1f7e95ba39df0b"
malex
parents: 1
diff changeset
215 year={submitted},
1
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
216 }</citation>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
217 </citations>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
218 </tool>