annotate add_kegg_anno_info.xml @ 1:ec9ee8edb84d draft

Initial upload of 21.6.10 release.
author malex
date Fri, 18 Jun 2021 20:23:19 +0000
parents
children 2c218a253d56
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
1 <tool id="secimtools_add_kegg_anno_info" name="Link Name to KEGGID" version="@WRAPPER_VERSION@">
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
2 <description></description>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
3 <macros>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
4 <import>macros.xml</import>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
5 </macros>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
6 <expand macro="requirements" />
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
7 <stdio>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
8 <exit_code range="1" level="fatal" description="Repeated Unique IDs"/>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
9 </stdio>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
10 <command detect_errors="exit_code"><![CDATA[
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
11 add_kegg_anno_info.py
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
12 -s=$species
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
13 #if $dataSets.whichDataSet == "geneDataset":
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
14 -ga=$dataSets.geneAnnot
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
15 -gid=$dataSets.geneUniqId
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
16 -gn=$dataSets.geneName
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
17 #end if
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
18 #if $dataSets.whichDataSet == "metDataset":
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
19 -ma=$dataSets.metAnnot
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
20 -mid=$dataSets.metUniqId
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
21 -mn=$dataSets.metName
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
22 #end if
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
23 #if $dataSets.whichDataSet == "geneDataset,metDataset":
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
24 -ga=$dataSets.geneAnnot
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
25 -gid=$dataSets.geneUniqId
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
26 -gn=$dataSets.geneName
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
27 -ma=$dataSets.metAnnot
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
28 -mid=$dataSets.metUniqId
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
29 -mn=$dataSets.metName
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
30 #end if
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
31 -go=$geneOutput
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
32 -mo=$metOutput
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
33 ]]></command>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
34 <inputs>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
35 <param name="species" type="select" label="Select Species from the list" >
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
36 <option value="hsa">Homo sapiens</option>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
37 <option value="mmu">Mus musculus</option>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
38 <option value="rno">Rattus norvegicus</option>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
39 <option value="dme">Drosophila melanogaster</option>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
40 <option value="ath">Arabidopsis thaliana</option>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
41 <option value="sce">Saccharomyces cerevisiae</option>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
42 <option value="eco">Escherichia coli</option>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
43 </param>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
44 <conditional name="dataSets">
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
45 <param name="whichDataSet" type="select" display="radio" label="Select Annotation Dataset(s)">
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
46 <option value="geneDataset,metDataset" selected="true">Gene Expression + Metabolomic Annotation Datasets</option>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
47 <option value="geneDataset">Gene Expression Annotation Dataset</option>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
48 <option value="metDataset">Metabolomic Annotation Dataset</option>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
49 <validator type="no_options" message="You must select at least one option." />
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
50 </param>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
51 <when value="geneDataset">
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
52 <param name="geneAnnot" type="data" format="tabular" label="Select the Gene Expression Annotation dataset from your History"/>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
53 <param name="geneUniqId" type="text" size="30" value="" label="Gene Unique FeatureID" help="Name of the column in your Gene Expression Annotation dataset that contains the unique FeatureIDs."/>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
54 <param name="geneName" type="text" size="30" value="" label="Gene Symbol" help="Name of the column in your Gene Expression Annotation dataset that contains Gene Symbols to use for linking to KEGGIDs."/>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
55 </when>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
56 <when value="metDataset">
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
57 <param name="metAnnot" type="data" format="tabular" label="Select the Metabolomic Annotation dataset from your History" />
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
58 <param name="metUniqId" type="text" size="30" value="" label="Metabolite Unique FeatureID" help="Name of the column in your Metabolomic Annotation dataset that contains the unique FeatureIDs."/>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
59 <param name="metName" type="text" size="30" value="" label="Metabolite Names" help="Name of the column in your Metabolomic Annotation dataset that has metabolite names to use for linking to KEGGIDs."/>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
60 </when>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
61 <when value="geneDataset,metDataset">
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
62 <param name="geneAnnot" type="data" format="tabular" label="Select the Gene Expression Annotation dataset from your History"/>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
63 <param name="geneUniqId" type="text" size="30" value="" label="Gene Unique FeatureID" help="Name of the column in your Gene Expression Annotation dataset that contains the unique FeatureIDs."/>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
64 <param name="geneName" type="text" size="30" value="" label="Gene Symbol" help="Name of the Column in your Gene Expression Annotation dataset that contains Gene Symbols to use for linking to KEGGIDs."/>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
65 <param name="metAnnot" type="data" format="tabular" label="Select the Metabolomic Annotation dataset from your History" />
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
66 <param name="metUniqId" type="text" size="30" value="" label="Metabolite Unique FeatureID" help="Name of the Column in your Metabolomic Annotation dataset that contains the unique FeatureIDs."/>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
67 <param name="metName" type="text" size="30" value="" label="Metabolite Names" help="Name of the Column in your Metabolomic Annotation dataset that has metabolite names to use for linking to KEGGIDs."/>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
68 </when>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
69 </conditional>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
70 </inputs>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
71 <outputs>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
72 <data format="tabular" name="geneOutput" label="${tool.name} on ${on_string}: Gene to KEGGID link">
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
73 <filter>(dataSets['whichDataSet'] == 'geneDataset') or (dataSets['whichDataSet'] == 'geneDataset,metDataset')</filter>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
74 </data>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
75 <data format="tabular" name="metOutput" label="${tool.name} on ${on_string}: Metabolite to KEGGID link">
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
76 <filter>(dataSets['whichDataSet'] == 'metDataset') or (dataSets['whichDataSet'] == 'geneDataset,metDataset')</filter>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
77 </data>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
78 </outputs>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
79 <tests>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
80 <test>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
81 <param name="species" value="rno"/>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
82 <param name="geneAnnot" value="ensembl2symbol_annotation_file_01fhl.tsv"/>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
83 <param name="geneUniqId" value="UniqueID"/>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
84 <param name="geneName" value="GeneSymbol"/>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
85 <param name="metAnnot" value="metabolite_annotation_file_01fhl.tsv"/>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
86 <param name="metUniqId" value="UniqueID"/>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
87 <param name="metName" value="MetName"/>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
<!-- Expected results are declared with <output>, not <param>: <param> only sets tool inputs, so the files would never be compared. -->
88 <output name="geneOutput" file="gene_link_kegg_annotation_file_01fhl.tsv"/>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
89 <output name="metOutput" file="metabolite_link_kegg_annotation_file_01fhl.tsv"/>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
90 </test>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
91 </tests>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
92 <help><![CDATA[
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
93
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
94 **Tool Description**
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
95
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
96 This tool takes an annotation dataset containing metabolite compound names or gene symbols and links them to identifiers in KEGG (KEGGIDs)
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
97 creating either a (a) Gene to KEGGID Link or a (b) Metabolite to KEGGID Link dataset. For gene expression data, the tool is designed to
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
98 take the output from the 'Map ENSEMBLIDs to Gene Symbols' tool as input. If your input dataset contains a Selected column, the tool will
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
99 link GeneSymbols to KEGGIDs where Selected = 'Yes'. Input Files without a Selected column must have a column containing unique FeatureIDs.
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
100 This tool takes an annotation dataset containing unique FeatureIDs, ENSEMBLIDs (for gene expression data) and GeneSymbol/MetaboliteName
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
101 and adds the following columns: 1) Name_in_KEGG, the name found in KEGG, 2) Matched, a column indicating whether a match was found in KEGG,
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
102 3) KEGGID, the KEGG identifier for the Match, 4) Score, a similarity score representing match similarity (calculated using the Python internal
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
103 function SequenceMatcher from difflib) and 5) a Tie column to indicate if a gene symbol or metabolite name matched more than one KEGGID.
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
104
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
105 User-specified metabolite names are linked to KEGGIDs by identifying the best match using the following procedure. Common metabolite prefixes
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
106 are removed (cis-, trans-, d- , l- , (s)-, alpha-, beta-, alpha, beta, alpha-d-, beta-d-, alpha-l-, beta-l-, l-beta-, l-alpha-, d-beta-, d-alpha-).
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
107 If the metabolite name given is an acid, then the name is modified to the conjugate base by replacing “ic acid”, “icacid” or “ic_acid” with “ate”.
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
108 If amino acids are given in 1-letter or 3-letter abbreviations, names are modified to the full amino acid name. The following commonly used lipid
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
109 abbreviations are modified to reflect the full names (SM = sphingomyelin, lysopc = lysophosphatidylcholine, PC = phosphatidylcholine,
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
110 PE = phosphatidylethanolamine and LysoPE = lysophosphatidylethanolamine). Similarly, abbreviations for other commonly assayed metabolites are
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
111 modified to reflect the full names (cit = citrate, orn = ornithine, thyr = thyroxine and boc = butoxycarbonyl). The code allows the addition of
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
112 more synonyms. The user-specified metabolite names are retained in the output dataset for comparisons with the KEGG database.
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
113
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
114 Each parsed metabolite name is compared to metabolite names in KEGG. The best match in KEGG based on similarity score is returned. The similarity
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
115 score (Score column) is based on the longest contiguous matching subsequence that does not contain 'junk' elements where 'junk' elements are defined
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
116 as duplicates making up more than 1% of a sequence with minimum length of 200 (Python SequenceMatcher class from difflib).
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
117
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
118 Selected = Yes for the match with the highest similarity score.
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
119
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
120 For metabolite names where the best match is tied with at least one other compound in KEGG, all matches are returned. A tie is determined as follows:
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
121 if the Score is greater than 95% for 2 or more matches in the metabolite name then:
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
122 1) the Tie column = 'Yes' and a warning message will appear
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
123 2) the Selected column is assigned based on the alphabetical order of the Name_in_KEGG column. Note that the user-specified FeatureID and MetaboliteName may not be unique in the resulting output dataset.
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
124
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
125 --------------------------------------------------------------------------------
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
126
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
127 **INPUT**
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
128
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
129 **Annotation File**
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
130
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
131 +-------------+--------------+-----+
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
132 | FeatureID | Name | ... |
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
133 +=============+==============+=====+
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
134 | FeatureID_1 | one | ... |
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
135 +-------------+--------------+-----+
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
136 | FeatureID_2 | two | ... |
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
137 +-------------+--------------+-----+
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
138 | FeatureID_3 | three | ... |
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
139 +-------------+--------------+-----+
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
140 | FeatureID_4 | four | ... |
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
141 +-------------+--------------+-----+
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
142 | ... | ... | ... |
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
143 +-------------+--------------+-----+
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
144
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
145 **NOTE:** This dataset must contain at least two columns, a column of FeatureIDs and a column containing names (e.g. gene symbol or compound names) to use for linking to KEGGIDs. Other columns may be present in the dataset. The user can use a Gene Expression Annotation dataset, a Metabolomic Annotation dataset or both.
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
146
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
147 **Unique FeatureID**
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
148
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
149 Name of the column in your gene expression or metabolomic Annotation dataset that contains the Unique FeatureIDs.
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
150
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
151 **Gene Symbol or Metabolite Names**
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
152
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
153 Name of the column in your gene expression or metabolomic Annotation dataset with the names to use for matching to KEGGIDs.
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
154
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
155 --------------------------------------------------------------------------------
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
156
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
157 **OUTPUT**
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
158
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
159 For each input Annotation file, a TSV file containing the following columns is generated:
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
160
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
161 (1) **unique FeatureID:** column from the Annotation dataset containing the unique FeatureIDs.
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
162 (2) **Name:** column from Annotation dataset used for matching in KEGG.
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
163 (3) **Feature_Type:** column indicating whether matching was for metabolites or genes.
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
164 (4) **Matched:** column indicating whether a match in KEGG was found. Yes/No
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
165 (5) **Name_in_KEGG:** column containing the KEGG name for the match.
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
166 (6) **KEGGID:** column containing the KEGG identifier for the match.
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
167 (7) **Similarity:** value indicating the similarity between the given feature and the match in KEGG. Ranges from 0 to 1.
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
168 (8) **Tie:** in cases where multiple matches are found for a given feature, Tie = yes if the similarity is greater than 95%.
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
169 (9) **Selected:** for features with multiple matches and different similarity scores, the 'Selected' column = yes for the match with the highest similarity score. For features with multiple matches and the same similarity score, the 'Selected' column = yes based on the alphabetical order of the returned match.
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
170
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
171
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
172 **Example Metabolite to KEGGID Link Table**
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
173
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
174 +-------------+------------+--------------+---------+--------------+----------+------------+-----+----------+
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
175 | FeatureID | Name | Feature_Type | Matched | Name_in_KEGG | KEGG_ID | Similarity | Tie | Selected |
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
176 +=============+============+==============+=========+==============+==========+============+=====+==========+
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
177 | FeatureID_1 | one | Metabolite | Yes | one* | cpd:... | 1.0 | No | Yes |
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
178 +-------------+------------+--------------+---------+--------------+----------+------------+-----+----------+
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
179 | FeatureID_2 | two | Metabolite | Yes | two* | cpd:... | 1.0 | No | Yes |
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
180 +-------------+------------+--------------+---------+--------------+----------+------------+-----+----------+
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
181 | FeatureID_3 | two | Metabolite | Yes | three* | cpd:... | 0.87 | No | No |
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
182 +-------------+------------+--------------+---------+--------------+----------+------------+-----+----------+
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
183 | FeatureID_4 | four | Metabolite | No | NA | NA | NA | NA | NA |
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
184 +-------------+------------+--------------+---------+--------------+----------+------------+-----+----------+
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
185 | ... | ... | ... | ... | ... | ... | ... | ... | ... |
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
186 +-------------+------------+--------------+---------+--------------+----------+------------+-----+----------+
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
187
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
188 **NOTE:** Warning messages appear in cases of a Tie (greater than 95% similarity). Please check these results carefully.
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
189
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
190 ]]>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
191 </help>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
192 <citations>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
193 <citation type="bibtex">@ARTICLE{Kirpich17secimtools,
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
194 author = {Alexander S. Kirpich, Miguel Ibarra, Oleksandr Moskalenko, Justin M. Fear, Joseph Gerken, Xinlei Mi, Ali Ashrafi, Alison M. Morse, Lauren M. McIntyre},
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
195 title = {SECIMTools: A suite of Metabolomics Data Analysis Tools},
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
196 journal = {BMC Bioinformatics},
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
197 year = {in press}
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
198 }</citation>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
199 <citation type="bibtex">
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
200 @article{garcia2010paintomics,
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
201 title={Paintomics: a web based tool for the joint visualization of transcriptomics and metabolomics data},
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
202 author={Garc{\'\i}a-Alcalde, Fernando and Garc{\'\i}a-L{\'o}pez, Federico and Dopazo, Joaqu{\'\i}n and Conesa, Ana},
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
203 journal={Bioinformatics},
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
204 volume={27},
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
205 number={1},
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
206 pages={137--139},
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
207 year={2010},
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
208 publisher={Oxford University Press}
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
209 }</citation>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
<!-- type="bibtex" added: Galaxy requires a citation type (doi or bibtex), matching the two sibling citations. -->
210 <citation type="bibtex">@article{wu2014mygene,
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
211 title={MyGene. info: gene annotation query as a service},
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
212 author={Wu, Chunlei and Mark, Adam and Su, Andrew I},
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
213 journal={bioRxiv},
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
214 pages={009332},
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
215 year={2014},
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
216 publisher={Cold Spring Harbor Laboratory}
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
217 }</citation>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
218 </citations>
ec9ee8edb84d Initial upload of 21.6.10 release.
malex
parents:
diff changeset
219 </tool>