<tool name="TermMapperTool" id="TermMapperTool1" version="0.0.2">
<description>use cross-reference lookup tables to annotate results</description>
<!--
For remote debugging start your listener on port 8000 and use the following as command interpreter:
java -jar -Xdebug -Xrunjdwp:transport=dt_socket,address=D0100564.wurnet.nl:8000
-->
<!-- similar to the "join two datasets" tool http://galaxy.wur.nl/galaxy_production/root?tool_id=join1
but this one offers more powerful features, such as support for multiple ';'-separated codes in key fields
and the termColName(s) option for direct hierarchy-like annotation -->
<command interpreter="java -jar ">
TermMapperTool.jar
-inputFileName $inputFileName
-inputIdColumnName "$inputIdColumnName"
#if $inputIdCol.inputIdHasPrefix == True
-inputIdPrefix "$inputIdCol.inputIdPrefix"
#end if

-mappingFileName $mappingFileName
-mappingFileIdColName "$mappingFileIdColName"

#if $mappingIdCol.mappingIdHasPrefix == True
-mappingIdPrefix "$mappingIdCol.mappingIdPrefix"
#end if

-mappingFileTermColName "$mappingFileTermColName"

-outputFileName $outputFileName

#if $genObservations.genObservationsFile == True
-outputObservationsFileName $outputObservationsFileName
-quantifColumn "$genObservations.quantifColumn"
#end if

-mappedTermsColName "$mappedTermsColName"
-numberOfHeaderLines $numberOfHeaderLines

</command>
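<!-- Illustrative example of a resolved command line (all file and column names below are
hypothetical; the prefix and observations options are shown enabled):
java -jar TermMapperTool.jar -inputFileName input.tsv -inputIdColumnName "metabolite ID"
-inputIdPrefix "lipidmaps:" -mappingFileName lookup.tsv -mappingFileIdColName "ID"
-mappingFileTermColName "ChEBI" -outputFileName annotated.tsv
-outputObservationsFileName observations.tsv -quantifColumn "intensity"
-mappedTermsColName "Mapped terms" -numberOfHeaderLines 1 -->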

<inputs>

<param name="inputFileName" type="data" format="tabular,csv" label="Target file (TSV/CSV)" />

<param name="inputIdColumnName" type="text" size="50" value="" label="ID column name"
help="Name of the column containing the identification codes (in the given input file)"/>

<conditional name="inputIdCol">
<param name="inputIdHasPrefix" type="boolean" truevalue="Yes" falsevalue="No" checked="false"
label="ID values have a prefix"/>
<when value="Yes">
<param name="inputIdPrefix" type="text" size="50" value="" label="Prefix in ID column"
help="Fill in if the values in the ID column carry a prefix (e.g. in some
files the value is 'lipidmaps:LMFA00000007' instead of just 'LMFA00000007'; in that
case one would fill in 'lipidmaps:' as the prefix)"/>
</when>
<when value="No">
</when>
</conditional>

<!-- =================== cross-reference part ============== -->
<param name="mappingFileName" type="data" format="tabular,csv" label="Lookup table (TSV/CSV)" help="Simple mapping file from the coding scheme used in the target file to another scheme"/>
<param name="numberOfHeaderLines" type="select" label="Number of header lines in mapping file"
help="If this is '0', use the column numbers (starting from 1) as the 'names' in the parameters below.">
<option value="0" >0</option>
<option value="1" selected="true">1</option>
</param>


<param name="mappingFileIdColName" type="text" size="50" value="" label="ID column name (in lookup table)" help="Name of the ID column for the lookup"/>

<conditional name="mappingIdCol">
<param name="mappingIdHasPrefix" type="boolean" truevalue="Yes" falsevalue="No" checked="false"
label="ID values have a prefix"/>
<when value="Yes">
<param name="mappingIdPrefix" type="text" size="50" value="" label="Prefix in ID column"
help="Fill in if the values in the ID column carry a prefix (e.g. in some
files the value is 'lipidmaps:LMFA00000007' instead of just 'LMFA00000007'; in that
case one would fill in 'lipidmaps:' as the prefix)"/>
</when>
<when value="No">
</when>
</conditional>

<param name="mappingFileTermColName" type="text" size="50" value="" label="Term column name(s) or number(s)"
help="Name(s) or number(s) of the column(s) containing the term(s) in the lookup table (these terms will be transferred to the target file based on an ID match in 'ID column name').
To use multiple term columns, give their names separated by commas (,).
If multiple columns are specified, the algorithm will look for an annotation in the first one; if none is
found it will try the second one, and so forth (see the example below)."/>
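<!-- Illustrative example of the multi-column fallback (column names are hypothetical): with
term columns "GO,KEGG", each ID is first looked up in the "GO" column of the lookup table;
only when that cell is empty is the value from the "KEGG" column used instead. -->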


<param name="mappedTermsColName" type="text" size="50" value="Mapped terms" label="Name to give to the new column:"
help="Name to give to the new column that will be added to the target file. This new column
will contain the mapped terms."/>

<conditional name="genObservations">
<param name="genObservationsFile" type="boolean" truevalue="Yes" falsevalue="No" checked="false"
label="Also generate an observations file"/>
<when value="Yes">
<param name="quantifColumn" type="text" size="50" value=""
label="(Optional) Values column name"
help="Name of the column containing the quantification values (in the given input file)"/>
</when>
<when value="No">
</when>
</conditional>

</inputs>
<outputs>
#if isinstance( $inputFileName.datatype, $__app__.datatypes_registry.get_datatype_by_extension('tabular').__class__):
<data name="outputFileName" format="tabular" label="${tool.name} on ${on_string}: annotated file " ></data>
#else:
<data name="outputFileName" format="csv" label="${tool.name} on ${on_string}: annotated file " ></data>
#end if

<data name="outputObservationsFileName" format="tabular" label="${tool.name} on ${on_string}: term observations file (TSV)">
<!-- If the expression is false, the file is not created -->
<filter>( genObservations.genObservationsFile == True )</filter>
</data>
</outputs>
<tests>
<!-- find out how to use -->
<test>
</test>
</tests>
<help>

.. class:: infomark


This tool annotates the given target file
with the terms given in a lookup table. This lookup table maps the items found in the target file
(e.g. protein identifications coded in common formats such as UniProt)
to their respective terms (e.g. GO terms). It enables users to use the cross-reference
information now available from different repositories (like UniProt and KEGG - see for example
http://www.uniprot.org/taxonomy/ or http://www.genome.jp/linkdb/ )
to map their data to other useful coding schemes or to ontologies and functional annotations.
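
For example, a minimal lookup table could look like the sketch below (identifiers and term
mappings are made up for illustration)::

  UniProt   GO terms
  P12345    GO:0008150;GO:0003674
  Q67890    GO:0005575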

.. class:: infomark

**NB:** Currently the tool does "smart parsing" of hierarchy-based fields in the target file ID column.
This means that if the column contains a ".", the trailing part of the ID after the "." is ignored when the full
ID does not get a match in the lookup table while the part before the "." does, as in the example below.
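
For instance (the versioned ID below is hypothetical)::

  ID in target file:    LMFA00000007.1
  key in lookup table:  LMFA00000007
  match used:           LMFA00000007   (the ".1" suffix is ignored because only the part before the "." matches)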

.. class:: infomark

Examples of usage:

- annotate protein identifications with Gene Ontology [GO] terms

- annotate metabolite CAS identifications with ChEBI codes

- add KEGG gene codes to a file containing UniProt codes

- add KEGG compound codes to a file containing ChEBI codes

- etc.

As an example for transcripts and proteins, users can check http://www.uniprot.org/taxonomy/ to
see if their organism has been mapped to GO terms by UniProt. For example, the link
http://www.uniprot.org/uniprot/?query=taxonomy:2850 shows the UniProt entries and cross-references
for taxonomy 2850.
When the organism being studied is not available, other strategies
can be tried (like Blast2GO, for example).

Despite the specific examples above, this tool is generic and can be used to map any
values to new terms according to a given lookup table.

.. class:: infomark

*Omics cross-reference resources on the web:*

LinkDB: http://www.genome.jp/linkdb/

*Ready to use metabolomics links:*

http://rest.genome.jp/link/compound/chebi

http://rest.genome.jp/link/compound/lipidmaps

http://rest.genome.jp/link/compound/lipidbank

http://rest.genome.jp/link/compound/hmdb


*Ready to use proteomics links:*

http://rest.genome.jp/link/uniprot/pti (Phaeodactylum Tri.)

http://rest.genome.jp/link/uniprot/hsa (Homo sapiens)

(for organism code list see: )


*UniProt to GO:*

http://www.uniprot.org/taxonomy/


-----

**Output**

The tool reads the given input file and, for each line, adds a new column
containing the terms found for the ID on that line. The output file is therefore the same as the
input file plus an extra terms column (multiple terms separated by ';'), as illustrated below.
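
For example (all values below are made up for illustration), given a target file row and a
matching lookup table row::

  target file row:    LMFA00000007   12.3
  lookup table row:   LMFA00000007   term A;term B
  output row:         LMFA00000007   12.3   term A;term B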

-----

**Link to ontology viewer**

A second, summarized "term observations" file can also be generated.
If the terms are ontology terms, this file can be used to visualize the results
in the ontology viewer "OntologyAndObservationsViewer".

</help>
</tool>