Mercurial > repos > galaxyp > reactome_pathwaymatcher
comparison pathwaymatcher.xml @ 0:f66af2b04a98 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pathwaymatcher commit c12a99d3da62c83b779175b3c9022e7d5622053a
author | galaxyp |
---|---|
date | Wed, 20 Jun 2018 14:21:10 -0400 |
parents | |
children | 5d0c44bc354d |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:f66af2b04a98 |
---|---|
1 <tool id="reactome_pathwaymatcher" name="Pathway Matcher" version="@PATHWAYMATCHER_VERSION@.@TOOL_SUBVERSION@"> | |
2 <description> | |
3 PathwayMatcher is a software tool to search for pathways related to a list of proteins in Reactome. | |
4 </description> | |
5 <macros> | |
6 <token name="@PATHWAYMATCHER_VERSION@">1.8</token> | |
7 <token name="@TOOL_SUBVERSION@">0</token> | |
8 <xml name="input_fasta"> | |
9 <param format="fasta" name="input_database" type="data" label="Protein Database" | |
10 help="Select FASTA database from history"/> | |
11 </xml> | |
12 </macros> | |
13 <requirements> | |
14 <requirement type="package" version="@PATHWAYMATCHER_VERSION@">pathwaymatcher</requirement> | |
15 <requirement type="package" version="3.0">zip</requirement> | |
16 </requirements> | |
17 <stdio> | |
18 <exit_code range="1:" level="fatal" description="Job Failed" /> | |
19 <regex match="java.*Exception" level="fatal" description="Java Exception"/> | |
20 <regex match="Could not create the Java virtual machine" level="fatal" description="JVM Error"/> | |
21 </stdio> | |
22 <command> | |
23 <![CDATA[ | |
24 #from datetime import datetime | |
25 #import json | |
26 #import os | |
27 #set $exp_str = "Galaxy_Experiment_%s" % datetime.now().strftime("%Y%m%d%H%M%S") | |
28 #set $samp_str = "Sample_%s" % datetime.now().strftime("%Y%m%d%H%M%S") | |
29 #set $temp_stderr = "pathwaym_stderr" | |
30 #set $bin_dir = "bin" | |
31 ## NOTE: exp_str, samp_str and bin_dir are set above but not referenced anywhere else in this command. | |
32 mkdir output; | |
33 cwd=`pwd`; | |
34 export HOME=\$cwd; | |
35 | |
36 ##################### | |
37 ## Pathway Matcher ## | |
38 ##################### | |
39 (pathwaymatcher src.main.java.no.uib.pap.pathwaymatcher.PathwayMatcher | |
40 | |
41 #for $i, $s in enumerate($input_types) | |
42 | |
43 ## GENETIC VARIANTS | |
44 | |
45 #if $s.input_type.input_type_selector == "rsid" | |
46 -t rsid -i '${s.input_type.input_rsid}' | |
47 #end if | |
48 | |
49 #if $s.input_type.input_type_selector == "chrbp" | |
50 -t chrbp -i '${s.input_type.input_chrbp}' | |
51 #end if | |
52 | |
53 #if $s.input_type.input_type_selector == "vcf" | |
54 -t vcf -i '${s.input_type.input_vcf}' | |
55 #end if | |
56 | |
57 ## GENES | |
58 | |
59 #if $s.input_type.input_type_selector == "gene" | |
60 -t gene -i '${s.input_type.input_gene}' | |
61 #end if | |
62 | |
63 ## PEPTIDES | |
64 | |
65 #if $s.input_type.input_type_selector == "peptide" | |
66 -t peptide -i '${s.input_type.input_peptide}' | |
67 -f '${s.input_type.input_database}' | |
68 -r '${s.input_type.ptm_range}' | |
69 #end if | |
70 | |
71 #if $s.input_type.input_type_selector == "modifiedpeptide" | |
72 -t modifiedpeptide -i '${s.input_type.input_modifiedpeptide}' | |
73 -f '${s.input_type.input_database}' | |
74 -r '${s.input_type.ptm_range}' | |
75 #end if | |
76 | |
77 ## PROTEINS | |
78 | |
79 #if $s.input_type.input_type_selector == "uniprot" | |
80 -t uniprot -i '${s.input_type.input_uniprot}' | |
81 #end if | |
82 | |
83 #if $s.input_type.input_type_selector == "ensembl" | |
84 -t ensembl -i '${s.input_type.input_ensembl}' | |
85 #end if | |
86 | |
87 ## PROTEOFORMS | |
88 | |
89 #if $s.input_type.input_type_selector == "proteoforms" | |
90 | |
91 #if $s.input_type.proteoform_match_criteria: | |
92 -t proteoform -m '${s.input_type.proteoform_match_criteria}' -i '${s.input_type.input_proteoforms}' | |
93 #else: | |
94 -t proteoform -i '${s.input_type.input_proteoforms}' | |
95 #end if | |
96 | |
97 #end if | |
98 | |
99 #end for | |
100 | |
101 ## OUTPUT OPTIONS | |
102 ## The select yields the strings "0"/"1"; both are non-empty (truthy), so compare with "1" explicitly. | |
103 #if str($output_options.search_top_level_info) == "1": | |
104 -tlp | |
105 #end if | |
106 | |
107 #set $output_graphs_list = str($output_options.output_graphs).split(',') | |
108 | |
109 #if 'gg' in $output_graphs_list: | |
110 -gg | |
111 #end if | |
112 | |
113 #if 'gu' in $output_graphs_list: | |
114 -gu | |
115 #end if | |
116 | |
117 #if 'gp' in $output_graphs_list: | |
118 -gp | |
119 #end if | |
120 | |
121 2>> $temp_stderr); | |
122 exit_code_for_galaxy=\$?; | |
123 ## We create a folder to contain graphs files. | |
124 #if $output_options.output_graphs: | |
125 mkdir "graphs"; | |
126 #end if | |
127 | |
128 #if 'gg' in $output_graphs_list: | |
129 mv -t "graphs" "geneExternalEdges.tsv" "geneInternalEdges.tsv" "geneVertices.tsv" ; | |
130 #end if | |
131 | |
132 #if 'gu' in $output_graphs_list: | |
133 mv -t "graphs" "proteinExternalEdges.tsv" "proteinInternalEdges.tsv" "proteinVertices.tsv"; | |
134 #end if | |
135 | |
136 #if 'gp' in $output_graphs_list: | |
137 mv -t "graphs" "proteoformExternalEdges.tsv" "proteoformInternalEdges.tsv" "proteoformVertices.tsv"; | |
138 #end if | |
139 | |
140 ## The exit status was saved right after PathwayMatcher ran, so the mkdir/mv steps above cannot overwrite it. | |
141 cat $temp_stderr 2>&1; | |
142 (exit \$exit_code_for_galaxy) | |
143 ]]> | |
144 </command> | |
145 <inputs> | |
146 | |
147 <repeat name="input_types" title="Input" min="1"> | |
148 <conditional name="input_type"> | |
149 <param name="input_type_selector" type="select" label="Input type" | |
150 help=""> | |
151 <option value="rsid">Genetic variants - SNP rsId list</option> | |
152 <option value="chrbp">Genetic variants - Chromosomes and base pairs</option> | |
153 <option value="vcf">Genetic variants - Variant Call Format Specification</option> | |
154 <option value="gene">Genes</option> | |
155 <option value="peptide">Peptides - Simple list</option> | |
156 <option value="modifiedpeptide">Peptides - Peptide List with PTM types and sites</option> | |
157 <option value="uniprot">Proteins - UniProt Accession list</option> | |
158 <option value="ensembl">Proteins - Ensembl identifier list</option> | |
159 <option value="proteoforms">Proteoforms</option> | |
160 </param> | |
161 | |
162 <!-- Genetic variants --> | |
163 <when value="rsid"> | |
164 <param format="txt" name="input_rsid" type="data" label="SNP rsId list" | |
165 help="The file contains one rsid identifier as defined in dbSNP[1] on each row. | |
166 The list must be ordered by chromosome and base pair (bp). The list must not have duplicates. | |
167 All rsids must appear in the human assembly GRCh37.p13. "/> | |
168 </when> | |
169 | |
170 <when value="chrbp"> | |
171 <param format="txt" name="input_chrbp" type="data" label="Chromosomes and base pairs" | |
172 help="Genetic variants can also be represented using the chromosome and the base pair numbers. | |
173 The input should be sorted by chromosome number and then by base pair. "/> | |
174 </when> | |
175 | |
176 <when value="vcf"> | |
177 <param format="vcf" name="input_vcf" type="data" label="Variant Call Format Specification" | |
178 help="The input follows the Variant Call Format Specification[2] v4.3. | |
179 It also allows the possibility to specify only the first 4 columns in the data section of the file: | |
180 CHROM, POS, ID, REF. "/> | |
181 </when> | |
182 | |
183 <!-- Genes --> | |
184 <when value="gene"> | |
185 <param format="txt" name="input_gene" type="data" label="Genes" | |
186 help="File with one gene name on each line. Genes follow the HUGO gene nomenclature[3]."/> | |
187 </when> | |
188 | |
189 <!-- Peptides --> | |
190 <when value="peptide"> | |
191 <param format="txt" name="input_peptide" type="data" label="Simple list" | |
192 help="File with one peptide sequence on each line."/> | |
193 | |
194 <expand macro="input_fasta" /> | |
195 | |
196 <param name="ptm_range" type="integer" value="0" label="PTM position range" optional="true" | |
197 help="Plus minus positions for the same PTM site."/> | |
198 </when> | |
199 | |
200 <when value="modifiedpeptide"> | |
201 <param format="txt" name="input_modifiedpeptide" type="data" label="Peptide List with PTM types and sites" | |
202 help="Each line of the file corresponds to a single peptide with post-translational modifications."/> | |
203 | |
204 <expand macro="input_fasta" /> | |
205 | |
206 <param name="ptm_range" type="integer" value="0" label="PTM position range" optional="true" | |
207 help="Plus minus positions for the same PTM site."/> | |
208 </when> | |
209 | |
210 <!-- Proteins --> | |
211 <when value="uniprot"> | |
212 <param format="txt" name="input_uniprot" type="data" label="UniProt Accession list" | |
213 help="File with one UniProt Accession [4] on each line."/> | |
214 </when> | |
215 | |
216 <when value="ensembl"> | |
217 <param format="txt" name="input_ensembl" type="data" label="Ensembl identifier list" | |
218 help="File with one Ensembl identifier [5] on each line."/> | |
219 </when> | |
220 | |
221 <!-- Proteoforms --> | |
222 <when value="proteoforms"> | |
223 <param format="txt" name="input_proteoforms" type="data" label="Proteoforms" | |
224 help="A proteoform defines a specific state of a protein. | |
225 It is composed of the protein UniProt accession, isoform and set of post-translational modifications. | |
226 The input file contains one line for each proteoform. Each PTM is specified using a modification | |
227 identifier and a site, separated by ':' (colon). For example: '00046:133'. | |
228 The identifier is a 5 digit id from the PSI-MOD Protein Modification Ontology [6]."/> | |
229 | |
230 <param name="proteoform_match_criteria" type="select" label="Proteoform match criteria"> | |
231 <option value="STRICT">STRICT</option> | |
232 <option value="ONE">ONE</option> | |
233 <option value="SUPERSET" selected="True">SUPERSET</option> | |
234 <option value="SUBSET">SUBSET</option> | |
235 </param> | |
236 </when> | |
237 | |
238 </conditional> | |
239 | |
240 </repeat> | |
241 | |
242 <section name="output_options" expanded="true" title="Output options"> | |
243 | |
244 <param name="search_top_level_info" type="select" label="Add search top level info"> | |
245 <option value="0" selected="True">False</option> | |
246 <option value="1">True</option> | |
247 </param> | |
248 | |
249 <param name="output_graphs" type="select" display="checkboxes" multiple="True" label="Connection graphs" | |
250 help="Generates a collection of connection graph files as an additional output when executing the pathway search and analysis. | |
251 The graph can use genes, proteins or proteoforms as vertices."> | |
252 <option value="gg">Genes</option> | |
253 <option value="gu">Proteins</option> | |
254 <option value="gp">Proteoforms</option> | |
255 </param> | |
256 | |
257 </section> | |
258 | |
259 </inputs> | |
260 <outputs> | |
261 <data name="search" format="tsv" from_work_dir="search.tsv" label="${tool.name} - search on ${on_string}" /> | |
262 <data name="analysis" format="tsv" from_work_dir="analysis.tsv" label="${tool.name} - analysis on ${on_string}" /> | |
263 <collection name="graphs_files" type="list" label="${tool.name} - graphs on ${on_string}" > | |
264 <filter>output_options['output_graphs'] != None</filter> | |
265 <discover_datasets pattern="__name_and_ext__" directory="graphs" ext="tsv"/> | |
266 </collection> | |
267 </outputs> | |
268 | |
269 | |
270 <tests> | |
271 | |
272 <!-- Test that genes search works --> | |
273 <test> | |
274 <repeat name="input_types"> | |
275 <conditional name="input_type"> | |
276 <param name="input_type_selector" value="gene"/> | |
277 <param name="input_gene" value="genes.txt" ftype="txt" /> | |
278 </conditional> | |
279 </repeat> | |
280 <output name="search" file="genes_search.tsv" ftype="tsv" compare="sim_size" delta="3000" /> | |
281 </test> | |
282 | |
283 <!-- Test graphs from proteoforms --> | |
284 <test> | |
285 <repeat name="input_types"> | |
286 <conditional name="input_type"> | |
287 <param name="input_type_selector" value="proteoforms"/> | |
288 <param name="input_proteoforms" value="proteoforms.txt" ftype="txt" /> | |
289 </conditional> | |
290 </repeat> | |
291 <param name="output_graphs" value="gg,gu,gp" /> | |
292 <output_collection name="graphs_files" type="list"> | |
293 <element name="geneExternalEdges" ftype="tsv" file="proteoforms_graphs/geneExternalEdges.tsv" compare="sim_size" delta="1000" /> | |
294 <element name="geneInternalEdges" ftype="tsv" file="proteoforms_graphs/geneInternalEdges.tsv" compare="sim_size" delta="1000"/> | |
295 <element name="geneVertices" ftype="tsv" file="proteoforms_graphs/geneVertices.tsv" compare="sim_size" delta="1000"/> | |
296 <element name="proteinExternalEdges" ftype="tsv" file="proteoforms_graphs/proteinExternalEdges.tsv" compare="sim_size" delta="10000"/> | |
297 <element name="proteinInternalEdges" ftype="tsv" file="proteoforms_graphs/proteinInternalEdges.tsv" compare="sim_size" delta="1000"/> | |
298 <element name="proteinVertices" ftype="tsv" file="proteoforms_graphs/proteinVertices.tsv" compare="sim_size" delta="1000"/> | |
299 <element name="proteoformExternalEdges" ftype="tsv" file="proteoforms_graphs/proteoformExternalEdges.tsv" compare="sim_size" delta="1000"/> | |
300 <element name="proteoformInternalEdges" ftype="tsv" file="proteoforms_graphs/proteoformInternalEdges.tsv" compare="sim_size" delta="1000"/> | |
301 <element name="proteoformVertices" ftype="tsv" file="proteoforms_graphs/proteoformVertices.tsv" compare="sim_size" delta="1000"/> | |
302 </output_collection> | |
303 </test> | |
304 | |
305 </tests> | |
306 <help> | |
307 | |
308 .. class:: infomark | |
309 | |
310 **Introduction** | |
311 | |
312 Biological pathways are an excellent resource to analyze the causes and consequences of certain phenotypes. | |
313 Most of the components of the pathways are proteins. When searching for relevant pathways to perform analysis | |
314 of the proteins in a patient sample, it is very common to lose information due to lack of precision in the search. | |
315 | |
316 This leads to result sets with many extra selected pathways that are not really related to the input sample. | |
317 | |
318 .. class:: infomark | |
319 | |
320 **What it does** | |
321 | |
322 We present a more fine-grained approach to search, using not only the gene names but also post-translational | |
323 modifications of the proteins, such as phosphorylation. | |
324 | |
325 Ultimately, any omics dataset with its mutations and | |
326 modifications will be mapped directly to the functional knowledgebases allowing the functional interpretation by | |
327 researchers and clinicians. | |
328 | |
329 The reference database used is Reactome, a free, open-source, curated and peer-reviewed database of biological reactions that contains the quality data needed for this type of fine-grained search. It can be readily queried with omics datasets, and we are improving its features by extending the matching of clinical data to the biological pathways. Not only will the gene names be used, but also mutations or post-translational modifications such as phosphorylation. | |
330 | |
331 | |
332 .. class:: infomark | |
333 | |
334 **Inputs and outputs** | |
335 | |
336 PathwayMatcher can search for reactions and pathways with various input types, and generates mapping files to the database. | |
337 | |
338 The input can be: | |
339 | |
340 - Genetic variants | |
341 - Genes | |
342 - Peptides | |
343 - Proteins | |
344 - Proteoforms | |
345 | |
346 The output of PathwayMatcher is composed of two files, the Reaction and Pathway mapping and the statistical analysis of the relevant pathways. | |
347 | |
348 .. class:: infomark | |
349 | |
350 The information included with this tool is a brief summary of the main documentation of PathwayMatcher_. | |
351 | |
352 Specific information about PathwayMatcher's Input_ and Output_ may also be found there. | |
353 | |
354 | |
355 .. class:: infomark | |
356 | |
357 **References** | |
358 | |
359 [1] dbSNP_ | |
360 | |
361 [2] VCF v4.3: | |
362 http://samtools.github.io/hts-specs/VCFv4.3.pdf | |
363 | |
364 [3] genenames.org: the HGNC resources in 2015. Nucleic Acids Res. 2015 Jan;43(Database issue):D1079-85. doi: 10.1093/nar/gku1071. : | |
365 https://www.ncbi.nlm.nih.gov/pubmed/25361968 | |
366 | |
367 [4] UniProt: the universal protein knowledgebase. Nucleic Acids Res. 45: D158-D169 (2017): | |
368 http://dx.doi.org/doi:10.1093/nar/gkw1099 | |
369 | |
370 [5] Ensembl: | |
371 https://www.ensembl.org/info/genome/stable_ids/index.html | |
372 | |
373 [6] The PSI-MOD community standard for representation of protein modification data. Nature Biotechnology 26, 864 - 866 (2008): | |
374 http://www.nature.com/nbt/journal/v26/n8/full/nbt0808-864.html | |
375 | |
376 .. _dbSNP: https://www.ncbi.nlm.nih.gov/projects/SNP/ | |
377 .. _PathwayMatcher: https://github.com/LuisFranciscoHS/PathwayMatcher | |
378 .. _Input: https://github.com/LuisFranciscoHS/PathwayMatcher/wiki/Input | |
379 .. _Output: https://github.com/LuisFranciscoHS/PathwayMatcher/wiki/Output | |
380 | |
381 </help> | |
382 | |
383 </tool> |