Mercurial > repos > galaxyp > reactome_pathwaymatcher
diff pathwaymatcher.xml @ 3:2cd67294abbd draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pathwaymatcher commit 79d03b32b395b4c1385ff934251b17ea8950187b
author | galaxyp |
---|---|
date | Wed, 08 May 2019 13:41:47 -0400 |
parents | 9a599f278852 |
children | 3e510701a712 |
line wrap: on
line diff
--- a/pathwaymatcher.xml Thu Jul 26 17:00:38 2018 -0400 +++ b/pathwaymatcher.xml Wed May 08 13:41:47 2019 -0400 @@ -3,8 +3,8 @@ PathwayMatcher is a software tool to search for pathways related to a list of proteins in Reactome. </description> <macros> - <token name="@PATHWAYMATCHER_VERSION@">1.8.1</token> - <token name="@TOOL_SUBVERSION@">2</token> + <token name="@PATHWAYMATCHER_VERSION@">1.9.1</token> + <token name="@TOOL_SUBVERSION@">1</token> <xml name="input_fasta"> <param format="fasta" name="input_database" type="data" label="Protein Database" help="Select FASTA database from history"/> @@ -33,73 +33,98 @@ cwd=`pwd`; export HOME=\$cwd; + ## If we use peptideshaker files as inputs, firstly we need to uncompress their proteoforms files. + #for $i, $s in enumerate($match_types) + #if $s.match_type.match_type_selector == "peptideshakerzip_proteoforms" + ##unzip -l $s.match_type.input_peptideshakerzip_proteoforms; + unzip -j '${$s.match_type.input_peptideshakerzip_proteoforms}' 'output_reports/proteoforms.txt' -d './'; + mv proteoforms.txt ps_proteoforms_'${$i}'.txt; + #end if + #end for + ##################### ## Pathway Matcher ## ##################### - (pathwaymatcher src.main.java.no.uib.pap.pathwaymatcher.PathwayMatcher + (pathwaymatcher + + #for $i, $s in enumerate($match_types) + + + ## PROTEOFORMS + + #if $s.match_type.match_type_selector == "proteoforms" + #if $s.match_type.proteoform_match_criteria: + match-proteoforms -m '${s.match_type.proteoform_match_criteria}' -i '${s.match_type.input_proteoforms}' -r '${s.match_type.proteoform_range}' + #else: + match-proteoforms -i '${s.match_type.input_proteoforms}' -r '${s.match_type.proteoform_range}' + #end if + #end if + + ## PROTEOFORMS FROM PEPTIDESHAKER FILE - #for $i, $s in enumerate($input_types) + #if $s.match_type.match_type_selector == "peptideshakerzip_proteoforms" + #if $s.match_type.proteoform_peptideshakerzip_match_criteria: + match-proteoforms -m 
'${s.match_type.proteoform_peptideshakerzip_match_criteria}' -i ps_proteoforms_'${$i}'.txt -r '${s.match_type.proteoform_peptideshakerzip_range}' + #else: + match-proteoforms -i ps_proteoforms_'${$i}'.txt -r '${s.match_type.proteoform_peptideshakerzip_range}' + #end if + #end if + + + ## GENES + + #if $s.match_type.match_type_selector == "gene" + match-genes -i '${s.match_type.input_gene}' + #end if + + + ## PROTEINS + + #if $s.match_type.match_type_selector == "uniprot" + match-uniprot -i '${s.match_type.input_uniprot}' + #end if + + #if $s.match_type.match_type_selector == "ensembl" + match-ensembl -i '${s.match_type.input_ensembl}' + #end if + ## GENETIC VARIANTS - #if $s.input_type.input_type_selector == "rsid" - -t rsid -i '${s.input_type.input_rsid}' - #end if - - #if $s.input_type.input_type_selector == "chrbp" - -t chrbp -i '${s.input_type.input_chrbp}' + #if $s.match_type.match_type_selector == "vcf" + match-vcf -i '${s.match_type.input_vcf}' #end if - #if $s.input_type.input_type_selector == "vcf" - -t vcf -i '${s.input_type.input_vcf}' + #if $s.match_type.match_type_selector == "chrbp" + match-chrbp -i '${s.match_type.input_chrbp}' #end if - ## GENES + #if $s.match_type.match_type_selector == "rsid" + match-rsids -i '${s.match_type.input_rsid}' + #end if - #if $s.input_type.input_type_selector == "gene" - -t gene -i '${s.input_type.input_gene}' - #end if ## PEPTIDES - #if $s.input_type.input_type_selector == "peptide" - -t peptide -i '${s.input_type.input_peptide}' - -f '${s.input_type.input_database}' - -r '${s.input_type.ptm_range}' - #end if - - #if $s.input_type.input_type_selector == "modifiedpeptide" - -t modifiedpeptide -i '${s.input_type.input_modifiedpeptide}' - -f '${s.input_type.input_database}' - -r '${s.input_type.ptm_range}' + #if $s.match_type.match_type_selector == "peptide" + match-peptides -i '${s.match_type.input_peptide}' + -f '${s.match_type.input_database}' #end if - ## PROTEINS - - #if $s.input_type.input_type_selector == "uniprot" 
- -t uniprot -i '${s.input_type.input_uniprot}' - #end if - - #if $s.input_type.input_type_selector == "ensembl" - -t ensembl -i '${s.input_type.input_ensembl}' + #if $s.match_type.match_type_selector == "modifiedpeptide" + match-modified-peptides -i '${s.match_type.input_modifiedpeptide}' + -f '${s.match_type.input_database}' + -m '${s.match_type.modifiedpeptide_match_criteria}' + -r '${s.match_type.modifiedpeptide_ptm_range}' #end if - ## PROTEOFORMS - - #if $s.input_type.input_type_selector == "proteoforms" - #if $s.input_type.proteoform_match_criteria: - -t proteoform -m '${s.input_type.proteoform_match_criteria}' -i '${s.input_type.input_proteoforms}' - #else: - -t proteoform -i '${s.input_type.input_proteoforms}' - #end if - #end if #end for ## OUTPUT OPTIONS #if $output_options.search_top_level_info: - -tlp + -T #end if #set $output_graphs_list = str($output_options.output_graphs).split(',') @@ -142,41 +167,69 @@ </command> <inputs> - <repeat name="input_types" title="Input" min="1"> - <conditional name="input_type"> - <param name="input_type_selector" type="select" label="Input type" + <repeat name="match_types" title="Match" min="1"> + <conditional name="match_type"> + <param name="match_type_selector" type="select" label="Match type" help=""> - <option value="rsid">Genetic variants - SNP rsId list</option> + <option value="proteoforms">Proteoforms</option> + <option value="peptideshakerzip_proteoforms">Proteoforms from Peptideshaker Archive</option> + <option value="gene">Genes</option> + <option value="uniprot">Proteins - UniProt Accession list</option> + <option value="ensembl">Proteins - Ensembl identifier list</option> + <option value="vcf">Genetic variants - Variant Call Format Specification</option> <option value="chrbp">Genetic variants - Chromosomes and base pairs</option> - <option value="vcf">Genetic variants - Variant Call Format Specification</option> - <option value="gene">Genes</option> + <option value="rsid">Genetic variants - SNP rsId 
list</option> <option value="peptide">Peptides - Simple list</option> <option value="modifiedpeptide">Peptides - Peptide List with PTM types and sites</option> - <option value="uniprot">Proteins - UniProt Accession list</option> - <option value="ensembl">Proteins - Ensembl identifier list</option> - <option value="proteoforms">Proteoforms</option> + </param> - <!-- Genetic variants --> - <when value="rsid"> - <param format="txt" name="input_rsid" type="data" label="SNP rsId list" - help="The file contains one rsid identifier as defined in dbSNP[1] on each row. - The list must be ordered by chromosome and base pair (bp). The list must not have duplicates. - All rsids must appear in the human assembly GRCh37.p13. "/> + + <!-- Proteoforms --> + <when value="proteoforms"> + <param format="txt" name="input_proteoforms" type="data" label="Proteoforms" + help="A proteoform defines a specific state of a protein. + It is composed of the protein UniProt accession, isoform and set of post translational modifications. + The input file contains one line for each proteoform. Each PTM is specified using a modification + identifier and a site, separated by ':' (colon). For example: '00046:133'. 
+ The identifier is a 5 digit id from the PSI-MOD Protein Modification Ontology [6]."/> + + <param name="proteoform_match_criteria" type="select" label="Proteoform match criteria"> + <option value="STRICT">STRICT</option> + <option value="SUPERSET">SUPERSET</option> + <option value="SUPERSET_NO_TYPES">SUPERSET NO TYPES</option> + <option value="SUBSET" selected="True">SUBSET</option> + <option value="SUBSET_NO_TYPES">SUBSET NO TYPES</option> + <option value="ONE">ONE</option> + <option value="ONE_NO_TYPES">ONE_NO_TYPES</option> + </param> + + <param name="proteoform_range" type="integer" value="0" label="Integer range of error for PTM sites" optional="true" + help="Plus minus positions for the same PTM site"/> </when> - <when value="chrbp"> - <param format="txt" name="input_chrbp" type="data" label="Chromosomes and base pairs" - help="Genetic variants can also be represented using the chromosome and the base pair numbers. - The input should be sorted by chromosome number and then by base pair. "/> + <when value="peptideshakerzip_proteoforms"> + <param format="zip" name="input_peptideshakerzip_proteoforms" type="data" label="Proteoforms from Peptideshaker Archive" + help="A proteoform defines a specific state of a protein. + It is composed of the protein UniProt accession, isoform and set of post translational modifications. + The input file contains one line for each proteoform. Each PTM is specified using a modification + identifier and a site, separated by ':' (colon). For example: '00046:133'. 
+ The identifier is a 5 digit id from the PSI-MOD Protein Modification Ontology [6]."/> + + <param name="proteoform_peptideshakerzip_match_criteria" type="select" label="Proteoform match criteria"> + <option value="STRICT">STRICT</option> + <option value="SUPERSET">SUPERSET</option> + <option value="SUPERSET_NO_TYPES">SUPERSET NO TYPES</option> + <option value="SUBSET" selected="True">SUBSET</option> + <option value="SUBSET_NO_TYPES">SUBSET NO TYPES</option> + <option value="ONE">ONE</option> + <option value="ONE_NO_TYPES">ONE_NO_TYPES</option> + </param> + + <param name="proteoform_peptideshakerzip_range" type="integer" value="0" label="Integer range of error for PTM sites" optional="true" + help="Plus minus positions for the same PTM site"/> </when> - <when value="vcf"> - <param format="vcf" name="input_vcf" type="data" label="Variant Call Format Specification" - help="The input follows the Variant Call Format Specification[2] v4.3. - It also allows the possibility to specify only the first 4 columns in the data section of the file: - CHROM, POS, ID, REF. "/> - </when> <!-- Genes --> <when value="gene"> @@ -184,26 +237,6 @@ help="File with one gene name in each line. 
Genes follow the HUGO gene nomenclature[3]."/> </when> - <!-- Peptides --> - <when value="peptide"> - <param format="txt" name="input_peptide" type="data" label="Simple list" - help="File with a one peptide sequence in each line."/> - - <expand macro="input_fasta" /> - - <param name="ptm_range" type="integer" value="0" label="PTM position range" optional="true" - help="Plus minus positions for the same PTM site."/> - </when> - - <when value="modifiedpeptide"> - <param format="txt" name="input_modifiedpeptide" type="data" label="Peptide List with PTM types and sites" - help="Each line of the file corresponds to a single peptide with post-translational modifications."/> - - <expand macro="input_fasta" /> - - <param name="ptm_range" type="integer" value="0" label="PTM position range" optional="true" - help="Plus minus positions for the same PTM site."/> - </when> <!-- Proteins --> <when value="uniprot"> @@ -216,30 +249,67 @@ help="File with a one Ensembl identifier [5] in each line."/> </when> - <!-- Proteoforms --> - <when value="proteoforms"> - <param format="txt" name="input_proteoforms" type="data" label="Proteoforms" - help="A proteoform defines a specific state of a protein. - It is composed by the protein UniProt accession, isoform and set of post translational modifications. - The input file contains one line for each proteoform. Each PTM is specified using a modification - identifier and a site, separated by ':'(semicolon). For example: '00046:133'. - The identifier is a 5 digit id from the PSI-MOD Protein Modification Onthology [6]."/> + <!-- Genetic variants --> + + <when value="vcf"> + <param format="vcf" name="input_vcf" type="data" label="Variant Call Format Specification" + help="The input follows the Variant Call Format Specification[2] v4.3. + It also allows the possibility to specify only the first 4 columns in the data section of the file: + CHROM, POS, ID, REF. 
"/> + </when> + + <when value="chrbp"> + <param format="txt" name="input_chrbp" type="data" label="Chromosomes and base pairs" + help="Genetic variants can also be represented using the chromosome and the base pair numbers. + The input should be sorted by chromosome number and then by base pair. "/> + </when> + + <when value="rsid"> + <param format="txt" name="input_rsid" type="data" label="SNP rsId list" + help="The file contains one rsid identifier as defined in dbSNP[1] on each row. + The list must be ordered by chromosome and base pair (bp). The list must not have duplicates. + All rsids must appear in the human assembly GRCh37.p13. "/> + </when> + - <param name="proteoform_match_criteria" type="select" label="Proteoform match criteria"> + <!-- Peptides --> + <when value="peptide"> + <param format="txt" name="input_peptide" type="data" label="Simple list" + help="File with a one peptide sequence in each line."/> + + <expand macro="input_fasta" /> + + </when> + + <when value="modifiedpeptide"> + <param format="txt" name="input_modifiedpeptide" type="data" label="Peptide List with PTM types and sites" + help="Each line of the file corresponds to a single peptide with post-translational modifications."/> + + <expand macro="input_fasta" /> + + <param name="modifiedpeptide_match_criteria" type="select" label="Proteoform match criteria. 
Only modified peptides."> <option value="STRICT">STRICT</option> + <option value="SUPERSET">SUPERSET</option> + <option value="SUPERSET_NO_TYPES">SUPERSET NO TYPES</option> + <option value="SUBSET" selected="True">SUBSET</option> + <option value="SUBSET_NO_TYPES">SUBSET NO TYPES</option> <option value="ONE">ONE</option> - <option value="SUPERSET" selected="True">SUPERSET</option> - <option value="SUBSET">SUBSET</option> + <option value="ONE_NO_TYPES">ONE_NO_TYPES</option> </param> + + <param name="modifiedpeptide_ptm_range" type="integer" value="0" label="PTM position range" optional="true" + help="Integer number margin error for sites of PTMs. Only for modified peptides."/> </when> + + </conditional> </repeat> <section name="output_options" expanded="true" title="Output options"> - <param name="search_top_level_info" type="select" label="Add search top level info"> + <param name="search_top_level_info" type="select" label="Add Top Level Pathways in the search result."> <option value="0" selected="True">False</option> <option value="1">True</option> </param> @@ -269,9 +339,9 @@ <!-- Test that genes search works --> <test> - <repeat name="input_types"> - <conditional name="input_type"> - <param name="input_type_selector" value="gene"/> + <repeat name="match_types"> + <conditional name="match_type"> + <param name="match_type_selector" value="gene"/> <param name="input_gene" value="genes.txt" ftype="txt" /> </conditional> </repeat> @@ -280,15 +350,16 @@ <!-- Test graphs from proteoforms --> <test> - <repeat name="input_types"> - <conditional name="input_type"> - <param name="input_type_selector" value="proteoforms"/> + <repeat name="match_types"> + <conditional name="match_type"> + <param name="match_type_selector" value="proteoforms"/> <param name="input_proteoforms" value="proteoforms.txt" ftype="txt" /> + <param name="proteoform_match_criteria" value="SUBSET"/> </conditional> </repeat> <param name="output_graphs" value="gg,gu,gp" /> <output_collection 
name="graphs_files" type="list"> - <element name="geneExternalEdges" ftype="tsv" file="proteoforms_graphs/geneExternalEdges.tsv" compare="sim_size" delta="1000" /> +<!-- <element name="geneExternalEdges" ftype="tsv" file="proteoforms_graphs/geneExternalEdges.tsv" compare="sim_size" delta="1000" /> --> <element name="geneInternalEdges" ftype="tsv" file="proteoforms_graphs/geneInternalEdges.tsv" compare="sim_size" delta="1000"/> <element name="geneVertices" ftype="tsv" file="proteoforms_graphs/geneVertices.tsv" compare="sim_size" delta="1000"/> <element name="proteinExternalEdges" ftype="tsv" file="proteoforms_graphs/proteinExternalEdges.tsv" compare="sim_size" delta="10000"/> @@ -349,9 +420,9 @@ You can easily test PathwayMatcher functionality using the example files we provide with proteoforms and proteins information of Cystic Fibrosis: -https://raw.githubusercontent.com/PathwayAnalysisPlatform/PathwayMatcher/master/resources/input/Proteoforms/Simple/CysticFibrosis.txt +https://media.githubusercontent.com/media/PathwayAnalysisPlatform/PathwayMatcher/master/src/test/resources/Proteoforms/Simple/CysticFibrosis.txt -https://raw.githubusercontent.com/PathwayAnalysisPlatform/PathwayMatcher/master/resources/input/Proteins/UniProt/CysticFibrosis.txt +https://media.githubusercontent.com/media/PathwayAnalysisPlatform/PathwayMatcher/master/src/test/resources/Proteins/UniProt/CysticFibrosis.txt You can upload them to Galaxy by directly copying and pasting their URL into the Galaxy upload dialog (the button with the arrow pointing up in the top-left area, and then choosing *Paste/Fetch data*). @@ -397,4 +468,8 @@ </help> + <citations> + <citation type="doi">doi:10.1101/375097</citation> + </citations> + </tool>