diff pathwaymatcher.xml @ 3:2cd67294abbd draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pathwaymatcher commit 79d03b32b395b4c1385ff934251b17ea8950187b
author galaxyp
date Wed, 08 May 2019 13:41:47 -0400
parents 9a599f278852
children 3e510701a712
line wrap: on
line diff
--- a/pathwaymatcher.xml	Thu Jul 26 17:00:38 2018 -0400
+++ b/pathwaymatcher.xml	Wed May 08 13:41:47 2019 -0400
@@ -3,8 +3,8 @@
         PathwayMatcher is a software tool to search for pathways related to a list of proteins in Reactome.
     </description>
     <macros>
-       <token name="@PATHWAYMATCHER_VERSION@">1.8.1</token>
-       <token name="@TOOL_SUBVERSION@">2</token>
+       <token name="@PATHWAYMATCHER_VERSION@">1.9.1</token>
+       <token name="@TOOL_SUBVERSION@">1</token>
        <xml name="input_fasta">
          <param format="fasta" name="input_database" type="data" label="Protein Database"
              help="Select FASTA database from history"/>
@@ -33,73 +33,98 @@
         cwd=`pwd`;
         export HOME=\$cwd;
 
+        ## When PeptideShaker archives are used as input, first extract the proteoforms report from each archive.
+        #for $i, $s in enumerate($match_types)
+            #if $s.match_type.match_type_selector == "peptideshakerzip_proteoforms"
+                ##unzip -l $s.match_type.input_peptideshakerzip_proteoforms;
+                unzip -j '${$s.match_type.input_peptideshakerzip_proteoforms}' 'output_reports/proteoforms.txt' -d './';
+                mv proteoforms.txt ps_proteoforms_'${$i}'.txt;
+            #end if
+        #end for
+
         #####################
         ## Pathway Matcher ##
         #####################
-        (pathwaymatcher src.main.java.no.uib.pap.pathwaymatcher.PathwayMatcher
+        (pathwaymatcher
+
+            #for $i, $s in enumerate($match_types)
+
+
+                ## PROTEOFORMS
+
+                #if $s.match_type.match_type_selector == "proteoforms"
+                    #if $s.match_type.proteoform_match_criteria:
+                      match-proteoforms -m '${s.match_type.proteoform_match_criteria}' -i '${s.match_type.input_proteoforms}' -r '${s.match_type.proteoform_range}'
+                    #else:
+                      match-proteoforms -i '${s.match_type.input_proteoforms}' -r '${s.match_type.proteoform_range}'
+                    #end if
+                #end if
+
+                ## PROTEOFORMS FROM PEPTIDESHAKER FILE
 
-            #for $i, $s in enumerate($input_types)
+                #if $s.match_type.match_type_selector == "peptideshakerzip_proteoforms"
+                    #if $s.match_type.proteoform_peptideshakerzip_match_criteria:
+                      match-proteoforms -m '${s.match_type.proteoform_peptideshakerzip_match_criteria}' -i ps_proteoforms_'${$i}'.txt -r '${s.match_type.proteoform_peptideshakerzip_range}'
+                    #else:
+                      match-proteoforms -i ps_proteoforms_'${$i}'.txt -r '${s.match_type.proteoform_peptideshakerzip_range}'
+                    #end if
+                #end if
+
+
+                ## GENES
+
+                #if $s.match_type.match_type_selector == "gene"
+                    match-genes -i '${s.match_type.input_gene}'
+                #end if
+
+
+                ## PROTEINS
+
+                #if $s.match_type.match_type_selector == "uniprot"
+                    match-uniprot -i '${s.match_type.input_uniprot}'
+                #end if
+
+                #if $s.match_type.match_type_selector == "ensembl"
+                    match-ensembl -i '${s.match_type.input_ensembl}'
+                #end if
+
 
                 ## GENETIC VARIANTS
 
-                #if $s.input_type.input_type_selector == "rsid"
-                    -t rsid -i '${s.input_type.input_rsid}'
-                #end if
-
-                #if $s.input_type.input_type_selector == "chrbp"
-                    -t chrbp -i '${s.input_type.input_chrbp}'
+                #if $s.match_type.match_type_selector == "vcf"
+                    match-vcf -i '${s.match_type.input_vcf}'
                 #end if
 
-                #if $s.input_type.input_type_selector == "vcf"
-                    -t vcf -i '${s.input_type.input_vcf}'
+                #if $s.match_type.match_type_selector == "chrbp"
+                    match-chrbp -i '${s.match_type.input_chrbp}'
                 #end if
 
-                ## GENES
+                #if $s.match_type.match_type_selector == "rsid"
+                    match-rsids -i '${s.match_type.input_rsid}'
+                #end if
 
-                #if $s.input_type.input_type_selector == "gene"
-                    -t gene -i '${s.input_type.input_gene}'
-                #end if
 
                 ## PEPTIDES
 
-                #if $s.input_type.input_type_selector == "peptide"
-                    -t peptide -i '${s.input_type.input_peptide}'
-                    -f '${s.input_type.input_database}'
-                    -r '${s.input_type.ptm_range}'
-                #end if
-
-                #if $s.input_type.input_type_selector == "modifiedpeptide"
-                    -t modifiedpeptide -i '${s.input_type.input_modifiedpeptide}'
-                    -f '${s.input_type.input_database}'
-                    -r '${s.input_type.ptm_range}'
+                #if $s.match_type.match_type_selector == "peptide"
+                    match-peptides -i '${s.match_type.input_peptide}'
+                    -f '${s.match_type.input_database}'
                 #end if
 
-                ## PROTEINS
-
-                #if $s.input_type.input_type_selector == "uniprot"
-                    -t uniprot -i '${s.input_type.input_uniprot}'
-                #end if
-
-                #if $s.input_type.input_type_selector == "ensembl"
-                    -t ensembl -i '${s.input_type.input_ensembl}'
+                #if $s.match_type.match_type_selector == "modifiedpeptide"
+                    match-modified-peptides -i '${s.match_type.input_modifiedpeptide}'
+                    -f '${s.match_type.input_database}'
+                    -m '${s.match_type.modifiedpeptide_match_criteria}'
+                    -r '${s.match_type.modifiedpeptide_ptm_range}'
                 #end if
 
-                ## PROTEOFORMS
-
-                #if $s.input_type.input_type_selector == "proteoforms"
-                    #if $s.input_type.proteoform_match_criteria:
-                      -t proteoform -m '${s.input_type.proteoform_match_criteria}' -i '${s.input_type.input_proteoforms}'
-                    #else:
-                      -t proteoform -i '${s.input_type.input_proteoforms}'
-                    #end if
-                #end if
 
             #end for
 
             ## OUTPUT OPTIONS
 
             #if $output_options.search_top_level_info:
-                -tlp
+                -T
             #end if
 
             #set $output_graphs_list = str($output_options.output_graphs).split(',')
@@ -142,41 +167,69 @@
     </command>
     <inputs>
 
-        <repeat name="input_types" title="Input" min="1">
-            <conditional name="input_type">
-                  <param name="input_type_selector" type="select" label="Input type"
+        <repeat name="match_types" title="Match" min="1">
+            <conditional name="match_type">
+                  <param name="match_type_selector" type="select" label="Match type"
                       help="">
-                      <option value="rsid">Genetic variants - SNP rsId list</option>
+                      <option value="proteoforms">Proteoforms</option>
+                      <option value="peptideshakerzip_proteoforms">Proteoforms from Peptideshaker Archive</option>
+                      <option value="gene">Genes</option>
+                      <option value="uniprot">Proteins - UniProt Accession list</option>
+                      <option value="ensembl">Proteins - Ensembl identifier list</option>
+                      <option value="vcf">Genetic variants - Variant Call Format Specification</option>
                       <option value="chrbp">Genetic variants - Chromosomes and base pairs</option>
-                      <option value="vcf">Genetic variants - Variant Call Format Specification</option>
-                      <option value="gene">Genes</option>
+                      <option value="rsid">Genetic variants - SNP rsId list</option>
                       <option value="peptide">Peptides - Simple list</option>
                       <option value="modifiedpeptide">Peptides - Peptide List with PTM types and sites</option>
-                      <option value="uniprot">Proteins - UniProt Accession list</option>
-                      <option value="ensembl">Proteins - Ensembl identifier list</option>
-                      <option value="proteoforms">Proteoforms</option>
+
                   </param>
 
-                  <!-- Genetic variants -->
-                  <when value="rsid">
-                      <param format="txt" name="input_rsid" type="data" label="SNP rsId list"
-                          help="The file contains one rsid identifier as defined in dbSNP[1] on each row.
-                          The list must be ordered by chromosome and base pair (bp). The list must not have duplicates.
-                          All rsids must appear in the human assembly GRCh37.p13. "/>
+
+                  <!-- Proteoforms  -->
+                  <when value="proteoforms">
+                      <param format="txt" name="input_proteoforms" type="data" label="Proteoforms"
+                          help="A proteoform defines a specific state of a protein.
+                          It is composed of the protein UniProt accession, isoform and set of post-translational modifications.
+                          The input file contains one line for each proteoform. Each PTM is specified using a modification
+                          identifier and a site, separated by ':' (colon). For example: '00046:133'.
+                          The identifier is a 5-digit id from the PSI-MOD Protein Modification Ontology [6]."/>
+
+                      <param name="proteoform_match_criteria" type="select" label="Proteoform match criteria">
+                          <option value="STRICT">STRICT</option>
+                          <option value="SUPERSET">SUPERSET</option>
+                          <option value="SUPERSET_NO_TYPES">SUPERSET NO TYPES</option>
+                          <option value="SUBSET" selected="True">SUBSET</option>
+                          <option value="SUBSET_NO_TYPES">SUBSET NO TYPES</option>
+                          <option value="ONE">ONE</option>
+                          <option value="ONE_NO_TYPES">ONE NO TYPES</option>
+                      </param>
+
+                      <param name="proteoform_range" type="integer" value="0" label="Integer range of error for PTM sites" optional="true"
+                          help="Plus minus positions for the same PTM site"/>
                   </when>
 
-                  <when value="chrbp">
-                      <param format="txt" name="input_chrbp" type="data" label="Chromosomes and base pairs"
-                          help="Genetic variants can also be represented using the chromosome and the base pair numbers.
-                          The input should be sorted by chromosome number and then by base pair.  "/>
+                  <when value="peptideshakerzip_proteoforms">
+                      <param format="zip" name="input_peptideshakerzip_proteoforms" type="data" label="Proteoforms from Peptideshaker Archive"
+                          help="A proteoform defines a specific state of a protein.
+                          It is composed of the protein UniProt accession, isoform and set of post-translational modifications.
+                          The archive must contain the report 'output_reports/proteoforms.txt', with one line for each proteoform. Each PTM is specified using a modification
+                          identifier and a site, separated by ':' (colon). For example: '00046:133'.
+                          The identifier is a 5-digit id from the PSI-MOD Protein Modification Ontology [6]."/>
+
+                      <param name="proteoform_peptideshakerzip_match_criteria" type="select" label="Proteoform match criteria">
+                          <option value="STRICT">STRICT</option>
+                          <option value="SUPERSET">SUPERSET</option>
+                          <option value="SUPERSET_NO_TYPES">SUPERSET NO TYPES</option>
+                          <option value="SUBSET" selected="True">SUBSET</option>
+                          <option value="SUBSET_NO_TYPES">SUBSET NO TYPES</option>
+                          <option value="ONE">ONE</option>
+                          <option value="ONE_NO_TYPES">ONE NO TYPES</option>
+                      </param>
+
+                      <param name="proteoform_peptideshakerzip_range" type="integer" value="0" label="Integer range of error for PTM sites" optional="true"
+                          help="Plus minus positions for the same PTM site"/>
                   </when>
 
-                  <when value="vcf">
-                      <param format="vcf" name="input_vcf" type="data" label="Variant Call Format Specification"
-                          help="The input follows the Variant Call Format Specification[2] v4.3.
-                          It also allows the possibility to specify only the first 4 columns in the data section of the file:
-                          CHROM, POS, ID, REF.  "/>
-                  </when>
 
                   <!-- Genes -->
                   <when value="gene">
@@ -184,26 +237,6 @@
                           help="File with a one gene name in each line. Genes follow the HUGO gene nomenclature[3]."/>
                   </when>
 
-                  <!-- Peptides  -->
-                  <when value="peptide">
-                      <param format="txt" name="input_peptide" type="data" label="Simple list"
-                          help="File with a one peptide sequence in each line."/>
-
-                      <expand macro="input_fasta" />
-
-                      <param name="ptm_range" type="integer" value="0" label="PTM position range" optional="true"
-                          help="Plus minus positions for the same PTM site."/>
-                  </when>
-
-                  <when value="modifiedpeptide">
-                      <param format="txt" name="input_modifiedpeptide" type="data" label="Peptide List with PTM types and sites"
-                         help="Each line of the file corresponds to a single peptide with post-translational modifications."/>
-
-                      <expand macro="input_fasta" />
-
-                      <param name="ptm_range" type="integer" value="0" label="PTM position range" optional="true"
-                          help="Plus minus positions for the same PTM site."/>
-                  </when>
 
                   <!-- Proteins  -->
                   <when value="uniprot">
@@ -216,30 +249,67 @@
                           help="File with a one Ensembl identifier [5] in each line."/>
                   </when>
 
-                  <!-- Proteoforms  -->
-                  <when value="proteoforms">
-                      <param format="txt" name="input_proteoforms" type="data" label="Proteoforms"
-                          help="A proteoform defines a specific state of a protein.
-                          It is composed by the protein UniProt accession, isoform and set of post translational modifications.
-                          The input file contains one line for each proteoform. Each PTM is specified using a modification
-                          identifier and a site, separated by ':'(semicolon). For example: '00046:133'.
-                          The identifier is a 5 digit id from the PSI-MOD Protein Modification Onthology [6]."/>
+                  <!-- Genetic variants -->
+
+                  <when value="vcf">
+                      <param format="vcf" name="input_vcf" type="data" label="Variant Call Format Specification"
+                          help="The input follows the Variant Call Format Specification[2] v4.3.
+                          It also allows the possibility to specify only the first 4 columns in the data section of the file:
+                          CHROM, POS, ID, REF.  "/>
+                  </when>
+
+                  <when value="chrbp">
+                      <param format="txt" name="input_chrbp" type="data" label="Chromosomes and base pairs"
+                          help="Genetic variants can also be represented using the chromosome and the base pair numbers.
+                          The input should be sorted by chromosome number and then by base pair.  "/>
+                  </when>
+
+                  <when value="rsid">
+                      <param format="txt" name="input_rsid" type="data" label="SNP rsId list"
+                          help="The file contains one rsid identifier as defined in dbSNP[1] on each row.
+                          The list must be ordered by chromosome and base pair (bp). The list must not have duplicates.
+                          All rsids must appear in the human assembly GRCh37.p13. "/>
+                  </when>
+
 
-                      <param name="proteoform_match_criteria" type="select" label="Proteoform match criteria">
+                  <!-- Peptides  -->
+                  <when value="peptide">
+                      <param format="txt" name="input_peptide" type="data" label="Simple list"
+                          help="File with a one peptide sequence in each line."/>
+
+                      <expand macro="input_fasta" />
+
+                  </when>
+
+                  <when value="modifiedpeptide">
+                      <param format="txt" name="input_modifiedpeptide" type="data" label="Peptide List with PTM types and sites"
+                         help="Each line of the file corresponds to a single peptide with post-translational modifications."/>
+
+                      <expand macro="input_fasta" />
+
+                      <param name="modifiedpeptide_match_criteria" type="select" label="Proteoform match criteria. Only modified peptides.">
                           <option value="STRICT">STRICT</option>
+                          <option value="SUPERSET">SUPERSET</option>
+                          <option value="SUPERSET_NO_TYPES">SUPERSET NO TYPES</option>
+                          <option value="SUBSET" selected="True">SUBSET</option>
+                          <option value="SUBSET_NO_TYPES">SUBSET NO TYPES</option>
                           <option value="ONE">ONE</option>
-                          <option value="SUPERSET" selected="True">SUPERSET</option>
-                          <option value="SUBSET">SUBSET</option>
+                          <option value="ONE_NO_TYPES">ONE NO TYPES</option>
                       </param>
+
+                      <param name="modifiedpeptide_ptm_range" type="integer" value="0" label="PTM position range" optional="true"
+                          help="Integer margin of error for PTM site positions. Only for modified peptides."/>
                   </when>
 
+
+
             </conditional>
 
         </repeat>
 
         <section name="output_options" expanded="true" title="Output options">
 
-            <param name="search_top_level_info" type="select" label="Add search top level info">
+            <param name="search_top_level_info" type="select" label="Add Top Level Pathways in the search result.">
                 <option value="0" selected="True">False</option>
                 <option value="1">True</option>
             </param>
@@ -269,9 +339,9 @@
 
       <!-- Test that genes search works -->
       <test>
-          <repeat name="input_types">
-              <conditional name="input_type">
-                  <param name="input_type_selector" value="gene"/>
+          <repeat name="match_types">
+              <conditional name="match_type">
+                  <param name="match_type_selector" value="gene"/>
                   <param name="input_gene" value="genes.txt" ftype="txt" />
               </conditional>
           </repeat>
@@ -280,15 +350,16 @@
 
       <!-- Test graphs from proteoforms -->
       <test>
-          <repeat name="input_types">
-              <conditional name="input_type">
-                  <param name="input_type_selector" value="proteoforms"/>
+          <repeat name="match_types">
+              <conditional name="match_type">
+                  <param name="match_type_selector" value="proteoforms"/>
                   <param name="input_proteoforms" value="proteoforms.txt" ftype="txt" />
+                  <param name="proteoform_match_criteria" value="SUBSET"/>
               </conditional>
           </repeat>
           <param name="output_graphs" value="gg,gu,gp" />
           <output_collection name="graphs_files" type="list">
-              <element name="geneExternalEdges" ftype="tsv" file="proteoforms_graphs/geneExternalEdges.tsv" compare="sim_size" delta="1000" />
+<!--              <element name="geneExternalEdges" ftype="tsv" file="proteoforms_graphs/geneExternalEdges.tsv" compare="sim_size" delta="1000" /> -->
               <element name="geneInternalEdges" ftype="tsv" file="proteoforms_graphs/geneInternalEdges.tsv" compare="sim_size" delta="1000"/>
               <element name="geneVertices" ftype="tsv" file="proteoforms_graphs/geneVertices.tsv" compare="sim_size" delta="1000"/>
               <element name="proteinExternalEdges" ftype="tsv" file="proteoforms_graphs/proteinExternalEdges.tsv" compare="sim_size" delta="10000"/>
@@ -349,9 +420,9 @@
 
 You can easily test PathwayMatcher functionality using the example files we provide with proteoforms and proteins information of Cystic Fibrosis:
 
-https://raw.githubusercontent.com/PathwayAnalysisPlatform/PathwayMatcher/master/resources/input/Proteoforms/Simple/CysticFibrosis.txt
+https://media.githubusercontent.com/media/PathwayAnalysisPlatform/PathwayMatcher/master/src/test/resources/Proteoforms/Simple/CysticFibrosis.txt
 
-https://raw.githubusercontent.com/PathwayAnalysisPlatform/PathwayMatcher/master/resources/input/Proteins/UniProt/CysticFibrosis.txt
+https://media.githubusercontent.com/media/PathwayAnalysisPlatform/PathwayMatcher/master/src/test/resources/Proteins/UniProt/CysticFibrosis.txt
 
 You can upload them to Galaxy by directly copying and pasting their URL into the Galaxy upload dialog (the button with the arrow pointing up in the top-left area, and then choosing *Pasta/Fetch data*).
 
@@ -397,4 +468,8 @@
 
     </help>
 
+    <citations>
+        <citation type="doi">10.1101/375097</citation><!-- bare DOI: Galaxy's "doi" citation type takes the identifier without a "doi:" prefix -->
+    </citations>
+
 </tool>