diff maxquant.xml @ 6:2133b0be850a draft

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/maxquant commit 4e77eeda8a112fb50af00325a5164b986c16fc5c"
author galaxyp
date Wed, 06 May 2020 13:35:51 -0400
parents 7f432d87c82c
children d253b379322b
line wrap: on
line diff
--- a/maxquant.xml	Wed Apr 15 11:17:42 2020 -0400
+++ b/maxquant.xml	Wed May 06 13:35:51 2020 -0400
@@ -1,4 +1,4 @@
-<tool id="maxquant" name="MaxQuant" version="@VERSION@">
+<tool id="maxquant" name="MaxQuant" version="@VERSION@+galaxy1">
     <macros>
         <xml name="output" token_format="tabular" token_label="default description" token_name="default">
             <data format="@FORMAT@" label="@LABEL@ for ${on_string}" name="@NAME@">
@@ -81,15 +81,26 @@
             calcPeakProperties: ${search_opts.calc_peak_properties}
             #set writeMzTab = "mzTab" in $output_opts.output
             writeMzTab: $writeMzTab
-            #if $lfq_opts.do_ibaq.ibaq == 'True':
-            ibaq: ${lfq_opts.do_ibaq.ibaq}
-            ibaqLogFit: ${lfq_opts.do_ibaq.ibaqLogFit}
+            #if $protein_quant.lfq_opts.do_ibaq.ibaq == 'True':
+            ibaq: ${protein_quant.lfq_opts.do_ibaq.ibaq}
+            ibaqLogFit: ${protein_quant.lfq_opts.do_ibaq.ibaqLogFit}
             #end if
-            separateLfq: ${lfq_opts.separateLfq}
-            lfqStabilizeLargeRatios: ${lfq_opts.lfqStabilizeLargeRatios}
-            lfqRequireMsms: ${lfq_opts.lfqRequireMsms}
-            advancedSiteIntensities: ${lfq_opts.advancedSiteIntensities}
+            separateLfq: ${protein_quant.lfq_opts.separateLfq}
+            lfqStabilizeLargeRatios: ${protein_quant.lfq_opts.lfqStabilizeLargeRatios}
+            lfqRequireMsms: ${protein_quant.lfq_opts.lfqRequireMsms}
+            advancedSiteIntensities: ${protein_quant.lfq_opts.advancedSiteIntensities}
             matchBetweenRuns: ${search_opts.match_between_runs}
+            includeContaminants: ${search_opts.incl_contaminants}
+            quantMode: ${protein_quant.peptides_for_quantification}
+            restrictProteinQuantification: ${protein_quant.only_unmod_prot.unmod_prot}
+            #if $protein_quant.only_unmod_prot.unmod_prot == 'True':
+            #if $protein_quant.only_unmod_prot.mods_used_prot_quant:
+            restrictMods: [${protein_quant.only_unmod_prot.mods_used_prot_quant}]
+            #else:
+            restrictMods: []
+            #end if
+            useCounterparts: $protein_quant.only_unmod_prot.discard_unmod_cpart_peptides
+            #end if
             paramGroups:
             #for $pg in $paramGroups:
               #set names = [re.sub('@SUBSTITUTION_RX@', '_', str($n.element_identifier)) for $n in $pg.files]
@@ -111,6 +122,7 @@
                 #else:
                 enzymes: []
                 #end if
+                enzymeMode: ${pg.digestion_mode}
                 #if $pg.quant_method.select_quant_method == 'silac':
                 labelMods:
                   #if $pg.quant_method.light_labels:
@@ -139,9 +151,10 @@
                 #if $pg.quant_method.select_quant_method == 'reporter_ion_ms2':
                 lcmsRunType: 'Reporter ion MS2'
                 reporterMassTolerance: 0.003
-                reporterPif: 0
+                reporterPif: ${pg.quant_method.reporter_pif}
                 reporterFraction: 0
                 reporterBasePeakRatio: 0
+                filterPif: ${pg.quant_method.filter_pif}
                 isobaricLabels:
                   #if $pg.quant_method.iso_labels.labeling == 'custom':
                     #for $l in $pg.quant_method.iso_labels.iso_label:
@@ -216,7 +229,7 @@
             </param>
             <param format="fasta" multiple="true" name="fasta_files"
                    type="data" label="FASTA files"
-                   help="Specify one or more FASTA databases." />
+                   help="Specify one or more FASTA databases."/>
             <param name="identifier_parse_rule" type="text"
                    label="identifier parse rule" value="^&gt;.*\|(.*)\|.*$">
                 <sanitizer>
@@ -239,49 +252,133 @@
         <section name="search_opts" title="Search Options" expanded="true">
             <param format="tabular" name="template" type="data" optional="true"
                    label="Specify an experimental design template (if needed). For detailed
-                          instructions see the help text." />
+                          instructions see the help text."/>
             <param type="integer" name="min_peptide_len"
-	           label="minimum peptide length" value="7"
-                help="Peptides shorter than this value will not be reported nor be considered during protein identification and quantification 
+                   label="minimum peptide length" value="7"
+                   help="Peptides shorter than this value will not be reported nor be considered during protein identification and quantification;
 short peptides are usually not unique in the protein database and therefore not statistically informative."/>
             <param type="integer" name="max_peptide_mass"
-	           label="maximum peptide mass [Da]" value="4600"
-        help="Peptides that are heavier than this mass will be discarded in the Andromeda search."/>
+                   label="maximum peptide mass [Da]" value="4600"
+                   help="Peptides that are heavier than this mass will be discarded in the Andromeda search."/>
             <param type="integer" name="min_unique_pep"
-	           label="minimum unique peptides" value="0" 
-        help="The minimum number of unique peptides a protein group should have to be considered as identified and reported in the final table." />
+                   label="minimum unique peptides" value="0"
+                   help="The minimum number of unique peptides a protein group should have to be considered as identified and reported in the final table."/>
             <param name="calc_peak_properties" type="boolean" checked="false"
-	           label="Calculate peak properties"
-	           truevalue="True" falsevalue="False" 
-        help="If checked, several quantities characterizing peaks and isotopes patterns are calculated. This may lead to a substantial increase in computation time."/>
+                   label="Calculate peak properties"
+                   truevalue="True" falsevalue="False"
+                   help="If checked, several quantities characterizing peaks and isotopes patterns are calculated. This may lead to a substantial increase in computation time."/>
             <param name="match_between_runs" type="boolean" checked="false"
-	           label="Match between runs"
-	           truevalue="True" falsevalue="False" 
-        help="Identifications are transferred to non-sequenced or non-identified MS features in other LC-MS runs."/>
+                   label="Match between runs"
+                   truevalue="True" falsevalue="False"
+                   help="Identifications are transferred to non-sequenced or non-identified MS features in other LC-MS runs."/>
+            <param name="incl_contaminants" type="boolean" checked="true"
+                   label="Include contaminants"
+                   truevalue="True" falsevalue="False"
+                   help="‘Yes’ means that proteins from a contaminant list are automatically added to the list of proteins for in silico digestion (in the fasta file)"/>
+
         </section>
-        
+
+        <section name="protein_quant" title="Protein quantification" expanded="true">
+
+            <param name="peptides_for_quantification" type="select" label="Peptides for quantification"
+                   help="Specify how the protein ratios will be calculated. When ‘All’ is selected the quantification is done on all peptides. With ‘Unique’, only the peptides unique for that specific protein group are used for quantification. The ‘unique + razor’ mode calculates protein ratios from the unique and razor peptides. Razor peptides are non-unique peptides assigned to the protein group with the most other peptides (Occam’s razor principle).">
+                <option value="0">All</option>
+                <option value="1" selected="true">Unique + razor</option>
+                <option value="2">Unique</option>
+            </param>
+
+            <conditional name="only_unmod_prot">
+                <param name="unmod_prot" type="select"
+                       label="Use only unmodified peptides"
+                       help="Indicate here which kind of peptides should be used for protein quantification. If ‘No’, all peptides are used irrespective of their modification state. If ‘Yes’, all unmodified peptides are used plus those peptides that have only the modifications specified in the field below.">
+                    <option value="False">No</option>
+                    <option value="True" selected="true">Yes</option>
+                </param>
+                <when value="True">
+                    <param name="mods_used_prot_quant" type="select" multiple="true"
+                           label="Modifications used in protein quantification"
+                           help="Peptides containing only these modifications will be used for the quantification of proteins in addition to unmodified peptides. This includes ratio and intensity calculations including iBAQ">
+                        <expand macro="modification"/>
+                    </param>
+                    <!-- note: the following default is different to the mquant default -->
+                    <param name="discard_unmod_cpart_peptides" type="boolean" label="Discard unmodified counterpart peptides" checked="true"
+                           truevalue="True" falsevalue="False"
+                           help="If selected, also unmodified peptides will be discarded for which a modified counterpart has been identified with a modification that is not specified in the field above."/>
+                </when>
+                <when value="False">
+                </when>
+            </conditional>
+
+            <section name="lfq_opts" title="LFQ Options" expanded="false">
+                <param name="separateLfq" type="boolean" checked="false"
+                       label="Separate LFQ in parameter Groups"
+                       truevalue="True" falsevalue="False"
+                       help="The MaxLFQ algorithm will be applied independently to samples in different parameter groups."/>
+                <param name="lfqStabilizeLargeRatios" type="boolean" checked="true"
+                       label="Stabilize large LFQ ratios"
+                       truevalue="True" falsevalue="False"
+                       help="Large protein ratios will get an admixture of the total protein intensity ratio as described in the MaxLFQ paper"/>
+                <param name="lfqRequireMsms" type="boolean" checked="true"
+                       label="Require MS/MS for LFQ comparisons"
+                       truevalue="True" falsevalue="False"
+                       help="Requires for each pair-wise peptide intensity comparison that at least one of two peptides has been identified by MS/MS"/>
+                <conditional name="do_ibaq">
+                    <param name="ibaq" type="select"
+                           label="iBAQ (calculates absolute protein abundances by normalizing to copy number and not protein mass)">
+                        <option value="False">No</option>
+                        <option value="True">Yes</option>
+                    </param>
+                    <when value="True">
+                        <param name="ibaqLogFit" type="boolean" checked="true"
+                               label="Logarithmic fit"
+                               truevalue="True" falsevalue="False"/>
+                    </when>
+                    <when value="False">
+                    </when>
+                </conditional>
+                <param name="advancedSiteIntensities" type="boolean" checked="true"
+                       label="Advanced site intensities"
+                       truevalue="True" falsevalue="False"/>
+            </section>
+
+        </section>
+
+
         <repeat name="paramGroups" title="Parameter Group" min="1" default="1">
             <param type="data" format="thermo.raw,mzXML,mzML" name="files" label="Infiles" multiple="true"
                    help="Only select infiles matching the filetype specified in the input options."/>
             <param type="integer" name="maxMissedCleavages"
-	           label="missed cleavages" value="2"
-         help="The number of missed cleavages that are maximally tolerated in the in-silico digestion of the protien sequences."/>
+                   label="missed cleavages" value="2"
+                   help="The number of missed cleavages that are maximally tolerated in the in-silico digestion of the protein sequences."/>
             <param name="fixedModifications" type="select" label="fixed modifications"
-	           multiple="true" help="Select zero or more fixed modifications. They will always be attached to any occurence of the respective amino acid.">
-	        <expand macro="modification"/>
+                   multiple="true"
+                   help="Select zero or more fixed modifications. They will always be attached to any occurrence of the respective amino acid.">
+                <expand macro="modification"/>
                 <expand macro="default_mod_option" value="Carbamidomethyl (C)"/>
             </param>
             <param name="variableModifications" type="select" label="variable modifications"
-	           multiple="true" help="Select zero or more variable modifications. Do not specify label modifications here, neither ms1 level labels, like SILAC, nor isobaric labels.">
+                   multiple="true"
+                   help="Select zero or more variable modifications. Do not specify label modifications here, neither ms1 level labels, like SILAC, nor isobaric labels.">
                 <expand macro="default_mod_option" value="Oxidation (M)"/>
                 <expand macro="default_mod_option" value="Acetyl (Protein N-term)"/>
-	        <expand macro="modification"/>
+                <expand macro="modification"/>
             </param>
             <param name="enzymes" type="select" label="enzyme"
-	           multiple="true" help="Select zero or more enzymes. The enzymes used for generating the in silico peptides for the Andromeda search.">
+                   multiple="true"
+                   help="Select zero or more enzymes. The enzymes used for generating the in silico peptides for the Andromeda search.">
                 <expand macro="default_mod_option" value="Trypsin/P"/>
-	        <expand macro="proteases"/>
+                <expand macro="proteases"/>
             </param>
+            <param name="digestion_mode" type="select" label="Digestion mode"
+                   help="'Unspecific' means that no cleavage rule is applied but instead all peptides up to a maximal length are considered.">
+                <option value="0" selected="true">Specific</option>
+                <option value="1">Semi-specific free N-terminus</option>
+                <option value="2">Semi-specific free C-terminus</option>
+                <option value="3">Semi-specific</option>
+                <option value="4">Unspecific</option>
+                <option value="5">No digestion</option>
+            </param>
+
             <conditional name="quant_method">
                 <param name="select_quant_method" type="select" label="Quantitation Methods"
                        help="Select a method if needed.">
@@ -292,33 +389,34 @@
                 </param>
                 <when value=""/>
                 <when value="silac">
-	            <param name="light_labels" type="select" label="light labels"
-		           multiple="true" help="Select zero or more light modifications.">
-	                <expand macro="label"/>
-	            </param>
-	            <param name="medium_labels" type="select" label="medium labels"
-		           multiple="true" help="Select zero modifications if you have two labels. Select a medium modification if you have three labels.">
-	                <expand macro="label"/>
-	            </param>
-	            <param name="heavy_labels" type="select" label="heavy labels"
-		           multiple="true" help="Select zero or more heavy modifications.">
-	                <expand macro="label"/>
-	            </param>
+                    <param name="light_labels" type="select" label="light labels"
+                           multiple="true" help="Select zero or more light modifications.">
+                        <expand macro="label"/>
+                    </param>
+                    <param name="medium_labels" type="select" label="medium labels"
+                           multiple="true"
+                           help="Select zero modifications if you have two labels. Select a medium modification if you have three labels.">
+                        <expand macro="label"/>
+                    </param>
+                    <param name="heavy_labels" type="select" label="heavy labels"
+                           multiple="true" help="Select zero or more heavy modifications.">
+                        <expand macro="label"/>
+                    </param>
                 </when>
                 <when value="lfq">
-		    <param type="integer" name="lfqMinRatioCount"
-		           label="LFQ minimum ratio count" value="2"
-                   help="Minimum number of peptides that has to be available in pair-wise comparisons between two samples for a protein."/>
-		    <param type="integer" name="lfqMinEdgesPerNode"
-			   label="LFQ minimum number of neighbours" value="3"
-               help="Defines the network to normalize the samples in the fast LFQ mode."/>
-		    <param type="integer" name="lfqAvEdgesPerNode"
-			   label="LFQ average number of neighbours" value="6"
-               help="Defines the network to normalize the samples in the fast LFQ mode."/>
-		    <param type="boolean" name="lfqSkipNorm" checked="true"
-			   label="Skip normalization"
-			   truevalue="True" falsevalue="False" 
-               help="If checked the high-speed version of MaxLFQ is used. This is recommended for large numbers of samples (Experiments). For less than 10 samples the original MaxLFQ normalization algorithm is used."/>
+                    <param type="integer" name="lfqMinRatioCount"
+                           label="LFQ minimum ratio count" value="2"
+                           help="Minimum number of peptides that has to be available in pair-wise comparisons between two samples for a protein."/>
+                    <param type="integer" name="lfqMinEdgesPerNode"
+                           label="LFQ minimum number of neighbours" value="3"
+                           help="Defines the network to normalize the samples in the fast LFQ mode."/>
+                    <param type="integer" name="lfqAvEdgesPerNode"
+                           label="LFQ average number of neighbours" value="6"
+                           help="Defines the network to normalize the samples in the fast LFQ mode."/>
+                    <param type="boolean" name="lfqSkipNorm" checked="true"
+                           label="Skip normalization"
+                           truevalue="True" falsevalue="False"
+                           help="If checked the high-speed version of MaxLFQ is used. This is recommended for large numbers of samples (Experiments). For less than 10 samples the original MaxLFQ normalization algorithm is used."/>
                 </when>
                 <when value="reporter_ion_ms2">
                     <conditional name="iso_labels">
@@ -334,6 +432,7 @@
                             <option value="itraq8plex">iTRAQ8plex</option>
                             <option value="iodotmt6plex">iodoTMT6plex</option>
                         </param>
+
                         <when value="tmt2plex"></when>
                         <when value="tmt6plex"></when>
                         <when value="tmt8plex"></when>
@@ -345,62 +444,37 @@
                         <when value="custom">
                             <repeat name="iso_label" title="Isobaric Label" min="1" default="1">
                                 <param name="internallabel" type="select" label="internal label" help="contains Lys">
-	                            <expand macro="iso_labels"/>
-	                        </param>
+                                    <expand macro="iso_labels"/>
+                                </param>
                                 <param name="terminallabel" type="select" label="terminal label" help="contains Nter">
                                     <option value="">None</option>
-	                            <expand macro="iso_labels"/>
-	                        </param>
+                                    <expand macro="iso_labels"/>
+                                </param>
                                 <param name="cm2" type="float" label="correction factor -2%" value="0.0"/>
                                 <param name="cm1" type="float" label="correction factor -1%" value="0.0"/>
                                 <param name="cp1" type="float" label="correction factor +1%" value="0.0"/>
                                 <param name="cp2" type="float" label="correction factor +2%" value="0.0"/>
-		                <param type="boolean" name="tmtlike" checked="false"
-			               label="TMT-like"
-			               truevalue="True" falsevalue="False" />
+                                <param type="boolean" name="tmtlike" checked="false"
+                                       label="TMT-like"
+                                       truevalue="True" falsevalue="False"/>
                             </repeat>
                         </when>
                     </conditional>
+                    <param name="filter_pif" type="boolean" checked="false" label="Filter by PIF"
+                           truevalue="True" falsevalue="False"
+                           help="If checked, quantified PSMs will be filtered by their precursor intensity fraction to avoid using peptides that clearly suffer from co-fragmentation with other peptide species."/>
+                    <param name="reporter_pif" type="float" label="Minimum reporter PIF" value="0.75" min="0" max="1"
+                           help="MS/MS spectra with a lower value for the PIF will be discarded for isobaric label quantification."/>
                 </when>
             </conditional>
         </repeat>
 
-        <section title="LFQ Options" name="lfq_opts" expanded="false">
-	    <param name="separateLfq" type="boolean" checked="false"
-		   label="Separate LFQ in parameter Groups"
-		   truevalue="True" falsevalue="False" 
-           help="The MaxLFQ algorithm will be applied independently to samples in different parameter groups."/>
-	    <param name="lfqStabilizeLargeRatios" type="boolean" checked="true"
-		   label="Stabilize large LFQ ratios"
-		   truevalue="True" falsevalue="False" 
-           help="Large protein ratios will get an admixture of the total protein intensity ratio as described in the MaxLFQ paper"/>
-	    <param name="lfqRequireMsms" type="boolean" checked="true"
-		   label="Require MS/MS for LFQ comparisons"
-		   truevalue="True" falsevalue="False" 
-           help="Requires for each pari-wise peptide intensity comparison that at least one of two peptides has been identified by MS/MS"/>
-	    <conditional name="do_ibaq">
-                <param name="ibaq" type="select" label="iBAQ (calculates absolute protein abundances by normalizing to copy number and not protein mass)">
-                    <option value="False">No</option>
-                    <option value="True">Yes</option>
-		</param>
-                <when value="True">
-                    <param name="ibaqLogFit" type="boolean" checked="true"
-                           label="Logarithmic fit"
-                           truevalue="True" falsevalue="False" />
-                </when>
-                <when value="False">
-                </when>
-	    </conditional>
-	    <param name="advancedSiteIntensities" type="boolean" checked="true"
-		   label="Advanced site intensities"
-		   truevalue="True" falsevalue="False" />
-        </section>
+        <expand macro="ptxqc-opts"/>
 
-        <expand macro="ptxqc-opts"/>
         <section title="Output Options" name="output_opts" expanded="true">
             <param name="dry_run" type="boolean" checked="false"
-	           label="Only create config files? (i.e. for testing)"
-	           truevalue="True" falsevalue="" />            
+                   label="Only create config files? (i.e. for testing)"
+                   truevalue="True" falsevalue=""/>
             <param type="select" name="output" label="Select the desired outputs."
                    multiple="true" optional="false">
                 <option value="proteinGroups">Protein Groups</option>
@@ -430,6 +504,7 @@
 
     <tests>
         <!-- functional test with small mzXML file, no quantitation methods -->
+        <!-- Disable this test when initially testing, as it takes a very long time to run -->
         <test expect_num_outputs="15">
             <param name="ftype" value=".mzxml" />
             <param name="fasta_files" value="bsa.fasta" />
@@ -464,6 +539,14 @@
             <param name="fasta_files" value="bsa.fasta" />
             <param name="identifier_parse_rule" value="&gt;([^\s]*)" />
             <param name="description_parse_rule" value="&gt;(.*)" />
+            <param name="incl_contaminants" value="False" /> <!-- non-default value -->
+            <section name="protein_quant">
+                <param name="peptides_for_quantification" value="0"/><!-- non-default value -->
+                <conditional name="only_unmod_prot">
+                    <param name="unmod_prot" value="True"/>
+                    <param name="mods_used_prot_quant" value="Pro5,Pro6"/>
+                </conditional>
+            </section>
             <repeat name="paramGroups">
                 <param name="files" value="BSA_min_23.mzXML"/>
                 <param name="variableModifications" value="Oxidation (M)" />
@@ -485,6 +568,8 @@
                             <param name="tmtlike" value="True" />
                         </repeat>
                     </conditional>
+                    <param name="filter_pif" value="False" />
+                    <param name="reporter_pif" value="0.6" />
                 </conditional>
             </repeat>
             <repeat name="paramGroups">
@@ -497,6 +582,8 @@
                     <conditional name="iso_labels">
                         <param name="labeling" value="itraq4plex" />
                     </conditional>
+                    <param name="filter_pif" value="True" />
+                    <param name="reporter_pif" value="0.8" />
                 </conditional>
             </repeat>
             <param name="dry_run" value="True" />