view peptide_shaker.xml @ 35:5a38c0d33704 draft

planemo upload for repository commit a9c70724c3561247e91f0b69b35022adcf3445ff
author galaxyp
date Mon, 27 Feb 2017 13:16:20 -0500
parents 0ebf3d3e4c90
children bced7eb9e197
line wrap: on
line source

<tool id="peptide_shaker" name="Peptide Shaker" version="1.15.1">
        <description>Perform protein identification using various search engines based on results from SearchGUI</description>
        <!-- NOTE(review): the macro file name is assumed from Galaxy convention; confirm.
             It has to provide the "stdio" and "citations" macros expanded in this file
             and the SEARCHGUI_MAJOR_VERSION token used by the searchgui_input parameter. -->
        <macros>
            <import>macros.xml</import>
        </macros>
        <requirements>
            <requirement type="package" version="1.15.1">peptide-shaker</requirement>
        </requirements>
    <expand macro="stdio" />
        <command><![CDATA[
        ## Time-stamped experiment and sample labels handed to PeptideShakerCLI.
        #from datetime import datetime
        #set $exp_str = "Galaxy_Experiment_%s" % datetime.now().strftime("%Y%m%d%H%M%s")
        #set $samp_str = "Sample_%s" % datetime.now().strftime("%Y%m%d%H%M%s")
        ## Every CLI call appends its stderr to this file; it is replayed once at the end.
        #set $temp_stderr = "peptideshaker_stderr"
        #set $bin_dir = "bin"

        mkdir output_reports;
        ## The CLI options below are given absolute paths, so remember the job directory.
        cwd=`pwd`;

        ## Expose the SearchGUI archive under a fixed .zip name and pull the
        ## identification parameters file out of it.
        ln -s '$searchgui_input' searchgui_input.zip &&
        jar xvf searchgui_input.zip SEARCHGUI_IdentificationParameters.par &&
        ## PeptideShakerCLI ##
        (peptide-shaker eu.isas.peptideshaker.cmd.PeptideShakerCLI
            -temp_folder \$cwd/PeptideShakerCLI
            -experiment '$exp_str'
            -sample '$samp_str'
            -replicate 1
            -identification_files \$cwd/searchgui_input.zip
            -id_params \$cwd/SEARCHGUI_IdentificationParameters.par
            -out \$cwd/peptideshaker_output.cpsx
            -zip \$cwd/peptideshaker_output.zip

            -threads "\${GALAXY_SLOTS:-12}"

            ##Optional processing parameters:
            #if $processing_options.processing_options_selector == "yes":
                -protein_fdr "${processing_options.protein_fdr}"
                -peptide_fdr "${processing_options.peptide_fdr}"
                -psm_fdr "${processing_options.psm_fdr}"
                -ptm_score "${processing_options.ptm_score.ptm_score_selector}"
                ## Select values are strings: compare as text, not against the integer 1.
                #if str($processing_options.ptm_score.ptm_score_selector) == "1":
                    -score_neutral_losses "${processing_options.ptm_score.score_neutral_losses}"
                    #if str($processing_options.ptm_score.ptm_threshold) != '':
                        -ptm_threshold "${processing_options.ptm_score.ptm_threshold}"
                    #end if
                #end if
                -protein_fraction_mw_confidence "${processing_options.protein_fraction_mw_confidence}"
                -ptm_alignment "${processing_options.ptm_alignment}"
            #end if

            ##Optional filtering parameters:
            #if $filtering_options.filtering_options_selector == "yes":
                -import_peptide_length_min "${filtering_options.min_peptide_length}"
                -import_peptide_length_max "${filtering_options.max_peptide_length}"
                -import_precurosor_mz "${filtering_options.max_precursor_error}"
                -import_precurosor_mz_ppm "${filtering_options.max_precursor_error_type}"
                ##-max_xtandem_e "${filtering_options.max_xtandem_e}"
                ##-max_omssa_e "${filtering_options.max_omssa_e}"
                ##-max_mascot_e "${filtering_options.max_mascot_e}"
                -exclude_unknown_ptms "${filtering_options.exclude_unknown_ptms}"
            #end if

        2>> $temp_stderr)

        && echo "Running Reports"

        ## PeptideShaker Report options ##

        #if 'mzidentML' in str($outputs).split(','):
            && echo "Generating mzIdentML"
            && (peptide-shaker eu.isas.peptideshaker.cmd.MzidCLI
                -in \$cwd/peptideshaker_output.zip
                -output_file \$cwd/output.mzid
                #if $contact_options.contact_options_selector == "yes":
                    -contact_first_name "$contact_options.contact_first_name"
                    -contact_last_name "$contact_options.contact_last_name"
                    -contact_email "$contact_options.contact_email"
                    -contact_address "$contact_options.contact_address"
                    #if str($contact_options.contact_url).strip() != '':
                        -contact_url "$contact_options.contact_url"
                    #end if
                    -organization_name "$contact_options.organization_name"
                    -organization_email "$contact_options.organization_email"
                    -organization_address "$contact_options.organization_address"
                    #if str($contact_options.organization_url).strip() != '':
                        -organization_url "$contact_options.organization_url"
                    #end if
                #else:
                    ## Placeholder contact used when the user supplies none.
                    -contact_first_name "Proteomics"
                    -contact_last_name "Galaxy"
                    -contact_email ""
                    -contact_address ""
                    -organization_name "University of Minnesota"
                    -organization_email ""
                    -organization_address "Minneapolis, MN 55455, Vereinigte Staaten"
                #end if
            2>> $temp_stderr)
        #end if

        ## Generate Reports if the user has selected one of the 9 additional reports (ids 0-8).
        ## 'cps', 'mzidentML' and 'zip' are not valid options for PeptideShaker
        ## and will not be passed to the command line
        #if set(["0","1","2","3","4","5","6","7","8"]).intersection( set( str( $outputs ).split(',') ) ):

            && (peptide-shaker eu.isas.peptideshaker.cmd.ReportCLI
                -temp_folder \$cwd/ReportCLI
                -in \$cwd/peptideshaker_output.zip
                -out_reports \$cwd/output_reports
                #set $cleaned_list = str($outputs).split(',')
                #if 'cps' in $cleaned_list:
                    #silent $cleaned_list.remove('cps')
                #end if
                #if 'mzidentML' in $cleaned_list:
                    #silent $cleaned_list.remove('mzidentML')
                #end if
                #if 'zip' in $cleaned_list:
                    #silent $cleaned_list.remove('zip')
                #end if
                ## Only numbers are left over. These corresponds to different reports.
                -reports #echo ','.join($cleaned_list)#

            2>> $temp_stderr)
        #end if

        ## # ls -l \$cwd/output_reports/* ;

        ## Move each requested report to the fixed name its <data from_work_dir> expects.
        #if '0' in str($outputs).split(','):
            && find \$cwd/output_reports -name '*Certificate_of_Analysis*' -exec bash -c 'mv "$0" "certificate.txt"' {} \;
        #end if
        #if '1' in str($outputs).split(','):
            && find \$cwd/output_reports -name '*Hierarchical*' -exec bash -c 'mv "$0" "hierarchical.txt"' {} \;
        #end if
        #if '2' in str($outputs).split(','):
            && find \$cwd/output_reports -name '*PSM_Phosphorylation_Report*' -exec bash -c 'mv "$0" "psm_phospho.txt"' {} \;
        #end if
        ## The extended report must be moved away first: '*PSM_Report*' below would match it too.
        #if '8' in str($outputs).split(','):
            && find \$cwd/output_reports -name '*Extended_PSM_Report*' -exec bash -c 'mv "$0" "psmx.txt"' {} \;
        #end if
        #if '3' in str($outputs).split(','):
            && find \$cwd/output_reports -name '*PSM_Report*' -exec bash -c 'mv "$0" "psm.txt"' {} \;
        #end if
        #if '4' in str($outputs).split(','):
            && find \$cwd/output_reports -name '*Peptide_Phosphorylation_Report*' -exec bash -c 'mv "$0" "peptides_phospho.txt"' {} \;
        #end if
        #if '5' in str($outputs).split(','):
            && find \$cwd/output_reports -name '*Peptide_Report*' -exec bash -c 'mv "$0" "peptides.txt"' {} \;
        #end if
        #if '6' in str($outputs).split(','):
            && find \$cwd/output_reports -name '*Protein_Phosphorylation_Report*' -exec bash -c 'mv "$0" "proteins_phospho.txt"' {} \;
        #end if
        #if '7' in str($outputs).split(','):
            && find \$cwd/output_reports -name '*Protein_Report*' -exec bash -c 'mv "$0" "proteins.txt"' {} \;
        #end if
        ;

        ## Remember the status of the && chain, replay the collected stderr,
        ## then finish with that status so a failed step fails the job.
        exit_code_for_galaxy=\$?;
        cat $temp_stderr 2>&1;
        (exit \$exit_code_for_galaxy)
        ]]></command>

        <inputs>
            <!-- SearchGUI result archive to post-process. The options filter presumably limits
                 the choice to archives whose searchgui_major_version metadata equals the
                 SEARCHGUI_MAJOR_VERSION token (TODO confirm against the macros file). -->
            <param name="searchgui_input" format="searchgui_archive" type="data" label="Compressed SearchGUI results"
                help="SearchGUI Results from History">
                <options options_filter_attribute="metadata.searchgui_major_version" >
                    <filter type="add_value" value="@SEARCHGUI_MAJOR_VERSION@" />
                </options>
            </param>
            <!-- Advanced PeptideShakerCLI processing options (FDR levels, PTM scoring). -->
            <conditional name="processing_options">
                <param name="processing_options_selector" type="select" label="Specify Advanced PeptideShaker Processing Options">
                    <option value="no" selected="True">Default Processing Options</option>
                    <option value="yes">Advanced Processing Options</option>
                </param>
                <when value="no" />
                <when value="yes">
                    <param name="protein_fdr" label="FDR at the protein level" help="In percent (default 1% FDR: '1')" value="1" type="float" />
                    <param name="peptide_fdr" label="FDR at the peptide level" help="In percent (default 1% FDR: '1')" value="1" type="float" />
                    <param name="psm_fdr" label="FDR at the PSM level" help="In percent (default 1% FDR: '1')" value="1" type="float" />
                    <param name="protein_fraction_mw_confidence" value="95.0" type="float" label="Minimum confidence required for a protein in the fraction MW plot"
                        help="default 95%: '95.0'" />
                    <conditional name="ptm_score">
                        <param name="ptm_score_selector" type="select" label="The PTM probabilistic score to use for PTM localization">
                            <option value="0" selected="True">A-score</option>
                            <option value="1">PhosphoRS</option>
                        </param>
                        <when value="0" />
                        <when value="1">
                            <param name="score_neutral_losses" label="Include Neutral Losses in A Score" type="boolean" truevalue="1" falsevalue="0" />
                            <param name="ptm_threshold" label="The threshold to use for the PTM scores" optional="true" value="" type="float"
                                help="Automatic mode will be used if not set" />
                        </when>
                    </conditional>
                    <param name="ptm_alignment" label="Align peptide ambiguously localized PTMs on confident sites" type="boolean" truevalue="1" falsevalue="0" checked="true"/>
                    <!-- SKIPPING -protein_fraction_mw_confidence ${processing_options.protein_fraction_mw_confidence} -->
                </when>
            </conditional>
            <!-- Advanced import filters applied by PeptideShakerCLI. -->
            <conditional name="filtering_options">
                <param name="filtering_options_selector" type="select" label="Specify Advanced Filtering Options"
                    help="Filter based on peptide lengths, precursor mass error, E value errors from X! Tandem and OMSSA, and include/exclude unknown PTM’s">
                    <option value="no" selected="True">Default Filtering Options</option>
                    <option value="yes">Advanced Filtering Options</option>
                </param>
                <when value="no" />
                <when value="yes">
                    <param name="min_peptide_length" type="integer" label="Minimum Peptide Length" value="6" />
                    <param name="max_peptide_length" type="integer" label="Maximum Peptide Length" value="30" />
                    <param name="max_precursor_error" type="float" label="Maximum Precursor Error" value="10"
                        help="Next option specifies units (Da or ppm)" />
                    <param name="max_precursor_error_type" label="Maximum Precursor Error Type" type="select">
                        <option value="1">ppm</option>
                        <option value="0">Daltons</option>
                    </param>
                    <!--param name="max_xtandem_e" label="Maximum X! Tandem e-value" value="100" type="float" help="" /-->
                    <!--param name="max_omssa_e" label="Maximum OMSSA e-value" value="100" type="float" help="" /-->
                    <!--param name="max_mascot_e" label="Maximum Mascot e-value filter" value="100" type="float" help="" /-->
                    <param name="exclude_unknown_ptms" label="Exclude Unknown PTMs" type="boolean" truevalue="1" falsevalue="0" checked="true" />
                </when>
            </conditional>
            <!-- Contact details written into the mzIdentML export. -->
            <conditional name="contact_options">
                <param name="contact_options_selector" type="select" label="Specify Contact Information for mzIdentML"
                    help="Create a Galaxy workflow to save these values">
                    <option value="no" selected="True">GalaxyP Project contact (Not suitable for PRIDE submission)</option>
                    <option value="yes">Specify Contact Information</option>
                </param>
                <when value="no" />
                <when value="yes">
                    <param name="contact_first_name" type="text" value="" label="Contact first name.">
                        <validator type="regex" message="">\S+.*</validator>
                    </param>
                    <param name="contact_last_name" type="text" value="" label="Contact last name.">
                        <validator type="regex" message="">\S+.*</validator>
                    </param>
                    <param name="contact_email" type="text" value="" label="Contact e-mail.">
                        <validator type="regex" message="">\S+@\S+</validator>
                    </param>
                    <param name="contact_address" type="text" value="" label="Contact address.">
                        <validator type="regex" message="">\S+.*</validator>
                    </param>
                    <param name="contact_url" type="text" value="" optional="true" label="Contact URL.">
                    </param>
                    <param name="organization_name" type="text" value="" label="Organization name.">
                        <validator type="regex" message="">\S+.*</validator>
                    </param>
                    <param name="organization_email" type="text" value="" label="Organization e-mail.">
                        <validator type="regex" message="">\S+@\S+</validator>
                    </param>
                    <param name="organization_address" type="text" value="" label="Organization address.">
                        <validator type="regex" message="">\S+.*</validator>
                    </param>
                    <param name="organization_url" type="text" value="" optional="true" label="Organization URL.">
                    </param>
                </when>
            </conditional>

            <param name="include_sequences" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Include the protein sequences in mzIdentML" />
            <!-- Numeric values are report ids handed to ReportCLI via -reports;
                 'zip', 'mzidentML' and 'cps' are handled by the wrapper itself. -->
            <param name="outputs" type="select" display="checkboxes" multiple="True" label="Output options">
                <option value="zip">Zip File for import to Desktop App</option>
                <option value="mzidentML" selected="True">mzidentML File</option>
                <option value="3">PSM Report</option>
                <option value="8">Extended PSM Report</option>
                <option value="2">PSM Phosphorylation Report</option>
                <option value="5">Peptide Report</option>
                <option value="4">Peptide Phosphorylation Report</option>
                <option value="7">Protein Report</option>
                <option value="6">Protein Phosphorylation Report</option>
                <option value="0">Certificate of Analysis</option>
                <option value="1">Hierarchical Report</option>
                <option value="cps">CPS file</option>
                <validator type="no_options" message="Please select at least one output file" />
            </param>
        </inputs>

        <outputs>
            <!-- One dataset per entry of the "outputs" select; each is collected from the
                 fixed file name the command leaves in the job working directory. -->
            <data format="mzid" name="mzidentML" from_work_dir="output.mzid" label="${tool.name} on ${on_string}: mzidentML file">
                <filter>'mzidentML' in outputs</filter>
            </data>
            <!-- PeptideShakerCLI is run with "-out ...peptideshaker_output.cpsx", so collect that name. -->
            <data format="peptideshaker_archive" name="output_cps" from_work_dir="peptideshaker_output.cpsx" label="${tool.name} on ${on_string}: CPS file">
                <filter>'cps' in outputs</filter>
            </data>
            <!-- NOTE(review): from_work_dir must equal the file name given to PeptideShakerCLI -zip; confirm. -->
            <data format="zip" name="output_zip" from_work_dir="peptideshaker_output.zip" label="${tool.name} on ${on_string}: Archive">
                <filter>'zip' in outputs</filter>
            </data>
            <data format="txt" name="output_certificate" from_work_dir="certificate.txt" label="${tool.name} on ${on_string}: Parameters">
                <filter>'0' in outputs</filter>
            </data>
            <data format="tabular" name="output_hierarchical" from_work_dir="hierarchical.txt" label="${tool.name} on ${on_string}: Hierarchical Report">
                <filter>'1' in outputs</filter>
            </data>
            <data format="tabular" name="output_psm_phosphorylation" from_work_dir="psm_phospho.txt" label="${tool.name} on ${on_string}: PSM Phosphorylation Report">
                <filter>'2' in outputs</filter>
            </data>
            <data format="tabular" name="output_psm" from_work_dir="psm.txt" label="${tool.name} on ${on_string}: PSM Report">
                <filter>'3' in outputs</filter>
            </data>
            <data format="tabular" name="output_extended_psm" from_work_dir="psmx.txt" label="${tool.name} on ${on_string}: Extended PSM Report">
                <filter>'8' in outputs</filter>
            </data>
            <data format="tabular" name="output_peptides_phosphorylation" from_work_dir="peptides_phospho.txt" label="${tool.name} on ${on_string}: Peptide Phosphorylation Report">
                <filter>'4' in outputs</filter>
            </data>
            <data format="tabular" name="output_peptides" from_work_dir="peptides.txt" label="${tool.name} on ${on_string}: Peptide Report">
                <filter>'5' in outputs</filter>
            </data>
            <data format="tabular" name="output_proteins_phosphorylation" from_work_dir="proteins_phospho.txt" label="${tool.name} on ${on_string}: Protein Phosphorylation Report">
                <filter>'6' in outputs</filter>
            </data>
            <data format="tabular" name="output_proteins" from_work_dir="proteins.txt" label="${tool.name} on ${on_string}: Protein Report">
                <filter>'7' in outputs</filter>
            </data>
        </outputs>
            <tests>
                <!-- Default options: zip archive plus the PSM report. -->
                <test>
                    <!-- NOTE(review): value is empty; set it to the SearchGUI archive in test-data. -->
                    <param name="searchgui_input" value="" ftype="searchgui_archive"/>
                    <param name="processing_options_selector" value="no"/>
                    <param name="filtering_options_selector" value="no"/>
                    <param name="outputs" value="zip,3"/>
                    <!-- NOTE(review): file is empty; set it to the expected archive in test-data. -->
                    <output name="output_zip" file="" ftype="zip" compare="sim_size" delta="3000" />
                    <output name="output_psm">
                        <assert_contents>
                            <has_text text="cds.comp41779_c0_seq1" />
                        </assert_contents>
                    </output>
                </test>
                <!-- Advanced filtering with reports 0-7, each checked for characteristic text. -->
                <test>
                    <!-- NOTE(review): value is empty; set it to the SearchGUI archive in test-data. -->
                    <param name="searchgui_input" value="" ftype="searchgui_archive"/>
                    <param name="processing_options_selector" value="no"/>
                    <param name="filtering_options_selector" value="yes"/>
                    <param name="min_peptide_length" value="1"/>
                    <param name="outputs" value="0,1,2,3,4,5,6,7"/>
                    <output name="output_certificate">
                        <assert_contents>
                            <has_text text="Tolerance: 100" />
                            <has_text text="Carbamidomethylation of C" />
                        </assert_contents>
                    </output>
                    <output name="output_hierarchical">
                        <assert_contents>
                            <has_text_matching expression="1.1\tcds.comp" />
                        </assert_contents>
                    </output>
                    <output name="output_psm">
                        <assert_contents>
                            <not_has_text text="Phosphosite" />
                            <has_text text="cds.comp41779_c0_seq1" />
                        </assert_contents>
                    </output>
                    <output name="output_psm_phosphorylation">
                        <assert_contents>
                            <has_text text="Phosphosite" />
                            <has_text text="cds.comp41779_c0_seq1" />
                        </assert_contents>
                    </output>
                    <output name="output_peptides">
                        <assert_contents>
                            <has_text text="Modification" />
                            <has_text text="cds.comp41779_c0_seq1" />
                        </assert_contents>
                    </output>
                    <output name="output_peptides_phosphorylation">
                        <assert_contents>
                            <has_text text="Phosphosite" />
                            <has_text text="cds.comp41779_c0_seq1" />
                        </assert_contents>
                    </output>
                    <output name="output_proteins">
                        <assert_contents>
                            <has_text text="Modification" />
                            <has_text text="cds.comp41779_c0_seq1" />
                        </assert_contents>
                    </output>
                    <output name="output_proteins_phosphorylation">
                        <assert_contents>
                            <has_text text="Phosphosite" />
                            <has_text text="cds.comp41779_c0_seq1" />
                        </assert_contents>
                    </output>
                </test>
            </tests>
**What it does**

Runs multiple search engines (X! Tandem, OMSSA and MS-GF+) on any number of MGF peak lists using the SearchGUI application and combines the results.



PSM Report

* Protein(s):                Protein(s) to which the peptide can be attached
* Sequence:                  Sequence of the peptide
* Variable Modifications:   The variable modifications
* D-score:	                D-score for variable PTM localization
* probabilistic PTM score:	The probabilistic score (e.g. A-score or PhosphoRS) used for variable PTM localization.
* Localization Confidence:	The confidence in variable PTM localization.
* Fixed Modifications:	    The fixed modifications.
* Spectrum File:	The spectrum file.
* Spectrum Title:	The title of the spectrum.
* Spectrum Scan Number:	The spectrum scan number.
* RT:	Retention time
* m/z:	Measured m/z
* Measured Charge:	The charge as given in the spectrum file.
* Identification Charge:	The charge as inferred by the search engine.
* Theoretical Mass:	The theoretical mass of the peptide.
* Isotope Number:	The isotope number targeted by the instrument.
* Precursor m/z Error:	The precursor m/z matching error.
* Score:	Score of the retained peptide as a combination of the algorithm scores (used to rank PSMs).
* Confidence:	Confidence in percent associated to the retained PSM.
* Decoy:	Indicates whether the peptide is a decoy (1: yes, 0: no).
* Validation: Indicates the validation level of the protein group.

Protein Report

* Main Accession:	Main accession of the protein group.
* Description:	Description of the protein designed by the main accession.
* Gene Name:	The gene names of the Ensembl gene ID associated to the main accession.
* Chromosome:	The chromosome of the Ensembl gene ID associated to the main accession.
* PI:	Protein Inference status of the protein group.
* Secondary Accessions:	Other accessions in the protein group (alphabetical order).
* Protein Group:	The complete protein group (alphabetical order).
* #Peptides:	Total number of peptides.
* #Validated Peptides:	Number of validated peptides.
* #Unique:	Total number of peptides unique to this protein group.
* #PSMs:	Number of PSMs
* #Validated PSMs:	Number of validated PSMs
* Coverage (%):	Sequence coverage in percent of the protein designed by the main accession.
* Possible Coverage (%):	Possible sequence coverage in percent of the protein designed by the main accession according to the search settings.
* MW (kDa):	Molecular Weight.
* Spectrum Counting NSAF: 	Normalized Spectrum Abundance Factor (NSAF)
* Spectrum Counting emPAI:	exponentially modified Protein Abundance Index (emPAI)
* Confident Modification Sites: Number of Confident Modification Sites	List of the sites where a variable modification was confidently localized.
* Other Modification Sites: Number of other Modification Sites	List of the non-confident sites where a variable modification was localized.
* Score:	Score of the protein group.
* Confidence:	Confidence in percent associated to the protein group.
* Decoy:	Indicates whether the protein group is a decoy (1: yes, 0: no).
* Validation:	Indicates the validation level of the protein group.

Peptide Report

* Protein(s):	Protein(s) to which this peptide can be attached.
* AAs Before:	The amino-acids before the sequence.
* Sequence:	Sequence of the peptide.
* AAs After:	The amino-acids after the sequence.
* Modified Sequence:	The peptide sequence annotated with variable modifications.
* Variable Modifications:	The variable modifications.
* Localization Confidence:	The confidence in PTMs localization.
* Fixed Modifications:	The fixed modifications.
* #Validated PSMs:	Number of validated PSMs.
* #PSMs:	Number of PSMs.
* Score:	Score of the peptide.
* Confidence:	Confidence in percent associated to the peptide.
* Decoy:	Indicates whether the peptide is a decoy (1: yes, 0: no).
* Validation:	Indicates the validation level of the protein group.

Hierarchical Report

* Main Accession:	Main accession of the protein group.
* Description:	Description of the protein designed by the main accession.
* PI:	Protein Inference status of the protein group.
* Secondary Accessions:	Other accessions in the protein group (alphabetical order).
* Protein Group:	The complete protein group (alphabetical order).
* #Peptides:	Total number of peptides.
* #Validated Peptides:	Number of validated peptides.
* #Unique:	Total number of peptides unique to this protein group.
* #PSMs:	Number of PSMs
* #Validated PSMs:	Number of validated PSMs
* Coverage (%):	Sequence coverage in percent of the protein designed by the main accession.
* Possible Coverage (%):	Possible sequence coverage in percent of the protein designed by the main accession according to the search settings.
* MW (kDa):	Molecular Weight.
* Spectrum Counting NSAF: 	Normalized Spectrum Abundance Factor (NSAF)
* Spectrum Counting emPAI:	exponentially modified Protein Abundance Index (emPAI)
* Confident Modification Sites: # Confident Modification Sites	List of the sites where a variable modification was confidently localized.
* Other Modification Sites: # Other Modification Sites	List of the non-confident sites where a variable modification was localized.
* Score:	Score of the protein group.
* Confidence:	Confidence in percent associated to the protein group.
* Decoy:	Indicates whether the protein group is a decoy (1: yes, 0: no).
* Validation:	Indicates the validation level of the protein group.
* Protein(s):	Protein(s) to which this peptide can be attached.
* AAs Before:	The amino-acids before the sequence.
* Sequence:	Sequence of the peptide.
* AAs After:	The amino-acids after the sequence.
* Variable Modifications:	The variable modifications.
* Localization Confidence:	The confidence in PTMs localization.
* Fixed Modifications:	The fixed modifications.
* #Validated PSMs:	Number of validated PSMs.
* #PSMs:	Number of PSMs.
* Score:	Score of the peptide.
* Confidence:	Confidence in percent associated to the peptide.
* Decoy:	Indicates whether the peptide is a decoy (1: yes, 0: no).
* Validation:	Indicates the validation level of the protein group.
* Protein(s):	Protein(s) to which the peptide can be attached.
* Sequence:	Sequence of the peptide.
* Modified Sequence:	The peptide sequence annotated with variable modifications.
* Variable Modifications:	The variable modifications.
* D-score:	D-score for variable PTM localization.
* probabilistic PTM score:	The probabilistic score (e.g. A-score or PhosphoRS) used for variable PTM localization.
* Localization Confidence:	The confidence in variable PTM localization.
* Fixed Modifications:	The fixed modifications.
* Spectrum File:	The spectrum file.
* Spectrum Title:	The title of the spectrum.
* Spectrum Scan Number:	The spectrum scan number.
* RT:	Retention time
* m/z:	Measured m/z
* Measured Charge:	The charge as given in the spectrum file.
* Identification Charge:	The charge as inferred by the search engine.
* Theoretical Mass:	The theoretical mass of the peptide.
* Isotope Number:	The isotope number targeted by the instrument.
* Precursor m/z Error:	The precursor m/z matching error.
* Score:	Score of the retained peptide as a combination of the algorithm scores (used to rank PSMs).
* Confidence:	Confidence in percent associated to the retained PSM.
* Decoy:	Indicates whether the peptide is a decoy (1: yes, 0: no).
* Validation:	Indicates the validation level of the protein group.



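To cite the underlying tools (PeptideShaker and SearchGUI) please refer to the list of papers at http://compomics.github.io/projects/peptide-shaker.html and http://compomics.github.io/projects/searchgui.html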

If you use this tool in Galaxy, please cite Chilton J, Ira Cooke, Bjoern Gruening et al.
    <!-- Citation entries come from the shared "citations" macro. -->
    <expand macro="citations" />
</tool>